From 3c0b976bf20d236c57adcefa80f86a0a1d737727 Mon Sep 17 00:00:00 2001
From: Jordan Niethe
Date: Wed, 14 Oct 2020 18:28:36 +1100
Subject: [PATCH 001/304] powerpc/64: Set up a kernel stack for secondaries
 before cpu_restore()

Currently in generic_secondary_smp_init(), cur_cpu_spec->cpu_restore()
is called before a stack has been set up in r1. This was previously fine
as the cpu_restore() functions were implemented in assembly and did not
use a stack. However commit 5a61ef74f269 ("powerpc/64s: Support new
device tree binding for discovering CPU features") used
__restore_cpu_cpufeatures() as the cpu_restore() function for a
device-tree features based cputable entry. This is a C function and
hence uses a stack in r1.

generic_secondary_smp_init() is entered on the secondary cpus via the
primary cpu using the OPAL call opal_start_cpu(). In OPAL, each hardware
thread has its own stack. The OPAL call is run in the primary's hardware
thread. During the call, a job is scheduled on a secondary cpu that will
start executing at the address of generic_secondary_smp_init(). Hence
the value that will be left in r1 when the secondary cpu enters the
kernel is part of that secondary cpu's individual OPAL stack. This means
that __restore_cpu_cpufeatures() will write to that OPAL stack. This is
not horribly bad as each hardware thread has its own stack and the call
that enters the kernel from OPAL never returns, but it is still wrong
and should be corrected.

Create the temp kernel stack before calling cpu_restore().

As noted by mpe, for a kexec boot, the secondary CPUs are released from
the spin loop at address 0x60 by smp_release_cpus() and then jump to
generic_secondary_smp_init(). The call to smp_release_cpus() is in
setup_arch(), and it comes before the call to emergency_stack_init().
emergency_stack_init() allocates an emergency stack in the PACA for each
CPU. This address in the PACA is what is used to set up the temp kernel
stack in generic_secondary_smp_init(). Move releasing the secondary CPUs
to after the PACAs have been allocated an emergency stack, otherwise the
PACA stack pointer will contain garbage and hence the temp kernel stack
created from it will be broken.

Fixes: 5a61ef74f269 ("powerpc/64s: Support new device tree binding for discovering CPU features")
Signed-off-by: Jordan Niethe
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20201014072837.24539-1-jniethe5@gmail.com
---
 arch/powerpc/kernel/head_64.S | 8 ++++----
 arch/powerpc/kernel/setup-common.c | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 1510b2a56669..7b7c8c5ee660 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -417,6 +417,10 @@ generic_secondary_common_init:
 	/* From now on, r24 is expected to be logical cpuid */
 	mr	r24,r5
 
+	/* Create a temp kernel stack for use before relocation is on. */
+	ld	r1,PACAEMERGSP(r13)
+	subi	r1,r1,STACK_FRAME_OVERHEAD
+
 	/* See if we need to call a cpu state restore handler */
 	LOAD_REG_ADDR(r23, cur_cpu_spec)
 	ld	r23,0(r23)
@@ -445,10 +449,6 @@ generic_secondary_common_init:
 	sync				/* order paca.run and cur_cpu_spec */
 	isync				/* In case code patching happened */
 
-	/* Create a temp kernel stack for use before relocation is on.
*/ - ld r1,PACAEMERGSP(r13) - subi r1,r1,STACK_FRAME_OVERHEAD - b __secondary_start #endif /* SMP */ diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 808ec9fab605..da8c71f321ad 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -919,8 +919,6 @@ void __init setup_arch(char **cmdline_p) /* On BookE, setup per-core TLB data structures. */ setup_tlb_core_data(); - - smp_release_cpus(); #endif /* Print various info about the machine that has been gathered so far. */ @@ -944,6 +942,8 @@ void __init setup_arch(char **cmdline_p) exc_lvl_early_init(); emergency_stack_init(); + smp_release_cpus(); + initmem_init(); early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT); From 344fbab991a568dc33ad90711b489d870e18d26d Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Wed, 14 Oct 2020 18:28:37 +1100 Subject: [PATCH 002/304] powerpc/64s: Convert some cpu_setup() and cpu_restore() functions to C The only thing keeping the cpu_setup() and cpu_restore() functions used in the cputable entries for Power7, Power8, Power9 and Power10 in assembly was cpu_restore() being called before there was a stack in generic_secondary_smp_init(). Commit ("powerpc/64: Set up a kernel stack for secondaries before cpu_restore()") means that it is now possible to use C. Rewrite the functions in C so they are a little bit easier to read. This is not changing their functionality. Signed-off-by: Jordan Niethe [mpe: Tweak copyright and authorship notes] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201014072837.24539-2-jniethe5@gmail.com --- arch/powerpc/include/asm/cpu_setup_power.h | 12 + arch/powerpc/kernel/cpu_setup_power.S | 252 ------------------- arch/powerpc/kernel/cpu_setup_power.c | 271 +++++++++++++++++++++ arch/powerpc/kernel/cputable.c | 12 +- 4 files changed, 287 insertions(+), 260 deletions(-) create mode 100644 arch/powerpc/include/asm/cpu_setup_power.h delete mode 100644 arch/powerpc/kernel/cpu_setup_power.S create mode 100644 arch/powerpc/kernel/cpu_setup_power.c diff --git a/arch/powerpc/include/asm/cpu_setup_power.h b/arch/powerpc/include/asm/cpu_setup_power.h new file mode 100644 index 000000000000..24be9131f803 --- /dev/null +++ b/arch/powerpc/include/asm/cpu_setup_power.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2020 IBM Corporation + */ +void __setup_cpu_power7(unsigned long offset, struct cpu_spec *spec); +void __restore_cpu_power7(void); +void __setup_cpu_power8(unsigned long offset, struct cpu_spec *spec); +void __restore_cpu_power8(void); +void __setup_cpu_power9(unsigned long offset, struct cpu_spec *spec); +void __restore_cpu_power9(void); +void __setup_cpu_power10(unsigned long offset, struct cpu_spec *spec); +void __restore_cpu_power10(void); diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S deleted file mode 100644 index 704e8b9501ee..000000000000 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ /dev/null @@ -1,252 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * This file contains low level CPU setup functions. 
- * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) - */ - -#include -#include -#include -#include -#include -#include -#include - -/* Entry: r3 = crap, r4 = ptr to cputable entry - * - * Note that we can be called twice for pseudo-PVRs - */ -_GLOBAL(__setup_cpu_power7) - mflr r11 - bl __init_hvmode_206 - mtlr r11 - beqlr - li r0,0 - mtspr SPRN_LPID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - li r4,(LPCR_LPES1 >> LPCR_LPES_SH) - bl __init_LPCR_ISA206 - mtlr r11 - blr - -_GLOBAL(__restore_cpu_power7) - mflr r11 - mfmsr r3 - rldicl. r0,r3,4,63 - beqlr - li r0,0 - mtspr SPRN_LPID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - li r4,(LPCR_LPES1 >> LPCR_LPES_SH) - bl __init_LPCR_ISA206 - mtlr r11 - blr - -_GLOBAL(__setup_cpu_power8) - mflr r11 - bl __init_FSCR - bl __init_PMU - bl __init_PMU_ISA207 - bl __init_hvmode_206 - mtlr r11 - beqlr - li r0,0 - mtspr SPRN_LPID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - ori r3, r3, LPCR_PECEDH - li r4,0 /* LPES = 0 */ - bl __init_LPCR_ISA206 - bl __init_HFSCR - bl __init_PMU_HV - bl __init_PMU_HV_ISA207 - mtlr r11 - blr - -_GLOBAL(__restore_cpu_power8) - mflr r11 - bl __init_FSCR - bl __init_PMU - bl __init_PMU_ISA207 - mfmsr r3 - rldicl. r0,r3,4,63 - mtlr r11 - beqlr - li r0,0 - mtspr SPRN_LPID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - ori r3, r3, LPCR_PECEDH - li r4,0 /* LPES = 0 */ - bl __init_LPCR_ISA206 - bl __init_HFSCR - bl __init_PMU_HV - bl __init_PMU_HV_ISA207 - mtlr r11 - blr - -_GLOBAL(__setup_cpu_power10) - mflr r11 - bl __init_FSCR_power10 - bl __init_PMU - bl __init_PMU_ISA31 - b 1f - -_GLOBAL(__setup_cpu_power9) - mflr r11 - bl __init_FSCR_power9 - bl __init_PMU -1: bl __init_hvmode_206 - mtlr r11 - beqlr - li r0,0 - mtspr SPRN_PSSCR,r0 - mtspr SPRN_LPID,r0 - mtspr SPRN_PID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) - or r3, r3, r4 - LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR) - andc r3, r3, r4 - li r4,0 /* LPES = 0 */ - bl __init_LPCR_ISA300 - bl __init_HFSCR - bl __init_PMU_HV - mtlr r11 - blr - -_GLOBAL(__restore_cpu_power10) - mflr r11 - bl __init_FSCR_power10 - bl __init_PMU - bl __init_PMU_ISA31 - b 1f - -_GLOBAL(__restore_cpu_power9) - mflr r11 - bl __init_FSCR_power9 - bl __init_PMU -1: mfmsr r3 - rldicl. r0,r3,4,63 - mtlr r11 - beqlr - li r0,0 - mtspr SPRN_PSSCR,r0 - mtspr SPRN_LPID,r0 - mtspr SPRN_PID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) - or r3, r3, r4 - LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR) - andc r3, r3, r4 - li r4,0 /* LPES = 0 */ - bl __init_LPCR_ISA300 - bl __init_HFSCR - bl __init_PMU_HV - mtlr r11 - blr - -__init_hvmode_206: - /* Disable CPU_FTR_HVMODE and exit if MSR:HV is not set */ - mfmsr r3 - rldicl. 
r0,r3,4,63 - bnelr - ld r5,CPU_SPEC_FEATURES(r4) - LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE | CPU_FTR_P9_TM_HV_ASSIST) - andc r5,r5,r6 - std r5,CPU_SPEC_FEATURES(r4) - blr - -__init_LPCR_ISA206: - /* Setup a sane LPCR: - * Called with initial LPCR in R3 and desired LPES 2-bit value in R4 - * - * LPES = 0b01 (HSRR0/1 used for 0x500) - * PECE = 0b111 - * DPFD = 4 - * HDICE = 0 - * VC = 0b100 (VPM0=1, VPM1=0, ISL=0) - * VRMASD = 0b10000 (L=1, LP=00) - * - * Other bits untouched for now - */ - li r5,0x10 - rldimi r3,r5, LPCR_VRMASD_SH, 64-LPCR_VRMASD_SH-5 - - /* POWER9 has no VRMASD */ -__init_LPCR_ISA300: - rldimi r3,r4, LPCR_LPES_SH, 64-LPCR_LPES_SH-2 - ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2) - li r5,4 - rldimi r3,r5, LPCR_DPFD_SH, 64-LPCR_DPFD_SH-3 - clrrdi r3,r3,1 /* clear HDICE */ - li r5,4 - rldimi r3,r5, LPCR_VC_SH, 0 - mtspr SPRN_LPCR,r3 - isync - blr - -__init_FSCR_power10: - mfspr r3, SPRN_FSCR - ori r3, r3, FSCR_PREFIX - mtspr SPRN_FSCR, r3 - // fall through - -__init_FSCR_power9: - mfspr r3, SPRN_FSCR - ori r3, r3, FSCR_SCV - mtspr SPRN_FSCR, r3 - // fall through - -__init_FSCR: - mfspr r3,SPRN_FSCR - ori r3,r3,FSCR_TAR|FSCR_EBB - mtspr SPRN_FSCR,r3 - blr - -__init_HFSCR: - mfspr r3,SPRN_HFSCR - ori r3,r3,HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|\ - HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB|HFSCR_MSGP - mtspr SPRN_HFSCR,r3 - blr - -__init_PMU_HV: - li r5,0 - mtspr SPRN_MMCRC,r5 - blr - -__init_PMU_HV_ISA207: - li r5,0 - mtspr SPRN_MMCRH,r5 - blr - -__init_PMU: - li r5,0 - mtspr SPRN_MMCRA,r5 - mtspr SPRN_MMCR0,r5 - mtspr SPRN_MMCR1,r5 - mtspr SPRN_MMCR2,r5 - blr - -__init_PMU_ISA207: - li r5,0 - mtspr SPRN_MMCRS,r5 - blr - -__init_PMU_ISA31: - li r5,0 - mtspr SPRN_MMCR3,r5 - LOAD_REG_IMMEDIATE(r5, MMCRA_BHRB_DISABLE) - mtspr SPRN_MMCRA,r5 - blr diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c new file mode 100644 index 000000000000..0c2191ee139e --- /dev/null +++ b/arch/powerpc/kernel/cpu_setup_power.c @@ -0,0 +1,271 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2020, Jordan Niethe, IBM Corporation. + * + * This file contains low level CPU setup functions. + * Originally written in assembly by Benjamin Herrenschmidt & various other + * authors. 
+ */ + +#include +#include +#include +#include +#include + +/* Disable CPU_FTR_HVMODE and return false if MSR:HV is not set */ +static bool init_hvmode_206(struct cpu_spec *t) +{ + u64 msr; + + msr = mfmsr(); + if (msr & MSR_HV) + return true; + + t->cpu_features &= ~(CPU_FTR_HVMODE | CPU_FTR_P9_TM_HV_ASSIST); + return false; +} + +static void init_LPCR_ISA300(u64 lpcr, u64 lpes) +{ + /* POWER9 has no VRMASD */ + lpcr |= (lpes << LPCR_LPES_SH) & LPCR_LPES; + lpcr |= LPCR_PECE0|LPCR_PECE1|LPCR_PECE2; + lpcr |= (4ull << LPCR_DPFD_SH) & LPCR_DPFD; + lpcr &= ~LPCR_HDICE; /* clear HDICE */ + lpcr |= (4ull << LPCR_VC_SH); + mtspr(SPRN_LPCR, lpcr); + isync(); +} + +/* + * Setup a sane LPCR: + * Called with initial LPCR and desired LPES 2-bit value + * + * LPES = 0b01 (HSRR0/1 used for 0x500) + * PECE = 0b111 + * DPFD = 4 + * HDICE = 0 + * VC = 0b100 (VPM0=1, VPM1=0, ISL=0) + * VRMASD = 0b10000 (L=1, LP=00) + * + * Other bits untouched for now + */ +static void init_LPCR_ISA206(u64 lpcr, u64 lpes) +{ + lpcr |= (0x10ull << LPCR_VRMASD_SH) & LPCR_VRMASD; + init_LPCR_ISA300(lpcr, lpes); +} + +static void init_FSCR(void) +{ + u64 fscr; + + fscr = mfspr(SPRN_FSCR); + fscr |= FSCR_TAR|FSCR_EBB; + mtspr(SPRN_FSCR, fscr); +} + +static void init_FSCR_power9(void) +{ + u64 fscr; + + fscr = mfspr(SPRN_FSCR); + fscr |= FSCR_SCV; + mtspr(SPRN_FSCR, fscr); + init_FSCR(); +} + +static void init_FSCR_power10(void) +{ + u64 fscr; + + fscr = mfspr(SPRN_FSCR); + fscr |= FSCR_PREFIX; + mtspr(SPRN_FSCR, fscr); + init_FSCR_power9(); +} + +static void init_HFSCR(void) +{ + u64 hfscr; + + hfscr = mfspr(SPRN_HFSCR); + hfscr |= HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|HFSCR_DSCR|\ + HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB|HFSCR_MSGP; + mtspr(SPRN_HFSCR, hfscr); +} + +static void init_PMU_HV(void) +{ + mtspr(SPRN_MMCRC, 0); +} + +static void init_PMU_HV_ISA207(void) +{ + mtspr(SPRN_MMCRH, 0); +} + +static void init_PMU(void) +{ + mtspr(SPRN_MMCRA, 0); + mtspr(SPRN_MMCR0, 0); + mtspr(SPRN_MMCR1, 0); + mtspr(SPRN_MMCR2, 0); +} + +static void init_PMU_ISA207(void) +{ + mtspr(SPRN_MMCRS, 0); +} + +static void init_PMU_ISA31(void) +{ + mtspr(SPRN_MMCR3, 0); + mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE); +} + +/* + * Note that we can be called twice of pseudo-PVRs. + * The parameter offset is not used. 
+ */ + +void __setup_cpu_power7(unsigned long offset, struct cpu_spec *t) +{ + if (!init_hvmode_206(t)) + return; + + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH); +} + +void __restore_cpu_power7(void) +{ + u64 msr; + + msr = mfmsr(); + if (!(msr & MSR_HV)) + return; + + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH); +} + +void __setup_cpu_power8(unsigned long offset, struct cpu_spec *t) +{ + init_FSCR(); + init_PMU(); + init_PMU_ISA207(); + + if (!init_hvmode_206(t)) + return; + + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */ + init_HFSCR(); + init_PMU_HV(); + init_PMU_HV_ISA207(); +} + +void __restore_cpu_power8(void) +{ + u64 msr; + + init_FSCR(); + init_PMU(); + init_PMU_ISA207(); + + msr = mfmsr(); + if (!(msr & MSR_HV)) + return; + + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */ + init_HFSCR(); + init_PMU_HV(); + init_PMU_HV_ISA207(); +} + +void __setup_cpu_power9(unsigned long offset, struct cpu_spec *t) +{ + init_FSCR_power9(); + init_PMU(); + + if (!init_hvmode_206(t)) + return; + + mtspr(SPRN_PSSCR, 0); + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ + LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); + init_HFSCR(); + init_PMU_HV(); +} + +void __restore_cpu_power9(void) +{ + u64 msr; + + init_FSCR_power9(); + init_PMU(); + + msr = mfmsr(); + if (!(msr & MSR_HV)) + return; + + mtspr(SPRN_PSSCR, 0); + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ + LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); + init_HFSCR(); + init_PMU_HV(); +} + +void __setup_cpu_power10(unsigned long offset, struct cpu_spec *t) +{ + init_FSCR_power10(); + init_PMU(); + init_PMU_ISA31(); + + if (!init_hvmode_206(t)) + return; + + mtspr(SPRN_PSSCR, 0); + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ + LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); + init_HFSCR(); + init_PMU_HV(); +} + +void __restore_cpu_power10(void) +{ + u64 msr; + + init_FSCR_power10(); + init_PMU(); + init_PMU_ISA31(); + + msr = mfmsr(); + if (!(msr & MSR_HV)) + return; + + mtspr(SPRN_PSSCR, 0); + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ + LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); + init_HFSCR(); + init_PMU_HV(); +} diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 29de58d4dfb7..8fdb40ee86d1 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -60,19 +60,15 @@ extern void __setup_cpu_7410(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_745x(unsigned long offset, struct cpu_spec* spec); #endif /* CONFIG_PPC32 */ #ifdef CONFIG_PPC64 +#include extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_pa6t(void); extern void __restore_cpu_ppc970(void); -extern void 
__setup_cpu_power7(unsigned long offset, struct cpu_spec* spec);
-extern void __restore_cpu_power7(void);
-extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec);
-extern void __restore_cpu_power8(void);
-extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec);
-extern void __restore_cpu_power9(void);
-extern void __setup_cpu_power10(unsigned long offset, struct cpu_spec* spec);
-extern void __restore_cpu_power10(void);
+extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
+extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
+extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
 #endif /* CONFIG_PPC64 */
 #if defined(CONFIG_E500)
 extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);

From 1891ef21d92c4801ea082ee8ed478e304ddc6749 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Thu, 22 Oct 2020 14:05:46 +0000
Subject: [PATCH 003/304] powerpc/bitops: Fix possible undefined behaviour
 with fls() and fls64()

fls() and fls64() are using __builtin_clz() and __builtin_clzll(). On
powerpc, those builtins trivially use the cntlzw and cntlzd power
instructions. Although those instructions provide the expected result
with input argument 0, __builtin_clz() and __builtin_clzll() are
documented as undefined for value 0.

The easiest fix would be to use the fls() and fls64() functions defined
in include/asm-generic/bitops/builtin-fls.h and
include/asm-generic/bitops/fls64.h, but GCC output is not optimal:

00000388 <fls>:
 388:	2c 03 00 00 	cmpwi	r3,0
 38c:	41 82 00 10 	beq	39c
 390:	7c 63 00 34 	cntlzw	r3,r3
 394:	20 63 00 20 	subfic	r3,r3,32
 398:	4e 80 00 20 	blr
 39c:	38 60 00 00 	li	r3,0
 3a0:	4e 80 00 20 	blr

000003b0 <fls64>:
 3b0:	2c 03 00 00 	cmpwi	r3,0
 3b4:	40 82 00 1c 	bne	3d0
 3b8:	2f 84 00 00 	cmpwi	cr7,r4,0
 3bc:	38 60 00 00 	li	r3,0
 3c0:	4d 9e 00 20 	beqlr	cr7
 3c4:	7c 83 00 34 	cntlzw	r3,r4
 3c8:	20 63 00 20 	subfic	r3,r3,32
 3cc:	4e 80 00 20 	blr
 3d0:	7c 63 00 34 	cntlzw	r3,r3
 3d4:	20 63 00 40 	subfic	r3,r3,64
 3d8:	4e 80 00 20 	blr

When the input of fls(x) is a constant, just check whether x is zero and
return either 0 or 32 - __builtin_clz(x). Otherwise, use the cntlzw
instruction directly.

For fls64() on PPC64, do the same but with __builtin_clzll() and the
cntlzd instruction. On PPC32, let's take the generic fls64() which will
use our fls(). The result is as expected:

00000388 <fls>:
 388:	7c 63 00 34 	cntlzw	r3,r3
 38c:	20 63 00 20 	subfic	r3,r3,32
 390:	4e 80 00 20 	blr

000003a0 <fls64>:
 3a0:	2c 03 00 00 	cmpwi	r3,0
 3a4:	40 82 00 10 	bne	3b4
 3a8:	7c 83 00 34 	cntlzw	r3,r4
 3ac:	20 63 00 20 	subfic	r3,r3,32
 3b0:	4e 80 00 20 	blr
 3b4:	7c 63 00 34 	cntlzw	r3,r3
 3b8:	20 63 00 40 	subfic	r3,r3,64
 3bc:	4e 80 00 20 	blr

Fixes: 2fcff790dcb4 ("powerpc: Use builtin functions for fls()/__fls()/fls64()")
Cc: stable@vger.kernel.org
Signed-off-by: Christophe Leroy
Acked-by: Segher Boessenkool
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/348c2d3f19ffcff8abe50d52513f989c4581d000.1603375524.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/include/asm/bitops.h | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index 4a4d3afd5340..299ab33505a6 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -216,15 +216,34 @@ static inline void arch___clear_bit_unlock(int nr, volatile unsigned long *addr)
  */
 static inline int fls(unsigned int x)
 {
-	return 32 - __builtin_clz(x);
+	int lz;
+
+	if (__builtin_constant_p(x))
+		return x ?
32 - __builtin_clz(x) : 0; + asm("cntlzw %0,%1" : "=r" (lz) : "r" (x)); + return 32 - lz; } #include +/* + * 64-bit can do this using one cntlzd (count leading zeroes doubleword) + * instruction; for 32-bit we use the generic version, which does two + * 32-bit fls calls. + */ +#ifdef CONFIG_PPC64 static inline int fls64(__u64 x) { - return 64 - __builtin_clzll(x); + int lz; + + if (__builtin_constant_p(x)) + return x ? 64 - __builtin_clzll(x) : 0; + asm("cntlzd %0,%1" : "=r" (lz) : "r" (x)); + return 64 - lz; } +#else +#include +#endif #ifdef CONFIG_PPC64 unsigned int __arch_hweight8(unsigned int w); From 53f45ecc9cd04b4b963f3040f2a54c3baf03b229 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 22 Oct 2020 14:41:15 +0530 Subject: [PATCH 004/304] powerpc/mm: Move setting PTE specific flags to pfn_pmd() powerpc used to set the PTE specific flags in set_pte_at(). That is different from other architectures. To be consistent with other architectures powerpc updated pfn_pte() to set _PAGE_PTE in commit 379c926d6334 ("powerpc/mm: move setting pte specific flags to pfn_pte") That commit didn't do the same for pfn_pmd() because we expect pmd_mkhuge() to do that. But as per Linus that is a bad rule: The rule that you must use "pmd_mkhuge()" seems _completely_ wrong. The only valid use to ever make a pmd out of a pfn is to make a huge-page. Hence update pfn_pmd() to set _PAGE_PTE. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201022091115.39568-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/pgtable.h | 17 ++++++++++++++++- arch/powerpc/mm/book3s64/pgtable.c | 8 +++++++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index cd3feeac6e87..a39886681629 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1231,13 +1231,28 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) return hash__pmd_same(pmd_a, pmd_b); } -static inline pmd_t pmd_mkhuge(pmd_t pmd) +static inline pmd_t __pmd_mkhuge(pmd_t pmd) { if (radix_enabled()) return radix__pmd_mkhuge(pmd); return hash__pmd_mkhuge(pmd); } +/* + * pfn_pmd return a pmd_t that can be used as pmd pte entry. 
+ */ +static inline pmd_t pmd_mkhuge(pmd_t pmd) +{ +#ifdef CONFIG_DEBUG_VM + if (radix_enabled()) + WARN_ON((pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)) == 0); + else + WARN_ON((pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE | H_PAGE_THP_HUGE)) != + cpu_to_be64(_PAGE_PTE | H_PAGE_THP_HUGE)); +#endif + return pmd; +} + #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS extern int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index e18ae50a275c..5b3a3bae21aa 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -136,12 +136,18 @@ static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) return __pmd(pmd_val(pmd) | pgprot_val(pgprot)); } +/* + * At some point we should be able to get rid of + * pmd_mkhuge() and mk_huge_pmd() when we update all the + * other archs to mark the pmd huge in pfn_pmd() + */ pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot) { unsigned long pmdv; pmdv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK; - return pmd_set_protbits(__pmd(pmdv), pgprot); + + return __pmd_mkhuge(pmd_set_protbits(__pmd(pmdv), pgprot)); } pmd_t mk_pmd(struct page *page, pgprot_t pgprot) From ef78f2dd2398ce8ed9eeaab9c9f8af2e15f5d870 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 23 Oct 2020 13:08:38 +1100 Subject: [PATCH 005/304] powerpc/85xx: Fix declaration made after definition Currently the clang build of corenet64_smp_defconfig fails with: arch/powerpc/platforms/85xx/corenet_generic.c:210:1: error: attribute declaration must precede definition machine_arch_initcall(corenet_generic, corenet_gen_publish_devices); Fix it by moving the initcall definition prior to the machine definition, and directly below the function it calls, which is the usual style anyway. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201023020838.3274226-1-mpe@ellerman.id.au --- arch/powerpc/platforms/85xx/corenet_generic.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index 6aa8defb5857..8d6029099848 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -106,6 +106,7 @@ int __init corenet_gen_publish_devices(void) { return of_platform_bus_probe(NULL, of_device_ids, NULL); } +machine_arch_initcall(corenet_generic, corenet_gen_publish_devices); static const char * const boards[] __initconst = { "fsl,P2041RDB", @@ -206,5 +207,3 @@ define_machine(corenet_generic) { .power_save = e500_idle, #endif }; - -machine_arch_initcall(corenet_generic, corenet_gen_publish_devices); From cb5d4c465f31bc44b8bbd4934678c2b140a2ad29 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 23 Oct 2020 14:13:05 +1100 Subject: [PATCH 006/304] powerpc/ps3: Drop unused DBG macro This DBG macro is unused, and has been unused since the file was originally merged into mainline. Just drop it. 
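For reference, this is the usual compile-time debug-print idiom; the
non-DEBUG branch is a static inline carrying a printf format attribute,
rather than an empty macro, so the format string and arguments are still
type-checked even when DEBUG is off. A minimal sketch of the idiom
(matching the block removed below):

	#ifdef DEBUG
	#define DBG(fmt...) printf(fmt)
	#else
	/* no-op, but still printf-format-checks its arguments */
	static inline int __attribute__ ((format (printf, 1, 2)))
	DBG(const char *fmt, ...) { return 0; }
	#endif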
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20201023031305.3284819-1-mpe@ellerman.id.au
---
 arch/powerpc/boot/ps3.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/arch/powerpc/boot/ps3.c b/arch/powerpc/boot/ps3.c
index 6e4efbdb6b7c..f157717ae814 100644
--- a/arch/powerpc/boot/ps3.c
+++ b/arch/powerpc/boot/ps3.c
@@ -21,13 +21,6 @@ extern int lv1_get_logical_ppe_id(u64 *out_1);
 extern int lv1_get_repository_node_value(u64 in_1, u64 in_2, u64 in_3,
 	u64 in_4, u64 in_5, u64 *out_1, u64 *out_2);
 
-#ifdef DEBUG
-#define DBG(fmt...) printf(fmt)
-#else
-static inline int __attribute__ ((format (printf, 1, 2))) DBG(
-	const char *fmt, ...) {return 0;}
-#endif
-
 BSS_STACK(4096);
 
 /* A buffer that may be edited by tools operating on a zImage binary so as to

From f5eca0b279117f25020112a2f65ec9c3ea25f3ac Mon Sep 17 00:00:00 2001
From: Po-Hsu Lin
Date: Fri, 23 Oct 2020 10:45:39 +0800
Subject: [PATCH 007/304] selftests/powerpc/eeh: disable kselftest timeout
 setting for eeh-basic

The eeh-basic test has its own 60-second timeout (defined in commit
414f50434aa2 "selftests/eeh: Bump EEH wait time to 60s") per breakable
device. We have discovered that the number of breakable devices varies
across hardware, and the device recovery time ranges from 0 to 35
seconds. In our test pool it will take about 30 seconds to run on a
Power8 system with 5 breakable devices, and 60 seconds to run on a
Power9 system with 4 breakable devices.

Extend the timeout setting in the kselftest framework to 5 minutes to
give the test a chance to finish.

Signed-off-by: Po-Hsu Lin
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20201023024539.9512-1-po-hsu.lin@canonical.com
---
 tools/testing/selftests/powerpc/eeh/Makefile | 2 +-
 tools/testing/selftests/powerpc/eeh/settings | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/eeh/settings

diff --git a/tools/testing/selftests/powerpc/eeh/Makefile b/tools/testing/selftests/powerpc/eeh/Makefile
index b397babd569b..ae963eb2dc5b 100644
--- a/tools/testing/selftests/powerpc/eeh/Makefile
+++ b/tools/testing/selftests/powerpc/eeh/Makefile
@@ -3,7 +3,7 @@ noarg:
 	$(MAKE) -C ../
 
 TEST_PROGS := eeh-basic.sh
-TEST_FILES := eeh-functions.sh
+TEST_FILES := eeh-functions.sh settings
 
 top_srcdir = ../../../../..
 include ../../lib.mk

diff --git a/tools/testing/selftests/powerpc/eeh/settings b/tools/testing/selftests/powerpc/eeh/settings
new file mode 100644
index 000000000000..694d70710ff0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/eeh/settings
@@ -0,0 +1 @@
+timeout=300

From ffa1797040c5da391859a9556be7b735acbe1242 Mon Sep 17 00:00:00 2001
From: Qinglang Miao
Date: Wed, 28 Oct 2020 17:15:51 +0800
Subject: [PATCH 008/304] powerpc: sysdev: add missing iounmap() on error in
 mpic_msgr_probe()

The iounmap() of msgr_block_addr is missing before returning from
mpic_msgr_probe() in the error handling case. So use devm_ioremap()
instead of plain ioremap() when remapping the message register block,
so the mapping will be automatically released on probe failure.
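The resulting pattern, using the names from the hunk below, looks like
this (explanatory comment added here, not part of the driver):

	/* devm_ioremap() ties the mapping's lifetime to the device; it
	 * is unmapped automatically if probe fails or the driver is
	 * unbound, so no explicit iounmap() is needed on later error
	 * paths. */
	msgr_block_addr = devm_ioremap(&dev->dev, rsrc.start,
				       resource_size(&rsrc));
	if (!msgr_block_addr)
		return -EFAULT;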
Signed-off-by: Qinglang Miao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201028091551.136400-1-miaoqinglang@huawei.com --- arch/powerpc/sysdev/mpic_msgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c index f6b253e2be40..36ec0bdd8b63 100644 --- a/arch/powerpc/sysdev/mpic_msgr.c +++ b/arch/powerpc/sysdev/mpic_msgr.c @@ -191,7 +191,7 @@ static int mpic_msgr_probe(struct platform_device *dev) /* IO map the message register block. */ of_address_to_resource(np, 0, &rsrc); - msgr_block_addr = ioremap(rsrc.start, resource_size(&rsrc)); + msgr_block_addr = devm_ioremap(&dev->dev, rsrc.start, resource_size(&rsrc)); if (!msgr_block_addr) { dev_err(&dev->dev, "Failed to iomap MPIC message registers"); return -EFAULT; From a7223f5bfcaeade4a86d35263493bcda6c940891 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 28 Oct 2020 09:04:33 +0100 Subject: [PATCH 009/304] powerpc: Avoid broken GCC __attribute__((optimize)) Commit 7053f80d9696 ("powerpc/64: Prevent stack protection in early boot") introduced a couple of uses of __attribute__((optimize)) with function scope, to disable the stack protector in some early boot code. Unfortunately, and this is documented in the GCC man pages [0], overriding function attributes for optimization is broken, and is only supported for debug scenarios, not for production: the problem appears to be that setting GCC -f flags using this method will cause it to forget about some or all other optimization settings that have been applied. So the only safe way to disable the stack protector is to disable it for the entire source file. [0] https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html Fixes: 7053f80d9696 ("powerpc/64: Prevent stack protection in early boot") Signed-off-by: Ard Biesheuvel [mpe: Drop one remaining use of __nostackprotector, reported by snowpatch] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201028080433.26799-1-ardb@kernel.org --- arch/powerpc/kernel/Makefile | 3 +++ arch/powerpc/kernel/paca.c | 4 ++-- arch/powerpc/kernel/setup.h | 6 ------ arch/powerpc/kernel/setup_64.c | 2 +- 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index bf0bf1b900d2..fe2ef598e2ea 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -173,6 +173,9 @@ KCOV_INSTRUMENT_cputable.o := n KCOV_INSTRUMENT_setup_64.o := n KCOV_INSTRUMENT_paca.o := n +CFLAGS_setup_64.o += -fno-stack-protector +CFLAGS_paca.o += -fno-stack-protector + extra-$(CONFIG_PPC_FPU) += fpu.o extra-$(CONFIG_ALTIVEC) += vector.o extra-$(CONFIG_PPC64) += entry_64.o diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 0ad15768d762..7f5aae3c387d 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -208,7 +208,7 @@ static struct rtas_args * __init new_rtas_args(int cpu, unsigned long limit) struct paca_struct **paca_ptrs __read_mostly; EXPORT_SYMBOL(paca_ptrs); -void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, int cpu) +void __init initialise_paca(struct paca_struct *new_paca, int cpu) { #ifdef CONFIG_PPC_PSERIES new_paca->lppaca_ptr = NULL; @@ -241,7 +241,7 @@ void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, int } /* Put the paca pointer into r13 and SPRG_PACA */ -void __nostackprotector setup_paca(struct paca_struct *new_paca) +void setup_paca(struct paca_struct *new_paca) { /* 
Setup r13 */
 	local_paca = new_paca;

diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index 2ec835574cc9..2dd0d9cb5a20 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -8,12 +8,6 @@
 #ifndef __ARCH_POWERPC_KERNEL_SETUP_H
 #define __ARCH_POWERPC_KERNEL_SETUP_H
 
-#ifdef CONFIG_CC_IS_CLANG
-#define __nostackprotector
-#else
-#define __nostackprotector __attribute__((__optimize__("no-stack-protector")))
-#endif
-
 void initialize_cache_info(void);
 void irqstack_early_init(void);
 
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index bb9cab3641d7..da447a62ea1e 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -283,7 +283,7 @@ void __init record_spr_defaults(void)
  * device-tree is not accessible via normal means at this point.
  */
 
-void __init __nostackprotector early_setup(unsigned long dt_ptr)
+void __init early_setup(unsigned long dt_ptr)
 {
 	static __initdata struct paca_struct boot_paca;

From e80639405c40127727812a0e1f8a65ba9979f146 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V"
Date: Wed, 7 Oct 2020 11:03:05 +0530
Subject: [PATCH 010/304] powerpc/mm: Update tlbiel loop on POWER10

With POWER10, a single tlbiel instruction invalidates all the congruence
classes of the TLB, and hence we need to issue only one tlbiel with
SET=0.

Signed-off-by: Aneesh Kumar K.V
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20201007053305.232879-1-aneesh.kumar@linux.ibm.com
---
 arch/powerpc/kvm/book3s_hv.c | 7 ++++++-
 arch/powerpc/kvm/book3s_hv_builtin.c | 11 ++++++++++-
 arch/powerpc/mm/book3s64/radix_tlb.c | 23 ++++++++++++++++-------
 3 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index e3b1839fc251..0faafe6f8c4e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4949,7 +4949,12 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 	 * Work out how many sets the TLB has, for the use of
 	 * the TLB invalidation loop in book3s_hv_rmhandlers.S.
*/ - if (radix_enabled()) + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + /* + * P10 will flush all the congruence class with a single tlbiel + */ + kvm->arch.tlb_sets = 1; + } else if (radix_enabled()) kvm->arch.tlb_sets = POWER9_TLB_SETS_RADIX; /* 128 */ else if (cpu_has_feature(CPU_FTR_ARCH_300)) kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH; /* 256 */ diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 8f58dd20b362..8053efdf7ea7 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -694,6 +694,7 @@ static void wait_for_sync(struct kvm_split_mode *sip, int phase) void kvmhv_p9_set_lpcr(struct kvm_split_mode *sip) { + int num_sets; unsigned long rb, set; /* wait for every other thread to get to real mode */ @@ -704,11 +705,19 @@ void kvmhv_p9_set_lpcr(struct kvm_split_mode *sip) mtspr(SPRN_LPID, sip->lpidr_req); isync(); + /* + * P10 will flush all the congruence class with a single tlbiel + */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + num_sets = 1; + else + num_sets = POWER9_TLB_SETS_RADIX; + /* Invalidate the TLB on thread 0 */ if (local_paca->kvm_hstate.tid == 0) { sip->do_set = 0; asm volatile("ptesync" : : : "memory"); - for (set = 0; set < POWER9_TLB_SETS_RADIX; ++set) { + for (set = 0; set < num_sets; ++set) { rb = TLBIEL_INVAL_SET_LPID + (set << TLBIEL_INVAL_SET_SHIFT); asm volatile(PPC_TLBIEL(%0, %1, 0, 0, 0) : : diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index b487b489d4b6..fb66d154b26c 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -56,14 +56,21 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) if (early_cpu_has_feature(CPU_FTR_HVMODE)) { /* MSR[HV] should flush partition scope translations first. */ tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0); - for (set = 1; set < num_sets; set++) - tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0); + + if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) { + for (set = 1; set < num_sets; set++) + tlbiel_radix_set_isa300(set, is, 0, + RIC_FLUSH_TLB, 0); + } } /* Flush process scoped entries. */ tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1); - for (set = 1; set < num_sets; set++) - tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1); + + if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) { + for (set = 1; set < num_sets; set++) + tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1); + } ppc_after_tlbiel_barrier(); } @@ -300,9 +307,11 @@ static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric) return; } - /* For the remaining sets, just flush the TLB */ - for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++) - __tlbiel_pid(pid, set, RIC_FLUSH_TLB); + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + /* For the remaining sets, just flush the TLB */ + for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++) + __tlbiel_pid(pid, set, RIC_FLUSH_TLB); + } ppc_after_tlbiel_barrier(); asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory"); From 78665179e569c7e1fe102fb6c21d0f5b6951f084 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 3 Nov 2020 18:07:12 +0000 Subject: [PATCH 011/304] powerpc/feature: Fix CPU_FTRS_ALWAYS by removing CPU_FTRS_GENERIC_32 On 8xx, we get the following features: [ 0.000000] cpu_features = 0x0000000000000100 [ 0.000000] possible = 0x0000000000000120 [ 0.000000] always = 0x0000000000000000 This is not correct. 
As CONFIG_PPC_8xx is mutually exclusive with all other configurations, the three lines should be equal. The problem is due to CPU_FTRS_GENERIC_32 which is taken when CONFIG_BOOK3S_32 is NOT selected. This CPU_FTRS_GENERIC_32 is pointless because there is no generic configuration supporting all 32 bits but book3s/32. Remove this pointless generic features definition to unbreak the calculation of 'possible' features and 'always' features. Fixes: 76bc080ef5a3 ("[POWERPC] Make default cputable entries reflect selected CPU family") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/76a85f30bf981d1aeaae00df99321235494da254.1604426550.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cputable.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 3d2f94afc13a..5e31960a56a9 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -409,7 +409,6 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_CELL_TB_BUG | CPU_FTR_SMT) -#define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN) /* 64-bit CPUs */ #define CPU_FTRS_PPC970 (CPU_FTR_LWSYNC | \ @@ -520,8 +519,6 @@ enum { CPU_FTRS_7447 | CPU_FTRS_7447A | CPU_FTRS_82XX | CPU_FTRS_G2_LE | CPU_FTRS_E300 | CPU_FTRS_E300C2 | CPU_FTRS_CLASSIC32 | -#else - CPU_FTRS_GENERIC_32 | #endif #ifdef CONFIG_PPC_8xx CPU_FTRS_8XX | @@ -596,8 +593,6 @@ enum { CPU_FTRS_7447 & CPU_FTRS_7447A & CPU_FTRS_82XX & CPU_FTRS_G2_LE & CPU_FTRS_E300 & CPU_FTRS_E300C2 & CPU_FTRS_CLASSIC32 & -#else - CPU_FTRS_GENERIC_32 & #endif #ifdef CONFIG_PPC_8xx CPU_FTRS_8XX & From fdcfeaba38e5b183045f5b079af94f97658eabe6 Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Wed, 4 Nov 2020 18:59:10 +0800 Subject: [PATCH 012/304] powerpc: Use the common INIT_DATA_SECTION macro in vmlinux.lds.S Use the common INIT_DATA_SECTION rule for the linker script in an effort to regularize the linker script. Signed-off-by: Youling Tang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1604487550-20040-1-git-send-email-tangyouling@loongson.cn --- arch/powerpc/kernel/vmlinux.lds.S | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index e0548b4950de..5dc05f30349e 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -186,21 +186,7 @@ SECTIONS EXIT_TEXT } - .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { - INIT_DATA - } - - .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { - INIT_SETUP(16) - } - - .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { - INIT_CALLS - } - - .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { - CON_INITCALL - } + INIT_DATA_SECTION(16) . 
= ALIGN(8);
 	__ftr_fixup : AT(ADDR(__ftr_fixup) - LOAD_OFFSET) {
@@ -228,9 +214,6 @@ SECTIONS
 		__stop___fw_ftr_fixup = .;
 	}
 #endif
-	.init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
-		INIT_RAM_FS
-	}
 
 	PERCPU_SECTION(L1_CACHE_BYTES)

From 987c426320cce72d1b28f55c8603b239e4f7187c Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Wed, 11 Nov 2020 22:01:51 +1000
Subject: [PATCH 013/304] powerpc/64s/perf: perf interrupt does not have to
 get_user_pages to access user memory

read_user_stack_slow(), which walks the user address translation by
hand, is only required on hash, because a hash fault cannot be serviced
from "NMI" context (to avoid re-entering the hash code): the user stack
can be mapped into the Linux page tables yet still not be accessible by
the CPU.

Radix MMU mode does not have this restriction. A page fault failure
would indicate the page is not accessible via get_user_pages either, so
avoid this on radix.

Signed-off-by: Nicholas Piggin
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20201111120151.3150658-1-npiggin@gmail.com
---
 arch/powerpc/perf/callchain.h | 2 +-
 arch/powerpc/perf/callchain_64.c | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/perf/callchain.h b/arch/powerpc/perf/callchain.h
index ae24d4a00da6..d6fa6e25234f 100644
--- a/arch/powerpc/perf/callchain.h
+++ b/arch/powerpc/perf/callchain.h
@@ -33,7 +33,7 @@ static inline int __read_user_stack(const void __user *ptr, void *ret,
 
 	rc = copy_from_user_nofault(ret, ptr, size);
 
-	if (IS_ENABLED(CONFIG_PPC64) && rc)
+	if (IS_ENABLED(CONFIG_PPC64) && !radix_enabled() && rc)
 		return read_user_stack_slow(ptr, ret, size);
 
 	return rc;

diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c
index fed90e827f3a..0777b04a0c56 100644
--- a/arch/powerpc/perf/callchain_64.c
+++ b/arch/powerpc/perf/callchain_64.c
@@ -21,7 +21,8 @@
 /*
  * On 64-bit we don't want to invoke hash_page on user addresses from
  * interrupt context, so if the access faults, we read the page tables
- * to find which page (if any) is mapped and access it directly.
+ * to find which page (if any) is mapped and access it directly. Radix
+ * has no need for this so it doesn't use read_user_stack_slow.
  */
 int read_user_stack_slow(const void __user *ptr, void *buf, int nb)
 {

From a40fdaf1420d6e6bda0dd2df1e6806013e58dbe1 Mon Sep 17 00:00:00 2001
From: Zhang Xiaoxu
Date: Tue, 10 Nov 2020 21:07:52 -0500
Subject: [PATCH 014/304] Revert "powerpc/pseries/hotplug-cpu: Remove double
 free in error path"

This reverts commit a0ff72f9f5a780341e7ff5e9ba50a0dad5fa1980.

Since commit b015f6bc9547 ("powerpc/pseries: Add cpu DLPAR support for
drc-info property"), 'cpu_drcs' is no longer double freed when the
'cpus' node is not found. So the reverted patch is not needed; with it
applied, the memory will instead be leaked.
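The resulting error path in dlpar_cpu_add_by_count() then frees the
buffer exactly once; roughly (the allocation shape is a sketch of the
surrounding function, shown only for illustration):

	cpu_drcs = kcalloc(cpus_to_add, sizeof(*cpu_drcs), GFP_KERNEL);
	if (!cpu_drcs)
		return -EINVAL;

	parent = of_find_node_by_path("/cpus");
	if (!parent) {
		pr_warn("Could not find CPU root node in device tree\n");
		kfree(cpu_drcs);	/* re-added by this revert */
		return -1;
	}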
Fixes: a0ff72f9f5a7 ("powerpc/pseries/hotplug-cpu: Remove double free in error path")
Reported-by: Hulk Robot
Signed-off-by: Zhang Xiaoxu
[mpe: Caused by me applying a patch to a function that had changed in the interim]
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20201111020752.1686139-1-zhangxiaoxu5@huawei.com
---
 arch/powerpc/platforms/pseries/hotplug-cpu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index f2837e33bf5d..4bb1c9f2bb11 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -743,6 +743,7 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add)
 	parent = of_find_node_by_path("/cpus");
 	if (!parent) {
 		pr_warn("Could not find CPU root node in device tree\n");
+		kfree(cpu_drcs);
 		return -1;
 	}

From 027717a45ca251a7ba67a63db359994836962cd2 Mon Sep 17 00:00:00 2001
From: Kaixu Xia
Date: Tue, 10 Nov 2020 19:19:30 +0800
Subject: [PATCH 015/304] powerpc/powernv/sriov: fix unsigned int win compared
 to less than zero

Fix coccicheck warning:

arch/powerpc/platforms/powernv/pci-sriov.c:443:7-10: WARNING: Unsigned expression compared with zero: win < 0
arch/powerpc/platforms/powernv/pci-sriov.c:462:7-10: WARNING: Unsigned expression compared with zero: win < 0

Fixes: 39efc03e3ee8 ("powerpc/powernv/sriov: Move M64 BAR allocation into a helper")
Reported-by: Tosk Robot
Signed-off-by: Kaixu Xia
Reviewed-by: Andrew Donnellan
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/1605007170-22171-1-git-send-email-kaixuxia@tencent.com
---
 arch/powerpc/platforms/powernv/pci-sriov.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
index c4434f20f42f..28aac933a439 100644
--- a/arch/powerpc/platforms/powernv/pci-sriov.c
+++ b/arch/powerpc/platforms/powernv/pci-sriov.c
@@ -422,7 +422,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
 {
 	struct pnv_iov_data *iov;
 	struct pnv_phb *phb;
-	unsigned int win;
+	int win;
 	struct resource *res;
 	int i, j;
 	int64_t rc;

From 879add7720172ffd2986c44587510fabb7af52f5 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Sun, 8 Nov 2020 16:57:35 +0000
Subject: [PATCH 016/304] powerpc/64s: Replace RFI by RFI_TO_KERNEL and remove
 RFI

In head_64.S, we have two places using RFI to return to the kernel. Use
RFI_TO_KERNEL instead.

They are the only two places using RFI on book3s/64, so the RFI macro
can go away.
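For context, on book3s/64 these macros belong to the RFI flush
mitigation support; very roughly, and simplifying the actual
definitions in asm/exception-64s.h (this is a sketch, not the exact
code):

	#define RFI_TO_KERNEL	rfid	/* return to kernel: no flush needed */
	/* RFI_TO_USER / RFI_TO_GUEST additionally run the RFI flush
	 * sequence, when the mitigation is enabled, before returning. */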
Signed-off-by: Christophe Leroy Acked-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7719261b0a0d2787772339484c33eb809723bca7.1604854583.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc_asm.h | 1 - arch/powerpc/kernel/head_64.S | 9 +++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 511786f0e40d..bedf3eb52ebc 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -495,7 +495,6 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) #endif #ifdef CONFIG_PPC_BOOK3S_64 -#define RFI rfid #define MTMSRD(r) mtmsrd r #define MTMSR_EERI(reg) mtmsrd reg,1 #else diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 7b7c8c5ee660..3bae6286c17c 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -41,6 +41,11 @@ #include #include #include +#ifdef CONFIG_PPC_BOOK3S +#include +#else +#include +#endif /* The physical memory is laid out such that the secondary processor * spin code sits at 0x0000...0x00ff. On server, the vectors follow @@ -829,7 +834,7 @@ __secondary_start: mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 - RFI + RFI_TO_KERNEL b . /* prevent speculative execution */ /* @@ -966,7 +971,7 @@ start_here_multiplatform: ld r4,PACAKMSR(r13) mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 - RFI + RFI_TO_KERNEL b . /* prevent speculative execution */ /* This is where all platforms converge execution */ From 120c0518ec321f33cdc4670059fb76e96ceb56eb Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 8 Nov 2020 16:57:36 +0000 Subject: [PATCH 017/304] powerpc: Replace RFI by rfi on book3s/32 and booke For book3s/32 and for booke, RFI is just an rfi. Only 40x has a non trivial RFI. CONFIG_PPC_RTAS is never selected by 40x platforms. Make it more explicit by replacing RFI by rfi wherever possible. 
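For reference, the 40x variant is non-trivial because the core can
prefetch past an rfi; the macro definition in ppc_asm.h (removed by the
following patch) reads:

	#ifndef CONFIG_40x
	#define RFI	rfi
	#else
	#define RFI	rfi; b .	/* Prevent prefetch past rfi */
	#endif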
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b901ddfdeb8a0a3b7cb59999599cdfde1bbfe834.1604854583.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/entry_32.S | 6 +++--- arch/powerpc/kernel/head_book3s_32.S | 18 +++++++++--------- arch/powerpc/kernel/head_booke.h | 2 +- arch/powerpc/kvm/book3s_rmhandlers.S | 4 ++-- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 8cdc8bcde703..e10e1167ffb1 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -1027,7 +1027,7 @@ exc_exit_restart: lwz r1,GPR1(r1) .globl exc_exit_restart_end exc_exit_restart_end: - RFI + rfi _ASM_NOKPROBE_SYMBOL(exc_exit_restart) _ASM_NOKPROBE_SYMBOL(exc_exit_restart_end) @@ -1356,7 +1356,7 @@ _GLOBAL(enter_rtas) stw r7, THREAD + RTAS_SP(r2) mtspr SPRN_SRR0,r8 mtspr SPRN_SRR1,r9 - RFI + rfi 1: tophys_novmstack r9, r1 #ifdef CONFIG_VMAP_STACK li r0, MSR_KERNEL & ~MSR_IR /* can take DTLB miss */ @@ -1371,6 +1371,6 @@ _GLOBAL(enter_rtas) stw r0, THREAD + RTAS_SP(r7) mtspr SPRN_SRR0,r8 mtspr SPRN_SRR1,r9 - RFI /* return to caller */ + rfi /* return to caller */ _ASM_NOKPROBE_SYMBOL(enter_rtas) #endif /* CONFIG_PPC_RTAS */ diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 5eb9eedac920..40e8c8ce4018 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -206,7 +206,7 @@ turn_on_mmu: lis r0,start_here@h ori r0,r0,start_here@l mtspr SPRN_SRR0,r0 - RFI /* enables MMU */ + rfi /* enables MMU */ /* * We need __secondary_hold as a place to hold the other cpus on @@ -769,13 +769,13 @@ fast_hash_page_return: mtcr r11 lwz r11, THR11(r10) mfspr r10, SPRN_SPRG_SCRATCH0 - RFI + rfi 1: /* ISI */ mtcr r11 mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r10, SPRN_SPRG_SCRATCH0 - RFI + rfi stack_overflow: vmap_stack_overflow_exception @@ -910,7 +910,7 @@ __secondary_start: ori r3,r3,start_secondary@l mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 - RFI + rfi #endif /* CONFIG_SMP */ #ifdef CONFIG_KVM_BOOK3S_HANDLER @@ -1038,7 +1038,7 @@ start_here: .align 4 mtspr SPRN_SRR0,r4 mtspr SPRN_SRR1,r3 - RFI + rfi /* Load up the kernel context */ 2: bl load_up_mmu @@ -1062,7 +1062,7 @@ start_here: ori r3,r3,start_kernel@l mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 - RFI + rfi /* * void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next); @@ -1177,7 +1177,7 @@ _ENTRY(update_bats) .align 4 mtspr SPRN_SRR0, r4 mtspr SPRN_SRR1, r3 - RFI + rfi 1: bl clear_bats lis r3, BATS@ha addi r3, r3, BATS@l @@ -1196,7 +1196,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) mtmsr r3 mtspr SPRN_SRR0, r7 mtspr SPRN_SRR1, r6 - RFI + rfi flush_tlbs: lis r10, 0x40 @@ -1217,7 +1217,7 @@ mmu_off: mtspr SPRN_SRR0,r4 mtspr SPRN_SRR1,r3 sync - RFI + rfi /* We use one BAT to map up to 256M of RAM at _PAGE_OFFSET */ initial_bats: diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 71c359d438b5..e26d35de27e5 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -176,7 +176,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #endif mtspr SPRN_SRR1,r10 mtspr SPRN_SRR0,r11 - RFI /* jump to handler, enable MMU */ + rfi /* jump to handler, enable MMU */ 99: b ret_from_kernel_syscall .endm diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 3dc129a254b5..b45b750fa77a 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ 
b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -36,8 +36,8 @@
 
 #define FUNC(name)		name
 
-#define RFI_TO_KERNEL	RFI
-#define RFI_TO_GUEST	RFI
+#define RFI_TO_KERNEL	rfi
+#define RFI_TO_GUEST	rfi
 
 .macro INTERRUPT_TRAMPOLINE intno

From 62182e6c0faf75117f8d1719c118bb5fc8574012 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Sun, 8 Nov 2020 16:57:37 +0000
Subject: [PATCH 018/304] powerpc: Remove RFI macro

The RFI macro is just there to add an infinite loop past rfi in order
to avoid prefetch on 40x in half a dozen places in entry_32 and head_32.

Those places are already full of #ifdefs, so just add a few more to
explicitly show those loops and remove RFI.

Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/f7e9cb9e9240feec63cb330abf40b67d1aad852f.1604854583.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/include/asm/ppc_asm.h | 5 -----
 arch/powerpc/kernel/entry_32.S | 30 ++++++++++++++++++++++++------
 arch/powerpc/kernel/head_32.h | 5 ++++-
 3 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index bedf3eb52ebc..101986d4a29d 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -498,11 +498,6 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96)
 #define MTMSRD(r)	mtmsrd	r
 #define MTMSR_EERI(reg)	mtmsrd	reg,1
 #else
-#ifndef CONFIG_40x
-#define	RFI		rfi
-#else
-#define RFI		rfi; b .	/* Prevent prefetch past rfi */
-#endif
 #define MTMSRD(r)	mtmsr	r
 #define MTMSR_EERI(reg)	mtmsr	reg
 #endif

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index e10e1167ffb1..c7c28e8acc10 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -234,7 +234,10 @@ transfer_to_handler_cont:
 	mtspr	SPRN_SRR0,r11
 	mtspr	SPRN_SRR1,r10
 	mtlr	r9
-	RFI				/* jump to handler, enable MMU */
+	rfi				/* jump to handler, enable MMU */
+#ifdef CONFIG_40x
+	b .	/* Prevent prefetch past rfi */
+#endif
 
 #if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
 4:	rlwinm	r12,r12,0,~_TLF_NAPPING
@@ -263,7 +266,10 @@ _ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont)
 	LOAD_REG_IMMEDIATE(r0, MSR_KERNEL)
 	mtspr	SPRN_SRR0,r12
 	mtspr	SPRN_SRR1,r0
-	RFI
+	rfi
+#ifdef CONFIG_40x
+	b .	/* Prevent prefetch past rfi */
+#endif
 
 reenable_mmu:
 	/*
@@ -321,7 +327,10 @@ stack_ovf:
 #endif
 	mtspr	SPRN_SRR0,r9
 	mtspr	SPRN_SRR1,r10
-	RFI
+	rfi
+#ifdef CONFIG_40x
+	b .	/* Prevent prefetch past rfi */
+#endif
_ASM_NOKPROBE_SYMBOL(stack_ovf)
 #endif
@@ -470,7 +479,10 @@ syscall_exit_finish:
 #endif
 	mtspr	SPRN_SRR0,r7
 	mtspr	SPRN_SRR1,r8
-	RFI
+	rfi
+#ifdef CONFIG_40x
+	b .	/* Prevent prefetch past rfi */
+#endif
 _ASM_NOKPROBE_SYMBOL(syscall_exit_finish)
 #ifdef CONFIG_44x
 2:	li	r7,0
@@ -600,7 +612,10 @@ ret_from_kernel_syscall:
 #endif
 	mtspr	SPRN_SRR0, r9
 	mtspr	SPRN_SRR1, r10
-	RFI
+	rfi
+#ifdef CONFIG_40x
+	b .	/* Prevent prefetch past rfi */
+#endif
 _ASM_NOKPROBE_SYMBOL(ret_from_kernel_syscall)
 
 /*
@@ -803,7 +818,10 @@ fast_exception_return:
 	REST_GPR(9, r11)
 	REST_GPR(12, r11)
 	lwz	r11,GPR11(r11)
-	RFI
+	rfi
+#ifdef CONFIG_40x
+	b .
/* Prevent prefetch past rfi */ +#endif _ASM_NOKPROBE_SYMBOL(fast_exception_return) #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 7c767765071d..232000742c9a 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -222,7 +222,10 @@ #endif mtspr SPRN_SRR1,r10 mtspr SPRN_SRR0,r11 - RFI /* jump to handler, enable MMU */ + rfi /* jump to handler, enable MMU */ +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ +#endif 99: b ret_from_kernel_syscall .endm From b84bf098fcc49ed6bf4b0a8bed52e9df0e8f1de7 Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Tue, 10 Nov 2020 10:56:01 +0800 Subject: [PATCH 019/304] powerpc/mm: Fix comparing pointer to 0 warning Fixes coccicheck warning: ./arch/powerpc/mm/pgtable_32.c:87:11-12: WARNING comparing pointer to 0 Avoid pointer type value compared to 0. Reported-by: Tosk Robot Signed-off-by: Kaixu Xia Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1604976961-20441-1-git-send-email-kaixuxia@tencent.com --- arch/powerpc/mm/pgtable_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 079159e97bca..888b9713a316 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -84,7 +84,7 @@ int __ref map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot) pg = pte_alloc_kernel(pd, va); else pg = early_pte_alloc_kernel(pd, va); - if (pg != 0) { + if (pg) { err = 0; /* The PTE should never be already set nor present in the * hash table From 9e8d13697c38a86e0fcf1bb20d419e3d6103e085 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Wed, 21 Oct 2020 14:23:25 +0530 Subject: [PATCH 020/304] powerpc/perf: Add new power PMU flag "PPMU_P10_DD1" for power10 DD1 Add a new power PMU flag "PPMU_P10_DD1" which can be used to conditionally add any code path for power10 DD1 processor version. Also modify power10 PMU driver code to set this flag only for DD1, based on the Processor Version Register (PVR) value. 
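The new flag is then available to gate DD1-only workarounds in the
generic PMU code; the following patches add checks of this shape (a
sketch, not a specific hunk):

	if (ppmu->flags & PPMU_P10_DD1) {
		/* apply a power10 DD1 specific workaround */
	}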
Signed-off-by: Athira Rajeev
Signed-off-by: Madhavan Srinivasan
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20201021085329.384535-1-maddy@linux.ibm.com
---
 arch/powerpc/include/asm/perf_event_server.h | 1 +
 arch/powerpc/perf/power10-pmu.c | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index f6acabb6c9be..3b7baba01c92 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -82,6 +82,7 @@ struct power_pmu {
#define PPMU_ARCH_207S 0x00000080 /* PMC is architecture v2.07S */
#define PPMU_NO_SIAR 0x00000100 /* Do not use SIAR */
#define PPMU_ARCH_31 0x00000200 /* Has MMCR3, SIER2 and SIER3 */
+#define PPMU_P10_DD1 0x00000400 /* Is power10 DD1 processor version */
/*
* Values for flags to get_alternatives()
diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
index 9dbe8f9b89b4..a01e87f0b8d0 100644
--- a/arch/powerpc/perf/power10-pmu.c
+++ b/arch/powerpc/perf/power10-pmu.c
@@ -403,6 +403,7 @@ static struct power_pmu power10_pmu = {
int init_power10_pmu(void)
{
+ unsigned int pvr;
 int rc;
 /* Comes from cpu_specs[] */
@@ -410,6 +411,11 @@ int init_power10_pmu(void)
 strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power10"))
 return -ENODEV;
+ pvr = mfspr(SPRN_PVR);
+ /* Add the ppmu flag for power10 DD1 */
+ if ((PVR_CFG(pvr) == 1))
+ power10_pmu.flags |= PPMU_P10_DD1;
+
 /* Set the PERF_REG_EXTENDED_MASK here */
 PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_31;

From fdf13a657508a12cd21a4d7b988cb260cb8fbd38 Mon Sep 17 00:00:00 2001
From: Athira Rajeev
Date: Wed, 21 Oct 2020 14:23:26 +0530
Subject: [PATCH 021/304] powerpc/perf: Drop the check for SIAR_VALID

In power10 DD1, there is an issue that causes the SIAR_VALID bit of the SIER (Sampled Instruction Event Register) to not be set. But the SIAR_VALID bit is used for fetching the instruction address from the SIAR (Sampled Instruction Address Register), and marked events are sampled only if the SIAR_VALID bit is set. So drop the check for SIAR_VALID and always return true in the case of power10 DD1.

Signed-off-by: Athira Rajeev
Signed-off-by: Madhavan Srinivasan
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20201021085329.384535-2-maddy@linux.ibm.com
---
 arch/powerpc/perf/core-book3s.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 08643cba1494..3b62dbb94796 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -350,7 +350,14 @@ static inline int siar_valid(struct pt_regs *regs)
 int marked = mmcra & MMCRA_SAMPLE_ENABLE;
 if (marked) {
- if (ppmu->flags & PPMU_HAS_SIER)
+ /*
+ * SIER[SIAR_VALID] is not set for some
+ * marked events on power10 DD1, so drop
+ * the check for SIER[SIAR_VALID] and return true.
+ */
+ if (ppmu->flags & PPMU_P10_DD1)
+ return 0x1;
+ else if (ppmu->flags & PPMU_HAS_SIER)
 return regs->dar & SIER_SIAR_VALID;
 if (ppmu->flags & PPMU_SIAR_VALID)

From d9f7088dd6d8859f385565ca8acd2681e1f700f9 Mon Sep 17 00:00:00 2001
From: Athira Rajeev
Date: Wed, 21 Oct 2020 14:23:27 +0530
Subject: [PATCH 022/304] powerpc/perf: Use the address from SIAR register to set cpumode flags

While setting the processor mode for any sample, perf_get_misc_flags() expects the privilege level to differentiate the userspace and kernel address.
On power10 DD1, there is an issue that causes the MSR_HV and MSR_PR bits of the Sampled Instruction Event Register (SIER) not to be set for marked events. Hence add a check to use the address in SIAR (Sampled Instruction Address Register) to identify the privilege level.

Signed-off-by: Athira Rajeev
Signed-off-by: Madhavan Srinivasan
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20201021085329.384535-3-maddy@linux.ibm.com
---
 arch/powerpc/perf/core-book3s.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 3b62dbb94796..6be0349e01ad 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -250,10 +250,24 @@ static inline u32 perf_flags_from_msr(struct pt_regs *regs)
static inline u32 perf_get_misc_flags(struct pt_regs *regs)
{
 bool use_siar = regs_use_siar(regs);
+ unsigned long mmcra = regs->dsisr;
+ int marked = mmcra & MMCRA_SAMPLE_ENABLE;
 if (!use_siar)
 return perf_flags_from_msr(regs);
+ /*
+ * Check the address in SIAR to identify the
+ * privilege levels since the SIER[MSR_HV, MSR_PR]
+ * bits are not set for marked events in power10
+ * DD1.
+ */
+ if (marked && (ppmu->flags & PPMU_P10_DD1)) {
+ if (is_kernel_addr(mfspr(SPRN_SIAR)))
+ return PERF_RECORD_MISC_KERNEL;
+ return PERF_RECORD_MISC_USER;
+ }
+
 /*
 * If we don't have flags in MMCRA, rather than using
 * the MSR, we intuit the flags from the address in

From 2ca13a4cc56c920a6c9fc8ee45d02bccacd7f46c Mon Sep 17 00:00:00 2001
From: Madhavan Srinivasan
Date: Wed, 21 Oct 2020 14:23:29 +0530
Subject: [PATCH 023/304] powerpc/perf: Use regs->nip when SIAR is zero

In power10 DD1, there is an issue where the SIAR (Sampled Instruction Address Register) is not latching to the sampled address during random sampling. This results in a value of 0 in the SIAR. Add a check to use regs->nip when SIAR is zero.

Signed-off-by: Madhavan Srinivasan
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20201021085329.384535-5-maddy@linux.ibm.com
---
 arch/powerpc/perf/core-book3s.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 6be0349e01ad..3c8c6ce634c5 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -263,9 +263,16 @@ static inline u32 perf_get_misc_flags(struct pt_regs *regs)
 * DD1.
*/ if (marked && (ppmu->flags & PPMU_P10_DD1)) { - if (is_kernel_addr(mfspr(SPRN_SIAR))) - return PERF_RECORD_MISC_KERNEL; - return PERF_RECORD_MISC_USER; + unsigned long siar = mfspr(SPRN_SIAR); + if (siar) { + if (is_kernel_addr(siar)) + return PERF_RECORD_MISC_KERNEL; + return PERF_RECORD_MISC_USER; + } else { + if (is_kernel_addr(regs->nip)) + return PERF_RECORD_MISC_KERNEL; + return PERF_RECORD_MISC_USER; + } } /* @@ -2199,8 +2206,14 @@ unsigned long perf_misc_flags(struct pt_regs *regs) unsigned long perf_instruction_pointer(struct pt_regs *regs) { bool use_siar = regs_use_siar(regs); + unsigned long siar = mfspr(SPRN_SIAR); - if (use_siar && siar_valid(regs)) + if (ppmu->flags & PPMU_P10_DD1) { + if (siar) + return siar; + else + return regs->nip; + } else if (use_siar && siar_valid(regs)) return mfspr(SPRN_SIAR) + perf_ip_adjust(regs); else if (use_siar) return 0; // no valid instruction pointer From c74cf7a3d59a21b290fe0468f5b470d0b8ee37df Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 11 Nov 2020 15:53:15 +0100 Subject: [PATCH 024/304] powerpc/powernv/memtrace: Don't leak kernel memory to user space We currently leak kernel memory to user space, because memory offlining doesn't do any implicit clearing of memory and we are missing explicit clearing of memory. Let's keep it simple and clear pages before removing the linear mapping. Reproduced in QEMU/TCG with 10 GiB of main memory: [root@localhost ~]# dd obs=9G if=/dev/urandom of=/dev/null [... wait until "free -m" used counter no longer changes and cancel] 19665802+0 records in 1+0 records out 9663676416 bytes (9.7 GB, 9.0 GiB) copied, 135.548 s, 71.3 MB/s [root@localhost ~]# cat /sys/devices/system/memory/block_size_bytes 40000000 [root@localhost ~]# echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable [ 402.978663][ T1086] page:000000001bc4bc74 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x24900 [ 402.980063][ T1086] flags: 0x7ffff000001000(reserved) [ 402.980415][ T1086] raw: 007ffff000001000 c00c000000924008 c00c000000924008 0000000000000000 [ 402.980627][ T1086] raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000 [ 402.980845][ T1086] page dumped because: unmovable page [ 402.989608][ T1086] Offlined Pages 16384 [ 403.324155][ T1086] memtrace: Allocated trace memory on node 0 at 0x0000000200000000 Before this patch: [root@localhost ~]# hexdump -C /sys/kernel/debug/powerpc/memtrace/00000000/trace | head 00000000 c8 25 72 51 4d 26 36 c5 5c c2 56 15 d5 1a cd 10 |.%rQM&6.\.V.....| 00000010 19 b9 50 b2 cb e3 60 b8 ec 0a f3 ec 4b 3c 39 f0 |..P...`.....K<9.|$ 00000020 4e 5a 4c cf bd 26 19 ff 37 79 13 67 24 b7 b8 57 |NZL..&..7y.g$..W|$ 00000030 98 3e f5 be 6f 14 6a bd a4 52 bc 6e e9 e0 c1 5d |.>..o.j..R.n...]|$ 00000040 76 b3 ae b5 88 d7 da e3 64 23 85 2c 10 88 07 b6 |v.......d#.,....|$ 00000050 9a d8 91 de f7 50 27 69 2e 64 9c 6f d3 19 45 79 |.....P'i.d.o..Ey|$ 00000060 6a 6f 8a 61 71 19 1f c7 f1 df 28 26 ca 0f 84 55 |jo.aq.....(&...U|$ 00000070 01 3f be e4 e2 e1 da ff 7b 8c 8e 32 37 b4 24 53 |.?......{..27.$S|$ 00000080 1b 70 30 45 56 e6 8c c4 0e b5 4c fb 9f dd 88 06 |.p0EV.....L.....|$ 00000090 ef c4 18 79 f1 60 b1 5c 79 59 4d f4 36 d7 4a 5c |...y.`.\yYM.6.J\|$ After this patch: [root@localhost ~]# hexdump -C /sys/kernel/debug/powerpc/memtrace/00000000/trace | head 00000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| * 40000000 Fixes: 9d5171a8f248 ("powerpc/powernv: Enable removal of memory for in memory tracing") Cc: stable@vger.kernel.org # v4.14+ 
Reported-by: Michael Ellerman
Signed-off-by: David Hildenbrand
Reviewed-by: Oscar Salvador
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20201111145322.15793-2-david@redhat.com
---
 arch/powerpc/platforms/powernv/memtrace.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index 6828108486f8..eea1f94482ff 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -67,6 +67,23 @@ static int change_memblock_state(struct memory_block *mem, void *arg)
 return 0;
}
+static void memtrace_clear_range(unsigned long start_pfn,
+ unsigned long nr_pages)
+{
+ unsigned long pfn;
+
+ /*
+ * As pages are offline, we cannot trust the memmap anymore. As HIGHMEM
+ * does not apply, avoid passing around "struct page" and use
+ * clear_page() instead directly.
+ */
+ for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
+ if (IS_ALIGNED(pfn, PAGES_PER_SECTION))
+ cond_resched();
+ clear_page(__va(PFN_PHYS(pfn)));
+ }
+}
+
/* called with device_hotplug_lock held */
static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
{
@@ -111,6 +128,11 @@ static u64 memtrace_alloc_node(u32 nid, u64 size)
 lock_device_hotplug();
 for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) {
 if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) {
+ /*
+ * Clear the range while we still have a linear
+ * mapping.
+ */
+ memtrace_clear_range(base_pfn, nr_pages);
 /*
 * Remove memory in memory block size chunks so that
 * iomem resources are always split to the same size and

From d6718941a2767fb383e105d257d2105fe4f15f0e Mon Sep 17 00:00:00 2001
From: David Hildenbrand
Date: Wed, 11 Nov 2020 15:53:16 +0100
Subject: [PATCH 025/304] powerpc/powernv/memtrace: Fix crashing the kernel when enabling concurrently

It's very easy to crash the kernel right now by simply trying to enable memtrace concurrently, hammering on the "enable" interface.

loop.sh:
#!/bin/bash

dmesg --console-off

while true; do
echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable
done

[root@localhost ~]# loop.sh &
[root@localhost ~]# loop.sh &

This quickly results in a kernel crash. Let's properly protect it using a mutex.
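The shape of the fix is to serialise the whole enable/disable handler (a sketch; the full rework of the error paths is in the diff below):

	static DEFINE_MUTEX(memtrace_mutex);

	static int memtrace_enable_set(void *data, u64 val)
	{
		int rc = -EAGAIN;

		mutex_lock(&memtrace_mutex);
		/* ... re-add/online or offline/remove trace memory ... */
		mutex_unlock(&memtrace_mutex);
		return rc;
	}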
Fixes: 9d5171a8f248 ("powerpc/powernv: Enable removal of memory for in memory tracing") Cc: stable@vger.kernel.org# v4.14+ Signed-off-by: David Hildenbrand Reviewed-by: Oscar Salvador Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201111145322.15793-3-david@redhat.com --- arch/powerpc/platforms/powernv/memtrace.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index eea1f94482ff..0e42fe2d7b6a 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -30,6 +30,7 @@ struct memtrace_entry { char name[16]; }; +static DEFINE_MUTEX(memtrace_mutex); static u64 memtrace_size; static struct memtrace_entry *memtrace_array; @@ -279,6 +280,7 @@ static int memtrace_online(void) static int memtrace_enable_set(void *data, u64 val) { + int rc = -EAGAIN; u64 bytes; /* @@ -291,25 +293,31 @@ static int memtrace_enable_set(void *data, u64 val) return -EINVAL; } + mutex_lock(&memtrace_mutex); + /* Re-add/online previously removed/offlined memory */ if (memtrace_size) { if (memtrace_online()) - return -EAGAIN; + goto out_unlock; } - if (!val) - return 0; + if (!val) { + rc = 0; + goto out_unlock; + } /* Offline and remove memory */ if (memtrace_init_regions_runtime(val)) - return -EINVAL; + goto out_unlock; if (memtrace_init_debugfs()) - return -EINVAL; + goto out_unlock; memtrace_size = val; - - return 0; + rc = 0; +out_unlock: + mutex_unlock(&memtrace_mutex); + return rc; } static int memtrace_enable_get(void *data, u64 *val) From 4abb1e5b63ac3281275315fc6b0cde0b9c2e2e42 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 11 Nov 2020 15:53:17 +0100 Subject: [PATCH 026/304] powerpc/mm: factor out creating/removing linear mapping We want to stop abusing memory hotplug infrastructure in memtrace code to perform allocations and remove the linear mapping. Instead we will use alloc_contig_pages() and remove the linear mapping manually. Let's factor out creating/removing the linear mapping into arch_create_linear_mapping() / arch_remove_linear_mapping() - so in the future, we might be able to have whole arch_add_memory() / arch_remove_memory() be implemented in common code. 
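With the helpers factored out, arch_add_memory() reduces to the following shape (a sketch; the exact code is in the diff below):

	int __ref arch_add_memory(int nid, u64 start, u64 size,
				  struct mhp_params *params)
	{
		int rc = arch_create_linear_mapping(nid, start, size, params);

		if (rc)
			return rc;
		return __add_pages(nid, start >> PAGE_SHIFT,
				   size >> PAGE_SHIFT, params);
	}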
Signed-off-by: David Hildenbrand
Reviewed-by: Oscar Salvador
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20201111145322.15793-4-david@redhat.com
---
 arch/powerpc/mm/mem.c | 41 +++++++++++++++++++++++-----------
 include/linux/memory_hotplug.h | 3 +++
 2 files changed, 31 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 01ec2a252f09..8a86d81f8df0 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -120,34 +120,26 @@ static void flush_dcache_range_chunked(unsigned long start, unsigned long stop,
 }
}
-int __ref arch_add_memory(int nid, u64 start, u64 size,
- struct mhp_params *params)
+int __ref arch_create_linear_mapping(int nid, u64 start, u64 size,
+ struct mhp_params *params)
{
- unsigned long start_pfn = start >> PAGE_SHIFT;
- unsigned long nr_pages = size >> PAGE_SHIFT;
 int rc;
 start = (unsigned long)__va(start);
 rc = create_section_mapping(start, start + size, nid, params->pgprot);
 if (rc) {
- pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n",
+ pr_warn("Unable to create linear mapping for 0x%llx..0x%llx: %d\n",
 start, start + size, rc);
 return -EFAULT;
 }
-
- return __add_pages(nid, start_pfn, nr_pages, params);
+ return 0;
}
-void __ref arch_remove_memory(int nid, u64 start, u64 size,
- struct vmem_altmap *altmap)
+void __ref arch_remove_linear_mapping(u64 start, u64 size)
{
- unsigned long start_pfn = start >> PAGE_SHIFT;
- unsigned long nr_pages = size >> PAGE_SHIFT;
 int ret;
- __remove_pages(start_pfn, nr_pages, altmap);
-
 /* Remove htab bolted mappings for this section of memory */
 start = (unsigned long)__va(start);
 flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE);
@@ -160,6 +152,29 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
 */
 vm_unmap_aliases();
}
+
+int __ref arch_add_memory(int nid, u64 start, u64 size,
+ struct mhp_params *params)
+{
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+ int rc;
+
+ rc = arch_create_linear_mapping(nid, start, size, params);
+ if (rc)
+ return rc;
+ return __add_pages(nid, start_pfn, nr_pages, params);
+}
+
+void __ref arch_remove_memory(int nid, u64 start, u64 size,
+ struct vmem_altmap *altmap)
+{
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+
+ __remove_pages(start_pfn, nr_pages, altmap);
+ arch_remove_linear_mapping(start, size);
+}
#endif
#ifndef CONFIG_NEED_MULTIPLE_NODES
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index d65c6fdc5cfc..00b9e9bd3850 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -375,6 +375,9 @@ extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
 unsigned long pnum);
extern struct zone *zone_for_pfn_range(int online_type, int nid, unsigned start_pfn, unsigned long nr_pages);
+extern int arch_create_linear_mapping(int nid, u64 start, u64 size,
+ struct mhp_params *params);
+void arch_remove_linear_mapping(u64 start, u64 size);
#endif /* CONFIG_MEMORY_HOTPLUG */
#endif /* __LINUX_MEMORY_HOTPLUG_H */

From e5b2af044f31bf18defa557a8cd11c23caefa34c Mon Sep 17 00:00:00 2001
From: David Hildenbrand
Date: Wed, 11 Nov 2020 15:53:18 +0100
Subject: [PATCH 027/304] powerpc/mm: protect linear mapping modifications by a mutex

This code currently relies on mem_hotplug_begin()/mem_hotplug_done() - create_section_mapping()/remove_section_mapping() implementations cannot tolerate getting called concurrently.
Let's prepare for callers (memtrace) not holding any such locks (and don't force them to mess with memory hotplug locks). Other parts in these functions don't seem to rely on external locking. Signed-off-by: David Hildenbrand Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201111145322.15793-5-david@redhat.com --- arch/powerpc/mm/mem.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 8a86d81f8df0..ca5c4b54c366 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -58,6 +58,7 @@ #define CPU_FTR_NOEXECUTE 0 #endif +static DEFINE_MUTEX(linear_mapping_mutex); unsigned long long memory_limit; bool init_mem_is_free; @@ -126,8 +127,10 @@ int __ref arch_create_linear_mapping(int nid, u64 start, u64 size, int rc; start = (unsigned long)__va(start); + mutex_lock(&linear_mapping_mutex); rc = create_section_mapping(start, start + size, nid, params->pgprot); + mutex_unlock(&linear_mapping_mutex); if (rc) { pr_warn("Unable to create linear mapping for 0x%llx..0x%llx: %d\n", start, start + size, rc); @@ -144,7 +147,9 @@ void __ref arch_remove_linear_mapping(u64 start, u64 size) start = (unsigned long)__va(start); flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE); + mutex_lock(&linear_mapping_mutex); ret = remove_section_mapping(start, start + size); + mutex_unlock(&linear_mapping_mutex); WARN_ON_ONCE(ret); /* Ensure all vmalloc mappings are flushed in case they also From 1f73ad3e8d755dbec52fcec98618a7ce4de12af2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 11 Nov 2020 15:53:19 +0100 Subject: [PATCH 028/304] powerpc/mm: print warning in arch_remove_linear_mapping() Let's print a warning similar to in arch_add_linear_mapping() instead of WARN_ON_ONCE() and eventually crashing the kernel. Signed-off-by: David Hildenbrand Reviewed-by: Oscar Salvador Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201111145322.15793-6-david@redhat.com --- arch/powerpc/mm/mem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index ca5c4b54c366..c5755b9efb64 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -150,7 +150,9 @@ void __ref arch_remove_linear_mapping(u64 start, u64 size) mutex_lock(&linear_mapping_mutex); ret = remove_section_mapping(start, start + size); mutex_unlock(&linear_mapping_mutex); - WARN_ON_ONCE(ret); + if (ret) + pr_warn("Unable to remove linear mapping for 0x%llx..0x%llx: %d\n", + start, start + size, ret); /* Ensure all vmalloc mappings are flushed in case they also * hit that section of memory From d8bd9a121c2f2bc8b36da930dc91b69fd2a705e2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 11 Nov 2020 15:53:20 +0100 Subject: [PATCH 029/304] powerpc/book3s64/hash: Drop WARN_ON in hash__remove_section_mapping() The single caller (arch_remove_linear_mapping()) prints a proper warning when this function fails. No need to eventually crash the kernel - let's drop this WARN_ON. 
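For reference, the caller-side reporting added in the previous patch already covers the failure, making this WARN_ON redundant (simplified from arch_remove_linear_mapping(), locking elided):

	ret = remove_section_mapping(start, start + size);
	if (ret)
		pr_warn("Unable to remove linear mapping for 0x%llx..0x%llx: %d\n",
			start, start + size, ret);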
Suggested-by: Oscar Salvador Signed-off-by: David Hildenbrand Reviewed-by: Oscar Salvador Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201111145322.15793-7-david@redhat.com --- arch/powerpc/mm/book3s64/hash_utils.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 24702c0a92e0..d2dcb7757c68 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -845,7 +845,6 @@ int hash__remove_section_mapping(unsigned long start, unsigned long end) { int rc = htab_remove_mapping(start, end, mmu_linear_psize, mmu_kernel_ssize); - WARN_ON(rc < 0); if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC) pr_warn("Hash collision while resizing HPT\n"); From ca2c36cae9d48b180ea51259e35ab3d95d327df2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 11 Nov 2020 15:53:21 +0100 Subject: [PATCH 030/304] powerpc/mm: remove linear mapping if __add_pages() fails in arch_add_memory() Let's revert what we did in case something goes wrong and we return an error - as already done on arm64 and s390x. Signed-off-by: David Hildenbrand Reviewed-by: Oscar Salvador Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201111145322.15793-8-david@redhat.com --- arch/powerpc/mm/mem.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index c5755b9efb64..8b946ec68d1b 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -170,7 +170,10 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, rc = arch_create_linear_mapping(nid, start, size, params); if (rc) return rc; - return __add_pages(nid, start_pfn, nr_pages, params); + rc = __add_pages(nid, start_pfn, nr_pages, params); + if (rc) + arch_remove_linear_mapping(start, size); + return rc; } void __ref arch_remove_memory(int nid, u64 start, u64 size, From 0bd4b96d99108b7ea9bac0573957483be7781d70 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 11 Nov 2020 15:53:22 +0100 Subject: [PATCH 031/304] powernv/memtrace: don't abuse memory hot(un)plug infrastructure for memory allocations Let's use alloc_contig_pages() for allocating memory and remove the linear mapping manually via arch_remove_linear_mapping(). Mark all pages PG_offline, such that they will definitely not get touched - e.g., when hibernating. When freeing memory, try to revert what we did. The original idea was discussed in: https://lkml.kernel.org/r/48340e96-7e6b-736f-9e23-d3111b915b6e@redhat.com This is similar to CONFIG_DEBUG_PAGEALLOC handling on other architectures, whereby only single pages are unmapped from the linear mapping. Let's mimic what memory hot(un)plug would do with the linear mapping. We now need MEMORY_HOTPLUG and CONTIG_ALLOC as dependencies. Add a TODO that we want to use __GFP_ZERO for clearing once alloc_contig_pages() understands that. 
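The new allocation path boils down to the following (a sketch; the complete version, including clearing and PageOffline() handling, is in the diff below):

	page = alloc_contig_pages(nr_pages, GFP_KERNEL | __GFP_THISNODE |
				  __GFP_NOWARN, nid, NULL);
	if (!page)
		return 0;
	start_pfn = page_to_pfn(page);
	/* ... clear the range, mark the pages PageOffline() ... */
	arch_remove_linear_mapping(PFN_PHYS(start_pfn), size);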
Tested with in QEMU/TCG with 10 GiB of main memory: [root@localhost ~]# echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable [ 105.903043][ T1080] memtrace: Allocated trace memory on node 0 at 0x0000000080000000 [root@localhost ~]# echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable [ 145.042493][ T1080] radix-mmu: Mapped 0x0000000080000000-0x00000000c0000000 with 64.0 KiB pages [ 145.049019][ T1080] memtrace: Freed trace memory back on node 0 [ 145.333960][ T1080] memtrace: Allocated trace memory on node 0 at 0x0000000080000000 [root@localhost ~]# echo 0x80000000 > /sys/kernel/debug/powerpc/memtrace/enable [ 213.606916][ T1080] radix-mmu: Mapped 0x0000000080000000-0x00000000c0000000 with 64.0 KiB pages [ 213.613855][ T1080] memtrace: Freed trace memory back on node 0 [ 214.185094][ T1080] memtrace: Allocated trace memory on node 0 at 0x0000000080000000 [root@localhost ~]# echo 0x100000000 > /sys/kernel/debug/powerpc/memtrace/enable [ 234.874872][ T1080] radix-mmu: Mapped 0x0000000080000000-0x0000000100000000 with 64.0 KiB pages [ 234.886974][ T1080] memtrace: Freed trace memory back on node 0 [ 234.890153][ T1080] memtrace: Failed to allocate trace memory on node 0 [root@localhost ~]# echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable [ 259.490196][ T1080] memtrace: Allocated trace memory on node 0 at 0x0000000080000000 I also made sure allocated memory is properly zeroed. Note 1: We currently won't be allocating from ZONE_MOVABLE - because our pages are not movable. However, as we don't run with any memory hot(un)plug mechanism around, we could make an exception to increase the chance of allocations succeeding. Note 2: PG_reserved isn't sufficient. E.g., kernel_page_present() used along PG_reserved in hibernation code will always return "true" on powerpc, resulting in the pages getting touched. It's too generic - e.g., indicates boot allocations. Note 3: For now, we keep using memory_block_size_bytes() as minimum granularity. Suggested-by: Michal Hocko Signed-off-by: David Hildenbrand Reviewed-by: Oscar Salvador Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201111145322.15793-9-david@redhat.com --- arch/powerpc/platforms/powernv/Kconfig | 8 +- arch/powerpc/platforms/powernv/memtrace.c | 159 ++++++++-------------- 2 files changed, 60 insertions(+), 107 deletions(-) diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index 938803eab0ad..619b093a0657 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -27,11 +27,11 @@ config OPAL_PRD recovery diagnostics on OpenPower machines config PPC_MEMTRACE - bool "Enable removal of RAM from kernel mappings for tracing" - depends on PPC_POWERNV && MEMORY_HOTREMOVE + bool "Enable runtime allocation of RAM for tracing" + depends on PPC_POWERNV && MEMORY_HOTPLUG && CONTIG_ALLOC help - Enabling this option allows for the removal of memory (RAM) - from the kernel mappings to be used for hardware tracing. + Enabling this option allows for runtime allocation of memory (RAM) + for hardware tracing. 
config PPC_VAS bool "IBM Virtual Accelerator Switchboard (VAS)" diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 0e42fe2d7b6a..5fc9408bb0b3 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -51,33 +51,12 @@ static const struct file_operations memtrace_fops = { .open = simple_open, }; -static int check_memblock_online(struct memory_block *mem, void *arg) -{ - if (mem->state != MEM_ONLINE) - return -1; - - return 0; -} - -static int change_memblock_state(struct memory_block *mem, void *arg) -{ - unsigned long state = (unsigned long)arg; - - mem->state = state; - - return 0; -} - static void memtrace_clear_range(unsigned long start_pfn, unsigned long nr_pages) { unsigned long pfn; - /* - * As pages are offline, we cannot trust the memmap anymore. As HIGHMEM - * does not apply, avoid passing around "struct page" and use - * clear_page() instead directly. - */ + /* As HIGHMEM does not apply, use clear_page() directly. */ for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) { if (IS_ALIGNED(pfn, PAGES_PER_SECTION)) cond_resched(); @@ -85,72 +64,39 @@ static void memtrace_clear_range(unsigned long start_pfn, } } -/* called with device_hotplug_lock held */ -static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages) -{ - const unsigned long start = PFN_PHYS(start_pfn); - const unsigned long size = PFN_PHYS(nr_pages); - - if (walk_memory_blocks(start, size, NULL, check_memblock_online)) - return false; - - walk_memory_blocks(start, size, (void *)MEM_GOING_OFFLINE, - change_memblock_state); - - if (offline_pages(start_pfn, nr_pages)) { - walk_memory_blocks(start, size, (void *)MEM_ONLINE, - change_memblock_state); - return false; - } - - walk_memory_blocks(start, size, (void *)MEM_OFFLINE, - change_memblock_state); - - - return true; -} - static u64 memtrace_alloc_node(u32 nid, u64 size) { - u64 start_pfn, end_pfn, nr_pages, pfn; - u64 base_pfn; - u64 bytes = memory_block_size_bytes(); + const unsigned long nr_pages = PHYS_PFN(size); + unsigned long pfn, start_pfn; + struct page *page; - if (!node_spanned_pages(nid)) + /* + * Trace memory needs to be aligned to the size, which is guaranteed + * by alloc_contig_pages(). + */ + page = alloc_contig_pages(nr_pages, GFP_KERNEL | __GFP_THISNODE | + __GFP_NOWARN, nid, NULL); + if (!page) return 0; + start_pfn = page_to_pfn(page); - start_pfn = node_start_pfn(nid); - end_pfn = node_end_pfn(nid); - nr_pages = size >> PAGE_SHIFT; + /* + * Clear the range while we still have a linear mapping. + * + * TODO: use __GFP_ZERO with alloc_contig_pages() once supported. + */ + memtrace_clear_range(start_pfn, nr_pages); - /* Trace memory needs to be aligned to the size */ - end_pfn = round_down(end_pfn - nr_pages, nr_pages); + /* + * Set pages PageOffline(), to indicate that nobody (e.g., hibernation, + * dumping, ...) should be touching these pages. + */ + for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) + __SetPageOffline(pfn_to_page(pfn)); - lock_device_hotplug(); - for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) { - if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) { - /* - * Clear the range while we still have a linear - * mapping. - */ - memtrace_clear_range(base_pfn, nr_pages); - /* - * Remove memory in memory block size chunks so that - * iomem resources are always split to the same size and - * we never try to remove memory that spans two iomem - * resources. 
- */ - end_pfn = base_pfn + nr_pages; - for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) { - __remove_memory(nid, pfn << PAGE_SHIFT, bytes); - } - unlock_device_hotplug(); - return base_pfn << PAGE_SHIFT; - } - } - unlock_device_hotplug(); + arch_remove_linear_mapping(PFN_PHYS(start_pfn), size); - return 0; + return PFN_PHYS(start_pfn); } static int memtrace_init_regions_runtime(u64 size) @@ -220,16 +166,30 @@ static int memtrace_init_debugfs(void) return ret; } -static int online_mem_block(struct memory_block *mem, void *arg) +static int memtrace_free(int nid, u64 start, u64 size) { - return device_online(&mem->dev); + struct mhp_params params = { .pgprot = PAGE_KERNEL }; + const unsigned long nr_pages = PHYS_PFN(size); + const unsigned long start_pfn = PHYS_PFN(start); + unsigned long pfn; + int ret; + + ret = arch_create_linear_mapping(nid, start, size, ¶ms); + if (ret) + return ret; + + for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) + __ClearPageOffline(pfn_to_page(pfn)); + + free_contig_range(start_pfn, nr_pages); + return 0; } /* - * Iterate through the chunks of memory we have removed from the kernel - * and attempt to add them back to the kernel. + * Iterate through the chunks of memory we allocated and attempt to expose + * them back to the kernel. */ -static int memtrace_online(void) +static int memtrace_free_regions(void) { int i, ret = 0; struct memtrace_entry *ent; @@ -237,7 +197,7 @@ static int memtrace_online(void) for (i = memtrace_array_nr - 1; i >= 0; i--) { ent = &memtrace_array[i]; - /* We have onlined this chunk previously */ + /* We have freed this chunk previously */ if (ent->nid == NUMA_NO_NODE) continue; @@ -247,30 +207,25 @@ static int memtrace_online(void) ent->mem = 0; } - if (add_memory(ent->nid, ent->start, ent->size, MHP_NONE)) { - pr_err("Failed to add trace memory to node %d\n", + if (memtrace_free(ent->nid, ent->start, ent->size)) { + pr_err("Failed to free trace memory on node %d\n", ent->nid); ret += 1; continue; } - lock_device_hotplug(); - walk_memory_blocks(ent->start, ent->size, NULL, - online_mem_block); - unlock_device_hotplug(); - /* - * Memory was added successfully so clean up references to it - * so on reentry we can tell that this chunk was added. + * Memory was freed successfully so clean up references to it + * so on reentry we can tell that this chunk was freed. */ debugfs_remove_recursive(ent->dir); - pr_info("Added trace memory back to node %d\n", ent->nid); + pr_info("Freed trace memory back on node %d\n", ent->nid); ent->size = ent->start = ent->nid = NUMA_NO_NODE; } if (ret) return ret; - /* If all chunks of memory were added successfully, reset globals */ + /* If all chunks of memory were freed successfully, reset globals */ kfree(memtrace_array); memtrace_array = NULL; memtrace_size = 0; @@ -295,18 +250,16 @@ static int memtrace_enable_set(void *data, u64 val) mutex_lock(&memtrace_mutex); - /* Re-add/online previously removed/offlined memory */ - if (memtrace_size) { - if (memtrace_online()) - goto out_unlock; - } + /* Free all previously allocated memory. */ + if (memtrace_size && memtrace_free_regions()) + goto out_unlock; if (!val) { rc = 0; goto out_unlock; } - /* Offline and remove memory */ + /* Allocate memory. 
*/ if (memtrace_init_regions_runtime(val)) goto out_unlock; From 640586f8af356096e084d69a9909d217852bde48 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 19 Nov 2020 17:02:21 +0100 Subject: [PATCH 032/304] powerpc/ptrace: Simplify gpr_get()/tm_cgpr_get() gpr_get() does membuf_write() twice to override pt_regs->msr in between. We can call membuf_write() once and change ->msr in the kernel buffer, this simplifies the code and the next fix. The patch adds a new simple helper, membuf_at(offs), it returns the new membuf which can be safely used after membuf_write(). Signed-off-by: Oleg Nesterov [mpe: Fixup some minor whitespace issues noticed by Christophe] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201119160221.GA5188@redhat.com --- arch/powerpc/kernel/ptrace/ptrace-tm.c | 12 ++++-------- arch/powerpc/kernel/ptrace/ptrace-view.c | 10 +++------- include/linux/regset.h | 12 ++++++++++++ 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/kernel/ptrace/ptrace-tm.c b/arch/powerpc/kernel/ptrace/ptrace-tm.c index 54f2d076206f..f15cbbab45b7 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-tm.c +++ b/arch/powerpc/kernel/ptrace/ptrace-tm.c @@ -86,6 +86,8 @@ int tm_cgpr_active(struct task_struct *target, const struct user_regset *regset) int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { + struct membuf to_msr = membuf_at(&to, offsetof(struct pt_regs, msr)); + if (!cpu_has_feature(CPU_FTR_TM)) return -ENODEV; @@ -96,16 +98,10 @@ int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset, flush_fp_to_thread(target); flush_altivec_to_thread(target); - membuf_write(&to, &target->thread.ckpt_regs, - offsetof(struct pt_regs, msr)); - membuf_store(&to, get_user_ckpt_msr(target)); + membuf_write(&to, &target->thread.ckpt_regs, sizeof(struct user_pt_regs)); - BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != - offsetof(struct pt_regs, msr) + sizeof(long)); + membuf_store(&to_msr, get_user_ckpt_msr(target)); - membuf_write(&to, &target->thread.ckpt_regs.orig_gpr3, - sizeof(struct user_pt_regs) - - offsetof(struct pt_regs, orig_gpr3)); return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) - sizeof(struct user_pt_regs)); } diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 7e6478e7ed07..299e0b6d709d 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -217,6 +217,7 @@ int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data) static int gpr_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { + struct membuf to_msr = membuf_at(&to, offsetof(struct pt_regs, msr)); int i; if (target->thread.regs == NULL) @@ -228,15 +229,10 @@ static int gpr_get(struct task_struct *target, const struct user_regset *regset, target->thread.regs->gpr[i] = NV_REG_POISON; } - membuf_write(&to, target->thread.regs, offsetof(struct pt_regs, msr)); - membuf_store(&to, get_user_msr(target)); + membuf_write(&to, target->thread.regs, sizeof(struct user_pt_regs)); - BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != - offsetof(struct pt_regs, msr) + sizeof(long)); + membuf_store(&to_msr, get_user_msr(target)); - membuf_write(&to, &target->thread.regs->orig_gpr3, - sizeof(struct user_pt_regs) - - offsetof(struct pt_regs, orig_gpr3)); return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) - sizeof(struct user_pt_regs)); } diff --git a/include/linux/regset.h 
b/include/linux/regset.h index c3403f328257..a00765f0e8cf 100644 --- a/include/linux/regset.h +++ b/include/linux/regset.h @@ -46,6 +46,18 @@ static inline int membuf_write(struct membuf *s, const void *v, size_t size) return s->left; } +static inline struct membuf membuf_at(const struct membuf *s, size_t offs) +{ + struct membuf n = *s; + + if (offs > n.left) + offs = n.left; + n.p += offs; + n.left -= offs; + + return n; +} + /* current s->p must be aligned for v; v must be a scalar */ #define membuf_store(s, v) \ ({ \ From 324a69467f12652b21b17f9644faa967d3d8bbdf Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 19 Nov 2020 17:02:47 +0100 Subject: [PATCH 033/304] powerpc/ptrace: Hard wire PT_SOFTE value to 1 in gpr_get() too The commit a8a4b03ab95f ("powerpc: Hard wire PT_SOFTE value to 1 in ptrace & signals") changed ptrace_get_reg(PT_SOFTE) to report 0x1, but PTRACE_GETREGS still copies pt_regs->softe as is. This is not consistent and this breaks the user-regs-peekpoke test from https://sourceware.org/systemtap/wiki/utrace/tests/ Reported-by: Jan Kratochvil Signed-off-by: Oleg Nesterov Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201119160247.GB5188@redhat.com --- arch/powerpc/kernel/ptrace/ptrace-tm.c | 7 ++++++- arch/powerpc/kernel/ptrace/ptrace-view.c | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/ptrace/ptrace-tm.c b/arch/powerpc/kernel/ptrace/ptrace-tm.c index f15cbbab45b7..44045363a903 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-tm.c +++ b/arch/powerpc/kernel/ptrace/ptrace-tm.c @@ -87,6 +87,9 @@ int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { struct membuf to_msr = membuf_at(&to, offsetof(struct pt_regs, msr)); +#ifdef CONFIG_PPC64 + struct membuf to_softe = membuf_at(&to, offsetof(struct pt_regs, softe)); +#endif if (!cpu_has_feature(CPU_FTR_TM)) return -ENODEV; @@ -101,7 +104,9 @@ int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset, membuf_write(&to, &target->thread.ckpt_regs, sizeof(struct user_pt_regs)); membuf_store(&to_msr, get_user_ckpt_msr(target)); - +#ifdef CONFIG_PPC64 + membuf_store(&to_softe, 0x1ul); +#endif return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) - sizeof(struct user_pt_regs)); } diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 299e0b6d709d..142d58337f40 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -218,6 +218,9 @@ static int gpr_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { struct membuf to_msr = membuf_at(&to, offsetof(struct pt_regs, msr)); +#ifdef CONFIG_PPC64 + struct membuf to_softe = membuf_at(&to, offsetof(struct pt_regs, softe)); +#endif int i; if (target->thread.regs == NULL) @@ -232,7 +235,9 @@ static int gpr_get(struct task_struct *target, const struct user_regset *regset, membuf_write(&to, target->thread.regs, sizeof(struct user_pt_regs)); membuf_store(&to_msr, get_user_msr(target)); - +#ifdef CONFIG_PPC64 + membuf_store(&to_softe, 0x1ul); +#endif return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) - sizeof(struct user_pt_regs)); } From a538d184e3f0e3b5f800c5ab148e83bb5cdd0133 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 16 Oct 2020 17:01:51 -0700 Subject: [PATCH 034/304] powerpc/boot: Move the .got section to after the .dynamic section Both .dynamic and .got are RELRO sections and should be placed together, and LLD 
emits an error: ld.lld: error: section: .got is not contiguous with other relro sections Place them together to avoid this. Signed-off-by: Bill Wendling Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201017000151.150788-1-morbo@google.com --- arch/powerpc/boot/zImage.lds.S | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S index a21f3a76e06f..d6f072865627 100644 --- a/arch/powerpc/boot/zImage.lds.S +++ b/arch/powerpc/boot/zImage.lds.S @@ -34,6 +34,17 @@ SECTIONS __dynamic_start = .; *(.dynamic) } + +#ifdef CONFIG_PPC64_BOOT_WRAPPER + . = ALIGN(256); + .got : + { + __toc_start = .; + *(.got) + *(.toc) + } +#endif + .hash : { *(.hash) } .interp : { *(.interp) } .rela.dyn : @@ -76,16 +87,6 @@ SECTIONS _esm_blob_end = .; } -#ifdef CONFIG_PPC64_BOOT_WRAPPER - . = ALIGN(256); - .got : - { - __toc_start = .; - *(.got) - *(.toc) - } -#endif - . = ALIGN(4096); .bss : { From 26ba9f9651d802ba38583138f43fea5dc7eb0fd6 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 18 Nov 2020 14:39:10 -0800 Subject: [PATCH 035/304] powerpc/boot/wrapper: Add "-z rodynamic" when using LLD Normally all read-only sections precede SHF_WRITE sections. .dynamic and .got have the SHF_WRITE flag; .dynamic probably because of DT_DEBUG. LLD emits an error when this happens, so use "-z rodynamic" to mark .dynamic as read-only. Signed-off-by: Bill Wendling Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201118223910.2711337-1-morbo@google.com --- arch/powerpc/boot/wrapper | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index cd58a62e810d..e1194955adbb 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -46,6 +46,7 @@ compression=.gz uboot_comp=gzip pie= format= +rodynamic= # cross-compilation prefix CROSS= @@ -353,6 +354,7 @@ epapr) platformo="$object/pseries-head.o $object/epapr.o $object/epapr-wrapper.o" link_address='0x20000000' pie=-pie + rodynamic=$(if ${CROSS}ld -V 2>&1 | grep -q LLD ; then echo "-z rodynamic"; fi) ;; mvme5100) platformo="$object/fixed-head.o $object/mvme5100.o" @@ -493,7 +495,7 @@ if [ "$platform" != "miboot" ]; then text_start="-Ttext $link_address" fi #link everything - ${CROSS}ld -m $format -T $lds $text_start $pie $nodl -o "$ofile" $map \ + ${CROSS}ld -m $format -T $lds $text_start $pie $nodl $rodynamic -o "$ofile" $map \ $platformo $tmp $object/wrapper.a rm $tmp fi From 4c078c86b4a466db221a08d423c2eae9332c2641 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 20 Nov 2020 14:40:32 -0800 Subject: [PATCH 036/304] powerpc/boot/wrapper: Add "-z notext" flag to disable diagnostic The "-z notext" flag disables reporting an error if DT_TEXTREL is set. ld.lld: error: can't create dynamic relocation R_PPC64_ADDR64 against symbol: _start in readonly segment; recompile object files with -fPIC or pass '-Wl,-z,notext' to allow text relocations in the output >>> defined in >>> referenced by crt0.o:(.text+0x8) in archive arch/powerpc/boot/wrapper.a The BFD linker disables this by default (though it's configurable in current versions). LLD enables this by default. So we add the flag to keep LLD from emitting the error. 
Signed-off-by: Bill Wendling Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201120224034.191382-2-morbo@google.com --- arch/powerpc/boot/wrapper | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index e1194955adbb..41fa0a8715e3 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -46,6 +46,7 @@ compression=.gz uboot_comp=gzip pie= format= +notext= rodynamic= # cross-compilation prefix @@ -354,6 +355,7 @@ epapr) platformo="$object/pseries-head.o $object/epapr.o $object/epapr-wrapper.o" link_address='0x20000000' pie=-pie + notext='-z notext' rodynamic=$(if ${CROSS}ld -V 2>&1 | grep -q LLD ; then echo "-z rodynamic"; fi) ;; mvme5100) @@ -495,7 +497,7 @@ if [ "$platform" != "miboot" ]; then text_start="-Ttext $link_address" fi #link everything - ${CROSS}ld -m $format -T $lds $text_start $pie $nodl $rodynamic -o "$ofile" $map \ + ${CROSS}ld -m $format -T $lds $text_start $pie $nodl $rodynamic $notext -o "$ofile" $map \ $platformo $tmp $object/wrapper.a rm $tmp fi From 215fadfe87259f38418ec78744796f099092fff1 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 20 Nov 2020 14:40:33 -0800 Subject: [PATCH 037/304] powerpc/boot: Use clang when CC is clang The gcc compiler may not be available if CC is clang. Signed-off-by: Bill Wendling Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201120224034.191382-3-morbo@google.com --- arch/powerpc/boot/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index f8ce6d2dde7b..68a7534454cd 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -21,7 +21,11 @@ all: $(obj)/zImage ifdef CROSS32_COMPILE +ifdef CONFIG_CC_IS_CLANG + BOOTCC := $(CROSS32_COMPILE)clang +else BOOTCC := $(CROSS32_COMPILE)gcc +endif BOOTAR := $(CROSS32_COMPILE)ar else BOOTCC := $(CC) From f47462c9d8af437ae7d3ef410cf11513f5e3714c Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 20 Nov 2020 14:40:34 -0800 Subject: [PATCH 038/304] powerpc: Work around inline asm issues in alternate feature sections The clang toolchain treats inline assembly a bit differently than straight assembly code. In particular, inline assembly doesn't have the complete context available to resolve expressions. This is intentional to avoid divergence in the resulting assembly code. We can work around this issue by borrowing a workaround done for ARM, i.e. not directly testing the labels themselves, but by moving the current output pointer by a value that should always be zero. If this value is not null, then we will trigger a backward move, which is explicitly forbidden. 
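Concretely, the size check becomes an .org directive whose expression evaluates to 0 when the else case is not larger than the body, so it has no effect in the good case and fails to assemble otherwise:

	#define CHECK_ALT_SIZE(else_size, body_size) \
		.org . + ((else_size) > (body_size));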
Signed-off-by: Bill Wendling [mpe: Put it in a macro and only do the workaround for clang] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201120224034.191382-4-morbo@google.com --- arch/powerpc/include/asm/feature-fixups.h | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index fbd406cd6916..c509f784a5f6 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -36,6 +36,24 @@ label##2: \ .align 2; \ label##3: + +#ifndef CONFIG_CC_IS_CLANG +#define CHECK_ALT_SIZE(else_size, body_size) \ + .ifgt (else_size) - (body_size); \ + .error "Feature section else case larger than body"; \ + .endif; +#else +/* + * If we use the ifgt syntax above, clang's assembler complains about the + * expression being non-absolute when the code appears in an inline assembly + * statement. + * As a workaround use an .org directive that has no effect if the else case + * instructions are smaller than the body, but fails otherwise. + */ +#define CHECK_ALT_SIZE(else_size, body_size) \ + .org . + ((else_size) > (body_size)); +#endif + #define MAKE_FTR_SECTION_ENTRY(msk, val, label, sect) \ label##4: \ .popsection; \ @@ -48,9 +66,7 @@ label##5: \ FTR_ENTRY_OFFSET label##2b-label##5b; \ FTR_ENTRY_OFFSET label##3b-label##5b; \ FTR_ENTRY_OFFSET label##4b-label##5b; \ - .ifgt (label##4b- label##3b)-(label##2b- label##1b); \ - .error "Feature section else case larger than body"; \ - .endif; \ + CHECK_ALT_SIZE((label##4b-label##3b), (label##2b-label##1b)); \ .popsection; From 3d635aba0b35ad5412042d40732f8cec5f58e6c6 Mon Sep 17 00:00:00 2001 From: Alan Modra Date: Fri, 27 Nov 2020 11:48:42 +1100 Subject: [PATCH 039/304] powerpc/boot: Make use of REL16 relocs in powerpc/boot/util.S Use bcl 20,31,0f rather than plain bl to avoid unbalancing the link stack. Update the code to use REL16 relocs, available for ppc64 in 2009 (and ppc32 in 2005). Signed-off-by: Alan Modra [mpe: Incorporate more detail into the change log] Signed-off-by: Michael Ellerman --- arch/powerpc/boot/util.S | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/boot/util.S b/arch/powerpc/boot/util.S index d03cdb7606dc..6a92376daf3f 100644 --- a/arch/powerpc/boot/util.S +++ b/arch/powerpc/boot/util.S @@ -42,14 +42,11 @@ udelay: * (nanoseconds + (timebase_period_ns - 1 )) / timebase_period_ns * timebase_period_ns defaults to 60 (16.6MHz) */ mflr r5 - bl 0f + bcl 20,31,0f 0: mflr r6 mtlr r5 - lis r5,0b@ha - addi r5,r5,0b@l - subf r5,r5,r6 /* In case we're relocated */ - addis r5,r5,timebase_period_ns@ha - lwz r5,timebase_period_ns@l(r5) + addis r5,r6,(timebase_period_ns-0b)@ha + lwz r5,(timebase_period_ns-0b)@l(r5) add r4,r4,r5 addi r4,r4,-1 divw r4,r4,r5 /* BUS ticks */ From f75e7d73bdf73f07b0701a6d21c111ef5d9021dd Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Mon, 23 Nov 2020 21:40:40 -0500 Subject: [PATCH 040/304] powerpc/perf: Fix crash with is_sier_available when pmu is not set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On systems without any specific PMU driver support registered, running 'perf record' with —intr-regs will crash ( perf record -I ). 
The relevant portion from crash logs and Call Trace: Unable to handle kernel paging request for data at address 0x00000068 Faulting instruction address: 0xc00000000013eb18 Oops: Kernel access of bad area, sig: 11 [#1] CPU: 2 PID: 13435 Comm: kill Kdump: loaded Not tainted 4.18.0-193.el8.ppc64le #1 NIP: c00000000013eb18 LR: c000000000139f2c CTR: c000000000393d80 REGS: c0000004a07ab4f0 TRAP: 0300 Not tainted (4.18.0-193.el8.ppc64le) NIP [c00000000013eb18] is_sier_available+0x18/0x30 LR [c000000000139f2c] perf_reg_value+0x6c/0xb0 Call Trace: [c0000004a07ab770] [c0000004a07ab7c8] 0xc0000004a07ab7c8 (unreliable) [c0000004a07ab7a0] [c0000000003aa77c] perf_output_sample+0x60c/0xac0 [c0000004a07ab840] [c0000000003ab3f0] perf_event_output_forward+0x70/0xb0 [c0000004a07ab8c0] [c00000000039e208] __perf_event_overflow+0x88/0x1a0 [c0000004a07ab910] [c00000000039e42c] perf_swevent_hrtimer+0x10c/0x1d0 [c0000004a07abc50] [c000000000228b9c] __hrtimer_run_queues+0x17c/0x480 [c0000004a07abcf0] [c00000000022aaf4] hrtimer_interrupt+0x144/0x520 [c0000004a07abdd0] [c00000000002a864] timer_interrupt+0x104/0x2f0 [c0000004a07abe30] [c0000000000091c4] decrementer_common+0x114/0x120 When perf record session is started with "-I" option, capturing registers on each sample calls is_sier_available() to check for the SIER (Sample Instruction Event Register) availability in the platform. This function in core-book3s accesses 'ppmu->flags'. If a platform specific PMU driver is not registered, ppmu is set to NULL and accessing its members results in a crash. Fix the crash by returning false in is_sier_available() if ppmu is not set. Fixes: 333804dc3b7a ("powerpc/perf: Update perf_regs structure to include SIER") Reported-by: Sachin Sant Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1606185640-1720-1-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/core-book3s.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 3c8c6ce634c5..8e20ef6252e1 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -137,6 +137,9 @@ static void pmao_restore_workaround(bool ebb) { } bool is_sier_available(void) { + if (!ppmu) + return false; + if (ppmu->flags & PPMU_HAS_SIER) return true; From 894fa235eb4ca0bfa692dbe4932c2f940cdc8c1e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 21 Nov 2020 17:59:19 +0000 Subject: [PATCH 041/304] powerpc: inline iomap accessors ioreadXX()/ioreadXXbe() accessors are equivalent to ppc in_leXX()/in_be16() accessors but they are not inlined. Since commit 0eb573682872 ("powerpc/kerenl: Enable EEH for IO accessors"), the 'le' versions are equivalent to the ones defined in asm-generic/io.h, allthough the ones there are inlined. Include asm-generic/io.h to get them. Keep ppc versions of the 'be' ones as they are optimised, but make them inline in ppc io.h. 
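For example, the 'be' accessors become trivial inline wrappers around the existing optimised helpers:

	static inline unsigned int ioread16be(const void __iomem *addr)
	{
		return readw_be(addr);
	}

	static inline void iowrite16be(u16 val, void __iomem *addr)
	{
		writew_be(val, addr);
	}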
This reduces the size of ppc64e_defconfig build by 3 kbytes: text data bss dec hex filename 10160733 4343422 562972 15067127 e5e7f7 vmlinux.before 10159239 4341590 562972 15063801 e5daf9 vmlinux.after A typical function using ioread and iowrite before the change: c00000000066a3c4 <.ata_bmdma_stop>: c00000000066a3c4: 7c 08 02 a6 mflr r0 c00000000066a3c8: fb c1 ff f0 std r30,-16(r1) c00000000066a3cc: f8 01 00 10 std r0,16(r1) c00000000066a3d0: fb e1 ff f8 std r31,-8(r1) c00000000066a3d4: f8 21 ff 81 stdu r1,-128(r1) c00000000066a3d8: eb e3 00 00 ld r31,0(r3) c00000000066a3dc: eb df 00 98 ld r30,152(r31) c00000000066a3e0: 7f c3 f3 78 mr r3,r30 c00000000066a3e4: 4b 9b 6f 7d bl c000000000021360 <.ioread8> c00000000066a3e8: 60 00 00 00 nop c00000000066a3ec: 7f c4 f3 78 mr r4,r30 c00000000066a3f0: 54 63 06 3c rlwinm r3,r3,0,24,30 c00000000066a3f4: 4b 9b 70 4d bl c000000000021440 <.iowrite8> c00000000066a3f8: 60 00 00 00 nop c00000000066a3fc: 7f e3 fb 78 mr r3,r31 c00000000066a400: 38 21 00 80 addi r1,r1,128 c00000000066a404: e8 01 00 10 ld r0,16(r1) c00000000066a408: eb c1 ff f0 ld r30,-16(r1) c00000000066a40c: 7c 08 03 a6 mtlr r0 c00000000066a410: eb e1 ff f8 ld r31,-8(r1) c00000000066a414: 4b ff ff 8c b c00000000066a3a0 <.ata_sff_dma_pause> The same function with this patch: c000000000669cb4 <.ata_bmdma_stop>: c000000000669cb4: e8 63 00 00 ld r3,0(r3) c000000000669cb8: e9 43 00 98 ld r10,152(r3) c000000000669cbc: 7c 00 04 ac hwsync c000000000669cc0: 89 2a 00 00 lbz r9,0(r10) c000000000669cc4: 0c 09 00 00 twi 0,r9,0 c000000000669cc8: 4c 00 01 2c isync c000000000669ccc: 55 29 06 3c rlwinm r9,r9,0,24,30 c000000000669cd0: 7c 00 04 ac hwsync c000000000669cd4: 99 2a 00 00 stb r9,0(r10) c000000000669cd8: a1 4d 06 f0 lhz r10,1776(r13) c000000000669cdc: 2c 2a 00 00 cmpdi r10,0 c000000000669ce0: 41 c2 00 08 beq- c000000000669ce8 <.ata_bmdma_stop+0x34> c000000000669ce4: b1 4d 06 f2 sth r10,1778(r13) c000000000669ce8: 4b ff ff a8 b c000000000669c90 <.ata_sff_dma_pause> Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/18b357d68c4cde149f75c7a1031c850925cd8128.1605981539.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/io.h | 154 ++++++++++++++++++++++++++++++- arch/powerpc/kernel/iomap.c | 166 ---------------------------------- 2 files changed, 153 insertions(+), 167 deletions(-) diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 58635960403c..2469b46ac2c4 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -302,41 +302,56 @@ static inline unsigned char __raw_readb(const volatile void __iomem *addr) { return *(volatile unsigned char __force *)PCI_FIX_ADDR(addr); } +#define __raw_readb __raw_readb + static inline unsigned short __raw_readw(const volatile void __iomem *addr) { return *(volatile unsigned short __force *)PCI_FIX_ADDR(addr); } +#define __raw_readw __raw_readw + static inline unsigned int __raw_readl(const volatile void __iomem *addr) { return *(volatile unsigned int __force *)PCI_FIX_ADDR(addr); } +#define __raw_readl __raw_readl + static inline void __raw_writeb(unsigned char v, volatile void __iomem *addr) { *(volatile unsigned char __force *)PCI_FIX_ADDR(addr) = v; } +#define __raw_writeb __raw_writeb + static inline void __raw_writew(unsigned short v, volatile void __iomem *addr) { *(volatile unsigned short __force *)PCI_FIX_ADDR(addr) = v; } +#define __raw_writew __raw_writew + static inline void __raw_writel(unsigned int v, volatile void __iomem *addr) { *(volatile unsigned 
int __force *)PCI_FIX_ADDR(addr) = v; } +#define __raw_writel __raw_writel #ifdef __powerpc64__ static inline unsigned long __raw_readq(const volatile void __iomem *addr) { return *(volatile unsigned long __force *)PCI_FIX_ADDR(addr); } +#define __raw_readq __raw_readq + static inline void __raw_writeq(unsigned long v, volatile void __iomem *addr) { *(volatile unsigned long __force *)PCI_FIX_ADDR(addr) = v; } +#define __raw_writeq __raw_writeq static inline void __raw_writeq_be(unsigned long v, volatile void __iomem *addr) { __raw_writeq((__force unsigned long)cpu_to_be64(v), addr); } +#define __raw_writeq_be __raw_writeq_be /* * Real mode versions of the above. Those instructions are only supposed @@ -609,10 +624,37 @@ static inline void name at \ /* Some drivers check for the presence of readq & writeq with * a #ifdef, so we make them happy here. */ +#define readb readb +#define readw readw +#define readl readl +#define writeb writeb +#define writew writew +#define writel writel +#define readsb readsb +#define readsw readsw +#define readsl readsl +#define writesb writesb +#define writesw writesw +#define writesl writesl +#define inb inb +#define inw inw +#define inl inl +#define outb outb +#define outw outw +#define outl outl +#define insb insb +#define insw insw +#define insl insl +#define outsb outsb +#define outsw outsw +#define outsl outsl #ifdef __powerpc64__ #define readq readq #define writeq writeq #endif +#define memset_io memset_io +#define memcpy_fromio memcpy_fromio +#define memcpy_toio memcpy_toio /* * Convert a physical pointer to a virtual kernel pointer for /dev/mem @@ -637,7 +679,106 @@ static inline void name at \ #define writel_relaxed(v, addr) writel(v, addr) #define writeq_relaxed(v, addr) writeq(v, addr) +#ifdef CONFIG_GENERIC_IOMAP #include +#else +/* + * Here comes the implementation of the IOMAP interfaces. 
+ */ +static inline unsigned int ioread16be(const void __iomem *addr) +{ + return readw_be(addr); +} +#define ioread16be ioread16be + +static inline unsigned int ioread32be(const void __iomem *addr) +{ + return readl_be(addr); +} +#define ioread32be ioread32be + +#ifdef __powerpc64__ +static inline u64 ioread64_lo_hi(const void __iomem *addr) +{ + return readq(addr); +} +#define ioread64_lo_hi ioread64_lo_hi + +static inline u64 ioread64_hi_lo(const void __iomem *addr) +{ + return readq(addr); +} +#define ioread64_hi_lo ioread64_hi_lo + +static inline u64 ioread64be(const void __iomem *addr) +{ + return readq_be(addr); +} +#define ioread64be ioread64be + +static inline u64 ioread64be_lo_hi(const void __iomem *addr) +{ + return readq_be(addr); +} +#define ioread64be_lo_hi ioread64be_lo_hi + +static inline u64 ioread64be_hi_lo(const void __iomem *addr) +{ + return readq_be(addr); +} +#define ioread64be_hi_lo ioread64be_hi_lo +#endif /* __powerpc64__ */ + +static inline void iowrite16be(u16 val, void __iomem *addr) +{ + writew_be(val, addr); +} +#define iowrite16be iowrite16be + +static inline void iowrite32be(u32 val, void __iomem *addr) +{ + writel_be(val, addr); +} +#define iowrite32be iowrite32be + +#ifdef __powerpc64__ +static inline void iowrite64_lo_hi(u64 val, void __iomem *addr) +{ + writeq(val, addr); +} +#define iowrite64_lo_hi iowrite64_lo_hi + +static inline void iowrite64_hi_lo(u64 val, void __iomem *addr) +{ + writeq(val, addr); +} +#define iowrite64_hi_lo iowrite64_hi_lo + +static inline void iowrite64be(u64 val, void __iomem *addr) +{ + writeq_be(val, addr); +} +#define iowrite64be iowrite64be + +static inline void iowrite64be_lo_hi(u64 val, void __iomem *addr) +{ + writeq_be(val, addr); +} +#define iowrite64be_lo_hi iowrite64be_lo_hi + +static inline void iowrite64be_hi_lo(u64 val, void __iomem *addr) +{ + writeq_be(val, addr); +} +#define iowrite64be_hi_lo iowrite64be_hi_lo +#endif /* __powerpc64__ */ + +struct pci_dev; +void pci_iounmap(struct pci_dev *dev, void __iomem *addr); +#define pci_iounmap pci_iounmap +void __iomem *ioport_map(unsigned long port, unsigned int len); +#define ioport_map ioport_map +#endif static inline void iosync(void) { @@ -670,7 +811,6 @@ static inline void iosync(void) #define IO_SPACE_LIMIT ~(0UL) - /** * ioremap - map bus memory into CPU space * @address: bus address of the memory @@ -706,7 +846,13 @@ extern void __iomem *ioremap(phys_addr_t address, unsigned long size); extern void __iomem *ioremap_prot(phys_addr_t address, unsigned long size, unsigned long flags); extern void __iomem *ioremap_wc(phys_addr_t address, unsigned long size); +#define ioremap_wc ioremap_wc + +#ifdef CONFIG_PPC32 void __iomem *ioremap_wt(phys_addr_t address, unsigned long size); +#define ioremap_wt ioremap_wt +#endif + void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size); #define ioremap_uc(addr, size) ioremap((addr), (size)) #define ioremap_cache(addr, size) \ @@ -766,6 +912,7 @@ static inline unsigned long virt_to_phys(volatile void * address) return __pa((unsigned long)address); } +#define virt_to_phys virt_to_phys /** * phys_to_virt - map physical address to virtual @@ -783,6 +930,7 @@ static inline void * phys_to_virt(unsigned long address) { return (void *)__va(address); } +#define phys_to_virt phys_to_virt /* * Change "struct page" to physical address. 
@@ -810,6 +958,7 @@ static inline unsigned long virt_to_bus(volatile void * address) return 0; return __pa(address) + PCI_DRAM_OFFSET; } +#define virt_to_bus virt_to_bus static inline void * bus_to_virt(unsigned long address) { @@ -817,6 +966,7 @@ static inline void * bus_to_virt(unsigned long address) return NULL; return __va(address - PCI_DRAM_OFFSET); } +#define bus_to_virt bus_to_virt #define page_to_bus(page) (page_to_phys(page) + PCI_DRAM_OFFSET) @@ -855,6 +1005,8 @@ static inline void * bus_to_virt(unsigned long address) #define clrsetbits_8(addr, clear, set) clrsetbits(8, addr, clear, set) +#include + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_IO_H */ diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c index 9fe4fb3b08aa..72862a4d3a5d 100644 --- a/arch/powerpc/kernel/iomap.c +++ b/arch/powerpc/kernel/iomap.c @@ -11,177 +11,11 @@ #include #include -/* - * Here comes the ppc64 implementation of the IOMAP - * interfaces. - */ -unsigned int ioread8(const void __iomem *addr) -{ - return readb(addr); -} -unsigned int ioread16(const void __iomem *addr) -{ - return readw(addr); -} -unsigned int ioread16be(const void __iomem *addr) -{ - return readw_be(addr); -} -unsigned int ioread32(const void __iomem *addr) -{ - return readl(addr); -} -unsigned int ioread32be(const void __iomem *addr) -{ - return readl_be(addr); -} -EXPORT_SYMBOL(ioread8); -EXPORT_SYMBOL(ioread16); -EXPORT_SYMBOL(ioread16be); -EXPORT_SYMBOL(ioread32); -EXPORT_SYMBOL(ioread32be); -#ifdef __powerpc64__ -u64 ioread64(const void __iomem *addr) -{ - return readq(addr); -} -u64 ioread64_lo_hi(const void __iomem *addr) -{ - return readq(addr); -} -u64 ioread64_hi_lo(const void __iomem *addr) -{ - return readq(addr); -} -u64 ioread64be(const void __iomem *addr) -{ - return readq_be(addr); -} -u64 ioread64be_lo_hi(const void __iomem *addr) -{ - return readq_be(addr); -} -u64 ioread64be_hi_lo(const void __iomem *addr) -{ - return readq_be(addr); -} -EXPORT_SYMBOL(ioread64); -EXPORT_SYMBOL(ioread64_lo_hi); -EXPORT_SYMBOL(ioread64_hi_lo); -EXPORT_SYMBOL(ioread64be); -EXPORT_SYMBOL(ioread64be_lo_hi); -EXPORT_SYMBOL(ioread64be_hi_lo); -#endif /* __powerpc64__ */ - -void iowrite8(u8 val, void __iomem *addr) -{ - writeb(val, addr); -} -void iowrite16(u16 val, void __iomem *addr) -{ - writew(val, addr); -} -void iowrite16be(u16 val, void __iomem *addr) -{ - writew_be(val, addr); -} -void iowrite32(u32 val, void __iomem *addr) -{ - writel(val, addr); -} -void iowrite32be(u32 val, void __iomem *addr) -{ - writel_be(val, addr); -} -EXPORT_SYMBOL(iowrite8); -EXPORT_SYMBOL(iowrite16); -EXPORT_SYMBOL(iowrite16be); -EXPORT_SYMBOL(iowrite32); -EXPORT_SYMBOL(iowrite32be); -#ifdef __powerpc64__ -void iowrite64(u64 val, void __iomem *addr) -{ - writeq(val, addr); -} -void iowrite64_lo_hi(u64 val, void __iomem *addr) -{ - writeq(val, addr); -} -void iowrite64_hi_lo(u64 val, void __iomem *addr) -{ - writeq(val, addr); -} -void iowrite64be(u64 val, void __iomem *addr) -{ - writeq_be(val, addr); -} -void iowrite64be_lo_hi(u64 val, void __iomem *addr) -{ - writeq_be(val, addr); -} -void iowrite64be_hi_lo(u64 val, void __iomem *addr) -{ - writeq_be(val, addr); -} -EXPORT_SYMBOL(iowrite64); -EXPORT_SYMBOL(iowrite64_lo_hi); -EXPORT_SYMBOL(iowrite64_hi_lo); -EXPORT_SYMBOL(iowrite64be); -EXPORT_SYMBOL(iowrite64be_lo_hi); -EXPORT_SYMBOL(iowrite64be_hi_lo); -#endif /* __powerpc64__ */ - -/* - * These are the "repeat read/write" functions. Note the - * non-CPU byte order. We do things in "IO byteorder" - * here. - * - * FIXME! 
We could make these do EEH handling if we really - wanted. Not clear if we do. */ -void ioread8_rep(const void __iomem *addr, void *dst, unsigned long count) -{ - readsb(addr, dst, count); -} -void ioread16_rep(const void __iomem *addr, void *dst, unsigned long count) -{ - readsw(addr, dst, count); -} -void ioread32_rep(const void __iomem *addr, void *dst, unsigned long count) -{ - readsl(addr, dst, count); -} -EXPORT_SYMBOL(ioread8_rep); -EXPORT_SYMBOL(ioread16_rep); -EXPORT_SYMBOL(ioread32_rep); - -void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count) -{ - writesb(addr, src, count); -} -void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count) -{ - writesw(addr, src, count); -} -void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count) -{ - writesl(addr, src, count); -} -EXPORT_SYMBOL(iowrite8_rep); -EXPORT_SYMBOL(iowrite16_rep); -EXPORT_SYMBOL(iowrite32_rep); - void __iomem *ioport_map(unsigned long port, unsigned int len) { return (void __iomem *) (port + _IO_BASE); } - -void ioport_unmap(void __iomem *addr) -{ - /* Nothing to do */ -} EXPORT_SYMBOL(ioport_map); -EXPORT_SYMBOL(ioport_unmap); #ifdef CONFIG_PCI void pci_iounmap(struct pci_dev *dev, void __iomem *addr) From 25395cd2f8cb24ce6a5ce073c898acfb091e06cf Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 24 Nov 2020 23:05:45 +1100 Subject: [PATCH 042/304] powerpc: Make NUMA depend on SMP Our Kconfig allows NUMA to be enabled without SMP, but none of our defconfigs use that combination. This means it can easily be broken inadvertently by code changes, which has happened recently. Although it's theoretically possible to have a machine with a single CPU and multiple memory nodes, I can't think of any real systems where that's the case. Even so, if such a system exists, it can just run an SMP kernel anyway. So to avoid the need to add extra #ifdefs and/or build breaks, make NUMA depend on SMP. Reported-by: kernel test robot Reported-by: Randy Dunlap Signed-off-by: Michael Ellerman Reviewed-by: Srikar Dronamraju Reviewed-by: Randy Dunlap Link: https://lore.kernel.org/r/20201124120547.1940635-1-mpe@ellerman.id.au --- arch/powerpc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e9f13fe08492..a22db3db6b96 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -660,7 +660,7 @@ config IRQ_ALL_CPUS config NUMA bool "NUMA support" - depends on PPC64 + depends on PPC64 && SMP default y if SMP && PPC_PSERIES config NODES_SHIFT From 4c28b32b886f1489c5f510ed8e3f0c4e3dcb59f5 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 24 Nov 2020 23:05:46 +1100 Subject: [PATCH 043/304] powerpc: Make NUMA default y for powernv Our NUMA option is default y for pseries, but not powernv. The bulk of powernv systems are NUMA, so make NUMA default y for powernv also.
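A quick way to confirm the new default (an illustration, not part of the patch, and assuming a tree where the powernv defconfig is available under its usual name) is to regenerate the config and grep for the symbol:

  $ make ARCH=powerpc powernv_defconfig
  $ grep '^CONFIG_NUMA=' .config
  CONFIG_NUMA=y

Since pseries already defaulted to y, powernv is the only platform whose effective configuration changes here.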
Signed-off-by: Michael Ellerman Reviewed-by: Srikar Dronamraju Link: https://lore.kernel.org/r/20201124120547.1940635-2-mpe@ellerman.id.au --- arch/powerpc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a22db3db6b96..4d688b426353 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -661,7 +661,7 @@ config IRQ_ALL_CPUS config NUMA bool "NUMA support" depends on PPC64 && SMP - default y if SMP && PPC_PSERIES + default y if PPC_PSERIES || PPC_POWERNV config NODES_SHIFT int From bae80c27fc2195b9e5723d7b05c592e0874f4ba9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 24 Nov 2020 23:05:47 +1100 Subject: [PATCH 044/304] powerpc: Update NUMA Kconfig description & help text Update the NUMA Kconfig description to match other architectures, and add some help text. Shamelessly borrowed from x86/arm64. Signed-off-by: Michael Ellerman Reviewed-by: Randy Dunlap Link: https://lore.kernel.org/r/20201124120547.1940635-3-mpe@ellerman.id.au --- arch/powerpc/Kconfig | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 4d688b426353..7f4995b245a3 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -659,9 +659,15 @@ config IRQ_ALL_CPUS reported with SMP Power Macintoshes with this option enabled. config NUMA - bool "NUMA support" + bool "NUMA Memory Allocation and Scheduler Support" depends on PPC64 && SMP default y if PPC_PSERIES || PPC_POWERNV + help + Enable NUMA (Non-Uniform Memory Access) support. + + The kernel will try to allocate memory used by a CPU on the + local memory controller of the CPU and add some more + NUMA awareness to the kernel. config NODES_SHIFT int From 8d1eeabf253657ae3e76970514f30b7e53a6898f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 27 Nov 2020 00:09:58 +1100 Subject: [PATCH 045/304] powerpc/feature: Use CONFIG_PPC64 instead of __powerpc64__ to define possible features In order to build VDSO32 for PPC64, we need to have CPU_FTRS_POSSIBLE and CPU_FTRS_ALWAYS independent of whether we are building the 32-bit VDSO or the 64-bit VDSO. Use #ifdef CONFIG_PPC64 instead of #ifdef __powerpc64__. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201126131006.2431205-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/cputable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 5e31960a56a9..e069a2d9f7c1 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -488,7 +488,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_PURR | CPU_FTR_REAL_LE | CPU_FTR_DABRX) #define CPU_FTRS_COMPATIBLE (CPU_FTR_PPCAS_ARCH_V2) -#ifdef __powerpc64__ +#ifdef CONFIG_PPC64 #ifdef CONFIG_PPC_BOOK3E #define CPU_FTRS_POSSIBLE (CPU_FTRS_E6500 | CPU_FTRS_E5500) #else @@ -545,7 +545,7 @@ enum { }; #endif /* __powerpc64__ */ -#ifdef __powerpc64__ +#ifdef CONFIG_PPC64 #ifdef CONFIG_PPC_BOOK3E #define CPU_FTRS_ALWAYS (CPU_FTRS_E6500 & CPU_FTRS_E5500) #else From 8f8cffd9df81612b5b06d2c57ebf74f8961b41be Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 27 Nov 2020 00:09:59 +1100 Subject: [PATCH 046/304] powerpc/processor: Move cpu_relax() into asm/vdso/processor.h cpu_relax() needs to be in asm/vdso/processor.h to be used by the C VDSO generic library. Move it there.
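The benefit of the split is that C code built for the VDSO can include the small asm/vdso/processor.h on its own, without dragging in the rest of asm/processor.h and its kernel-only dependencies. A minimal sketch of the kind of caller this enables (hypothetical illustration; wait_for_seq_change is a made-up name, not part of the patch):

	#include <asm/vdso/processor.h>

	/* Busy-wait until the kernel publishes a new sequence count. */
	static inline void wait_for_seq_change(const volatile unsigned int *seq,
					       unsigned int old)
	{
		while (*seq == old)
			cpu_relax();	/* HMT_low()/HMT_medium() on PPC64, plain barrier() on PPC32 */
	}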
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201126131006.2431205-2-mpe@ellerman.id.au --- arch/powerpc/include/asm/processor.h | 13 ++----------- arch/powerpc/include/asm/vdso/processor.h | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+), 11 deletions(-) create mode 100644 arch/powerpc/include/asm/vdso/processor.h diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index c61c859b51a8..333e3b6c76fb 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -6,6 +6,8 @@ * Copyright (C) 2001 PPC 64 Team, IBM Corp */ +#include + #include #ifdef CONFIG_VSX @@ -63,14 +65,6 @@ extern int _chrp_type; #endif /* defined(__KERNEL__) && defined(CONFIG_PPC32) */ -/* Macros for adjusting thread priority (hardware multi-threading) */ -#define HMT_very_low() asm volatile("or 31,31,31 # very low priority") -#define HMT_low() asm volatile("or 1,1,1 # low priority") -#define HMT_medium_low() asm volatile("or 6,6,6 # medium low priority") -#define HMT_medium() asm volatile("or 2,2,2 # medium priority") -#define HMT_medium_high() asm volatile("or 5,5,5 # medium high priority") -#define HMT_high() asm volatile("or 3,3,3 # high priority") - #ifdef __KERNEL__ #ifdef CONFIG_PPC64 @@ -344,7 +338,6 @@ static inline unsigned long __pack_fe01(unsigned int fpmode) } #ifdef CONFIG_PPC64 -#define cpu_relax() do { HMT_low(); HMT_medium(); barrier(); } while (0) #define spin_begin() HMT_low() @@ -363,8 +356,6 @@ do { \ } \ } while (0) -#else -#define cpu_relax() barrier() #endif /* Check that a certain kernel stack pointer is valid in task_struct p */ diff --git a/arch/powerpc/include/asm/vdso/processor.h b/arch/powerpc/include/asm/vdso/processor.h new file mode 100644 index 000000000000..e072577bc7c0 --- /dev/null +++ b/arch/powerpc/include/asm/vdso/processor.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_POWERPC_VDSO_PROCESSOR_H +#define _ASM_POWERPC_VDSO_PROCESSOR_H + +#ifndef __ASSEMBLY__ + +/* Macros for adjusting thread priority (hardware multi-threading) */ +#define HMT_very_low() asm volatile("or 31, 31, 31 # very low priority") +#define HMT_low() asm volatile("or 1, 1, 1 # low priority") +#define HMT_medium_low() asm volatile("or 6, 6, 6 # medium low priority") +#define HMT_medium() asm volatile("or 2, 2, 2 # medium priority") +#define HMT_medium_high() asm volatile("or 5, 5, 5 # medium high priority") +#define HMT_high() asm volatile("or 3, 3, 3 # high priority") + +#ifdef CONFIG_PPC64 +#define cpu_relax() do { HMT_low(); HMT_medium(); barrier(); } while (0) +#else +#define cpu_relax() barrier() +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_VDSO_PROCESSOR_H */ From d26b3817d9eefae6b777739c1ea5daba5e72624e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 27 Nov 2020 00:10:00 +1100 Subject: [PATCH 047/304] powerpc/time: Move timebase functions into new asm/vdso/timebase.h In order to easily use get_tb() from C VDSO, move timebase functions into a new header named asm/vdso/timebase.h Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201126131006.2431205-3-mpe@ellerman.id.au --- arch/powerpc/include/asm/reg.h | 31 ----------- arch/powerpc/include/asm/time.h | 30 +--------- arch/powerpc/include/asm/timex.h | 2 +- arch/powerpc/include/asm/vdso/timebase.h | 71 ++++++++++++++++++++++++ 4 files changed, 73 insertions(+), 61 deletions(-) create mode 100644 
arch/powerpc/include/asm/vdso/timebase.h diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index f877a576b338..602236e223c4 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1419,37 +1419,6 @@ static inline void msr_check_and_clear(unsigned long bits) __msr_check_and_clear(bits); } -#if defined(CONFIG_PPC_CELL) || defined(CONFIG_E500) -#define mftb() ({unsigned long rval; \ - asm volatile( \ - "90: mfspr %0, %2;\n" \ - ASM_FTR_IFSET( \ - "97: cmpwi %0,0;\n" \ - " beq- 90b;\n", "", %1) \ - : "=r" (rval) \ - : "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \ - rval;}) -#elif defined(CONFIG_PPC_8xx) -#define mftb() ({unsigned long rval; \ - asm volatile("mftbl %0" : "=r" (rval)); rval;}) -#else -#define mftb() ({unsigned long rval; \ - asm volatile("mfspr %0, %1" : \ - "=r" (rval) : "i" (SPRN_TBRL)); rval;}) -#endif /* !CONFIG_PPC_CELL */ - -#if defined(CONFIG_PPC_8xx) -#define mftbu() ({unsigned long rval; \ - asm volatile("mftbu %0" : "=r" (rval)); rval;}) -#else -#define mftbu() ({unsigned long rval; \ - asm volatile("mfspr %0, %1" : "=r" (rval) : \ - "i" (SPRN_TBRU)); rval;}) -#endif - -#define mttbl(v) asm volatile("mttbl %0":: "r"(v)) -#define mttbu(v) asm volatile("mttbu %0":: "r"(v)) - #ifdef CONFIG_PPC32 #define mfsrin(v) ({unsigned int rval; \ asm volatile("mfsrin %0,%1" : "=r" (rval) : "r" (v)); \ diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 2f566c1a754c..a59f8030f020 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -15,6 +15,7 @@ #include #include +#include /* time.c */ extern unsigned long tb_ticks_per_jiffy; @@ -38,12 +39,6 @@ struct div_result { u64 result_low; }; -/* For compatibility, get_tbl() is defined as get_tb() on ppc64 */ -static inline unsigned long get_tbl(void) -{ - return mftb(); -} - static inline u64 get_vtb(void) { #ifdef CONFIG_PPC_BOOK3S_64 @@ -53,29 +48,6 @@ static inline u64 get_vtb(void) return 0; } -static inline u64 get_tb(void) -{ - unsigned int tbhi, tblo, tbhi2; - - if (IS_ENABLED(CONFIG_PPC64)) - return mftb(); - - do { - tbhi = mftbu(); - tblo = mftb(); - tbhi2 = mftbu(); - } while (tbhi != tbhi2); - - return ((u64)tbhi << 32) | tblo; -} - -static inline void set_tb(unsigned int upper, unsigned int lower) -{ - mtspr(SPRN_TBWL, 0); - mtspr(SPRN_TBWU, upper); - mtspr(SPRN_TBWL, lower); -} - /* Accessor functions for the decrementer register. * The 4xx doesn't even have a decrementer. I tried to use the * generic timer interrupt code, which seems OK, with the 4xx PIT diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h index 95988870a57b..fa2e76e4093a 100644 --- a/arch/powerpc/include/asm/timex.h +++ b/arch/powerpc/include/asm/timex.h @@ -9,7 +9,7 @@ */ #include -#include +#include #define CLOCK_TICK_RATE 1024000 /* Underlying HZ */ diff --git a/arch/powerpc/include/asm/vdso/timebase.h b/arch/powerpc/include/asm/vdso/timebase.h new file mode 100644 index 000000000000..ac6769b348c6 --- /dev/null +++ b/arch/powerpc/include/asm/vdso/timebase.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Common timebase prototypes and such for all ppc machines. 
+ */ + +#ifndef _ASM_POWERPC_VDSO_TIMEBASE_H +#define _ASM_POWERPC_VDSO_TIMEBASE_H + +#include + +#if defined(CONFIG_PPC_CELL) || defined(CONFIG_E500) +#define mftb() ({unsigned long rval; \ + asm volatile( \ + "90: mfspr %0, %2;\n" \ + ASM_FTR_IFSET( \ + "97: cmpwi %0,0;\n" \ + " beq- 90b;\n", "", %1) \ + : "=r" (rval) \ + : "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \ + rval;}) +#elif defined(CONFIG_PPC_8xx) +#define mftb() ({unsigned long rval; \ + asm volatile("mftbl %0" : "=r" (rval)); rval;}) +#else +#define mftb() ({unsigned long rval; \ + asm volatile("mfspr %0, %1" : \ + "=r" (rval) : "i" (SPRN_TBRL)); rval;}) +#endif /* !CONFIG_PPC_CELL */ + +#if defined(CONFIG_PPC_8xx) +#define mftbu() ({unsigned long rval; \ + asm volatile("mftbu %0" : "=r" (rval)); rval;}) +#else +#define mftbu() ({unsigned long rval; \ + asm volatile("mfspr %0, %1" : "=r" (rval) : \ + "i" (SPRN_TBRU)); rval;}) +#endif + +#define mttbl(v) asm volatile("mttbl %0":: "r"(v)) +#define mttbu(v) asm volatile("mttbu %0":: "r"(v)) + +/* For compatibility, get_tbl() is defined as get_tb() on ppc64 */ +static inline unsigned long get_tbl(void) +{ + return mftb(); +} + +static inline u64 get_tb(void) +{ + unsigned int tbhi, tblo, tbhi2; + + if (IS_ENABLED(CONFIG_PPC64)) + return mftb(); + + do { + tbhi = mftbu(); + tblo = mftb(); + tbhi2 = mftbu(); + } while (tbhi != tbhi2); + + return ((u64)tbhi << 32) | tblo; +} + +static inline void set_tb(unsigned int upper, unsigned int lower) +{ + mtspr(SPRN_TBWL, 0); + mtspr(SPRN_TBWU, upper); + mtspr(SPRN_TBWL, lower); +} + +#endif /* _ASM_POWERPC_VDSO_TIMEBASE_H */ From 5c189c523e78d4a70e874477e4b0628fd74207e4 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 27 Nov 2020 00:10:01 +1100 Subject: [PATCH 048/304] powerpc/time: Fix mftb()/get_tb() for use with the compat VDSO When we're building the compat VDSO we are building 32-bit code but in the context of a 64-bit kernel configuration. To make this work we need to be careful in some places when using ifdefs to differentiate between CONFIG_PPC64 and __powerpc64__. CONFIG_PPC64 indicates the kernel we're building is 64-bit, but it doesn't tell us that we're currently building 64-bit code - we could be building 32-bit code for the compat VDSO. On the other hand __powerpc64__ tells us that we are currently building 64-bit code (and therefore we must also be building a 64-bit kernel). In the case of get_tb() we want to use the 32-bit code sequence regardless of whether the kernel we're building for is 64-bit or 32-bit, what matters is the word size of the current object. So we need to check __powerpc64__ to decide if we use mftb() or the mftbu()/mftb() sequence. For mftb() the logic for CPU_FTR_CELL_TB_BUG only makes sense if we're building 64-bit code, so guard that with a __powerpc64__ check. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201126131006.2431205-4-mpe@ellerman.id.au --- arch/powerpc/include/asm/vdso/timebase.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/vdso/timebase.h b/arch/powerpc/include/asm/vdso/timebase.h index ac6769b348c6..b558b07959ce 100644 --- a/arch/powerpc/include/asm/vdso/timebase.h +++ b/arch/powerpc/include/asm/vdso/timebase.h @@ -8,7 +8,11 @@ #include -#if defined(CONFIG_PPC_CELL) || defined(CONFIG_E500) +/* + * We use __powerpc64__ here because we want the compat VDSO to use the 32-bit + * version below in the else case of the ifdef. 
+ */ +#if defined(__powerpc64__) && (defined(CONFIG_PPC_CELL) || defined(CONFIG_E500)) #define mftb() ({unsigned long rval; \ asm volatile( \ "90: mfspr %0, %2;\n" \ @@ -49,7 +53,11 @@ static inline u64 get_tb(void) { unsigned int tbhi, tblo, tbhi2; - if (IS_ENABLED(CONFIG_PPC64)) + /* + * We use __powerpc64__ here not CONFIG_PPC64 because we want the compat + * VDSO to use the 32-bit compatible version in the while loop below. + */ + if (__is_defined(__powerpc64__)) return mftb(); do { From 1f1676bb2dd52c1054db8476d6387e6dcf62a1ba Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 27 Nov 2020 00:10:02 +1100 Subject: [PATCH 049/304] powerpc/barrier: Use CONFIG_PPC64 for barrier selection Currently we use ifdef __powerpc64__ in barrier.h to decide if we should use lwsync or eieio for SMPWMB which is then used by __smp_wmb(). That means when we are building the compat VDSO we will use eieio, because it's 32-bit code, even though we're building a 64-bit kernel for a 64-bit CPU. Although eieio should work, it would be cleaner if we always used the same barrier, even for the 32-bit VDSO. So change the ifdef to CONFIG_PPC64, so that the selection is made based on the bitness of the kernel we're building for, not the current compilation unit. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201126131006.2431205-5-mpe@ellerman.id.au --- arch/powerpc/include/asm/barrier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index f53c42380832..aecfde829d5d 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -40,7 +40,7 @@ #define wmb() __asm__ __volatile__ ("sync" : : : "memory") /* The sub-arch has lwsync */ -#if defined(__powerpc64__) || defined(CONFIG_PPC_E500MC) +#if defined(CONFIG_PPC64) || defined(CONFIG_PPC_E500MC) # define SMPWMB LWSYNC #else # define SMPWMB eieio From ce7d8056e38b770f070fc4499c577322b6ccb9c7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 27 Nov 2020 00:10:03 +1100 Subject: [PATCH 050/304] powerpc/vdso: Prepare for switching VDSO to generic C implementation. Prepare for switching VDSO to generic C implementation in the following patch. Here, we: - Prepare the helpers to call the C VDSO functions - Prepare the required callbacks for the C VDSO functions - Prepare the clocksource.h files to define VDSO_ARCH_CLOCKMODES - Add the C trampolines to the generic C VDSO functions powerpc is a bit special for VDSO as well as system calls in that it requires setting the CR SO bit, which cannot be done in C. Therefore, entry/exit needs to be performed in ASM. Implementing __arch_get_vdso_data() would clobber the link register, requiring the caller to save it. As the ASM calling function already has to set a stack frame and saves the link register before calling the C vdso function, retrieving the vdso data pointer there is lighter. Implement __arch_vdso_capable() and always return true. Provide vdso_shift_ns(), as the generic x >> s gives the following bad result: 18: 35 25 ff e0 addic. r9,r5,-32 1c: 41 80 00 10 blt 2c 20: 7c 64 4c 30 srw r4,r3,r9 24: 38 60 00 00 li r3,0 ... 2c: 54 69 08 3c rlwinm r9,r3,1,0,30 30: 21 45 00 1f subfic r10,r5,31 34: 7c 84 2c 30 srw r4,r4,r5 38: 7d 29 50 30 slw r9,r9,r10 3c: 7c 63 2c 30 srw r3,r3,r5 40: 7d 24 23 78 or r4,r9,r4 In our case the shift is always <= 32. In addition, the upper 32 bits of the result are likely zero. Letting GCC know this also optimises the following calculations.
With the patch, we get: 0: 21 25 00 20 subfic r9,r5,32 4: 7c 69 48 30 slw r9,r3,r9 8: 7c 84 2c 30 srw r4,r4,r5 c: 7d 24 23 78 or r4,r9,r4 10: 7c 63 2c 30 srw r3,r3,r5 Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201126131006.2431205-6-mpe@ellerman.id.au --- arch/powerpc/include/asm/clocksource.h | 7 + arch/powerpc/include/asm/ppc_asm.h | 2 + arch/powerpc/include/asm/vdso/clocksource.h | 7 + arch/powerpc/include/asm/vdso/gettimeofday.h | 187 +++++++++++++++++++ arch/powerpc/kernel/vdso32/vgettimeofday.c | 28 +++ arch/powerpc/kernel/vdso64/vgettimeofday.c | 29 +++ 6 files changed, 260 insertions(+) create mode 100644 arch/powerpc/include/asm/clocksource.h create mode 100644 arch/powerpc/include/asm/vdso/clocksource.h create mode 100644 arch/powerpc/include/asm/vdso/gettimeofday.h create mode 100644 arch/powerpc/kernel/vdso32/vgettimeofday.c create mode 100644 arch/powerpc/kernel/vdso64/vgettimeofday.c diff --git a/arch/powerpc/include/asm/clocksource.h b/arch/powerpc/include/asm/clocksource.h new file mode 100644 index 000000000000..0a26ef13a34a --- /dev/null +++ b/arch/powerpc/include/asm/clocksource.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_CLOCKSOURCE_H +#define _ASM_POWERPC_CLOCKSOURCE_H + +#include + +#endif /* _ASM_POWERPC_CLOCKSOURCE_H */ diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 101986d4a29d..cfa814824285 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -251,6 +251,8 @@ GLUE(.,name): #define _GLOBAL_TOC(name) _GLOBAL(name) +#define DOTSYM(a) a + #endif /* diff --git a/arch/powerpc/include/asm/vdso/clocksource.h b/arch/powerpc/include/asm/vdso/clocksource.h new file mode 100644 index 000000000000..c1ba56b82ee5 --- /dev/null +++ b/arch/powerpc/include/asm/vdso/clocksource.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_VDSO_CLOCKSOURCE_H +#define _ASM_POWERPC_VDSO_CLOCKSOURCE_H + +#define VDSO_ARCH_CLOCKMODES VDSO_CLOCKMODE_ARCHTIMER + +#endif diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h new file mode 100644 index 000000000000..43dd1dc47c37 --- /dev/null +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -0,0 +1,187 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_VDSO_GETTIMEOFDAY_H +#define _ASM_POWERPC_VDSO_GETTIMEOFDAY_H + +#ifdef __ASSEMBLY__ + +#include + +/* + * The macros sets two stack frames, one for the caller and one for the callee + * because there are no requirement for the caller to set a stack frame when + * calling VDSO so it may have omitted to set one, especially on PPC64 + */ + +.macro cvdso_call funct + .cfi_startproc + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + mflr r0 + .cfi_register lr, r0 + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) + get_datapage r5, r0 + addi r5, r5, VDSO_DATA_OFFSET + bl DOTSYM(\funct) + PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) + cmpwi r3, 0 + mtlr r0 + .cfi_restore lr + addi r1, r1, 2 * PPC_MIN_STKFRM + crclr so + beqlr+ + crset so + neg r3, r3 + blr + .cfi_endproc +.endm + +.macro cvdso_call_time funct + .cfi_startproc + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + mflr r0 + .cfi_register lr, r0 + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) + get_datapage r4, r0 + addi r4, r4, VDSO_DATA_OFFSET + bl DOTSYM(\funct) + PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) + crclr so + mtlr r0 + 
.cfi_restore lr + addi r1, r1, 2 * PPC_MIN_STKFRM + blr + .cfi_endproc +.endm + +#else + +#include +#include +#include +#include + +#define VDSO_HAS_CLOCK_GETRES 1 + +#define VDSO_HAS_TIME 1 + +static __always_inline int do_syscall_2(const unsigned long _r0, const unsigned long _r3, + const unsigned long _r4) +{ + register long r0 asm("r0") = _r0; + register unsigned long r3 asm("r3") = _r3; + register unsigned long r4 asm("r4") = _r4; + register int ret asm ("r3"); + + asm volatile( + " sc\n" + " bns+ 1f\n" + " neg %0, %0\n" + "1:\n" + : "=r" (ret), "+r" (r4), "+r" (r0) + : "r" (r3) + : "memory", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cr0", "ctr"); + + return ret; +} + +static __always_inline +int gettimeofday_fallback(struct __kernel_old_timeval *_tv, struct timezone *_tz) +{ + return do_syscall_2(__NR_gettimeofday, (unsigned long)_tv, (unsigned long)_tz); +} + +static __always_inline +int clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + return do_syscall_2(__NR_clock_gettime, _clkid, (unsigned long)_ts); +} + +static __always_inline +int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + return do_syscall_2(__NR_clock_getres, _clkid, (unsigned long)_ts); +} + +#ifdef CONFIG_VDSO32 + +#define BUILD_VDSO32 1 + +static __always_inline +int clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) +{ + return do_syscall_2(__NR_clock_gettime, _clkid, (unsigned long)_ts); +} + +static __always_inline +int clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) +{ + return do_syscall_2(__NR_clock_getres, _clkid, (unsigned long)_ts); +} +#endif + +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, + const struct vdso_data *vd) +{ + return get_tb(); +} + +const struct vdso_data *__arch_get_vdso_data(void); + +static inline bool vdso_clocksource_ok(const struct vdso_data *vd) +{ + return true; +} +#define vdso_clocksource_ok vdso_clocksource_ok + +/* + * powerpc specific delta calculation. + * + * This variant removes the masking of the subtraction because the + * clocksource mask of all VDSO capable clocksources on powerpc is U64_MAX + * which would result in a pointless operation. The compiler cannot + * optimize it away as the mask comes from the vdso data and is not compile + * time constant. 
+ */ +static __always_inline u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) +{ + return (cycles - last) * mult; +} +#define vdso_calc_delta vdso_calc_delta + +#ifndef __powerpc64__ +static __always_inline u64 vdso_shift_ns(u64 ns, unsigned long shift) +{ + u32 hi = ns >> 32; + u32 lo = ns; + + lo >>= shift; + lo |= hi << (32 - shift); + hi >>= shift; + + if (likely(hi == 0)) + return lo; + + return ((u64)hi << 32) | lo; +} +#define vdso_shift_ns vdso_shift_ns +#endif + +#ifdef __powerpc64__ +int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts, + const struct vdso_data *vd); +int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res, + const struct vdso_data *vd); +#else +int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts, + const struct vdso_data *vd); +int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res, + const struct vdso_data *vd); +#endif +int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz, + const struct vdso_data *vd); +__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, + const struct vdso_data *vd); +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/powerpc/kernel/vdso32/vgettimeofday.c b/arch/powerpc/kernel/vdso32/vgettimeofday.c new file mode 100644 index 000000000000..0d4bc217529e --- /dev/null +++ b/arch/powerpc/kernel/vdso32/vgettimeofday.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Powerpc userspace implementations of gettimeofday() and similar. + */ +#include + +int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts, + const struct vdso_data *vd) +{ + return __cvdso_clock_gettime32_data(vd, clock, ts); +} + +int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz, + const struct vdso_data *vd) +{ + return __cvdso_gettimeofday_data(vd, tv, tz); +} + +int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res, + const struct vdso_data *vd) +{ + return __cvdso_clock_getres_time32_data(vd, clock_id, res); +} + +__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_data *vd) +{ + return __cvdso_time_data(vd, time); +} diff --git a/arch/powerpc/kernel/vdso64/vgettimeofday.c b/arch/powerpc/kernel/vdso64/vgettimeofday.c new file mode 100644 index 000000000000..5b5500058344 --- /dev/null +++ b/arch/powerpc/kernel/vdso64/vgettimeofday.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Powerpc userspace implementations of gettimeofday() and similar. + */ +#include +#include + +int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts, + const struct vdso_data *vd) +{ + return __cvdso_clock_gettime_data(vd, clock, ts); +} + +int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz, + const struct vdso_data *vd) +{ + return __cvdso_gettimeofday_data(vd, tv, tz); +} + +int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res, + const struct vdso_data *vd) +{ + return __cvdso_clock_getres_data(vd, clock_id, res); +} + +__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_data *vd) +{ + return __cvdso_time_data(vd, time); +} From 7fec9f5d41979dbe273ec337327d5939449562e7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 27 Nov 2020 00:10:04 +1100 Subject: [PATCH 051/304] powerpc/vdso: Save and restore TOC pointer on PPC64 On PPC64, the TOC pointer needs to be saved and restored. 
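For context: on 64-bit, r2 is the TOC/GOT pointer, and the stack frame layout reserves a slot for it (STK_GOT). Because the asm wrappers cannot assume the C functions they reach through DOTSYM() leave the caller's r2 intact, they save it before the call and reload it afterwards. The addition amounts to bracketing the existing call, roughly (a sketch of the hunk below, 64-bit only):

	PPC_STL	r2, PPC_MIN_STKFRM + STK_GOT(r1)	/* save the caller's TOC pointer */
	bl	DOTSYM(\funct)
	PPC_LL	r2, PPC_MIN_STKFRM + STK_GOT(r1)	/* restore it before returning */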
Suggested-by: Michael Ellerman Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201126131006.2431205-7-mpe@ellerman.id.au --- arch/powerpc/include/asm/vdso/gettimeofday.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index 43dd1dc47c37..6f56a6bce615 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -19,10 +19,16 @@ .cfi_register lr, r0 PPC_STLU r1, -PPC_MIN_STKFRM(r1) PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) +#ifdef __powerpc64__ + PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) +#endif get_datapage r5, r0 addi r5, r5, VDSO_DATA_OFFSET bl DOTSYM(\funct) PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) +#ifdef __powerpc64__ + PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) +#endif cmpwi r3, 0 mtlr r0 .cfi_restore lr @@ -42,10 +48,16 @@ .cfi_register lr, r0 PPC_STLU r1, -PPC_MIN_STKFRM(r1) PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) +#ifdef __powerpc64__ + PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) +#endif get_datapage r4, r0 addi r4, r4, VDSO_DATA_OFFSET bl DOTSYM(\funct) PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) +#ifdef __powerpc64__ + PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) +#endif crclr so mtlr r0 .cfi_restore lr From ab037dd87a2f946556850e204c06cbd7a2a19390 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 27 Nov 2020 00:10:05 +1100 Subject: [PATCH 052/304] powerpc/vdso: Switch VDSO to generic C implementation. With the C VDSO, the performance is slightly lower, but it is worth it as it will ease maintenance and evolution, and also brings clocks that are not supported with the ASM VDSO. On an 8xx at 132 MHz, vdsotest with the ASM VDSO: gettimeofday: vdso: 828 nsec/call clock-getres-realtime-coarse: vdso: 391 nsec/call clock-gettime-realtime-coarse: vdso: 614 nsec/call clock-getres-realtime: vdso: 460 nsec/call clock-gettime-realtime: vdso: 876 nsec/call clock-getres-monotonic-coarse: vdso: 399 nsec/call clock-gettime-monotonic-coarse: vdso: 691 nsec/call clock-getres-monotonic: vdso: 460 nsec/call clock-gettime-monotonic: vdso: 1026 nsec/call On an 8xx at 132 MHz, vdsotest with the C VDSO: gettimeofday: vdso: 955 nsec/call clock-getres-realtime-coarse: vdso: 545 nsec/call clock-gettime-realtime-coarse: vdso: 592 nsec/call clock-getres-realtime: vdso: 545 nsec/call clock-gettime-realtime: vdso: 941 nsec/call clock-getres-monotonic-coarse: vdso: 545 nsec/call clock-gettime-monotonic-coarse: vdso: 591 nsec/call clock-getres-monotonic: vdso: 545 nsec/call clock-gettime-monotonic: vdso: 940 nsec/call It is even better for gettime with monotonic clocks. 
Unsupported clocks with ASM VDSO: clock-gettime-boottime: vdso: 3851 nsec/call clock-gettime-tai: vdso: 3852 nsec/call clock-gettime-monotonic-raw: vdso: 3396 nsec/call Same clocks with C VDSO: clock-gettime-tai: vdso: 941 nsec/call clock-gettime-monotonic-raw: vdso: 1001 nsec/call clock-gettime-monotonic-coarse: vdso: 591 nsec/call On an 8321E at 333 MHz, vdsotest with the ASM VDSO: gettimeofday: vdso: 220 nsec/call clock-getres-realtime-coarse: vdso: 102 nsec/call clock-gettime-realtime-coarse: vdso: 178 nsec/call clock-getres-realtime: vdso: 129 nsec/call clock-gettime-realtime: vdso: 235 nsec/call clock-getres-monotonic-coarse: vdso: 105 nsec/call clock-gettime-monotonic-coarse: vdso: 208 nsec/call clock-getres-monotonic: vdso: 129 nsec/call clock-gettime-monotonic: vdso: 274 nsec/call On an 8321E at 333 MHz, vdsotest with the C VDSO: gettimeofday: vdso: 272 nsec/call clock-getres-realtime-coarse: vdso: 160 nsec/call clock-gettime-realtime-coarse: vdso: 184 nsec/call clock-getres-realtime: vdso: 166 nsec/call clock-gettime-realtime: vdso: 281 nsec/call clock-getres-monotonic-coarse: vdso: 160 nsec/call clock-gettime-monotonic-coarse: vdso: 184 nsec/call clock-getres-monotonic: vdso: 169 nsec/call clock-gettime-monotonic: vdso: 275 nsec/call On a Power9 Nimbus DD2.2 at 3.8GHz, with the ASM VDSO: clock-gettime-monotonic: vdso: 35 nsec/call clock-getres-monotonic: vdso: 16 nsec/call clock-gettime-monotonic-coarse: vdso: 18 nsec/call clock-getres-monotonic-coarse: vdso: 522 nsec/call clock-gettime-monotonic-raw: vdso: 598 nsec/call clock-getres-monotonic-raw: vdso: 520 nsec/call clock-gettime-realtime: vdso: 34 nsec/call clock-getres-realtime: vdso: 16 nsec/call clock-gettime-realtime-coarse: vdso: 18 nsec/call clock-getres-realtime-coarse: vdso: 517 nsec/call getcpu: vdso: 8 nsec/call gettimeofday: vdso: 25 nsec/call And with the C VDSO: clock-gettime-monotonic: vdso: 37 nsec/call clock-getres-monotonic: vdso: 20 nsec/call clock-gettime-monotonic-coarse: vdso: 21 nsec/call clock-getres-monotonic-coarse: vdso: 19 nsec/call clock-gettime-monotonic-raw: vdso: 38 nsec/call clock-getres-monotonic-raw: vdso: 20 nsec/call clock-gettime-realtime: vdso: 37 nsec/call clock-getres-realtime: vdso: 20 nsec/call clock-gettime-realtime-coarse: vdso: 20 nsec/call clock-getres-realtime-coarse: vdso: 19 nsec/call getcpu: vdso: 8 nsec/call gettimeofday: vdso: 28 nsec/call Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201126131006.2431205-8-mpe@ellerman.id.au --- arch/powerpc/Kconfig | 2 + arch/powerpc/include/asm/vdso/vsyscall.h | 25 ++ arch/powerpc/include/asm/vdso_datapage.h | 40 +-- arch/powerpc/kernel/asm-offsets.c | 47 +--- arch/powerpc/kernel/time.c | 91 +------ arch/powerpc/kernel/vdso.c | 5 +- arch/powerpc/kernel/vdso32/Makefile | 26 +- arch/powerpc/kernel/vdso32/gettimeofday.S | 291 +--------------------- arch/powerpc/kernel/vdso32/vdso32.lds.S | 1 + arch/powerpc/kernel/vdso64/Makefile | 23 +- arch/powerpc/kernel/vdso64/gettimeofday.S | 242 +----------------- arch/powerpc/kernel/vdso64/vdso64.lds.S | 2 +- 12 files changed, 105 insertions(+), 690 deletions(-) create mode 100644 arch/powerpc/include/asm/vdso/vsyscall.h diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 7f4995b245a3..aad8532a718e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -176,6 +176,7 @@ config PPC select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL + select GENERIC_GETTIMEOFDAY select HAVE_ARCH_AUDITSYSCALL 
select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU select HAVE_ARCH_JUMP_LABEL @@ -206,6 +207,7 @@ config PPC select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS if GCC_VERSION >= 50200 # plugin support on gcc <= 5.1 is buggy on PPC + select HAVE_GENERIC_VDSO select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) select HAVE_IDE select HAVE_IOREMAP_PROT diff --git a/arch/powerpc/include/asm/vdso/vsyscall.h b/arch/powerpc/include/asm/vdso/vsyscall.h new file mode 100644 index 000000000000..48cf23f1e273 --- /dev/null +++ b/arch/powerpc/include/asm/vdso/vsyscall.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_VDSO_VSYSCALL_H +#define _ASM_POWERPC_VDSO_VSYSCALL_H + +#ifndef __ASSEMBLY__ + +#include +#include + +/* + * Update the vDSO data page to keep in sync with kernel timekeeping. + */ +static __always_inline +struct vdso_data *__arch_get_k_vdso_data(void) +{ + return vdso_data->data; +} +#define __arch_get_k_vdso_data __arch_get_k_vdso_data + +/* The asm-generic header needs to be included after the definitions above */ +#include + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_VDSO_VSYSCALL_H */ diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h index b9ef6cf50ea5..c4d320504d26 100644 --- a/arch/powerpc/include/asm/vdso_datapage.h +++ b/arch/powerpc/include/asm/vdso_datapage.h @@ -36,6 +36,7 @@ #include #include +#include #define SYSCALL_MAP_SIZE ((NR_syscalls + 31) / 32) @@ -45,7 +46,7 @@ #ifdef CONFIG_PPC64 -struct vdso_data { +struct vdso_arch_data { __u8 eye_catcher[16]; /* Eyecatcher: SYSTEMCFG:PPC64 0x00 */ struct { /* Systemcfg version numbers */ __u32 major; /* Major number 0x10 */ @@ -59,13 +60,13 @@ struct vdso_data { __u32 processor; /* Processor type 0x1C */ __u64 processorCount; /* # of physical processors 0x20 */ __u64 physicalMemorySize; /* Size of real memory(B) 0x28 */ - __u64 tb_orig_stamp; /* Timebase at boot 0x30 */ + __u64 tb_orig_stamp; /* (NU) Timebase at boot 0x30 */ __u64 tb_ticks_per_sec; /* Timebase tics / sec 0x38 */ - __u64 tb_to_xs; /* Inverse of TB to 2^20 0x40 */ - __u64 stamp_xsec; /* 0x48 */ - __u64 tb_update_count; /* Timebase atomicity ctr 0x50 */ - __u32 tz_minuteswest; /* Minutes west of Greenwich 0x58 */ - __u32 tz_dsttime; /* Type of dst correction 0x5C */ + __u64 tb_to_xs; /* (NU) Inverse of TB to 2^20 0x40 */ + __u64 stamp_xsec; /* (NU) 0x48 */ + __u64 tb_update_count; /* (NU) Timebase atomicity ctr 0x50 */ + __u32 tz_minuteswest; /* (NU) Min. 
west of Greenwich 0x58 */ + __u32 tz_dsttime; /* (NU) Type of dst correction 0x5C */ __u32 dcache_size; /* L1 d-cache size 0x60 */ __u32 dcache_line_size; /* L1 d-cache line size 0x64 */ __u32 icache_size; /* L1 i-cache size 0x68 */ @@ -78,14 +79,10 @@ struct vdso_data { __u32 icache_block_size; /* L1 i-cache block size */ __u32 dcache_log_block_size; /* L1 d-cache log block size */ __u32 icache_log_block_size; /* L1 i-cache log block size */ - __u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */ - __s32 wtom_clock_nsec; /* Wall to monotonic clock nsec */ - __s64 wtom_clock_sec; /* Wall to monotonic clock sec */ - __s64 stamp_xtime_sec; /* xtime secs as at tb_orig_stamp */ - __s64 stamp_xtime_nsec; /* xtime nsecs as at tb_orig_stamp */ - __u32 hrtimer_res; /* hrtimer resolution */ __u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls */ __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */ + + struct vdso_data data[CS_BASES]; }; #else /* CONFIG_PPC64 */ @@ -93,26 +90,15 @@ struct vdso_data { /* * And here is the simpler 32 bits version */ -struct vdso_data { - __u64 tb_orig_stamp; /* Timebase at boot 0x30 */ +struct vdso_arch_data { __u64 tb_ticks_per_sec; /* Timebase tics / sec 0x38 */ - __u64 tb_to_xs; /* Inverse of TB to 2^20 0x40 */ - __u64 stamp_xsec; /* 0x48 */ - __u32 tb_update_count; /* Timebase atomicity ctr 0x50 */ - __u32 tz_minuteswest; /* Minutes west of Greenwich 0x58 */ - __u32 tz_dsttime; /* Type of dst correction 0x5C */ - __s32 wtom_clock_sec; /* Wall to monotonic clock */ - __s32 wtom_clock_nsec; - __s32 stamp_xtime_sec; /* xtime seconds as at tb_orig_stamp */ - __s32 stamp_xtime_nsec; /* xtime nsecs as at tb_orig_stamp */ - __u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */ - __u32 hrtimer_res; /* hrtimer resolution */ __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */ + struct vdso_data data[CS_BASES]; }; #endif /* CONFIG_PPC64 */ -extern struct vdso_data *vdso_data; +extern struct vdso_arch_data *vdso_data; #else /* __ASSEMBLY__ */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c2722ff36e98..a2dcb8ed79b9 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -398,47 +398,16 @@ int main(void) #endif /* ! 
CONFIG_PPC64 */ /* datapage offsets for use by vdso */ - OFFSET(CFG_TB_ORIG_STAMP, vdso_data, tb_orig_stamp); - OFFSET(CFG_TB_TICKS_PER_SEC, vdso_data, tb_ticks_per_sec); - OFFSET(CFG_TB_TO_XS, vdso_data, tb_to_xs); - OFFSET(CFG_TB_UPDATE_COUNT, vdso_data, tb_update_count); - OFFSET(CFG_TZ_MINUTEWEST, vdso_data, tz_minuteswest); - OFFSET(CFG_TZ_DSTTIME, vdso_data, tz_dsttime); - OFFSET(CFG_SYSCALL_MAP32, vdso_data, syscall_map_32); - OFFSET(WTOM_CLOCK_SEC, vdso_data, wtom_clock_sec); - OFFSET(WTOM_CLOCK_NSEC, vdso_data, wtom_clock_nsec); - OFFSET(STAMP_XTIME_SEC, vdso_data, stamp_xtime_sec); - OFFSET(STAMP_XTIME_NSEC, vdso_data, stamp_xtime_nsec); - OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction); - OFFSET(CLOCK_HRTIMER_RES, vdso_data, hrtimer_res); + OFFSET(VDSO_DATA_OFFSET, vdso_arch_data, data); + OFFSET(CFG_TB_TICKS_PER_SEC, vdso_arch_data, tb_ticks_per_sec); + OFFSET(CFG_SYSCALL_MAP32, vdso_arch_data, syscall_map_32); #ifdef CONFIG_PPC64 - OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size); - OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size); - OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_data, icache_log_block_size); - OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_data, dcache_log_block_size); - OFFSET(CFG_SYSCALL_MAP64, vdso_data, syscall_map_64); - OFFSET(TVAL64_TV_SEC, __kernel_old_timeval, tv_sec); - OFFSET(TVAL64_TV_USEC, __kernel_old_timeval, tv_usec); + OFFSET(CFG_ICACHE_BLOCKSZ, vdso_arch_data, icache_block_size); + OFFSET(CFG_DCACHE_BLOCKSZ, vdso_arch_data, dcache_block_size); + OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_arch_data, icache_log_block_size); + OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_arch_data, dcache_log_block_size); + OFFSET(CFG_SYSCALL_MAP64, vdso_arch_data, syscall_map_64); #endif - OFFSET(TSPC64_TV_SEC, __kernel_timespec, tv_sec); - OFFSET(TSPC64_TV_NSEC, __kernel_timespec, tv_nsec); - OFFSET(TVAL32_TV_SEC, old_timeval32, tv_sec); - OFFSET(TVAL32_TV_USEC, old_timeval32, tv_usec); - OFFSET(TSPC32_TV_SEC, old_timespec32, tv_sec); - OFFSET(TSPC32_TV_NSEC, old_timespec32, tv_nsec); - /* timeval/timezone offsets for use by vdso */ - OFFSET(TZONE_TZ_MINWEST, timezone, tz_minuteswest); - OFFSET(TZONE_TZ_DSTTIME, timezone, tz_dsttime); - - /* Other bits used by the vdso */ - DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); - DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); - DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); - DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE); - DEFINE(CLOCK_MAX, CLOCK_TAI); - DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); - DEFINE(EINVAL, EINVAL); - DEFINE(KTIME_LOW_RES, KTIME_LOW_RES); #ifdef CONFIG_BUG DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry)); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 74efe46f5532..92481463f9dc 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -82,6 +82,7 @@ static struct clocksource clocksource_timebase = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, .mask = CLOCKSOURCE_MASK(64), .read = timebase_read, + .vdso_clock_mode = VDSO_CLOCKMODE_ARCHTIMER, }; #define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF @@ -831,95 +832,6 @@ static notrace u64 timebase_read(struct clocksource *cs) return (u64)get_tb(); } - -void update_vsyscall(struct timekeeper *tk) -{ - struct timespec64 xt; - struct clocksource *clock = tk->tkr_mono.clock; - u32 mult = tk->tkr_mono.mult; - u32 shift = tk->tkr_mono.shift; - u64 cycle_last = tk->tkr_mono.cycle_last; - u64 new_tb_to_xs, new_stamp_xsec; - u64 frac_sec; - - if (clock != &clocksource_timebase) - return; - - xt.tv_sec = tk->xtime_sec; - xt.tv_nsec = 
(long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); - - /* Make userspace gettimeofday spin until we're done. */ - ++vdso_data->tb_update_count; - smp_mb(); - - /* - * This computes ((2^20 / 1e9) * mult) >> shift as a - * 0.64 fixed-point fraction. - * The computation in the else clause below won't overflow - * (as long as the timebase frequency is >= 1.049 MHz) - * but loses precision because we lose the low bits of the constant - * in the shift. Note that 19342813113834067 ~= 2^(20+64) / 1e9. - * For a shift of 24 the error is about 0.5e-9, or about 0.5ns - * over a second. (Shift values are usually 22, 23 or 24.) - * For high frequency clocks such as the 512MHz timebase clock - * on POWER[6789], the mult value is small (e.g. 32768000) - * and so we can shift the constant by 16 initially - * (295147905179 ~= 2^(20+64-16) / 1e9) and then do the - * remaining shifts after the multiplication, which gives a - * more accurate result (e.g. with mult = 32768000, shift = 24, - * the error is only about 1.2e-12, or 0.7ns over 10 minutes). - */ - if (mult <= 62500000 && clock->shift >= 16) - new_tb_to_xs = ((u64) mult * 295147905179ULL) >> (clock->shift - 16); - else - new_tb_to_xs = (u64) mult * (19342813113834067ULL >> clock->shift); - - /* - * Compute the fractional second in units of 2^-32 seconds. - * The fractional second is tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift - * in nanoseconds, so multiplying that by 2^32 / 1e9 gives - * it in units of 2^-32 seconds. - * We assume shift <= 32 because clocks_calc_mult_shift() - * generates shift values in the range 0 - 32. - */ - frac_sec = tk->tkr_mono.xtime_nsec << (32 - shift); - do_div(frac_sec, NSEC_PER_SEC); - - /* - * Work out new stamp_xsec value for any legacy users of systemcfg. - * stamp_xsec is in units of 2^-20 seconds. - */ - new_stamp_xsec = frac_sec >> 12; - new_stamp_xsec += tk->xtime_sec * XSEC_PER_SEC; - - /* - * tb_update_count is used to allow the userspace gettimeofday code - * to assure itself that it sees a consistent view of the tb_to_xs and - * stamp_xsec variables. It reads the tb_update_count, then reads - * tb_to_xs and stamp_xsec and then reads tb_update_count again. If - * the two values of tb_update_count match and are even then the - * tb_to_xs and stamp_xsec values are consistent. If not, then it - * loops back and reads them again until this criteria is met. 
- */ - vdso_data->tb_orig_stamp = cycle_last; - vdso_data->stamp_xsec = new_stamp_xsec; - vdso_data->tb_to_xs = new_tb_to_xs; - vdso_data->wtom_clock_sec = tk->wall_to_monotonic.tv_sec; - vdso_data->wtom_clock_nsec = tk->wall_to_monotonic.tv_nsec; - vdso_data->stamp_xtime_sec = xt.tv_sec; - vdso_data->stamp_xtime_nsec = xt.tv_nsec; - vdso_data->stamp_sec_fraction = frac_sec; - vdso_data->hrtimer_res = hrtimer_resolution; - smp_wmb(); - ++(vdso_data->tb_update_count); -} - -void update_vsyscall_tz(void) -{ - vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; - vdso_data->tz_dsttime = sys_tz.tz_dsttime; -} - static void __init clocksource_init(void) { struct clocksource *clock = &clocksource_timebase; @@ -1079,7 +991,6 @@ void __init time_init(void) sys_tz.tz_dsttime = 0; } - vdso_data->tb_update_count = 0; vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; /* initialise and enable the large decrementer (if we have one) */ diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 8dad44262e75..23208a051af5 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -70,10 +71,10 @@ static int vdso_ready; * with it, it will become dynamically allocated */ static union { - struct vdso_data data; + struct vdso_arch_data data; u8 page[PAGE_SIZE]; } vdso_data_store __page_aligned_data; -struct vdso_data *vdso_data = &vdso_data_store.data; +struct vdso_arch_data *vdso_data = &vdso_data_store.data; /* Format of the patch table */ struct vdso_patch_def diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index 73eada6bc8cd..853545a19a1e 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -2,8 +2,20 @@ # List of files in the vdso, has to be asm only for now +ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN +include $(srctree)/lib/vdso/Makefile + obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o +ifneq ($(c-gettimeofday-y),) + CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) + CFLAGS_vgettimeofday.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + CFLAGS_vgettimeofday.o += $(call cc-option, -fno-stack-protector) + CFLAGS_vgettimeofday.o += -DDISABLE_BRANCH_PROFILING + CFLAGS_vgettimeofday.o += -ffreestanding -fasynchronous-unwind-tables + CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) +endif + # Build rules ifdef CROSS32_COMPILE @@ -15,6 +27,7 @@ endif CC32FLAGS := ifdef CONFIG_PPC64 CC32FLAGS += -m32 +KBUILD_CFLAGS := $(filter-out -mcmodel=medium,$(KBUILD_CFLAGS)) endif targets := $(obj-vdso32) vdso32.so vdso32.so.dbg @@ -23,6 +36,7 @@ obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) GCOV_PROFILE := n KCOV_INSTRUMENT := n UBSAN_SANITIZE := n +KASAN_SANITIZE := n ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ -Wl,-soname=linux-vdso32.so.1 -Wl,--hash-style=both @@ -36,8 +50,8 @@ CPPFLAGS_vdso32.lds += -P -C -Upowerpc $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so # link rule for the .so file, .lds has to be first -$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE - $(call if_changed,vdso32ld) +$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday.o FORCE + $(call if_changed,vdso32ld_and_check) # strip rule for the .so file $(obj)/%.so: OBJCOPYFLAGS := -S @@ -47,12 +61,16 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE # 
assembly rules for the .S files $(obj-vdso32): %.o: %.S FORCE $(call if_changed_dep,vdso32as) +$(obj)/vgettimeofday.o: %.o: %.c FORCE + $(call if_changed_dep,vdso32cc) # actual build commands -quiet_cmd_vdso32ld = VDSO32L $@ - cmd_vdso32ld = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) +quiet_cmd_vdso32ld_and_check = VDSO32L $@ + cmd_vdso32ld_and_check = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) ; $(cmd_vdso_check) quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) -c -o $@ $< +quiet_cmd_vdso32cc = VDSO32C $@ + cmd_vdso32cc = $(VDSOCC) $(c_flags) $(CC32FLAGS) -c -o $@ $< # install commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index e7f8f9f1b3f4..fd7b01c51281 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -12,13 +12,7 @@ #include #include #include - -/* Offset for the low 32-bit part of a field of long type */ -#ifdef CONFIG_PPC64 -#define LOPART 4 -#else -#define LOPART 0 -#endif +#include .text /* @@ -28,32 +22,7 @@ * */ V_FUNCTION_BEGIN(__kernel_gettimeofday) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - - mr. r10,r3 /* r10 saves tv */ - mr r11,r4 /* r11 saves tz */ - get_datapage r9, r0 - beq 3f - LOAD_REG_IMMEDIATE(r7, 1000000) /* load up USEC_PER_SEC */ - bl __do_get_tspec@local /* get sec/usec from tb & kernel */ - stw r3,TVAL32_TV_SEC(r10) - stw r4,TVAL32_TV_USEC(r10) - -3: cmplwi r11,0 /* check if tz is NULL */ - mtlr r12 - crclr cr0*4+so - li r3,0 - beqlr - - lwz r4,CFG_TZ_MINUTEWEST(r9)/* fill tz */ - lwz r5,CFG_TZ_DSTTIME(r9) - stw r4,TZONE_TZ_MINWEST(r11) - stw r5,TZONE_TZ_DSTTIME(r11) - - blr - .cfi_endproc + cvdso_call __c_kernel_gettimeofday V_FUNCTION_END(__kernel_gettimeofday) /* @@ -63,127 +32,7 @@ V_FUNCTION_END(__kernel_gettimeofday) * */ V_FUNCTION_BEGIN(__kernel_clock_gettime) - .cfi_startproc - /* Check for supported clock IDs */ - cmpli cr0,r3,CLOCK_REALTIME - cmpli cr1,r3,CLOCK_MONOTONIC - cror cr0*4+eq,cr0*4+eq,cr1*4+eq - - cmpli cr5,r3,CLOCK_REALTIME_COARSE - cmpli cr6,r3,CLOCK_MONOTONIC_COARSE - cror cr5*4+eq,cr5*4+eq,cr6*4+eq - - cror cr0*4+eq,cr0*4+eq,cr5*4+eq - bne cr0, .Lgettime_fallback - - mflr r12 /* r12 saves lr */ - .cfi_register lr,r12 - mr r11,r4 /* r11 saves tp */ - get_datapage r9, r0 - LOAD_REG_IMMEDIATE(r7, NSEC_PER_SEC) /* load up NSEC_PER_SEC */ - beq cr5, .Lcoarse_clocks -.Lprecise_clocks: - bl __do_get_tspec@local /* get sec/nsec from tb & kernel */ - bne cr1, .Lfinish /* not monotonic -> all done */ - - /* - * CLOCK_MONOTONIC - */ - - /* now we must fixup using wall to monotonic. We need to snapshot - * that value and do the counter trick again. Fortunately, we still - * have the counter value in r8 that was returned by __do_get_xsec. - * At this point, r3,r4 contain our sec/nsec values, r5 and r6 - * can be used, r7 contains NSEC_PER_SEC. - */ - - lwz r5,(WTOM_CLOCK_SEC+LOPART)(r9) - lwz r6,WTOM_CLOCK_NSEC(r9) - - /* We now have our offset in r5,r6. 
We create a fake dependency - * on that value and re-check the counter - */ - or r0,r6,r5 - xor r0,r0,r0 - add r9,r9,r0 - lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9) - cmpl cr0,r8,r0 /* check if updated */ - bne- .Lprecise_clocks - b .Lfinish_monotonic - - /* - * For coarse clocks we get data directly from the vdso data page, so - * we don't need to call __do_get_tspec, but we still need to do the - * counter trick. - */ -.Lcoarse_clocks: - lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9) - andi. r0,r8,1 /* pending update ? loop */ - bne- .Lcoarse_clocks - add r9,r9,r0 /* r0 is already 0 */ - - /* - * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE - * too - */ - lwz r3,STAMP_XTIME_SEC+LOPART(r9) - lwz r4,STAMP_XTIME_NSEC+LOPART(r9) - bne cr6,1f - - /* CLOCK_MONOTONIC_COARSE */ - lwz r5,(WTOM_CLOCK_SEC+LOPART)(r9) - lwz r6,WTOM_CLOCK_NSEC(r9) - - /* check if counter has updated */ - or r0,r6,r5 -1: or r0,r0,r3 - or r0,r0,r4 - xor r0,r0,r0 - add r3,r3,r0 - lwz r0,CFG_TB_UPDATE_COUNT+LOPART(r9) - cmpl cr0,r0,r8 /* check if updated */ - bne- .Lcoarse_clocks - - /* Counter has not updated, so continue calculating proper values for - * sec and nsec if monotonic coarse, or just return with the proper - * values for realtime. - */ - bne cr6, .Lfinish - - /* Calculate and store result. Note that this mimics the C code, - * which may cause funny results if nsec goes negative... is that - * possible at all ? - */ -.Lfinish_monotonic: - add r3,r3,r5 - add r4,r4,r6 - cmpw cr0,r4,r7 - cmpwi cr1,r4,0 - blt 1f - subf r4,r7,r4 - addi r3,r3,1 -1: bge cr1, .Lfinish - addi r3,r3,-1 - add r4,r4,r7 - -.Lfinish: - stw r3,TSPC32_TV_SEC(r11) - stw r4,TSPC32_TV_NSEC(r11) - - mtlr r12 - crclr cr0*4+so - li r3,0 - blr - - /* - * syscall fallback - */ -.Lgettime_fallback: - li r0,__NR_clock_gettime - .cfi_restore lr - sc - blr - .cfi_endproc + cvdso_call __c_kernel_clock_gettime V_FUNCTION_END(__kernel_clock_gettime) @@ -194,37 +43,7 @@ V_FUNCTION_END(__kernel_clock_gettime) * */ V_FUNCTION_BEGIN(__kernel_clock_getres) - .cfi_startproc - /* Check for supported clock IDs */ - cmplwi cr0, r3, CLOCK_MAX - cmpwi cr1, r3, CLOCK_REALTIME_COARSE - cmpwi cr7, r3, CLOCK_MONOTONIC_COARSE - bgt cr0, 99f - LOAD_REG_IMMEDIATE(r5, KTIME_LOW_RES) - beq cr1, 1f - beq cr7, 1f - - mflr r12 - .cfi_register lr,r12 - get_datapage r3, r0 - lwz r5, CLOCK_HRTIMER_RES(r3) - mtlr r12 -1: li r3,0 - cmpli cr0,r4,0 - crclr cr0*4+so - beqlr - stw r3,TSPC32_TV_SEC(r4) - stw r5,TSPC32_TV_NSEC(r4) - blr - - /* - * syscall fallback - */ -99: - li r0,__NR_clock_getres - sc - blr - .cfi_endproc + cvdso_call __c_kernel_clock_getres V_FUNCTION_END(__kernel_clock_getres) @@ -235,105 +54,5 @@ V_FUNCTION_END(__kernel_clock_getres) * */ V_FUNCTION_BEGIN(__kernel_time) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - - mr r11,r3 /* r11 holds t */ - get_datapage r9, r0 - - lwz r3,STAMP_XTIME_SEC+LOPART(r9) - - cmplwi r11,0 /* check if t is NULL */ - mtlr r12 - crclr cr0*4+so - beqlr - stw r3,0(r11) /* store result at *t */ - blr - .cfi_endproc + cvdso_call_time __c_kernel_time V_FUNCTION_END(__kernel_time) - -/* - * This is the core of clock_gettime() and gettimeofday(), - * it returns the current time in r3 (seconds) and r4. - * On entry, r7 gives the resolution of r4, either USEC_PER_SEC - * or NSEC_PER_SEC, giving r4 in microseconds or nanoseconds. - * It expects the datapage ptr in r9 and doesn't clobber it. - * It clobbers r0, r5 and r6. - * On return, r8 contains the counter value that can be reused. 
- * This clobbers cr0 but not any other cr field. - */ -__do_get_tspec: - .cfi_startproc - /* Check for update count & load values. We use the low - * order 32 bits of the update count - */ -1: lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9) - andi. r0,r8,1 /* pending update ? loop */ - bne- 1b - xor r0,r8,r8 /* create dependency */ - add r9,r9,r0 - - /* Load orig stamp (offset to TB) */ - lwz r5,CFG_TB_ORIG_STAMP(r9) - lwz r6,(CFG_TB_ORIG_STAMP+4)(r9) - - /* Get a stable TB value */ -2: MFTBU(r3) - MFTBL(r4) - MFTBU(r0) - cmplw cr0,r3,r0 - bne- 2b - - /* Subtract tb orig stamp and shift left 12 bits. - */ - subfc r4,r6,r4 - subfe r0,r5,r3 - slwi r0,r0,12 - rlwimi. r0,r4,12,20,31 - slwi r4,r4,12 - - /* - * Load scale factor & do multiplication. - * We only use the high 32 bits of the tb_to_xs value. - * Even with a 1GHz timebase clock, the high 32 bits of - * tb_to_xs will be at least 4 million, so the error from - * ignoring the low 32 bits will be no more than 0.25ppm. - * The error will just make the clock run very very slightly - * slow until the next time the kernel updates the VDSO data, - * at which point the clock will catch up to the kernel's value, - * so there is no long-term error accumulation. - */ - lwz r5,CFG_TB_TO_XS(r9) /* load values */ - mulhwu r4,r4,r5 - li r3,0 - - beq+ 4f /* skip high part computation if 0 */ - mulhwu r3,r0,r5 - mullw r5,r0,r5 - addc r4,r4,r5 - addze r3,r3 -4: - /* At this point, we have seconds since the xtime stamp - * as a 32.32 fixed-point number in r3 and r4. - * Load & add the xtime stamp. - */ - lwz r5,STAMP_XTIME_SEC+LOPART(r9) - lwz r6,STAMP_SEC_FRAC(r9) - addc r4,r4,r6 - adde r3,r3,r5 - - /* We create a fake dependency on the result in r3/r4 - * and re-check the counter - */ - or r6,r4,r3 - xor r0,r6,r6 - add r9,r9,r0 - lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9) - cmplw cr0,r8,r0 /* check if updated */ - bne- 1b - - mulhwu r4,r4,r7 /* convert to micro or nanoseconds */ - - blr - .cfi_endproc diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index 7eadac74c7f9..51e9b3f3f88a 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -111,6 +111,7 @@ SECTIONS *(.note.GNU-stack) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) + *(.got1) } } diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index dfd34f68bfa1..4a8c5e4d25c0 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -1,8 +1,20 @@ # SPDX-License-Identifier: GPL-2.0 # List of files in the vdso, has to be asm only for now +ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN +include $(srctree)/lib/vdso/Makefile + obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o +ifneq ($(c-gettimeofday-y),) + CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) + CFLAGS_vgettimeofday.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + CFLAGS_vgettimeofday.o += $(call cc-option, -fno-stack-protector) + CFLAGS_vgettimeofday.o += -DDISABLE_BRANCH_PROFILING + CFLAGS_vgettimeofday.o += -ffreestanding -fasynchronous-unwind-tables + CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) +endif + # Build rules targets := $(obj-vdso64) vdso64.so vdso64.so.dbg @@ -11,6 +23,7 @@ obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) GCOV_PROFILE := n KCOV_INSTRUMENT := n UBSAN_SANITIZE := 
n +KASAN_SANITIZE := n ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both @@ -20,12 +33,14 @@ obj-y += vdso64_wrapper.o targets += vdso64.lds CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) +$(obj)/vgettimeofday.o: %.o: %.c FORCE + # Force dependency (incbin is bad) $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so # link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE - $(call if_changed,vdso64ld) +$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday.o FORCE + $(call if_changed,vdso64ld_and_check) # strip rule for the .so file $(obj)/%.so: OBJCOPYFLAGS := -S @@ -33,8 +48,8 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) # actual build commands -quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) +quiet_cmd_vdso64ld_and_check = VDSO64L $@ + cmd_vdso64ld_and_check = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check) # install commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index 20f8be40c653..d7a7bfb51081 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -12,6 +12,7 @@ #include #include #include +#include .text /* @@ -21,31 +22,7 @@ * */ V_FUNCTION_BEGIN(__kernel_gettimeofday) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - - mr r11,r3 /* r11 holds tv */ - mr r10,r4 /* r10 holds tz */ - get_datapage r3, r0 - cmpldi r11,0 /* check if tv is NULL */ - beq 2f - lis r7,1000000@ha /* load up USEC_PER_SEC */ - addi r7,r7,1000000@l - bl V_LOCAL_FUNC(__do_get_tspec) /* get sec/us from tb & kernel */ - std r4,TVAL64_TV_SEC(r11) /* store sec in tv */ - std r5,TVAL64_TV_USEC(r11) /* store usec in tv */ -2: cmpldi r10,0 /* check if tz is NULL */ - beq 1f - lwz r4,CFG_TZ_MINUTEWEST(r3)/* fill tz */ - lwz r5,CFG_TZ_DSTTIME(r3) - stw r4,TZONE_TZ_MINWEST(r10) - stw r5,TZONE_TZ_DSTTIME(r10) -1: mtlr r12 - crclr cr0*4+so - li r3,0 /* always success */ - blr - .cfi_endproc + cvdso_call __c_kernel_gettimeofday V_FUNCTION_END(__kernel_gettimeofday) @@ -56,120 +33,7 @@ V_FUNCTION_END(__kernel_gettimeofday) * */ V_FUNCTION_BEGIN(__kernel_clock_gettime) - .cfi_startproc - /* Check for supported clock IDs */ - cmpwi cr0,r3,CLOCK_REALTIME - cmpwi cr1,r3,CLOCK_MONOTONIC - cror cr0*4+eq,cr0*4+eq,cr1*4+eq - - cmpwi cr5,r3,CLOCK_REALTIME_COARSE - cmpwi cr6,r3,CLOCK_MONOTONIC_COARSE - cror cr5*4+eq,cr5*4+eq,cr6*4+eq - - cror cr0*4+eq,cr0*4+eq,cr5*4+eq - bne cr0,99f - - mflr r12 /* r12 saves lr */ - .cfi_register lr,r12 - mr r11,r4 /* r11 saves tp */ - get_datapage r3, r0 - lis r7,NSEC_PER_SEC@h /* want nanoseconds */ - ori r7,r7,NSEC_PER_SEC@l - beq cr5,70f -50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */ - bne cr1,80f /* if not monotonic, all done */ - - /* - * CLOCK_MONOTONIC - */ - - /* now we must fixup using wall to monotonic. We need to snapshot - * that value and do the counter trick again. Fortunately, we still - * have the counter value in r8 that was returned by __do_get_tspec. - * At this point, r4,r5 contain our sec/nsec values. - */ - - ld r6,WTOM_CLOCK_SEC(r3) - lwa r9,WTOM_CLOCK_NSEC(r3) - - /* We now have our result in r6,r9. 
We create a fake dependency - * on that result and re-check the counter - */ - or r0,r6,r9 - xor r0,r0,r0 - add r3,r3,r0 - ld r0,CFG_TB_UPDATE_COUNT(r3) - cmpld cr0,r0,r8 /* check if updated */ - bne- 50b - b 78f - - /* - * For coarse clocks we get data directly from the vdso data page, so - * we don't need to call __do_get_tspec, but we still need to do the - * counter trick. - */ -70: ld r8,CFG_TB_UPDATE_COUNT(r3) - andi. r0,r8,1 /* pending update ? loop */ - bne- 70b - add r3,r3,r0 /* r0 is already 0 */ - - /* - * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE - * too - */ - ld r4,STAMP_XTIME_SEC(r3) - ld r5,STAMP_XTIME_NSEC(r3) - bne cr6,75f - - /* CLOCK_MONOTONIC_COARSE */ - ld r6,WTOM_CLOCK_SEC(r3) - lwa r9,WTOM_CLOCK_NSEC(r3) - - /* check if counter has updated */ - or r0,r6,r9 -75: or r0,r0,r4 - or r0,r0,r5 - xor r0,r0,r0 - add r3,r3,r0 - ld r0,CFG_TB_UPDATE_COUNT(r3) - cmpld cr0,r0,r8 /* check if updated */ - bne- 70b - - /* Counter has not updated, so continue calculating proper values for - * sec and nsec if monotonic coarse, or just return with the proper - * values for realtime. - */ - bne cr6,80f - - /* Add wall->monotonic offset and check for overflow or underflow */ -78: add r4,r4,r6 - add r5,r5,r9 - cmpd cr0,r5,r7 - cmpdi cr1,r5,0 - blt 79f - subf r5,r7,r5 - addi r4,r4,1 -79: bge cr1,80f - addi r4,r4,-1 - add r5,r5,r7 - -80: std r4,TSPC64_TV_SEC(r11) - std r5,TSPC64_TV_NSEC(r11) - - mtlr r12 - crclr cr0*4+so - li r3,0 - blr - - /* - * syscall fallback - */ -99: - li r0,__NR_clock_gettime - .cfi_restore lr - sc - blr - .cfi_endproc + cvdso_call __c_kernel_clock_gettime V_FUNCTION_END(__kernel_clock_gettime) @@ -180,34 +44,7 @@ V_FUNCTION_END(__kernel_clock_gettime) * */ V_FUNCTION_BEGIN(__kernel_clock_getres) - .cfi_startproc - /* Check for supported clock IDs */ - cmpwi cr0,r3,CLOCK_REALTIME - cmpwi cr1,r3,CLOCK_MONOTONIC - cror cr0*4+eq,cr0*4+eq,cr1*4+eq - bne cr0,99f - - mflr r12 - .cfi_register lr,r12 - get_datapage r3, r0 - lwz r5, CLOCK_HRTIMER_RES(r3) - mtlr r12 - li r3,0 - cmpldi cr0,r4,0 - crclr cr0*4+so - beqlr - std r3,TSPC64_TV_SEC(r4) - std r5,TSPC64_TV_NSEC(r4) - blr - - /* - * syscall fallback - */ -99: - li r0,__NR_clock_getres - sc - blr - .cfi_endproc + cvdso_call __c_kernel_clock_getres V_FUNCTION_END(__kernel_clock_getres) /* @@ -217,74 +54,5 @@ V_FUNCTION_END(__kernel_clock_getres) * */ V_FUNCTION_BEGIN(__kernel_time) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - - mr r11,r3 /* r11 holds t */ - get_datapage r3, r0 - - ld r4,STAMP_XTIME_SEC(r3) - - cmpldi r11,0 /* check if t is NULL */ - beq 2f - std r4,0(r11) /* store result at *t */ -2: mtlr r12 - crclr cr0*4+so - mr r3,r4 - blr - .cfi_endproc + cvdso_call_time __c_kernel_time V_FUNCTION_END(__kernel_time) - - -/* - * This is the core of clock_gettime() and gettimeofday(), - * it returns the current time in r4 (seconds) and r5. - * On entry, r7 gives the resolution of r5, either USEC_PER_SEC - * or NSEC_PER_SEC, giving r5 in microseconds or nanoseconds. - * It expects the datapage ptr in r3 and doesn't clobber it. - * It clobbers r0, r6 and r9. - * On return, r8 contains the counter value that can be reused. - * This clobbers cr0 but not any other cr field. - */ -V_FUNCTION_BEGIN(__do_get_tspec) - .cfi_startproc - /* check for update count & load values */ -1: ld r8,CFG_TB_UPDATE_COUNT(r3) - andi. r0,r8,1 /* pending update ? loop */ - bne- 1b - xor r0,r8,r8 /* create dependency */ - add r3,r3,r0 - - /* Get TB & offset it. 
We use the MFTB macro which will generate - * workaround code for Cell. - */ - MFTB(r6) - ld r9,CFG_TB_ORIG_STAMP(r3) - subf r6,r9,r6 - - /* Scale result */ - ld r5,CFG_TB_TO_XS(r3) - sldi r6,r6,12 /* compute time since stamp_xtime */ - mulhdu r6,r6,r5 /* in units of 2^-32 seconds */ - - /* Add stamp since epoch */ - ld r4,STAMP_XTIME_SEC(r3) - lwz r5,STAMP_SEC_FRAC(r3) - or r0,r4,r5 - or r0,r0,r6 - xor r0,r0,r0 - add r3,r3,r0 - ld r0,CFG_TB_UPDATE_COUNT(r3) - cmpld r0,r8 /* check if updated */ - bne- 1b /* reload if so */ - - /* convert to seconds & nanoseconds and add to stamp */ - add r6,r6,r5 /* add on fractional seconds of xtime */ - mulhwu r5,r6,r7 /* compute micro or nanoseconds and */ - srdi r6,r6,32 /* seconds since stamp_xtime */ - clrldi r5,r5,32 - add r4,r4,r6 - blr - .cfi_endproc -V_FUNCTION_END(__do_get_tspec) diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 256fb9720298..71be083b24ed 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -61,7 +61,6 @@ SECTIONS .gcc_except_table : { *(.gcc_except_table) } .rela.dyn ALIGN(8) : { *(.rela.dyn) } - .opd ALIGN(8) : { KEEP (*(.opd)) } .got ALIGN(8) : { *(.got .toc) } _end = .; @@ -111,6 +110,7 @@ SECTIONS *(.branch_lt) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) + *(.opd) } } From d0e3fc69d00d1f50d22d6b6acfc555ccda80ad1e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 27 Nov 2020 00:10:06 +1100 Subject: [PATCH 053/304] powerpc/vdso: Provide __kernel_clock_gettime64() on vdso32 Provides __kernel_clock_gettime64() on vdso32. This is the 64 bits version of __kernel_clock_gettime() which is y2038 compliant. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201126131006.2431205-9-mpe@ellerman.id.au --- arch/powerpc/include/asm/vdso/gettimeofday.h | 2 ++ arch/powerpc/kernel/vdso32/gettimeofday.S | 9 +++++++++ arch/powerpc/kernel/vdso32/vdso32.lds.S | 1 + arch/powerpc/kernel/vdso32/vgettimeofday.c | 6 ++++++ 4 files changed, 18 insertions(+) diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index 6f56a6bce615..0f95569e8fc3 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -187,6 +187,8 @@ int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res, #else int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts, const struct vdso_data *vd); +int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts, + const struct vdso_data *vd); int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res, const struct vdso_data *vd); #endif diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index fd7b01c51281..a6e29f880e0e 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -35,6 +35,15 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) cvdso_call __c_kernel_clock_gettime V_FUNCTION_END(__kernel_clock_gettime) +/* + * Exact prototype of clock_gettime64() + * + * int __kernel_clock_gettime64(clockid_t clock_id, struct __timespec64 *ts); + * + */ +V_FUNCTION_BEGIN(__kernel_clock_gettime64) + cvdso_call __c_kernel_clock_gettime64 +V_FUNCTION_END(__kernel_clock_gettime64) /* * Exact prototype of clock_getres() diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S 
b/arch/powerpc/kernel/vdso32/vdso32.lds.S index 51e9b3f3f88a..27a2d03c72d5 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -147,6 +147,7 @@ VERSION __kernel_get_syscall_map; __kernel_gettimeofday; __kernel_clock_gettime; + __kernel_clock_gettime64; __kernel_clock_getres; __kernel_time; __kernel_get_tbfreq; diff --git a/arch/powerpc/kernel/vdso32/vgettimeofday.c b/arch/powerpc/kernel/vdso32/vgettimeofday.c index 0d4bc217529e..65fb03fb1731 100644 --- a/arch/powerpc/kernel/vdso32/vgettimeofday.c +++ b/arch/powerpc/kernel/vdso32/vgettimeofday.c @@ -10,6 +10,12 @@ int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts, return __cvdso_clock_gettime32_data(vd, clock, ts); } +int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts, + const struct vdso_data *vd) +{ + return __cvdso_clock_gettime_data(vd, clock, ts); +} + int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz, const struct vdso_data *vd) { From 95593e930d7d067ca9bbee996c845248930a01f9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 18 Aug 2020 17:19:13 +0000 Subject: [PATCH 054/304] powerpc/signal: Move inline functions in signal.h To really be inlined, the functions need to be defined in the same C file as the caller, or in an included header. Move functions defined inline from signal .c in signal.h Fixes: 3dd4eb83a9c0 ("powerpc: move common register copy functions from signal_32.c to signal.c") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/35b1bd44a1a66f5bcf9b457a1c480ac8d5ef50b2.1597770847.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal.c | 30 -------------------------- arch/powerpc/kernel/signal.h | 41 +++++++++++++++++++++++++++++------- 2 files changed, 33 insertions(+), 38 deletions(-) diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index d2c356f37077..7cc305aaf44e 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -133,36 +133,6 @@ unsigned long copy_ckvsx_from_user(struct task_struct *task, return 0; } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ -#else -inline unsigned long copy_fpr_to_user(void __user *to, - struct task_struct *task) -{ - return __copy_to_user(to, task->thread.fp_state.fpr, - ELF_NFPREG * sizeof(double)); -} - -inline unsigned long copy_fpr_from_user(struct task_struct *task, - void __user *from) -{ - return __copy_from_user(task->thread.fp_state.fpr, from, - ELF_NFPREG * sizeof(double)); -} - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -inline unsigned long copy_ckfpr_to_user(void __user *to, - struct task_struct *task) -{ - return __copy_to_user(to, task->thread.ckfp_state.fpr, - ELF_NFPREG * sizeof(double)); -} - -inline unsigned long copy_ckfpr_from_user(struct task_struct *task, - void __user *from) -{ - return __copy_from_user(task->thread.ckfp_state.fpr, from, - ELF_NFPREG * sizeof(double)); -} -#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ #endif /* Log an error when sending an unhandled signal to a process. 
Controlled diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index d396efca4068..4626d39cc0f0 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -19,14 +19,6 @@ extern int handle_signal32(struct ksignal *ksig, sigset_t *oldset, extern int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, struct task_struct *tsk); -extern unsigned long copy_fpr_to_user(void __user *to, - struct task_struct *task); -extern unsigned long copy_ckfpr_to_user(void __user *to, - struct task_struct *task); -extern unsigned long copy_fpr_from_user(struct task_struct *task, - void __user *from); -extern unsigned long copy_ckfpr_from_user(struct task_struct *task, - void __user *from); extern unsigned long get_tm_stackpointer(struct task_struct *tsk); #ifdef CONFIG_VSX @@ -38,6 +30,39 @@ extern unsigned long copy_vsx_from_user(struct task_struct *task, void __user *from); extern unsigned long copy_ckvsx_from_user(struct task_struct *task, void __user *from); +unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task); +unsigned long copy_ckfpr_to_user(void __user *to, struct task_struct *task); +unsigned long copy_fpr_from_user(struct task_struct *task, void __user *from); +unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from); +#else +static inline unsigned long +copy_fpr_to_user(void __user *to, struct task_struct *task) +{ + return __copy_to_user(to, task->thread.fp_state.fpr, + ELF_NFPREG * sizeof(double)); +} + +static inline unsigned long +copy_fpr_from_user(struct task_struct *task, void __user *from) +{ + return __copy_from_user(task->thread.fp_state.fpr, from, + ELF_NFPREG * sizeof(double)); +} + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +inline unsigned long copy_ckfpr_to_user(void __user *to, struct task_struct *task) +{ + return __copy_to_user(to, task->thread.ckfp_state.fpr, + ELF_NFPREG * sizeof(double)); +} + +static inline unsigned long +copy_ckfpr_from_user(struct task_struct *task, void __user *from) +{ + return __copy_from_user(task->thread.ckfp_state.fpr, from, + ELF_NFPREG * sizeof(double)); +} +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ #endif #ifdef CONFIG_PPC64 From 67e364b3295f9dbf3b820d0edde86fb7c95efc98 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 18 Aug 2020 17:19:14 +0000 Subject: [PATCH 055/304] powerpc/ptrace: Move declaration of ptrace_get_reg() and ptrace_set_reg() ptrace_get_reg() and ptrace_set_reg() are only used internally by ptrace. 
Move them into arch/powerpc/kernel/ptrace/ptrace-decl.h

Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/376c258267aeae54a4423bc4a2e107a9611f0039.1597770847.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/include/asm/ptrace.h        | 6 ------
 arch/powerpc/kernel/ptrace/ptrace-decl.h | 3 +++
 arch/powerpc/kernel/ptrace/ptrace32.c    | 2 ++
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index e2c778c176a3..297d30fed945 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -171,12 +171,6 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
 		set_thread_flag(TIF_NOERROR); \
 	} while(0)

-struct task_struct;
-extern int ptrace_get_reg(struct task_struct *task, int regno,
-			  unsigned long *data);
-extern int ptrace_put_reg(struct task_struct *task, int regno,
-			  unsigned long data);
-
 #define current_pt_regs() \
 	((struct pt_regs *)((unsigned long)task_stack_page(current) + THREAD_SIZE) - 1)

diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h
index 67447a6197eb..2ddc68412fa8 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-decl.h
+++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h
@@ -159,6 +159,9 @@ int tm_cgpr32_set(struct task_struct *target, const struct user_regset *regset,

 /* ptrace-view */

+int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data);
+int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data);
+
 extern const struct user_regset_view user_ppc_native_view;

 /* ptrace-(no)adv */
diff --git a/arch/powerpc/kernel/ptrace/ptrace32.c b/arch/powerpc/kernel/ptrace/ptrace32.c
index 7589a9665ffb..d30b9ad70edc 100644
--- a/arch/powerpc/kernel/ptrace/ptrace32.c
+++ b/arch/powerpc/kernel/ptrace/ptrace32.c
@@ -23,6 +23,8 @@

 #include

+#include "ptrace-decl.h"
+
 /*
  * does not yet catch signals sent when the child dies.
  * in exit.c or in signal.c.

From e009fa433542cd09d6279e361b767a1f44ffd29a Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 18 Aug 2020 17:19:15 +0000
Subject: [PATCH 056/304] powerpc/ptrace: Consolidate reg index calculation

Today we have:

	#ifdef CONFIG_PPC32
	index = addr >> 2;
	if ((addr & 3) || child->thread.regs == NULL)
	#else
	index = addr >> 3;
	if ((addr & 7))
	#endif

sizeof(long) has value 4 for PPC32 and value 8 for PPC64. Dividing by 4
is equivalent to >> 2 and dividing by 8 is equivalent to >> 3, and 3 and
7 are, respectively, sizeof(long) - 1.

Use sizeof(long) to get rid of the #ifdef CONFIG_PPC32 and consolidate
the calculation and checking.

thread.regs must be non-NULL on both PPC32 and PPC64, so adding that
test on PPC64 is harmless.
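As an illustration of the equivalence this relies on, here is a minimal
standalone C sketch (not part of the patch; the offset value is
arbitrary):

	#include <assert.h>

	int main(void)
	{
		unsigned long addr = 100;	/* arbitrary register byte offset */

		/* addr / sizeof(long) reproduces the old explicit shifts */
		assert(addr / sizeof(long) ==
		       (sizeof(long) == 4 ? addr >> 2 : addr >> 3));
		/* addr & (sizeof(long) - 1) reproduces the old alignment masks */
		assert((addr & (sizeof(long) - 1)) ==
		       (sizeof(long) == 4 ? (addr & 3) : (addr & 7)));
		return 0;
	}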
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/3cd1e284e93c60db981659585e18d1f6bb73ed2f.1597770847.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kernel/ptrace/ptrace.c | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c
index f6e51be47c6e..51557a9c0765 100644
--- a/arch/powerpc/kernel/ptrace/ptrace.c
+++ b/arch/powerpc/kernel/ptrace/ptrace.c
@@ -55,14 +55,9 @@ long arch_ptrace(struct task_struct *child, long request,
 		ret = -EIO;
 		/* convert to index and check */
-#ifdef CONFIG_PPC32
-		index = addr >> 2;
-		if ((addr & 3) || (index > PT_FPSCR)
+		index = addr / sizeof(long);
+		if ((addr & (sizeof(long) - 1)) || (index > PT_FPSCR)
 		    || (child->thread.regs == NULL))
-#else
-		index = addr >> 3;
-		if ((addr & 7) || (index > PT_FPSCR))
-#endif
 			break;
 		CHECK_FULL_REGS(child->thread.regs);
@@ -90,14 +85,9 @@ long arch_ptrace(struct task_struct *child, long request,
 		ret = -EIO;
 		/* convert to index and check */
-#ifdef CONFIG_PPC32
-		index = addr >> 2;
-		if ((addr & 3) || (index > PT_FPSCR)
+		index = addr / sizeof(long);
+		if ((addr & (sizeof(long) - 1)) || (index > PT_FPSCR)
 		    || (child->thread.regs == NULL))
-#else
-		index = addr >> 3;
-		if ((addr & 7) || (index > PT_FPSCR))
-#endif
 			break;
 		CHECK_FULL_REGS(child->thread.regs);

From 4d90eb97e292c7b14de8ba59fded35b340c73101 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 18 Aug 2020 17:19:16 +0000
Subject: [PATCH 057/304] powerpc/ptrace: Create ptrace_get_fpr() and ptrace_put_fpr()

On the same model as ptrace_get_reg() and ptrace_put_reg(), create
ptrace_get_fpr() and ptrace_put_fpr() to get/set the floating point
registers. We move the boundary checks into them.
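With these helpers, the dispatch at the arch_ptrace() call site reduces
to the following shape (a sketch of the result; the full change is in
the diff below):

	if (index < PT_FPR0)
		ret = ptrace_get_reg(child, (int) index, &tmp);
	else
		ret = ptrace_get_fpr(child, index, &tmp);
	if (ret)
		break;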
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/24a1baedea7f7ae7b6bf27be98bab6d01b5ca2c1.1597770847.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/ptrace/Makefile | 1 + arch/powerpc/kernel/ptrace/ptrace-decl.h | 4 +++ arch/powerpc/kernel/ptrace/ptrace-fpu.c | 40 ++++++++++++++++++++++++ arch/powerpc/kernel/ptrace/ptrace.c | 38 ++++++---------------- 4 files changed, 55 insertions(+), 28 deletions(-) create mode 100644 arch/powerpc/kernel/ptrace/ptrace-fpu.c diff --git a/arch/powerpc/kernel/ptrace/Makefile b/arch/powerpc/kernel/ptrace/Makefile index c2f2402ebc8c..77abd1a5a508 100644 --- a/arch/powerpc/kernel/ptrace/Makefile +++ b/arch/powerpc/kernel/ptrace/Makefile @@ -6,6 +6,7 @@ CFLAGS_ptrace-view.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' obj-y += ptrace.o ptrace-view.o +obj-y += ptrace-fpu.o obj-$(CONFIG_COMPAT) += ptrace32.o obj-$(CONFIG_VSX) += ptrace-vsx.o ifneq ($(CONFIG_VSX),y) diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h index 2ddc68412fa8..eafe5f0f6289 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-decl.h +++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h @@ -164,6 +164,10 @@ int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data); extern const struct user_regset_view user_ppc_native_view; +/* ptrace-fpu */ +int ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data); +int ptrace_put_fpr(struct task_struct *child, int index, unsigned long data); + /* ptrace-(no)adv */ void ppc_gethwdinfo(struct ppc_debug_info *dbginfo); int ptrace_get_debugreg(struct task_struct *child, unsigned long addr, diff --git a/arch/powerpc/kernel/ptrace/ptrace-fpu.c b/arch/powerpc/kernel/ptrace/ptrace-fpu.c new file mode 100644 index 000000000000..8301cb52dd99 --- /dev/null +++ b/arch/powerpc/kernel/ptrace/ptrace-fpu.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include + +#include "ptrace-decl.h" + +int ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data) +{ + unsigned int fpidx = index - PT_FPR0; + + if (index > PT_FPSCR) + return -EIO; + + flush_fp_to_thread(child); + if (fpidx < (PT_FPSCR - PT_FPR0)) + memcpy(data, &child->thread.TS_FPR(fpidx), sizeof(long)); + else + *data = child->thread.fp_state.fpscr; + + return 0; +} + +int ptrace_put_fpr(struct task_struct *child, int index, unsigned long data) +{ + unsigned int fpidx = index - PT_FPR0; + + if (index > PT_FPSCR) + return -EIO; + + flush_fp_to_thread(child); + if (fpidx < (PT_FPSCR - PT_FPR0)) + memcpy(&child->thread.TS_FPR(fpidx), &data, sizeof(long)); + else + child->thread.fp_state.fpscr = data; + + return 0; +} + diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 51557a9c0765..3d44b73adb83 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -56,25 +56,17 @@ long arch_ptrace(struct task_struct *child, long request, ret = -EIO; /* convert to index and check */ index = addr / sizeof(long); - if ((addr & (sizeof(long) - 1)) || (index > PT_FPSCR) - || (child->thread.regs == NULL)) + if ((addr & (sizeof(long) - 1)) || !child->thread.regs) break; CHECK_FULL_REGS(child->thread.regs); - if (index < PT_FPR0) { + if (index < PT_FPR0) ret = ptrace_get_reg(child, (int) index, &tmp); - if (ret) - break; - } else { - unsigned int fpidx = index - PT_FPR0; + else + ret = ptrace_get_fpr(child, index, &tmp); - flush_fp_to_thread(child); - if (fpidx < (PT_FPSCR - PT_FPR0)) - memcpy(&tmp, 
&child->thread.TS_FPR(fpidx),
-				       sizeof(long));
-			else
-				tmp = child->thread.fp_state.fpscr;
-		}
+		if (ret)
+			break;
 		ret = put_user(tmp, datalp);
 		break;
 	}
@@ -86,24 +78,14 @@ long arch_ptrace(struct task_struct *child, long request,
 		ret = -EIO;
 		/* convert to index and check */
 		index = addr / sizeof(long);
-		if ((addr & (sizeof(long) - 1)) || (index > PT_FPSCR)
-		    || (child->thread.regs == NULL))
+		if ((addr & (sizeof(long) - 1)) || !child->thread.regs)
 			break;
 		CHECK_FULL_REGS(child->thread.regs);
-		if (index < PT_FPR0) {
+		if (index < PT_FPR0)
 			ret = ptrace_put_reg(child, index, data);
-		} else {
-			unsigned int fpidx = index - PT_FPR0;
-
-			flush_fp_to_thread(child);
-			if (fpidx < (PT_FPSCR - PT_FPR0))
-				memcpy(&child->thread.TS_FPR(fpidx), &data,
-				       sizeof(long));
-			else
-				child->thread.fp_state.fpscr = data;
-			ret = 0;
-		}
+		else
+			ret = ptrace_put_fpr(child, index, data);
 		break;
 	}

From b6254ced4da6cf28d49fbffe24ee4b3286dcb3f4 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 18 Aug 2020 17:19:18 +0000
Subject: [PATCH 058/304] powerpc/signal: Don't manage floating point regs when no FPU

There is no point in copying floating point regs when there is no FPU
and MATH_EMULATION is not selected.

Create a new CONFIG_PPC_FPU_REGS bool that is selected by
CONFIG_MATH_EMULATION and CONFIG_PPC_FPU, and use it to opt out
everything related to fp_state in thread_struct.

The asm constants used only by fpu.S are opted out with CONFIG_PPC_FPU,
as the fpu.S build is conditional on CONFIG_PPC_FPU.

The following app spends approx 8.1 seconds system time on an 8xx
without the patch, and 7.0 seconds with the patch (13.5% reduction).
On an 832x, it spends approx 2.6 seconds system time without the patch
and 2.1 seconds with the patch (19% reduction).

	void sigusr1(int sig) { }

	int main(int argc, char **argv)
	{
		int i = 100000;

		signal(SIGUSR1, sigusr1);
		for (;i--;)
			raise(SIGUSR1);
		exit(0);
	}

Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/7569070083e6cd5b279bb5023da601aba3c06f3c.1597770847.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/Kconfig                     |  1 +
 arch/powerpc/include/asm/processor.h     |  2 ++
 arch/powerpc/kernel/asm-offsets.c        |  2 ++
 arch/powerpc/kernel/process.c            |  4 ++++
 arch/powerpc/kernel/ptrace/Makefile      |  4 ++--
 arch/powerpc/kernel/ptrace/ptrace-decl.h | 14 ++++++++++++++
 arch/powerpc/kernel/ptrace/ptrace-view.c |  2 ++
 arch/powerpc/kernel/signal.h             | 14 +++++++++++++-
 arch/powerpc/kernel/signal_32.c          |  4 ++++
 arch/powerpc/kernel/traps.c              |  2 ++
 arch/powerpc/platforms/Kconfig.cputype   |  4 ++++
 11 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index aad8532a718e..8d12da224cb9 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -422,6 +422,7 @@ config HUGETLB_PAGE_SIZE_VARIABLE
 config MATH_EMULATION
 	bool "Math emulation"
 	depends on 4xx || PPC_8xx || PPC_MPC832x || BOOKE
+	select PPC_FPU_REGS
 	help
 	  Some PowerPC chips designed for embedded applications do not have
 	  a floating-point unit and therefore do not implement the
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 333e3b6c76fb..0792530bedef 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -164,8 +164,10 @@ struct thread_struct {
 #endif
 	/* Debug Registers */
 	struct debug_reg debug;
+#ifdef CONFIG_PPC_FPU_REGS
 	struct thread_fp_state	fp_state;
 	struct thread_fp_state	*fp_save_area;
+#endif
 	int		fpexc_mode;	/* floating-point exception mode */
 	unsigned int	align_ctl;
/* alignment handling control */ #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index a2dcb8ed79b9..81d68494d026 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -110,9 +110,11 @@ int main(void) #ifdef CONFIG_BOOKE OFFSET(THREAD_NORMSAVES, thread_struct, normsave[0]); #endif +#ifdef CONFIG_PPC_FPU OFFSET(THREAD_FPEXC_MODE, thread_struct, fpexc_mode); OFFSET(THREAD_FPSTATE, thread_struct, fp_state.fpr); OFFSET(THREAD_FPSAVEAREA, thread_struct, fp_save_area); +#endif OFFSET(FPSTATE_FPSCR, thread_fp_state, fpscr); OFFSET(THREAD_LOAD_FP, thread_struct, load_fp); #ifdef CONFIG_ALTIVEC diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index d421a2c7f822..ba2c987b8403 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1730,7 +1730,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.ptrace_bps[i] = NULL; #endif +#ifdef CONFIG_PPC_FPU_REGS p->thread.fp_save_area = NULL; +#endif #ifdef CONFIG_ALTIVEC p->thread.vr_save_area = NULL; #endif @@ -1855,8 +1857,10 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) #endif current->thread.load_slb = 0; current->thread.load_fp = 0; +#ifdef CONFIG_PPC_FPU_REGS memset(¤t->thread.fp_state, 0, sizeof(current->thread.fp_state)); current->thread.fp_save_area = NULL; +#endif #ifdef CONFIG_ALTIVEC memset(¤t->thread.vr_state, 0, sizeof(current->thread.vr_state)); current->thread.vr_state.vscr.u[3] = 0x00010000; /* Java mode disabled */ diff --git a/arch/powerpc/kernel/ptrace/Makefile b/arch/powerpc/kernel/ptrace/Makefile index 77abd1a5a508..8ebc11d1168d 100644 --- a/arch/powerpc/kernel/ptrace/Makefile +++ b/arch/powerpc/kernel/ptrace/Makefile @@ -6,11 +6,11 @@ CFLAGS_ptrace-view.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' obj-y += ptrace.o ptrace-view.o -obj-y += ptrace-fpu.o +obj-$(CONFIG_PPC_FPU_REGS) += ptrace-fpu.o obj-$(CONFIG_COMPAT) += ptrace32.o obj-$(CONFIG_VSX) += ptrace-vsx.o ifneq ($(CONFIG_VSX),y) -obj-y += ptrace-novsx.o +obj-$(CONFIG_PPC_FPU_REGS) += ptrace-novsx.o endif obj-$(CONFIG_ALTIVEC) += ptrace-altivec.o obj-$(CONFIG_SPE) += ptrace-spe.o diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h index eafe5f0f6289..3487f2c9735c 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-decl.h +++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h @@ -165,8 +165,22 @@ int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data); extern const struct user_regset_view user_ppc_native_view; /* ptrace-fpu */ +#ifdef CONFIG_PPC_FPU_REGS int ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data); int ptrace_put_fpr(struct task_struct *child, int index, unsigned long data); +#else +static inline int +ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data) +{ + return -EIO; +} + +static inline int +ptrace_put_fpr(struct task_struct *child, int index, unsigned long data) +{ + return -EIO; +} +#endif /* ptrace-(no)adv */ void ppc_gethwdinfo(struct ppc_debug_info *dbginfo); diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 142d58337f40..00a765f00d31 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -521,11 +521,13 @@ static const struct user_regset native_regsets[] = { .size = sizeof(long), .align = sizeof(long), .regset_get = gpr_get, .set = gpr_set }, +#ifdef 
CONFIG_PPC_FPU_REGS [REGSET_FPR] = { .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, .size = sizeof(double), .align = sizeof(double), .regset_get = fpr_get, .set = fpr_set }, +#endif #ifdef CONFIG_ALTIVEC [REGSET_VMX] = { .core_note_type = NT_PPC_VMX, .n = 34, diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index 4626d39cc0f0..6c2a33ab042c 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -34,7 +34,7 @@ unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task); unsigned long copy_ckfpr_to_user(void __user *to, struct task_struct *task); unsigned long copy_fpr_from_user(struct task_struct *task, void __user *from); unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from); -#else +#elif defined(CONFIG_PPC_FPU_REGS) static inline unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task) { @@ -63,6 +63,18 @@ copy_ckfpr_from_user(struct task_struct *task, void __user *from) ELF_NFPREG * sizeof(double)); } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ +#else +static inline unsigned long +copy_fpr_to_user(void __user *to, struct task_struct *task) +{ + return 0; +} + +static inline unsigned long +copy_fpr_from_user(struct task_struct *task, void __user *from) +{ + return 0; +} #endif #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 96950f189b5a..7b291707eb31 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -814,7 +814,9 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, } regs->link = tramp; +#ifdef CONFIG_PPC_FPU_REGS tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */ +#endif /* create a stack frame for the caller of the handler */ newsp = ((unsigned long)rt_sf) - (__SIGNAL_FRAMESIZE + 16); @@ -1271,7 +1273,9 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, regs->link = tramp; +#ifdef CONFIG_PPC_FPU_REGS tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */ +#endif /* create a stack frame for the caller of the handler */ newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 5006dcbe1d9f..5b39baa61590 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1190,7 +1190,9 @@ static void parse_fpe(struct pt_regs *regs) flush_fp_to_thread(current); +#ifdef CONFIG_PPC_FPU_REGS code = __parse_fpscr(current->thread.fp_state.fpscr); +#endif _exception(SIGFPE, regs, code, regs->nip); } diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index c194c4ae8bc7..3e36e3712deb 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -218,9 +218,13 @@ config PPC_E500MC such as e5500/e6500), and must be disabled for running on e500v1 or e500v2. +config PPC_FPU_REGS + bool + config PPC_FPU bool default y if PPC64 + select PPC_FPU_REGS config FSL_EMB_PERFMON bool "Freescale Embedded Perfmon" From 7d68c89169508064c460a1208f38ed0589d226fa Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 18 Aug 2020 17:19:18 +0000 Subject: [PATCH 059/304] powerpc/32s: Allow deselecting CONFIG_PPC_FPU on mpc832x The e300c2 core which is embedded in mpc832x CPU doesn't have an FPU. Make it possible to not select CONFIG_PPC_FPU when building a kernel dedicated to that target. 
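On such a kernel the effect can be observed from userspace with a small
probe (a hypothetical test, assuming both CONFIG_PPC_FPU and
CONFIG_MATH_EMULATION are disabled): the fp instruction is routed to the
program check handler and the process receives SIGILL, instead of the
FPU being lazily enabled.

	#include <signal.h>
	#include <stdio.h>
	#include <unistd.h>

	static void sigill(int sig)
	{
		_exit(1);	/* kernel built without FPU support */
	}

	int main(void)
	{
		signal(SIGILL, sigill);
		asm volatile("fadd 1, 1, 1");	/* any fp instruction will do */
		printf("FPU support available\n");
		return 0;
	}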
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/fcdc60d85baf80eaa0a7f3261d9d889282068216.1597770847.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kernel/head_book3s_32.S   |  4 ++++
 arch/powerpc/platforms/Kconfig.cputype | 11 +++++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index ed25d9991233..bbcc84c5cf5f 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -394,6 +394,7 @@ Alignment:
 	. = 0x800
 	DO_KVM  0x800
 FPUnavailable:
+#ifdef CONFIG_PPC_FPU
 BEGIN_FTR_SECTION
 /*
  * Certain Freescale cores don't have a FPU and treat fp instructions
@@ -407,6 +408,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE)
 	b	fast_exception_return
 1:	addi	r3,r1,STACK_FRAME_OVERHEAD
 	EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception)
+#else
+	b	ProgramCheck
+#endif

 /* Decrementer */
 	EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)

diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 3e36e3712deb..44ab03fbcadc 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -32,7 +32,7 @@ choice
 config PPC_BOOK3S_6xx
 	bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx"
 	select PPC_BOOK3S_32
-	select PPC_FPU
+	imply PPC_FPU
 	select PPC_HAVE_PMU_SUPPORT
 	select PPC_HAVE_KUEP
 	select PPC_HAVE_KUAP
@@ -222,9 +222,16 @@ config PPC_FPU_REGS
 	bool

 config PPC_FPU
-	bool
+	bool "Support for Floating Point Unit (FPU)" if PPC_MPC832x
 	default y if PPC64
 	select PPC_FPU_REGS
+	help
+	  This must be enabled to support the Floating Point Unit.
+	  Most 6xx have an FPU, but the e300c2 core (mpc832x) doesn't have
+	  an FPU, so when building an embedded kernel for that target
+	  you can disable FPU support.
+
+	  If unsure say Y.

 config FSL_EMB_PERFMON
 	bool "Freescale Embedded Perfmon"

From 3fcfb5d1bf731bdbd847c29df57a5372d8ea58d3 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 18 Aug 2020 17:19:20 +0000
Subject: [PATCH 060/304] powerpc/signal: Remove BUG_ON() in handler_signal functions

There is already the same BUG_ON() check in do_signal(), which is the
only caller of handle_rt_signal64(), handle_rt_signal32() and
handle_signal32(). Remove those three redundant BUG_ON().
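For reference, a simplified sketch of the check that remains in the
sole caller (do_signal() in signal.c; body abridged here):

	static void do_signal(struct task_struct *tsk)
	{
		BUG_ON(tsk != current);		/* the one remaining check */

		/* ... get the pending signal, then dispatch to
		 * handle_rt_signal64(), handle_rt_signal32() or
		 * handle_signal32() ... */
	}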
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3582e10a341d523c9c3f1ac925c3aaefc9d9293d.1597770847.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal_32.c | 4 ---- arch/powerpc/kernel/signal_64.c | 2 -- 2 files changed, 6 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 7b291707eb31..8cbc9ac1343d 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -764,8 +764,6 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, unsigned long msr = regs->msr; #endif - BUG_ON(tsk != current); - /* Set up Signal Frame */ /* Put a Real Time Context onto stack */ rt_sf = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*rt_sf), 1); @@ -1227,8 +1225,6 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, unsigned long msr = regs->msr; #endif - BUG_ON(tsk != current); - /* Set up Signal Frame */ frame = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*frame), 1); if (unlikely(frame == NULL)) diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index bfc939360bad..cae612bdde5f 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -822,8 +822,6 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, unsigned long msr = regs->msr; #endif - BUG_ON(tsk != current); - frame = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*frame), 0); if (unlikely(frame == NULL)) goto badframe; From 454b1abb588b3942655638a8bcf1ea4501260579 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 18 Aug 2020 17:19:20 +0000 Subject: [PATCH 061/304] powerpc/signal: Move access_ok() out of get_sigframe() This access_ok() will soon be performed by user_access_begin(). So move it out of get_sigframe(). 
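The target pattern, sketched with the generic unsafe accessors
(illustrative only; 'field' and 'val' stand in for whatever the frame
setup writes first):

	if (!user_access_begin(frame, sizeof(*frame)))
		goto badframe;
	/* batched stores, each jumping to 'failed' on fault */
	unsafe_put_user(val, &frame->field, failed);
	user_access_end();
	return 0;
	failed:
	user_access_end();	/* close the access window before bailing */
	goto badframe;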
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/900b93744732ed0887f28f5b6a40730fb04a43fa.1597770847.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal.c | 4 ---- arch/powerpc/kernel/signal_32.c | 4 ++-- arch/powerpc/kernel/signal_64.c | 2 +- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 7cc305aaf44e..37372fd5b600 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -154,10 +154,6 @@ void __user *get_sigframe(struct ksignal *ksig, unsigned long sp, oldsp = sigsp(oldsp, ksig); newsp = (oldsp - frame_size) & ~0xFUL; - /* Check access */ - if (!access_ok((void __user *)newsp, oldsp - newsp)) - return NULL; - return (void __user *)newsp; } diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 8cbc9ac1343d..61621acacc63 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -768,7 +768,7 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, /* Put a Real Time Context onto stack */ rt_sf = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*rt_sf), 1); addr = rt_sf; - if (unlikely(rt_sf == NULL)) + if (!access_ok(rt_sf, sizeof(*rt_sf))) goto badframe; /* Put the siginfo & fill in most of the ucontext */ @@ -1227,7 +1227,7 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, /* Set up Signal Frame */ frame = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*frame), 1); - if (unlikely(frame == NULL)) + if (!access_ok(frame, sizeof(*frame))) goto badframe; sc = (struct sigcontext __user *) &frame->sctx; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index cae612bdde5f..d3db78732070 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -823,7 +823,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, #endif frame = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*frame), 0); - if (unlikely(frame == NULL)) + if (!access_ok(frame, sizeof(*frame))) goto badframe; err |= __put_user(&frame->info, &frame->pinfo); From 0ecbc6ad18e324012234183e21805423f5e0cc79 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 18 Aug 2020 17:19:21 +0000 Subject: [PATCH 062/304] powerpc/signal: Remove get_clean_sp() get_clean_sp() is only used once in kernel/signal.c . GCC is smart enough to see that x & 0xffffffff is a nop calculation on PPC32, no need of a special PPC32 trivial version. Include the logic from the PPC64 version of get_clean_sp() directly in get_sigframe(). 
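The compiler's view, as a standalone sketch (plain C, not kernel code):

	#include <assert.h>

	int main(void)
	{
		unsigned long sp = (unsigned long)&sp;	/* any stack-ish value */

		/* On 32-bit, the mask covers every bit of the word, so the
		 * AND is a no-op the compiler drops entirely; on 64-bit it
		 * truncates to the low 32 bits. */
		if (sizeof(long) == 4)
			assert((sp & 0x0ffffffffUL) == sp);
		return 0;
	}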
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/13ef6510ce30a4867e043157b93af5bb8c67fb3b.1597770847.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/processor.h | 14 -------------- arch/powerpc/kernel/signal.c | 5 ++++- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 0792530bedef..10d659f2ac46 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -391,20 +391,6 @@ static inline void prefetchw(const void *x) #define HAVE_ARCH_PICK_MMAP_LAYOUT -#ifdef CONFIG_PPC64 -static inline unsigned long get_clean_sp(unsigned long sp, int is_32) -{ - if (is_32) - return sp & 0x0ffffffffUL; - return sp; -} -#else -static inline unsigned long get_clean_sp(unsigned long sp, int is_32) -{ - return sp; -} -#endif - /* asm stubs */ extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val); extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val); diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 37372fd5b600..1297b440ae78 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -150,7 +150,10 @@ void __user *get_sigframe(struct ksignal *ksig, unsigned long sp, unsigned long oldsp, newsp; /* Default to using normal stack */ - oldsp = get_clean_sp(sp, is_32); + if (is_32) + oldsp = sp & 0x0ffffffffUL; + else + oldsp = sp; oldsp = sigsp(oldsp, ksig); newsp = (oldsp - frame_size) & ~0xFUL; From c180cb305c9bba094657259487d563c8fbfb648b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 18 Aug 2020 17:19:22 +0000 Subject: [PATCH 063/304] powerpc/signal: Call get_tm_stackpointer() from get_sigframe() Instead of calling get_tm_stackpointer() from the caller, call it directly from get_sigframe(). This avoids a double call and allows get_tm_stackpointer() to become static and be inlined into get_sigframe() by GCC. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/abfdc105b8b28c4eb3ab9a26297d17f302b600ea.1597770847.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal.c | 9 ++++++--- arch/powerpc/kernel/signal.h | 6 ++---- arch/powerpc/kernel/signal_32.c | 4 ++-- arch/powerpc/kernel/signal_64.c | 2 +- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 1297b440ae78..c3f61dc0a589 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -144,10 +144,13 @@ int show_unhandled_signals = 1; /* * Allocate space for the signal frame */ -void __user *get_sigframe(struct ksignal *ksig, unsigned long sp, - size_t frame_size, int is_32) +static unsigned long get_tm_stackpointer(struct task_struct *tsk); + +void __user *get_sigframe(struct ksignal *ksig, struct task_struct *tsk, + size_t frame_size, int is_32) { unsigned long oldsp, newsp; + unsigned long sp = get_tm_stackpointer(tsk); /* Default to using normal stack */ if (is_32) @@ -300,7 +303,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) user_enter(); } -unsigned long get_tm_stackpointer(struct task_struct *tsk) +static unsigned long get_tm_stackpointer(struct task_struct *tsk) { /* When in an active transaction that takes a signal, we need to be * careful with the stack. 
It's possible that the stack has moved back
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index fb98731348c3..f610cfafa478 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -10,8 +10,8 @@
 #ifndef _POWERPC_ARCH_SIGNAL_H
 #define _POWERPC_ARCH_SIGNAL_H

-extern void __user *get_sigframe(struct ksignal *ksig, unsigned long sp,
-				 size_t frame_size, int is_32);
+void __user *get_sigframe(struct ksignal *ksig, struct task_struct *tsk,
+			  size_t frame_size, int is_32);

 extern int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
 			   struct task_struct *tsk);
@@ -19,8 +19,6 @@ extern int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
 extern int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
 			      struct task_struct *tsk);

-extern unsigned long get_tm_stackpointer(struct task_struct *tsk);
-
 #ifdef CONFIG_VSX
 extern unsigned long copy_vsx_to_user(void __user *to,
 				      struct task_struct *task);
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 61621acacc63..e5b2801a94ac 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -766,7 +766,7 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,

 	/* Set up Signal Frame */
 	/* Put a Real Time Context onto stack */
-	rt_sf = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*rt_sf), 1);
+	rt_sf = get_sigframe(ksig, tsk, sizeof(*rt_sf), 1);
 	addr = rt_sf;
 	if (!access_ok(rt_sf, sizeof(*rt_sf)))
 		goto badframe;
@@ -1226,7 +1226,7 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
 #endif

 	/* Set up Signal Frame */
-	frame = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*frame), 1);
+	frame = get_sigframe(ksig, tsk, sizeof(*frame), 1);
 	if (!access_ok(frame, sizeof(*frame)))
 		goto badframe;
 	sc = (struct sigcontext __user *) &frame->sctx;
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index d3db78732070..fec27d599e87 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -822,7 +822,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 	unsigned long msr = regs->msr;
 #endif

-	frame = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*frame), 0);
+	frame = get_sigframe(ksig, tsk, sizeof(*frame), 0);
 	if (!access_ok(frame, sizeof(*frame)))
 		goto badframe;

From 7fe8f773ee248c726cec2addcdb94056049d6e34 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 18 Aug 2020 17:19:23 +0000
Subject: [PATCH 064/304] powerpc/signal: Refactor bad frame logging

The logging of a bad frame appears half a dozen times and is pretty
similar each time. Create a signal_fault() function to perform that
logging.
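Each failure path then collapses to a single call; for instance the
handle_rt_signal32() bad-frame exit becomes (shape as in the diff
below):

	badframe:
		signal_fault(tsk, regs, "handle_rt_signal32", addr);
		return 1;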
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/fa094445c119fc00315e1c13783b493346306c6a.1597770847.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal.c | 11 +++++++++++ arch/powerpc/kernel/signal.h | 3 +++ arch/powerpc/kernel/signal_32.c | 35 +++++---------------------------- arch/powerpc/kernel/signal_64.c | 15 ++------------ 4 files changed, 21 insertions(+), 43 deletions(-) diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index c3f61dc0a589..190c866d00a1 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -351,3 +351,14 @@ static unsigned long get_tm_stackpointer(struct task_struct *tsk) #endif return ret; } + +static const char fm32[] = KERN_INFO "%s[%d]: bad frame in %s: %p nip %08lx lr %08lx\n"; +static const char fm64[] = KERN_INFO "%s[%d]: bad frame in %s: %p nip %016lx lr %016lx\n"; + +void signal_fault(struct task_struct *tsk, struct pt_regs *regs, + const char *where, void __user *ptr) +{ + if (show_unhandled_signals) + printk_ratelimited(regs->msr & MSR_64BIT ? fm64 : fm32, tsk->comm, + task_pid_nr(tsk), where, ptr, regs->nip, regs->link); +} diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index fb98731348c3..f610cfafa478 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -93,4 +93,7 @@ static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, #endif /* !defined(CONFIG_PPC64) */ +void signal_fault(struct task_struct *tsk, struct pt_regs *regs, + const char *where, void __user *ptr); + #endif /* _POWERPC_ARCH_SIGNAL_H */ diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index e5b2801a94ac..deb729c8b79d 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -835,12 +835,7 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, return 0; badframe: - if (show_unhandled_signals) - printk_ratelimited(KERN_INFO - "%s[%d]: bad frame in handle_rt_signal32: " - "%p nip %08lx lr %08lx\n", - tsk->comm, tsk->pid, - addr, regs->nip, regs->link); + signal_fault(tsk, regs, "handle_rt_signal32", addr); return 1; } @@ -1092,12 +1087,7 @@ SYSCALL_DEFINE0(rt_sigreturn) return 0; bad: - if (show_unhandled_signals) - printk_ratelimited(KERN_INFO - "%s[%d]: bad frame in sys_rt_sigreturn: " - "%p nip %08lx lr %08lx\n", - current->comm, current->pid, - rt_sf, regs->nip, regs->link); + signal_fault(current, regs, "sys_rt_sigreturn", rt_sf); force_sig(SIGSEGV); return 0; @@ -1181,12 +1171,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx, * We kill the task with a SIGSEGV in this situation. 
*/ if (do_setcontext(ctx, regs, 1)) { - if (show_unhandled_signals) - printk_ratelimited(KERN_INFO "%s[%d]: bad frame in " - "sys_debug_setcontext: %p nip %08lx " - "lr %08lx\n", - current->comm, current->pid, - ctx, regs->nip, regs->link); + signal_fault(current, regs, "sys_debug_setcontext", ctx); force_sig(SIGSEGV); goto out; @@ -1287,12 +1272,7 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, return 0; badframe: - if (show_unhandled_signals) - printk_ratelimited(KERN_INFO - "%s[%d]: bad frame in handle_signal32: " - "%p nip %08lx lr %08lx\n", - tsk->comm, tsk->pid, - frame, regs->nip, regs->link); + signal_fault(tsk, regs, "handle_signal32", frame); return 1; } @@ -1363,12 +1343,7 @@ SYSCALL_DEFINE0(sigreturn) return 0; badframe: - if (show_unhandled_signals) - printk_ratelimited(KERN_INFO - "%s[%d]: bad frame in sys_sigreturn: " - "%p nip %08lx lr %08lx\n", - current->comm, current->pid, - addr, regs->nip, regs->link); + signal_fault(current, regs, "sys_sigreturn", addr); force_sig(SIGSEGV); return 0; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index fec27d599e87..7df088b9ad0f 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -66,11 +66,6 @@ struct rt_sigframe { char abigap[USER_REDZONE_SIZE]; } __attribute__ ((aligned (16))); -static const char fmt32[] = KERN_INFO \ - "%s[%d]: bad frame in %s: %08lx nip %08lx lr %08lx\n"; -static const char fmt64[] = KERN_INFO \ - "%s[%d]: bad frame in %s: %016lx nip %016lx lr %016lx\n"; - /* * This computes a quad word aligned pointer inside the vmx_reserve array * element. For historical reasons sigcontext might not be quad word aligned, @@ -801,10 +796,7 @@ SYSCALL_DEFINE0(rt_sigreturn) return 0; badframe: - if (show_unhandled_signals) - printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, - current->comm, current->pid, "rt_sigreturn", - (long)uc, regs->nip, regs->link); + signal_fault(current, regs, "rt_sigreturn", uc); force_sig(SIGSEGV); return 0; @@ -911,10 +903,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, return 0; badframe: - if (show_unhandled_signals) - printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, - tsk->comm, tsk->pid, "setup_rt_frame", - (long)frame, regs->nip, regs->link); + signal_fault(current, regs, "handle_rt_signal64", frame); return 1; } From debf122c777f361137a3114db7be8aecc65f6af2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 18 Aug 2020 17:19:24 +0000 Subject: [PATCH 065/304] powerpc/signal32: Simplify logging in handle_rt_signal32() If something is bad in the frame, there is no point in knowing which part of the frame exactly is wrong as it got allocated as a single block. Always print the root address of the frame in case of failed user access, just like handle_signal32(). 
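With a single block, the per-field "addr" bookkeeping can go away and the whole error path of handle_rt_signal32() reduces to (sketch of the result; the actual change is in the diff below):

	badframe:
		signal_fault(tsk, regs, "handle_rt_signal32", rt_sf);
		return 1;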
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/691895bd31fee89a2d8370befd66ad4eff5b63f2.1597770847.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal_32.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index deb729c8b79d..44a46911ff98 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -754,7 +754,6 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, struct rt_sigframe __user *rt_sf; struct mcontext __user *frame; struct mcontext __user *tm_frame = NULL; - void __user *addr; unsigned long newsp = 0; int sigret; unsigned long tramp; @@ -767,7 +766,6 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, /* Set up Signal Frame */ /* Put a Real Time Context onto stack */ rt_sf = get_sigframe(ksig, tsk, sizeof(*rt_sf), 1); - addr = rt_sf; if (!access_ok(rt_sf, sizeof(*rt_sf))) goto badframe; @@ -782,7 +780,6 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, /* Save user registers on the stack */ frame = &rt_sf->uc.uc_mcontext; - addr = frame; if (vdso32_rt_sigtramp && tsk->mm->context.vdso_base) { sigret = 0; tramp = tsk->mm->context.vdso_base + vdso32_rt_sigtramp; @@ -818,7 +815,6 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, /* create a stack frame for the caller of the handler */ newsp = ((unsigned long)rt_sf) - (__SIGNAL_FRAMESIZE + 16); - addr = (void __user *)regs->gpr[1]; if (put_user(regs->gpr[1], (u32 __user *)newsp)) goto badframe; @@ -835,7 +831,7 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, return 0; badframe: - signal_fault(tsk, regs, "handle_rt_signal32", addr); + signal_fault(tsk, regs, "handle_rt_signal32", rt_sf); return 1; } From 3eea688be0ccba2221e047b7df6f9ae87361cdd6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 18 Aug 2020 17:19:25 +0000 Subject: [PATCH 066/304] powerpc/signal32: Move handle_signal32() close to handle_rt_signal32() Those two functions are similar and serve the same purpose. To ease refactoring, move them close to each other. This is a pure move: no code change, no cosmetics. Yes, checkpatch is not happy, but most of that will clear up later.
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/dbce67900bf566bcf40179467bf1eb500814c405.1597770847.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal_32.c | 170 ++++++++++++++++---------------- 1 file changed, 85 insertions(+), 85 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 44a46911ff98..2cc686b9f566 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -836,6 +836,91 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, return 1; } +/* + * OK, we're invoking a handler + */ +int handle_signal32(struct ksignal *ksig, sigset_t *oldset, + struct task_struct *tsk) +{ + struct sigcontext __user *sc; + struct sigframe __user *frame; + struct mcontext __user *tm_mctx = NULL; + unsigned long newsp = 0; + int sigret; + unsigned long tramp; + struct pt_regs *regs = tsk->thread.regs; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Save the thread's msr before get_tm_stackpointer() changes it */ + unsigned long msr = regs->msr; +#endif + + /* Set up Signal Frame */ + frame = get_sigframe(ksig, tsk, sizeof(*frame), 1); + if (!access_ok(frame, sizeof(*frame))) + goto badframe; + sc = (struct sigcontext __user *) &frame->sctx; + +#if _NSIG != 64 +#error "Please adjust handle_signal()" +#endif + if (__put_user(to_user_ptr(ksig->ka.sa.sa_handler), &sc->handler) + || __put_user(oldset->sig[0], &sc->oldmask) +#ifdef CONFIG_PPC64 + || __put_user((oldset->sig[0] >> 32), &sc->_unused[3]) +#else + || __put_user(oldset->sig[1], &sc->_unused[3]) +#endif + || __put_user(to_user_ptr(&frame->mctx), &sc->regs) + || __put_user(ksig->sig, &sc->signal)) + goto badframe; + + if (vdso32_sigtramp && tsk->mm->context.vdso_base) { + sigret = 0; + tramp = tsk->mm->context.vdso_base + vdso32_sigtramp; + } else { + sigret = __NR_sigreturn; + tramp = (unsigned long) frame->mctx.tramp; + } + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + tm_mctx = &frame->mctx_transact; + if (MSR_TM_ACTIVE(msr)) { + if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact, + sigret, msr)) + goto badframe; + } + else +#endif + { + if (save_user_regs(regs, &frame->mctx, tm_mctx, sigret, 1)) + goto badframe; + } + + regs->link = tramp; + +#ifdef CONFIG_PPC_FPU_REGS + tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */ +#endif + + /* create a stack frame for the caller of the handler */ + newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; + if (put_user(regs->gpr[1], (u32 __user *)newsp)) + goto badframe; + + regs->gpr[1] = newsp; + regs->gpr[3] = ksig->sig; + regs->gpr[4] = (unsigned long) sc; + regs->nip = (unsigned long) (unsigned long)ksig->ka.sa.sa_handler; + /* enter the signal handler in big-endian mode */ + regs->msr &= ~MSR_LE; + return 0; + +badframe: + signal_fault(tsk, regs, "handle_signal32", frame); + + return 1; +} + static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int sig) { sigset_t set; @@ -1188,91 +1273,6 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx, } #endif -/* - * OK, we're invoking a handler - */ -int handle_signal32(struct ksignal *ksig, sigset_t *oldset, - struct task_struct *tsk) -{ - struct sigcontext __user *sc; - struct sigframe __user *frame; - struct mcontext __user *tm_mctx = NULL; - unsigned long newsp = 0; - int sigret; - unsigned long tramp; - struct pt_regs *regs = tsk->thread.regs; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - /* Save the thread's msr before get_tm_stackpointer() changes it */ - unsigned long 
msr = regs->msr; -#endif - - /* Set up Signal Frame */ - frame = get_sigframe(ksig, tsk, sizeof(*frame), 1); - if (!access_ok(frame, sizeof(*frame))) - goto badframe; - sc = (struct sigcontext __user *) &frame->sctx; - -#if _NSIG != 64 -#error "Please adjust handle_signal()" -#endif - if (__put_user(to_user_ptr(ksig->ka.sa.sa_handler), &sc->handler) - || __put_user(oldset->sig[0], &sc->oldmask) -#ifdef CONFIG_PPC64 - || __put_user((oldset->sig[0] >> 32), &sc->_unused[3]) -#else - || __put_user(oldset->sig[1], &sc->_unused[3]) -#endif - || __put_user(to_user_ptr(&frame->mctx), &sc->regs) - || __put_user(ksig->sig, &sc->signal)) - goto badframe; - - if (vdso32_sigtramp && tsk->mm->context.vdso_base) { - sigret = 0; - tramp = tsk->mm->context.vdso_base + vdso32_sigtramp; - } else { - sigret = __NR_sigreturn; - tramp = (unsigned long) frame->mctx.tramp; - } - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - tm_mctx = &frame->mctx_transact; - if (MSR_TM_ACTIVE(msr)) { - if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact, - sigret, msr)) - goto badframe; - } - else -#endif - { - if (save_user_regs(regs, &frame->mctx, tm_mctx, sigret, 1)) - goto badframe; - } - - regs->link = tramp; - -#ifdef CONFIG_PPC_FPU_REGS - tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */ -#endif - - /* create a stack frame for the caller of the handler */ - newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; - if (put_user(regs->gpr[1], (u32 __user *)newsp)) - goto badframe; - - regs->gpr[1] = newsp; - regs->gpr[3] = ksig->sig; - regs->gpr[4] = (unsigned long) sc; - regs->nip = (unsigned long) (unsigned long)ksig->ka.sa.sa_handler; - /* enter the signal handler in big-endian mode */ - regs->msr &= ~MSR_LE; - return 0; - -badframe: - signal_fault(tsk, regs, "handle_signal32", frame); - - return 1; -} - /* * Do a signal return; undo the signal stack. 
*/ From 8e91cf8501f14d8b6727c71c98fd743e95e9b402 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 18 Aug 2020 17:19:26 +0000 Subject: [PATCH 067/304] powerpc/signal32: Rename local pointers in handle_rt_signal32() Rename pointers in handle_rt_signal32() to make it more similar to handle_signal32() tm_frame becomes tm_mctx frame becomes mctx rt_sf becomes frame Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/be77477b0f05397876015b218e36548ee8f5e10b.1597770847.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal_32.c | 51 ++++++++++++++++----------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 2cc686b9f566..d0fcb3de66aa 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -751,9 +751,9 @@ static long restore_tm_user_regs(struct pt_regs *regs, int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, struct task_struct *tsk) { - struct rt_sigframe __user *rt_sf; - struct mcontext __user *frame; - struct mcontext __user *tm_frame = NULL; + struct rt_sigframe __user *frame; + struct mcontext __user *mctx; + struct mcontext __user *tm_mctx = NULL; unsigned long newsp = 0; int sigret; unsigned long tramp; @@ -765,46 +765,45 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, /* Set up Signal Frame */ /* Put a Real Time Context onto stack */ - rt_sf = get_sigframe(ksig, tsk, sizeof(*rt_sf), 1); - if (!access_ok(rt_sf, sizeof(*rt_sf))) + frame = get_sigframe(ksig, tsk, sizeof(*frame), 1); + if (!access_ok(frame, sizeof(*frame))) goto badframe; /* Put the siginfo & fill in most of the ucontext */ - if (copy_siginfo_to_user(&rt_sf->info, &ksig->info) - || __put_user(0, &rt_sf->uc.uc_flags) - || __save_altstack(&rt_sf->uc.uc_stack, regs->gpr[1]) - || __put_user(to_user_ptr(&rt_sf->uc.uc_mcontext), - &rt_sf->uc.uc_regs) - || put_sigset_t(&rt_sf->uc.uc_sigmask, oldset)) + if (copy_siginfo_to_user(&frame->info, &ksig->info) || + __put_user(0, &frame->uc.uc_flags) || + __save_altstack(&frame->uc.uc_stack, regs->gpr[1]) || + __put_user(to_user_ptr(&frame->uc.uc_mcontext), &frame->uc.uc_regs) || + put_sigset_t(&frame->uc.uc_sigmask, oldset)) goto badframe; /* Save user registers on the stack */ - frame = &rt_sf->uc.uc_mcontext; + mctx = &frame->uc.uc_mcontext; if (vdso32_rt_sigtramp && tsk->mm->context.vdso_base) { sigret = 0; tramp = tsk->mm->context.vdso_base + vdso32_rt_sigtramp; } else { sigret = __NR_rt_sigreturn; - tramp = (unsigned long) frame->tramp; + tramp = (unsigned long)mctx->tramp; } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM - tm_frame = &rt_sf->uc_transact.uc_mcontext; + tm_mctx = &frame->uc_transact.uc_mcontext; if (MSR_TM_ACTIVE(msr)) { - if (__put_user((unsigned long)&rt_sf->uc_transact, - &rt_sf->uc.uc_link) || - __put_user((unsigned long)tm_frame, - &rt_sf->uc_transact.uc_regs)) + if (__put_user((unsigned long)&frame->uc_transact, + &frame->uc.uc_link) || + __put_user((unsigned long)tm_mctx, + &frame->uc_transact.uc_regs)) goto badframe; - if (save_tm_user_regs(regs, frame, tm_frame, sigret, msr)) + if (save_tm_user_regs(regs, mctx, tm_mctx, sigret, msr)) goto badframe; } else #endif { - if (__put_user(0, &rt_sf->uc.uc_link)) + if (__put_user(0, &frame->uc.uc_link)) goto badframe; - if (save_user_regs(regs, frame, tm_frame, sigret, 1)) + if (save_user_regs(regs, mctx, tm_mctx, sigret, 1)) goto badframe; } regs->link = tramp; @@ -814,16 +813,16 @@ int handle_rt_signal32(struct 
ksignal *ksig, sigset_t *oldset, #endif /* create a stack frame for the caller of the handler */ - newsp = ((unsigned long)rt_sf) - (__SIGNAL_FRAMESIZE + 16); + newsp = ((unsigned long)frame) - (__SIGNAL_FRAMESIZE + 16); if (put_user(regs->gpr[1], (u32 __user *)newsp)) goto badframe; /* Fill registers for signal handler */ regs->gpr[1] = newsp; regs->gpr[3] = ksig->sig; - regs->gpr[4] = (unsigned long) &rt_sf->info; - regs->gpr[5] = (unsigned long) &rt_sf->uc; - regs->gpr[6] = (unsigned long) rt_sf; + regs->gpr[4] = (unsigned long)&frame->info; + regs->gpr[5] = (unsigned long)&frame->uc; + regs->gpr[6] = (unsigned long)frame; regs->nip = (unsigned long) ksig->ka.sa.sa_handler; /* enter the signal handler in native-endian mode */ regs->msr &= ~MSR_LE; @@ -831,7 +830,7 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, return 0; badframe: - signal_fault(tsk, regs, "handle_rt_signal32", rt_sf); + signal_fault(tsk, regs, "handle_rt_signal32", frame); return 1; } From 91b8ecd419cb46058e99b3a574184883c02b7729 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 18 Aug 2020 17:19:27 +0000 Subject: [PATCH 068/304] powerpc/signal32: Misc changes to make handle_[rt_]_signal32() more similar Miscellaneous changes to clean and make handle_signal32() and handle_rt_signal32() even more similar. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/df0bc8c3b8fa96390c46f611df79b2a94ac21844.1597770847.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal_32.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index d0fcb3de66aa..ab8c8cb98b15 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -764,8 +764,11 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, #endif /* Set up Signal Frame */ - /* Put a Real Time Context onto stack */ frame = get_sigframe(ksig, tsk, sizeof(*frame), 1); + mctx = &frame->uc.uc_mcontext; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + tm_mctx = &frame->uc_transact.uc_mcontext; +#endif if (!access_ok(frame, sizeof(*frame))) goto badframe; @@ -778,7 +781,6 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, goto badframe; /* Save user registers on the stack */ - mctx = &frame->uc.uc_mcontext; if (vdso32_rt_sigtramp && tsk->mm->context.vdso_base) { sigret = 0; tramp = tsk->mm->context.vdso_base + vdso32_rt_sigtramp; @@ -788,7 +790,6 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM - tm_mctx = &frame->uc_transact.uc_mcontext; if (MSR_TM_ACTIVE(msr)) { if (__put_user((unsigned long)&frame->uc_transact, &frame->uc.uc_link) || @@ -843,6 +844,7 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, { struct sigcontext __user *sc; struct sigframe __user *frame; + struct mcontext __user *mctx; struct mcontext __user *tm_mctx = NULL; unsigned long newsp = 0; int sigret; @@ -855,6 +857,10 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, /* Set up Signal Frame */ frame = get_sigframe(ksig, tsk, sizeof(*frame), 1); + mctx = &frame->mctx; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + tm_mctx = &frame->mctx_transact; +#endif if (!access_ok(frame, sizeof(*frame))) goto badframe; sc = (struct sigcontext __user *) &frame->sctx; @@ -869,7 +875,7 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, #else || __put_user(oldset->sig[1], &sc->_unused[3]) #endif - || 
__put_user(to_user_ptr(&frame->mctx), &sc->regs) + || __put_user(to_user_ptr(mctx), &sc->regs) || __put_user(ksig->sig, &sc->signal)) goto badframe; @@ -878,20 +884,18 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, tramp = tsk->mm->context.vdso_base + vdso32_sigtramp; } else { sigret = __NR_sigreturn; - tramp = (unsigned long) frame->mctx.tramp; + tramp = (unsigned long)mctx->tramp; } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM - tm_mctx = &frame->mctx_transact; if (MSR_TM_ACTIVE(msr)) { - if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact, - sigret, msr)) + if (save_tm_user_regs(regs, mctx, tm_mctx, sigret, msr)) goto badframe; } else #endif { - if (save_user_regs(regs, &frame->mctx, tm_mctx, sigret, 1)) + if (save_user_regs(regs, mctx, tm_mctx, sigret, 1)) goto badframe; } @@ -909,7 +913,7 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, regs->gpr[1] = newsp; regs->gpr[3] = ksig->sig; regs->gpr[4] = (unsigned long) sc; - regs->nip = (unsigned long) (unsigned long)ksig->ka.sa.sa_handler; + regs->nip = (unsigned long)ksig->ka.sa.sa_handler; /* enter the signal handler in big-endian mode */ regs->msr &= ~MSR_LE; return 0; From 8d33001dd650b88e915a1a13e2ca807350e374df Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 18 Aug 2020 17:19:28 +0000 Subject: [PATCH 069/304] powerpc/signal32: Move signal trampoline setup to handle_[rt_]signal32 Move signal trampoline setup into handle_signal32() and handle_rt_signal32(). At the same time, remove the define which hides the mc_pad field used for trampoline. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e439cc0fa35aa45da6776520777a61848b92fd4b.1597770847.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal_32.c | 61 ++++++++++++--------------------- 1 file changed, 22 insertions(+), 39 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index ab8c8cb98b15..d8c3843102df 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -199,9 +199,6 @@ struct sigframe { int abigap[56]; }; -/* We use the mc_pad field for the signal return trampoline. */ -#define tramp mc_pad - /* * When we have rt signals to deliver, we set up on the * user stack, going down from the original stack pointer: @@ -236,8 +233,7 @@ struct rt_sigframe { * altivec/spe instructions at some point. */ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, - struct mcontext __user *tm_frame, int sigret, - int ctx_has_vsx_region) + struct mcontext __user *tm_frame, int ctx_has_vsx_region) { unsigned long msr = regs->msr; @@ -320,15 +316,6 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, if (tm_frame && __put_user(0, &tm_frame->mc_gregs[PT_MSR])) return 1; - if (sigret) { - /* Set up the sigreturn trampoline: li 0,sigret; sc */ - if (__put_user(PPC_INST_ADDI + sigret, &frame->tramp[0]) - || __put_user(PPC_INST_SC, &frame->tramp[1])) - return 1; - flush_icache_range((unsigned long) &frame->tramp[0], - (unsigned long) &frame->tramp[2]); - } - return 0; } @@ -342,10 +329,8 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, * * See save_user_regs() and signal_64.c:setup_tm_sigcontexts(). 
*/ -static int save_tm_user_regs(struct pt_regs *regs, - struct mcontext __user *frame, - struct mcontext __user *tm_frame, int sigret, - unsigned long msr) +static int save_tm_user_regs(struct pt_regs *regs, struct mcontext __user *frame, + struct mcontext __user *tm_frame, unsigned long msr) { WARN_ON(tm_suspend_disabled); @@ -461,14 +446,6 @@ static int save_tm_user_regs(struct pt_regs *regs, if (__put_user(msr, &frame->mc_gregs[PT_MSR])) return 1; - if (sigret) { - /* Set up the sigreturn trampoline: li 0,sigret; sc */ - if (__put_user(PPC_INST_ADDI + sigret, &frame->tramp[0]) - || __put_user(PPC_INST_SC, &frame->tramp[1])) - return 1; - flush_icache_range((unsigned long) &frame->tramp[0], - (unsigned long) &frame->tramp[2]); - } return 0; } @@ -755,7 +732,6 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, struct mcontext __user *mctx; struct mcontext __user *tm_mctx = NULL; unsigned long newsp = 0; - int sigret; unsigned long tramp; struct pt_regs *regs = tsk->thread.regs; #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -782,11 +758,15 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, /* Save user registers on the stack */ if (vdso32_rt_sigtramp && tsk->mm->context.vdso_base) { - sigret = 0; tramp = tsk->mm->context.vdso_base + vdso32_rt_sigtramp; } else { - sigret = __NR_rt_sigreturn; - tramp = (unsigned long)mctx->tramp; + tramp = (unsigned long)mctx->mc_pad; + /* Set up the sigreturn trampoline: li r0,sigret; sc */ + if (__put_user(PPC_INST_ADDI + __NR_sigreturn, &mctx->mc_pad[0])) + goto badframe; + if (__put_user(PPC_INST_SC, &mctx->mc_pad[1])) + goto badframe; + flush_icache_range(tramp, tramp + 2 * sizeof(unsigned long)); } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -796,7 +776,7 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, __put_user((unsigned long)tm_mctx, &frame->uc_transact.uc_regs)) goto badframe; - if (save_tm_user_regs(regs, mctx, tm_mctx, sigret, msr)) + if (save_tm_user_regs(regs, mctx, tm_mctx, msr)) goto badframe; } else @@ -804,7 +784,7 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, { if (__put_user(0, &frame->uc.uc_link)) goto badframe; - if (save_user_regs(regs, mctx, tm_mctx, sigret, 1)) + if (save_user_regs(regs, mctx, tm_mctx, 1)) goto badframe; } regs->link = tramp; @@ -847,7 +827,6 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct mcontext __user *mctx; struct mcontext __user *tm_mctx = NULL; unsigned long newsp = 0; - int sigret; unsigned long tramp; struct pt_regs *regs = tsk->thread.regs; #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -880,22 +859,26 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, goto badframe; if (vdso32_sigtramp && tsk->mm->context.vdso_base) { - sigret = 0; tramp = tsk->mm->context.vdso_base + vdso32_sigtramp; } else { - sigret = __NR_sigreturn; - tramp = (unsigned long)mctx->tramp; + tramp = (unsigned long)mctx->mc_pad; + /* Set up the sigreturn trampoline: li r0,sigret; sc */ + if (__put_user(PPC_INST_ADDI + __NR_sigreturn, &mctx->mc_pad[0])) + goto badframe; + if (__put_user(PPC_INST_SC, &mctx->mc_pad[1])) + goto badframe; + flush_icache_range(tramp, tramp + 2 * sizeof(unsigned long)); } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (MSR_TM_ACTIVE(msr)) { - if (save_tm_user_regs(regs, mctx, tm_mctx, sigret, msr)) + if (save_tm_user_regs(regs, mctx, tm_mctx, msr)) goto badframe; } else #endif { - if (save_user_regs(regs, mctx, tm_mctx, sigret, 1)) + if (save_user_regs(regs, mctx, tm_mctx, 1)) goto badframe; } @@ -1047,7 +1030,7 @@ 
SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, mctx = (struct mcontext __user *) ((unsigned long) &old_ctx->uc_mcontext & ~0xfUL); if (!access_ok(old_ctx, ctx_size) - || save_user_regs(regs, mctx, NULL, 0, ctx_has_vsx_region) + || save_user_regs(regs, mctx, NULL, ctx_has_vsx_region) || put_sigset_t(&old_ctx->uc_sigmask, &current->blocked) || __put_user(to_user_ptr(mctx), &old_ctx->uc_regs)) return -EFAULT; From ad65f4909fd3736d84533784cd9ab76905536b34 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 18 Aug 2020 17:19:29 +0000 Subject: [PATCH 070/304] powerpc/signal32: Switch handle_signal32() to user_access_begin() logic Replace the access_ok() by user_access_begin() and change all user accesses to their unsafe_ versions. Move flush_icache_range() outside the user access block. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a27797f781aa00da96f8284c898173d18e952361.1597770847.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal_32.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index d8c3843102df..fc8ba4b29edf 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -840,35 +840,35 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, #ifdef CONFIG_PPC_TRANSACTIONAL_MEM tm_mctx = &frame->mctx_transact; #endif - if (!access_ok(frame, sizeof(*frame))) + if (!user_write_access_begin(frame, sizeof(*frame))) goto badframe; sc = (struct sigcontext __user *) &frame->sctx; #if _NSIG != 64 #error "Please adjust handle_signal()" #endif - if (__put_user(to_user_ptr(ksig->ka.sa.sa_handler), &sc->handler) - || __put_user(oldset->sig[0], &sc->oldmask) + unsafe_put_user(to_user_ptr(ksig->ka.sa.sa_handler), &sc->handler, failed); + unsafe_put_user(oldset->sig[0], &sc->oldmask, failed); #ifdef CONFIG_PPC64 - || __put_user((oldset->sig[0] >> 32), &sc->_unused[3]) + unsafe_put_user((oldset->sig[0] >> 32), &sc->_unused[3], failed); #else - || __put_user(oldset->sig[1], &sc->_unused[3]) + unsafe_put_user(oldset->sig[1], &sc->_unused[3], failed); #endif - || __put_user(to_user_ptr(mctx), &sc->regs) - || __put_user(ksig->sig, &sc->signal)) - goto badframe; + unsafe_put_user(to_user_ptr(mctx), &sc->regs, failed); + unsafe_put_user(ksig->sig, &sc->signal, failed); if (vdso32_sigtramp && tsk->mm->context.vdso_base) { tramp = tsk->mm->context.vdso_base + vdso32_sigtramp; } else { tramp = (unsigned long)mctx->mc_pad; /* Set up the sigreturn trampoline: li r0,sigret; sc */ - if (__put_user(PPC_INST_ADDI + __NR_sigreturn, &mctx->mc_pad[0])) - goto badframe; - if (__put_user(PPC_INST_SC, &mctx->mc_pad[1])) - goto badframe; - flush_icache_range(tramp, tramp + 2 * sizeof(unsigned long)); + unsafe_put_user(PPC_INST_ADDI + __NR_sigreturn, &mctx->mc_pad[0], failed); + unsafe_put_user(PPC_INST_SC, &mctx->mc_pad[1], failed); } + user_write_access_end(); + + if (tramp == (unsigned long)mctx->mc_pad) + flush_icache_range(tramp, tramp + 2 * sizeof(unsigned long)); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (MSR_TM_ACTIVE(msr)) { @@ -901,6 +901,9 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, regs->msr &= ~MSR_LE; return 0; +failed: + user_write_access_end(); + badframe: signal_fault(tsk, regs, "handle_signal32", frame); From 9504db3e90b22dca19d8152ed5a82c68512dac0e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 18 Aug 2020 17:19:30 +0000 Subject: [PATCH 071/304] powerpc/signal32: 
Switch handle_rt_signal32() to user_access_begin() logic In the same way as handle_signal32(), replace all user accesses with equivalent unsafe_ versions, and move the trampoline code icache flush outside the user access block. Functions that have no unsafe_ equivalent also remain outside the access block. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2974314226256f958e2984912b48883ef1754185.1597770847.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal_32.c | 55 ++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index fc8ba4b29edf..93c2d6304831 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -58,8 +58,6 @@ #define mcontext mcontext32 #define ucontext ucontext32 -#define __save_altstack __compat_save_altstack - /* * Userspace code may pass a ucontext which doesn't include VSX added * at the end. We need to check for this case. @@ -745,16 +743,28 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, #ifdef CONFIG_PPC_TRANSACTIONAL_MEM tm_mctx = &frame->uc_transact.uc_mcontext; #endif - if (!access_ok(frame, sizeof(*frame))) + if (!user_write_access_begin(frame, sizeof(*frame))) goto badframe; /* Put the siginfo & fill in most of the ucontext */ - if (copy_siginfo_to_user(&frame->info, &ksig->info) || - __put_user(0, &frame->uc.uc_flags) || - __save_altstack(&frame->uc.uc_stack, regs->gpr[1]) || - __put_user(to_user_ptr(&frame->uc.uc_mcontext), &frame->uc.uc_regs) || - put_sigset_t(&frame->uc.uc_sigmask, oldset)) - goto badframe; + unsafe_put_user(0, &frame->uc.uc_flags, failed); +#ifdef CONFIG_PPC64 + unsafe_compat_save_altstack(&frame->uc.uc_stack, regs->gpr[1], failed); +#else + unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], failed); +#endif + unsafe_put_user(to_user_ptr(&frame->uc.uc_mcontext), &frame->uc.uc_regs, failed); + + if (MSR_TM_ACTIVE(msr)) { +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + unsafe_put_user((unsigned long)&frame->uc_transact, + &frame->uc.uc_link, failed); + unsafe_put_user((unsigned long)tm_mctx, + &frame->uc_transact.uc_regs, failed); +#endif + } else { + unsafe_put_user(0, &frame->uc.uc_link, failed); + } /* Save user registers on the stack */ if (vdso32_rt_sigtramp && tsk->mm->context.vdso_base) { @@ -762,28 +772,28 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, } else { tramp = (unsigned long)mctx->mc_pad; /* Set up the sigreturn trampoline: li r0,sigret; sc */ - if (__put_user(PPC_INST_ADDI + __NR_sigreturn, &mctx->mc_pad[0])) - goto badframe; - if (__put_user(PPC_INST_SC, &mctx->mc_pad[1])) - goto badframe; - flush_icache_range(tramp, tramp + 2 * sizeof(unsigned long)); + unsafe_put_user(PPC_INST_ADDI + __NR_rt_sigreturn, &mctx->mc_pad[0], + failed); + unsafe_put_user(PPC_INST_SC, &mctx->mc_pad[1], failed); } + user_write_access_end(); + + if (put_sigset_t(&frame->uc.uc_sigmask, oldset)) + goto badframe; + if (copy_siginfo_to_user(&frame->info, &ksig->info)) + goto badframe; + + if (tramp == (unsigned long)mctx->mc_pad) + flush_icache_range(tramp, tramp + 2 * sizeof(unsigned long)); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (MSR_TM_ACTIVE(msr)) { - if (__put_user((unsigned long)&frame->uc_transact, - &frame->uc.uc_link) || - __put_user((unsigned long)tm_mctx, - &frame->uc_transact.uc_regs)) - goto badframe; if (save_tm_user_regs(regs, mctx, tm_mctx, msr)) goto badframe; } else #endif { - if (__put_user(0, 
&frame->uc.uc_link)) - goto badframe; if (save_user_regs(regs, mctx, tm_mctx, 1)) goto badframe; } @@ -810,6 +820,9 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, regs->msr |= (MSR_KERNEL & MSR_LE); return 0; +failed: + user_write_access_end(); + badframe: signal_fault(tsk, regs, "handle_rt_signal32", frame); From f1cf4f93de2ff66313a091320d7683735816a0bc Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 18 Aug 2020 17:19:31 +0000 Subject: [PATCH 072/304] powerpc/signal32: Remove ifdefery in middle of if/else MSR_TM_ACTIVE() is always defined and always returns 0 when CONFIG_PPC_TRANSACTIONAL_MEM is not selected, so the awful ifdefery in the middle of an if/else can be removed. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f3c36d687e4228f58d5c207a4036aa9ddcc7420a.1597770847.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal_32.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 93c2d6304831..310d3b8d9ad5 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -447,6 +447,12 @@ static int save_tm_user_regs(struct pt_regs *regs, struct mcontext __user *frame return 0; } +#else +static int save_tm_user_regs(struct pt_regs *regs, struct mcontext __user *frame, + struct mcontext __user *tm_frame, unsigned long msr) +{ + return 0; +} #endif /* @@ -732,10 +738,8 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, unsigned long newsp = 0; unsigned long tramp; struct pt_regs *regs = tsk->thread.regs; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* Save the thread's msr before get_tm_stackpointer() changes it */ unsigned long msr = regs->msr; -#endif /* Set up Signal Frame */ frame = get_sigframe(ksig, tsk, sizeof(*frame), 1); @@ -786,14 +790,10 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, if (tramp == (unsigned long)mctx->mc_pad) flush_icache_range(tramp, tramp + 2 * sizeof(unsigned long)); -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (MSR_TM_ACTIVE(msr)) { if (save_tm_user_regs(regs, mctx, tm_mctx, msr)) goto badframe; - } - else -#endif - { + } else { if (save_user_regs(regs, mctx, tm_mctx, 1)) goto badframe; } @@ -842,10 +842,8 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, unsigned long newsp = 0; unsigned long tramp; struct pt_regs *regs = tsk->thread.regs; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* Save the thread's msr before get_tm_stackpointer() changes it */ unsigned long msr = regs->msr; -#endif /* Set up Signal Frame */ frame = get_sigframe(ksig, tsk, sizeof(*frame), 1); @@ -883,14 +881,10 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, if (tramp == (unsigned long)mctx->mc_pad) flush_icache_range(tramp, tramp + 2 * sizeof(unsigned long)); -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (MSR_TM_ACTIVE(msr)) { if (save_tm_user_regs(regs, mctx, tm_mctx, msr)) goto badframe; - } - else -#endif - { + } else { if (save_user_regs(regs, mctx, tm_mctx, 1)) goto badframe; } From 14026b94ccfe626e512bc9fa01e0e72ee75c7a98 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 18 Aug 2020 17:19:32 +0000 Subject: [PATCH 073/304] signal: Add unsafe_put_compat_sigset() Implement an 'unsafe' version of put_compat_sigset(). For big endian, use unsafe_put_user() directly to avoid an intermediate copy through the stack. For little endian, use a straight unsafe_copy_to_user(). 
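A typical use looks like this (illustrative sketch only; the powerpc user is added by the following patch):

	if (!user_write_access_begin(compat, sizeof(*compat)))
		return -EFAULT;
	unsafe_put_compat_sigset(compat, set, failed);
	user_write_access_end();
	return 0;

failed:
	user_write_access_end();
	return -EFAULT;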
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/537c7082ee309a0bb9c67a50c5d9dd929aedb82d.1597770847.git.christophe.leroy@csgroup.eu --- include/linux/compat.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/include/linux/compat.h b/include/linux/compat.h index 14d514233e1d..400c0941c8af 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -442,6 +442,38 @@ put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set, #endif } +#ifdef CONFIG_CPU_BIG_ENDIAN +#define unsafe_put_compat_sigset(compat, set, label) do { \ + compat_sigset_t __user *__c = compat; \ + const sigset_t *__s = set; \ + \ + switch (_NSIG_WORDS) { \ + case 4: \ + unsafe_put_user(__s->sig[3] >> 32, &__c->sig[7], label); \ + unsafe_put_user(__s->sig[3], &__c->sig[6], label); \ + fallthrough; \ + case 3: \ + unsafe_put_user(__s->sig[2] >> 32, &__c->sig[5], label); \ + unsafe_put_user(__s->sig[2], &__c->sig[4], label); \ + fallthrough; \ + case 2: \ + unsafe_put_user(__s->sig[1] >> 32, &__c->sig[3], label); \ + unsafe_put_user(__s->sig[1], &__c->sig[2], label); \ + fallthrough; \ + case 1: \ + unsafe_put_user(__s->sig[0] >> 32, &__c->sig[1], label); \ + unsafe_put_user(__s->sig[0], &__c->sig[0], label); \ + } \ +} while (0) +#else +#define unsafe_put_compat_sigset(compat, set, label) do { \ + compat_sigset_t __user *__c = compat; \ + const sigset_t *__s = set; \ + \ + unsafe_copy_to_user(__c, __s, sizeof(*__c), label); \ +} while (0) +#endif + extern int compat_ptrace_request(struct task_struct *child, compat_long_t request, compat_ulong_t addr, compat_ulong_t data); From de781ebdf6b8a256742da4fd6b0e39bb22ed9fe3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 18 Aug 2020 17:19:33 +0000 Subject: [PATCH 074/304] powerpc/signal32: Add and use unsafe_put_sigset_t() put_sigset_t() calls copy_to_user() to copy two words. This is terribly inefficient. 
By switching to unsafe_put_user(), we end up with something as simple as: 3cc: 81 3d 00 00 lwz r9,0(r29) 3d0: 91 26 00 b4 stw r9,180(r6) 3d4: 81 3d 00 04 lwz r9,4(r29) 3d8: 91 26 00 b8 stw r9,184(r6) Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/06def97e87ac1c4ae8e3197e0982e1fab7b3c8ae.1597770847.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal_32.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 310d3b8d9ad5..3f9f315dd036 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -87,6 +87,8 @@ static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set) return put_compat_sigset(uset, set, sizeof(*uset)); } +#define unsafe_put_sigset_t unsafe_put_compat_sigset + static inline int get_sigset_t(sigset_t *set, const compat_sigset_t __user *uset) { @@ -141,6 +143,13 @@ static inline int put_sigset_t(sigset_t __user *uset, sigset_t *set) return copy_to_user(uset, set, sizeof(*uset)); } +#define unsafe_put_sigset_t(uset, set, label) do { \ + sigset_t __user *__us = uset ; \ + const sigset_t *__s = set; \ + \ + unsafe_copy_to_user(__us, __s, sizeof(*__us), label); \ +} while (0) + static inline int get_sigset_t(sigset_t *set, const sigset_t __user *uset) { return copy_from_user(set, uset, sizeof(*uset)); @@ -780,10 +789,10 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, failed); unsafe_put_user(PPC_INST_SC, &mctx->mc_pad[1], failed); } + unsafe_put_sigset_t(&frame->uc.uc_sigmask, oldset, failed); + user_write_access_end(); - if (put_sigset_t(&frame->uc.uc_sigmask, oldset)) - goto badframe; if (copy_siginfo_to_user(&frame->info, &ksig->info)) goto badframe; From 31147d7d6133ea17504b118114a191a8af85f3de Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 18 Aug 2020 17:19:34 +0000 Subject: [PATCH 075/304] powerpc/signal32: Switch swap_context() to user_access_begin() logic As this was the last user of put_sigset_t(), remove it as well. 
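The benefit with KUAP comes from opening the user access window once per block instead of once per access. Each put_user()/copy_to_user() brackets its own store, roughly (illustrative sketch, not the actual generated code):

	allow_write_to_user(uptr, sizeof(*uptr));	/* mtspr */
	*uptr = val;
	prevent_write_to_user(uptr, sizeof(*uptr));	/* mtspr */

whereas user_write_access_begin()/user_write_access_end() opens and closes the window a single time around the whole sequence of unsafe_ accesses.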
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c3ac4f2d134a3391bb51bdaa2d00e9a409aba9f8.1597770847.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal_32.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 3f9f315dd036..5b8a4ede142c 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -82,11 +82,6 @@ * Functions for flipping sigsets (thanks to brain dead generic * implementation that makes things simple for little endian only) */ -static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set) -{ - return put_compat_sigset(uset, set, sizeof(*uset)); -} - #define unsafe_put_sigset_t unsafe_put_compat_sigset static inline int get_sigset_t(sigset_t *set, @@ -138,11 +133,6 @@ static inline int restore_general_regs(struct pt_regs *regs, #define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs)) -static inline int put_sigset_t(sigset_t __user *uset, sigset_t *set) -{ - return copy_to_user(uset, set, sizeof(*uset)); -} - #define unsafe_put_sigset_t(uset, set, label) do { \ sigset_t __user *__us = uset ; \ const sigset_t *__s = set; \ @@ -1048,11 +1038,13 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, */ mctx = (struct mcontext __user *) ((unsigned long) &old_ctx->uc_mcontext & ~0xfUL); - if (!access_ok(old_ctx, ctx_size) - || save_user_regs(regs, mctx, NULL, ctx_has_vsx_region) - || put_sigset_t(&old_ctx->uc_sigmask, &current->blocked) - || __put_user(to_user_ptr(mctx), &old_ctx->uc_regs)) + if (save_user_regs(regs, mctx, NULL, ctx_has_vsx_region)) return -EFAULT; + if (!user_write_access_begin(old_ctx, ctx_size)) + return -EFAULT; + unsafe_put_sigset_t(&old_ctx->uc_sigmask, &current->blocked, failed); + unsafe_put_user(to_user_ptr(mctx), &old_ctx->uc_regs, failed); + user_write_access_end(); } if (new_ctx == NULL) return 0; @@ -1076,6 +1068,10 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, set_thread_flag(TIF_RESTOREALL); return 0; + +failed: + user_write_access_end(); + return -EFAULT; } #ifdef CONFIG_PPC64 From b3484a1d4d1fb54ad7b615a13003d8bc11919c96 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 18 Aug 2020 17:19:35 +0000 Subject: [PATCH 076/304] powerpc/signal: Create 'unsafe' versions of copy_[ck][fpr/vsx]_to_user() For the non-VSX version, that's trivial. Just use unsafe_copy_to_user() instead of __copy_to_user(). For the VSX version, remove the intermediate step through a buffer and use unsafe_put_user() directly. 
This generates a far smaller code which is acceptable to inline, see below: Standard VSX version: 0000000000000000 <.copy_fpr_to_user>: 0: 7c 08 02 a6 mflr r0 4: fb e1 ff f8 std r31,-8(r1) 8: 39 00 00 20 li r8,32 c: 39 24 0b 80 addi r9,r4,2944 10: 7d 09 03 a6 mtctr r8 14: f8 01 00 10 std r0,16(r1) 18: f8 21 fe 71 stdu r1,-400(r1) 1c: 39 41 00 68 addi r10,r1,104 20: e9 09 00 00 ld r8,0(r9) 24: 39 4a 00 08 addi r10,r10,8 28: 39 29 00 10 addi r9,r9,16 2c: f9 0a 00 00 std r8,0(r10) 30: 42 00 ff f0 bdnz 20 <.copy_fpr_to_user+0x20> 34: e9 24 0d 80 ld r9,3456(r4) 38: 3d 42 00 00 addis r10,r2,0 3a: R_PPC64_TOC16_HA .toc 3c: eb ea 00 00 ld r31,0(r10) 3e: R_PPC64_TOC16_LO_DS .toc 40: f9 21 01 70 std r9,368(r1) 44: e9 3f 00 00 ld r9,0(r31) 48: 81 29 00 20 lwz r9,32(r9) 4c: 2f 89 00 00 cmpwi cr7,r9,0 50: 40 9c 00 18 bge cr7,68 <.copy_fpr_to_user+0x68> 54: 4c 00 01 2c isync 58: 3d 20 40 00 lis r9,16384 5c: 79 29 07 c6 rldicr r9,r9,32,31 60: 7d 3d 03 a6 mtspr 29,r9 64: 4c 00 01 2c isync 68: 38 a0 01 08 li r5,264 6c: 38 81 00 70 addi r4,r1,112 70: 48 00 00 01 bl 70 <.copy_fpr_to_user+0x70> 70: R_PPC64_REL24 .__copy_tofrom_user 74: 60 00 00 00 nop 78: e9 3f 00 00 ld r9,0(r31) 7c: 81 29 00 20 lwz r9,32(r9) 80: 2f 89 00 00 cmpwi cr7,r9,0 84: 40 9c 00 18 bge cr7,9c <.copy_fpr_to_user+0x9c> 88: 4c 00 01 2c isync 8c: 39 20 ff ff li r9,-1 90: 79 29 00 44 rldicr r9,r9,0,1 94: 7d 3d 03 a6 mtspr 29,r9 98: 4c 00 01 2c isync 9c: 38 21 01 90 addi r1,r1,400 a0: e8 01 00 10 ld r0,16(r1) a4: eb e1 ff f8 ld r31,-8(r1) a8: 7c 08 03 a6 mtlr r0 ac: 4e 80 00 20 blr 'unsafe' simulated VSX version (The ... are only nops) using unsafe_copy_fpr_to_user() macro: unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task) { unsafe_copy_fpr_to_user(to, task, failed); return 0; failed: return 1; } 0000000000000000 <.copy_fpr_to_user>: 0: 39 00 00 20 li r8,32 4: 39 44 0b 80 addi r10,r4,2944 8: 7d 09 03 a6 mtctr r8 c: 7c 69 1b 78 mr r9,r3 ... 20: e9 0a 00 00 ld r8,0(r10) 24: f9 09 00 00 std r8,0(r9) 28: 39 4a 00 10 addi r10,r10,16 2c: 39 29 00 08 addi r9,r9,8 30: 42 00 ff f0 bdnz 20 <.copy_fpr_to_user+0x20> 34: e9 24 0d 80 ld r9,3456(r4) 38: f9 23 01 00 std r9,256(r3) 3c: 38 60 00 00 li r3,0 40: 4e 80 00 20 blr ... 
50: 38 60 00 01 li r3,1 54: 4e 80 00 20 blr Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/29f6c4b8e7a5bbc61e6a8801b78bbf493f9f819e.1597770847.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal.h | 53 ++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index f610cfafa478..2559a681536e 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -32,7 +32,54 @@ unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task); unsigned long copy_ckfpr_to_user(void __user *to, struct task_struct *task); unsigned long copy_fpr_from_user(struct task_struct *task, void __user *from); unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from); + +#define unsafe_copy_fpr_to_user(to, task, label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)to; \ + int i; \ + \ + for (i = 0; i < ELF_NFPREG - 1 ; i++) \ + unsafe_put_user(__t->thread.TS_FPR(i), &buf[i], label); \ + unsafe_put_user(__t->thread.fp_state.fpscr, &buf[i], label); \ +} while (0) + +#define unsafe_copy_vsx_to_user(to, task, label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)to; \ + int i; \ + \ + for (i = 0; i < ELF_NVSRHALFREG ; i++) \ + unsafe_put_user(__t->thread.fp_state.fpr[i][TS_VSRLOWOFFSET], \ + &buf[i], label);\ +} while (0) + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +#define unsafe_copy_ckfpr_to_user(to, task, label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)to; \ + int i; \ + \ + for (i = 0; i < ELF_NFPREG - 1 ; i++) \ + unsafe_put_user(__t->thread.TS_CKFPR(i), &buf[i], label);\ + unsafe_put_user(__t->thread.ckfp_state.fpscr, &buf[i], label); \ +} while (0) + +#define unsafe_copy_ckvsx_to_user(to, task, label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)to; \ + int i; \ + \ + for (i = 0; i < ELF_NVSRHALFREG ; i++) \ + unsafe_put_user(__t->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET], \ + &buf[i], label);\ +} while (0) +#endif #elif defined(CONFIG_PPC_FPU_REGS) + +#define unsafe_copy_fpr_to_user(to, task, label) \ + unsafe_copy_to_user(to, (task)->thread.fp_state.fpr, \ + ELF_NFPREG * sizeof(double), label) + static inline unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task) { @@ -48,6 +95,10 @@ copy_fpr_from_user(struct task_struct *task, void __user *from) } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM +#define unsafe_copy_ckfpr_to_user(to, task, label) \ + unsafe_copy_to_user(to, (task)->thread.ckfp_state.fpr, \ + ELF_NFPREG * sizeof(double), label) + inline unsigned long copy_ckfpr_to_user(void __user *to, struct task_struct *task) { return __copy_to_user(to, task->thread.ckfp_state.fpr, @@ -62,6 +113,8 @@ copy_ckfpr_from_user(struct task_struct *task, void __user *from) } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ #else +#define unsafe_copy_fpr_to_user(to, task, label) do { } while (0) + static inline unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task) { From 968c4fccd1bb8b440326dac5078ad87d17af4a47 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 18 Aug 2020 17:19:36 +0000 Subject: [PATCH 077/304] powerpc/signal32: Isolate non-copy actions in save_user_regs() and save_tm_user_regs() Reorder actions in save_user_regs() and save_tm_user_regs() to regroup copies together in order to switch to user_access_begin() logic in a later patch. 
Move non-copy actions into new functions called prepare_save_user_regs() and prepare_save_tm_user_regs(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f6eac65781b4a57220477c8864bca2b57f29a5d5.1597770847.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal_32.c | 54 +++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 5b8a4ede142c..86539a4e0514 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -229,14 +229,31 @@ struct rt_sigframe { * We only save the altivec/spe registers if the process has used * altivec/spe instructions at some point. */ +static void prepare_save_user_regs(int ctx_has_vsx_region) +{ + /* Make sure floating point registers are stored in regs */ + flush_fp_to_thread(current); +#ifdef CONFIG_ALTIVEC + if (current->thread.used_vr) + flush_altivec_to_thread(current); + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + current->thread.vrsave = mfspr(SPRN_VRSAVE); +#endif +#ifdef CONFIG_VSX + if (current->thread.used_vsr && ctx_has_vsx_region) + flush_vsx_to_thread(current); +#endif +#ifdef CONFIG_SPE + if (current->thread.used_spe) + flush_spe_to_thread(current); +#endif +} + static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, struct mcontext __user *tm_frame, int ctx_has_vsx_region) { unsigned long msr = regs->msr; - /* Make sure floating point registers are stored in regs */ - flush_fp_to_thread(current); - /* save general registers */ if (save_general_regs(regs, frame)) return 1; @@ -244,7 +261,6 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, #ifdef CONFIG_ALTIVEC /* save altivec registers */ if (current->thread.used_vr) { - flush_altivec_to_thread(current); if (__copy_to_user(&frame->mc_vregs, ¤t->thread.vr_state, ELF_NVRREG * sizeof(vector128))) return 1; @@ -260,8 +276,6 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, * most significant bits of that same vector. --BenH * Note that the current VRSAVE value is in the SPR at this point. */ - if (cpu_has_feature(CPU_FTR_ALTIVEC)) - current->thread.vrsave = mfspr(SPRN_VRSAVE); if (__put_user(current->thread.vrsave, (u32 __user *)&frame->mc_vregs[32])) return 1; #endif /* CONFIG_ALTIVEC */ @@ -281,7 +295,6 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, * contains valid data */ if (current->thread.used_vsr && ctx_has_vsx_region) { - flush_vsx_to_thread(current); if (copy_vsx_to_user(&frame->mc_vsregs, current)) return 1; msr |= MSR_VSX; @@ -290,7 +303,6 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, #ifdef CONFIG_SPE /* save spe registers */ if (current->thread.used_spe) { - flush_spe_to_thread(current); if (__copy_to_user(&frame->mc_vregs, current->thread.evr, ELF_NEVRREG * sizeof(u32))) return 1; @@ -326,11 +338,23 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, * * See save_user_regs() and signal_64.c:setup_tm_sigcontexts(). 
*/ -static int save_tm_user_regs(struct pt_regs *regs, struct mcontext __user *frame, - struct mcontext __user *tm_frame, unsigned long msr) +static void prepare_save_tm_user_regs(void) { WARN_ON(tm_suspend_disabled); +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + current->thread.ckvrsave = mfspr(SPRN_VRSAVE); +#endif +#ifdef CONFIG_SPE + if (current->thread.used_spe) + flush_spe_to_thread(current); +#endif +} + +static int save_tm_user_regs(struct pt_regs *regs, struct mcontext __user *frame, + struct mcontext __user *tm_frame, unsigned long msr) +{ /* Save both sets of general registers */ if (save_general_regs(&current->thread.ckpt_regs, frame) || save_general_regs(regs, tm_frame)) @@ -374,8 +398,6 @@ static int save_tm_user_regs(struct pt_regs *regs, struct mcontext __user *frame * significant bits of a vector, we "cheat" and stuff VRSAVE in the * most significant bits of that same vector. --BenH */ - if (cpu_has_feature(CPU_FTR_ALTIVEC)) - current->thread.ckvrsave = mfspr(SPRN_VRSAVE); if (__put_user(current->thread.ckvrsave, (u32 __user *)&frame->mc_vregs[32])) return 1; @@ -427,7 +449,6 @@ static int save_tm_user_regs(struct pt_regs *regs, struct mcontext __user *frame * simply the same as in save_user_regs(). */ if (current->thread.used_spe) { - flush_spe_to_thread(current); if (__copy_to_user(&frame->mc_vregs, current->thread.evr, ELF_NEVRREG * sizeof(u32))) return 1; @@ -447,6 +468,8 @@ static int save_tm_user_regs(struct pt_regs *regs, struct mcontext __user *frame return 0; } #else +static void prepare_save_tm_user_regs(void) { } + static int save_tm_user_regs(struct pt_regs *regs, struct mcontext __user *frame, struct mcontext __user *tm_frame, unsigned long msr) { @@ -790,9 +813,11 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, flush_icache_range(tramp, tramp + 2 * sizeof(unsigned long)); if (MSR_TM_ACTIVE(msr)) { + prepare_save_tm_user_regs(); if (save_tm_user_regs(regs, mctx, tm_mctx, msr)) goto badframe; } else { + prepare_save_user_regs(1); if (save_user_regs(regs, mctx, tm_mctx, 1)) goto badframe; } @@ -881,9 +906,11 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, flush_icache_range(tramp, tramp + 2 * sizeof(unsigned long)); if (MSR_TM_ACTIVE(msr)) { + prepare_save_tm_user_regs(); if (save_tm_user_regs(regs, mctx, tm_mctx, msr)) goto badframe; } else { + prepare_save_user_regs(1); if (save_user_regs(regs, mctx, tm_mctx, 1)) goto badframe; } @@ -1038,6 +1065,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, */ mctx = (struct mcontext __user *) ((unsigned long) &old_ctx->uc_mcontext & ~0xfUL); + prepare_save_user_regs(ctx_has_vsx_region); if (save_user_regs(regs, mctx, NULL, ctx_has_vsx_region)) return -EFAULT; if (!user_write_access_begin(old_ctx, ctx_size)) From ef75e73182949a94bde169a774de1b62ae21fbbc Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 18 Aug 2020 17:19:38 +0000 Subject: [PATCH 078/304] powerpc/signal32: Transform save_user_regs() and save_tm_user_regs() in 'unsafe' version Change those two functions to be used within a user access block. For that, change save_general_regs() into unsafe_save_general_regs(), then replace all user accesses by unsafe_ versions. This series leads to a reduction of the system CPU time from 2.55s to 1.73s (approx 32%) with the following microbench app on an mpc832x with KUAP. Without KUAP, the difference is in the noise. 
void sigusr1(int sig) { } int main(int argc, char **argv) { int i = 100000; signal(SIGUSR1, sigusr1); for (;i--;) raise(SIGUSR1); exit(0); } An additional 0.10s reduction is achieved by removing CONFIG_PPC_FPU, as the mpc832x has no FPU. The gain is a bit less spectacular on an 8xx as KUAP is less heavy there: prior to the series (with KUAP) it ran in 8.10s. Once the removal of FPU regs handling is applied, we get 7.05s. With the full series, we get 6.9s. If we artificially re-activate FPU regs handling with the full series, we get 7.6s. So for the 8xx, the removal of the FPU regs copy is what makes the difference, but the rework of handle_signal also has a benefit. Same as above, without KUAP the difference is in the noise. Signed-off-by: Christophe Leroy [mpe: Fixup typo in SPE handling] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c7b37b385ccf9666066452e58f018a86573f83e8.1597770847.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal_32.c | 224 ++++++++++++++++---------------- 1 file changed, 111 insertions(+), 113 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 86539a4e0514..123682299d4f 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -93,8 +93,8 @@ static inline int get_sigset_t(sigset_t *set, #define to_user_ptr(p) ptr_to_compat(p) #define from_user_ptr(p) compat_ptr(p) -static inline int save_general_regs(struct pt_regs *regs, - struct mcontext __user *frame) +static __always_inline int +save_general_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame) { elf_greg_t64 *gregs = (elf_greg_t64 *)regs; int val, i; @@ -108,10 +108,12 @@ static inline int save_general_regs(struct pt_regs *regs, else val = gregs[i]; - if (__put_user(val, &frame->mc_gregs[i])) - return -EFAULT; + unsafe_put_user(val, &frame->mc_gregs[i], failed); } return 0; + +failed: + return 1; } static inline int restore_general_regs(struct pt_regs *regs, @@ -148,11 +150,15 @@ static inline int get_sigset_t(sigset_t *set, const sigset_t __user *uset) #define to_user_ptr(p) ((unsigned long)(p)) #define from_user_ptr(p) ((void __user *)(p)) -static inline int save_general_regs(struct pt_regs *regs, - struct mcontext __user *frame) +static __always_inline int +save_general_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame) { WARN_ON(!FULL_REGS(regs)); - return __copy_to_user(&frame->mc_gregs, regs, GP_REGS_SIZE); + unsafe_copy_to_user(&frame->mc_gregs, regs, GP_REGS_SIZE, failed); + return 0; + +failed: + return 1; } static inline int restore_general_regs(struct pt_regs *regs, @@ -170,6 +176,11 @@ static inline int restore_general_regs(struct pt_regs *regs, } #endif +#define unsafe_save_general_regs(regs, frame, label) do { \ + if (save_general_regs_unsafe(regs, frame)) \ + goto label; \ +} while (0) + /* * When we have signals to deliver, we set up on the * user stack, going down from the original stack pointer: @@ -249,21 +260,19 @@ static void prepare_save_user_regs(int ctx_has_vsx_region) #endif } -static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, - struct mcontext __user *tm_frame, int ctx_has_vsx_region) +static int save_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, + struct mcontext __user *tm_frame, int ctx_has_vsx_region) { unsigned long msr = regs->msr; /* save general registers */ - if (save_general_regs(regs, frame)) - return 1; + unsafe_save_general_regs(regs, frame, failed); #ifdef CONFIG_ALTIVEC /* save altivec registers */ if 
(current->thread.used_vr) { - if (__copy_to_user(&frame->mc_vregs, ¤t->thread.vr_state, - ELF_NVRREG * sizeof(vector128))) - return 1; + unsafe_copy_to_user(&frame->mc_vregs, ¤t->thread.vr_state, + ELF_NVRREG * sizeof(vector128), failed); /* set MSR_VEC in the saved MSR value to indicate that frame->mc_vregs contains valid data */ msr |= MSR_VEC; @@ -276,11 +285,10 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, * most significant bits of that same vector. --BenH * Note that the current VRSAVE value is in the SPR at this point. */ - if (__put_user(current->thread.vrsave, (u32 __user *)&frame->mc_vregs[32])) - return 1; + unsafe_put_user(current->thread.vrsave, (u32 __user *)&frame->mc_vregs[32], + failed); #endif /* CONFIG_ALTIVEC */ - if (copy_fpr_to_user(&frame->mc_fregs, current)) - return 1; + unsafe_copy_fpr_to_user(&frame->mc_fregs, current, failed); /* * Clear the MSR VSX bit to indicate there is no valid state attached @@ -295,17 +303,15 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, * contains valid data */ if (current->thread.used_vsr && ctx_has_vsx_region) { - if (copy_vsx_to_user(&frame->mc_vsregs, current)) - return 1; + unsafe_copy_vsx_to_user(&frame->mc_vsregs, current, failed); msr |= MSR_VSX; } #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE /* save spe registers */ if (current->thread.used_spe) { - if (__copy_to_user(&frame->mc_vregs, current->thread.evr, - ELF_NEVRREG * sizeof(u32))) - return 1; + unsafe_copy_to_user(&frame->mc_vregs, current->thread.evr, + ELF_NEVRREG * sizeof(u32), failed); /* set MSR_SPE in the saved MSR value to indicate that frame->mc_vregs contains valid data */ msr |= MSR_SPE; @@ -313,21 +319,29 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, /* else assert((regs->msr & MSR_SPE) == 0) */ /* We always copy to/from spefscr */ - if (__put_user(current->thread.spefscr, (u32 __user *)&frame->mc_vregs + ELF_NEVRREG)) - return 1; + unsafe_put_user(current->thread.spefscr, + (u32 __user *)&frame->mc_vregs + ELF_NEVRREG, failed); #endif /* CONFIG_SPE */ - if (__put_user(msr, &frame->mc_gregs[PT_MSR])) - return 1; + unsafe_put_user(msr, &frame->mc_gregs[PT_MSR], failed); + /* We need to write 0 the MSR top 32 bits in the tm frame so that we * can check it on the restore to see if TM is active */ - if (tm_frame && __put_user(0, &tm_frame->mc_gregs[PT_MSR])) - return 1; + if (tm_frame) + unsafe_put_user(0, &tm_frame->mc_gregs[PT_MSR], failed); return 0; + +failed: + return 1; } +#define unsafe_save_user_regs(regs, frame, tm_frame, has_vsx, label) do { \ + if (save_user_regs_unsafe(regs, frame, tm_frame, has_vsx)) \ + goto label; \ +} while (0) + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* * Save the current user registers on the user stack. @@ -336,7 +350,7 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, * We also save the transactional registers to a second ucontext in the * frame. * - * See save_user_regs() and signal_64.c:setup_tm_sigcontexts(). + * See save_user_regs_unsafe() and signal_64.c:setup_tm_sigcontexts(). 
*/ static void prepare_save_tm_user_regs(void) { @@ -352,13 +366,12 @@ static void prepare_save_tm_user_regs(void) #endif } -static int save_tm_user_regs(struct pt_regs *regs, struct mcontext __user *frame, - struct mcontext __user *tm_frame, unsigned long msr) +static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, + struct mcontext __user *tm_frame, unsigned long msr) { /* Save both sets of general registers */ - if (save_general_regs(¤t->thread.ckpt_regs, frame) - || save_general_regs(regs, tm_frame)) - return 1; + unsafe_save_general_regs(¤t->thread.ckpt_regs, frame, failed); + unsafe_save_general_regs(regs, tm_frame, failed); /* Stash the top half of the 64bit MSR into the 32bit MSR word * of the transactional mcontext. This way we have a backward-compatible @@ -366,26 +379,21 @@ static int save_tm_user_regs(struct pt_regs *regs, struct mcontext __user *frame * also look at what type of transaction (T or S) was active at the * time of the signal. */ - if (__put_user((msr >> 32), &tm_frame->mc_gregs[PT_MSR])) - return 1; + unsafe_put_user((msr >> 32), &tm_frame->mc_gregs[PT_MSR], failed); #ifdef CONFIG_ALTIVEC /* save altivec registers */ if (current->thread.used_vr) { - if (__copy_to_user(&frame->mc_vregs, ¤t->thread.ckvr_state, - ELF_NVRREG * sizeof(vector128))) - return 1; - if (msr & MSR_VEC) { - if (__copy_to_user(&tm_frame->mc_vregs, - ¤t->thread.vr_state, - ELF_NVRREG * sizeof(vector128))) - return 1; - } else { - if (__copy_to_user(&tm_frame->mc_vregs, - ¤t->thread.ckvr_state, - ELF_NVRREG * sizeof(vector128))) - return 1; - } + unsafe_copy_to_user(&frame->mc_vregs, ¤t->thread.ckvr_state, + ELF_NVRREG * sizeof(vector128), failed); + if (msr & MSR_VEC) + unsafe_copy_to_user(&tm_frame->mc_vregs, + ¤t->thread.vr_state, + ELF_NVRREG * sizeof(vector128), failed); + else + unsafe_copy_to_user(&tm_frame->mc_vregs, + ¤t->thread.ckvr_state, + ELF_NVRREG * sizeof(vector128), failed); /* set MSR_VEC in the saved MSR value to indicate that * frame->mc_vregs contains valid data @@ -398,29 +406,21 @@ static int save_tm_user_regs(struct pt_regs *regs, struct mcontext __user *frame * significant bits of a vector, we "cheat" and stuff VRSAVE in the * most significant bits of that same vector. 
--BenH */ - if (__put_user(current->thread.ckvrsave, - (u32 __user *)&frame->mc_vregs[32])) - return 1; - if (msr & MSR_VEC) { - if (__put_user(current->thread.vrsave, - (u32 __user *)&tm_frame->mc_vregs[32])) - return 1; - } else { - if (__put_user(current->thread.ckvrsave, - (u32 __user *)&tm_frame->mc_vregs[32])) - return 1; - } + unsafe_put_user(current->thread.ckvrsave, + (u32 __user *)&frame->mc_vregs[32], failed); + if (msr & MSR_VEC) + unsafe_put_user(current->thread.vrsave, + (u32 __user *)&tm_frame->mc_vregs[32], failed); + else + unsafe_put_user(current->thread.ckvrsave, + (u32 __user *)&tm_frame->mc_vregs[32], failed); #endif /* CONFIG_ALTIVEC */ - if (copy_ckfpr_to_user(&frame->mc_fregs, current)) - return 1; - if (msr & MSR_FP) { - if (copy_fpr_to_user(&tm_frame->mc_fregs, current)) - return 1; - } else { - if (copy_ckfpr_to_user(&tm_frame->mc_fregs, current)) - return 1; - } + unsafe_copy_ckfpr_to_user(&frame->mc_fregs, current, failed); + if (msr & MSR_FP) + unsafe_copy_fpr_to_user(&tm_frame->mc_fregs, current, failed); + else + unsafe_copy_ckfpr_to_user(&tm_frame->mc_fregs, current, failed); #ifdef CONFIG_VSX /* @@ -430,53 +430,54 @@ static int save_tm_user_regs(struct pt_regs *regs, struct mcontext __user *frame * contains valid data */ if (current->thread.used_vsr) { - if (copy_ckvsx_to_user(&frame->mc_vsregs, current)) - return 1; - if (msr & MSR_VSX) { - if (copy_vsx_to_user(&tm_frame->mc_vsregs, - current)) - return 1; - } else { - if (copy_ckvsx_to_user(&tm_frame->mc_vsregs, current)) - return 1; - } + unsafe_copy_ckvsx_to_user(&frame->mc_vsregs, current, failed); + if (msr & MSR_VSX) + unsafe_copy_vsx_to_user(&tm_frame->mc_vsregs, current, failed); + else + unsafe_copy_ckvsx_to_user(&tm_frame->mc_vsregs, current, failed); msr |= MSR_VSX; } #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE /* SPE regs are not checkpointed with TM, so this section is - * simply the same as in save_user_regs(). + * simply the same as in save_user_regs_unsafe(). */ if (current->thread.used_spe) { - if (__copy_to_user(&frame->mc_vregs, current->thread.evr, - ELF_NEVRREG * sizeof(u32))) - return 1; + unsafe_copy_to_user(&frame->mc_vregs, current->thread.evr, + ELF_NEVRREG * sizeof(u32), failed); /* set MSR_SPE in the saved MSR value to indicate that * frame->mc_vregs contains valid data */ msr |= MSR_SPE; } /* We always copy to/from spefscr */ - if (__put_user(current->thread.spefscr, (u32 __user *)&frame->mc_vregs + ELF_NEVRREG)) - return 1; + unsafe_put_user(current->thread.spefscr, + (u32 __user *)&frame->mc_vregs + ELF_NEVRREG, failed); #endif /* CONFIG_SPE */ - if (__put_user(msr, &frame->mc_gregs[PT_MSR])) - return 1; + unsafe_put_user(msr, &frame->mc_gregs[PT_MSR], failed); return 0; + +failed: + return 1; } #else static void prepare_save_tm_user_regs(void) { } -static int save_tm_user_regs(struct pt_regs *regs, struct mcontext __user *frame, - struct mcontext __user *tm_frame, unsigned long msr) +static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, + struct mcontext __user *tm_frame, unsigned long msr) { return 0; } #endif +#define unsafe_save_tm_user_regs(regs, frame, tm_frame, msr, label) do { \ + if (save_tm_user_regs_unsafe(regs, frame, tm_frame, msr)) \ + goto label; \ +} while (0) + /* * Restore the current user register values from the user stack, * (except for MSR). 
@@ -769,6 +770,11 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, #ifdef CONFIG_PPC_TRANSACTIONAL_MEM tm_mctx = &frame->uc_transact.uc_mcontext; #endif + if (MSR_TM_ACTIVE(msr)) + prepare_save_tm_user_regs(); + else + prepare_save_user_regs(1); + if (!user_write_access_begin(frame, sizeof(*frame))) goto badframe; @@ -788,8 +794,10 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, unsafe_put_user((unsigned long)tm_mctx, &frame->uc_transact.uc_regs, failed); #endif + unsafe_save_tm_user_regs(regs, mctx, tm_mctx, msr, failed); } else { unsafe_put_user(0, &frame->uc.uc_link, failed); + unsafe_save_user_regs(regs, mctx, tm_mctx, 1, failed); } /* Save user registers on the stack */ @@ -812,15 +820,6 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, if (tramp == (unsigned long)mctx->mc_pad) flush_icache_range(tramp, tramp + 2 * sizeof(unsigned long)); - if (MSR_TM_ACTIVE(msr)) { - prepare_save_tm_user_regs(); - if (save_tm_user_regs(regs, mctx, tm_mctx, msr)) - goto badframe; - } else { - prepare_save_user_regs(1); - if (save_user_regs(regs, mctx, tm_mctx, 1)) - goto badframe; - } regs->link = tramp; #ifdef CONFIG_PPC_FPU_REGS @@ -875,6 +874,11 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, #ifdef CONFIG_PPC_TRANSACTIONAL_MEM tm_mctx = &frame->mctx_transact; #endif + if (MSR_TM_ACTIVE(msr)) + prepare_save_tm_user_regs(); + else + prepare_save_user_regs(1); + if (!user_write_access_begin(frame, sizeof(*frame))) goto badframe; sc = (struct sigcontext __user *) &frame->sctx; @@ -892,6 +896,11 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, unsafe_put_user(to_user_ptr(mctx), &sc->regs, failed); unsafe_put_user(ksig->sig, &sc->signal, failed); + if (MSR_TM_ACTIVE(msr)) + unsafe_save_tm_user_regs(regs, mctx, tm_mctx, msr, failed); + else + unsafe_save_user_regs(regs, mctx, tm_mctx, 1, failed); + if (vdso32_sigtramp && tsk->mm->context.vdso_base) { tramp = tsk->mm->context.vdso_base + vdso32_sigtramp; } else { @@ -905,16 +914,6 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, if (tramp == (unsigned long)mctx->mc_pad) flush_icache_range(tramp, tramp + 2 * sizeof(unsigned long)); - if (MSR_TM_ACTIVE(msr)) { - prepare_save_tm_user_regs(); - if (save_tm_user_regs(regs, mctx, tm_mctx, msr)) - goto badframe; - } else { - prepare_save_user_regs(1); - if (save_user_regs(regs, mctx, tm_mctx, 1)) - goto badframe; - } - regs->link = tramp; #ifdef CONFIG_PPC_FPU_REGS @@ -1066,10 +1065,9 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, mctx = (struct mcontext __user *) ((unsigned long) &old_ctx->uc_mcontext & ~0xfUL); prepare_save_user_regs(ctx_has_vsx_region); - if (save_user_regs(regs, mctx, NULL, ctx_has_vsx_region)) - return -EFAULT; if (!user_write_access_begin(old_ctx, ctx_size)) return -EFAULT; + unsafe_save_user_regs(regs, mctx, NULL, ctx_has_vsx_region, failed); unsafe_put_sigset_t(&old_ctx->uc_sigmask, ¤t->blocked, failed); unsafe_put_user(to_user_ptr(mctx), &old_ctx->uc_regs, failed); user_write_access_end(); From 7fe2de246e21f01212a8923fbabb4ac84c944d4a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 27 Sep 2020 09:16:18 +0000 Subject: [PATCH 079/304] powerpc/vdso: Stripped VDSO is not needed, don't build it Since commit 24b659a13866 ("powerpc: Use unstripped VDSO image for more accurate profiling data"), only the unstripped VDSO image has been used. 
Partially revert commit 8150caad0226 ("[POWERPC] powerpc vDSO: install unstripped copies on disk") to avoid building the stripped version. And the unstripped version in $(MODLIB)/vdso/ is not required anymore as it is the one embedded in the kernel image. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5986ca25be44fe6e9790486304507f240077d8c4.1601197618.git.christophe.leroy@csgroup.eu --- arch/powerpc/Makefile | 9 --------- arch/powerpc/kernel/vdso32/Makefile | 19 ++----------------- arch/powerpc/kernel/vdso64/Makefile | 19 ++----------------- 3 files changed, 4 insertions(+), 43 deletions(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 16b8336f91dd..86c925bfbb76 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -406,15 +406,6 @@ PHONY += install install: $(Q)$(MAKE) $(build)=$(boot) install -PHONY += vdso_install -vdso_install: -ifdef CONFIG_PPC64 - $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@ -endif -ifdef CONFIG_VDSO32 - $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso32 $@ -endif - archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index 853545a19a1e..a119d9f84b08 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -30,7 +30,7 @@ CC32FLAGS += -m32 KBUILD_CFLAGS := $(filter-out -mcmodel=medium,$(KBUILD_CFLAGS)) endif -targets := $(obj-vdso32) vdso32.so vdso32.so.dbg +targets := $(obj-vdso32) vdso32.so.dbg obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) GCOV_PROFILE := n @@ -47,17 +47,12 @@ targets += vdso32.lds CPPFLAGS_vdso32.lds += -P -C -Upowerpc # Force dependency (incbin is bad) -$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so +$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so.dbg # link rule for the .so file, .lds has to be first $(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday.o FORCE $(call if_changed,vdso32ld_and_check) -# strip rule for the .so file -$(obj)/%.so: OBJCOPYFLAGS := -S -$(obj)/%.so: $(obj)/%.so.dbg FORCE - $(call if_changed,objcopy) - # assembly rules for the .S files $(obj-vdso32): %.o: %.S FORCE $(call if_changed_dep,vdso32as) @@ -71,13 +66,3 @@ quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) -c -o $@ $< quiet_cmd_vdso32cc = VDSO32C $@ cmd_vdso32cc = $(VDSOCC) $(c_flags) $(CC32FLAGS) -c -o $@ $< - -# install commands for the unstripped file -quiet_cmd_vdso_install = INSTALL $@ - cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ - -vdso32.so: $(obj)/vdso32.so.dbg - @mkdir -p $(MODLIB)/vdso - $(call cmd,vdso_install) - -vdso_install: vdso32.so diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index 4a8c5e4d25c0..29004ad1b0fb 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -17,7 +17,7 @@ endif # Build rules -targets := $(obj-vdso64) vdso64.so vdso64.so.dbg +targets := $(obj-vdso64) vdso64.so.dbg obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) GCOV_PROFILE := n @@ -36,27 +36,12 @@ CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) $(obj)/vgettimeofday.o: %.o: %.c FORCE # Force dependency (incbin is bad) -$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so +$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so.dbg # link rule for the .so file, .lds has to be first $(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday.o FORCE $(call if_changed,vdso64ld_and_check) -# strip rule for the .so file -$(obj)/%.so: OBJCOPYFLAGS := -S -$(obj)/%.so: 
$(obj)/%.so.dbg FORCE
-	$(call if_changed,objcopy)
-
 # actual build commands
 quiet_cmd_vdso64ld_and_check = VDSO64L $@
       cmd_vdso64ld_and_check = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check)
-
-# install commands for the unstripped file
-quiet_cmd_vdso_install = INSTALL $@
-      cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-
-vdso64.so: $(obj)/vdso64.so.dbg
-	@mkdir -p $(MODLIB)/vdso
-	$(call cmd,vdso_install)
-
-vdso_install: vdso64.so

From bc9d5bfc4d23fb3580e7da360f2c9bd878dda9b2 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Sun, 27 Sep 2020 09:16:19 +0000
Subject: [PATCH 080/304] powerpc/vdso: Add missing includes and clean vdso_setup_syscall_map()

Instead of including extern references locally in
vdso_setup_syscall_map(), add the missing headers.

sys_ni_syscall() being a function, cast its address to an unsigned long
instead of declaring it as a fake unsigned long object.

At the same time, remove a comment which paraphrases the function name.

Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/b4afedce748ed2858299ceab5ae29b52109263ef.1601197618.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kernel/vdso.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 23208a051af5..b0332c609104 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -17,8 +17,10 @@
 #include
 #include
 #include
+#include <linux/syscalls.h>
 
 #include
+#include <asm/syscall.h>
 #include
 #include
 #include
@@ -639,24 +641,18 @@ static __init int vdso_setup(void)
 static void __init vdso_setup_syscall_map(void)
 {
 	unsigned int i;
-	extern unsigned long *sys_call_table;
-#ifdef CONFIG_PPC64
-	extern unsigned long *compat_sys_call_table;
-#endif
-	extern unsigned long sys_ni_syscall;
-
 	for (i = 0; i < NR_syscalls; i++) {
 #ifdef CONFIG_PPC64
-		if (sys_call_table[i] != sys_ni_syscall)
+		if (sys_call_table[i] != (unsigned long)&sys_ni_syscall)
 			vdso_data->syscall_map_64[i >> 5] |=
 				0x80000000UL >> (i & 0x1f);
 		if (IS_ENABLED(CONFIG_COMPAT) &&
-		    compat_sys_call_table[i] != sys_ni_syscall)
+		    compat_sys_call_table[i] != (unsigned long)&sys_ni_syscall)
 			vdso_data->syscall_map_32[i >> 5] |=
 				0x80000000UL >> (i & 0x1f);
 #else /* CONFIG_PPC64 */
-		if (sys_call_table[i] != sys_ni_syscall)
+		if (sys_call_table[i] != (unsigned long)&sys_ni_syscall)
 			vdso_data->syscall_map_32[i >> 5] |=
 				0x80000000UL >> (i & 0x1f);
 #endif /* CONFIG_PPC64 */
@@ -738,9 +734,6 @@ static int __init vdso_init(void)
 
 #endif
 
-	/*
-	 * Setup the syscall map in the vDOS
-	 */
 	vdso_setup_syscall_map();
 
 	/*

From 1bb30b7a45976ae02d54fd43a8665e77314cc05e Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Sun, 27 Sep 2020 09:16:20 +0000
Subject: [PATCH 081/304] powerpc/vdso: Rename syscall_map_32/64 to simplify vdso_setup_syscall_map()

Today vdso_data structure has:
- syscall_map_32[] and syscall_map_64[] on PPC64
- syscall_map_32[] on PPC32

On PPC32, syscall_map_32[] is populated using sys_call_table[].
On PPC64, syscall_map_64[] is populated using sys_call_table[] and
syscall_map_32[] is populated using compat_sys_call_table[].

To simplify vdso_setup_syscall_map(),
- On PPC32 rename syscall_map_32[] into syscall_map[],
- On PPC64 rename syscall_map_64[] into syscall_map[],
- On PPC64 rename syscall_map_32[] into compat_syscall_map[].

That way, syscall_map[] gets populated using sys_call_table[] and
compat_syscall_map[] gets populated using compat_sys_call_table[].
Also define an empty compat_syscall_map[] on PPC32 to avoid ifdefs. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/472734be0d9991eee320a06824219a5b2663736b.1601197618.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/vdso_datapage.h | 7 ++++--- arch/powerpc/kernel/asm-offsets.c | 6 ++++-- arch/powerpc/kernel/vdso.c | 12 ++---------- 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h index c4d320504d26..3d996db05acd 100644 --- a/arch/powerpc/include/asm/vdso_datapage.h +++ b/arch/powerpc/include/asm/vdso_datapage.h @@ -79,8 +79,8 @@ struct vdso_arch_data { __u32 icache_block_size; /* L1 i-cache block size */ __u32 dcache_log_block_size; /* L1 d-cache log block size */ __u32 icache_log_block_size; /* L1 i-cache log block size */ - __u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls */ - __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */ + __u32 syscall_map[SYSCALL_MAP_SIZE]; /* Map of syscalls */ + __u32 compat_syscall_map[SYSCALL_MAP_SIZE]; /* Map of compat syscalls */ struct vdso_data data[CS_BASES]; }; @@ -92,7 +92,8 @@ struct vdso_arch_data { */ struct vdso_arch_data { __u64 tb_ticks_per_sec; /* Timebase tics / sec 0x38 */ - __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */ + __u32 syscall_map[SYSCALL_MAP_SIZE]; /* Map of syscalls */ + __u32 compat_syscall_map[0]; /* No compat syscalls on PPC32 */ struct vdso_data data[CS_BASES]; }; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 81d68494d026..d4331d451c71 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -402,13 +402,15 @@ int main(void) /* datapage offsets for use by vdso */ OFFSET(VDSO_DATA_OFFSET, vdso_arch_data, data); OFFSET(CFG_TB_TICKS_PER_SEC, vdso_arch_data, tb_ticks_per_sec); - OFFSET(CFG_SYSCALL_MAP32, vdso_arch_data, syscall_map_32); #ifdef CONFIG_PPC64 OFFSET(CFG_ICACHE_BLOCKSZ, vdso_arch_data, icache_block_size); OFFSET(CFG_DCACHE_BLOCKSZ, vdso_arch_data, dcache_block_size); OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_arch_data, icache_log_block_size); OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_arch_data, dcache_log_block_size); - OFFSET(CFG_SYSCALL_MAP64, vdso_arch_data, syscall_map_64); + OFFSET(CFG_SYSCALL_MAP64, vdso_arch_data, syscall_map); + OFFSET(CFG_SYSCALL_MAP32, vdso_arch_data, compat_syscall_map); +#else + OFFSET(CFG_SYSCALL_MAP32, vdso_arch_data, syscall_map); #endif #ifdef CONFIG_BUG diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index b0332c609104..6d106fcafb9e 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -643,19 +643,11 @@ static void __init vdso_setup_syscall_map(void) unsigned int i; for (i = 0; i < NR_syscalls; i++) { -#ifdef CONFIG_PPC64 if (sys_call_table[i] != (unsigned long)&sys_ni_syscall) - vdso_data->syscall_map_64[i >> 5] |= - 0x80000000UL >> (i & 0x1f); + vdso_data->syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f); if (IS_ENABLED(CONFIG_COMPAT) && compat_sys_call_table[i] != (unsigned long)&sys_ni_syscall) - vdso_data->syscall_map_32[i >> 5] |= - 0x80000000UL >> (i & 0x1f); -#else /* CONFIG_PPC64 */ - if (sys_call_table[i] != (unsigned long)&sys_ni_syscall) - vdso_data->syscall_map_32[i >> 5] |= - 0x80000000UL >> (i & 0x1f); -#endif /* CONFIG_PPC64 */ + vdso_data->compat_syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f); } } From abcdbd039e6823305c2841d07a352fbd2343564e Mon Sep 17 00:00:00 2001 
From: Christophe Leroy
Date: Sun, 27 Sep 2020 09:16:21 +0000
Subject: [PATCH 082/304] powerpc/vdso: Remove get_page() in vdso_pagelist initialization

Partly copied from commit 16fb1a9bec61 ("arm64: vdso: clean up
vdso_pagelist initialization").

No need to get_page() the vdso text/data - these are part of the kernel
image.

Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/9d14540bd10832b6c9519d74fb5728fdc4974b36.1601197618.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kernel/vdso.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 6d106fcafb9e..dfaa4be258d2 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -748,7 +748,7 @@ static int __init vdso_init(void)
 	BUG_ON(vdso32_pagelist == NULL);
 	for (i = 0; i < vdso32_pages; i++) {
 		struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
-		get_page(pg);
+
 		vdso32_pagelist[i] = pg;
 	}
 	vdso32_pagelist[i++] = virt_to_page(vdso_data);
@@ -761,15 +761,13 @@ static int __init vdso_init(void)
 	BUG_ON(vdso64_pagelist == NULL);
 	for (i = 0; i < vdso64_pages; i++) {
 		struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
-		get_page(pg);
+
 		vdso64_pagelist[i] = pg;
 	}
 	vdso64_pagelist[i++] = virt_to_page(vdso_data);
 	vdso64_pagelist[i] = NULL;
 #endif /* CONFIG_PPC64 */
 
-	get_page(virt_to_page(vdso_data));
-
 	smp_wmb();
 	vdso_ready = 1;

From 35c1c7c0bc354d8c3d55bea3bf3e239797980013 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Sun, 27 Sep 2020 09:16:22 +0000
Subject: [PATCH 083/304] powerpc/vdso: Remove NULL termination element in vdso_pagelist

No need of a NULL last element in pagelists, install_special_mapping()
knows how long the list is.

Remove that element.

Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/e58d95ab859e3cbc9bae3c9ce2959e17d2864f5d.1601197618.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kernel/vdso.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index dfaa4be258d2..d2c08f5de587 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -743,7 +743,7 @@ static int __init vdso_init(void)
 
 #ifdef CONFIG_VDSO32
 	/* Make sure pages are in the correct state */
-	vdso32_pagelist = kcalloc(vdso32_pages + 2, sizeof(struct page *),
+	vdso32_pagelist = kcalloc(vdso32_pages + 1, sizeof(struct page *),
 				  GFP_KERNEL);
 	BUG_ON(vdso32_pagelist == NULL);
 	for (i = 0; i < vdso32_pages; i++) {
@@ -752,11 +752,10 @@ static int __init vdso_init(void)
 		vdso32_pagelist[i] = pg;
 	}
 	vdso32_pagelist[i++] = virt_to_page(vdso_data);
-	vdso32_pagelist[i] = NULL;
 #endif
 
 #ifdef CONFIG_PPC64
-	vdso64_pagelist = kcalloc(vdso64_pages + 2, sizeof(struct page *),
+	vdso64_pagelist = kcalloc(vdso64_pages + 1, sizeof(struct page *),
 				  GFP_KERNEL);
 	BUG_ON(vdso64_pagelist == NULL);
 	for (i = 0; i < vdso64_pages; i++) {
@@ -765,7 +764,6 @@ static int __init vdso_init(void)
 		vdso64_pagelist[i] = pg;
 	}
 	vdso64_pagelist[i++] = virt_to_page(vdso_data);
-	vdso64_pagelist[i] = NULL;
 #endif /* CONFIG_PPC64 */
 
 	smp_wmb();

From 3cf63825413c9eed2dae06070464efb27381bdac Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Sun, 27 Sep 2020 09:16:23 +0000
Subject: [PATCH 084/304] powerpc/vdso: Refactor 32 bits and 64 bits pages setup

The setup of VDSO pages is identical for 32 bits VDSO and 64 bits VDSO.

Refactor that setup.

And use &vdsoXX_start which is a synonym of vdsoXX_kbase.
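
[ed: a minimal userspace sketch of the shared-helper shape this patch
introduces; all names, the PAGE_SHIFT value and the abort-on-failure
stand-in are illustrative, not kernel API:

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* One helper derives the page count from [start, end) and appends the
 * common data page, replacing two near-identical 32/64-bit copies. */
static void **setup_pages(char *start, char *end, void *data_page)
{
	int i, pages = (end - start) >> PAGE_SHIFT;
	void **list = calloc(pages + 1, sizeof(*list));

	if (!list)
		abort();
	for (i = 0; i < pages; i++)
		list[i] = start + i * PAGE_SIZE;	/* stands in for virt_to_page() */
	list[i] = data_page;	/* the shared data page rides at the end */
	return list;
}

int main(void)
{
	static char text[4 * PAGE_SIZE], data[PAGE_SIZE];
	void **list = setup_pages(text, text + sizeof(text), data);

	printf("data page lands at list[4]: %p\n", list[4]);
	free(list);
	return 0;
}
]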
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/269ffb54c37fc1d46128f77d7a39f88ef4a9957d.1601197618.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/vdso.c | 39 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index d2c08f5de587..d129d7ee006d 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -678,10 +678,26 @@ int vdso_getcpu_init(void) early_initcall(vdso_getcpu_init); #endif -static int __init vdso_init(void) +static struct page ** __init vdso_setup_pages(void *start, void *end) { int i; + struct page **pagelist; + int pages = (end - start) >> PAGE_SHIFT; + pagelist = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL); + if (!pagelist) + panic("%s: Cannot allocate page list for VDSO", __func__); + + for (i = 0; i < pages; i++) + pagelist[i] = virt_to_page(start + i * PAGE_SIZE); + + pagelist[i] = virt_to_page(vdso_data); + + return pagelist; +} + +static int __init vdso_init(void) +{ #ifdef CONFIG_PPC64 /* * Fill up the "systemcfg" stuff for backward compatibility @@ -742,28 +758,11 @@ static int __init vdso_init(void) } #ifdef CONFIG_VDSO32 - /* Make sure pages are in the correct state */ - vdso32_pagelist = kcalloc(vdso32_pages + 1, sizeof(struct page *), - GFP_KERNEL); - BUG_ON(vdso32_pagelist == NULL); - for (i = 0; i < vdso32_pages; i++) { - struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE); - - vdso32_pagelist[i] = pg; - } - vdso32_pagelist[i++] = virt_to_page(vdso_data); + vdso32_pagelist = vdso_setup_pages(&vdso32_start, &vdso32_end); #endif #ifdef CONFIG_PPC64 - vdso64_pagelist = kcalloc(vdso64_pages + 1, sizeof(struct page *), - GFP_KERNEL); - BUG_ON(vdso64_pagelist == NULL); - for (i = 0; i < vdso64_pages; i++) { - struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE); - - vdso64_pagelist[i] = pg; - } - vdso64_pagelist[i++] = virt_to_page(vdso_data); + vdso64_pagelist = vdso_setup_pages(&vdso64_start, &vdso64_end); #endif /* CONFIG_PPC64 */ smp_wmb(); From 4fe0e3c1724e397845df75f64059bcea4ff590e8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 27 Sep 2020 09:16:24 +0000 Subject: [PATCH 085/304] powerpc/vdso: Remove unnecessary ifdefs in vdso_pagelist initialization No need of all those #ifdefs around the pagelist initialisation, use IS_ENABLED(), GCC will kick out unused static variables. 
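
[ed: a stand-in sketch of why if (IS_ENABLED(...)) can replace #ifdef:
the condition is a compile-time constant, so the optimizer deletes the
dead branch and any static data only that branch references, while both
branches still get parsed and type-checked. IS_ENABLED_DEMO is a local
stand-in, not the kernel's <linux/kconfig.h> macro:

#include <stdio.h>

#define CONFIG_VDSO32_DEMO 0
#define IS_ENABLED_DEMO(opt) (opt)

static const char vdso32_msg[] = "setting up 32-bit vDSO pages";

int main(void)
{
	if (IS_ENABLED_DEMO(CONFIG_VDSO32_DEMO))
		puts(vdso32_msg);	/* dead code: dropped along with vdso32_msg */
	else
		puts("32-bit vDSO disabled");
	return 0;
}
]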
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/f9333432e329b1fcbbbf846cb1cd4a1c4127a60b.1601197618.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kernel/vdso.c | 31 ++++++-------------------------
 1 file changed, 6 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index d129d7ee006d..a24f6a583fac 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -53,15 +53,12 @@ static struct page **vdso32_pagelist;
 unsigned long vdso32_sigtramp;
 unsigned long vdso32_rt_sigtramp;
 
-#ifdef CONFIG_VDSO32
 extern char vdso32_start, vdso32_end;
-#endif
-
-#ifdef CONFIG_PPC64
 extern char vdso64_start, vdso64_end;
 static void *vdso64_kbase = &vdso64_start;
 static unsigned int vdso64_pages;
 static struct page **vdso64_pagelist;
+#ifdef CONFIG_PPC64
 unsigned long vdso64_rt_sigtramp;
 #endif /* CONFIG_PPC64 */
 
@@ -136,7 +133,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	if (!vdso_ready)
 		return 0;
 
-#ifdef CONFIG_PPC64
 	if (is_32bit_task()) {
 		vdso_pagelist = vdso32_pagelist;
 		vdso_pages = vdso32_pages;
@@ -151,11 +147,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 		 */
 		vdso_base = 0;
 	}
-#else
-	vdso_pagelist = vdso32_pagelist;
-	vdso_pages = vdso32_pages;
-	vdso_base = VDSO32_MBASE;
-#endif
 
 	current->mm->context.vdso_base = 0;
 
@@ -614,9 +605,7 @@ static __init int vdso_setup(void)
 	struct lib64_elfinfo	v64;
 
 	v32.hdr = vdso32_kbase;
-#ifdef CONFIG_PPC64
 	v64.hdr = vdso64_kbase;
-#endif
 	if (vdso_do_find_sections(&v32, &v64))
 		return -1;
 
@@ -722,16 +711,14 @@ static int __init vdso_init(void)
 	vdso_data->icache_block_size = ppc64_caches.l1i.block_size;
 	vdso_data->dcache_log_block_size = ppc64_caches.l1d.log_block_size;
 	vdso_data->icache_log_block_size = ppc64_caches.l1i.log_block_size;
+#endif /* CONFIG_PPC64 */
 
 	/*
 	 * Calculate the size of the 64 bits vDSO
 	 */
 	vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT;
 	DBG("vdso64_kbase: %p, 0x%x pages\n", vdso64_kbase, vdso64_pages);
-#endif /* CONFIG_PPC64 */
-
-#ifdef CONFIG_VDSO32
 	vdso32_kbase = &vdso32_start;
 
 	/*
@@ -739,8 +726,6 @@ static int __init vdso_init(void)
 	 */
 	vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT;
 	DBG("vdso32_kbase: %p, 0x%x pages\n", vdso32_kbase, vdso32_pages);
-#endif
-
 	vdso_setup_syscall_map();
 
@@ -751,19 +736,15 @@ static int __init vdso_init(void)
 	if (vdso_setup()) {
 		printk(KERN_ERR "vDSO setup failure, not enabled !\n");
 		vdso32_pages = 0;
-#ifdef CONFIG_PPC64
 		vdso64_pages = 0;
-#endif
 		return 0;
 	}
 
-#ifdef CONFIG_VDSO32
-	vdso32_pagelist = vdso_setup_pages(&vdso32_start, &vdso32_end);
-#endif
+	if (IS_ENABLED(CONFIG_VDSO32))
+		vdso32_pagelist = vdso_setup_pages(&vdso32_start, &vdso32_end);
 
-#ifdef CONFIG_PPC64
-	vdso64_pagelist = vdso_setup_pages(&vdso64_start, &vdso64_end);
-#endif /* CONFIG_PPC64 */
+	if (IS_ENABLED(CONFIG_PPC64))
+		vdso64_pagelist = vdso_setup_pages(&vdso64_start, &vdso64_end);
 
 	smp_wmb();
 	vdso_ready = 1;

From 7461a4f79ba16dc7733c07c00883a10c7e46b602 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Sun, 27 Sep 2020 09:16:25 +0000
Subject: [PATCH 086/304] powerpc/vdso: Use VDSO size in arch_setup_additional_pages()

In arch_setup_additional_pages(), instead of using the number of VDSO
pages and recalculating the VDSO size, directly use the VDSO size.

As vdso_ready is set, vdso_pages can't be 0 so just remove the test.
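
[ed: a hedged sketch of the size arithmetic kept around this change: the
raw text size gains one page for the data page, and the search region is
over-reserved by (alignment - 1) so the base can be rounded up and the
mapping still fits; constants and the address are made up for the demo:

#include <stdio.h>

#define PAGE_SIZE_DEMO	4096UL
#define VDSO_ALIGN_DEMO	(64 * 1024UL)

static unsigned long align_up(unsigned long v, unsigned long a)
{
	return (v + a - 1) & ~(a - 1);
}

int main(void)
{
	unsigned long vdso_size = 3 * PAGE_SIZE_DEMO + PAGE_SIZE_DEMO; /* text + data page */
	unsigned long base = 0x7f0000001000UL;	/* unaligned area returned by the search */
	unsigned long mapped_at = align_up(base, VDSO_ALIGN_DEMO);

	printf("reserve %lu bytes at %#lx, map %lu bytes at %#lx\n",
	       vdso_size + VDSO_ALIGN_DEMO - 1, base, vdso_size, mapped_at);
	return 0;
}
]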
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4edfa548c3885a430b765335dc720105716e273f.1601197618.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/vdso.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index a24f6a583fac..448ecaa27ac5 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -126,7 +126,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; struct page **vdso_pagelist; - unsigned long vdso_pages; + unsigned long vdso_size; unsigned long vdso_base; int rc; @@ -135,11 +135,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (is_32bit_task()) { vdso_pagelist = vdso32_pagelist; - vdso_pages = vdso32_pages; + vdso_size = &vdso32_end - &vdso32_start; vdso_base = VDSO32_MBASE; } else { vdso_pagelist = vdso64_pagelist; - vdso_pages = vdso64_pages; + vdso_size = &vdso64_end - &vdso64_start; /* * On 64bit we don't have a preferred map address. This * allows get_unmapped_area to find an area near other mmaps @@ -150,13 +150,8 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) current->mm->context.vdso_base = 0; - /* vDSO has a problem and was disabled, just don't "enable" it for the - * process - */ - if (vdso_pages == 0) - return 0; /* Add a page to the vdso size for the data page */ - vdso_pages ++; + vdso_size += PAGE_SIZE; /* * pick a base address for the vDSO in process space. We try to put it @@ -167,8 +162,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (mmap_write_lock_killable(mm)) return -EINTR; vdso_base = get_unmapped_area(NULL, vdso_base, - (vdso_pages << PAGE_SHIFT) + - ((VDSO_ALIGNMENT - 1) & PAGE_MASK), + vdso_size + ((VDSO_ALIGNMENT - 1) & PAGE_MASK), 0, 0); if (IS_ERR_VALUE(vdso_base)) { rc = vdso_base; @@ -195,7 +189,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) * It's fine to use that for setting breakpoints in the vDSO code * pages though. */ - rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, + rc = install_special_mapping(mm, vdso_base, vdso_size, VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso_pagelist); From b2df3f60b452ab496adcef1b2f9c2560f6d8e8e0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 27 Sep 2020 09:16:26 +0000 Subject: [PATCH 087/304] powerpc/vdso: Simplify arch_setup_additional_pages() exit To simplify arch_setup_additional_pages() exit, rename it __arch_setup_additional_pages() and create a caller arch_setup_additional_pages() which does the locking. 
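
[ed: a userspace sketch of the split being made here: the wrapper owns
the lock and the failure cleanup, so every error path inside the helper
is a plain return instead of a goto-unlock label; names are illustrative,
not the kernel functions:

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t map_lock = PTHREAD_MUTEX_INITIALIZER;
static void *vdso;

static int __map_vdso(void)
{
	void *p = malloc(4096);

	if (!p)
		return -1;	/* no unlock here: the caller holds the lock */
	vdso = p;
	return 0;
}

static int map_vdso(void)
{
	int rc;

	vdso = NULL;
	pthread_mutex_lock(&map_lock);
	rc = __map_vdso();
	if (rc)
		vdso = NULL;
	pthread_mutex_unlock(&map_lock);
	return rc;
}

int main(void)
{
	return map_vdso();
}
]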
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/603c1d039d3f928ee95e547fcd2219fcf4c3b514.1601197618.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kernel/vdso.c | 40 ++++++++++++++++++------------------
 1 file changed, 21 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 448ecaa27ac5..a976c5e4a7ac 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -122,7 +122,7 @@ struct lib64_elfinfo
  * This is called from binfmt_elf, we create the special vma for the
  * vDSO and insert it into the mm struct tree
  */
-int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
 	struct mm_struct *mm = current->mm;
 	struct page **vdso_pagelist;
@@ -130,9 +130,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	unsigned long vdso_base;
 	int rc;
 
-	if (!vdso_ready)
-		return 0;
-
 	if (is_32bit_task()) {
 		vdso_pagelist = vdso32_pagelist;
 		vdso_size = &vdso32_end - &vdso32_start;
@@ -148,8 +145,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 		vdso_base = 0;
 	}
 
-	current->mm->context.vdso_base = 0;
-
 	/* Add a page to the vdso size for the data page */
 	vdso_size += PAGE_SIZE;
 
@@ -159,15 +154,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	 * and end up putting it elsewhere.
 	 * Add enough to the size so that the result can be aligned.
 	 */
-	if (mmap_write_lock_killable(mm))
-		return -EINTR;
 	vdso_base = get_unmapped_area(NULL, vdso_base,
 				      vdso_size + ((VDSO_ALIGNMENT - 1) & PAGE_MASK),
 				      0, 0);
-	if (IS_ERR_VALUE(vdso_base)) {
-		rc = vdso_base;
-		goto fail_mmapsem;
-	}
+	if (IS_ERR_VALUE(vdso_base))
+		return vdso_base;
 
 	/* Add required alignment. */
 	vdso_base = ALIGN(vdso_base, VDSO_ALIGNMENT);
@@ -193,15 +184,26 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 			VM_READ|VM_EXEC|
 			VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
 			vdso_pagelist);
-	if (rc) {
-		current->mm->context.vdso_base = 0;
-		goto fail_mmapsem;
-	}
+	return rc;
+}
 
-	mmap_write_unlock(mm);
-	return 0;
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+	struct mm_struct *mm = current->mm;
+	int rc;
+
+	mm->context.vdso_base = 0;
+
+	if (!vdso_ready)
+		return 0;
+
+	if (mmap_write_lock_killable(mm))
+		return -EINTR;
+
+	rc = __arch_setup_additional_pages(bprm, uses_interp);
+	if (rc)
+		mm->context.vdso_base = 0;
 
- fail_mmapsem:
 	mmap_write_unlock(mm);
 	return rc;
 }

From c1bab64360e6850ca54305d2f1902dac829c9752 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Sun, 27 Sep 2020 09:16:27 +0000
Subject: [PATCH 088/304] powerpc/vdso: Move to _install_special_mapping() and remove arch_vma_name()

Copied from commit 2fea7f6c98f5 ("arm64: vdso: move to
_install_special_mapping and remove arch_vma_name").

Use the new _install_special_mapping() API added by commit a62c34bd2a8a
("x86, mm: Improve _install_special_mapping and fix x86 vdso naming")
which obsoletes install_special_mapping().

And remove arch_vma_name() as the name is handled by the new API.
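
[ed: a sketch of the error-pointer idiom behind the PTR_ERR_OR_ZERO()
used in this patch: a function returns either a real pointer or a small
negative errno encoded in the pointer value, and the caller folds that
into 0-or-errno; these are local re-implementations for illustration,
the kernel's live in <linux/err.h>:

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095UL

static void *err_ptr(long err)	   { return (void *)err; }
static long ptr_err(const void *p) { return (long)p; }

static int is_err(const void *p)
{
	return (unsigned long)p >= -MAX_ERRNO;
}

static long ptr_err_or_zero(const void *p)
{
	return is_err(p) ? ptr_err(p) : 0;
}

static void *install_mapping(int fail)
{
	static char vma;

	return fail ? err_ptr(-ENOMEM) : &vma;
}

int main(void)
{
	printf("ok: %ld, fail: %ld\n",
	       ptr_err_or_zero(install_mapping(0)),
	       ptr_err_or_zero(install_mapping(1)));	/* prints ok: 0, fail: -12 */
	return 0;
}
]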
Signed-off-by: Christophe Leroy Signed-off-by: kernel test robot [mpe: Squash fix to use PTR_ERR_OR_ZERO() from lkp] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e7e5dfe0f93234e31051f2a610b4b07f50b0082f.1601197618.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/vdso.c | 42 +++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index a976c5e4a7ac..67fb4c7e504c 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -49,7 +49,6 @@ static unsigned int vdso32_pages; static void *vdso32_kbase; -static struct page **vdso32_pagelist; unsigned long vdso32_sigtramp; unsigned long vdso32_rt_sigtramp; @@ -57,7 +56,6 @@ extern char vdso32_start, vdso32_end; extern char vdso64_start, vdso64_end; static void *vdso64_kbase = &vdso64_start; static unsigned int vdso64_pages; -static struct page **vdso64_pagelist; #ifdef CONFIG_PPC64 unsigned long vdso64_rt_sigtramp; #endif /* CONFIG_PPC64 */ @@ -118,6 +116,14 @@ struct lib64_elfinfo }; +static struct vm_special_mapping vdso32_spec __ro_after_init = { + .name = "[vdso]", +}; + +static struct vm_special_mapping vdso64_spec __ro_after_init = { + .name = "[vdso]", +}; + /* * This is called from binfmt_elf, we create the special vma for the * vDSO and insert it into the mm struct tree @@ -125,17 +131,17 @@ struct lib64_elfinfo static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; - struct page **vdso_pagelist; + struct vm_special_mapping *vdso_spec; + struct vm_area_struct *vma; unsigned long vdso_size; unsigned long vdso_base; - int rc; if (is_32bit_task()) { - vdso_pagelist = vdso32_pagelist; + vdso_spec = &vdso32_spec; vdso_size = &vdso32_end - &vdso32_start; vdso_base = VDSO32_MBASE; } else { - vdso_pagelist = vdso64_pagelist; + vdso_spec = &vdso64_spec; vdso_size = &vdso64_end - &vdso64_start; /* * On 64bit we don't have a preferred map address. This @@ -166,7 +172,7 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int /* * Put vDSO base into mm struct. We need to do this before calling * install_special_mapping or the perf counter mmap tracking code - * will fail to recognise it as a vDSO (since arch_vma_name fails). + * will fail to recognise it as a vDSO. */ current->mm->context.vdso_base = vdso_base; @@ -180,11 +186,10 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int * It's fine to use that for setting breakpoints in the vDSO code * pages though. 
*/ - rc = install_special_mapping(mm, vdso_base, vdso_size, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - vdso_pagelist); - return rc; + vma = _install_special_mapping(mm, vdso_base, vdso_size, + VM_READ | VM_EXEC | VM_MAYREAD | + VM_MAYWRITE | VM_MAYEXEC, vdso_spec); + return PTR_ERR_OR_ZERO(vma); } int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) @@ -208,15 +213,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) return rc; } -const char *arch_vma_name(struct vm_area_struct *vma) -{ - if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso_base) - return "[vdso]"; - return NULL; -} - - - #ifdef CONFIG_VDSO32 static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname, unsigned long *size) @@ -737,10 +733,10 @@ static int __init vdso_init(void) } if (IS_ENABLED(CONFIG_VDSO32)) - vdso32_pagelist = vdso_setup_pages(&vdso32_start, &vdso32_end); + vdso32_spec.pages = vdso_setup_pages(&vdso32_start, &vdso32_end); if (IS_ENABLED(CONFIG_PPC64)) - vdso64_pagelist = vdso_setup_pages(&vdso64_start, &vdso64_end); + vdso64_spec.pages = vdso_setup_pages(&vdso64_start, &vdso64_end); smp_wmb(); vdso_ready = 1; From 526a9c4a7234cccf6d900c6e82d79356f974cbfd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 27 Sep 2020 09:16:28 +0000 Subject: [PATCH 089/304] powerpc/vdso: Provide vdso_remap() Provide vdso_remap() through _install_special_mapping() and drop arch_remap(). This adds a test of the size and returns -EINVAL if the size is not correct. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/373c66f768fa9cc8890f3b55462209a98c522326.1601197618.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/mm-arch-hooks.h | 25 ------------------------ arch/powerpc/kernel/vdso.c | 24 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 25 deletions(-) delete mode 100644 arch/powerpc/include/asm/mm-arch-hooks.h diff --git a/arch/powerpc/include/asm/mm-arch-hooks.h b/arch/powerpc/include/asm/mm-arch-hooks.h deleted file mode 100644 index dce274be824a..000000000000 --- a/arch/powerpc/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour - */ - -#ifndef _ASM_POWERPC_MM_ARCH_HOOKS_H -#define _ASM_POWERPC_MM_ARCH_HOOKS_H - -static inline void arch_remap(struct mm_struct *mm, - unsigned long old_start, unsigned long old_end, - unsigned long new_start, unsigned long new_end) -{ - /* - * mremap() doesn't allow moving multiple vmas so we can limit the - * check to old_start == vdso_base. 
- */ - if (old_start == mm->context.vdso_base) - mm->context.vdso_base = new_start; -} -#define arch_remap arch_remap - -#endif /* _ASM_POWERPC_MM_ARCH_HOOKS_H */ diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 67fb4c7e504c..2b975759a04d 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -115,13 +115,37 @@ struct lib64_elfinfo unsigned long text; }; +static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma, + unsigned long text_size) +{ + unsigned long new_size = new_vma->vm_end - new_vma->vm_start; + + if (new_size != text_size + PAGE_SIZE) + return -EINVAL; + + current->mm->context.vdso_base = new_vma->vm_start; + + return 0; +} + +static int vdso32_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) +{ + return vdso_mremap(sm, new_vma, &vdso32_end - &vdso32_start); +} + +static int vdso64_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) +{ + return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start); +} static struct vm_special_mapping vdso32_spec __ro_after_init = { .name = "[vdso]", + .mremap = vdso32_mremap, }; static struct vm_special_mapping vdso64_spec __ro_after_init = { .name = "[vdso]", + .mremap = vdso64_mremap, }; /* From c102f07667486dc4a6ae1e3fe7aa67135cb40e3e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 27 Sep 2020 09:16:29 +0000 Subject: [PATCH 090/304] powerpc/vdso: Replace vdso_base by vdso All other architectures but s390 use a void pointer named 'vdso' to reference the VDSO mapping. In a following patch, the VDSO data page will be put in front of text, vdso_base will then not anymore point to VDSO text. To avoid confusion between vdso_base and VDSO text, rename vdso_base into vdso and make it a void __user *. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8e6cefe474aa4ceba028abb729485cd46c140990.1601197618.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/mmu-hash.h | 2 +- arch/powerpc/include/asm/book3s/64/mmu.h | 2 +- arch/powerpc/include/asm/elf.h | 2 +- arch/powerpc/include/asm/mmu_context.h | 6 ++++-- arch/powerpc/include/asm/nohash/32/mmu-40x.h | 2 +- arch/powerpc/include/asm/nohash/32/mmu-44x.h | 2 +- arch/powerpc/include/asm/nohash/32/mmu-8xx.h | 2 +- arch/powerpc/include/asm/nohash/mmu-book3e.h | 2 +- arch/powerpc/kernel/signal_32.c | 8 ++++---- arch/powerpc/kernel/signal_64.c | 4 ++-- arch/powerpc/kernel/vdso.c | 8 ++++---- arch/powerpc/perf/callchain_32.c | 8 ++++---- arch/powerpc/perf/callchain_64.c | 4 ++-- 13 files changed, 27 insertions(+), 25 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index 2e277ca0170f..331187661236 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -90,7 +90,7 @@ struct hash_pte { typedef struct { unsigned long id; - unsigned long vdso_base; + void __user *vdso; } mm_context_t; void update_bats(void); diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index e0b52940e43c..ad0837d8076d 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -111,7 +111,7 @@ typedef struct { struct hash_mm_context *hash_context; - unsigned long vdso_base; + void __user *vdso; /* * pagetable fragment support */ diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 53ed2ca40151..4ecc372c408e 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -169,7 +169,7 @@ do { \ NEW_AUX_ENT(AT_DCACHEBSIZE, dcache_bsize); \ NEW_AUX_ENT(AT_ICACHEBSIZE, icache_bsize); \ NEW_AUX_ENT(AT_UCACHEBSIZE, ucache_bsize); \ - VDSO_AUX_ENT(AT_SYSINFO_EHDR, current->mm->context.vdso_base); \ + VDSO_AUX_ENT(AT_SYSINFO_EHDR, (unsigned long)current->mm->context.vdso);\ ARCH_DLINFO_CACHE_GEOMETRY; \ } while (0) diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index e02aa793420b..d54358cb5be1 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -262,8 +262,10 @@ extern void arch_exit_mmap(struct mm_struct *mm); static inline void arch_unmap(struct mm_struct *mm, unsigned long start, unsigned long end) { - if (start <= mm->context.vdso_base && mm->context.vdso_base < end) - mm->context.vdso_base = 0; + unsigned long vdso_base = (unsigned long)mm->context.vdso; + + if (start <= vdso_base && vdso_base < end) + mm->context.vdso = NULL; } #ifdef CONFIG_PPC_MEM_KEYS diff --git a/arch/powerpc/include/asm/nohash/32/mmu-40x.h b/arch/powerpc/include/asm/nohash/32/mmu-40x.h index 74f4edb5916e..8a8f13a22cf4 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-40x.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-40x.h @@ -57,7 +57,7 @@ typedef struct { unsigned int id; unsigned int active; - unsigned long vdso_base; + void __user *vdso; } mm_context_t; #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/nohash/32/mmu-44x.h b/arch/powerpc/include/asm/nohash/32/mmu-44x.h index 28aa3b339c5e..2d92a39d8f2e 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-44x.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-44x.h @@ -108,7 +108,7 @@ extern unsigned int tlb_44x_index; typedef 
struct { unsigned int id; unsigned int active; - unsigned long vdso_base; + void __user *vdso; } mm_context_t; /* patch sites */ diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index 0bd1b144eb76..478249959baa 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -181,7 +181,7 @@ void mmu_pin_tlb(unsigned long top, bool readonly); typedef struct { unsigned int id; unsigned int active; - unsigned long vdso_base; + void __user *vdso; void *pte_frag; } mm_context_t; diff --git a/arch/powerpc/include/asm/nohash/mmu-book3e.h b/arch/powerpc/include/asm/nohash/mmu-book3e.h index b41004664312..e43a418d3ccd 100644 --- a/arch/powerpc/include/asm/nohash/mmu-book3e.h +++ b/arch/powerpc/include/asm/nohash/mmu-book3e.h @@ -238,7 +238,7 @@ extern unsigned int tlbcam_index; typedef struct { unsigned int id; unsigned int active; - unsigned long vdso_base; + void __user *vdso; } mm_context_t; /* Page size definitions, common between 32 and 64-bit diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 123682299d4f..e45aafef4c5b 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -801,8 +801,8 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, } /* Save user registers on the stack */ - if (vdso32_rt_sigtramp && tsk->mm->context.vdso_base) { - tramp = tsk->mm->context.vdso_base + vdso32_rt_sigtramp; + if (vdso32_rt_sigtramp && tsk->mm->context.vdso) { + tramp = (unsigned long)tsk->mm->context.vdso + vdso32_rt_sigtramp; } else { tramp = (unsigned long)mctx->mc_pad; /* Set up the sigreturn trampoline: li r0,sigret; sc */ @@ -901,8 +901,8 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, else unsafe_save_user_regs(regs, mctx, tm_mctx, 1, failed); - if (vdso32_sigtramp && tsk->mm->context.vdso_base) { - tramp = tsk->mm->context.vdso_base + vdso32_sigtramp; + if (vdso32_sigtramp && tsk->mm->context.vdso) { + tramp = (unsigned long)tsk->mm->context.vdso + vdso32_sigtramp; } else { tramp = (unsigned long)mctx->mc_pad; /* Set up the sigreturn trampoline: li r0,sigret; sc */ diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 7df088b9ad0f..68e850bd5ef7 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -854,8 +854,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, tsk->thread.fp_state.fpscr = 0; /* Set up to return from userspace. */ - if (vdso64_rt_sigtramp && tsk->mm->context.vdso_base) { - regs->nip = tsk->mm->context.vdso_base + vdso64_rt_sigtramp; + if (vdso64_rt_sigtramp && tsk->mm->context.vdso) { + regs->nip = (unsigned long)tsk->mm->context.vdso + vdso64_rt_sigtramp; } else { err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); if (err) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 2b975759a04d..5214cd4909f8 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -123,7 +123,7 @@ static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struc if (new_size != text_size + PAGE_SIZE) return -EINVAL; - current->mm->context.vdso_base = new_vma->vm_start; + current->mm->context.vdso = (void __user *)new_vma->vm_start; return 0; } @@ -198,7 +198,7 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int * install_special_mapping or the perf counter mmap tracking code * will fail to recognise it as a vDSO. 
 */
-	current->mm->context.vdso_base = vdso_base;
+	mm->context.vdso = (void __user *)vdso_base;
 
 	/*
 	 * our vma flags don't have VM_WRITE so by default, the process isn't
@@ -221,7 +221,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	struct mm_struct *mm = current->mm;
 	int rc;
 
-	mm->context.vdso_base = 0;
+	mm->context.vdso = NULL;
 
 	if (!vdso_ready)
 		return 0;
@@ -231,7 +231,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 
 	rc = __arch_setup_additional_pages(bprm, uses_interp);
 	if (rc)
-		mm->context.vdso_base = 0;
+		mm->context.vdso = NULL;
 
 	mmap_write_unlock(mm);
 	return rc;
diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c
index 64e4013d8060..b32e94047fb9 100644
--- a/arch/powerpc/perf/callchain_32.c
+++ b/arch/powerpc/perf/callchain_32.c
@@ -59,8 +59,8 @@ static int is_sigreturn_32_address(unsigned int nip, unsigned int fp)
 {
 	if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad))
 		return 1;
-	if (vdso32_sigtramp && current->mm->context.vdso_base &&
-	    nip == current->mm->context.vdso_base + vdso32_sigtramp)
+	if (vdso32_sigtramp && current->mm->context.vdso &&
+	    nip == (unsigned long)current->mm->context.vdso + vdso32_sigtramp)
 		return 1;
 	return 0;
 }
@@ -70,8 +70,8 @@ static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp)
 	if (nip == fp + offsetof(struct rt_signal_frame_32,
 				 uc.uc_mcontext.mc_pad))
 		return 1;
-	if (vdso32_rt_sigtramp && current->mm->context.vdso_base &&
-	    nip == current->mm->context.vdso_base + vdso32_rt_sigtramp)
+	if (vdso32_rt_sigtramp && current->mm->context.vdso &&
+	    nip == (unsigned long)current->mm->context.vdso + vdso32_rt_sigtramp)
 		return 1;
 	return 0;
 }
diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c
index 0777b04a0c56..6b9c06058c33 100644
--- a/arch/powerpc/perf/callchain_64.c
+++ b/arch/powerpc/perf/callchain_64.c
@@ -68,8 +68,8 @@ static int is_sigreturn_64_address(unsigned long nip, unsigned long fp)
 {
 	if (nip == fp + offsetof(struct signal_frame_64, tramp))
 		return 1;
-	if (vdso64_rt_sigtramp && current->mm->context.vdso_base &&
-	    nip == current->mm->context.vdso_base + vdso64_rt_sigtramp)
+	if (vdso64_rt_sigtramp && current->mm->context.vdso &&
+	    nip == (unsigned long)current->mm->context.vdso + vdso64_rt_sigtramp)
 		return 1;
 	return 0;
 }

From 511157ab641eb6bedd00d62673388e78a4f871cf Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Sun, 27 Sep 2020 09:16:30 +0000
Subject: [PATCH 091/304] powerpc/vdso: Move vdso datapage up front

Move the vdso datapage in front of the VDSO area, before the VDSO text.

This will allow removing the __kernel_datapage_offset symbol and
simplify __get_datapage() in following patches.
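
[ed: an address-arithmetic sketch of the new layout, with hypothetical
addresses: the special mapping now starts with the data page and
context.vdso points one page in, at the first text page, so the data
page is always found at vdso - PAGE_SIZE without any stored offset:

#include <stdio.h>

#define PAGE_SIZE_DEMO 4096UL

int main(void)
{
	unsigned long vma_start = 0x7ffff7fc0000UL;	 /* base of the special mapping */
	unsigned long vdso = vma_start + PAGE_SIZE_DEMO; /* what context.vdso stores */
	unsigned long datapage = vdso - PAGE_SIZE_DEMO;	 /* what __get_datapage() computes */

	printf("vma %#lx, text %#lx, data %#lx\n", vma_start, vdso, datapage);
	return 0;
}
]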
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b68c99b6e8ee0b1d99bfa4c7e34c359fc1bc1000.1601197618.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/mmu_context.h | 2 +- arch/powerpc/kernel/vdso.c | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index d54358cb5be1..e5a5e3cb7724 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -262,7 +262,7 @@ extern void arch_exit_mmap(struct mm_struct *mm); static inline void arch_unmap(struct mm_struct *mm, unsigned long start, unsigned long end) { - unsigned long vdso_base = (unsigned long)mm->context.vdso; + unsigned long vdso_base = (unsigned long)mm->context.vdso - PAGE_SIZE; if (start <= vdso_base && vdso_base < end) mm->context.vdso = NULL; diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 5214cd4909f8..e10bc0d9856c 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -123,7 +123,7 @@ static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struc if (new_size != text_size + PAGE_SIZE) return -EINVAL; - current->mm->context.vdso = (void __user *)new_vma->vm_start; + current->mm->context.vdso = (void __user *)new_vma->vm_start + PAGE_SIZE; return 0; } @@ -198,7 +198,7 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int * install_special_mapping or the perf counter mmap tracking code * will fail to recognise it as a vDSO. */ - mm->context.vdso = (void __user *)vdso_base; + mm->context.vdso = (void __user *)vdso_base + PAGE_SIZE; /* * our vma flags don't have VM_WRITE so by default, the process isn't @@ -507,7 +507,7 @@ static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32, return -1; } *((int *)(vdso64_kbase + sym64->st_value - VDSO64_LBASE)) = - (vdso64_pages << PAGE_SHIFT) - + -PAGE_SIZE - (sym64->st_value - VDSO64_LBASE); #endif /* CONFIG_PPC64 */ @@ -519,7 +519,7 @@ static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32, return -1; } *((int *)(vdso32_kbase + (sym32->st_value - VDSO32_LBASE))) = - (vdso32_pages << PAGE_SHIFT) - + -PAGE_SIZE - (sym32->st_value - VDSO32_LBASE); #endif @@ -693,10 +693,10 @@ static struct page ** __init vdso_setup_pages(void *start, void *end) if (!pagelist) panic("%s: Cannot allocate page list for VDSO", __func__); - for (i = 0; i < pages; i++) - pagelist[i] = virt_to_page(start + i * PAGE_SIZE); + pagelist[0] = virt_to_page(vdso_data); - pagelist[i] = virt_to_page(vdso_data); + for (i = 0; i < pages; i++) + pagelist[i + 1] = virt_to_page(start + i * PAGE_SIZE); return pagelist; } From 591857b635c1f635cae556e1b1f9d81808242493 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 27 Sep 2020 09:16:31 +0000 Subject: [PATCH 092/304] powerpc/vdso: Simplify __get_datapage() The VDSO datapage and the text pages are always located immediately next to each other, so it can be hardcoded without an indirection through __kernel_datapage_offset Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b08f5ef99d64cfc38f79b7ad5310d9b4d2479eeb.1601197618.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/vdso_datapage.h | 8 +++++--- arch/powerpc/kernel/vdso32/vdso32.lds.S | 2 ++ arch/powerpc/kernel/vdso64/vdso64.lds.S | 2 ++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/vdso_datapage.h 
b/arch/powerpc/include/asm/vdso_datapage.h index 3d996db05acd..535ba737397d 100644 --- a/arch/powerpc/include/asm/vdso_datapage.h +++ b/arch/powerpc/include/asm/vdso_datapage.h @@ -105,10 +105,12 @@ extern struct vdso_arch_data *vdso_data; .macro get_datapage ptr, tmp bcl 20, 31, .+4 +999: mflr \ptr - addi \ptr, \ptr, (__kernel_datapage_offset - (.-4))@l - lwz \tmp, 0(\ptr) - add \ptr, \tmp, \ptr +#if CONFIG_PPC_PAGE_SHIFT > 14 + addis \ptr, \ptr, (_vdso_datapage - 999b)@ha +#endif + addi \ptr, \ptr, (_vdso_datapage - 999b)@l .endm #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index 27a2d03c72d5..88a2976e9942 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -4,6 +4,7 @@ * library */ #include +#include #ifdef __LITTLE_ENDIAN__ OUTPUT_FORMAT("elf32-powerpcle", "elf32-powerpcle", "elf32-powerpcle") @@ -15,6 +16,7 @@ ENTRY(_start) SECTIONS { + PROVIDE(_vdso_datapage = . - PAGE_SIZE); . = VDSO32_LBASE + SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 71be083b24ed..e43731386469 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -4,6 +4,7 @@ * library */ #include +#include #ifdef __LITTLE_ENDIAN__ OUTPUT_FORMAT("elf64-powerpcle", "elf64-powerpcle", "elf64-powerpcle") @@ -15,6 +16,7 @@ ENTRY(_start) SECTIONS { + PROVIDE(_vdso_datapage = . - PAGE_SIZE); . = VDSO64_LBASE + SIZEOF_HEADERS; .hash : { *(.hash) } :text From 550e6074c106e1a6fb57dfef62f0daede12d832c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 27 Sep 2020 09:16:32 +0000 Subject: [PATCH 093/304] powerpc/vdso: Remove unused \tmp param in __get_datapage() The \tmp param is not used anymore, remove it. 
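The reason no scratch register is needed any more: the offset to _vdso_datapage is a link-time constant folded into the addis/addi immediates instead of a value loaded from memory. A worked example with assumed offsets (illustrative only, 4K pages): if the mflr at label 999 sits at offset 0x704 of the vDSO text, then

    999b                  = text_base + 0x704   (LR set by the bcl, read by mflr)
    _vdso_datapage        = text_base - 0x1000  (data page mapped just below the text)
    _vdso_datapage - 999b = -0x1704             (link-time constant)
    addi \ptr, \ptr, -0x1704  yields  text_base - 0x1000, the data page

The addis half is only needed when CONFIG_PPC_PAGE_SHIFT > 14, i.e. when the offset may no longer fit in addi's signed 16-bit immediate.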
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4b13f897dcccce8ae03c031a4598cf26b32e2f1c.1601197618.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/vdso/gettimeofday.h | 4 ++-- arch/powerpc/include/asm/vdso_datapage.h | 2 +- arch/powerpc/kernel/vdso32/cacheflush.S | 2 +- arch/powerpc/kernel/vdso32/datapage.S | 4 ++-- arch/powerpc/kernel/vdso64/cacheflush.S | 2 +- arch/powerpc/kernel/vdso64/datapage.S | 4 ++-- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index 0f95569e8fc3..81671aa365b3 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -22,7 +22,7 @@ #ifdef __powerpc64__ PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) #endif - get_datapage r5, r0 + get_datapage r5 addi r5, r5, VDSO_DATA_OFFSET bl DOTSYM(\funct) PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) @@ -51,7 +51,7 @@ #ifdef __powerpc64__ PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) #endif - get_datapage r4, r0 + get_datapage r4 addi r4, r4, VDSO_DATA_OFFSET bl DOTSYM(\funct) PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h index 535ba737397d..3f958ecf2beb 100644 --- a/arch/powerpc/include/asm/vdso_datapage.h +++ b/arch/powerpc/include/asm/vdso_datapage.h @@ -103,7 +103,7 @@ extern struct vdso_arch_data *vdso_data; #else /* __ASSEMBLY__ */ -.macro get_datapage ptr, tmp +.macro get_datapage ptr bcl 20, 31, .+4 999: mflr \ptr diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S index 3440ddf21c8b..017843bf5382 100644 --- a/arch/powerpc/kernel/vdso32/cacheflush.S +++ b/arch/powerpc/kernel/vdso32/cacheflush.S @@ -27,7 +27,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) #ifdef CONFIG_PPC64 mflr r12 .cfi_register lr,r12 - get_datapage r10, r0 + get_datapage r10 mtlr r12 #endif diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S index 1d23e2771dba..abff2250e891 100644 --- a/arch/powerpc/kernel/vdso32/datapage.S +++ b/arch/powerpc/kernel/vdso32/datapage.S @@ -31,7 +31,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) mflr r12 .cfi_register lr,r12 mr. 
r4,r3 - get_datapage r3, r0 + get_datapage r3 mtlr r12 addi r3,r3,CFG_SYSCALL_MAP32 beqlr @@ -51,7 +51,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) .cfi_startproc mflr r12 .cfi_register lr,r12 - get_datapage r3, r0 + get_datapage r3 lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3) lwz r3,CFG_TB_TICKS_PER_SEC(r3) mtlr r12 diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S index cab14324242b..61985de5758f 100644 --- a/arch/powerpc/kernel/vdso64/cacheflush.S +++ b/arch/powerpc/kernel/vdso64/cacheflush.S @@ -25,7 +25,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) .cfi_startproc mflr r12 .cfi_register lr,r12 - get_datapage r10, r0 + get_datapage r10 mtlr r12 lwz r7,CFG_DCACHE_BLOCKSZ(r10) diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S index 067247d3efb9..941b735df069 100644 --- a/arch/powerpc/kernel/vdso64/datapage.S +++ b/arch/powerpc/kernel/vdso64/datapage.S @@ -31,7 +31,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) mflr r12 .cfi_register lr,r12 mr r4,r3 - get_datapage r3, r0 + get_datapage r3 mtlr r12 addi r3,r3,CFG_SYSCALL_MAP64 cmpldi cr0,r4,0 @@ -53,7 +53,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) .cfi_startproc mflr r12 .cfi_register lr,r12 - get_datapage r3, r0 + get_datapage r3 ld r3,CFG_TB_TICKS_PER_SEC(r3) mtlr r12 crclr cr0*4+so From 91bf695596f594e42d69d70deb2ae53cafecf77c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 27 Sep 2020 09:16:33 +0000 Subject: [PATCH 094/304] powerpc/vdso: Retrieve sigtramp offsets at buildtime This is copied from arm64. Instead of using runtime generated signal trampoline offsets, get offsets at buildtime. If the said trampoline doesn't exist, build will fail. So no need to check whether the trampoline exists or not in the VDSO. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f8bfd6812c3e3678b1cdb4d55a52f9eb022b40d3.1601197618.git.christophe.leroy@csgroup.eu --- arch/powerpc/Makefile | 15 +++++++++++++++ arch/powerpc/include/asm/vdso.h | 12 ++++++++++++ arch/powerpc/kernel/signal_32.c | 8 ++++---- arch/powerpc/kernel/signal_64.c | 4 ++-- arch/powerpc/kernel/vdso32/Makefile | 8 ++++++++ arch/powerpc/kernel/vdso32/gen_vdso_offsets.sh | 16 ++++++++++++++++ arch/powerpc/kernel/vdso32/vdso32.lds.S | 6 ++++++ arch/powerpc/kernel/vdso64/Makefile | 8 ++++++++ arch/powerpc/kernel/vdso64/gen_vdso_offsets.sh | 16 ++++++++++++++++ arch/powerpc/kernel/vdso64/vdso64.lds.S | 5 +++++ arch/powerpc/perf/callchain_32.c | 8 ++++---- arch/powerpc/perf/callchain_64.c | 4 ++-- 12 files changed, 98 insertions(+), 12 deletions(-) create mode 100755 arch/powerpc/kernel/vdso32/gen_vdso_offsets.sh create mode 100755 arch/powerpc/kernel/vdso64/gen_vdso_offsets.sh diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 86c925bfbb76..fde3dbe57bda 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -409,6 +409,21 @@ install: archclean: $(Q)$(MAKE) $(clean)=$(boot) +ifeq ($(KBUILD_EXTMOD),) +# We need to generate vdso-offsets.h before compiling certain files in kernel/. +# In order to do that, we should use the archprepare target, but we can't since +# asm-offsets.h is included in some files used to generate vdso-offsets.h, and +# asm-offsets.h is built in prepare0, for which archprepare is a dependency. +# Therefore we need to generate the header after prepare0 has been made, hence +# this hack. 
+prepare: vdso_prepare +vdso_prepare: prepare0 + $(if $(CONFIG_VDSO32),$(Q)$(MAKE) \ + $(build)=arch/powerpc/kernel/vdso32 include/generated/vdso32-offsets.h) + $(if $(CONFIG_PPC64),$(Q)$(MAKE) \ + $(build)=arch/powerpc/kernel/vdso64 include/generated/vdso64-offsets.h) +endif + archprepare: checkbin archheaders: diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h index 2ff884853f97..f5257b7f17d0 100644 --- a/arch/powerpc/include/asm/vdso.h +++ b/arch/powerpc/include/asm/vdso.h @@ -15,6 +15,18 @@ #ifndef __ASSEMBLY__ +#ifdef CONFIG_PPC64 +#include +#endif + +#ifdef CONFIG_VDSO32 +#include +#endif + +#define VDSO64_SYMBOL(base, name) ((unsigned long)(base) + (vdso64_offset_##name)) + +#define VDSO32_SYMBOL(base, name) ((unsigned long)(base) + (vdso32_offset_##name)) + /* Offsets relative to thread->vdso_base */ extern unsigned long vdso64_rt_sigtramp; extern unsigned long vdso32_sigtramp; diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index e45aafef4c5b..934cbdf6dd10 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -801,8 +801,8 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, } /* Save user registers on the stack */ - if (vdso32_rt_sigtramp && tsk->mm->context.vdso) { - tramp = (unsigned long)tsk->mm->context.vdso + vdso32_rt_sigtramp; + if (tsk->mm->context.vdso) { + tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp_rt32); } else { tramp = (unsigned long)mctx->mc_pad; /* Set up the sigreturn trampoline: li r0,sigret; sc */ @@ -901,8 +901,8 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, else unsafe_save_user_regs(regs, mctx, tm_mctx, 1, failed); - if (vdso32_sigtramp && tsk->mm->context.vdso) { - tramp = (unsigned long)tsk->mm->context.vdso + vdso32_sigtramp; + if (tsk->mm->context.vdso) { + tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp32); } else { tramp = (unsigned long)mctx->mc_pad; /* Set up the sigreturn trampoline: li r0,sigret; sc */ diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 68e850bd5ef7..f9e4a1ac440f 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -854,8 +854,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, tsk->thread.fp_state.fpscr = 0; /* Set up to return from userspace. 
*/ - if (vdso64_rt_sigtramp && tsk->mm->context.vdso) { - regs->nip = (unsigned long)tsk->mm->context.vdso + vdso64_rt_sigtramp; + if (tsk->mm->context.vdso) { + regs->nip = VDSO64_SYMBOL(tsk->mm->context.vdso, sigtramp_rt64); } else { err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); if (err) diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index a119d9f84b08..59aa2944ecae 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -59,6 +59,14 @@ $(obj-vdso32): %.o: %.S FORCE $(obj)/vgettimeofday.o: %.o: %.c FORCE $(call if_changed_dep,vdso32cc) +# Generate VDSO offsets using helper script +gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ + +include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE + $(call if_changed,vdsosym) + # actual build commands quiet_cmd_vdso32ld_and_check = VDSO32L $@ cmd_vdso32ld_and_check = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) ; $(cmd_vdso_check) diff --git a/arch/powerpc/kernel/vdso32/gen_vdso_offsets.sh b/arch/powerpc/kernel/vdso32/gen_vdso_offsets.sh new file mode 100755 index 000000000000..c7b54a5dcd3e --- /dev/null +++ b/arch/powerpc/kernel/vdso32/gen_vdso_offsets.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# +# Match symbols in the DSO that look like VDSO_*; produce a header file +# of constant offsets into the shared object. +# +# Doing this inside the Makefile will break the $(filter-out) function, +# causing Kbuild to rebuild the vdso-offsets header file every time. +# +# Author: Will Deacon $@ + +include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE + $(call if_changed,vdsosym) + # actual build commands quiet_cmd_vdso64ld_and_check = VDSO64L $@ cmd_vdso64ld_and_check = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check) diff --git a/arch/powerpc/kernel/vdso64/gen_vdso_offsets.sh b/arch/powerpc/kernel/vdso64/gen_vdso_offsets.sh new file mode 100755 index 000000000000..4bf15ffd5933 --- /dev/null +++ b/arch/powerpc/kernel/vdso64/gen_vdso_offsets.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# +# Match symbols in the DSO that look like VDSO_*; produce a header file +# of constant offsets into the shared object. +# +# Doing this inside the Makefile will break the $(filter-out) function, +# causing Kbuild to rebuild the vdso-offsets header file every time. 
+# +# Author: Will Deacon mm->context.vdso && - nip == (unsigned long)current->mm->context.vdso + vdso32_sigtramp) + if (current->mm->context.vdso && + nip == VDSO32_SYMBOL(current->mm->context.vdso, sigtramp32)) return 1; return 0; } @@ -70,8 +70,8 @@ static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp) if (nip == fp + offsetof(struct rt_signal_frame_32, uc.uc_mcontext.mc_pad)) return 1; - if (vdso32_rt_sigtramp && current->mm->context.vdso && - nip == (unsigned long)current->mm->context.vdso + vdso32_rt_sigtramp) + if (current->mm->context.vdso && + nip == VDSO32_SYMBOL(current->mm->context.vdso, sigtramp_rt32)) return 1; return 0; } diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c index 6b9c06058c33..8d0df4226328 100644 --- a/arch/powerpc/perf/callchain_64.c +++ b/arch/powerpc/perf/callchain_64.c @@ -68,8 +68,8 @@ static int is_sigreturn_64_address(unsigned long nip, unsigned long fp) { if (nip == fp + offsetof(struct signal_frame_64, tramp)) return 1; - if (vdso64_rt_sigtramp && current->mm->context.vdso && - nip == (unsigned long)current->mm->context.vdso + vdso64_rt_sigtramp) + if (current->mm->context.vdso && + nip == VDSO64_SYMBOL(current->mm->context.vdso, sigtramp_rt64)) return 1; return 0; } From ed07f6353ddf19e51c4db6d2be72ca97f7ed8a08 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 27 Sep 2020 09:16:34 +0000 Subject: [PATCH 095/304] powerpc/vdso: Use builtin symbols to locate fixup section Add builtin symbols to locate fixup section and use them instead of locating sections through elf headers at runtime. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2954526981859ca1ccfcfc7a7c4263920e9ddfcb.1601197618.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/vdso.c | 55 +++++++------------------ arch/powerpc/kernel/vdso32/vdso32.lds.S | 8 ++++ arch/powerpc/kernel/vdso64/vdso64.lds.S | 8 ++++ 3 files changed, 30 insertions(+), 41 deletions(-) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index e10bc0d9856c..27449202c1d7 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -434,6 +434,12 @@ static int __init vdso_do_func_patch64(struct lib32_elfinfo *v32, #endif /* CONFIG_PPC64 */ +#define VDSO_DO_FIXUPS(type, value, bits, sec) do { \ + void *__start = (void *)VDSO##bits##_SYMBOL(&vdso##bits##_start, sec##_start); \ + void *__end = (void *)VDSO##bits##_SYMBOL(&vdso##bits##_start, sec##_end); \ + \ + do_##type##_fixups((value), __start, __end); \ +} while (0) static __init int vdso_do_find_sections(struct lib32_elfinfo *v32, struct lib64_elfinfo *v64) @@ -530,53 +536,20 @@ static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32, static __init int vdso_fixup_features(struct lib32_elfinfo *v32, struct lib64_elfinfo *v64) { - unsigned long size; - void *start; - #ifdef CONFIG_PPC64 - start = find_section64(v64->hdr, "__ftr_fixup", &size); - if (start) - do_feature_fixups(cur_cpu_spec->cpu_features, - start, start + size); - - start = find_section64(v64->hdr, "__mmu_ftr_fixup", &size); - if (start) - do_feature_fixups(cur_cpu_spec->mmu_features, - start, start + size); - - start = find_section64(v64->hdr, "__fw_ftr_fixup", &size); - if (start) - do_feature_fixups(powerpc_firmware_features, - start, start + size); - - start = find_section64(v64->hdr, "__lwsync_fixup", &size); - if (start) - do_lwsync_fixups(cur_cpu_spec->cpu_features, - start, start + size); + VDSO_DO_FIXUPS(feature, cur_cpu_spec->cpu_features, 64, 
ftr_fixup); + VDSO_DO_FIXUPS(feature, cur_cpu_spec->mmu_features, 64, mmu_ftr_fixup); + VDSO_DO_FIXUPS(feature, powerpc_firmware_features, 64, fw_ftr_fixup); + VDSO_DO_FIXUPS(lwsync, cur_cpu_spec->cpu_features, 64, lwsync_fixup); #endif /* CONFIG_PPC64 */ #ifdef CONFIG_VDSO32 - start = find_section32(v32->hdr, "__ftr_fixup", &size); - if (start) - do_feature_fixups(cur_cpu_spec->cpu_features, - start, start + size); - - start = find_section32(v32->hdr, "__mmu_ftr_fixup", &size); - if (start) - do_feature_fixups(cur_cpu_spec->mmu_features, - start, start + size); - + VDSO_DO_FIXUPS(feature, cur_cpu_spec->cpu_features, 32, ftr_fixup); + VDSO_DO_FIXUPS(feature, cur_cpu_spec->mmu_features, 32, mmu_ftr_fixup); #ifdef CONFIG_PPC64 - start = find_section32(v32->hdr, "__fw_ftr_fixup", &size); - if (start) - do_feature_fixups(powerpc_firmware_features, - start, start + size); + VDSO_DO_FIXUPS(feature, powerpc_firmware_features, 32, fw_ftr_fixup); #endif /* CONFIG_PPC64 */ - - start = find_section32(v32->hdr, "__lwsync_fixup", &size); - if (start) - do_lwsync_fixups(cur_cpu_spec->cpu_features, - start, start + size); + VDSO_DO_FIXUPS(lwsync, cur_cpu_spec->cpu_features, 32, lwsync_fixup); #endif return 0; diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index 078d75c0cd24..dc62772f028c 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -38,17 +38,25 @@ SECTIONS PROVIDE(etext = .); . = ALIGN(8); + VDSO_ftr_fixup_start = .; __ftr_fixup : { *(__ftr_fixup) } + VDSO_ftr_fixup_end = .; . = ALIGN(8); + VDSO_mmu_ftr_fixup_start = .; __mmu_ftr_fixup : { *(__mmu_ftr_fixup) } + VDSO_mmu_ftr_fixup_end = .; . = ALIGN(8); + VDSO_lwsync_fixup_start = .; __lwsync_fixup : { *(__lwsync_fixup) } + VDSO_lwsync_fixup_end = .; #ifdef CONFIG_PPC64 . = ALIGN(8); + VDSO_fw_ftr_fixup_start = .; __fw_ftr_fixup : { *(__fw_ftr_fixup) } + VDSO_fw_ftr_fixup_end = .; #endif /* diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 1f06e4f730a8..913d34e8bd05 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -39,16 +39,24 @@ SECTIONS PROVIDE(etext = .); . = ALIGN(8); + VDSO_ftr_fixup_start = .; __ftr_fixup : { *(__ftr_fixup) } + VDSO_ftr_fixup_end = .; . = ALIGN(8); + VDSO_mmu_ftr_fixup_start = .; __mmu_ftr_fixup : { *(__mmu_ftr_fixup) } + VDSO_mmu_ftr_fixup_end = .; . = ALIGN(8); + VDSO_lwsync_fixup_start = .; __lwsync_fixup : { *(__lwsync_fixup) } + VDSO_lwsync_fixup_end = .; . = ALIGN(8); + VDSO_fw_ftr_fixup_start = .; __fw_ftr_fixup : { *(__fw_ftr_fixup) } + VDSO_fw_ftr_fixup_end = .; /* * Other stuff is appended to the text segment: From 0fc980db9a404a993c4ed542369a745d8a14b0b7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 27 Sep 2020 09:16:35 +0000 Subject: [PATCH 096/304] powerpc/vdso: Merge __kernel_sync_dicache_p5() into __kernel_sync_dicache() __kernel_sync_dicache_p5() is an alternative to __kernel_sync_dicache() when cpu has CPU_FTR_COHERENT_ICACHE Remove this alternative function and merge __kernel_sync_dicache_p5() into __kernel_sync_dicache() using standard CPU feature fixup. 
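A rough C analogue of the merged function (a sketch only, not kernel code; the two flush helpers are hypothetical names standing in for the cache-block loops, and in the real vDSO the test is resolved once at boot by patching the text rather than executed at run time):

	int kernel_sync_dicache(void *start, void *end)
	{
		/* branch kept by the boot-time fixup on CPUs with the feature */
		if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
			mb();		/* the asm path just does sync; isync */
			return 0;
		}
		flush_dcache_blocks(start, end);	/* hypothetical helper */
		invalidate_icache_blocks(start, end);	/* hypothetical helper */
		return 0;
	}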
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4c7dcc6544882761b2b0249d7a8ec2c3a8088cb5.1601197618.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/vdso.c | 4 ---- arch/powerpc/kernel/vdso32/cacheflush.S | 17 ++++++----------- arch/powerpc/kernel/vdso32/vdso32.lds.S | 1 - arch/powerpc/kernel/vdso64/cacheflush.S | 16 ++++++---------- arch/powerpc/kernel/vdso64/vdso64.lds.S | 1 - 5 files changed, 12 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 27449202c1d7..f110b58ff520 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -87,10 +87,6 @@ struct vdso_patch_def * with a coherent icache */ static struct vdso_patch_def vdso_patches[] = { - { - CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE, - "__kernel_sync_dicache", "__kernel_sync_dicache_p5" - }, }; /* diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S index 017843bf5382..f340e82d1981 100644 --- a/arch/powerpc/kernel/vdso32/cacheflush.S +++ b/arch/powerpc/kernel/vdso32/cacheflush.S @@ -24,11 +24,15 @@ */ V_FUNCTION_BEGIN(__kernel_sync_dicache) .cfi_startproc +BEGIN_FTR_SECTION + b 3f +END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) #ifdef CONFIG_PPC64 mflr r12 .cfi_register lr,r12 get_datapage r10 mtlr r12 + .cfi_restore lr #endif #ifdef CONFIG_PPC64 @@ -84,20 +88,11 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) isync li r3,0 blr - .cfi_endproc -V_FUNCTION_END(__kernel_sync_dicache) - - -/* - * POWER5 version of __kernel_sync_dicache - */ -V_FUNCTION_BEGIN(__kernel_sync_dicache_p5) - .cfi_startproc +3: crclr cr0*4+so sync isync li r3,0 blr .cfi_endproc -V_FUNCTION_END(__kernel_sync_dicache_p5) - +V_FUNCTION_END(__kernel_sync_dicache) diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index dc62772f028c..18b10d177eb5 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -162,7 +162,6 @@ VERSION __kernel_time; __kernel_get_tbfreq; __kernel_sync_dicache; - __kernel_sync_dicache_p5; __kernel_sigtramp32; __kernel_sigtramp_rt32; #if defined(CONFIG_PPC64) || !defined(CONFIG_SMP) diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S index 61985de5758f..76c3c8cf8ece 100644 --- a/arch/powerpc/kernel/vdso64/cacheflush.S +++ b/arch/powerpc/kernel/vdso64/cacheflush.S @@ -23,10 +23,14 @@ */ V_FUNCTION_BEGIN(__kernel_sync_dicache) .cfi_startproc +BEGIN_FTR_SECTION + b 3f +END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) mflr r12 .cfi_register lr,r12 get_datapage r10 mtlr r12 + .cfi_restore lr lwz r7,CFG_DCACHE_BLOCKSZ(r10) addi r5,r7,-1 @@ -61,19 +65,11 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) isync li r3,0 blr - .cfi_endproc -V_FUNCTION_END(__kernel_sync_dicache) - - -/* - * POWER5 version of __kernel_sync_dicache - */ -V_FUNCTION_BEGIN(__kernel_sync_dicache_p5) - .cfi_startproc +3: crclr cr0*4+so sync isync li r3,0 blr .cfi_endproc -V_FUNCTION_END(__kernel_sync_dicache_p5) +V_FUNCTION_END(__kernel_sync_dicache) diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 913d34e8bd05..21aec1382702 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -159,7 +159,6 @@ VERSION __kernel_clock_getres; __kernel_get_tbfreq; __kernel_sync_dicache; - __kernel_sync_dicache_p5; __kernel_sigtramp_rt64; __kernel_getcpu; __kernel_time; From 
b7fe9c15b57d767fda250e8eff79be435996ef33 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 27 Sep 2020 09:16:36 +0000 Subject: [PATCH 097/304] powerpc/vdso: Remove vdso32_pages and vdso64_pages vdso32_pages and vdso64_pages are not used anymore. Remove them. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/bce021f616cbaf39dfb5766cf7ef114adcb918d9.1601197618.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/vdso.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index f110b58ff520..264c5c5dc842 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -47,7 +47,6 @@ /* The alignment of the vDSO */ #define VDSO_ALIGNMENT (1 << 16) -static unsigned int vdso32_pages; static void *vdso32_kbase; unsigned long vdso32_sigtramp; unsigned long vdso32_rt_sigtramp; @@ -55,7 +54,6 @@ unsigned long vdso32_rt_sigtramp; extern char vdso32_start, vdso32_end; extern char vdso64_start, vdso64_end; static void *vdso64_kbase = &vdso64_start; -static unsigned int vdso64_pages; #ifdef CONFIG_PPC64 unsigned long vdso64_rt_sigtramp; #endif /* CONFIG_PPC64 */ @@ -698,20 +696,8 @@ static int __init vdso_init(void) vdso_data->icache_log_block_size = ppc64_caches.l1i.log_block_size; #endif /* CONFIG_PPC64 */ - /* - * Calculate the size of the 64 bits vDSO - */ - vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT; - DBG("vdso64_kbase: %p, 0x%x pages\n", vdso64_kbase, vdso64_pages); - vdso32_kbase = &vdso32_start; - /* - * Calculate the size of the 32 bits vDSO - */ - vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT; - DBG("vdso32_kbase: %p, 0x%x pages\n", vdso32_kbase, vdso32_pages); - vdso_setup_syscall_map(); /* @@ -720,8 +706,6 @@ static int __init vdso_init(void) */ if (vdso_setup()) { printk(KERN_ERR "vDSO setup failure, not enabled !\n"); - vdso32_pages = 0; - vdso64_pages = 0; return 0; } From 49bf59fd0371b1053a17021f27605f43071584ee Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 27 Sep 2020 09:16:37 +0000 Subject: [PATCH 098/304] powerpc/vdso: Remove __kernel_datapage_offset __kernel_datapage_offset is not used anymore, remove it. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ddb5c746bec4e1a026d7c85243213a1876ef844f.1601197618.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/vdso.c | 39 ------------------------- arch/powerpc/kernel/vdso32/datapage.S | 3 -- arch/powerpc/kernel/vdso32/vdso32.lds.S | 5 ---- arch/powerpc/kernel/vdso64/datapage.S | 3 -- arch/powerpc/kernel/vdso64/vdso64.lds.S | 5 ---- 5 files changed, 55 deletions(-) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 264c5c5dc842..4c9770577e30 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -491,42 +491,6 @@ static __init void vdso_setup_trampolines(struct lib32_elfinfo *v32, vdso32_rt_sigtramp = find_function32(v32, "__kernel_sigtramp_rt32"); } -static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64) -{ -#ifdef CONFIG_VDSO32 - Elf32_Sym *sym32; -#endif -#ifdef CONFIG_PPC64 - Elf64_Sym *sym64; - - sym64 = find_symbol64(v64, "__kernel_datapage_offset"); - if (sym64 == NULL) { - printk(KERN_ERR "vDSO64: Can't find symbol " - "__kernel_datapage_offset !\n"); - return -1; - } - *((int *)(vdso64_kbase + sym64->st_value - VDSO64_LBASE)) = - -PAGE_SIZE - - (sym64->st_value - VDSO64_LBASE); -#endif /* CONFIG_PPC64 */ - -#ifdef CONFIG_VDSO32 - sym32 = find_symbol32(v32, "__kernel_datapage_offset"); - if (sym32 == NULL) { - printk(KERN_ERR "vDSO32: Can't find symbol " - "__kernel_datapage_offset !\n"); - return -1; - } - *((int *)(vdso32_kbase + (sym32->st_value - VDSO32_LBASE))) = - -PAGE_SIZE - - (sym32->st_value - VDSO32_LBASE); -#endif - - return 0; -} - - static __init int vdso_fixup_features(struct lib32_elfinfo *v32, struct lib64_elfinfo *v64) { @@ -592,9 +556,6 @@ static __init int vdso_setup(void) if (vdso_do_find_sections(&v32, &v64)) return -1; - if (vdso_fixup_datapage(&v32, &v64)) - return -1; - if (vdso_fixup_features(&v32, &v64)) return -1; diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S index abff2250e891..65244416ab94 100644 --- a/arch/powerpc/kernel/vdso32/datapage.S +++ b/arch/powerpc/kernel/vdso32/datapage.S @@ -13,9 +13,6 @@ #include .text - .global __kernel_datapage_offset; -__kernel_datapage_offset: - .long 0 /* * void *__kernel_get_syscall_map(unsigned int *syscall_count) ; diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index 18b10d177eb5..660891af2f58 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -149,11 +149,6 @@ VERSION { VDSO_VERSION_STRING { global: - /* - * Has to be there for the kernel to find - */ - __kernel_datapage_offset; - __kernel_get_syscall_map; __kernel_gettimeofday; __kernel_clock_gettime; diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S index 941b735df069..00760dc69d68 100644 --- a/arch/powerpc/kernel/vdso64/datapage.S +++ b/arch/powerpc/kernel/vdso64/datapage.S @@ -13,9 +13,6 @@ #include .text -.global __kernel_datapage_offset; -__kernel_datapage_offset: - .long 0 /* * void *__kernel_get_syscall_map(unsigned int *syscall_count) ; diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 21aec1382702..0cacd511a8ec 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -148,11 +148,6 @@ VERSION { VDSO_VERSION_STRING { global: - /* - * Has to be there for the kernel to find - */ - __kernel_datapage_offset; 
- __kernel_get_syscall_map; __kernel_gettimeofday; __kernel_clock_gettime; From 899367ea50637f382fdc5c927fe47e6090d4aefe Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 27 Sep 2020 09:16:38 +0000 Subject: [PATCH 099/304] powerpc/vdso: Remove runtime generated sigtramp offsets Signal trampoline offsets are now generated at buildtime. Runtime generated offsets are not used anymore, remove them. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7c192d35a437151837cf4c48aeccb42380d6daac.1601197618.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/vdso.h | 5 --- arch/powerpc/kernel/vdso.c | 59 --------------------------------- 2 files changed, 64 deletions(-) diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h index f5257b7f17d0..a97384909fe5 100644 --- a/arch/powerpc/include/asm/vdso.h +++ b/arch/powerpc/include/asm/vdso.h @@ -27,11 +27,6 @@ #define VDSO32_SYMBOL(base, name) ((unsigned long)(base) + (vdso32_offset_##name)) -/* Offsets relative to thread->vdso_base */ -extern unsigned long vdso64_rt_sigtramp; -extern unsigned long vdso32_sigtramp; -extern unsigned long vdso32_rt_sigtramp; - int vdso_getcpu_init(void); #else /* __ASSEMBLY__ */ diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 4c9770577e30..9993dc8d68e6 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -48,15 +48,10 @@ #define VDSO_ALIGNMENT (1 << 16) static void *vdso32_kbase; -unsigned long vdso32_sigtramp; -unsigned long vdso32_rt_sigtramp; extern char vdso32_start, vdso32_end; extern char vdso64_start, vdso64_end; static void *vdso64_kbase = &vdso64_start; -#ifdef CONFIG_PPC64 -unsigned long vdso64_rt_sigtramp; -#endif /* CONFIG_PPC64 */ static int vdso_ready; @@ -275,22 +270,6 @@ static Elf32_Sym * __init find_symbol32(struct lib32_elfinfo *lib, return NULL; } -/* Note that we assume the section is .text and the symbol is relative to - * the library base - */ -static unsigned long __init find_function32(struct lib32_elfinfo *lib, - const char *symname) -{ - Elf32_Sym *sym = find_symbol32(lib, symname); - - if (sym == NULL) { - printk(KERN_WARNING "vDSO32: function %s not found !\n", - symname); - return 0; - } - return sym->st_value - VDSO32_LBASE; -} - static int __init vdso_do_func_patch32(struct lib32_elfinfo *v32, struct lib64_elfinfo *v64, const char *orig, const char *fix) @@ -320,12 +299,6 @@ static int __init vdso_do_func_patch32(struct lib32_elfinfo *v32, return 0; } #else /* !CONFIG_VDSO32 */ -static unsigned long __init find_function32(struct lib32_elfinfo *lib, - const char *symname) -{ - return 0; -} - static int __init vdso_do_func_patch32(struct lib32_elfinfo *v32, struct lib64_elfinfo *v64, const char *orig, const char *fix) @@ -381,22 +354,6 @@ static Elf64_Sym * __init find_symbol64(struct lib64_elfinfo *lib, return NULL; } -/* Note that we assume the section is .text and the symbol is relative to - * the library base - */ -static unsigned long __init find_function64(struct lib64_elfinfo *lib, - const char *symname) -{ - Elf64_Sym *sym = find_symbol64(lib, symname); - - if (sym == NULL) { - printk(KERN_WARNING "vDSO64: function %s not found !\n", - symname); - return 0; - } - return sym->st_value - VDSO64_LBASE; -} - static int __init vdso_do_func_patch64(struct lib32_elfinfo *v32, struct lib64_elfinfo *v64, const char *orig, const char *fix) @@ -477,20 +434,6 @@ static __init int vdso_do_find_sections(struct lib32_elfinfo *v32, return 0; } -static __init void 
vdso_setup_trampolines(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64) -{ - /* - * Find signal trampolines - */ - -#ifdef CONFIG_PPC64 - vdso64_rt_sigtramp = find_function64(v64, "__kernel_sigtramp_rt64"); -#endif - vdso32_sigtramp = find_function32(v32, "__kernel_sigtramp32"); - vdso32_rt_sigtramp = find_function32(v32, "__kernel_sigtramp_rt32"); -} - static __init int vdso_fixup_features(struct lib32_elfinfo *v32, struct lib64_elfinfo *v64) { @@ -562,8 +505,6 @@ static __init int vdso_setup(void) if (vdso_fixup_alt_funcs(&v32, &v64)) return -1; - vdso_setup_trampolines(&v32, &v64); - return 0; } From 5cda7c75493fd17a010d7399e39fda6619f69043 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 27 Sep 2020 09:16:39 +0000 Subject: [PATCH 100/304] powerpc/vdso: Remove vdso_patches[] and associated functions vdso_patches[] is now empty, remove it and remove all functions that depends on it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/27d75debd6e4ddeaffe1d66ffed1e7526684a004.1601197618.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/vdso.c | 161 ------------------------------------- 1 file changed, 161 deletions(-) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 9993dc8d68e6..ea0ce3a9fb4a 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -41,9 +41,6 @@ #define DBG(fmt...) #endif -/* Max supported size for symbol names */ -#define MAX_SYMNAME 64 - /* The alignment of the vDSO */ #define VDSO_ALIGNMENT (1 << 16) @@ -66,22 +63,6 @@ static union { } vdso_data_store __page_aligned_data; struct vdso_arch_data *vdso_data = &vdso_data_store.data; -/* Format of the patch table */ -struct vdso_patch_def -{ - unsigned long ftr_mask, ftr_value; - const char *gen_name; - const char *fix_name; -}; - -/* Table of functions to patch based on the CPU type/revision - * - * Currently, we only change sync_dicache to do nothing on processors - * with a coherent icache - */ -static struct vdso_patch_def vdso_patches[] = { -}; - /* * Some infos carried around for each of them during parsing at * boot time. 
@@ -249,62 +230,6 @@ static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname, *size = 0; return NULL; } - -static Elf32_Sym * __init find_symbol32(struct lib32_elfinfo *lib, - const char *symname) -{ - unsigned int i; - char name[MAX_SYMNAME], *c; - - for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) { - if (lib->dynsym[i].st_name == 0) - continue; - strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, - MAX_SYMNAME); - c = strchr(name, '@'); - if (c) - *c = 0; - if (strcmp(symname, name) == 0) - return &lib->dynsym[i]; - } - return NULL; -} - -static int __init vdso_do_func_patch32(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64, - const char *orig, const char *fix) -{ - Elf32_Sym *sym32_gen, *sym32_fix; - - sym32_gen = find_symbol32(v32, orig); - if (sym32_gen == NULL) { - printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", orig); - return -1; - } - if (fix == NULL) { - sym32_gen->st_name = 0; - return 0; - } - sym32_fix = find_symbol32(v32, fix); - if (sym32_fix == NULL) { - printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", fix); - return -1; - } - sym32_gen->st_value = sym32_fix->st_value; - sym32_gen->st_size = sym32_fix->st_size; - sym32_gen->st_info = sym32_fix->st_info; - sym32_gen->st_other = sym32_fix->st_other; - sym32_gen->st_shndx = sym32_fix->st_shndx; - - return 0; -} -#else /* !CONFIG_VDSO32 */ -static int __init vdso_do_func_patch32(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64, - const char *orig, const char *fix) -{ - return 0; -} #endif /* CONFIG_VDSO32 */ @@ -333,56 +258,6 @@ static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname, *size = 0; return NULL; } - -static Elf64_Sym * __init find_symbol64(struct lib64_elfinfo *lib, - const char *symname) -{ - unsigned int i; - char name[MAX_SYMNAME], *c; - - for (i = 0; i < (lib->dynsymsize / sizeof(Elf64_Sym)); i++) { - if (lib->dynsym[i].st_name == 0) - continue; - strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, - MAX_SYMNAME); - c = strchr(name, '@'); - if (c) - *c = 0; - if (strcmp(symname, name) == 0) - return &lib->dynsym[i]; - } - return NULL; -} - -static int __init vdso_do_func_patch64(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64, - const char *orig, const char *fix) -{ - Elf64_Sym *sym64_gen, *sym64_fix; - - sym64_gen = find_symbol64(v64, orig); - if (sym64_gen == NULL) { - printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", orig); - return -1; - } - if (fix == NULL) { - sym64_gen->st_name = 0; - return 0; - } - sym64_fix = find_symbol64(v64, fix); - if (sym64_fix == NULL) { - printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", fix); - return -1; - } - sym64_gen->st_value = sym64_fix->st_value; - sym64_gen->st_size = sym64_fix->st_size; - sym64_gen->st_info = sym64_fix->st_info; - sym64_gen->st_other = sym64_fix->st_other; - sym64_gen->st_shndx = sym64_fix->st_shndx; - - return 0; -} - #endif /* CONFIG_PPC64 */ #define VDSO_DO_FIXUPS(type, value, bits, sec) do { \ @@ -456,39 +331,6 @@ static __init int vdso_fixup_features(struct lib32_elfinfo *v32, return 0; } -static __init int vdso_fixup_alt_funcs(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(vdso_patches); i++) { - struct vdso_patch_def *patch = &vdso_patches[i]; - int match = (cur_cpu_spec->cpu_features & patch->ftr_mask) - == patch->ftr_value; - if (!match) - continue; - - DBG("replacing %s with %s...\n", patch->gen_name, - patch->fix_name ? "NONE" : patch->fix_name); - - /* - * Patch the 32 bits and 64 bits symbols. 
Note that we do not - * patch the "." symbol on 64 bits. - * It would be easy to do, but doesn't seem to be necessary, - * patching the OPD symbol is enough. - */ - vdso_do_func_patch32(v32, v64, patch->gen_name, - patch->fix_name); -#ifdef CONFIG_PPC64 - vdso_do_func_patch64(v32, v64, patch->gen_name, - patch->fix_name); -#endif /* CONFIG_PPC64 */ - } - - return 0; -} - - static __init int vdso_setup(void) { struct lib32_elfinfo v32; @@ -502,9 +344,6 @@ static __init int vdso_setup(void) if (vdso_fixup_features(&v32, &v64)) return -1; - if (vdso_fixup_alt_funcs(&v32, &v64)) - return -1; - return 0; } From e113f8ef1c7e5fd79b440e5565c8552b36122bfa Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 27 Sep 2020 09:16:40 +0000 Subject: [PATCH 101/304] powerpc/vdso: Remove unused text member in struct lib32/64_elfinfo The text member in struct lib32_elfinfo and struct lib64_elfinfo is not used, remove it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f53dcc9bb1946a7854d15b34d03d3d2e2003848c.1601197618.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/vdso.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index ea0ce3a9fb4a..9851039b0786 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -73,7 +73,6 @@ struct lib32_elfinfo Elf32_Sym *dynsym; /* ptr to .dynsym section */ unsigned long dynsymsize; /* size of .dynsym section */ char *dynstr; /* ptr to .dynstr section */ - unsigned long text; /* offset of .text section in .so */ }; struct lib64_elfinfo @@ -82,7 +81,6 @@ struct lib64_elfinfo Elf64_Sym *dynsym; unsigned long dynsymsize; char *dynstr; - unsigned long text; }; static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma, @@ -270,8 +268,6 @@ static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname, static __init int vdso_do_find_sections(struct lib32_elfinfo *v32, struct lib64_elfinfo *v64) { - void *sect; - /* * Locate symbol tables & text section */ @@ -283,12 +279,6 @@ static __init int vdso_do_find_sections(struct lib32_elfinfo *v32, printk(KERN_ERR "vDSO32: required symbol section not found\n"); return -1; } - sect = find_section32(v32->hdr, ".text", NULL); - if (sect == NULL) { - printk(KERN_ERR "vDSO32: the .text section was not found\n"); - return -1; - } - v32->text = sect - vdso32_kbase; #endif #ifdef CONFIG_PPC64 @@ -298,12 +288,6 @@ static __init int vdso_do_find_sections(struct lib32_elfinfo *v32, printk(KERN_ERR "vDSO64: required symbol section not found\n"); return -1; } - sect = find_section64(v64->hdr, ".text", NULL); - if (sect == NULL) { - printk(KERN_ERR "vDSO64: the .text section was not found\n"); - return -1; - } - v64->text = sect - vdso64_kbase; #endif /* CONFIG_PPC64 */ return 0; From 6ed613ad572a84c175629fc8657a197c6415b7d6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 27 Sep 2020 09:16:41 +0000 Subject: [PATCH 102/304] powerpc/vdso: Remove symbol section information in struct lib32/64_elfinfo The members related to the symbol section in struct lib32_elfinfo and struct lib64_elfinfo are not used anymore, removed them. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b779e5b7cc0354e2f87fd407fe5b02f4a8a73825.1601197618.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/vdso.c | 90 -------------------------------------- 1 file changed, 90 deletions(-) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 9851039b0786..d7eb707785e9 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -70,17 +70,11 @@ struct vdso_arch_data *vdso_data = &vdso_data_store.data; struct lib32_elfinfo { Elf32_Ehdr *hdr; /* ptr to ELF */ - Elf32_Sym *dynsym; /* ptr to .dynsym section */ - unsigned long dynsymsize; /* size of .dynsym section */ - char *dynstr; /* ptr to .dynstr section */ }; struct lib64_elfinfo { Elf64_Ehdr *hdr; - Elf64_Sym *dynsym; - unsigned long dynsymsize; - char *dynstr; }; static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma, @@ -205,59 +199,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) return rc; } -#ifdef CONFIG_VDSO32 -static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname, - unsigned long *size) -{ - Elf32_Shdr *sechdrs; - unsigned int i; - char *secnames; - - /* Grab section headers and strings so we can tell who is who */ - sechdrs = (void *)ehdr + ehdr->e_shoff; - secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset; - - /* Find the section they want */ - for (i = 1; i < ehdr->e_shnum; i++) { - if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) { - if (size) - *size = sechdrs[i].sh_size; - return (void *)ehdr + sechdrs[i].sh_offset; - } - } - *size = 0; - return NULL; -} -#endif /* CONFIG_VDSO32 */ - - -#ifdef CONFIG_PPC64 - -static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname, - unsigned long *size) -{ - Elf64_Shdr *sechdrs; - unsigned int i; - char *secnames; - - /* Grab section headers and strings so we can tell who is who */ - sechdrs = (void *)ehdr + ehdr->e_shoff; - secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset; - - /* Find the section they want */ - for (i = 1; i < ehdr->e_shnum; i++) { - if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) { - if (size) - *size = sechdrs[i].sh_size; - return (void *)ehdr + sechdrs[i].sh_offset; - } - } - if (size) - *size = 0; - return NULL; -} -#endif /* CONFIG_PPC64 */ - #define VDSO_DO_FIXUPS(type, value, bits, sec) do { \ void *__start = (void *)VDSO##bits##_SYMBOL(&vdso##bits##_start, sec##_start); \ void *__end = (void *)VDSO##bits##_SYMBOL(&vdso##bits##_start, sec##_end); \ @@ -265,34 +206,6 @@ static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname, do_##type##_fixups((value), __start, __end); \ } while (0) -static __init int vdso_do_find_sections(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64) -{ - /* - * Locate symbol tables & text section - */ - -#ifdef CONFIG_VDSO32 - v32->dynsym = find_section32(v32->hdr, ".dynsym", &v32->dynsymsize); - v32->dynstr = find_section32(v32->hdr, ".dynstr", NULL); - if (v32->dynsym == NULL || v32->dynstr == NULL) { - printk(KERN_ERR "vDSO32: required symbol section not found\n"); - return -1; - } -#endif - -#ifdef CONFIG_PPC64 - v64->dynsym = find_section64(v64->hdr, ".dynsym", &v64->dynsymsize); - v64->dynstr = find_section64(v64->hdr, ".dynstr", NULL); - if (v64->dynsym == NULL || v64->dynstr == NULL) { - printk(KERN_ERR "vDSO64: required symbol section not found\n"); - return -1; - } -#endif /* CONFIG_PPC64 */ - - return 0; -} - static __init int 
vdso_fixup_features(struct lib32_elfinfo *v32, struct lib64_elfinfo *v64) { @@ -322,9 +235,6 @@ static __init int vdso_setup(void) v32.hdr = vdso32_kbase; v64.hdr = vdso64_kbase; - if (vdso_do_find_sections(&v32, &v64)) - return -1; - if (vdso_fixup_features(&v32, &v64)) return -1; From 67a354051da28d482e53146def212b102664ce0e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 27 Sep 2020 09:16:42 +0000 Subject: [PATCH 103/304] powerpc/vdso: Remove lib32_elfinfo and lib64_elfinfo lib32_elfinfo and lib64_elfinfo are not used anymore, remove them. Also remove vdso32_kbase and vdso64_kbase while removing the last use. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/01ac65abf22f0428f8f764525a7d84459c54d806.1601197618.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/vdso.c | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index d7eb707785e9..faff1d081ca8 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -44,11 +44,8 @@ /* The alignment of the vDSO */ #define VDSO_ALIGNMENT (1 << 16) -static void *vdso32_kbase; - extern char vdso32_start, vdso32_end; extern char vdso64_start, vdso64_end; -static void *vdso64_kbase = &vdso64_start; static int vdso_ready; @@ -63,20 +60,6 @@ static union { } vdso_data_store __page_aligned_data; struct vdso_arch_data *vdso_data = &vdso_data_store.data; -/* - * Some infos carried around for each of them during parsing at - * boot time. - */ -struct lib32_elfinfo -{ - Elf32_Ehdr *hdr; /* ptr to ELF */ -}; - -struct lib64_elfinfo -{ - Elf64_Ehdr *hdr; -}; - static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma, unsigned long text_size) { @@ -206,8 +189,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) do_##type##_fixups((value), __start, __end); \ } while (0) -static __init int vdso_fixup_features(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64) +static int __init vdso_fixup_features(void) { #ifdef CONFIG_PPC64 VDSO_DO_FIXUPS(feature, cur_cpu_spec->cpu_features, 64, ftr_fixup); @@ -230,12 +212,7 @@ static __init int vdso_fixup_features(struct lib32_elfinfo *v32, static __init int vdso_setup(void) { - struct lib32_elfinfo v32; - struct lib64_elfinfo v64; - - v32.hdr = vdso32_kbase; - v64.hdr = vdso64_kbase; - if (vdso_fixup_features(&v32, &v64)) + if (vdso_fixup_features()) return -1; return 0; @@ -331,8 +308,6 @@ static int __init vdso_init(void) vdso_data->icache_log_block_size = ppc64_caches.l1i.log_block_size; #endif /* CONFIG_PPC64 */ - vdso32_kbase = &vdso32_start; - vdso_setup_syscall_map(); /* From a4ccd64acb8c08ce8d36001cdd06477deec6ae89 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 27 Sep 2020 09:16:43 +0000 Subject: [PATCH 104/304] powerpc/vdso: Remove vdso_setup() vdso_fixup_features() cannot fail anymore and that's the only function called by vdso_setup(). vdso_setup() has become trivial and can be removed. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/11522eec6140f510a8c89c63cbb739277d097fdc.1601197618.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/vdso.c | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index faff1d081ca8..93e09e45608c 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -189,7 +189,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) do_##type##_fixups((value), __start, __end); \ } while (0) -static int __init vdso_fixup_features(void) +static void __init vdso_fixup_features(void) { #ifdef CONFIG_PPC64 VDSO_DO_FIXUPS(feature, cur_cpu_spec->cpu_features, 64, ftr_fixup); @@ -206,16 +206,6 @@ static int __init vdso_fixup_features(void) #endif /* CONFIG_PPC64 */ VDSO_DO_FIXUPS(lwsync, cur_cpu_spec->cpu_features, 32, lwsync_fixup); #endif - - return 0; -} - -static __init int vdso_setup(void) -{ - if (vdso_fixup_features()) - return -1; - - return 0; } /* @@ -310,14 +300,7 @@ static int __init vdso_init(void) vdso_setup_syscall_map(); - /* - * Initialize the vDSO images in memory, that is do necessary - * fixups of vDSO symbols, locate trampolines, etc... - */ - if (vdso_setup()) { - printk(KERN_ERR "vDSO setup failure, not enabled !\n"); - return 0; - } + vdso_fixup_features(); if (IS_ENABLED(CONFIG_VDSO32)) vdso32_spec.pages = vdso_setup_pages(&vdso32_start, &vdso32_end); From 23c4ceaf1a457808d031c666760fa325c7b7f23f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 27 Sep 2020 09:16:45 +0000 Subject: [PATCH 105/304] powerpc/vdso: Remove vdso_ready There is no way to get out of vdso_init() prematurely anymore. Remove vdso_ready as it will always be 1. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0e1e18c6329b848aa3edeeba76509b4d76182e7d.1601197618.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/vdso.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 93e09e45608c..0a26dc461197 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -47,8 +47,6 @@ extern char vdso32_start, vdso32_end; extern char vdso64_start, vdso64_end; -static int vdso_ready; - /* * The vdso data page (aka. systemcfg for old ppc64 fans) is here. * Once the early boot kernel code no longer needs to muck around @@ -168,9 +166,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) mm->context.vdso = NULL; - if (!vdso_ready) - return 0; - if (mmap_write_lock_killable(mm)) return -EINTR; @@ -309,7 +304,6 @@ static int __init vdso_init(void) vdso64_spec.pages = vdso_setup_pages(&vdso64_start, &vdso64_end); smp_wmb(); - vdso_ready = 1; return 0; } From e90903203d94d0a0d0e8ebc979aa0617a7bbe9a3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 27 Sep 2020 09:16:46 +0000 Subject: [PATCH 106/304] powerpc/vdso: Remove DBG() DBG() is not used anymore. Remove it.
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e11a9b50e709f197bb3aa2ed1d80d2dee8714afc.1601197618.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/vdso.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 0a26dc461197..e839a906fdf2 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -33,14 +33,6 @@ #include #include -#undef DEBUG - -#ifdef DEBUG -#define DBG(fmt...) printk(fmt) -#else -#define DBG(fmt...) -#endif - /* The alignment of the vDSO */ #define VDSO_ALIGNMENT (1 << 16) From 676155ab239dc2035d5306438b45695b6fa165e2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 27 Sep 2020 09:16:47 +0000 Subject: [PATCH 107/304] powerpc/vdso: Remove VDSO32_LBASE and VDSO64_LBASE VDSO32_LBASE and VDSO64_LBASE are 0. Remove them to simplify code. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6c4d6570d886bbe1cc471e8ca01602e4b4d9beb5.1601197618.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/vdso.h | 4 ---- arch/powerpc/kernel/vdso32/vdso32.lds.S | 2 +- arch/powerpc/kernel/vdso64/vdso64.lds.S | 2 +- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h index a97384909fe5..2448419cb3e5 100644 --- a/arch/powerpc/include/asm/vdso.h +++ b/arch/powerpc/include/asm/vdso.h @@ -4,10 +4,6 @@ #ifdef __KERNEL__ -/* Default link addresses for the vDSOs */ -#define VDSO32_LBASE 0x0 -#define VDSO64_LBASE 0x0 - /* Default map addresses for 32bit vDSO */ #define VDSO32_MBASE 0x100000 diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index 660891af2f58..8bf958a61045 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -17,7 +17,7 @@ ENTRY(_start) SECTIONS { PROVIDE(_vdso_datapage = . - PAGE_SIZE); - . = VDSO32_LBASE + SIZEOF_HEADERS; + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text .gnu.hash : { *(.gnu.hash) } diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 0cacd511a8ec..20f4366db1df 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -17,7 +17,7 @@ ENTRY(_start) SECTIONS { PROVIDE(_vdso_datapage = . - PAGE_SIZE); - . = VDSO64_LBASE + SIZEOF_HEADERS; + . 
= SIZEOF_HEADERS; .hash : { *(.hash) } :text .gnu.hash : { *(.gnu.hash) } From 65d2150c89121a49e4bd4abbb99c436c77003eed Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 27 Sep 2020 09:16:48 +0000 Subject: [PATCH 108/304] powerpc/vdso: Cleanup vdso.h Rename the guard define to _ASM_POWERPC_VDSO_H and remove the useless #ifdef __KERNEL__. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9902590d410cd1c2afa48b83b277faf0711f07b2.1601197618.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/vdso.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h index 2448419cb3e5..8542e9bbeead 100644 --- a/arch/powerpc/include/asm/vdso.h +++ b/arch/powerpc/include/asm/vdso.h @@ -1,8 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PPC64_VDSO_H__ -#define __PPC64_VDSO_H__ - -#ifdef __KERNEL__ +#ifndef _ASM_POWERPC_VDSO_H +#define _ASM_POWERPC_VDSO_H /* Default map addresses for 32bit vDSO */ #define VDSO32_MBASE 0x100000 @@ -54,6 +52,4 @@ int vdso_getcpu_init(void); #endif /* __ASSEMBLY__ */ -#endif /* __KERNEL__ */ - -#endif /* __PPC64_VDSO_H__ */ +#endif /* _ASM_POWERPC_VDSO_H */ From fe18a35e685c9bdabc8b11b3e19deb85a068b75d Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Mon, 30 Nov 2020 11:44:04 +1100 Subject: [PATCH 109/304] powerpc/64: Fix an EMIT_BUG_ENTRY in head_64.S Commit 63ce271b5e37 ("powerpc/prom: convert PROM_BUG() to standard trap") added an EMIT_BUG_ENTRY for the trap after the branch to start_kernel(). The EMIT_BUG_ENTRY was for the address "0b", however the trap was not labeled with "0". In GNU as, a numeric label such as "0:" may be defined many times and "0b" binds to the nearest preceding definition, so the address used for the bug entry is in relative_toc(), where the previous "0" label is. Label the trap as "0" so the correct address is used. Fixes: 63ce271b5e37 ("powerpc/prom: convert PROM_BUG() to standard trap") Signed-off-by: Jordan Niethe Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201130004404.30953-1-jniethe5@gmail.com --- arch/powerpc/kernel/head_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 3bae6286c17c..f63d01c78398 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -995,7 +995,7 @@ start_here_common: bl start_kernel /* Not reached */ - trap +0: trap EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0 .previous From a21df7a1d6ca9bd387a17841863a99431c4aa730 Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Fri, 27 Nov 2020 15:07:37 +0800 Subject: [PATCH 110/304] powerpc: Use common STABS_DEBUG and DWARF_DEBUG and ELF_DETAILS macro Use the common STABS_DEBUG and DWARF_DEBUG and ELF_DETAILS macro rule for the linker script in an effort to standardize the linker scripts.
Signed-off-by: Youling Tang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1606460857-2723-1-git-send-email-tangyouling@loongson.cn --- arch/powerpc/kernel/vdso32/vdso32.lds.S | 42 +++---------------------- arch/powerpc/kernel/vdso64/vdso64.lds.S | 42 +++---------------------- 2 files changed, 8 insertions(+), 76 deletions(-) diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index 8bf958a61045..a4b806b0d618 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -5,6 +5,7 @@ */ #include #include +#include #ifdef __LITTLE_ENDIAN__ OUTPUT_FORMAT("elf32-powerpcle", "elf32-powerpcle", "elf32-powerpcle") @@ -78,44 +79,9 @@ SECTIONS __end = .; PROVIDE(end = .); - /* - * Stabs debugging sections are here too. - */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } - - /* - * DWARF debug sections. - * Symbols in the DWARF debugging sections are relative to the beginning - * of the section so we begin them at 0. - */ - /* DWARF 1 */ - .debug 0 : { *(.debug) } - .line 0 : { *(.line) } - /* GNU DWARF 1 extensions */ - .debug_srcinfo 0 : { *(.debug_srcinfo) } - .debug_sfnames 0 : { *(.debug_sfnames) } - /* DWARF 1.1 and DWARF 2 */ - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - /* DWARF 2 */ - .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line) } - .debug_frame 0 : { *(.debug_frame) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } - /* SGI/MIPS DWARF 2 extensions */ - .debug_weaknames 0 : { *(.debug_weaknames) } - .debug_funcnames 0 : { *(.debug_funcnames) } - .debug_typenames 0 : { *(.debug_typenames) } - .debug_varnames 0 : { *(.debug_varnames) } + STABS_DEBUG + DWARF_DEBUG + ELF_DETAILS /DISCARD/ : { *(.note.GNU-stack) diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 20f4366db1df..6164d1a1ba11 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -5,6 +5,7 @@ */ #include #include +#include #ifdef __LITTLE_ENDIAN__ OUTPUT_FORMAT("elf64-powerpcle", "elf64-powerpcle", "elf64-powerpcle") @@ -76,44 +77,9 @@ SECTIONS _end = .; PROVIDE(end = .); - /* - * Stabs debugging sections are here too. - */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } - - /* - * DWARF debug sections. - * Symbols in the DWARF debugging sections are relative to the beginning - * of the section so we begin them at 0. 
- */ - /* DWARF 1 */ - .debug 0 : { *(.debug) } - .line 0 : { *(.line) } - /* GNU DWARF 1 extensions */ - .debug_srcinfo 0 : { *(.debug_srcinfo) } - .debug_sfnames 0 : { *(.debug_sfnames) } - /* DWARF 1.1 and DWARF 2 */ - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - /* DWARF 2 */ - .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line) } - .debug_frame 0 : { *(.debug_frame) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } - /* SGI/MIPS DWARF 2 extensions */ - .debug_weaknames 0 : { *(.debug_weaknames) } - .debug_funcnames 0 : { *(.debug_funcnames) } - .debug_typenames 0 : { *(.debug_typenames) } - .debug_varnames 0 : { *(.debug_varnames) } + STABS_DEBUG + DWARF_DEBUG + ELF_DETAILS /DISCARD/ : { *(.note.GNU-stack) From f3e90408019b353fd1fcd338091fb8d3c4a1c1a5 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 30 Nov 2020 09:14:06 +0530 Subject: [PATCH 111/304] powerpc/xmon: Fix build failure for 8xx With CONFIG_PPC_8xx and CONFIG_XMON set, kernel build fails with arch/powerpc/xmon/xmon.c:1379:12: error: 'find_free_data_bpt' defined but not used [-Werror=unused-function] Fix it by enclosing find_free_data_bpt() inside #ifndef CONFIG_PPC_8xx. Fixes: 30df74d67d48 ("powerpc/watchpoint/xmon: Support 2nd DAWR") Reported-by: kernel test robot Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201130034406.288047-1-ravi.bangoria@linux.ibm.com --- arch/powerpc/xmon/xmon.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 55c43a6c9111..5559edf36756 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1383,6 +1383,7 @@ static long check_bp_loc(unsigned long addr) return 1; } +#ifndef CONFIG_PPC_8xx static int find_free_data_bpt(void) { int i; @@ -1394,6 +1395,7 @@ static int find_free_data_bpt(void) printf("Couldn't find free breakpoint register\n"); return -1; } +#endif static void print_data_bpts(void) { From 1baa1f70ef77c4447628992ad50ab83213e2eb6c Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Tue, 1 Dec 2020 11:52:03 +1100 Subject: [PATCH 112/304] powerpc: Allow relative pointers in bug table entries This enables GENERIC_BUG_RELATIVE_POINTERS on Power so that 32-bit offsets are stored in the bug entries rather than 64-bit pointers. While this doesn't save space for 32-bit machines, use it anyway so there is only one code path. 
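For illustration, this is roughly how a consumer recovers the absolute
address from the self-relative offset the bug table now stores. It is a
sketch of the generic scheme (the field and function names mirror the
generic struct bug_entry), not code taken from this patch:

	/* A 32-bit displacement stored relative to its own location. */
	struct bug_entry {
		signed int bug_addr_disp;	/* trap address - &bug_addr_disp */
		unsigned short flags;
	};

	static unsigned long bug_addr(const struct bug_entry *bug)
	{
		/*
		 * Adding the displacement back to the address it was
		 * stored at undoes the "\addr - 5001b" subtraction done
		 * at assembly time.
		 */
		return (unsigned long)&bug->bug_addr_disp + bug->bug_addr_disp;
	}

The xmon hunk below does the same arithmetic inline when printing the
BUG location.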
Signed-off-by: Jordan Niethe Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201201005203.15210-1-jniethe5@gmail.com --- arch/powerpc/Kconfig | 4 ++++ arch/powerpc/include/asm/bug.h | 8 ++++---- arch/powerpc/xmon/xmon.c | 4 ++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 8d12da224cb9..9e679ba0811c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -313,6 +313,10 @@ config GENERIC_BUG default y depends on BUG +config GENERIC_BUG_RELATIVE_POINTERS + def_bool y + depends on GENERIC_BUG + config SYS_SUPPORTS_APM_EMULATION default y if PMAC_APM_EMU bool diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 338f36cd9934..ba0500872cce 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -12,7 +12,7 @@ #ifdef CONFIG_DEBUG_BUGVERBOSE .macro EMIT_BUG_ENTRY addr,file,line,flags .section __bug_table,"aw" -5001: PPC_LONG \addr, 5002f +5001: .4byte \addr - 5001b, 5002f - 5001b .short \line, \flags .org 5001b+BUG_ENTRY_SIZE .previous @@ -23,7 +23,7 @@ #else .macro EMIT_BUG_ENTRY addr,file,line,flags .section __bug_table,"aw" -5001: PPC_LONG \addr +5001: .4byte \addr - 5001b .short \flags .org 5001b+BUG_ENTRY_SIZE .previous @@ -36,14 +36,14 @@ #ifdef CONFIG_DEBUG_BUGVERBOSE #define _EMIT_BUG_ENTRY \ ".section __bug_table,\"aw\"\n" \ - "2:\t" PPC_LONG "1b, %0\n" \ + "2:\t.4byte 1b - 2b, %0 - 2b\n" \ "\t.short %1, %2\n" \ ".org 2b+%3\n" \ ".previous\n" #else #define _EMIT_BUG_ENTRY \ ".section __bug_table,\"aw\"\n" \ - "2:\t" PPC_LONG "1b\n" \ + "2:\t.4byte 1b - 2b\n" \ "\t.short %2\n" \ ".org 2b+%3\n" \ ".previous\n" diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 5559edf36756..dcd817ca2edf 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1747,9 +1747,9 @@ static void print_bug_trap(struct pt_regs *regs) #ifdef CONFIG_DEBUG_BUGVERBOSE printf("kernel BUG at %s:%u!\n", - bug->file, bug->line); + (char *)bug + bug->file_disp, bug->line); #else - printf("kernel BUG at %px!\n", (void *)bug->bug_addr); + printf("kernel BUG at %px!\n", (void *)bug + bug->bug_addr_disp); #endif #endif /* CONFIG_BUG */ } From c8754c517e37270a01b0561ad46ee647a721a09b Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Mon, 30 Nov 2020 16:29:49 +0100 Subject: [PATCH 113/304] powerpc/pseries: Define PCI bus speed for Gen4 and Gen5 Update bus speed definition for PCI Gen4 and 5. 
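As an aside, the property values handled here are one-hot: bit n-1 set
means PCIe generation n. A hypothetical helper could therefore compute
the generation with ffs() instead of switching over each value. A
sketch, not what the patch does:

	#include <linux/bitops.h>

	/* Hypothetical: 0x01 -> 1 (Gen1) ... 0x10 -> 5 (Gen5). */
	static int prop_to_pcie_gen(u32 prop)
	{
		/* Reject zero and multi-bit values. */
		if (!prop || (prop & (prop - 1)))
			return -1;
		return ffs(prop);
	}

The explicit switch in the patch is arguably clearer, since it maps each
value directly to the corresponding PCIE_SPEED_* enum constant.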
Signed-off-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201130152949.26467-1-fbarrat@linux.ibm.com --- arch/powerpc/platforms/pseries/pci.c | 51 ++++++++++++---------------- 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index 911534b89c85..72a4d4167849 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -290,6 +290,25 @@ static void fixup_winbond_82c105(struct pci_dev* dev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105, fixup_winbond_82c105); +static enum pci_bus_speed prop_to_pci_speed(u32 prop) +{ + switch (prop) { + case 0x01: + return PCIE_SPEED_2_5GT; + case 0x02: + return PCIE_SPEED_5_0GT; + case 0x04: + return PCIE_SPEED_8_0GT; + case 0x08: + return PCIE_SPEED_16_0GT; + case 0x10: + return PCIE_SPEED_32_0GT; + default: + pr_debug("Unexpected PCI link speed property value\n"); + return PCI_SPEED_UNKNOWN; + } +} + int pseries_root_bridge_prepare(struct pci_host_bridge *bridge) { struct device_node *dn, *pdn; @@ -322,35 +341,7 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge) return 0; } - switch (pcie_link_speed_stats[0]) { - case 0x01: - bus->max_bus_speed = PCIE_SPEED_2_5GT; - break; - case 0x02: - bus->max_bus_speed = PCIE_SPEED_5_0GT; - break; - case 0x04: - bus->max_bus_speed = PCIE_SPEED_8_0GT; - break; - default: - bus->max_bus_speed = PCI_SPEED_UNKNOWN; - break; - } - - switch (pcie_link_speed_stats[1]) { - case 0x01: - bus->cur_bus_speed = PCIE_SPEED_2_5GT; - break; - case 0x02: - bus->cur_bus_speed = PCIE_SPEED_5_0GT; - break; - case 0x04: - bus->cur_bus_speed = PCIE_SPEED_8_0GT; - break; - default: - bus->cur_bus_speed = PCI_SPEED_UNKNOWN; - break; - } - + bus->max_bus_speed = prop_to_pci_speed(pcie_link_speed_stats[0]); + bus->cur_bus_speed = prop_to_pci_speed(pcie_link_speed_stats[1]); return 0; } From 1fc0c27b14b93b2506953ef59e965d98ccc78122 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Wed, 2 Dec 2020 01:43:44 +1100 Subject: [PATCH 114/304] powerpc/feature-fixups: use a semicolon rather than a comma In a bunch of our security flushes, we use a comma rather than a semicolon to 'terminate' an assignment. Nothing breaks, but checkpatch picks it up if you copy it into another flush. Switch to semicolons for ending statements. 
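A minimal sketch of the pitfall, using hypothetical section names: with
a trailing comma, the two assignments are glued into one expression
statement by the C comma operator, which compiles and behaves the same
but misleads the reader (and checkpatch) into thinking the first line
is a complete statement:

	long *start, *end;

	/* Accidental comma operator: one statement spanning two lines. */
	start = PTRRELOC(&__start___example_fixup),
	end = PTRRELOC(&__stop___example_fixup);

	/* Intended form: two independent statements. */
	start = PTRRELOC(&__start___example_fixup);
	end = PTRRELOC(&__stop___example_fixup);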
Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201201144344.1228421-1-dja@axtens.net --- arch/powerpc/lib/feature-fixups.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 321c12a9ef6b..47821055b94c 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -124,7 +124,7 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) long *start, *end; int i; - start = PTRRELOC(&__start___stf_entry_barrier_fixup), + start = PTRRELOC(&__start___stf_entry_barrier_fixup); end = PTRRELOC(&__stop___stf_entry_barrier_fixup); instrs[0] = 0x60000000; /* nop */ @@ -176,7 +176,7 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) long *start, *end; int i; - start = PTRRELOC(&__start___stf_exit_barrier_fixup), + start = PTRRELOC(&__start___stf_exit_barrier_fixup); end = PTRRELOC(&__stop___stf_exit_barrier_fixup); instrs[0] = 0x60000000; /* nop */ @@ -344,7 +344,7 @@ void do_rfi_flush_fixups(enum l1d_flush_type types) long *start, *end; int i; - start = PTRRELOC(&__start___rfi_flush_fixup), + start = PTRRELOC(&__start___rfi_flush_fixup); end = PTRRELOC(&__stop___rfi_flush_fixup); instrs[0] = 0x60000000; /* nop */ @@ -417,7 +417,7 @@ void do_barrier_nospec_fixups(bool enable) { void *start, *end; - start = PTRRELOC(&__start___barrier_nospec_fixup), + start = PTRRELOC(&__start___barrier_nospec_fixup); end = PTRRELOC(&__stop___barrier_nospec_fixup); do_barrier_nospec_fixups_range(enable, start, end); From f0812f6ca8299e864fe0f41bd7ffdaae3ce7630e Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Wed, 2 Dec 2020 01:44:27 +1100 Subject: [PATCH 115/304] selftests/powerpc: update .gitignore I did an in-place build of the self-tests and found that it left the tree dirty. 
Add the missed test binaries to .gitignore.

Signed-off-by: Daniel Axtens
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20201201144427.1228745-1-dja@axtens.net
---
 tools/testing/selftests/powerpc/nx-gzip/.gitignore  | 3 +++
 tools/testing/selftests/powerpc/security/.gitignore | 1 +
 tools/testing/selftests/powerpc/signal/.gitignore   | 1 +
 tools/testing/selftests/powerpc/syscalls/.gitignore | 1 +
 4 files changed, 6 insertions(+)
 create mode 100644 tools/testing/selftests/powerpc/nx-gzip/.gitignore

diff --git a/tools/testing/selftests/powerpc/nx-gzip/.gitignore b/tools/testing/selftests/powerpc/nx-gzip/.gitignore
new file mode 100644
index 000000000000..886d522d52df
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+gunz_test
+gzfht_test
diff --git a/tools/testing/selftests/powerpc/security/.gitignore b/tools/testing/selftests/powerpc/security/.gitignore
index 4257a1f156bb..93614b125ded 100644
--- a/tools/testing/selftests/powerpc/security/.gitignore
+++ b/tools/testing/selftests/powerpc/security/.gitignore
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 rfi_flush
 entry_flush
+spectre_v2
diff --git a/tools/testing/selftests/powerpc/signal/.gitignore b/tools/testing/selftests/powerpc/signal/.gitignore
index 405b5364044c..ce3375cd8e73 100644
--- a/tools/testing/selftests/powerpc/signal/.gitignore
+++ b/tools/testing/selftests/powerpc/signal/.gitignore
@@ -3,3 +3,4 @@ signal
 signal_tm
 sigfuz
 sigreturn_vdso
+sig_sc_double_restart
diff --git a/tools/testing/selftests/powerpc/syscalls/.gitignore b/tools/testing/selftests/powerpc/syscalls/.gitignore
index b00cab225476..a1e19ccdef84 100644
--- a/tools/testing/selftests/powerpc/syscalls/.gitignore
+++ b/tools/testing/selftests/powerpc/syscalls/.gitignore
@@ -1,2 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
 ipc_unmuxed
+rtas_filter

From c9344769e2b46ba28b947bec7a8a8f0a091ecd57 Mon Sep 17 00:00:00 2001
From: Harish
Date: Tue, 1 Dec 2020 14:54:03 +0530
Subject: [PATCH 116/304] selftests/powerpc: Fix uninitialized variable warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fixes an uninitialized variable warning in the bad_accesses
test, which causes the selftests build to fail on older distributions:

bad_accesses.c: In function ‘bad_access’:
bad_accesses.c:52:9: error: ‘x’ may be used uninitialized in this function [-Werror=maybe-uninitialized]
   printf("Bad - no SEGV! (%c)\n", x);
         ^
cc1: all warnings being treated as errors

Signed-off-by: Harish
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20201201092403.238182-1-harish@linux.ibm.com
---
 tools/testing/selftests/powerpc/mm/bad_accesses.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/powerpc/mm/bad_accesses.c b/tools/testing/selftests/powerpc/mm/bad_accesses.c
index fd747b2ffcfc..65d2148b05dc 100644
--- a/tools/testing/selftests/powerpc/mm/bad_accesses.c
+++ b/tools/testing/selftests/powerpc/mm/bad_accesses.c
@@ -38,7 +38,7 @@ static void segv_handler(int n, siginfo_t *info, void *ctxt_v)

 int bad_access(char *p, bool write)
 {
-	char x;
+	char x = 0;

 	fault_code = 0;
 	fault_addr = 0;

From f66de7ac4849eb42a7b18e26b8ee49e08130fd27 Mon Sep 17 00:00:00 2001
From: Athira Rajeev
Date: Tue, 1 Dec 2020 04:28:00 -0500
Subject: [PATCH 117/304] powerpc/perf: Invoke per-CPU variable access with
 disabled interrupts

The power_pmu_event_init() callback accesses the per-cpu variable
(cpu_hw_events) to check for event constraints and Branch Stack (BHRB).
Current usage is to disable preemption while accessing the per-cpu
variable, but that does not prevent a timer callback from interrupting
event_init. Fix this by using local_irq_save/restore() to make sure the
code path is invoked with interrupts disabled.

This change was tested in the Mambo simulator to ensure that, if a
timer interrupt comes in during the per-cpu access in event_init, it
will be soft masked and replayed later. For testing purposes, a
udelay() was introduced in power_pmu_event_init() to make sure a timer
interrupt arrives while in the per-cpu variable access code between
local_irq_save/restore. As expected, the timer interrupt was replayed
later during the local_irq_restore() called from power_pmu_event_init().
This was confirmed by adding a breakpoint in Mambo and checking the
backtrace when timer_interrupt was hit.
Reported-by: Sebastian Andrzej Siewior Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1606814880-1720-1-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/core-book3s.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 8e20ef6252e1..6e224650b3c9 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1912,7 +1912,7 @@ static bool is_event_blacklisted(u64 ev) static int power_pmu_event_init(struct perf_event *event) { u64 ev; - unsigned long flags; + unsigned long flags, irq_flags; struct perf_event *ctrs[MAX_HWEVENTS]; u64 events[MAX_HWEVENTS]; unsigned int cflags[MAX_HWEVENTS]; @@ -2020,7 +2020,9 @@ static int power_pmu_event_init(struct perf_event *event) if (check_excludes(ctrs, cflags, n, 1)) return -EINVAL; - cpuhw = &get_cpu_var(cpu_hw_events); + local_irq_save(irq_flags); + cpuhw = this_cpu_ptr(&cpu_hw_events); + err = power_check_constraints(cpuhw, events, cflags, n + 1); if (has_branch_stack(event)) { @@ -2031,13 +2033,13 @@ static int power_pmu_event_init(struct perf_event *event) event->attr.branch_sample_type); if (bhrb_filter == -1) { - put_cpu_var(cpu_hw_events); + local_irq_restore(irq_flags); return -EOPNOTSUPP; } cpuhw->bhrb_filter = bhrb_filter; } - put_cpu_var(cpu_hw_events); + local_irq_restore(irq_flags); if (err) return -EINVAL; From bf13718bc57ada25016d9fe80323238d0b94506e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 7 Nov 2020 12:33:05 +1000 Subject: [PATCH 118/304] powerpc: show registers when unwinding interrupt frames It's often useful to know the register state for interrupts in the stack frame. In the below example (with this patch applied), the important information is the state of the page fault. A blatant case like this probably rather should have the page fault regs passed down to the warning, but quite often there are less obvious cases where an interrupt shows up that might give some more clues. The downside is longer and more complex bug output. Bug: Write fault blocked by AMR! 
WARNING: CPU: 0 PID: 72 at arch/powerpc/include/asm/book3s/64/kup-radix.h:164 __do_page_fault+0x880/0xa90 Modules linked in: CPU: 0 PID: 72 Comm: systemd-gpt-aut Not tainted NIP: c00000000006e2f0 LR: c00000000006e2ec CTR: 0000000000000000 REGS: c00000000a4f3420 TRAP: 0700 MSR: 8000000000021033 CR: 28002840 XER: 20040000 CFAR: c000000000128be0 IRQMASK: 3 GPR00: c00000000006e2ec c00000000a4f36c0 c0000000014f0700 0000000000000020 GPR04: 0000000000000001 c000000001290f50 0000000000000001 c000000001290f80 GPR08: c000000001612b08 0000000000000000 0000000000000000 00000000ffffe0f7 GPR12: 0000000048002840 c0000000016e0000 c00c000000021c80 c000000000fd6f60 GPR16: 0000000000000000 c00000000a104698 0000000000000003 c0000000087f0000 GPR20: 0000000000000100 c0000000070330b8 0000000000000000 0000000000000004 GPR24: 0000000002000000 0000000000000300 0000000002000000 c00000000a5b0c00 GPR28: 0000000000000000 000000000a000000 00007fffb2a90038 c00000000a4f3820 NIP [c00000000006e2f0] __do_page_fault+0x880/0xa90 LR [c00000000006e2ec] __do_page_fault+0x87c/0xa90 Call Trace: [c00000000a4f36c0] [c00000000006e2ec] __do_page_fault+0x87c/0xa90 (unreliable) [c00000000a4f3780] [c000000000e1c034] do_page_fault+0x34/0x90 [c00000000a4f37b0] [c000000000008908] data_access_common_virt+0x158/0x1b0 --- interrupt: 300 at __copy_tofrom_user_base+0x9c/0x5a4 NIP: c00000000009b028 LR: c000000000802978 CTR: 0000000000000800 REGS: c00000000a4f3820 TRAP: 0300 MSR: 800000000280b033 CR: 24004840 XER: 00000000 CFAR: c00000000009aff4 DAR: 00007fffb2a90038 DSISR: 0a000000 IRQMASK: 0 GPR00: 0000000000000000 c00000000a4f3ac0 c0000000014f0700 00007fffb2a90028 GPR04: c000000008720010 0000000000010000 0000000000000000 0000000000000000 GPR08: 0000000000000000 0000000000000000 0000000000000000 0000000000000001 GPR12: 0000000000004000 c0000000016e0000 c00c000000021c80 c000000000fd6f60 GPR16: 0000000000000000 c00000000a104698 0000000000000003 c0000000087f0000 GPR20: 0000000000000100 c0000000070330b8 0000000000000000 0000000000000004 GPR24: c00000000a4f3c80 c000000008720000 0000000000010000 0000000000000000 GPR28: 0000000000010000 0000000008720000 0000000000010000 c000000001515b98 NIP [c00000000009b028] __copy_tofrom_user_base+0x9c/0x5a4 LR [c000000000802978] copyout+0x68/0xc0 --- interrupt: 300 [c00000000a4f3af0] [c0000000008074b8] copy_page_to_iter+0x188/0x540 [c00000000a4f3b50] [c00000000035c678] generic_file_buffered_read+0x358/0xd80 [c00000000a4f3c40] [c0000000004c1e90] blkdev_read_iter+0x50/0x80 [c00000000a4f3c60] [c00000000045733c] new_sync_read+0x12c/0x1c0 [c00000000a4f3d00] [c00000000045a1f0] vfs_read+0x1d0/0x240 [c00000000a4f3d50] [c00000000045a7f4] ksys_read+0x84/0x140 [c00000000a4f3da0] [c000000000033a60] system_call_exception+0x100/0x280 [c00000000a4f3e10] [c00000000000c508] system_call_common+0xf8/0x2f8 Instruction dump: eae10078 3be0000b 4bfff890 60420000 792917e1 4182ff18 3c82ffab 3884a5e0 3c62ffab 3863a6e8 480ba891 60000000 <0fe00000> 3be0000b 4bfff860 e93c0938 Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201107023305.2384874-1-npiggin@gmail.com --- arch/powerpc/kernel/process.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ba2c987b8403..293d9b2ec0fa 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1466,12 +1466,10 @@ static void print_msr_bits(unsigned long val) #define LAST_VOLATILE 12 #endif -void show_regs(struct pt_regs * regs) 
+static void __show_regs(struct pt_regs *regs) { int i, trap; - show_regs_print_info(KERN_DEFAULT); - printk("NIP: "REG" LR: "REG" CTR: "REG"\n", regs->nip, regs->link, regs->ctr); printk("REGS: %px TRAP: %04lx %s (%s)\n", @@ -1513,6 +1511,12 @@ void show_regs(struct pt_regs * regs) printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip); printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link); } +} + +void show_regs(struct pt_regs *regs) +{ + show_regs_print_info(KERN_DEFAULT); + __show_regs(regs); show_stack(current, (unsigned long *) regs->gpr[1], KERN_DEFAULT); if (!user_mode(regs)) show_instructions(regs); @@ -2178,10 +2182,14 @@ void show_stack(struct task_struct *tsk, unsigned long *stack, && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { struct pt_regs *regs = (struct pt_regs *) (sp + STACK_FRAME_OVERHEAD); + lr = regs->link; - printk("%s--- interrupt: %lx at %pS\n LR = %pS\n", - loglvl, regs->trap, - (void *)regs->nip, (void *)lr); + printk("%s--- interrupt: %lx at %pS\n", + loglvl, regs->trap, (void *)regs->nip); + __show_regs(regs); + printk("%s--- interrupt: %lx\n", + loglvl, regs->trap); + firstframe = 1; } From 92cc6bf01c7f4c5cfefd1963985c0064687ebeda Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Wed, 2 Dec 2020 10:34:53 +0530 Subject: [PATCH 119/304] powerpc: Refactor is_kvm_guest() declaration to new header Only code/declaration movement, in anticipation of doing a KVM-aware vcpu_is_preempted(). No additional changes. Signed-off-by: Srikar Dronamraju Acked-by: Waiman Long Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201202050456.164005-2-srikar@linux.vnet.ibm.com --- arch/powerpc/include/asm/firmware.h | 6 ------ arch/powerpc/include/asm/kvm_guest.h | 15 +++++++++++++++ arch/powerpc/include/asm/kvm_para.h | 2 +- arch/powerpc/kernel/firmware.c | 1 + arch/powerpc/platforms/pseries/smp.c | 1 + 5 files changed, 18 insertions(+), 7 deletions(-) create mode 100644 arch/powerpc/include/asm/kvm_guest.h diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 0b295bdb201e..aa6a5ef5d483 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -134,12 +134,6 @@ extern int ibm_nmi_interlock_token; extern unsigned int __start___fw_ftr_fixup, __stop___fw_ftr_fixup; -#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) -bool is_kvm_guest(void); -#else -static inline bool is_kvm_guest(void) { return false; } -#endif - #ifdef CONFIG_PPC_PSERIES void pseries_probe_fw_features(void); #else diff --git a/arch/powerpc/include/asm/kvm_guest.h b/arch/powerpc/include/asm/kvm_guest.h new file mode 100644 index 000000000000..d2c946dbbd2c --- /dev/null +++ b/arch/powerpc/include/asm/kvm_guest.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 IBM Corporation + */ + +#ifndef _ASM_POWERPC_KVM_GUEST_H_ +#define _ASM_POWERPC_KVM_GUEST_H_ + +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) +bool is_kvm_guest(void); +#else +static inline bool is_kvm_guest(void) { return false; } +#endif + +#endif /* _ASM_POWERPC_KVM_GUEST_H_ */ diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 744612054c94..abe1b5e82547 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -8,7 +8,7 @@ #ifndef __POWERPC_KVM_PARA_H__ #define __POWERPC_KVM_PARA_H__ -#include +#include #include diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c index 
fe48d319d490..5f48e5ad24cd 100644 --- a/arch/powerpc/kernel/firmware.c +++ b/arch/powerpc/kernel/firmware.c @@ -14,6 +14,7 @@ #include #include +#include #ifdef CONFIG_PPC64 unsigned long powerpc_firmware_features __read_mostly; diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 92922491a81c..d578732c545d 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -42,6 +42,7 @@ #include #include #include +#include #include "pseries.h" From 16520a858a995742c2d2248e86a6026bd0316562 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Wed, 2 Dec 2020 10:34:54 +0530 Subject: [PATCH 120/304] powerpc: Rename is_kvm_guest() to check_kvm_guest() We want to reuse the is_kvm_guest() name in a subsequent patch but with a new body. Hence rename is_kvm_guest() to check_kvm_guest(). No additional changes. Signed-off-by: Srikar Dronamraju Acked-by: Waiman Long Signed-off-by: kernel test robot # int -> bool fix [mpe: Fold in fix from lkp to use true/false not 0/1] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201202050456.164005-3-srikar@linux.vnet.ibm.com --- arch/powerpc/include/asm/kvm_guest.h | 4 ++-- arch/powerpc/include/asm/kvm_para.h | 2 +- arch/powerpc/kernel/firmware.c | 8 ++++---- arch/powerpc/platforms/pseries/smp.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_guest.h b/arch/powerpc/include/asm/kvm_guest.h index d2c946dbbd2c..d7749ecb30d4 100644 --- a/arch/powerpc/include/asm/kvm_guest.h +++ b/arch/powerpc/include/asm/kvm_guest.h @@ -7,9 +7,9 @@ #define _ASM_POWERPC_KVM_GUEST_H_ #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) -bool is_kvm_guest(void); +bool check_kvm_guest(void); #else -static inline bool is_kvm_guest(void) { return false; } +static inline bool check_kvm_guest(void) { return false; } #endif #endif /* _ASM_POWERPC_KVM_GUEST_H_ */ diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index abe1b5e82547..6fba06b6cfdb 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -14,7 +14,7 @@ static inline int kvm_para_available(void) { - return IS_ENABLED(CONFIG_KVM_GUEST) && is_kvm_guest(); + return IS_ENABLED(CONFIG_KVM_GUEST) && check_kvm_guest(); } static inline unsigned int kvm_arch_para_features(void) diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c index 5f48e5ad24cd..c3140c6084c9 100644 --- a/arch/powerpc/kernel/firmware.c +++ b/arch/powerpc/kernel/firmware.c @@ -22,17 +22,17 @@ EXPORT_SYMBOL_GPL(powerpc_firmware_features); #endif #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) -bool is_kvm_guest(void) +bool check_kvm_guest(void) { struct device_node *hyper_node; hyper_node = of_find_node_by_path("/hypervisor"); if (!hyper_node) - return 0; + return false; if (!of_device_is_compatible(hyper_node, "linux,kvm")) - return 0; + return false; - return 1; + return true; } #endif diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index d578732c545d..c70b4be9f0a5 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -211,7 +211,7 @@ static __init void pSeries_smp_probe(void) if (!cpu_has_feature(CPU_FTR_SMT)) return; - if (is_kvm_guest()) { + if (check_kvm_guest()) { /* * KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp * faults to the hypervisor which then reads the instruction From 
a21d1becaa3f17a97b933ffa677b526afc514ec5 Mon Sep 17 00:00:00 2001
From: Srikar Dronamraju
Date: Wed, 2 Dec 2020 10:34:55 +0530
Subject: [PATCH 121/304] powerpc: Reintroduce is_kvm_guest() as a fast-path
 check

Introduce a static branch that is set during boot if the OS happens to
be a KVM guest. Subsequent checks to see if we are on KVM will rely on
this static branch. The static branch will be used in
vcpu_is_preempted() in a subsequent patch.

Signed-off-by: Srikar Dronamraju
Acked-by: Waiman Long
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20201202050456.164005-4-srikar@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/kvm_guest.h | 10 ++++++++++
 arch/powerpc/include/asm/kvm_para.h  |  2 +-
 arch/powerpc/kernel/firmware.c       |  2 ++
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/kvm_guest.h b/arch/powerpc/include/asm/kvm_guest.h
index d7749ecb30d4..2fca299f7e19 100644
--- a/arch/powerpc/include/asm/kvm_guest.h
+++ b/arch/powerpc/include/asm/kvm_guest.h
@@ -7,8 +7,18 @@
 #define _ASM_POWERPC_KVM_GUEST_H_

 #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST)
+#include
+
+DECLARE_STATIC_KEY_FALSE(kvm_guest);
+
+static inline bool is_kvm_guest(void)
+{
+	return static_branch_unlikely(&kvm_guest);
+}
+
 bool check_kvm_guest(void);
 #else
+static inline bool is_kvm_guest(void) { return false; }
 static inline bool check_kvm_guest(void) { return false; }
 #endif

diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h
index 6fba06b6cfdb..abe1b5e82547 100644
--- a/arch/powerpc/include/asm/kvm_para.h
+++ b/arch/powerpc/include/asm/kvm_para.h
@@ -14,7 +14,7 @@

 static inline int kvm_para_available(void)
 {
-	return IS_ENABLED(CONFIG_KVM_GUEST) && check_kvm_guest();
+	return IS_ENABLED(CONFIG_KVM_GUEST) && is_kvm_guest();
 }

 static inline unsigned int kvm_arch_para_features(void)
diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c
index c3140c6084c9..c9e2819b095a 100644
--- a/arch/powerpc/kernel/firmware.c
+++ b/arch/powerpc/kernel/firmware.c
@@ -22,6 +22,7 @@ EXPORT_SYMBOL_GPL(powerpc_firmware_features);
 #endif

 #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST)
+DEFINE_STATIC_KEY_FALSE(kvm_guest);
 bool check_kvm_guest(void)
 {
 	struct device_node *hyper_node;
@@ -33,6 +34,7 @@ bool check_kvm_guest(void)
 	if (!of_device_is_compatible(hyper_node, "linux,kvm"))
 		return false;

+	static_branch_enable(&kvm_guest);
 	return true;
 }
 #endif

From ca3f969dcb111d35674b66bdcb72beb2c426b9b5 Mon Sep 17 00:00:00 2001
From: Srikar Dronamraju
Date: Wed, 2 Dec 2020 10:34:56 +0530
Subject: [PATCH 122/304] powerpc/paravirt: Use is_kvm_guest() in
 vcpu_is_preempted()

If it's a shared LPAR but not a KVM guest, then see whether the vCPU is
related to the calling vCPU. On PowerVM, only whole cores can be
preempted, so if one vCPU is in a non-preempted state, we can infer that
all other vCPUs sharing the same core are also not preempted.
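To make "core granularity" concrete: cpu_first_thread_sibling() reduces
a CPU number to the first hardware thread of its core. A sketch of that
mapping, assuming threads_per_core is a power of two (which is how the
real helper behaves):

	/* E.g. with 8 threads per core, CPUs 0..7 all map to 0. */
	static inline int first_thread_of_core(int cpu)
	{
		return cpu & ~(threads_per_core - 1);
	}

Two vCPUs compare equal under this mapping exactly when they share a
core, which is what the vcpu_is_preempted() change below relies on.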
Performance results:

  $ perf stat -r 5 -a perf bench sched pipe -l 10000000 (lesser time is better)

powerpc/next

     35,107,951.20 msec cpu-clock                #  255.898 CPUs utilized            ( +-  0.31% )
        23,655,348      context-switches         #    0.674 K/sec                    ( +-  3.72% )
            14,465      cpu-migrations           #    0.000 K/sec                    ( +-  5.37% )
            82,463      page-faults              #    0.002 K/sec                    ( +-  8.40% )
 1,127,182,328,206      cycles                   #    0.032 GHz                      ( +-  1.60% )  (66.67%)
    78,587,300,622      stalled-cycles-frontend  #    6.97% frontend cycles idle     ( +-  0.08% )  (50.01%)
   654,124,218,432      stalled-cycles-backend   #   58.03% backend cycles idle      ( +-  1.74% )  (50.01%)
   834,013,059,242      instructions             #    0.74  insn per cycle
                                                 #    0.78  stalled cycles per insn  ( +-  0.73% )  (66.67%)
   132,911,454,387      branches                 #    3.786 M/sec                    ( +-  0.59% )  (50.00%)
     2,890,882,143      branch-misses            #    2.18% of all branches          ( +-  0.46% )  (50.00%)

           137.195 +- 0.419 seconds time elapsed  ( +-  0.31% )

powerpc/next + patchset

     29,981,702.64 msec cpu-clock                #  255.881 CPUs utilized            ( +-  1.30% )
        40,162,456      context-switches         #    0.001 M/sec                    ( +-  0.01% )
             1,110      cpu-migrations           #    0.000 K/sec                    ( +-  5.20% )
            62,616      page-faults              #    0.002 K/sec                    ( +-  3.93% )
 1,430,030,626,037      cycles                   #    0.048 GHz                      ( +-  1.41% )  (66.67%)
    83,202,707,288      stalled-cycles-frontend  #    5.82% frontend cycles idle     ( +-  0.75% )  (50.01%)
   744,556,088,520      stalled-cycles-backend   #   52.07% backend cycles idle      ( +-  1.39% )  (50.01%)
   940,138,418,674      instructions             #    0.66  insn per cycle
                                                 #    0.79  stalled cycles per insn  ( +-  0.51% )  (66.67%)
   146,452,852,283      branches                 #    4.885 M/sec                    ( +-  0.80% )  (50.00%)
     3,237,743,996      branch-misses            #    2.21% of all branches          ( +-  1.18% )  (50.01%)

            117.17 +- 1.52 seconds time elapsed  ( +-  1.30% )

This is around a 14.6% improvement in performance.

Signed-off-by: Srikar Dronamraju
Acked-by: Waiman Long
[mpe: Fold in performance results from cover letter]
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20201202050456.164005-5-srikar@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/paravirt.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h
index 9362c94fe3aa..edc08f04aef7 100644
--- a/arch/powerpc/include/asm/paravirt.h
+++ b/arch/powerpc/include/asm/paravirt.h
@@ -10,6 +10,9 @@
 #endif

 #ifdef CONFIG_PPC_SPLPAR
+#include
+#include
+
 DECLARE_STATIC_KEY_FALSE(shared_processor);

 static inline bool is_shared_processor(void)
@@ -74,6 +77,21 @@ static inline bool vcpu_is_preempted(int cpu)
 {
 	if (!is_shared_processor())
 		return false;
+
+#ifdef CONFIG_PPC_SPLPAR
+	if (!is_kvm_guest()) {
+		int first_cpu = cpu_first_thread_sibling(smp_processor_id());
+
+		/*
+		 * Preemption can only happen at core granularity. This CPU
+		 * is not preempted if one of the CPU of this core is not
+		 * preempted.
+		 */
+		if (cpu_first_thread_sibling(cpu) == first_cpu)
+			return false;
+	}
+#endif
+
 	if (yield_count_of(cpu) & 1)
 		return true;
 	return false;

From 7ff94669e7d8e50756cd57947283381ae9665759 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?=
Date: Thu, 26 Nov 2020 17:59:49 +0100
Subject: [PATCH 123/304] ALSA: ppc: drop if block with always false condition
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The remove callback is only called for devices that were probed
successfully before. As the matching probe function cannot complete
without error if dev->match_id != PS3_MATCH_ID_SOUND, we don't have to
check this here.
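The guarantee comes from the probe side. Paraphrased (not quoted from
the driver), the probe function starts with a guard along these lines,
so the device core never binds, and therefore never unbinds, a device
with any other match_id:

	static int snd_ps3_driver_probe(struct ps3_system_bus_device *dev)
	{
		/* Paraphrased guard that makes the remove()-side check
		 * unreachable; the exact errno is illustrative. */
		if (dev->match_id != PS3_MATCH_ID_SOUND)
			return -ENODEV;
		/* ... rest of probe ... */
	}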
Signed-off-by: Uwe Kleine-König Reviewed-by: Geert Uytterhoeven Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201126165950.2554997-1-u.kleine-koenig@pengutronix.de --- sound/ppc/snd_ps3.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/ppc/snd_ps3.c b/sound/ppc/snd_ps3.c index 58bb49fff184..6ab796a5d936 100644 --- a/sound/ppc/snd_ps3.c +++ b/sound/ppc/snd_ps3.c @@ -1053,8 +1053,6 @@ static int snd_ps3_driver_remove(struct ps3_system_bus_device *dev) { int ret; pr_info("%s:start id=%d\n", __func__, dev->match_id); - if (dev->match_id != PS3_MATCH_ID_SOUND) - return -ENXIO; /* * ctl and preallocate buffer will be freed in From 6d247e4d264961aa3b871290f9b11a48d5a567f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 26 Nov 2020 17:59:50 +0100 Subject: [PATCH 124/304] powerpc/ps3: make system bus's remove and shutdown callbacks return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver core ignores the return value of struct device_driver::remove because there is only little that can be done. For the shutdown callback it's ps3_system_bus_shutdown() which ignores the return value. To simplify the quest to make struct device_driver::remove return void, let struct ps3_system_bus_driver::remove return void, too. All users already unconditionally return 0, this commit makes it obvious that returning an error code is a bad idea and ensures future users behave accordingly. Signed-off-by: Uwe Kleine-König Reviewed-by: Geert Uytterhoeven Acked-by: Takashi Iwai Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201126165950.2554997-2-u.kleine-koenig@pengutronix.de --- arch/powerpc/include/asm/ps3.h | 4 ++-- arch/powerpc/platforms/ps3/system-bus.c | 5 ++--- drivers/block/ps3disk.c | 3 +-- drivers/block/ps3vram.c | 3 +-- drivers/char/ps3flash.c | 3 +-- drivers/net/ethernet/toshiba/ps3_gelic_net.c | 3 +-- drivers/ps3/ps3-lpm.c | 3 +-- drivers/ps3/ps3-vuart.c | 10 ++++------ drivers/scsi/ps3rom.c | 3 +-- drivers/usb/host/ehci-ps3.c | 4 +--- drivers/usb/host/ohci-ps3.c | 4 +--- drivers/video/fbdev/ps3fb.c | 4 +--- sound/ppc/snd_ps3.c | 3 +-- 13 files changed, 18 insertions(+), 34 deletions(-) diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h index cb89e4bf55ce..e646c7f218bc 100644 --- a/arch/powerpc/include/asm/ps3.h +++ b/arch/powerpc/include/asm/ps3.h @@ -378,8 +378,8 @@ struct ps3_system_bus_driver { enum ps3_match_sub_id match_sub_id; struct device_driver core; int (*probe)(struct ps3_system_bus_device *); - int (*remove)(struct ps3_system_bus_device *); - int (*shutdown)(struct ps3_system_bus_device *); + void (*remove)(struct ps3_system_bus_device *); + void (*shutdown)(struct ps3_system_bus_device *); /* int (*suspend)(struct ps3_system_bus_device *, pm_message_t); */ /* int (*resume)(struct ps3_system_bus_device *); */ }; diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index c62aaa29a9d5..b431f41c6cb5 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -382,7 +382,6 @@ static int ps3_system_bus_probe(struct device *_dev) static int ps3_system_bus_remove(struct device *_dev) { - int result = 0; struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); struct ps3_system_bus_driver *drv; @@ -393,13 +392,13 @@ static int ps3_system_bus_remove(struct device *_dev) BUG_ON(!drv); if (drv->remove) - result = drv->remove(dev); + drv->remove(dev); else 
dev_dbg(&dev->core, "%s:%d %s: no remove method\n", __func__, __LINE__, drv->core.name); pr_debug(" <- %s:%d: %s\n", __func__, __LINE__, dev_name(&dev->core)); - return result; + return 0; } static void ps3_system_bus_shutdown(struct device *_dev) diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 7b55811c2a81..ba3ece56cbb3 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -507,7 +507,7 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) return error; } -static int ps3disk_remove(struct ps3_system_bus_device *_dev) +static void ps3disk_remove(struct ps3_system_bus_device *_dev) { struct ps3_storage_device *dev = to_ps3_storage_device(&_dev->core); struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd); @@ -526,7 +526,6 @@ static int ps3disk_remove(struct ps3_system_bus_device *_dev) kfree(dev->bounce_buf); kfree(priv); ps3_system_bus_set_drvdata(_dev, NULL); - return 0; } static struct ps3_system_bus_driver ps3disk = { diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index 1088798c8dd0..b71d28372ef3 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -797,7 +797,7 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev) return error; } -static int ps3vram_remove(struct ps3_system_bus_device *dev) +static void ps3vram_remove(struct ps3_system_bus_device *dev) { struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); @@ -817,7 +817,6 @@ static int ps3vram_remove(struct ps3_system_bus_device *dev) free_pages((unsigned long) priv->xdr_buf, get_order(XDR_BUF_SIZE)); kfree(priv); ps3_system_bus_set_drvdata(dev, NULL); - return 0; } static struct ps3_system_bus_driver ps3vram = { diff --git a/drivers/char/ps3flash.c b/drivers/char/ps3flash.c index 1a07fee33f66..23871cde41fb 100644 --- a/drivers/char/ps3flash.c +++ b/drivers/char/ps3flash.c @@ -403,7 +403,7 @@ static int ps3flash_probe(struct ps3_system_bus_device *_dev) return error; } -static int ps3flash_remove(struct ps3_system_bus_device *_dev) +static void ps3flash_remove(struct ps3_system_bus_device *_dev) { struct ps3_storage_device *dev = to_ps3_storage_device(&_dev->core); @@ -413,7 +413,6 @@ static int ps3flash_remove(struct ps3_system_bus_device *_dev) kfree(ps3_system_bus_get_drvdata(&dev->sbd)); ps3_system_bus_set_drvdata(&dev->sbd, NULL); ps3flash_dev = NULL; - return 0; } diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index d9a5722f561b..3d1fc8d2ca66 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -1791,7 +1791,7 @@ static int ps3_gelic_driver_probe(struct ps3_system_bus_device *dev) * ps3_gelic_driver_remove - remove a device from the control of this driver */ -static int ps3_gelic_driver_remove(struct ps3_system_bus_device *dev) +static void ps3_gelic_driver_remove(struct ps3_system_bus_device *dev) { struct gelic_card *card = ps3_system_bus_get_drvdata(dev); struct net_device *netdev0; @@ -1840,7 +1840,6 @@ static int ps3_gelic_driver_remove(struct ps3_system_bus_device *dev) ps3_close_hv_device(dev); pr_debug("%s: done\n", __func__); - return 0; } static struct ps3_system_bus_driver ps3_gelic_driver = { diff --git a/drivers/ps3/ps3-lpm.c b/drivers/ps3/ps3-lpm.c index e54aa2d82f50..65512b6cc6fd 100644 --- a/drivers/ps3/ps3-lpm.c +++ b/drivers/ps3/ps3-lpm.c @@ -1196,7 +1196,7 @@ static int ps3_lpm_probe(struct ps3_system_bus_device *dev) return 0; } -static int ps3_lpm_remove(struct ps3_system_bus_device 
*dev) +static void ps3_lpm_remove(struct ps3_system_bus_device *dev) { dev_dbg(&dev->core, " -> %s:%u:\n", __func__, __LINE__); @@ -1206,7 +1206,6 @@ static int ps3_lpm_remove(struct ps3_system_bus_device *dev) lpm_priv = NULL; dev_info(&dev->core, " <- %s:%u:\n", __func__, __LINE__); - return 0; } static struct ps3_system_bus_driver ps3_lpm_driver = { diff --git a/drivers/ps3/ps3-vuart.c b/drivers/ps3/ps3-vuart.c index 4ed131eaff51..e34ae6a442c7 100644 --- a/drivers/ps3/ps3-vuart.c +++ b/drivers/ps3/ps3-vuart.c @@ -1102,7 +1102,7 @@ static int ps3_vuart_cleanup(struct ps3_system_bus_device *dev) * device can no longer be used. */ -static int ps3_vuart_remove(struct ps3_system_bus_device *dev) +static void ps3_vuart_remove(struct ps3_system_bus_device *dev) { struct ps3_vuart_port_priv *priv = to_port_priv(dev); struct ps3_vuart_port_driver *drv; @@ -1118,7 +1118,7 @@ static int ps3_vuart_remove(struct ps3_system_bus_device *dev) dev_dbg(&dev->core, "%s:%d: no driver bound\n", __func__, __LINE__); mutex_unlock(&vuart_bus_priv.probe_mutex); - return 0; + return; } drv = ps3_system_bus_dev_to_vuart_drv(dev); @@ -1141,7 +1141,6 @@ static int ps3_vuart_remove(struct ps3_system_bus_device *dev) dev_dbg(&dev->core, " <- %s:%d\n", __func__, __LINE__); mutex_unlock(&vuart_bus_priv.probe_mutex); - return 0; } /** @@ -1154,7 +1153,7 @@ static int ps3_vuart_remove(struct ps3_system_bus_device *dev) * sequence. */ -static int ps3_vuart_shutdown(struct ps3_system_bus_device *dev) +static void ps3_vuart_shutdown(struct ps3_system_bus_device *dev) { struct ps3_vuart_port_driver *drv; @@ -1169,7 +1168,7 @@ static int ps3_vuart_shutdown(struct ps3_system_bus_device *dev) dev_dbg(&dev->core, "%s:%d: no driver bound\n", __func__, __LINE__); mutex_unlock(&vuart_bus_priv.probe_mutex); - return 0; + return; } drv = ps3_system_bus_dev_to_vuart_drv(dev); @@ -1193,7 +1192,6 @@ static int ps3_vuart_shutdown(struct ps3_system_bus_device *dev) dev_dbg(&dev->core, " <- %s:%d\n", __func__, __LINE__); mutex_unlock(&vuart_bus_priv.probe_mutex); - return 0; } static int __init ps3_vuart_bus_init(void) diff --git a/drivers/scsi/ps3rom.c b/drivers/scsi/ps3rom.c index f75c0b5cd587..ccb5771f1cb7 100644 --- a/drivers/scsi/ps3rom.c +++ b/drivers/scsi/ps3rom.c @@ -402,7 +402,7 @@ static int ps3rom_probe(struct ps3_system_bus_device *_dev) return error; } -static int ps3rom_remove(struct ps3_system_bus_device *_dev) +static void ps3rom_remove(struct ps3_system_bus_device *_dev) { struct ps3_storage_device *dev = to_ps3_storage_device(&_dev->core); struct Scsi_Host *host = ps3_system_bus_get_drvdata(&dev->sbd); @@ -412,7 +412,6 @@ static int ps3rom_remove(struct ps3_system_bus_device *_dev) scsi_host_put(host); ps3_system_bus_set_drvdata(&dev->sbd, NULL); kfree(dev->bounce_buf); - return 0; } static struct ps3_system_bus_driver ps3rom = { diff --git a/drivers/usb/host/ehci-ps3.c b/drivers/usb/host/ehci-ps3.c index fb52133c3557..98568b046a1a 100644 --- a/drivers/usb/host/ehci-ps3.c +++ b/drivers/usb/host/ehci-ps3.c @@ -200,7 +200,7 @@ static int ps3_ehci_probe(struct ps3_system_bus_device *dev) return result; } -static int ps3_ehci_remove(struct ps3_system_bus_device *dev) +static void ps3_ehci_remove(struct ps3_system_bus_device *dev) { unsigned int tmp; struct usb_hcd *hcd = ps3_system_bus_get_drvdata(dev); @@ -227,8 +227,6 @@ static int ps3_ehci_remove(struct ps3_system_bus_device *dev) ps3_dma_region_free(dev->d_region); ps3_close_hv_device(dev); - - return 0; } static int __init ps3_ehci_driver_register(struct 
ps3_system_bus_driver *drv) diff --git a/drivers/usb/host/ohci-ps3.c b/drivers/usb/host/ohci-ps3.c index f77cd6af0ccf..4f5af929c3e4 100644 --- a/drivers/usb/host/ohci-ps3.c +++ b/drivers/usb/host/ohci-ps3.c @@ -184,7 +184,7 @@ static int ps3_ohci_probe(struct ps3_system_bus_device *dev) return result; } -static int ps3_ohci_remove(struct ps3_system_bus_device *dev) +static void ps3_ohci_remove(struct ps3_system_bus_device *dev) { unsigned int tmp; struct usb_hcd *hcd = ps3_system_bus_get_drvdata(dev); @@ -212,8 +212,6 @@ static int ps3_ohci_remove(struct ps3_system_bus_device *dev) ps3_dma_region_free(dev->d_region); ps3_close_hv_device(dev); - - return 0; } static int __init ps3_ohci_driver_register(struct ps3_system_bus_driver *drv) diff --git a/drivers/video/fbdev/ps3fb.c b/drivers/video/fbdev/ps3fb.c index 203c254f8f6c..2fe08b67eda7 100644 --- a/drivers/video/fbdev/ps3fb.c +++ b/drivers/video/fbdev/ps3fb.c @@ -1208,7 +1208,7 @@ static int ps3fb_probe(struct ps3_system_bus_device *dev) return retval; } -static int ps3fb_shutdown(struct ps3_system_bus_device *dev) +static void ps3fb_shutdown(struct ps3_system_bus_device *dev) { struct fb_info *info = ps3_system_bus_get_drvdata(dev); u64 xdr_lpar = ps3_mm_phys_to_lpar(__pa(ps3fb_videomemory.address)); @@ -1241,8 +1241,6 @@ static int ps3fb_shutdown(struct ps3_system_bus_device *dev) lv1_gpu_memory_free(ps3fb.memory_handle); ps3_close_hv_device(dev); dev_dbg(&dev->core, " <- %s:%d\n", __func__, __LINE__); - - return 0; } static struct ps3_system_bus_driver ps3fb_driver = { diff --git a/sound/ppc/snd_ps3.c b/sound/ppc/snd_ps3.c index 6ab796a5d936..8e44fa5d4dc7 100644 --- a/sound/ppc/snd_ps3.c +++ b/sound/ppc/snd_ps3.c @@ -1049,7 +1049,7 @@ static int snd_ps3_driver_probe(struct ps3_system_bus_device *dev) }; /* snd_ps3_probe */ /* called when module removal */ -static int snd_ps3_driver_remove(struct ps3_system_bus_device *dev) +static void snd_ps3_driver_remove(struct ps3_system_bus_device *dev) { int ret; pr_info("%s:start id=%d\n", __func__, dev->match_id); @@ -1075,7 +1075,6 @@ static int snd_ps3_driver_remove(struct ps3_system_bus_device *dev) lv1_gpu_device_unmap(2); ps3_close_hv_device(dev); pr_info("%s:end id=%d\n", __func__, dev->match_id); - return 0; } /* snd_ps3_remove */ static struct ps3_system_bus_driver snd_ps3_bus_driver_info = { From 0ce2382657f39ced2adbb927355360c3aaeb05f8 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 28 Nov 2020 17:07:22 +1000 Subject: [PATCH 125/304] powerpc/64s/powernv: Allow KVM to handle guest machine check details KVM has strategies to perform machine check recovery. If a MCE hits in a guest, have the low level handler just decode and save the MCE but not try to recover anything, so KVM can deal with it. The host does not own SLBs and does not need to report the SLB state in case of a multi-hit for example, or know about the virtual memory map of the guest. UE and memory poisoning of guest pages in the host is one thing that is possibly not completely robust at the moment, but this too needs to go via KVM (possibly via the guest and back out to host via hcall) rather than being handled at a low level in the host handler. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201128070728.825934-3-npiggin@gmail.com --- arch/powerpc/kernel/mce.c | 2 +- arch/powerpc/kernel/mce_power.c | 96 ++++++++++++++++++--------------- 2 files changed, 55 insertions(+), 43 deletions(-) diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 63702c0badb9..8afe8d37b983 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -577,7 +577,7 @@ void machine_check_print_event_info(struct machine_check_event *evt, #ifdef CONFIG_PPC_BOOK3S_64 /* Display faulty slb contents for SLB errors. */ - if (evt->error_type == MCE_ERROR_TYPE_SLB) + if (evt->error_type == MCE_ERROR_TYPE_SLB && !in_guest) slb_dump_contents(local_paca->mce_faulty_slbs); #endif } diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index b7e173754a2e..1372ce3f7bdd 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -62,6 +62,20 @@ unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr) return pfn; } +static bool mce_in_guest(void) +{ +#ifdef CONFIG_KVM_BOOK3S_HANDLER + /* + * If machine check is hit when in guest context or low level KVM + * code, avoid looking up any translations or making any attempts + * to recover, just record the event and pass to KVM. + */ + if (get_paca()->kvm_hstate.in_guest) + return true; +#endif + return false; +} + /* flush SLBs and reload */ #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void) @@ -69,14 +83,6 @@ void flush_and_reload_slb(void) /* Invalidate all SLBs */ slb_flush_all_realmode(); -#ifdef CONFIG_KVM_BOOK3S_HANDLER - /* - * If machine check is hit when in guest or in transition, we will - * only flush the SLBs and continue. - */ - if (get_paca()->kvm_hstate.in_guest) - return; -#endif if (early_radix_enabled()) return; @@ -490,19 +496,21 @@ static int mce_handle_ierror(struct pt_regs *regs, if ((srr1 & table[i].srr1_mask) != table[i].srr1_value) continue; - /* attempt to correct the error */ - switch (table[i].error_type) { - case MCE_ERROR_TYPE_SLB: - if (local_paca->in_mce == 1) - slb_save_contents(local_paca->mce_faulty_slbs); - handled = mce_flush(MCE_FLUSH_SLB); - break; - case MCE_ERROR_TYPE_ERAT: - handled = mce_flush(MCE_FLUSH_ERAT); - break; - case MCE_ERROR_TYPE_TLB: - handled = mce_flush(MCE_FLUSH_TLB); - break; + if (!mce_in_guest()) { + /* attempt to correct the error */ + switch (table[i].error_type) { + case MCE_ERROR_TYPE_SLB: + if (local_paca->in_mce == 1) + slb_save_contents(local_paca->mce_faulty_slbs); + handled = mce_flush(MCE_FLUSH_SLB); + break; + case MCE_ERROR_TYPE_ERAT: + handled = mce_flush(MCE_FLUSH_ERAT); + break; + case MCE_ERROR_TYPE_TLB: + handled = mce_flush(MCE_FLUSH_TLB); + break; + } } /* now fill in mce_error_info */ @@ -534,7 +542,7 @@ static int mce_handle_ierror(struct pt_regs *regs, mce_err->sync_error = table[i].sync_error; mce_err->severity = table[i].severity; mce_err->initiator = table[i].initiator; - if (table[i].nip_valid) { + if (table[i].nip_valid && !mce_in_guest()) { *addr = regs->nip; if (mce_err->sync_error && table[i].error_type == MCE_ERROR_TYPE_UE) { @@ -577,22 +585,24 @@ static int mce_handle_derror(struct pt_regs *regs, if (!(dsisr & table[i].dsisr_value)) continue; - /* attempt to correct the error */ - switch (table[i].error_type) { - case MCE_ERROR_TYPE_SLB: - if (local_paca->in_mce == 1) - slb_save_contents(local_paca->mce_faulty_slbs); - if (mce_flush(MCE_FLUSH_SLB)) - handled = 1; - break; - case 
MCE_ERROR_TYPE_ERAT: - if (mce_flush(MCE_FLUSH_ERAT)) - handled = 1; - break; - case MCE_ERROR_TYPE_TLB: - if (mce_flush(MCE_FLUSH_TLB)) - handled = 1; - break; + if (!mce_in_guest()) { + /* attempt to correct the error */ + switch (table[i].error_type) { + case MCE_ERROR_TYPE_SLB: + if (local_paca->in_mce == 1) + slb_save_contents(local_paca->mce_faulty_slbs); + if (mce_flush(MCE_FLUSH_SLB)) + handled = 1; + break; + case MCE_ERROR_TYPE_ERAT: + if (mce_flush(MCE_FLUSH_ERAT)) + handled = 1; + break; + case MCE_ERROR_TYPE_TLB: + if (mce_flush(MCE_FLUSH_TLB)) + handled = 1; + break; + } } /* @@ -634,7 +644,7 @@ static int mce_handle_derror(struct pt_regs *regs, mce_err->initiator = table[i].initiator; if (table[i].dar_valid) *addr = regs->dar; - else if (mce_err->sync_error && + else if (mce_err->sync_error && !mce_in_guest() && table[i].error_type == MCE_ERROR_TYPE_UE) { /* * We do a maximum of 4 nested MCE calls, see @@ -662,7 +672,8 @@ static int mce_handle_derror(struct pt_regs *regs, static long mce_handle_ue_error(struct pt_regs *regs, struct mce_error_info *mce_err) { - long handled = 0; + if (mce_in_guest()) + return 0; mce_common_process_ue(regs, mce_err); if (mce_err->ignore_event) @@ -677,9 +688,10 @@ static long mce_handle_ue_error(struct pt_regs *regs, if (ppc_md.mce_check_early_recovery) { if (ppc_md.mce_check_early_recovery(regs)) - handled = 1; + return 1; } - return handled; + + return 0; } static long mce_handle_error(struct pt_regs *regs, From 067c9f9c98c8804b07751994c51d8557e440821e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 28 Nov 2020 17:07:23 +1000 Subject: [PATCH 126/304] KVM: PPC: Book3S HV: Don't attempt to recover machine checks for FWNMI enabled guests Guests that can deal with machine checks would actually prefer the hypervisor not to try recover for them. For example if SLB multi-hits are recovered by the hypervisor by clearing the SLB then the guest will not be able to log the contents and debug its programming error. If guests don't register for FWNMI, they may not be so capable and so the hypervisor will continue to recover for those. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201128070728.825934-4-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv_ras.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 6028628ea3ac..d4bca93b79f6 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -65,10 +65,9 @@ static void reload_slb(struct kvm_vcpu *vcpu) * On POWER7, see if we can handle a machine check that occurred inside * the guest in real mode, without switching to the host partition. */ -static void kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) +static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) { unsigned long srr1 = vcpu->arch.shregs.msr; - struct machine_check_event mce_evt; long handled = 1; if (srr1 & SRR1_MC_LDSTERR) { @@ -106,6 +105,21 @@ static void kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) handled = 0; } + return handled; +} + +void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu) +{ + struct machine_check_event mce_evt; + long handled; + + if (vcpu->kvm->arch.fwnmi_enabled) { + /* FWNMI guests handle their own recovery */ + handled = 0; + } else { + handled = kvmppc_realmode_mc_power7(vcpu); + } + /* * Now get the event and stash it in the vcpu struct so it can * be handled by the primary thread in virtual mode. 
We can't @@ -122,11 +136,6 @@ static void kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) vcpu->arch.mce_evt = mce_evt; } -void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu) -{ - kvmppc_realmode_mc_power7(vcpu); -} - /* Check if dynamic split is in force and return subcore size accordingly. */ static inline int kvmppc_cur_subcore_size(void) { From 1d15ffdfc94127d75e04a88344ee1ce8c79f05fd Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 28 Nov 2020 17:07:24 +1000 Subject: [PATCH 127/304] KVM: PPC: Book3S HV: Ratelimit machine check messages coming from guests A number of machine check exceptions are triggerable by the guest. Ratelimit these to avoid a guest flooding the host console and logs. Signed-off-by: Nicholas Piggin [mpe: Use dedicated ratelimit state, not printk_ratelimit()] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201128070728.825934-5-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 0faafe6f8c4e..cfaa91b27112 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1327,9 +1327,15 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, case BOOK3S_INTERRUPT_SYSTEM_RESET: r = RESUME_GUEST; break; - case BOOK3S_INTERRUPT_MACHINE_CHECK: - /* Print the MCE event to host console. */ - machine_check_print_event_info(&vcpu->arch.mce_evt, false, true); + case BOOK3S_INTERRUPT_MACHINE_CHECK: { + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + /* + * Print the MCE event to host console. Ratelimit so the guest + * can't flood the host log. + */ + if (__ratelimit(&rs)) + machine_check_print_event_info(&vcpu->arch.mce_evt,false, true); /* * If the guest can do FWNMI, exit to userspace so it can @@ -1357,6 +1363,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, r = RESUME_HOST; break; + } case BOOK3S_INTERRUPT_PROGRAM: { ulong flags; @@ -1516,11 +1523,16 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) r = RESUME_GUEST; break; case BOOK3S_INTERRUPT_MACHINE_CHECK: + { + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); /* Pass the machine check to the L1 guest */ r = RESUME_HOST; /* Print the MCE event to host console. */ - machine_check_print_event_info(&vcpu->arch.mce_evt, false, true); + if (__ratelimit(&rs)) + machine_check_print_event_info(&vcpu->arch.mce_evt, false, true); break; + } /* * We get these next two if the guest accesses a page which it thinks * it has mapped but which is not actually present, either because From f4b239e4c6bddf63d00cd460eabb933232dbc326 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 28 Nov 2020 17:07:25 +1000 Subject: [PATCH 128/304] powerpc/64s/powernv: Ratelimit harmless HMI error printing Harmless HMI errors can be triggered by guests in some cases, and don't contain much useful information anyway. Ratelimit these to avoid flooding the console/logs. 
Signed-off-by: Nicholas Piggin [mpe: Use dedicated ratelimit state, not printk_ratelimit()] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201128070728.825934-6-npiggin@gmail.com --- arch/powerpc/platforms/powernv/opal-hmi.c | 29 +++++++++++++---------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c index 3e1f064a18db..f0c1830deb51 100644 --- a/arch/powerpc/platforms/powernv/opal-hmi.c +++ b/arch/powerpc/platforms/powernv/opal-hmi.c @@ -213,6 +213,8 @@ static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt) "A hypervisor resource error occurred", "CAPP recovery process is in progress", }; + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); /* Print things out */ if (hmi_evt->version < OpalHMIEvt_V1) { @@ -240,19 +242,22 @@ static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt) break; } - printk("%s%s Hypervisor Maintenance interrupt [%s]\n", - level, sevstr, - hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED ? - "Recovered" : "Not recovered"); - error_info = hmi_evt->type < ARRAY_SIZE(hmi_error_types) ? - hmi_error_types[hmi_evt->type] - : "Unknown"; - printk("%s Error detail: %s\n", level, error_info); - printk("%s HMER: %016llx\n", level, be64_to_cpu(hmi_evt->hmer)); - if ((hmi_evt->type == OpalHMI_ERROR_TFAC) || - (hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY)) - printk("%s TFMR: %016llx\n", level, + if (hmi_evt->severity != OpalHMI_SEV_NO_ERROR || __ratelimit(&rs)) { + printk("%s%s Hypervisor Maintenance interrupt [%s]\n", + level, sevstr, + hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED ? + "Recovered" : "Not recovered"); + error_info = hmi_evt->type < ARRAY_SIZE(hmi_error_types) ? + hmi_error_types[hmi_evt->type] + : "Unknown"; + printk("%s Error detail: %s\n", level, error_info); + printk("%s HMER: %016llx\n", level, + be64_to_cpu(hmi_evt->hmer)); + if ((hmi_evt->type == OpalHMI_ERROR_TFAC) || + (hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY)) + printk("%s TFMR: %016llx\n", level, be64_to_cpu(hmi_evt->tfmr)); + } if (hmi_evt->version < OpalHMIEvt_V2) return; From 82f70a05108c98aea4f140067c44a606262d2af7 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 28 Nov 2020 17:07:26 +1000 Subject: [PATCH 129/304] powerpc/64s/pseries: Add ERAT specific machine check handler Don't treat ERAT MCEs as SLB MCEs: don't save the SLB contents, and use a specific ERAT flush to recover from them.
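In rough terms, the pseries realmode handler changed here ends up dispatching as below; this is a sketch of the resulting logic, not the literal hunk (which follows), and the SLB branch is reconstructed from context:

	switch (error_type) {
	case MC_ERROR_TYPE_ERAT:
		/* The ERAT is only a cache of translations, so an
		 * ERAT flush is a complete recovery on its own. */
		flush_erat();
		disposition = RTAS_DISP_FULLY_RECOVERED;
		break;
	case MC_ERROR_TYPE_SLB:
		/* SLB entries are real state: save them for later
		 * logging, then flush and reload the SLB. */
		if (local_paca->in_mce == 1)
			slb_save_contents(local_paca->mce_faulty_slbs);
		flush_and_reload_slb();
		disposition = RTAS_DISP_FULLY_RECOVERED;
		break;
	}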
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201128070728.825934-7-npiggin@gmail.com --- arch/powerpc/include/asm/mce.h | 1 + arch/powerpc/kernel/mce_power.c | 2 +- arch/powerpc/platforms/pseries/ras.c | 5 ++++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 89aa8248a57d..e6c27ae843dc 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -228,6 +228,7 @@ int mce_register_notifier(struct notifier_block *nb); int mce_unregister_notifier(struct notifier_block *nb); #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void); +void flush_erat(void); long __machine_check_early_realmode_p7(struct pt_regs *regs); long __machine_check_early_realmode_p8(struct pt_regs *regs); long __machine_check_early_realmode_p9(struct pt_regs *regs); diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 1372ce3f7bdd..667104d4c455 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -97,7 +97,7 @@ void flush_and_reload_slb(void) } #endif -static void flush_erat(void) +void flush_erat(void) { #ifdef CONFIG_PPC_BOOK3S_64 if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) { diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index b2b245b25edb..149cec2212e6 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -526,8 +526,11 @@ static int mce_handle_err_realmode(int disposition, u8 error_type) #ifdef CONFIG_PPC_BOOK3S_64 if (disposition == RTAS_DISP_NOT_RECOVERED) { switch (error_type) { - case MC_ERROR_TYPE_SLB: case MC_ERROR_TYPE_ERAT: + flush_erat(); + disposition = RTAS_DISP_FULLY_RECOVERED; + break; + case MC_ERROR_TYPE_SLB: /* * Store the old slb content in paca before flushing. * Print this when we go to virtual mode. From 4a869531ddbf5939c45eab6ff389e4e58c8ed19c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 28 Nov 2020 17:07:27 +1000 Subject: [PATCH 130/304] powerpc/64s: Remove "Host" from MCE logging A "Host" caused machine check is printed when the kernel sees an MCE hit in this kernel or userspace, and "Guest" if it hit one of its guests. This is confusing: when a guest kernel handles a hypervisor-delivered MCE, it also prints "Host". Just remove "Host". "Guest" is adequate to make the distinction. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201128070728.825934-8-npiggin@gmail.com --- arch/powerpc/kernel/mce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 8afe8d37b983..9f3e133b57b7 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -555,7 +555,7 @@ void machine_check_print_event_info(struct machine_check_event *evt, } printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n", - level, evt->cpu, sevstr, in_guest ? "Guest" : "Host", + level, evt->cpu, sevstr, in_guest ? "Guest" : "", err_type, subtype, dar_str, evt->disposition == MCE_DISPOSITION_RECOVERED ? "Recovered" : "Not recovered"); From 865ae6f27789dcc3f92341d935f4439e8730a9fe Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 28 Nov 2020 17:07:28 +1000 Subject: [PATCH 131/304] powerpc/64s: Tidy machine check SLB logging Since ISA v3.0, SLB no longer uses the slb_cache, and stab_rr is no longer correlated with SLB allocation. Move those to pre-3.0.
While here, improve some alignments and reduce whitespace. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201128070728.825934-9-npiggin@gmail.com --- arch/powerpc/mm/book3s64/slb.c | 37 ++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index c30fcbfa0e32..6d720c1c08a4 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -255,7 +255,6 @@ void slb_dump_contents(struct slb_entry *slb_ptr) return; pr_err("SLB contents of cpu 0x%x\n", smp_processor_id()); - pr_err("Last SLB entry inserted at slot %d\n", get_paca()->stab_rr); for (i = 0; i < mmu_slb_size; i++) { e = slb_ptr->esid; @@ -265,34 +264,38 @@ void slb_dump_contents(struct slb_entry *slb_ptr) if (!e && !v) continue; - pr_err("%02d %016lx %016lx\n", i, e, v); + pr_err("%02d %016lx %016lx %s\n", i, e, v, + (e & SLB_ESID_V) ? "VALID" : "NOT VALID"); - if (!(e & SLB_ESID_V)) { - pr_err("\n"); + if (!(e & SLB_ESID_V)) continue; - } + llp = v & SLB_VSID_LLP; if (v & SLB_VSID_B_1T) { - pr_err(" 1T ESID=%9lx VSID=%13lx LLP:%3lx\n", + pr_err(" 1T ESID=%9lx VSID=%13lx LLP:%3lx\n", GET_ESID_1T(e), (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T, llp); } else { - pr_err(" 256M ESID=%9lx VSID=%13lx LLP:%3lx\n", + pr_err(" 256M ESID=%9lx VSID=%13lx LLP:%3lx\n", GET_ESID(e), (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT, llp); } } - pr_err("----------------------------------\n"); - /* Dump slb cache entires as well. */ - pr_err("SLB cache ptr value = %d\n", get_paca()->slb_save_cache_ptr); - pr_err("Valid SLB cache entries:\n"); - n = min_t(int, get_paca()->slb_save_cache_ptr, SLB_CACHE_ENTRIES); - for (i = 0; i < n; i++) - pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]); - pr_err("Rest of SLB cache entries:\n"); - for (i = n; i < SLB_CACHE_ENTRIES; i++) - pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]); + if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) { + /* RR is not so useful as it's often not used for allocation */ + pr_err("SLB RR allocator index %d\n", get_paca()->stab_rr); + + /* Dump slb cache entires as well. 
*/ + pr_err("SLB cache ptr value = %d\n", get_paca()->slb_save_cache_ptr); + pr_err("Valid SLB cache entries:\n"); + n = min_t(int, get_paca()->slb_save_cache_ptr, SLB_CACHE_ENTRIES); + for (i = 0; i < n; i++) + pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]); + pr_err("Rest of SLB cache entries:\n"); + for (i = n; i < SLB_CACHE_ENTRIES; i++) + pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]); + } } void slb_vmalloc_update(void) From c3d35ddd1ec874690a4e8da5a18497256f1ffa9a Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 27 Nov 2020 10:14:03 +0530 Subject: [PATCH 132/304] powerpc: Add new macro to handle NESTED_IFCLR This will be used by the following patches. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201127044424.40686-2-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/feature-fixups.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index c509f784a5f6..f6d2acb57425 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -116,6 +116,9 @@ label##5: \ #define END_MMU_FTR_SECTION_NESTED_IFSET(msk, label) \ END_MMU_FTR_SECTION_NESTED((msk), (msk), label) +#define END_MMU_FTR_SECTION_NESTED_IFCLR(msk, label) \ + END_MMU_FTR_SECTION_NESTED((msk), 0, label) + #define END_MMU_FTR_SECTION_IFSET(msk) END_MMU_FTR_SECTION((msk), (msk)) #define END_MMU_FTR_SECTION_IFCLR(msk) END_MMU_FTR_SECTION((msk), 0) From 9f378b9f007cc94beadea40df83cc62a76975c6f Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 27 Nov 2020 10:14:04 +0530 Subject: [PATCH 133/304] KVM: PPC: BOOK3S: PR: Ignore UAMOR SPR With power7 and above we expect the cpu to support keys. The number of keys is firmware controlled based on the device tree. PR KVM does not expose key details via the device tree. Hence when running with PR KVM we run with MMU_FTR_PKEY support disabled. But we can still get updates to UAMOR. Hence ignore accesses to these SPRs, and for mfspr return 0, indicating that no AMR/IAMR update is allowed. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201127044424.40686-3-aneesh.kumar@linux.ibm.com --- arch/powerpc/kvm/book3s_emulate.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 0effd48c8f4d..b08cc15f31c7 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -840,6 +840,9 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_MMCR1: case SPRN_MMCR2: case SPRN_UMMCR2: + case SPRN_UAMOR: + case SPRN_IAMR: + case SPRN_AMR: #endif break; unprivileged: @@ -1004,6 +1007,9 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val case SPRN_MMCR2: case SPRN_UMMCR2: case SPRN_TIR: + case SPRN_UAMOR: + case SPRN_IAMR: + case SPRN_AMR: #endif *spr_val = 0; break; From 227ae625522c65c4535cabe407f47abc058585ed Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 27 Nov 2020 10:14:05 +0530 Subject: [PATCH 134/304] powerpc/book3s64/kuap/kuep: Add PPC_PKEY config on book3s64 The config CONFIG_PPC_PKEY is used to select the base support that is required for PPC_MEM_KEYS, KUAP, and KUEP.
Adding this dependency reduces the code complexity (in terms of #ifdefs) and enables us to move some of the initialization code to pkeys.c. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201127044424.40686-4-aneesh.kumar@linux.ibm.com --- .../powerpc/include/asm/book3s/64/kup-radix.h | 4 ++-- arch/powerpc/include/asm/book3s/64/mmu.h | 2 +- arch/powerpc/include/asm/ptrace.h | 7 +++++- arch/powerpc/kernel/asm-offsets.c | 3 +++ arch/powerpc/mm/book3s64/Makefile | 2 +- arch/powerpc/mm/book3s64/pkeys.c | 24 ++++++++++++------- arch/powerpc/platforms/Kconfig.cputype | 5 ++++ 7 files changed, 33 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h index a39e2d193fdc..2fb8ee7b1e2a 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -16,7 +16,7 @@ #ifdef CONFIG_PPC_KUAP BEGIN_MMU_FTR_SECTION_NESTED(67) mfspr \gpr1, SPRN_AMR - ld \gpr2, STACK_REGS_KUAP(r1) + ld \gpr2, STACK_REGS_AMR(r1) cmpd \gpr1, \gpr2 beq 998f isync @@ -48,7 +48,7 @@ bne \msr_pr_cr, 99f .endif mfspr \gpr1, SPRN_AMR - std \gpr1, STACK_REGS_KUAP(r1) + std \gpr1, STACK_REGS_AMR(r1) li \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT) sldi \gpr2, \gpr2, AMR_KUAP_SHIFT cmpd \use_cr, \gpr1, \gpr2 diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index ad0837d8076d..d0365914686e 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -199,7 +199,7 @@ extern int mmu_io_psize; void mmu_early_init_devtree(void); void hash__early_init_devtree(void); void radix__early_init_devtree(void); -#ifdef CONFIG_PPC_MEM_KEYS +#ifdef CONFIG_PPC_PKEY void pkey_early_init_devtree(void); #else static inline void pkey_early_init_devtree(void) {} diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 297d30fed945..0aeba52b5ca8 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -53,9 +53,14 @@ struct pt_regs #ifdef CONFIG_PPC64 unsigned long ppr; #endif + union { #ifdef CONFIG_PPC_KUAP - unsigned long kuap; + unsigned long kuap; #endif +#ifdef CONFIG_PPC_PKEY + unsigned long amr; +#endif + }; }; unsigned long __pad[2]; /* Maintain 16 byte interrupt stack alignment */ }; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index d4331d451c71..a2e01b7b9eeb 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -356,6 +356,9 @@ int main(void) STACK_PT_REGS_OFFSET(_PPR, ppr); #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_PKEY + STACK_PT_REGS_OFFSET(STACK_REGS_AMR, amr); +#endif #ifdef CONFIG_PPC_KUAP STACK_PT_REGS_OFFSET(STACK_REGS_KUAP, kuap); #endif diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile index fd393b8be14f..1b56d3af47d4 100644 --- a/arch/powerpc/mm/book3s64/Makefile +++ b/arch/powerpc/mm/book3s64/Makefile @@ -17,7 +17,7 @@ endif obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hash_hugepage.o obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage_prot.o obj-$(CONFIG_SPAPR_TCE_IOMMU) += iommu_api.o -obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o +obj-$(CONFIG_PPC_PKEY) += pkeys.o # Instrumenting the SLB fault path can lead to duplicate SLB entries KCOV_INSTRUMENT_slb.o := n diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index b1d091a97611..7dc71f85683d 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -89,12 +89,14 @@ static int scan_pkey_feature(void) } } +#ifdef CONFIG_PPC_MEM_KEYS /* * Adjust the upper limit, based on the number of bits supported by * arch-neutral code. */ pkeys_total = min_t(int, pkeys_total, ((ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + 1)); +#endif return pkeys_total; } @@ -102,6 +104,7 @@ void __init pkey_early_init_devtree(void) { int pkeys_total, i; +#ifdef CONFIG_PPC_MEM_KEYS /* * We define PKEY_DISABLE_EXECUTE in addition to the arch-neutral * generic defines for PKEY_DISABLE_ACCESS and PKEY_DISABLE_WRITE. @@ -117,7 +120,7 @@ void __init pkey_early_init_devtree(void) BUILD_BUG_ON(__builtin_clzl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + __builtin_popcountl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) != (sizeof(u64) * BITS_PER_BYTE)); - +#endif /* * Only P7 and above supports SPRN_AMR update with MSR[PR] = 1 */ @@ -223,14 +226,6 @@ void __init pkey_early_init_devtree(void) return; } -void pkey_mm_init(struct mm_struct *mm) -{ - if (!mmu_has_feature(MMU_FTR_PKEY)) - return; - mm_pkey_allocation_map(mm) = initial_allocation_mask; - mm->context.execute_only_pkey = execute_only_key; -} - static inline u64 read_amr(void) { return mfspr(SPRN_AMR); @@ -257,6 +252,15 @@ static inline void write_iamr(u64 value) mtspr(SPRN_IAMR, value); } +#ifdef CONFIG_PPC_MEM_KEYS +void pkey_mm_init(struct mm_struct *mm) +{ + if (!mmu_has_feature(MMU_FTR_PKEY)) + return; + mm_pkey_allocation_map(mm) = initial_allocation_mask; + mm->context.execute_only_pkey = execute_only_key; +} + static inline void init_amr(int pkey, u8 init_bits) { u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey)); @@ -445,3 +449,5 @@ void arch_dup_pkeys(struct mm_struct *oldmm, struct mm_struct *mm) mm_pkey_allocation_map(mm) = mm_pkey_allocation_map(oldmm); mm->context.execute_only_pkey = oldmm->context.execute_only_pkey; } + +#endif /* CONFIG_PPC_MEM_KEYS */ diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 44ab03fbcadc..60162b65909c 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -406,6 +406,11 @@ config PPC_KUAP_DEBUG Add extra debugging for Kernel Userspace Access Protection (KUAP) If you're unsure, say N. +config PPC_PKEY + def_bool y + depends on PPC_BOOK3S_64 + depends on PPC_MEM_KEYS || PPC_KUAP || PPC_KUEP + config ARCH_ENABLE_HUGEPAGE_MIGRATION def_bool y depends on PPC_BOOK3S_64 && HUGETLB_PAGE && MIGRATION From 39df17bc20059c84ddc6f91831fce2e2cc79a6f3 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 27 Nov 2020 10:14:06 +0530 Subject: [PATCH 135/304] powerpc/book3s64/kuap/kuep: Move uamor setup to pkey init This patch consolidates the UAMOR update across the pkey, kuap and kuep features. The boot cpu initializes UAMOR via pkey init, and both radix/hash do the secondary cpu UAMOR init in early_init_mmu_secondary. We don't check for mmu_feature in radix secondary init because UAMOR is a supported SPRN with all CPUs supporting radix translation. The old code was not updating UAMOR if we had smap disabled and smep enabled. This change handles that case.
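As background, UAMOR is the SPR that controls which AMR key bits userspace may modify; a zeroed UAMOR locks every key. A minimal sketch of the per-CPU step this patch consolidates (the function name is illustrative; the diff below is the authoritative change):

	/* Run once on each CPU during secondary MMU init. */
	static void example_secondary_uamor_init(void)
	{
		/*
		 * A zero bit pair in UAMOR makes the corresponding AMR
		 * key unmodifiable from problem state, so clearing the
		 * whole SPR leaves all keys under kernel control.
		 */
		mtspr(SPRN_UAMOR, 0);
	}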
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201127044424.40686-5-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/book3s64/radix_pgtable.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 3adcf730f478..f5f248d44d5c 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -620,9 +620,6 @@ void setup_kuap(bool disabled) cur_cpu_spec->mmu_features |= MMU_FTR_RADIX_KUAP; } - /* Make sure userspace can't change the AMR */ - mtspr(SPRN_UAMOR, 0); - /* * Set the default kernel AMR values on all cpus. */ @@ -721,6 +718,9 @@ void radix__early_init_mmu_secondary(void) radix__switch_mmu_context(NULL, &init_mm); tlbiel_all(); + + /* Make sure userspace can't change the AMR */ + mtspr(SPRN_UAMOR, 0); } void radix__mmu_cleanup_all(void) From 3b47b7549ead0719e94022c6742199333c7c8d9f Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 27 Nov 2020 10:14:07 +0530 Subject: [PATCH 136/304] powerpc/book3s64/kuap: Move KUAP related function outside radix The next set of patches adds support for kuap with hash translation. In preparation for that rename/move kuap related functions to non radix names. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201127044424.40686-6-aneesh.kumar@linux.ibm.com --- .../asm/book3s/64/{kup-radix.h => kup.h} | 6 ++--- arch/powerpc/include/asm/kup.h | 4 +++- arch/powerpc/mm/book3s64/pkeys.c | 22 +++++++++++++++++++ arch/powerpc/mm/book3s64/radix_pgtable.c | 19 ---------------- 4 files changed, 28 insertions(+), 23 deletions(-) rename arch/powerpc/include/asm/book3s/64/{kup-radix.h => kup.h} (97%) diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup.h similarity index 97% rename from arch/powerpc/include/asm/book3s/64/kup-radix.h rename to arch/powerpc/include/asm/book3s/64/kup.h index 2fb8ee7b1e2a..8735d2dede94 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H -#define _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H +#ifndef _ASM_POWERPC_BOOK3S_64_KUP_H +#define _ASM_POWERPC_BOOK3S_64_KUP_H #include #include @@ -202,4 +202,4 @@ static inline void restore_user_access(unsigned long flags) } #endif /* __ASSEMBLY__ */ -#endif /* _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H */ +#endif /* _ASM_POWERPC_BOOK3S_64_KUP_H */ diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 0d93331d0fab..a06e50b68d40 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -15,11 +15,13 @@ #define KUAP_CURRENT (KUAP_CURRENT_READ | KUAP_CURRENT_WRITE) #ifdef CONFIG_PPC_BOOK3S_64 -#include +#include #endif + #ifdef CONFIG_PPC_8xx #include #endif + #ifdef CONFIG_PPC_BOOK3S_32 #include #endif diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 7dc71f85683d..c75994cf50a7 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -9,9 +9,12 @@ #include #include #include +#include + #include #include + int num_pkey; /* Max number of pkeys supported */ /* * Keys marked in the reservation list cannot be allocated by userspace @@ -226,6 +229,25 @@ void __init pkey_early_init_devtree(void) return; } +#ifdef CONFIG_PPC_KUAP +void __init setup_kuap(bool 
disabled) +{ + if (disabled || !early_radix_enabled()) + return; + + if (smp_processor_id() == boot_cpuid) { + pr_info("Activating Kernel Userspace Access Prevention\n"); + cur_cpu_spec->mmu_features |= MMU_FTR_RADIX_KUAP; + } + + /* + * Set the default kernel AMR values on all cpus. + */ + mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); + isync(); +} +#endif + static inline u64 read_amr(void) { return mfspr(SPRN_AMR); diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index f5f248d44d5c..fe2c26dbcb28 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -609,25 +609,6 @@ void setup_kuep(bool disabled) } #endif -#ifdef CONFIG_PPC_KUAP -void setup_kuap(bool disabled) -{ - if (disabled || !early_radix_enabled()) - return; - - if (smp_processor_id() == boot_cpuid) { - pr_info("Activating Kernel Userspace Access Prevention\n"); - cur_cpu_spec->mmu_features |= MMU_FTR_RADIX_KUAP; - } - - /* - * Set the default kernel AMR values on all cpus. - */ - mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); - isync(); -} -#endif - void __init radix__early_init_mmu(void) { unsigned long lpcr; From 57b7505aa8ba13eb18ffabeb689ac64343c53aaa Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 27 Nov 2020 10:14:08 +0530 Subject: [PATCH 137/304] powerpc/book3s64/kuep: Move KUEP related function outside radix The next set of patches adds support for kuep with hash translation. In preparation for that rename/move kuep related functions to non radix names. Also set MMU_FTR_KUEP and add the missing isync(). Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201127044424.40686-7-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/kup.h | 1 + arch/powerpc/mm/book3s64/pkeys.c | 21 +++++++++++++++++++ arch/powerpc/mm/book3s64/radix_pgtable.c | 20 -------------------- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index 8735d2dede94..60d53553c114 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -7,6 +7,7 @@ #define AMR_KUAP_BLOCK_READ UL(0x4000000000000000) #define AMR_KUAP_BLOCK_WRITE UL(0x8000000000000000) +#define AMR_KUEP_BLOCKED (1UL << 62) #define AMR_KUAP_BLOCKED (AMR_KUAP_BLOCK_READ | AMR_KUAP_BLOCK_WRITE) #define AMR_KUAP_SHIFT 62 diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index c75994cf50a7..82c722fbce52 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -229,6 +229,27 @@ void __init pkey_early_init_devtree(void) return; } +#ifdef CONFIG_PPC_KUEP +void __init setup_kuep(bool disabled) +{ + if (disabled || !early_radix_enabled()) + return; + + if (smp_processor_id() == boot_cpuid) { + pr_info("Activating Kernel Userspace Execution Prevention\n"); + cur_cpu_spec->mmu_features |= MMU_FTR_KUEP; + } + + /* + * Radix always uses key0 of the IAMR to determine if an access is + * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction + * fetch.
+ */ + mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED); + isync(); +} +#endif + #ifdef CONFIG_PPC_KUAP void __init setup_kuap(bool disabled) { diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index fe2c26dbcb28..98f0b243c1ab 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -589,26 +589,6 @@ static void radix_init_amor(void) mtspr(SPRN_AMOR, (3ul << 62)); } -#ifdef CONFIG_PPC_KUEP -void setup_kuep(bool disabled) -{ - if (disabled || !early_radix_enabled()) - return; - - if (smp_processor_id() == boot_cpuid) { - pr_info("Activating Kernel Userspace Execution Prevention\n"); - cur_cpu_spec->mmu_features |= MMU_FTR_KUEP; - } - - /* - * Radix always uses key0 of the IAMR to determine if an access is - * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction - * fetch. - */ - mtspr(SPRN_IAMR, (1ul << 62)); -} -#endif - void __init radix__early_init_mmu(void) { unsigned long lpcr; From d5b810b5c938e73fd21b2b05ef6a79837eeaa305 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 27 Nov 2020 10:14:09 +0530 Subject: [PATCH 138/304] powerpc/book3s64/kuap: Rename MMU_FTR_RADIX_KUAP and MMU_FTR_KUEP This is in preparation for adding support for kuap with hash translation. In preparation for that rename/move kuap related functions to non radix names. Also move the feature bit closer to MMU_FTR_KUEP. MMU_FTR_KUEP is renamed to MMU_FTR_BOOK3S_KUEP to indicate the feature is only relevant to BOOK3S_64. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201127044424.40686-8-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/kup.h | 18 ++++++++-------- arch/powerpc/include/asm/mmu.h | 26 ++++++++++++++---------- arch/powerpc/mm/book3s64/pkeys.c | 4 ++-- 3 files changed, 26 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index 60d53553c114..03660d9fa826 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -24,7 +24,7 @@ mtspr SPRN_AMR, \gpr2 /* No isync required, see kuap_restore_amr() */ 998: - END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUAP, 67) #endif .endm @@ -37,7 +37,7 @@ sldi \gpr2, \gpr2, AMR_KUAP_SHIFT 999: tdne \gpr1, \gpr2 EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) - END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUAP, 67) #endif .endm #endif @@ -58,7 +58,7 @@ mtspr SPRN_AMR, \gpr2 isync 99: - END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUAP, 67) #endif .endm @@ -75,7 +75,7 @@ DECLARE_STATIC_KEY_FALSE(uaccess_flush_key); static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) { - if (mmu_has_feature(MMU_FTR_RADIX_KUAP) && unlikely(regs->kuap != amr)) { + if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP) && unlikely(regs->kuap != amr)) { isync(); mtspr(SPRN_AMR, regs->kuap); /* @@ -88,7 +88,7 @@ static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) static inline unsigned long kuap_get_and_check_amr(void) { - if (mmu_has_feature(MMU_FTR_RADIX_KUAP)) { + if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { unsigned long amr = mfspr(SPRN_AMR); if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) /* kuap_check_amr() */ WARN_ON_ONCE(amr != AMR_KUAP_BLOCKED); @@ -99,7 +99,7 @@ static
inline unsigned long kuap_get_and_check_amr(void) static inline void kuap_check_amr(void) { - if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_RADIX_KUAP)) + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED); } @@ -118,7 +118,7 @@ static inline unsigned long get_kuap(void) * This has no effect in terms of actually blocking things on hash, * so it doesn't break anything. */ - if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP)) + if (!early_mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) return AMR_KUAP_BLOCKED; return mfspr(SPRN_AMR); @@ -126,7 +126,7 @@ static inline unsigned long get_kuap(void) static inline void set_kuap(unsigned long value) { - if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP)) + if (!early_mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) return; /* @@ -141,7 +141,7 @@ static inline void set_kuap(unsigned long value) static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { - return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) && + return WARN(mmu_has_feature(MMU_FTR_BOOK3S_KUAP) && (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read"); } diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 255a1837e9f7..b6ab5edb644a 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -29,9 +29,18 @@ */ /* - * Support for KUEP feature. + * Supports KUAP feature + * key 0 controlling userspace addresses on radix + * Key 3 on hash */ -#define MMU_FTR_KUEP ASM_CONST(0x00000400) +#define MMU_FTR_BOOK3S_KUAP ASM_CONST(0x00000200) + +/* + * Supports KUEP feature + * key 0 controlling userspace addresses on radix + * Key 3 on hash + */ +#define MMU_FTR_BOOK3S_KUEP ASM_CONST(0x00000400) /* * Support for memory protection keys. 
@@ -120,11 +129,6 @@ */ #define MMU_FTR_1T_SEGMENT ASM_CONST(0x40000000) -/* - * Supports KUAP (key 0 controlling userspace addresses) on radix - */ -#define MMU_FTR_RADIX_KUAP ASM_CONST(0x80000000) - /* MMU feature bit sets for various CPUs */ #define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \ MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2 @@ -187,15 +191,15 @@ enum { #ifdef CONFIG_PPC_RADIX_MMU MMU_FTR_TYPE_RADIX | MMU_FTR_GTSE | -#ifdef CONFIG_PPC_KUAP - MMU_FTR_RADIX_KUAP | -#endif /* CONFIG_PPC_KUAP */ #endif /* CONFIG_PPC_RADIX_MMU */ +#ifdef CONFIG_PPC_KUAP + MMU_FTR_BOOK3S_KUAP | +#endif /* CONFIG_PPC_KUAP */ #ifdef CONFIG_PPC_MEM_KEYS MMU_FTR_PKEY | #endif #ifdef CONFIG_PPC_KUEP - MMU_FTR_KUEP | + MMU_FTR_BOOK3S_KUEP | #endif /* CONFIG_PPC_KUAP */ 0, diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 82c722fbce52..c5c61aa18a04 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -237,7 +237,7 @@ void __init setup_kuep(bool disabled) if (smp_processor_id() == boot_cpuid) { pr_info("Activating Kernel Userspace Execution Prevention\n"); - cur_cpu_spec->mmu_features |= MMU_FTR_KUEP; + cur_cpu_spec->mmu_features |= MMU_FTR_BOOK3S_KUEP; } /* @@ -258,7 +258,7 @@ void __init setup_kuap(bool disabled) if (smp_processor_id() == boot_cpuid) { pr_info("Activating Kernel Userspace Access Prevention\n"); - cur_cpu_spec->mmu_features |= MMU_FTR_RADIX_KUAP; + cur_cpu_spec->mmu_features |= MMU_FTR_BOOK3S_KUAP; } /* From d94b827e89dc3f92cd871d10f4992a6bd3c861e5 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 27 Nov 2020 10:14:10 +0530 Subject: [PATCH 139/304] powerpc/book3s64/kuap: Use Key 3 for kernel mapping with hash translation This patch updates kernel hash page table entries to use storage key 3 for its mapping. This implies all kernel access will now use key 3 to control READ/WRITE. The patch also prevents the allocation of key 3 from userspace and UAMOR value is updated such that userspace cannot modify key 3. Signed-off-by: Aneesh Kumar K.V Reviewed-by: Sandipan Das Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201127044424.40686-9-aneesh.kumar@linux.ibm.com --- .../powerpc/include/asm/book3s/64/hash-pkey.h | 25 ++++++++++++++----- arch/powerpc/include/asm/book3s/64/hash.h | 2 +- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 1 + arch/powerpc/include/asm/mmu_context.h | 2 +- arch/powerpc/mm/book3s64/hash_4k.c | 2 +- arch/powerpc/mm/book3s64/hash_64k.c | 4 +-- arch/powerpc/mm/book3s64/hash_hugepage.c | 2 +- arch/powerpc/mm/book3s64/hash_hugetlbpage.c | 2 +- arch/powerpc/mm/book3s64/hash_pgtable.c | 2 +- arch/powerpc/mm/book3s64/hash_utils.c | 10 +++++--- arch/powerpc/mm/book3s64/pkeys.c | 12 +++++++++ 11 files changed, 46 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/hash-pkey.h b/arch/powerpc/include/asm/book3s/64/hash-pkey.h index 795010897e5d..f1e60d579f6c 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-pkey.h +++ b/arch/powerpc/include/asm/book3s/64/hash-pkey.h @@ -2,6 +2,9 @@ #ifndef _ASM_POWERPC_BOOK3S_64_HASH_PKEY_H #define _ASM_POWERPC_BOOK3S_64_HASH_PKEY_H +/* We use key 3 for KERNEL */ +#define HASH_DEFAULT_KERNEL_KEY (HPTE_R_KEY_BIT0 | HPTE_R_KEY_BIT1) + static inline u64 hash__vmflag_to_pte_pkey_bits(u64 vm_flags) { return (((vm_flags & VM_PKEY_BIT0) ? H_PTE_PKEY_BIT0 : 0x0UL) | @@ -11,13 +14,23 @@ static inline u64 hash__vmflag_to_pte_pkey_bits(u64 vm_flags) ((vm_flags & VM_PKEY_BIT4) ? 
H_PTE_PKEY_BIT4 : 0x0UL)); } -static inline u64 pte_to_hpte_pkey_bits(u64 pteflags) +static inline u64 pte_to_hpte_pkey_bits(u64 pteflags, unsigned long flags) { - return (((pteflags & H_PTE_PKEY_BIT4) ? HPTE_R_KEY_BIT4 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT3) ? HPTE_R_KEY_BIT3 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT2) ? HPTE_R_KEY_BIT2 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT0) ? HPTE_R_KEY_BIT0 : 0x0UL)); + unsigned long pte_pkey; + + pte_pkey = (((pteflags & H_PTE_PKEY_BIT4) ? HPTE_R_KEY_BIT4 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT3) ? HPTE_R_KEY_BIT3 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT2) ? HPTE_R_KEY_BIT2 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT0) ? HPTE_R_KEY_BIT0 : 0x0UL)); + + if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP) || + mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) { + if ((pte_pkey == 0) && (flags & HPTE_USE_KERNEL_KEY)) + return HASH_DEFAULT_KERNEL_KEY; + } + + return pte_pkey; } static inline u16 hash__pte_to_pkey_bits(u64 pteflags) diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 73ad038ed10b..d959b0195ad9 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -145,7 +145,7 @@ extern void hash__mark_initmem_nx(void); extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long pte, int huge); -extern unsigned long htab_convert_pte_flags(unsigned long pteflags); +unsigned long htab_convert_pte_flags(unsigned long pteflags, unsigned long flags); /* Atomic PTE updates */ static inline unsigned long hash__pte_update(struct mm_struct *mm, unsigned long addr, diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 683a9c7d1b03..9192cb05a6ab 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -452,6 +452,7 @@ static inline unsigned long hpt_hash(unsigned long vpn, #define HPTE_LOCAL_UPDATE 0x1 #define HPTE_NOHPTE_UPDATE 0x2 +#define HPTE_USE_KERNEL_KEY 0x4 extern int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index e5a5e3cb7724..033d2f39ed28 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -286,7 +286,7 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, #define thread_pkey_regs_init(thread) #define arch_dup_pkeys(oldmm, mm) -static inline u64 pte_to_hpte_pkey_bits(u64 pteflags) +static inline u64 pte_to_hpte_pkey_bits(u64 pteflags, unsigned long flags) { return 0x0UL; } diff --git a/arch/powerpc/mm/book3s64/hash_4k.c b/arch/powerpc/mm/book3s64/hash_4k.c index 22e787123cdf..7de1a8a0c62a 100644 --- a/arch/powerpc/mm/book3s64/hash_4k.c +++ b/arch/powerpc/mm/book3s64/hash_4k.c @@ -54,7 +54,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, * PP bits. 
_PAGE_USER is already PP bit 0x2, so we only * need to add in 0x1 if it's a read-only user page */ - rflags = htab_convert_pte_flags(new_pte); + rflags = htab_convert_pte_flags(new_pte, flags); rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE); if (cpu_has_feature(CPU_FTR_NOEXECUTE) && diff --git a/arch/powerpc/mm/book3s64/hash_64k.c b/arch/powerpc/mm/book3s64/hash_64k.c index 7084ce2951e6..998c6817ed47 100644 --- a/arch/powerpc/mm/book3s64/hash_64k.c +++ b/arch/powerpc/mm/book3s64/hash_64k.c @@ -72,7 +72,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, * Handle the subpage protection bits */ subpg_pte = new_pte & ~subpg_prot; - rflags = htab_convert_pte_flags(subpg_pte); + rflags = htab_convert_pte_flags(subpg_pte, flags); if (cpu_has_feature(CPU_FTR_NOEXECUTE) && !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { @@ -260,7 +260,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, new_pte |= _PAGE_DIRTY; } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); - rflags = htab_convert_pte_flags(new_pte); + rflags = htab_convert_pte_flags(new_pte, flags); rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE); if (cpu_has_feature(CPU_FTR_NOEXECUTE) && diff --git a/arch/powerpc/mm/book3s64/hash_hugepage.c b/arch/powerpc/mm/book3s64/hash_hugepage.c index 440823797de7..c0fabe6c5a12 100644 --- a/arch/powerpc/mm/book3s64/hash_hugepage.c +++ b/arch/powerpc/mm/book3s64/hash_hugepage.c @@ -57,7 +57,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, if (!(old_pmd & (H_PAGE_THP_HUGE | _PAGE_DEVMAP))) return 0; - rflags = htab_convert_pte_flags(new_pmd); + rflags = htab_convert_pte_flags(new_pmd, flags); #if 0 if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { diff --git a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c index 964467b3a776..b5e9fff8c217 100644 --- a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c +++ b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c @@ -70,7 +70,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, if (old_pte & (H_PAGE_THP_HUGE | _PAGE_DEVMAP)) return 0; - rflags = htab_convert_pte_flags(new_pte); + rflags = htab_convert_pte_flags(new_pte, flags); if (unlikely(mmu_psize == MMU_PAGE_16G)) offset = PTRS_PER_PUD; else diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index fd9c7f91b092..567e0c6b3978 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -443,7 +443,7 @@ void hash__mark_initmem_nx(void) start = (unsigned long)__init_begin; end = (unsigned long)__init_end; - pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL)); + pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL), HPTE_USE_KERNEL_KEY); WARN_ON(!hash__change_memory_range(start, end, pp)); } diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index d2dcb7757c68..e0fe1a43e7b8 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -186,7 +186,7 @@ static struct mmu_psize_def mmu_psize_defaults_gp[] = { * - We make sure R is always set and never lost * - C is _PAGE_DIRTY, and *should* always be set for a writeable mapping */ -unsigned long htab_convert_pte_flags(unsigned long pteflags) +unsigned long htab_convert_pte_flags(unsigned long pteflags, unsigned long flags) { unsigned long rflags = 0; @@ -240,7 +240,7 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) */ rflags |= HPTE_R_M; - 
rflags |= pte_to_hpte_pkey_bits(pteflags); + rflags |= pte_to_hpte_pkey_bits(pteflags, flags); return rflags; } @@ -255,7 +255,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, shift = mmu_psize_defs[psize].shift; step = 1 << shift; - prot = htab_convert_pte_flags(prot); + prot = htab_convert_pte_flags(prot, HPTE_USE_KERNEL_KEY); DBG("htab_bolt_mapping(%lx..%lx -> %lx (%lx,%d,%d)\n", vstart, vend, pstart, prot, psize, ssize); @@ -1316,12 +1316,14 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, vsid = get_kernel_vsid(ea, mmu_kernel_ssize); psize = mmu_vmalloc_psize; ssize = mmu_kernel_ssize; + flags |= HPTE_USE_KERNEL_KEY; break; case IO_REGION_ID: vsid = get_kernel_vsid(ea, mmu_kernel_ssize); psize = mmu_io_psize; ssize = mmu_kernel_ssize; + flags |= HPTE_USE_KERNEL_KEY; break; default: /* @@ -1900,7 +1902,7 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) unsigned long hash; unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize); unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize); - unsigned long mode = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL)); + unsigned long mode = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL), HPTE_USE_KERNEL_KEY); long ret; hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index c5c61aa18a04..e434c0a2ee5d 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -205,6 +205,18 @@ void __init pkey_early_init_devtree(void) reserved_allocation_mask |= (0x1 << 1); default_uamor &= ~(0x3ul << pkeyshift(1)); + /* handle key which is used by kernel for KAUP */ + reserved_allocation_mask |= (0x1 << 3); + /* + * Mark access for KUAP key in default amr so that + * we continue to operate with that AMR in + * copy_to/from_user(). + */ + default_amr &= ~(0x3ul << pkeyshift(3)); + default_iamr &= ~(0x1ul << pkeyshift(3)); + default_uamor &= ~(0x3ul << pkeyshift(3)); + + /* * Prevent the usage of OS reserved keys. Update UAMOR * for those keys. Also mark the rest of the bits in the From d7df77e89039623ededf0ece7b4358f7c9ecbaae Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 27 Nov 2020 10:14:11 +0530 Subject: [PATCH 140/304] powerpc/exec: Set thread.regs early during exec In later patches during exec, we would like to access default regs.amr to control access to the user mapping. Having thread.regs set early makes the code changes simpler. 
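The helper moved below relies on the layout convention that a task's user-level pt_regs sit at the very top of its kernel stack; a commented sketch of the pointer arithmetic the hunk uses:

	/*
	 * task_stack_page() returns the base of the task's kernel stack;
	 * THREAD_SIZE bytes above that is the stack top, and the live
	 * pt_regs occupy the highest sizeof(struct pt_regs) bytes, so
	 * stepping back one pt_regs from the top finds them.
	 */
	struct pt_regs *regs = task_stack_page(current) + THREAD_SIZE;
	current->thread.regs = regs - 1;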
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201127044424.40686-10-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/thread_info.h | 2 -- arch/powerpc/kernel/process.c | 27 +++++++++++++------------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 46a210b03d2b..de4c911d9ced 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -77,10 +77,8 @@ struct thread_info { /* how to get the thread information struct from C */ extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); -#ifdef CONFIG_PPC_BOOK3S_64 void arch_setup_new_exec(void); #define arch_setup_new_exec arch_setup_new_exec -#endif #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 293d9b2ec0fa..3f0b6adecf75 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1531,14 +1531,22 @@ void flush_thread(void) #endif /* CONFIG_HAVE_HW_BREAKPOINT */ } -#ifdef CONFIG_PPC_BOOK3S_64 void arch_setup_new_exec(void) { - if (radix_enabled()) - return; - hash__setup_new_exec(); -} + +#ifdef CONFIG_PPC_BOOK3S_64 + if (!radix_enabled()) + hash__setup_new_exec(); #endif + /* + * If we exec out of a kernel thread then thread.regs will not be + * set. Do it now. + */ + if (!current->thread.regs) { + struct pt_regs *regs = task_stack_page(current) + THREAD_SIZE; + current->thread.regs = regs - 1; + } +} #ifdef CONFIG_PPC64 /** @@ -1771,15 +1779,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) preload_new_slb_context(start, sp); #endif - /* - * If we exec out of a kernel thread then thread.regs will not be - * set. Do it now. - */ - if (!current->thread.regs) { - struct pt_regs *regs = task_stack_page(current) + THREAD_SIZE; - current->thread.regs = regs - 1; - } - #ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* * Clear any transactional state, we're exec()ing. The cause is From 8e560921b58cbc18e192f0ac273d307a37a144f9 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 27 Nov 2020 10:14:12 +0530 Subject: [PATCH 141/304] powerpc/book3s64/pkeys: Store/restore userspace AMR/IAMR correctly on entry and exit from kernel This prepares the kernel to operate with different AMR/IAMR values than userspace. For this, AMR/IAMR need to be saved and restored on entry to and return from the kernel. With KUAP we modify the kernel AMR when accessing user addresses from the kernel via copy_to/from_user interfaces. We don't need to modify the IAMR value in a similar fashion. If MMU_FTR_PKEY is enabled we need to save AMR/IAMR in pt_regs on entering the kernel from userspace. If not, we can assume that AMR/IAMR is not modified from userspace. We need to save AMR if we have the MMU_FTR_BOOK3S_KUAP feature enabled and we are interrupted within the kernel. This is required so that if we get interrupted within copy_to/from_user we continue with the right AMR value. If we have MMU_FTR_BOOK3S_KUEP enabled we need to restore IAMR on return to userspace because the kernel will be running with a different IAMR value.
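In C, the entry-from-userspace rule described above looks roughly like this; a condensed sketch of the syscall-entry hunk further below (the assembly macros in kup.h implement the same decision tree):

	if (mmu_has_feature(MMU_FTR_PKEY)) {
		/* User AMR/IAMR can be anything: save them for the
		 * return path before switching to kernel values. */
		regs->amr  = mfspr(SPRN_AMR);
		regs->iamr = mfspr(SPRN_IAMR);
		if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
			mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
		if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP))
			mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
		isync();	/* context synchronise the SPR updates */
	}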
Signed-off-by: Aneesh Kumar K.V Reviewed-by: Sandipan Das Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201127044424.40686-11-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/kup.h | 222 +++++++++++++++++++---- arch/powerpc/include/asm/ptrace.h | 5 +- arch/powerpc/kernel/asm-offsets.c | 2 + arch/powerpc/kernel/entry_64.S | 6 +- arch/powerpc/kernel/exceptions-64s.S | 4 +- arch/powerpc/kernel/syscall_64.c | 32 +++- 6 files changed, 225 insertions(+), 46 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index 03660d9fa826..fa671391e931 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -13,17 +13,46 @@ #ifdef __ASSEMBLY__ -.macro kuap_restore_amr gpr1, gpr2 -#ifdef CONFIG_PPC_KUAP +.macro kuap_user_restore gpr1 +#if defined(CONFIG_PPC_PKEY) BEGIN_MMU_FTR_SECTION_NESTED(67) - mfspr \gpr1, SPRN_AMR + /* + * AMR and IAMR are going to be different when + * returning to userspace. + */ + ld \gpr1, STACK_REGS_AMR(r1) + isync + mtspr SPRN_AMR, \gpr1 + /* + * Restore IAMR only when returning to userspace + */ + ld \gpr1, STACK_REGS_IAMR(r1) + mtspr SPRN_IAMR, \gpr1 + + /* No isync required, see kuap_user_restore() */ + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_PKEY, 67) +#endif +.endm + +.macro kuap_kernel_restore gpr1, gpr2 +#if defined(CONFIG_PPC_PKEY) + + BEGIN_MMU_FTR_SECTION_NESTED(67) + /* + * AMR is going to be mostly the same since we are + * returning to the kernel. Compare and do a mtspr. + */ ld \gpr2, STACK_REGS_AMR(r1) + mfspr \gpr1, SPRN_AMR cmpd \gpr1, \gpr2 - beq 998f + beq 100f isync mtspr SPRN_AMR, \gpr2 - /* No isync required, see kuap_restore_amr() */ -998: + /* + * No isync required, see kuap_restore_amr() + * No need to restore IAMR when returning to kernel space. + */ +100: END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUAP, 67) #endif .endm @@ -42,23 +71,98 @@ .endm #endif +/* + * if (pkey) { + * + * save AMR -> stack; + * if (kuap) { + * if (AMR != BLOCKED) + * KUAP_BLOCKED -> AMR; + * } + * if (from_user) { + * save IAMR -> stack; + * if (kuep) { + * KUEP_BLOCKED ->IAMR + * } + * } + * return; + * } + * + * if (kuap) { + * if (from_kernel) { + * save AMR -> stack; + * if (AMR != BLOCKED) + * KUAP_BLOCKED -> AMR; + * } + * + * } + */ .macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr -#ifdef CONFIG_PPC_KUAP +#if defined(CONFIG_PPC_PKEY) + + /* + * if both pkey and kuap is disabled, nothing to do + */ + BEGIN_MMU_FTR_SECTION_NESTED(68) + b 100f // skip_save_amr + END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_PKEY | MMU_FTR_BOOK3S_KUAP, 68) + + /* + * if pkey is disabled and we are entering from userspace + * don't do anything. + */ BEGIN_MMU_FTR_SECTION_NESTED(67) .ifnb \msr_pr_cr - bne \msr_pr_cr, 99f + /* + * Without pkey we are not changing AMR outside the kernel + * hence skip this completely. 
+ */ + bne \msr_pr_cr, 100f // from userspace .endif + END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_PKEY, 67) + + /* + * pkey is enabled or pkey is disabled but entering from kernel + */ mfspr \gpr1, SPRN_AMR std \gpr1, STACK_REGS_AMR(r1) - li \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT) - sldi \gpr2, \gpr2, AMR_KUAP_SHIFT + + /* + * update kernel AMR with AMR_KUAP_BLOCKED only + * if KUAP feature is enabled + */ + BEGIN_MMU_FTR_SECTION_NESTED(69) + LOAD_REG_IMMEDIATE(\gpr2, AMR_KUAP_BLOCKED) cmpd \use_cr, \gpr1, \gpr2 - beq \use_cr, 99f - // We don't isync here because we very recently entered via rfid + beq \use_cr, 102f + /* + * We don't isync here because we very recently entered via an interrupt + */ mtspr SPRN_AMR, \gpr2 isync -99: - END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUAP, 67) +102: + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUAP, 69) + + /* + * if entering from kernel we don't need save IAMR + */ + .ifnb \msr_pr_cr + beq \msr_pr_cr, 100f // from kernel space + mfspr \gpr1, SPRN_IAMR + std \gpr1, STACK_REGS_IAMR(r1) + + /* + * update kernel IAMR with AMR_KUEP_BLOCKED only + * if KUEP feature is enabled + */ + BEGIN_MMU_FTR_SECTION_NESTED(70) + LOAD_REG_IMMEDIATE(\gpr2, AMR_KUEP_BLOCKED) + mtspr SPRN_IAMR, \gpr2 + isync + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUEP, 70) + .endif + +100: // skip_save_amr #endif .endm @@ -68,22 +172,42 @@ DECLARE_STATIC_KEY_FALSE(uaccess_flush_key); -#ifdef CONFIG_PPC_KUAP +#ifdef CONFIG_PPC_PKEY #include #include -static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) +static inline void kuap_user_restore(struct pt_regs *regs) { - if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP) && unlikely(regs->kuap != amr)) { - isync(); - mtspr(SPRN_AMR, regs->kuap); - /* - * No isync required here because we are about to RFI back to - * previous context before any user accesses would be made, - * which is a CSI. - */ + if (!mmu_has_feature(MMU_FTR_PKEY)) + return; + + isync(); + mtspr(SPRN_AMR, regs->amr); + mtspr(SPRN_IAMR, regs->iamr); + /* + * No isync required here because we are about to rfi + * back to previous context before any user accesses + * would be made, which is a CSI. + */ +} +static inline void kuap_kernel_restore(struct pt_regs *regs, + unsigned long amr) +{ + if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { + if (unlikely(regs->amr != amr)) { + isync(); + mtspr(SPRN_AMR, regs->amr); + /* + * No isync required here because we are about to rfi + * back to previous context before any user accesses + * would be made, which is a CSI. + */ + } } + /* + * No need to restore IAMR when returning to kernel space. + */ } static inline unsigned long kuap_get_and_check_amr(void) @@ -97,6 +221,26 @@ static inline unsigned long kuap_get_and_check_amr(void) return 0; } +#else /* CONFIG_PPC_PKEY */ + +static inline void kuap_user_restore(struct pt_regs *regs) +{ +} + +static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) +{ +} + +static inline unsigned long kuap_get_and_check_amr(void) +{ + return 0; +} + +#endif /* CONFIG_PPC_PKEY */ + + +#ifdef CONFIG_PPC_KUAP + static inline void kuap_check_amr(void) { if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) @@ -145,21 +289,6 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), "Bug: %s fault blocked by AMR!", is_write ? 
"Write" : "Read"); } -#else /* CONFIG_PPC_KUAP */ -static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) { } - -static inline unsigned long kuap_get_and_check_amr(void) -{ - return 0UL; -} - -static inline unsigned long get_kuap(void) -{ - return AMR_KUAP_BLOCKED; -} - -static inline void set_kuap(unsigned long value) { } -#endif /* !CONFIG_PPC_KUAP */ static __always_inline void allow_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) @@ -176,6 +305,21 @@ static __always_inline void allow_user_access(void __user *to, const void __user BUILD_BUG(); } +#else /* CONFIG_PPC_KUAP */ + +static inline unsigned long get_kuap(void) +{ + return AMR_KUAP_BLOCKED; +} + +static inline void set_kuap(unsigned long value) { } + +static __always_inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) +{ } + +#endif /* !CONFIG_PPC_KUAP */ + static inline void prevent_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 0aeba52b5ca8..58f9dc060a7b 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -61,8 +61,11 @@ struct pt_regs unsigned long amr; #endif }; +#ifdef CONFIG_PPC_PKEY + unsigned long iamr; +#endif }; - unsigned long __pad[2]; /* Maintain 16 byte interrupt stack alignment */ + unsigned long __pad[4]; /* Maintain 16 byte interrupt stack alignment */ }; }; #endif diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index a2e01b7b9eeb..b12d7c049bfe 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -358,11 +358,13 @@ int main(void) #ifdef CONFIG_PPC_PKEY STACK_PT_REGS_OFFSET(STACK_REGS_AMR, amr); + STACK_PT_REGS_OFFSET(STACK_REGS_IAMR, iamr); #endif #ifdef CONFIG_PPC_KUAP STACK_PT_REGS_OFFSET(STACK_REGS_KUAP, kuap); #endif + #if defined(CONFIG_PPC32) #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 2f3846192ec7..da23c397ceb2 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -653,8 +653,8 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return) kuap_check_amr r3, r4 ld r5,_MSR(r1) andi. r0,r5,MSR_PR - bne .Lfast_user_interrupt_return - kuap_restore_amr r3, r4 + bne .Lfast_user_interrupt_return_amr + kuap_kernel_restore r3, r4 andi. 
r0,r5,MSR_RI li r3,0 /* 0 return value, no EMULATE_STACK_STORE */ bne+ .Lfast_kernel_interrupt_return @@ -674,6 +674,8 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return) cmpdi r3,0 bne- .Lrestore_nvgprs +.Lfast_user_interrupt_return_amr: + kuap_user_restore r3 .Lfast_user_interrupt_return: ld r11,_NIP(r1) ld r12,_MSR(r1) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 4d01f09ecf80..1c8f1b90e174 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1059,7 +1059,7 @@ EXC_COMMON_BEGIN(system_reset_common) ld r10,SOFTE(r1) stb r10,PACAIRQSOFTMASK(r13) - kuap_restore_amr r9, r10 + kuap_kernel_restore r9, r10 EXCEPTION_RESTORE_REGS RFI_TO_USER_OR_KERNEL @@ -2875,7 +2875,7 @@ EXC_COMMON_BEGIN(soft_nmi_common) ld r10,SOFTE(r1) stb r10,PACAIRQSOFTMASK(r13) - kuap_restore_amr r9, r10 + kuap_kernel_restore r9, r10 EXCEPTION_RESTORE_REGS hsrr=0 RFI_TO_KERNEL diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index 310bcd768cd5..11f1c6360291 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -35,7 +35,25 @@ notrace long system_call_exception(long r3, long r4, long r5, BUG_ON(!FULL_REGS(regs)); BUG_ON(regs->softe != IRQS_ENABLED); - kuap_check_amr(); +#ifdef CONFIG_PPC_PKEY + if (mmu_has_feature(MMU_FTR_PKEY)) { + unsigned long amr, iamr; + /* + * When entering from userspace we mostly have the AMR/IAMR + * different from kernel default values. Hence don't compare. + */ + amr = mfspr(SPRN_AMR); + iamr = mfspr(SPRN_IAMR); + regs->amr = amr; + regs->iamr = iamr; + if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) + mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); + if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) + mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED); + isync(); + } else +#endif + kuap_check_amr(); account_cpu_user_entry(); @@ -245,6 +263,12 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, account_cpu_user_exit(); +#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */ + /* + * We do this at the end so that we do context switch with KERNEL AMR + */ + kuap_user_restore(regs); +#endif return ret; } @@ -330,6 +354,10 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned account_cpu_user_exit(); + /* + * We do this at the end so that we do context switch with KERNEL AMR + */ + kuap_user_restore(regs); return ret; } @@ -400,7 +428,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign * which would cause Read-After-Write stalls. Hence, we take the AMR * value from the check above. */ - kuap_restore_amr(regs, amr); + kuap_kernel_restore(regs, amr); return ret; } From f643fcab74c005ddfdda68c69909f03bde766ff1 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 27 Nov 2020 10:14:13 +0530 Subject: [PATCH 142/304] powerpc/book3s64/pkeys: Inherit correctly on fork. Child thread.kuap value is inherited from the parent in copy_thread_tls. We still need to make sure when the child returns from a fork in the kernel we start with the kernel default AMR value. 
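Schematically, two pt_regs frames are involved at fork and only the kernel one is reset; a sketch of the distinction the hunk below encodes (childregs and kregs are the names used in copy_thread()):

	/* childregs: the user-level frame, copied from the parent, so
	 * the child resumes userspace with the parent's AMR/IAMR. */
	/* kregs: the frame the child's first kernel exit returns
	 * through; start it from the blocked kernel defaults. */
	if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
		kregs->amr = AMR_KUAP_BLOCKED;
	if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP))
		kregs->iamr = AMR_KUEP_BLOCKED;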
Signed-off-by: Aneesh Kumar K.V Reviewed-by: Sandipan Das Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201127044424.40686-12-aneesh.kumar@linux.ibm.com --- arch/powerpc/kernel/process.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 3f0b6adecf75..2ec907a6f9af 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1760,6 +1760,16 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, childregs->ppr = DEFAULT_PPR; p->thread.tidr = 0; +#endif + /* + * Run with the current AMR value of the kernel + */ +#ifdef CONFIG_PPC_PKEY + if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) + kregs->amr = AMR_KUAP_BLOCKED; + + if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) + kregs->iamr = AMR_KUEP_BLOCKED; #endif kregs->nip = ppc_function_entry(f); return 0; From d5fa30e6993ffcdd1859d8dab1a07a6f6c6e7c3f Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 27 Nov 2020 10:14:14 +0530 Subject: [PATCH 143/304] powerpc/book3s64/pkeys: Reset userspace AMR correctly on exec On fork, we inherit from the parent and on exec, we should switch to default_amr values. Also, avoid changing the AMR register value within the kernel. The kernel now runs with different AMR values. Signed-off-by: Aneesh Kumar K.V Reviewed-by: Sandipan Das Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201127044424.40686-13-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/pkeys.h | 2 ++ arch/powerpc/kernel/process.c | 6 +++++- arch/powerpc/mm/book3s64/pkeys.c | 16 ++-------------- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pkeys.h b/arch/powerpc/include/asm/book3s/64/pkeys.h index b7d9f4267bcd..3b8640498f5b 100644 --- a/arch/powerpc/include/asm/book3s/64/pkeys.h +++ b/arch/powerpc/include/asm/book3s/64/pkeys.h @@ -6,6 +6,8 @@ #include extern u64 __ro_after_init default_uamor; +extern u64 __ro_after_init default_amr; +extern u64 __ro_after_init default_iamr; static inline u64 vmflag_to_pte_pkey_bits(u64 vm_flags) { diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 2ec907a6f9af..0538e0f1790c 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1546,6 +1546,11 @@ void arch_setup_new_exec(void) struct pt_regs *regs = task_stack_page(current) + THREAD_SIZE; current->thread.regs = regs - 1; } + +#ifdef CONFIG_PPC_MEM_KEYS + current->thread.regs->amr = default_amr; + current->thread.regs->iamr = default_iamr; +#endif } #ifdef CONFIG_PPC64 @@ -1895,7 +1900,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.load_tm = 0; #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ - thread_pkey_regs_init(¤t->thread); } EXPORT_SYMBOL(start_thread); diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index e434c0a2ee5d..355d001fa155 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -28,8 +28,8 @@ static u32 initial_allocation_mask __ro_after_init; * Even if we allocate keys with sys_pkey_alloc(), we need to make sure * other thread still find the access denied using the same keys. */ -static u64 default_amr = ~0x0UL; -static u64 default_iamr = 0x5555555555555555UL; +u64 default_amr __ro_after_init = ~0x0UL; +u64 default_iamr __ro_after_init = 0x5555555555555555UL; u64 default_uamor __ro_after_init; /* * Key used to implement PROT_EXEC mmap. 
Denies READ/WRITE @@ -396,18 +396,6 @@ void thread_pkey_regs_restore(struct thread_struct *new_thread, write_iamr(new_thread->iamr); } -void thread_pkey_regs_init(struct thread_struct *thread) -{ - if (!mmu_has_feature(MMU_FTR_PKEY)) - return; - - thread->amr = default_amr; - thread->iamr = default_iamr; - - write_amr(default_amr); - write_iamr(default_iamr); -} - int execute_only_pkey(struct mm_struct *mm) { return mm->context.execute_only_pkey; From edc541ecaae73d498a49b9ca82bc66255d9e0720 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 27 Nov 2020 10:14:15 +0530 Subject: [PATCH 144/304] powerpc/ptrace-view: Use pt_regs values instead of thread_struct based one. We will remove thread.amr/iamr/uamor in a later patch Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201127044424.40686-14-aneesh.kumar@linux.ibm.com --- arch/powerpc/kernel/ptrace/ptrace-view.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 00a765f00d31..2bad8068f598 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -471,12 +471,12 @@ static int pkey_active(struct task_struct *target, const struct user_regset *reg static int pkey_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { - BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr)); if (!arch_pkeys_enabled()) return -ENODEV; - membuf_write(&to, &target->thread.amr, 2 * sizeof(unsigned long)); + membuf_store(&to, target->thread.regs->amr); + membuf_store(&to, target->thread.regs->iamr); return membuf_store(&to, default_uamor); } @@ -509,7 +509,8 @@ static int pkey_set(struct task_struct *target, const struct user_regset *regset * Pick the AMR values for the keys that kernel is using. This * will be indicated by the ~default_uamor bits. */ - target->thread.amr = (new_amr & default_uamor) | (target->thread.amr & ~default_uamor); + target->thread.regs->amr = (new_amr & default_uamor) | + (target->thread.regs->amr & ~default_uamor); return 0; } From 48a8ab4eeb8271f2a0e2ca3cf80844a59acca153 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 27 Nov 2020 10:14:16 +0530 Subject: [PATCH 145/304] powerpc/book3s64/pkeys: Don't update SPRN_AMR when in kernel mode. Now that kernel correctly store/restore userspace AMR/IAMR values, avoid manipulating AMR and IAMR from the kernel on behalf of userspace. Signed-off-by: Aneesh Kumar K.V Reviewed-by: Sandipan Das Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201127044424.40686-15-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/kup.h | 21 +++++++++ arch/powerpc/include/asm/processor.h | 4 -- arch/powerpc/kernel/process.c | 4 -- arch/powerpc/kernel/traps.c | 6 --- arch/powerpc/mm/book3s64/pkeys.c | 57 +++++------------------- 5 files changed, 31 insertions(+), 61 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index fa671391e931..f41f6f468002 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -177,6 +177,27 @@ DECLARE_STATIC_KEY_FALSE(uaccess_flush_key); #include #include +/* + * For kernel thread that doesn't have thread.regs return + * default AMR/IAMR values. 
+ */ +static inline u64 current_thread_amr(void) +{ + if (current->thread.regs) + return current->thread.regs->amr; + return AMR_KUAP_BLOCKED; +} + +static inline u64 current_thread_iamr(void) +{ + if (current->thread.regs) + return current->thread.regs->iamr; + return AMR_KUEP_BLOCKED; +} +#endif /* CONFIG_PPC_PKEY */ + +#ifdef CONFIG_PPC_KUAP + static inline void kuap_user_restore(struct pt_regs *regs) { if (!mmu_has_feature(MMU_FTR_PKEY)) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 10d659f2ac46..8acc3590c971 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -226,10 +226,6 @@ struct thread_struct { struct thread_vr_state ckvr_state; /* Checkpointed VR state */ unsigned long ckvrsave; /* Checkpointed VRSAVE */ #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ -#ifdef CONFIG_PPC_MEM_KEYS - unsigned long amr; - unsigned long iamr; -#endif #ifdef CONFIG_KVM_BOOK3S_32_HANDLER void* kvm_shadow_vcpu; /* KVM internal data */ #endif /* CONFIG_KVM_BOOK3S_32_HANDLER */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 0538e0f1790c..96bb10d00d9c 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -589,7 +589,6 @@ static void save_all(struct task_struct *tsk) __giveup_spe(tsk); msr_check_and_clear(msr_all_available); - thread_pkey_regs_save(&tsk->thread); } void flush_all_to_thread(struct task_struct *tsk) @@ -1160,8 +1159,6 @@ static inline void save_sprs(struct thread_struct *t) t->tar = mfspr(SPRN_TAR); } #endif - - thread_pkey_regs_save(t); } static inline void restore_sprs(struct thread_struct *old_thread, @@ -1202,7 +1199,6 @@ static inline void restore_sprs(struct thread_struct *old_thread, mtspr(SPRN_TIDR, new_thread->tidr); #endif - thread_pkey_regs_restore(new_thread, old_thread); } struct task_struct *__switch_to(struct task_struct *prev, diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 5b39baa61590..46419ae4d17e 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -347,12 +347,6 @@ static bool exception_common(int signr, struct pt_regs *regs, int code, current->thread.trap_nr = code; - /* - * Save all the pkey registers AMR/IAMR/UAMOR. Eg: Core dumps need - * to capture the content, if the task gets killed. 
- */
-	thread_pkey_regs_save(&current->thread);
-
 	return true;
 }
 
diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index 355d001fa155..8d1bf2f18ca4 100644
--- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -281,30 +281,17 @@ void __init setup_kuap(bool disabled)
 }
 #endif
 
-static inline u64 read_amr(void)
+static inline void update_current_thread_amr(u64 value)
 {
-	return mfspr(SPRN_AMR);
+	current->thread.regs->amr = value;
 }
 
-static inline void write_amr(u64 value)
-{
-	mtspr(SPRN_AMR, value);
-}
-
-static inline u64 read_iamr(void)
-{
-	if (!likely(pkey_execute_disable_supported))
-		return 0x0UL;
-
-	return mfspr(SPRN_IAMR);
-}
-
-static inline void write_iamr(u64 value)
+static inline void update_current_thread_iamr(u64 value)
 {
 	if (!likely(pkey_execute_disable_supported))
 		return;
 
-	mtspr(SPRN_IAMR, value);
+	current->thread.regs->iamr = value;
 }
 
 #ifdef CONFIG_PPC_MEM_KEYS
@@ -319,17 +306,17 @@ void pkey_mm_init(struct mm_struct *mm)
 static inline void init_amr(int pkey, u8 init_bits)
 {
 	u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey));
-	u64 old_amr = read_amr() & ~((u64)(0x3ul) << pkeyshift(pkey));
+	u64 old_amr = current_thread_amr() & ~((u64)(0x3ul) << pkeyshift(pkey));
 
-	write_amr(old_amr | new_amr_bits);
+	update_current_thread_amr(old_amr | new_amr_bits);
 }
 
 static inline void init_iamr(int pkey, u8 init_bits)
 {
 	u64 new_iamr_bits = (((u64)init_bits & 0x1UL) << pkeyshift(pkey));
-	u64 old_iamr = read_iamr() & ~((u64)(0x1ul) << pkeyshift(pkey));
+	u64 old_iamr = current_thread_iamr() & ~((u64)(0x1ul) << pkeyshift(pkey));
 
-	write_iamr(old_iamr | new_iamr_bits);
+	update_current_thread_iamr(old_iamr | new_iamr_bits);
 }
 
 /*
@@ -372,30 +359,6 @@ int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 	return 0;
 }
 
-void thread_pkey_regs_save(struct thread_struct *thread)
-{
-	if (!mmu_has_feature(MMU_FTR_PKEY))
-		return;
-
-	/*
-	 * TODO: Skip saving registers if @thread hasn't used any keys yet.
-	 */
-	thread->amr = read_amr();
-	thread->iamr = read_iamr();
-}
-
-void thread_pkey_regs_restore(struct thread_struct *new_thread,
-			      struct thread_struct *old_thread)
-{
-	if (!mmu_has_feature(MMU_FTR_PKEY))
-		return;
-
-	if (old_thread->amr != new_thread->amr)
-		write_amr(new_thread->amr);
-	if (old_thread->iamr != new_thread->iamr)
-		write_iamr(new_thread->iamr);
-}
-
 int execute_only_pkey(struct mm_struct *mm)
 {
 	return mm->context.execute_only_pkey;
@@ -444,9 +407,9 @@ static bool pkey_access_permitted(int pkey, bool write, bool execute)
 	pkey_shift = pkeyshift(pkey);
 	if (execute)
-		return !(read_iamr() & (IAMR_EX_BIT << pkey_shift));
+		return !(current_thread_iamr() & (IAMR_EX_BIT << pkey_shift));
 
-	amr = read_amr();
+	amr = current_thread_amr();
 	if (write)
 		return !(amr & (AMR_WR_BIT << pkey_shift));

From 4d6c551e9f548f7675a01eff229d09ab41162a25 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V"
Date: Fri, 27 Nov 2020 10:14:17 +0530
Subject: [PATCH 146/304] powerpc/book3s64/kuap: Restrict access to userspace based on userspace AMR

If an application has configured address protection such that read/write is
denied using a pkey, even the kernel should receive a FAULT when it accesses
the same address. This patch uses the user AMR value stored in pt_regs.amr
to achieve that.
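The merge in allow_user_access() that follows can be modelled in plain C: the user's own AMR is OR-ed with the KUAP direction mask, so a pkey write denial survives even a KUAP_READ_WRITE window. A minimal sketch, assuming the key-3 mask values this series adopts and the usual 2-bits-per-key AMR layout:

#include <stdio.h>

#define AMR_KUAP_BLOCK_READ	0x5455555555555555ULL
#define AMR_KUAP_BLOCK_WRITE	0xa8aaaaaaaaaaaaaaULL

enum { KUAP_READ, KUAP_WRITE, KUAP_READ_WRITE };

/* Effective AMR for a user-access window of the given direction. */
static unsigned long long window_amr(unsigned long long thread_amr, int dir)
{
	switch (dir) {
	case KUAP_READ:  return thread_amr | AMR_KUAP_BLOCK_WRITE;
	case KUAP_WRITE: return thread_amr | AMR_KUAP_BLOCK_READ;
	default:         return thread_amr;	/* KUAP_READ_WRITE */
	}
}

int main(void)
{
	/* Userspace denied writes on pkey 5: the write bit is 0x2 shifted
	 * into the key's 2-bit field, key 0 sitting in the top two bits. */
	unsigned long long thread_amr = 0x2ULL << (62 - 2 * 5);
	unsigned long long amr = window_amr(thread_amr, KUAP_READ_WRITE);

	printf("key 5 write still blocked: %s\n",
	       (amr >> (62 - 2 * 5)) & 0x2 ? "yes" : "no");
	return 0;
}

Before this patch the KUAP_READ_WRITE case wrote 0 to the AMR, which silently lifted the user's own pkey protection inside the window.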
Signed-off-by: Aneesh Kumar K.V Reviewed-by: Sandipan Das Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201127044424.40686-16-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/kup.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index f41f6f468002..4fa0760a47a4 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -314,14 +314,20 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) static __always_inline void allow_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { + unsigned long thread_amr = 0; + // This is written so we can resolve to a single case at build time BUILD_BUG_ON(!__builtin_constant_p(dir)); + + if (mmu_has_feature(MMU_FTR_PKEY)) + thread_amr = current_thread_amr(); + if (dir == KUAP_READ) - set_kuap(AMR_KUAP_BLOCK_WRITE); + set_kuap(thread_amr | AMR_KUAP_BLOCK_WRITE); else if (dir == KUAP_WRITE) - set_kuap(AMR_KUAP_BLOCK_READ); + set_kuap(thread_amr | AMR_KUAP_BLOCK_READ); else if (dir == KUAP_READ_WRITE) - set_kuap(0); + set_kuap(thread_amr); else BUILD_BUG(); } From eb232b1624462752dc916d9015b31ecdac0a01f1 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 27 Nov 2020 10:14:18 +0530 Subject: [PATCH 147/304] powerpc/book3s64/kuap: Improve error reporting with KUAP With hash translation use DSISR_KEYFAULT to identify a wrong access. With Radix we look at the AMR value and type of fault. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201127044424.40686-17-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/book3s/32/kup.h | 4 +-- arch/powerpc/include/asm/book3s/64/kup.h | 27 ++++++++++++++++---- arch/powerpc/include/asm/kup.h | 4 +-- arch/powerpc/include/asm/nohash/32/kup-8xx.h | 4 +-- arch/powerpc/mm/fault.c | 2 +- 5 files changed, 29 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 32fd4452e960..b18cd931e325 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -177,8 +177,8 @@ static inline void restore_user_access(unsigned long flags) allow_user_access(to, to, end - addr, KUAP_READ_WRITE); } -static inline bool -bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, + bool is_write, unsigned long error_code) { unsigned long begin = regs->kuap & 0xf0000000; unsigned long end = regs->kuap << 28; diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index 4fa0760a47a4..f8f87b5c0e67 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -303,12 +303,29 @@ static inline void set_kuap(unsigned long value) isync(); } -static inline bool -bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +#define RADIX_KUAP_BLOCK_READ UL(0x4000000000000000) +#define RADIX_KUAP_BLOCK_WRITE UL(0x8000000000000000) + +static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, + bool is_write, unsigned long error_code) { - return WARN(mmu_has_feature(MMU_FTR_BOOK3S_KUAP) && - (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), - "Bug: %s fault blocked by AMR!", is_write ? 
"Write" : "Read"); + if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) + return false; + + if (radix_enabled()) { + /* + * Will be a storage protection fault. + * Only check the details of AMR[0] + */ + return WARN((regs->kuap & (is_write ? RADIX_KUAP_BLOCK_WRITE : RADIX_KUAP_BLOCK_READ)), + "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read"); + } + /* + * We don't want to WARN here because userspace can setup + * keys such that a kernel access to user address can cause + * fault + */ + return !!(error_code & DSISR_KEYFAULT); } static __always_inline void allow_user_access(void __user *to, const void __user *from, diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index a06e50b68d40..952be0414f43 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -59,8 +59,8 @@ void setup_kuap(bool disabled); #else static inline void setup_kuap(bool disabled) { } -static inline bool -bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, + bool is_write, unsigned long error_code) { return false; } diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index 567cdc557402..7bdd9e5b63ed 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -60,8 +60,8 @@ static inline void restore_user_access(unsigned long flags) mtspr(SPRN_MD_AP, flags); } -static inline bool -bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, + bool is_write, unsigned long error_code) { return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xff000000), "Bug: fault blocked by AP register !"); diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 0add963a849b..c91621df0c61 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -227,7 +227,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, // Read/write fault in a valid region (the exception table search passed // above), but blocked by KUAP is bad, it can never succeed. - if (bad_kuap_fault(regs, address, is_write)) + if (bad_kuap_fault(regs, address, is_write, error_code)) return true; // What's left? Kernel fault on user in well defined regions (extable From fa46c2fa6ffbedab3a3cbcbde1292468979e830b Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 27 Nov 2020 10:14:19 +0530 Subject: [PATCH 148/304] powerpc/book3s64/kuap: Use Key 3 to implement KUAP with hash translation. Radix use AMR Key 0 and hash translation use AMR key 3. 
Signed-off-by: Aneesh Kumar K.V Reviewed-by: Sandipan Das Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201127044424.40686-18-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/kup.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index f8f87b5c0e67..b9cbb96d9aed 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -5,11 +5,10 @@ #include #include -#define AMR_KUAP_BLOCK_READ UL(0x4000000000000000) -#define AMR_KUAP_BLOCK_WRITE UL(0x8000000000000000) +#define AMR_KUAP_BLOCK_READ UL(0x5455555555555555) +#define AMR_KUAP_BLOCK_WRITE UL(0xa8aaaaaaaaaaaaaa) #define AMR_KUEP_BLOCKED (1UL << 62) #define AMR_KUAP_BLOCKED (AMR_KUAP_BLOCK_READ | AMR_KUAP_BLOCK_WRITE) -#define AMR_KUAP_SHIFT 62 #ifdef __ASSEMBLY__ @@ -62,8 +61,8 @@ #ifdef CONFIG_PPC_KUAP_DEBUG BEGIN_MMU_FTR_SECTION_NESTED(67) mfspr \gpr1, SPRN_AMR - li \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT) - sldi \gpr2, \gpr2, AMR_KUAP_SHIFT + /* Prevent access to userspace using any key values */ + LOAD_REG_IMMEDIATE(\gpr2, AMR_KUAP_BLOCKED) 999: tdne \gpr1, \gpr2 EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUAP, 67) From 292f86c4c683a1064aff7210348da088c1573ee0 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 27 Nov 2020 10:14:20 +0530 Subject: [PATCH 149/304] powerpc/book3s64/kuep: Use Key 3 to implement KUEP with hash translation. Radix use IAMR Key 0 and hash translation use IAMR key 3. Signed-off-by: Aneesh Kumar K.V Reviewed-by: Sandipan Das Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201127044424.40686-19-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/kup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index b9cbb96d9aed..f54ab2cb189b 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -7,7 +7,7 @@ #define AMR_KUAP_BLOCK_READ UL(0x5455555555555555) #define AMR_KUAP_BLOCK_WRITE UL(0xa8aaaaaaaaaaaaaa) -#define AMR_KUEP_BLOCKED (1UL << 62) +#define AMR_KUEP_BLOCKED UL(0x5455555555555555) #define AMR_KUAP_BLOCKED (AMR_KUAP_BLOCK_READ | AMR_KUAP_BLOCK_WRITE) #ifdef __ASSEMBLY__ From b2ff33a10c8b3e9d260c57df38b5cd3765a0b785 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 27 Nov 2020 10:14:21 +0530 Subject: [PATCH 150/304] powerpc/book3s64/hash/kuap: Enable kuap on hash Signed-off-by: Aneesh Kumar K.V Reviewed-by: Sandipan Das Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201127044424.40686-20-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/book3s64/pkeys.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 8d1bf2f18ca4..9f01c86d2beb 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -265,7 +265,12 @@ void __init setup_kuep(bool disabled) #ifdef CONFIG_PPC_KUAP void __init setup_kuap(bool disabled) { - if (disabled || !early_radix_enabled()) + if (disabled) + return; + /* + * On hash if PKEY feature is not enabled, disable KUAP too. 
+ */ + if (!early_radix_enabled() && !early_mmu_has_feature(MMU_FTR_PKEY)) return; if (smp_processor_id() == boot_cpuid) { From c91435d95c49f4053b05ba03b41dd7ed0fbd6c71 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 27 Nov 2020 10:14:22 +0530 Subject: [PATCH 151/304] powerpc/book3s64/hash/kuep: Enable KUEP on hash Signed-off-by: Aneesh Kumar K.V Reviewed-by: Sandipan Das Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201127044424.40686-21-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/book3s64/pkeys.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 9f01c86d2beb..4a3aeddbe0c7 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -244,7 +244,12 @@ void __init pkey_early_init_devtree(void) #ifdef CONFIG_PPC_KUEP void __init setup_kuep(bool disabled) { - if (disabled || !early_radix_enabled()) + if (disabled) + return; + /* + * On hash if PKEY feature is not enabled, disable KUAP too. + */ + if (!early_radix_enabled() && !early_mmu_has_feature(MMU_FTR_PKEY)) return; if (smp_processor_id() == boot_cpuid) { From 61130e203dca3ba1f0c510eb12f7a4294e31a834 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 2 Dec 2020 10:08:54 +0530 Subject: [PATCH 152/304] powerpc/book3s64/kup: Check max key supported before enabling kup Don't enable KUEP/KUAP if we support less than or equal to 3 keys. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201202043854.76406-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/kup.h | 3 +++ arch/powerpc/mm/book3s64/pkeys.c | 33 ++++++++++++++++++++------------ arch/powerpc/mm/init-common.c | 4 ++-- 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 952be0414f43..f8ec679bd2de 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -44,6 +44,9 @@ #else /* !__ASSEMBLY__ */ +extern bool disable_kuep; +extern bool disable_kuap; + #include void setup_kup(void); diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 4a3aeddbe0c7..2b7ded396db4 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -185,6 +185,27 @@ void __init pkey_early_init_devtree(void) default_uamor &= ~(0x3ul << pkeyshift(execute_only_key)); } + if (unlikely(num_pkey <= 3)) { + /* + * Insufficient number of keys to support + * KUAP/KUEP feature. + */ + disable_kuep = true; + disable_kuap = true; + WARN(1, "Disabling kernel user protection due to low (%d) max supported keys\n", num_pkey); + } else { + /* handle key which is used by kernel for KAUP */ + reserved_allocation_mask |= (0x1 << 3); + /* + * Mark access for kup_key in default amr so that + * we continue to operate with that AMR in + * copy_to/from_user(). + */ + default_amr &= ~(0x3ul << pkeyshift(3)); + default_iamr &= ~(0x1ul << pkeyshift(3)); + default_uamor &= ~(0x3ul << pkeyshift(3)); + } + /* * Allow access for only key 0. And prevent any other modification. */ @@ -205,18 +226,6 @@ void __init pkey_early_init_devtree(void) reserved_allocation_mask |= (0x1 << 1); default_uamor &= ~(0x3ul << pkeyshift(1)); - /* handle key which is used by kernel for KAUP */ - reserved_allocation_mask |= (0x1 << 3); - /* - * Mark access for KUAP key in default amr so that - * we continue to operate with that AMR in - * copy_to/from_user(). 
- */ - default_amr &= ~(0x3ul << pkeyshift(3)); - default_iamr &= ~(0x1ul << pkeyshift(3)); - default_uamor &= ~(0x3ul << pkeyshift(3)); - - /* * Prevent the usage of OS reserved keys. Update UAMOR * for those keys. Also mark the rest of the bits in the diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index 8e0d792ac296..afdebb95bcae 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -28,8 +28,8 @@ EXPORT_SYMBOL_GPL(kernstart_addr); unsigned long kernstart_virt_addr __ro_after_init = KERNELBASE; EXPORT_SYMBOL_GPL(kernstart_virt_addr); -static bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP); -static bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP); +bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP); +bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP); static int __init parse_nosmep(char *p) { From ec0f9b98f7d01b15c804e77e12a515ffc56d7309 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 27 Nov 2020 10:14:24 +0530 Subject: [PATCH 153/304] powerpc/book3s64/pkeys: Optimize KUAP and KUEP feature disabled case If FTR_BOOK3S_KUAP is disabled, kernel will continue to run with the same AMR value with which it was entered. Hence there is a high chance that we can return without restoring the AMR value. This also helps the case when applications are not using the pkey feature. In this case, different applications will have the same AMR values and hence we can avoid restoring AMR in this case too. Also avoid isync() if not really needed. Do the same for IAMR. null-syscall benchmark results: With smap/smep disabled: Without patch: 957.95 ns 2778.17 cycles With patch: 858.38 ns 2489.30 cycles With smap/smep enabled: Without patch: 1017.26 ns 2950.36 cycles With patch: 1021.51 ns 2962.44 cycles Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201127044424.40686-23-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/kup.h | 63 +++++++++++++++++++++--- arch/powerpc/kernel/entry_64.S | 2 +- arch/powerpc/kernel/syscall_64.c | 12 +++-- 3 files changed, 67 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index f54ab2cb189b..f2e6dd78d5e2 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -12,28 +12,54 @@ #ifdef __ASSEMBLY__ -.macro kuap_user_restore gpr1 +.macro kuap_user_restore gpr1, gpr2 #if defined(CONFIG_PPC_PKEY) BEGIN_MMU_FTR_SECTION_NESTED(67) + b 100f // skip_restore_amr + END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_PKEY, 67) /* * AMR and IAMR are going to be different when * returning to userspace. */ ld \gpr1, STACK_REGS_AMR(r1) + + /* + * If kuap feature is not enabled, do the mtspr + * only if AMR value is different. + */ + BEGIN_MMU_FTR_SECTION_NESTED(68) + mfspr \gpr2, SPRN_AMR + cmpd \gpr1, \gpr2 + beq 99f + END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_BOOK3S_KUAP, 68) + isync mtspr SPRN_AMR, \gpr1 +99: /* * Restore IAMR only when returning to userspace */ ld \gpr1, STACK_REGS_IAMR(r1) + + /* + * If kuep feature is not enabled, do the mtspr + * only if IAMR value is different. 
+ */ + BEGIN_MMU_FTR_SECTION_NESTED(69) + mfspr \gpr2, SPRN_IAMR + cmpd \gpr1, \gpr2 + beq 100f + END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_BOOK3S_KUEP, 69) + + isync mtspr SPRN_IAMR, \gpr1 +100: //skip_restore_amr /* No isync required, see kuap_user_restore() */ - END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_PKEY, 67) #endif .endm -.macro kuap_kernel_restore gpr1, gpr2 +.macro kuap_kernel_restore gpr1, gpr2 #if defined(CONFIG_PPC_PKEY) BEGIN_MMU_FTR_SECTION_NESTED(67) @@ -199,18 +225,43 @@ static inline u64 current_thread_iamr(void) static inline void kuap_user_restore(struct pt_regs *regs) { + bool restore_amr = false, restore_iamr = false; + unsigned long amr, iamr; + if (!mmu_has_feature(MMU_FTR_PKEY)) return; - isync(); - mtspr(SPRN_AMR, regs->amr); - mtspr(SPRN_IAMR, regs->iamr); + if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { + amr = mfspr(SPRN_AMR); + if (amr != regs->amr) + restore_amr = true; + } else { + restore_amr = true; + } + + if (!mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) { + iamr = mfspr(SPRN_IAMR); + if (iamr != regs->iamr) + restore_iamr = true; + } else { + restore_iamr = true; + } + + + if (restore_amr || restore_iamr) { + isync(); + if (restore_amr) + mtspr(SPRN_AMR, regs->amr); + if (restore_iamr) + mtspr(SPRN_IAMR, regs->iamr); + } /* * No isync required here because we are about to rfi * back to previous context before any user accesses * would be made, which is a CSI. */ } + static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index da23c397ceb2..c9d59450fba0 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -675,7 +675,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return) bne- .Lrestore_nvgprs .Lfast_user_interrupt_return_amr: - kuap_user_restore r3 + kuap_user_restore r3, r4 .Lfast_user_interrupt_return: ld r11,_NIP(r1) ld r12,_MSR(r1) diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index 11f1c6360291..7c85ed04a164 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -38,6 +38,7 @@ notrace long system_call_exception(long r3, long r4, long r5, #ifdef CONFIG_PPC_PKEY if (mmu_has_feature(MMU_FTR_PKEY)) { unsigned long amr, iamr; + bool flush_needed = false; /* * When entering from userspace we mostly have the AMR/IAMR * different from kernel default values. Hence don't compare. @@ -46,11 +47,16 @@ notrace long system_call_exception(long r3, long r4, long r5, iamr = mfspr(SPRN_IAMR); regs->amr = amr; regs->iamr = iamr; - if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) + if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); - if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) + flush_needed = true; + } + if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) { mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED); - isync(); + flush_needed = true; + } + if (flush_needed) + isync(); } else #endif kuap_check_amr(); From d3afd28cd2f35b2a1046b76e0cf010b684da2e84 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 26 Nov 2020 11:54:38 -0500 Subject: [PATCH 154/304] powerpc/perf: Fix to update radix_scope_qual in power10 power10 uses bit 9 of the raw event code as RADIX_SCOPE_QUAL. This bit is used for enabling the radix process events. Patch fixes the PMU counter support functions to program bit 18 of MMCR1 ( Monitor Mode Control Register1 ) with the RADIX_SCOPE_QUAL bit value. 
Since this field is not per-pmc, add this to PMU group constraints to make sure events in a group will have same bit value for this field. Use bit 21 as constraint bit field for radix_scope_qual. Patch also updates the power10 raw event encoding layout information, format field and constraints bit layout to include the radix_scope_qual bit. Fixes: a64e697cef23 ("powerpc/perf: power10 Performance Monitoring support") Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1606409684-1589-2-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/isa207-common.c | 12 ++++++++++++ arch/powerpc/perf/isa207-common.h | 13 ++++++++++--- arch/powerpc/perf/power10-pmu.c | 11 +++++++---- 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 2848904df638..f57f54f92c10 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -339,6 +339,11 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) value |= CNST_L1_QUAL_VAL(cache); } + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + mask |= CNST_RADIX_SCOPE_GROUP_MASK; + value |= CNST_RADIX_SCOPE_GROUP_VAL(event >> p10_EVENT_RADIX_SCOPE_QUAL_SHIFT); + } + if (is_event_marked(event)) { mask |= CNST_SAMPLE_MASK; value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT); @@ -456,6 +461,13 @@ int isa207_compute_mmcr(u64 event[], int n_ev, } } + /* Set RADIX_SCOPE_QUAL bit */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + val = (event[i] >> p10_EVENT_RADIX_SCOPE_QUAL_SHIFT) & + p10_EVENT_RADIX_SCOPE_QUAL_MASK; + mmcr1 |= val << p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT; + } + if (is_event_marked(event[i])) { mmcra |= MMCRA_SAMPLE_ENABLE; diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 7025de5e60e7..dc9c3d22fb38 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -101,6 +101,9 @@ #define p10_EVENT_CACHE_SEL_MASK 0x3ull #define p10_EVENT_MMCR3_MASK 0x7fffull #define p10_EVENT_MMCR3_SHIFT 45 +#define p10_EVENT_RADIX_SCOPE_QUAL_SHIFT 9 +#define p10_EVENT_RADIX_SCOPE_QUAL_MASK 0x1 +#define p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT 45 #define p10_EVENT_VALID_MASK \ ((p10_SDAR_MODE_MASK << p10_SDAR_MODE_SHIFT | \ @@ -112,6 +115,7 @@ (p9_EVENT_COMBINE_MASK << p9_EVENT_COMBINE_SHIFT) | \ (p10_EVENT_MMCR3_MASK << p10_EVENT_MMCR3_SHIFT) | \ (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \ + (p10_EVENT_RADIX_SCOPE_QUAL_MASK << p10_EVENT_RADIX_SCOPE_QUAL_SHIFT) | \ EVENT_LINUX_MASK | \ EVENT_PSEL_MASK)) /* @@ -125,9 +129,9 @@ * * 28 24 20 16 12 8 4 0 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - * [ ] | [ ] [ sample ] [ ] [6] [5] [4] [3] [2] [1] - * | | | | - * BHRB IFM -* | | | Count of events for each PMC. + * [ ] | [ ] | [ sample ] [ ] [6] [5] [4] [3] [2] [1] + * | | | | | + * BHRB IFM -* | | |*radix_scope | Count of events for each PMC. * EBB -* | | p1, p2, p3, p4, p5, p6. * L1 I/D qualifier -* | * nc - number of counters -* @@ -165,6 +169,9 @@ #define CNST_L2L3_GROUP_VAL(v) (((v) & 0x1full) << 55) #define CNST_L2L3_GROUP_MASK CNST_L2L3_GROUP_VAL(0x1f) +#define CNST_RADIX_SCOPE_GROUP_VAL(v) (((v) & 0x1ull) << 21) +#define CNST_RADIX_SCOPE_GROUP_MASK CNST_RADIX_SCOPE_GROUP_VAL(1) + /* * For NC we are counting up to 4 events. This requires three bits, and we need * the fifth event to overflow and set the 4th bit. 
To achieve that we bias the
diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
index a01e87f0b8d0..88c54308125d 100644
--- a/arch/powerpc/perf/power10-pmu.c
+++ b/arch/powerpc/perf/power10-pmu.c
@@ -23,10 +23,10 @@
 *
 * 28 24 20 16 12 8 4 0
 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
- * [ ] [ sample ] [ ] [ ] [ pmc ] [unit ] [ ] m [ pmcxsel ]
- * | | | | | |
- * | | | | | *- mark
- * | | | *- L1/L2/L3 cache_sel |
+ * [ ] [ sample ] [ ] [ ] [ pmc ] [unit ] [ ] | m [ pmcxsel ]
+ * | | | | | | |
+ * | | | | | | *- mark
+ * | | | *- L1/L2/L3 cache_sel | |*-radix_scope_qual
 * | | sdar_mode |
 * | *- sampling mode for marked events *- combine
 * |
@@ -59,6 +59,7 @@
 *
 * MMCR1[16] = cache_sel[0]
 * MMCR1[17] = cache_sel[1]
+ * MMCR1[18] = radix_scope_qual
 *
 * if mark:
 * MMCRA[63] = 1 (SAMPLE_ENABLE)
@@ -175,6 +176,7 @@ PMU_FORMAT_ATTR(src_sel, "config:45-46");
 PMU_FORMAT_ATTR(invert_bit, "config:47");
 PMU_FORMAT_ATTR(src_mask, "config:48-53");
 PMU_FORMAT_ATTR(src_match, "config:54-59");
+PMU_FORMAT_ATTR(radix_scope, "config:9");
 
 static struct attribute *power10_pmu_format_attr[] = {
 	&format_attr_event.attr,
@@ -194,6 +196,7 @@ static struct attribute *power10_pmu_format_attr[] = {
 	&format_attr_invert_bit.attr,
 	&format_attr_src_mask.attr,
 	&format_attr_src_match.attr,
+	&format_attr_radix_scope.attr,
 	NULL,
 };

From e924be7b0b0d1f37d0509c854a92c7a71e3cdfe7 Mon Sep 17 00:00:00 2001
From: Athira Rajeev
Date: Thu, 26 Nov 2020 11:54:39 -0500
Subject: [PATCH 155/304] powerpc/perf: Update the PMU group constraints for l2l3 events in power10

In Power9, L2/L3 bus events are always available as a "bank" of 4 events.
To obtain the counts for any of the l2/l3 bus events in a given bank, the
user will have to program PMC4 with the corresponding l2/l3 bus event for
that bank. Commit 59029136d750 ("powerpc/perf: Add constraints for power9
l2/l3 bus events") enforced this rule in Power9. But this is not valid for
Power10, since Power10's Monitor Mode Control Register 2 (MMCR2) has bits
to configure the l2/l3 events. Hence remove this PMC4 constraint check from
power10. Since the l2/l3 bits in MMCR2 are not per-PMC, the patch handles
group constraint checks for the l2/l3 bits in MMCR2.
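The group-constraint mechanics can be sketched with the macros from isa207-common.h: each event contributes a (mask, value) pair, and two events may share a group only if their values agree on the overlapping mask bits. A simplified model (the real scheduler in core-book3s has more machinery around this):

#include <stdbool.h>
#include <stdio.h>

#define CNST_L2L3_GROUP_VAL(v)	(((v) & 0x1fULL) << 55)
#define CNST_L2L3_GROUP_MASK	CNST_L2L3_GROUP_VAL(0x1f)

static bool group_compatible(unsigned long long m1, unsigned long long v1,
			     unsigned long long m2, unsigned long long v2)
{
	return ((v1 ^ v2) & (m1 & m2)) == 0;
}

int main(void)
{
	unsigned long long m = CNST_L2L3_GROUP_MASK;

	/* Same l2l3 select: may group. Different selects: may not. */
	printf("%d\n", group_compatible(m, CNST_L2L3_GROUP_VAL(3),
					m, CNST_L2L3_GROUP_VAL(3)));	/* 1 */
	printf("%d\n", group_compatible(m, CNST_L2L3_GROUP_VAL(3),
					m, CNST_L2L3_GROUP_VAL(5)));	/* 0 */
	return 0;
}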
Fixes: a64e697cef23 ("powerpc/perf: power10 Performance Monitoring support")
Signed-off-by: Athira Rajeev
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/1606409684-1589-3-git-send-email-atrajeev@linux.vnet.ibm.com
---
 arch/powerpc/perf/isa207-common.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index f57f54f92c10..38ed450c7855 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -311,9 +311,11 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
 	}
 
 	if (unit >= 6 && unit <= 9) {
-		if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) {
-			mask |= CNST_L2L3_GROUP_MASK;
-			value |= CNST_L2L3_GROUP_VAL(event >> p10_L2L3_EVENT_SHIFT);
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			if (unit == 6) {
+				mask |= CNST_L2L3_GROUP_MASK;
+				value |= CNST_L2L3_GROUP_VAL(event >> p10_L2L3_EVENT_SHIFT);
+			}
 		} else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
 			mask |= CNST_CACHE_GROUP_MASK;
 			value |= CNST_CACHE_GROUP_VAL(event & 0xff);

From 0263bbb377af0c2d38bc8b2ad2ff147e240094de Mon Sep 17 00:00:00 2001
From: Athira Rajeev
Date: Thu, 26 Nov 2020 11:54:40 -0500
Subject: [PATCH 156/304] powerpc/perf: Fix the PMU group constraints for threshold events in power10

The PMU group constraints mask for threshold events covers all thresholding
bits, which include the threshold control value (start/stop), the select
value, and the thresh_cmp value (MMCRA[9:18]). In power9, the thresh_cmp
bits were part of the event code. But in the case of power10, the
thresh_cmp bits are not part of the event code due to the inclusion of the
MMCR3 bits. Hence thresh_cmp is not valid for group constraints on power10.

Fix the PMU group constraints checking for threshold events in power10 by
using a constraint mask and value that cover only the threshold control and
select bits.
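The new macros in the hunk that follows carve an 11-bit field out of the constraint word at bit 32, covering only threshold control and select. A short sketch of the packing (0x3f4 is an arbitrary example value):

#include <stdio.h>

#define CNST_THRESH_CTL_SEL_VAL(v)	(((v) & 0x7ffULL) << 32)
#define CNST_THRESH_CTL_SEL_MASK	CNST_THRESH_CTL_SEL_VAL(0x7ff)

int main(void)
{
	unsigned long long value = CNST_THRESH_CTL_SEL_VAL(0x3f4ULL);

	printf("mask  = %#llx\n", CNST_THRESH_CTL_SEL_MASK);	/* 0x7ff00000000 */
	printf("value = %#llx\n", value);			/* 0x3f400000000 */
	return 0;
}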
Fixes: a64e697cef23 ("powerpc/perf: power10 Performance Monitoring support")
Signed-off-by: Athira Rajeev
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/1606409684-1589-4-git-send-email-atrajeev@linux.vnet.ibm.com
---
 arch/powerpc/perf/isa207-common.c | 7 ++++++-
 arch/powerpc/perf/isa207-common.h | 3 +++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index 38ed450c7855..0f4983ef4103 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -351,7 +351,12 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
 		value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT);
 	}
 
-	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+		if (event_is_threshold(event)) {
+			mask |= CNST_THRESH_CTL_SEL_MASK;
+			value |= CNST_THRESH_CTL_SEL_VAL(event >> EVENT_THRESH_SHIFT);
+		}
+	} else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
 		if (event_is_threshold(event) && is_thresh_cmp_valid(event)) {
 			mask |= CNST_THRESH_MASK;
 			value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT);

diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h
index dc9c3d22fb38..42087643c333 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -149,6 +149,9 @@
 #define CNST_THRESH_VAL(v)	(((v) & EVENT_THRESH_MASK) << 32)
 #define CNST_THRESH_MASK	CNST_THRESH_VAL(EVENT_THRESH_MASK)
 
+#define CNST_THRESH_CTL_SEL_VAL(v)	(((v) & 0x7ffull) << 32)
+#define CNST_THRESH_CTL_SEL_MASK	CNST_THRESH_CTL_SEL_VAL(0x7ff)
+
 #define CNST_EBB_VAL(v)	(((v) & EVENT_EBB_MASK) << 24)
 #define CNST_EBB_MASK	CNST_EBB_VAL(EVENT_EBB_MASK)

From c0e3985790251b307b7b71b687ed0128741b3f34 Mon Sep 17 00:00:00 2001
From: Athira Rajeev
Date: Thu, 26 Nov 2020 11:54:41 -0500
Subject: [PATCH 157/304] powerpc/perf: Add generic and cache event list for power10 DD1

There are event code updates for some of the generic events and cache
events for power10. In order to keep the current event codes working with
DD1 as well, create new arrays of generic_events, cache_events and
pmu_attr_groups with the suffix _dd1, for example power10_events_attr_dd1.
Further updates to the event codes can then be made in the original list,
i.e. power10_events_attr. Update the power10 pmu init code to pick the dd1
list while registering the power PMU, based on the pvr (Processor Version
Register) value.
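The selection itself is a plain revision check at PMU init time, as the hunk further below does. A sketch of the idea, assuming PVR_CFG() extracts the configuration nibble from the PVR as the reg.h helper does (the sample PVR value here is made up):

#include <stdio.h>

#define PVR_CFG(pvr)	(((pvr) >> 8) & 0xF)	/* assumed, mirrors reg.h */

struct pmu_tables {
	const char *events;
	const char *cache_events;
};

static struct pmu_tables pick_power10_tables(unsigned int pvr)
{
	static const struct pmu_tables dd1 = {
		"power10_events_attr_dd1", "power10_cache_events_dd1"
	};
	static const struct pmu_tables cur = {
		"power10_events_attr", "power10_cache_events"
	};

	return PVR_CFG(pvr) == 1 ? dd1 : cur;	/* CFG == 1 means DD1 */
}

int main(void)
{
	printf("%s\n", pick_power10_tables(0x00800100).events);	/* dd1 */
	return 0;
}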
Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1606409684-1589-5-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/power10-pmu.c | 152 ++++++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index 88c54308125d..bc3d4dd5e6cd 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -129,6 +129,31 @@ CACHE_EVENT_ATTR(branch-loads, PM_BR_CMPL); CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS); CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS); +static struct attribute *power10_events_attr_dd1[] = { + GENERIC_EVENT_PTR(PM_RUN_CYC), + GENERIC_EVENT_PTR(PM_RUN_INST_CMPL), + GENERIC_EVENT_PTR(PM_BR_CMPL), + GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL), + GENERIC_EVENT_PTR(PM_LD_REF_L1), + GENERIC_EVENT_PTR(PM_LD_MISS_L1), + GENERIC_EVENT_PTR(MEM_LOADS), + GENERIC_EVENT_PTR(MEM_STORES), + CACHE_EVENT_PTR(PM_LD_MISS_L1), + CACHE_EVENT_PTR(PM_LD_REF_L1), + CACHE_EVENT_PTR(PM_LD_PREFETCH_CACHE_LINE_MISS), + CACHE_EVENT_PTR(PM_ST_MISS_L1), + CACHE_EVENT_PTR(PM_L1_ICACHE_MISS), + CACHE_EVENT_PTR(PM_INST_FROM_L1), + CACHE_EVENT_PTR(PM_IC_PREF_REQ), + CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS), + CACHE_EVENT_PTR(PM_DATA_FROM_L3), + CACHE_EVENT_PTR(PM_BR_MPRED_CMPL), + CACHE_EVENT_PTR(PM_BR_CMPL), + CACHE_EVENT_PTR(PM_DTLB_MISS), + CACHE_EVENT_PTR(PM_ITLB_MISS), + NULL +}; + static struct attribute *power10_events_attr[] = { GENERIC_EVENT_PTR(PM_RUN_CYC), GENERIC_EVENT_PTR(PM_RUN_INST_CMPL), @@ -154,6 +179,11 @@ static struct attribute *power10_events_attr[] = { NULL }; +static struct attribute_group power10_pmu_events_group_dd1 = { + .name = "events", + .attrs = power10_events_attr_dd1, +}; + static struct attribute_group power10_pmu_events_group = { .name = "events", .attrs = power10_events_attr, @@ -205,12 +235,27 @@ static struct attribute_group power10_pmu_format_group = { .attrs = power10_pmu_format_attr, }; +static const struct attribute_group *power10_pmu_attr_groups_dd1[] = { + &power10_pmu_format_group, + &power10_pmu_events_group_dd1, + NULL, +}; + static const struct attribute_group *power10_pmu_attr_groups[] = { &power10_pmu_format_group, &power10_pmu_events_group, NULL, }; +static int power10_generic_events_dd1[] = { + [PERF_COUNT_HW_CPU_CYCLES] = PM_RUN_CYC, + [PERF_COUNT_HW_INSTRUCTIONS] = PM_RUN_INST_CMPL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_CMPL, + [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL, + [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1, + [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1, +}; + static int power10_generic_events[] = { [PERF_COUNT_HW_CPU_CYCLES] = PM_RUN_CYC, [PERF_COUNT_HW_INSTRUCTIONS] = PM_RUN_INST_CMPL, @@ -276,6 +321,107 @@ static void power10_config_bhrb(u64 pmu_bhrb_filter) * 0 means not supported, -1 means nonsensical, other values * are event codes. 
*/ +static u64 power10_cache_events_dd1[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_LD_REF_L1, + [C(RESULT_MISS)] = PM_LD_MISS_L1, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = PM_ST_MISS_L1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = PM_LD_PREFETCH_CACHE_LINE_MISS, + [C(RESULT_MISS)] = 0, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_INST_FROM_L1, + [C(RESULT_MISS)] = PM_L1_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = PM_INST_FROM_L1MISS, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = PM_IC_PREF_REQ, + [C(RESULT_MISS)] = 0, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_DATA_FROM_L3, + [C(RESULT_MISS)] = PM_DATA_FROM_L3MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = 0, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = PM_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = PM_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_BR_CMPL, + [C(RESULT_MISS)] = PM_BR_MPRED_CMPL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, +}; + static u64 power10_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { [C(OP_READ)] = { @@ -422,6 +568,12 @@ int init_power10_pmu(void) /* Set the PERF_REG_EXTENDED_MASK here */ PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_31; + if ((PVR_CFG(pvr) == 1)) { + power10_pmu.generic_events = power10_generic_events_dd1; + power10_pmu.attr_groups = power10_pmu_attr_groups_dd1; + power10_pmu.cache_events = &power10_cache_events_dd1; + } + rc = register_power_pmu(&power10_pmu); if (rc) return rc; From 1f12316394e3b241e70ed620ca846002c8ace3ec Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 26 Nov 2020 11:54:42 -0500 Subject: [PATCH 158/304] powerpc/perf: Fix to update generic event codes for power10 Fix the event code for events: branch-instructions (to PM_BR_FIN), branch-misses (to PM_MPRED_BR_FIN) and cache-misses (to PM_LD_DEMAND_MISS_L1_FIN) for power10 PMU. Update the list of generic events with this modified event code. 
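Put side by side, the remap is just a different raw code per generic event depending on revision; the codes below are the ones from power10-events-list.h, and the per-event helpers are a sketch (the kernel of course swaps whole tables rather than branching per event):

#include <stdio.h>

#define PM_BR_CMPL		0x4d05e
#define PM_BR_MPRED_CMPL	0x400f6
#define PM_BR_FIN		0x2f04a
#define PM_MPRED_BR_FIN		0x3e098

static unsigned int branch_code(int dd1)
{
	return dd1 ? PM_BR_CMPL : PM_BR_FIN;
}

static unsigned int branch_miss_code(int dd1)
{
	return dd1 ? PM_BR_MPRED_CMPL : PM_MPRED_BR_FIN;
}

int main(void)
{
	printf("branch-instructions: %#x (DD1) vs %#x\n",
	       branch_code(1), branch_code(0));
	printf("branch-misses:       %#x (DD1) vs %#x\n",
	       branch_miss_code(1), branch_miss_code(0));
	return 0;
}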
Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1606409684-1589-6-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/power10-events-list.h | 3 +++ arch/powerpc/perf/power10-pmu.c | 15 +++++++++------ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h index 60c1b8111082..abd778f0dd5f 100644 --- a/arch/powerpc/perf/power10-events-list.h +++ b/arch/powerpc/perf/power10-events-list.h @@ -15,6 +15,9 @@ EVENT(PM_EXEC_STALL, 0x30008); EVENT(PM_RUN_INST_CMPL, 0x500fa); EVENT(PM_BR_CMPL, 0x4d05e); EVENT(PM_BR_MPRED_CMPL, 0x400f6); +EVENT(PM_BR_FIN, 0x2f04a); +EVENT(PM_MPRED_BR_FIN, 0x3e098); +EVENT(PM_LD_DEMAND_MISS_L1_FIN, 0x400f0); /* All L1 D cache load references counted at finish, gated by reject */ EVENT(PM_LD_REF_L1, 0x100fc); diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index bc3d4dd5e6cd..a02da6900997 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -114,6 +114,9 @@ GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1); GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1); GENERIC_EVENT_ATTR(mem-loads, MEM_LOADS); GENERIC_EVENT_ATTR(mem-stores, MEM_STORES); +GENERIC_EVENT_ATTR(branch-instructions, PM_BR_FIN); +GENERIC_EVENT_ATTR(branch-misses, PM_MPRED_BR_FIN); +GENERIC_EVENT_ATTR(cache-misses, PM_LD_DEMAND_MISS_L1_FIN); CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1); CACHE_EVENT_ATTR(L1-dcache-loads, PM_LD_REF_L1); @@ -157,10 +160,10 @@ static struct attribute *power10_events_attr_dd1[] = { static struct attribute *power10_events_attr[] = { GENERIC_EVENT_PTR(PM_RUN_CYC), GENERIC_EVENT_PTR(PM_RUN_INST_CMPL), - GENERIC_EVENT_PTR(PM_BR_CMPL), - GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL), + GENERIC_EVENT_PTR(PM_BR_FIN), + GENERIC_EVENT_PTR(PM_MPRED_BR_FIN), GENERIC_EVENT_PTR(PM_LD_REF_L1), - GENERIC_EVENT_PTR(PM_LD_MISS_L1), + GENERIC_EVENT_PTR(PM_LD_DEMAND_MISS_L1_FIN), GENERIC_EVENT_PTR(MEM_LOADS), GENERIC_EVENT_PTR(MEM_STORES), CACHE_EVENT_PTR(PM_LD_MISS_L1), @@ -259,10 +262,10 @@ static int power10_generic_events_dd1[] = { static int power10_generic_events[] = { [PERF_COUNT_HW_CPU_CYCLES] = PM_RUN_CYC, [PERF_COUNT_HW_INSTRUCTIONS] = PM_RUN_INST_CMPL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_CMPL, - [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_FIN, + [PERF_COUNT_HW_BRANCH_MISSES] = PM_MPRED_BR_FIN, [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1, - [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1, + [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_DEMAND_MISS_L1_FIN, }; static u64 power10_bhrb_filter_map(u64 branch_sample_type) From 9a8ee52634235993273c43ef67669d8168497dd7 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 26 Nov 2020 11:54:43 -0500 Subject: [PATCH 159/304] powerpc/perf: Fix to update cache events with l2l3 events in power10 Export l2l3 events (PM_L2_ST_MISS and PM_L2_ST) and LLC-prefetches (PM_L3_PF_MISS_L3) via sysfs, and also add these to list of cache_events. 
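The cache map is a three-dimensional table indexed by cache type, operation and result; this patch fills in the previously empty LL write and prefetch slots. A cut-down sketch with just that row (codes taken from the hunk below):

#include <stdio.h>

enum { OP_READ, OP_WRITE, OP_PREFETCH, OP_MAX };
enum { RESULT_ACCESS, RESULT_MISS, RESULT_MAX };

#define PM_L2_ST		0x010000046080ULL
#define PM_L2_ST_MISS		0x26880ULL
#define PM_L3_PF_MISS_L3	0x100000016080ULL

static const unsigned long long llc_events[OP_MAX][RESULT_MAX] = {
	[OP_WRITE]    = { [RESULT_ACCESS] = PM_L2_ST,
			  [RESULT_MISS]   = PM_L2_ST_MISS },
	[OP_PREFETCH] = { [RESULT_ACCESS] = PM_L3_PF_MISS_L3 },
};

int main(void)
{
	printf("LLC-stores:       %#llx\n", llc_events[OP_WRITE][RESULT_ACCESS]);
	printf("LLC-store-misses: %#llx\n", llc_events[OP_WRITE][RESULT_MISS]);
	printf("LLC-prefetches:   %#llx\n", llc_events[OP_PREFETCH][RESULT_ACCESS]);
	return 0;
}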
Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1606409684-1589-7-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/power10-events-list.h | 6 ++++++ arch/powerpc/perf/power10-pmu.c | 12 +++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h index abd778f0dd5f..e45dafe818ed 100644 --- a/arch/powerpc/perf/power10-events-list.h +++ b/arch/powerpc/perf/power10-events-list.h @@ -39,6 +39,12 @@ EVENT(PM_IC_PREF_REQ, 0x040a0); EVENT(PM_DATA_FROM_L3, 0x01340000001c040); /* Demand LD - L3 Miss (not L2 hit and not L3 hit) */ EVENT(PM_DATA_FROM_L3MISS, 0x300fe); +/* All successful D-side store dispatches for this thread */ +EVENT(PM_L2_ST, 0x010000046080); +/* All successful D-side store dispatches for this thread that were L2 Miss */ +EVENT(PM_L2_ST_MISS, 0x26880); +/* Total HW L3 prefetches(Load+store) */ +EVENT(PM_L3_PF_MISS_L3, 0x100000016080); /* Data PTEG reload */ EVENT(PM_DTLB_MISS, 0x300fc); /* ITLB Reloaded */ diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index a02da6900997..79e0206ca454 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -127,6 +127,9 @@ CACHE_EVENT_ATTR(L1-icache-loads, PM_INST_FROM_L1); CACHE_EVENT_ATTR(L1-icache-prefetches, PM_IC_PREF_REQ); CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS); CACHE_EVENT_ATTR(LLC-loads, PM_DATA_FROM_L3); +CACHE_EVENT_ATTR(LLC-prefetches, PM_L3_PF_MISS_L3); +CACHE_EVENT_ATTR(LLC-store-misses, PM_L2_ST_MISS); +CACHE_EVENT_ATTR(LLC-stores, PM_L2_ST); CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL); CACHE_EVENT_ATTR(branch-loads, PM_BR_CMPL); CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS); @@ -175,6 +178,9 @@ static struct attribute *power10_events_attr[] = { CACHE_EVENT_PTR(PM_IC_PREF_REQ), CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS), CACHE_EVENT_PTR(PM_DATA_FROM_L3), + CACHE_EVENT_PTR(PM_L3_PF_MISS_L3), + CACHE_EVENT_PTR(PM_L2_ST_MISS), + CACHE_EVENT_PTR(PM_L2_ST), CACHE_EVENT_PTR(PM_BR_MPRED_CMPL), CACHE_EVENT_PTR(PM_BR_CMPL), CACHE_EVENT_PTR(PM_DTLB_MISS), @@ -460,11 +466,11 @@ static u64 power10_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(RESULT_MISS)] = PM_DATA_FROM_L3MISS, }, [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = -1, - [C(RESULT_MISS)] = -1, + [C(RESULT_ACCESS)] = PM_L2_ST, + [C(RESULT_MISS)] = PM_L2_ST_MISS, }, [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = -1, + [C(RESULT_ACCESS)] = PM_L3_PF_MISS_L3, [C(RESULT_MISS)] = 0, }, }, From 91668ab7db4bcfae332e561df1de2401f3f18553 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 26 Nov 2020 11:54:44 -0500 Subject: [PATCH 160/304] powerpc/perf: MMCR0 control for PMU registers under PMCC=00 PowerISA v3.1 introduces new control bit (PMCCEXT) for restricting access to group B PMU registers in problem state when MMCR0 PMCC=0b00. In problem state and when MMCR0 PMCC=0b00, setting the Monitor Mode Control Register bit 54 (MMCR0 PMCCEXT), will restrict read permission on Group B Performance Monitor Registers (SIER, SIAR, SDAR and MMCR1). When this bit is set to zero, group B registers will be readable. In other platforms (like power9), the older behaviour is retained where group B PMU SPRs are readable. Patch adds support for MMCR0 PMCCEXT bit in power10 by enabling this bit during boot and during the PMU event enable/disable callback functions. 
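The 0x200 define added to reg.h and the "bit 54" wording in the message are the same thing expressed in two numbering schemes: IBM bit numbering counts from the most significant bit of the 64-bit SPR. A one-line check:

#include <assert.h>
#include <stdio.h>

/* IBM (big-endian) bit b of a 64-bit SPR as a conventional mask. */
#define ISA_BIT(b)	(1ULL << (63 - (b)))

int main(void)
{
	assert(ISA_BIT(54) == 0x200);	/* MMCR0_PMCCEXT */
	printf("MMCR0_PMCCEXT = %#llx\n", ISA_BIT(54));
	return 0;
}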
Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1606409684-1589-8-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/kernel/cpu_setup_power.c | 1 + arch/powerpc/kernel/dt_cpu_ftrs.c | 1 + arch/powerpc/perf/core-book3s.c | 4 ++++ arch/powerpc/perf/isa207-common.c | 8 ++++++++ 5 files changed, 15 insertions(+) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 602236e223c4..1fdbd7bd5f22 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -864,6 +864,7 @@ #define MMCR0_BHRBA 0x00200000UL /* BHRB Access allowed in userspace */ #define MMCR0_EBE 0x00100000UL /* Event based branch enable */ #define MMCR0_PMCC 0x000c0000UL /* PMC control */ +#define MMCR0_PMCCEXT ASM_CONST(0x00000200) /* PMCCEXT control */ #define MMCR0_PMCC_U6 0x00080000UL /* PMC1-6 are R/W by user (PR) */ #define MMCR0_PMC1CE 0x00008000UL /* PMC1 count enable*/ #define MMCR0_PMCjCE ASM_CONST(0x00004000) /* PMCj count enable*/ diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c index 0c2191ee139e..3cca88ee96d7 100644 --- a/arch/powerpc/kernel/cpu_setup_power.c +++ b/arch/powerpc/kernel/cpu_setup_power.c @@ -123,6 +123,7 @@ static void init_PMU_ISA31(void) { mtspr(SPRN_MMCR3, 0); mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE); + mtspr(SPRN_MMCR0, MMCR0_PMCCEXT); } /* diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 1098863e17ee..9d079659b24d 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -454,6 +454,7 @@ static void init_pmu_power10(void) mtspr(SPRN_MMCR3, 0); mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE); + mtspr(SPRN_MMCR0, MMCR0_PMCCEXT); } static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 6e224650b3c9..c0e5ea3d6b25 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -95,6 +95,7 @@ static unsigned int freeze_events_kernel = MMCR0_FCS; #define SPRN_SIER3 0 #define MMCRA_SAMPLE_ENABLE 0 #define MMCRA_BHRB_DISABLE 0 +#define MMCR0_PMCCEXT 0 static inline unsigned long perf_ip_adjust(struct pt_regs *regs) { @@ -1273,6 +1274,9 @@ static void power_pmu_disable(struct pmu *pmu) val |= MMCR0_FC; val &= ~(MMCR0_EBE | MMCR0_BHRBA | MMCR0_PMCC | MMCR0_PMAO | MMCR0_FC56); + /* Set mmcr0 PMCCEXT for p10 */ + if (ppmu->flags & PPMU_ARCH_31) + val |= MMCR0_PMCCEXT; /* * The barrier is to make sure the mtspr has been diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 0f4983ef4103..24f0a900a824 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -558,6 +558,14 @@ int isa207_compute_mmcr(u64 event[], int n_ev, if (!(pmc_inuse & 0x60)) mmcr->mmcr0 |= MMCR0_FC56; + /* + * Set mmcr0 (PMCCEXT) for p10 which + * will restrict access to group B registers + * when MMCR0 PMCC=0b00. + */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + mmcr->mmcr0 |= MMCR0_PMCCEXT; + mmcr->mmcr1 = mmcr1; mmcr->mmcra = mmcra; mmcr->mmcr2 = mmcr2; From fc1347b5feb685073ce2108c68cd8147340be016 Mon Sep 17 00:00:00 2001 From: Christophe Lombard Date: Wed, 25 Nov 2020 16:50:09 +0100 Subject: [PATCH 161/304] ocxl: Assign a register set to a Logical Partition Platform specific function to assign a register set to a Logical Partition. 
The "ibm,mmio-atsd" property, provided by the firmware, contains the 16 base ATSD physical addresses (ATSD0 through ATSD15) of the set of MMIO registers (XTS MMIO ATSDx LPARID/AVA/launch/status register). For the time being, the ATSD0 set of registers is used by default. Signed-off-by: Christophe Lombard Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201125155013.39955-2-clombard@linux.vnet.ibm.com --- arch/powerpc/include/asm/pnv-ocxl.h | 3 ++ arch/powerpc/platforms/powernv/ocxl.c | 45 +++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h index d37ededca3ee..60c3c74427d9 100644 --- a/arch/powerpc/include/asm/pnv-ocxl.h +++ b/arch/powerpc/include/asm/pnv-ocxl.h @@ -28,4 +28,7 @@ int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, void **p void pnv_ocxl_spa_release(void *platform_data); int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle); +int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid, + uint64_t lpcr, void __iomem **arva); +void pnv_ocxl_unmap_lpar(void __iomem *arva); #endif /* _ASM_PNV_OCXL_H */ diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c index ecdad219d704..57fc1062677b 100644 --- a/arch/powerpc/platforms/powernv/ocxl.c +++ b/arch/powerpc/platforms/powernv/ocxl.c @@ -483,3 +483,48 @@ int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle) return rc; } EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache); + +int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid, + uint64_t lpcr, void __iomem **arva) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + u64 mmio_atsd; + int rc; + + /* ATSD physical address. + * ATSD LAUNCH register: write access initiates a shoot down to + * initiate the TLB Invalidate command. + */ + rc = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", + 0, &mmio_atsd); + if (rc) { + dev_info(&dev->dev, "No available ATSD found\n"); + return rc; + } + + /* Assign a register set to a Logical Partition and MMIO ATSD + * LPARID register to the required value. + */ + rc = opal_npu_map_lpar(phb->opal_id, pci_dev_id(dev), + lparid, lpcr); + if (rc) { + dev_err(&dev->dev, "Error mapping device to LPAR: %d\n", rc); + return rc; + } + + *arva = ioremap(mmio_atsd, 24); + if (!(*arva)) { + dev_warn(&dev->dev, "ioremap failed - mmio_atsd: %#llx\n", mmio_atsd); + rc = -ENOMEM; + } + + return rc; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_map_lpar); + +void pnv_ocxl_unmap_lpar(void __iomem *arva) +{ + iounmap(arva); +} +EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar); From 19b311ca51e108b6d8d679496af8635fdc1984a8 Mon Sep 17 00:00:00 2001 From: Christophe Lombard Date: Wed, 25 Nov 2020 16:50:10 +0100 Subject: [PATCH 162/304] ocxl: Initiate a TLB invalidate command When a TLB Invalidate is required for the Logical Partition, the following sequence has to be performed: 1. Load MMIO ATSD AVA register with the necessary value, if required. 2. Write the MMIO ATSD launch register to initiate the TLB Invalidate command. 3. Poll the MMIO ATSD status register to determine when the TLB Invalidate has been completed. 
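Condensed to C, the three steps above look roughly like the helper the next patch adds (a sketch using the register offsets defined there; the real pnv_ocxl_tlb_invalidate() also bounds the poll with a jiffies timeout):

	/* Sketch of the ATSD shoot-down sequence */
	if (addr)
		out_be64(arva + PNV_OCXL_ATSD_AVA, ava);	/* 1. load AVA if required */
	out_be64(arva + PNV_OCXL_ATSD_LNCH, launch);		/* 2. launch the invalidate */
	while (in_be64(arva + PNV_OCXL_ATSD_STAT) >> 63)	/* 3. poll the status bit */
		cpu_relax();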
Signed-off-by: Christophe Lombard Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201125155013.39955-3-clombard@linux.vnet.ibm.com --- arch/powerpc/include/asm/pnv-ocxl.h | 51 ++++++++++++++++++++ arch/powerpc/platforms/powernv/ocxl.c | 69 +++++++++++++++++++++++++++ 2 files changed, 120 insertions(+) diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h index 60c3c74427d9..9acd1fbf1197 100644 --- a/arch/powerpc/include/asm/pnv-ocxl.h +++ b/arch/powerpc/include/asm/pnv-ocxl.h @@ -3,12 +3,59 @@ #ifndef _ASM_PNV_OCXL_H #define _ASM_PNV_OCXL_H +#include <linux/bitfield.h> #include <linux/pci.h> #define PNV_OCXL_TL_MAX_TEMPLATE 63 #define PNV_OCXL_TL_BITS_PER_RATE 4 #define PNV_OCXL_TL_RATE_BUF_SIZE ((PNV_OCXL_TL_MAX_TEMPLATE+1) * PNV_OCXL_TL_BITS_PER_RATE / 8) +#define PNV_OCXL_ATSD_TIMEOUT 1 + +/* TLB Management Instructions */ +#define PNV_OCXL_ATSD_LNCH 0x00 +/* Radix Invalidate */ +#define PNV_OCXL_ATSD_LNCH_R PPC_BIT(0) +/* Radix Invalidation Control + * 0b00 Just invalidate TLB. + * 0b01 Invalidate just Page Walk Cache. + * 0b10 Invalidate TLB, Page Walk Cache, and any + * caching of Partition and Process Table Entries. + */ +#define PNV_OCXL_ATSD_LNCH_RIC PPC_BITMASK(1, 2) +/* Number and Page Size of translations to be invalidated */ +#define PNV_OCXL_ATSD_LNCH_LP PPC_BITMASK(3, 10) +/* Invalidation Criteria + * 0b00 Invalidate just the target VA. + * 0b01 Invalidate matching PID. + */ +#define PNV_OCXL_ATSD_LNCH_IS PPC_BITMASK(11, 12) +/* 0b1: Process Scope, 0b0: Partition Scope */ +#define PNV_OCXL_ATSD_LNCH_PRS PPC_BIT(13) +/* Invalidation Flag */ +#define PNV_OCXL_ATSD_LNCH_B PPC_BIT(14) +/* Actual Page Size to be invalidated + * 000 4KB + * 101 64KB + * 001 2MB + * 010 1GB + */ +#define PNV_OCXL_ATSD_LNCH_AP PPC_BITMASK(15, 17) +/* Defines the large page select + * L=0b0 for 4KB pages + * L=0b1 for large pages + */ +#define PNV_OCXL_ATSD_LNCH_L PPC_BIT(18) +/* Process ID */ +#define PNV_OCXL_ATSD_LNCH_PID PPC_BITMASK(19, 38) +/* NoFlush – Assumed to be 0b0 */ +#define PNV_OCXL_ATSD_LNCH_F PPC_BIT(39) +#define PNV_OCXL_ATSD_LNCH_OCAPI_SLBI PPC_BIT(40) +#define PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON PPC_BIT(41) +#define PNV_OCXL_ATSD_AVA 0x08 +#define PNV_OCXL_ATSD_AVA_AVA PPC_BITMASK(0, 51) +#define PNV_OCXL_ATSD_STAT 0x10 + int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, u16 *supported); int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count); @@ -31,4 +78,8 @@ int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle); int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid, uint64_t lpcr, void __iomem **arva); void pnv_ocxl_unmap_lpar(void __iomem *arva); +void pnv_ocxl_tlb_invalidate(void __iomem *arva, + unsigned long pid, + unsigned long addr, + unsigned long page_size); #endif /* _ASM_PNV_OCXL_H */ diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c index 57fc1062677b..9105efcf242a 100644 --- a/arch/powerpc/platforms/powernv/ocxl.c +++ b/arch/powerpc/platforms/powernv/ocxl.c @@ -528,3 +528,72 @@ void pnv_ocxl_unmap_lpar(void __iomem *arva) iounmap(arva); } EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar); + +void pnv_ocxl_tlb_invalidate(void __iomem *arva, + unsigned long pid, + unsigned long addr, + unsigned long page_size) +{ + unsigned long timeout = jiffies + (HZ * PNV_OCXL_ATSD_TIMEOUT); + u64 val = 0ull; + int pend; + u8 size; + + if (!(arva)) + return; + + if (addr) { + /* load Abbreviated Virtual Address register with + * the necessary value + */ + val
|= FIELD_PREP(PNV_OCXL_ATSD_AVA_AVA, addr >> (63-51)); + out_be64(arva + PNV_OCXL_ATSD_AVA, val); + } + + /* Write access initiates a shoot down to initiate the + * TLB Invalidate command + */ + val = PNV_OCXL_ATSD_LNCH_R; + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_RIC, 0b10); + if (addr) + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b00); + else { + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b01); + val |= PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON; + } + val |= PNV_OCXL_ATSD_LNCH_PRS; + /* Actual Page Size to be invalidated + * 000 4KB + * 101 64KB + * 001 2MB + * 010 1GB + */ + size = 0b101; + if (page_size == 0x1000) + size = 0b000; + if (page_size == 0x200000) + size = 0b001; + if (page_size == 0x40000000) + size = 0b010; + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_AP, size); + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_PID, pid); + out_be64(arva + PNV_OCXL_ATSD_LNCH, val); + + /* Poll the ATSD status register to determine when the + * TLB Invalidate has been completed. + */ + val = in_be64(arva + PNV_OCXL_ATSD_STAT); + pend = val >> 63; + + while (pend) { + if (time_after_eq(jiffies, timeout)) { + pr_err("%s - Timeout while reading XTS MMIO ATSD status register (val=%#llx, pidr=0x%lx)\n", + __func__, val, pid); + return; + } + cpu_relax(); + val = in_be64(arva + PNV_OCXL_ATSD_STAT); + pend = val >> 63; + } +} +EXPORT_SYMBOL_GPL(pnv_ocxl_tlb_invalidate); From d731feea00c7c1734c9697558f2a1962c12d2710 Mon Sep 17 00:00:00 2001 From: Christophe Lombard Date: Wed, 25 Nov 2020 16:50:11 +0100 Subject: [PATCH 163/304] ocxl: Update the Process Element Entry To complete the MMIO based mechanism, the PASID, bus, device and function fields of the Process Element Entry have to be filled in. (See the OpenCAPI Power Platform Architecture document.)

Hypervisor Process Element Entry
Word  0 1 .... 7  8 ...... 12  13 ..15  16.... 19  20 ........... 31
 0    OSL Configuration State (0:31)
 1    OSL Configuration State (32:63)
 2    PASID      |  Reserved
 3    Bus  |  Device  |  Function  |  Reserved
 4    Reserved
 5    Reserved
 6    ....
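As a worked example of the word 2 layout above: PASID occupies IBM bits 0:19, i.e. the 20 most significant bits of the word, which is why the diff below shifts a 20-bit pasid left by 31 - 19 = 12 before byte-swapping. This is just the arithmetic spelled out, not additional kernel code:

	/* word 2: PASID in IBM bits 0:19 (top 20 bits), rest reserved */
	pe->pasid = cpu_to_be32(pasid << (31 - 19));
	/* word 3: bus/device/function packed into one 16-bit BDF value */
	pe->bdf = cpu_to_be16(bdf);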
Signed-off-by: Christophe Lombard Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201125155013.39955-4-clombard@linux.vnet.ibm.com --- drivers/misc/ocxl/context.c | 4 +++- drivers/misc/ocxl/link.c | 4 +++- drivers/misc/ocxl/ocxl_internal.h | 9 ++++++--- drivers/scsi/cxlflash/ocxl_hw.c | 6 ++++-- include/misc/ocxl.h | 2 +- 5 files changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c index c21f65a5c762..9eb0d93b01c6 100644 --- a/drivers/misc/ocxl/context.c +++ b/drivers/misc/ocxl/context.c @@ -70,6 +70,7 @@ int ocxl_context_attach(struct ocxl_context *ctx, u64 amr, struct mm_struct *mm) { int rc; unsigned long pidr = 0; + struct pci_dev *dev; // Locks both status & tidr mutex_lock(&ctx->status_mutex); @@ -81,8 +82,9 @@ int ocxl_context_attach(struct ocxl_context *ctx, u64 amr, struct mm_struct *mm) if (mm) pidr = mm->context.id; + dev = to_pci_dev(ctx->afu->fn->dev.parent); rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid, pidr, ctx->tidr, - amr, mm, xsl_fault_error, ctx); + amr, pci_dev_id(dev), mm, xsl_fault_error, ctx); if (rc) goto out; diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c index fd73d3bc0eb6..77381dda2c45 100644 --- a/drivers/misc/ocxl/link.c +++ b/drivers/misc/ocxl/link.c @@ -494,7 +494,7 @@ static u64 calculate_cfg_state(bool kernel) } int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, - u64 amr, struct mm_struct *mm, + u64 amr, u16 bdf, struct mm_struct *mm, void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr), void *xsl_err_data) { @@ -529,6 +529,8 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, memset(pe, 0, sizeof(struct ocxl_process_element)); pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0)); + pe->pasid = cpu_to_be32(pasid << (31 - 19)); + pe->bdf = cpu_to_be16(bdf); pe->lpid = cpu_to_be32(mfspr(SPRN_LPID)); pe->pid = cpu_to_be32(pidr); pe->tid = cpu_to_be32(tidr); diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h index 0bad0a123af6..10125a22d5a5 100644 --- a/drivers/misc/ocxl/ocxl_internal.h +++ b/drivers/misc/ocxl/ocxl_internal.h @@ -84,13 +84,16 @@ struct ocxl_context { struct ocxl_process_element { __be64 config_state; - __be32 reserved1[11]; + __be32 pasid; + __be16 bdf; + __be16 reserved1; + __be32 reserved2[9]; __be32 lpid; __be32 tid; __be32 pid; - __be32 reserved2[10]; + __be32 reserved3[10]; __be64 amr; - __be32 reserved3[3]; + __be32 reserved4[3]; __be32 software_state; }; diff --git a/drivers/scsi/cxlflash/ocxl_hw.c b/drivers/scsi/cxlflash/ocxl_hw.c index e4e0d767b98e..244fc27215dc 100644 --- a/drivers/scsi/cxlflash/ocxl_hw.c +++ b/drivers/scsi/cxlflash/ocxl_hw.c @@ -329,6 +329,7 @@ static int start_context(struct ocxlflash_context *ctx) struct ocxl_hw_afu *afu = ctx->hw_afu; struct ocxl_afu_config *acfg = &afu->acfg; void *link_token = afu->link_token; + struct pci_dev *pdev = afu->pdev; struct device *dev = afu->dev; bool master = ctx->master; struct mm_struct *mm; @@ -360,8 +361,9 @@ static int start_context(struct ocxlflash_context *ctx) mm = current->mm; } - rc = ocxl_link_add_pe(link_token, ctx->pe, pid, 0, 0, mm, - ocxlflash_xsl_fault, ctx); + rc = ocxl_link_add_pe(link_token, ctx->pe, pid, 0, 0, + pci_dev_id(pdev), mm, ocxlflash_xsl_fault, + ctx); if (unlikely(rc)) { dev_err(dev, "%s: ocxl_link_add_pe failed rc=%d\n", __func__, rc); diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h index e013736e275d..3ed736da02c8 100644 --- 
a/include/misc/ocxl.h +++ b/include/misc/ocxl.h @@ -447,7 +447,7 @@ void ocxl_link_release(struct pci_dev *dev, void *link_handle); * defined */ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, - u64 amr, struct mm_struct *mm, + u64 amr, u16 bdf, struct mm_struct *mm, void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr), void *xsl_err_data); From 5f686eea4b3cb1d311f02b81ce4264e66a21d979 Mon Sep 17 00:00:00 2001 From: Christophe Lombard Date: Wed, 25 Nov 2020 16:50:12 +0100 Subject: [PATCH 164/304] ocxl: Add mmu notifier Add an invalidate_range mmu notifier, when required (ATSD access to MMIO registers is available), to initiate TLB invalidation commands. For the time being, the ATSD0 set of registers is used by default. The pasid and bdf values have to be configured in the Process Element Entry. The PEE must be set up to match the BDF/PASID of the AFU. Signed-off-by: Christophe Lombard Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201125155013.39955-5-clombard@linux.vnet.ibm.com --- drivers/misc/ocxl/link.c | 62 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c index 77381dda2c45..129d4eddc4d2 100644 --- a/drivers/misc/ocxl/link.c +++ b/drivers/misc/ocxl/link.c @@ -2,8 +2,10 @@ // Copyright 2017 IBM Corp. #include <linux/sched/mm.h> #include <linux/mutex.h> +#include <linux/mm_types.h> #include <linux/mmu_context.h> +#include <linux/mmu_notifier.h> #include <asm/copro.h> #include <asm/pnv-ocxl.h> #include <asm/xive.h> #include <misc/ocxl.h> @@ -33,6 +35,7 @@ #define SPA_PE_VALID 0x80000000 +struct ocxl_link; struct pe_data { struct mm_struct *mm; @@ -41,6 +44,8 @@ struct pe_data { /* opaque pointer to be passed to the above callback */ void *xsl_err_data; struct rcu_head rcu; + struct ocxl_link *link; + struct mmu_notifier mmu_notifier; }; struct spa { @@ -83,6 +88,8 @@ struct ocxl_link { int domain; int bus; int dev; + void __iomem *arva; /* ATSD register virtual address */ + spinlock_t atsd_lock; /* to serialize shootdowns */ atomic_t irq_available; struct spa *spa; void *platform_data; @@ -388,6 +395,7 @@ static int alloc_link(struct pci_dev *dev, int PE_mask, struct ocxl_link **out_l link->bus = dev->bus->number; link->dev = PCI_SLOT(dev->devfn); atomic_set(&link->irq_available, MAX_IRQ_PER_LINK); + spin_lock_init(&link->atsd_lock); rc = alloc_spa(dev, link); if (rc) @@ -403,6 +411,13 @@ static int alloc_link(struct pci_dev *dev, int PE_mask, struct ocxl_link **out_l if (rc) goto err_xsl_irq; + /* if link->arva is not defined, MMIO registers are not used to + * generate TLB invalidate. PowerBus snooping is enabled. + * Otherwise, PowerBus snooping is disabled. TLB Invalidates are + * initiated using MMIO registers.
+ */ + pnv_ocxl_map_lpar(dev, mfspr(SPRN_LPID), 0, &link->arva); + *out_link = link; return 0; @@ -454,6 +469,11 @@ static void release_xsl(struct kref *ref) { struct ocxl_link *link = container_of(ref, struct ocxl_link, ref); + if (link->arva) { + pnv_ocxl_unmap_lpar(link->arva); + link->arva = NULL; + } + list_del(&link->list); /* call platform code before releasing data */ pnv_ocxl_spa_release(link->platform_data); @@ -470,6 +490,26 @@ void ocxl_link_release(struct pci_dev *dev, void *link_handle) } EXPORT_SYMBOL_GPL(ocxl_link_release); +static void invalidate_range(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct pe_data *pe_data = container_of(mn, struct pe_data, mmu_notifier); + struct ocxl_link *link = pe_data->link; + unsigned long addr, pid, page_size = PAGE_SIZE; + + pid = mm->context.id; + + spin_lock(&link->atsd_lock); + for (addr = start; addr < end; addr += page_size) + pnv_ocxl_tlb_invalidate(link->arva, pid, addr, page_size); + spin_unlock(&link->atsd_lock); +} + +static const struct mmu_notifier_ops ocxl_mmu_notifier_ops = { + .invalidate_range = invalidate_range, +}; + static u64 calculate_cfg_state(bool kernel) { u64 state; @@ -526,6 +566,8 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, pe_data->mm = mm; pe_data->xsl_err_cb = xsl_err_cb; pe_data->xsl_err_data = xsl_err_data; + pe_data->link = link; + pe_data->mmu_notifier.ops = &ocxl_mmu_notifier_ops; memset(pe, 0, sizeof(struct ocxl_process_element)); pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0)); @@ -542,8 +584,16 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, * by the nest MMU. If we have a kernel context, TLBIs are * already global. */ - if (mm) + if (mm) { mm_context_add_copro(mm); + if (link->arva) { + /* Use MMIO registers for the TLB Invalidate + * operations. + */ + mmu_notifier_register(&pe_data->mmu_notifier, mm); + } + } + /* * Barrier is to make sure PE is visible in the SPA before it * is used by the device. It also helps with the global TLBI @@ -674,6 +724,16 @@ int ocxl_link_remove_pe(void *link_handle, int pasid) WARN(1, "Couldn't find pe data when removing PE\n"); } else { if (pe_data->mm) { + if (link->arva) { + mmu_notifier_unregister(&pe_data->mmu_notifier, + pe_data->mm); + spin_lock(&link->atsd_lock); + pnv_ocxl_tlb_invalidate(link->arva, + pe_data->mm->context.id, + 0ull, + PAGE_SIZE); + spin_unlock(&link->atsd_lock); + } mm_context_remove_copro(pe_data->mm); mmdrop(pe_data->mm); } From 98f5559a439a68e0773f42352f7c0806cac9e76e Mon Sep 17 00:00:00 2001 From: Christophe Lombard Date: Wed, 25 Nov 2020 16:50:13 +0100 Subject: [PATCH 165/304] ocxl: Add new kernel traces Add specific kernel traces which provide information on the mmu notifier and on page ranges.
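To make the intent concrete, these are roughly the call sites the diff below adds (a sketch of where each new tracepoint fires, not extra code):

	/* invalidate_range(): log the PID and VA range being shot down */
	trace_ocxl_mmu_notifier_range(start, end, pid);

	/* ocxl_link_add_pe() / ocxl_link_remove_pe(): log notifier lifetime */
	trace_ocxl_init_mmu_notifier(pasid, mm->context.id);
	trace_ocxl_release_mmu_notifier(pasid, pe_data->mm->context.id);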
Signed-off-by: Christophe Lombard Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201125155013.39955-6-clombard@linux.vnet.ibm.com --- drivers/misc/ocxl/link.c | 4 +++ drivers/misc/ocxl/trace.h | 64 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c index 129d4eddc4d2..ab039c115381 100644 --- a/drivers/misc/ocxl/link.c +++ b/drivers/misc/ocxl/link.c @@ -499,6 +499,7 @@ static void invalidate_range(struct mmu_notifier *mn, unsigned long addr, pid, page_size = PAGE_SIZE; pid = mm->context.id; + trace_ocxl_mmu_notifier_range(start, end, pid); spin_lock(&link->atsd_lock); for (addr = start; addr < end; addr += page_size) @@ -590,6 +591,7 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, /* Use MMIO registers for the TLB Invalidate * operations. */ + trace_ocxl_init_mmu_notifier(pasid, mm->context.id); mmu_notifier_register(&pe_data->mmu_notifier, mm); } } @@ -725,6 +727,8 @@ int ocxl_link_remove_pe(void *link_handle, int pasid) } else { if (pe_data->mm) { if (link->arva) { + trace_ocxl_release_mmu_notifier(pasid, + pe_data->mm->context.id); mmu_notifier_unregister(&pe_data->mmu_notifier, pe_data->mm); spin_lock(&link->atsd_lock); diff --git a/drivers/misc/ocxl/trace.h b/drivers/misc/ocxl/trace.h index 17e21cb2addd..a33a5094ff6c 100644 --- a/drivers/misc/ocxl/trace.h +++ b/drivers/misc/ocxl/trace.h @@ -8,6 +8,70 @@ #include <linux/tracepoint.h> + +TRACE_EVENT(ocxl_mmu_notifier_range, + TP_PROTO(unsigned long start, unsigned long end, unsigned long pidr), + TP_ARGS(start, end, pidr), + + TP_STRUCT__entry( + __field(unsigned long, start) + __field(unsigned long, end) + __field(unsigned long, pidr) + ), + + TP_fast_assign( + __entry->start = start; + __entry->end = end; + __entry->pidr = pidr; + ), + + TP_printk("start=0x%lx end=0x%lx pidr=0x%lx", + __entry->start, + __entry->end, + __entry->pidr + ) +); + +TRACE_EVENT(ocxl_init_mmu_notifier, + TP_PROTO(int pasid, unsigned long pidr), + TP_ARGS(pasid, pidr), + + TP_STRUCT__entry( + __field(int, pasid) + __field(unsigned long, pidr) + ), + + TP_fast_assign( + __entry->pasid = pasid; + __entry->pidr = pidr; + ), + + TP_printk("pasid=%d, pidr=0x%lx", + __entry->pasid, + __entry->pidr + ) +); + +TRACE_EVENT(ocxl_release_mmu_notifier, + TP_PROTO(int pasid, unsigned long pidr), + TP_ARGS(pasid, pidr), + + TP_STRUCT__entry( + __field(int, pasid) + __field(unsigned long, pidr) + ), + + TP_fast_assign( + __entry->pasid = pasid; + __entry->pidr = pidr; + ), + + TP_printk("pasid=%d, pidr=0x%lx", + __entry->pasid, + __entry->pidr + ) +); + DECLARE_EVENT_CLASS(ocxl_context, TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr), TP_ARGS(pid, spa, pasid, pidr, tidr), From 035b19a15a98907916a42a6b1d025877c42f10ad Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 25 Nov 2020 07:10:46 +0000 Subject: [PATCH 166/304] powerpc/32s: Always map kernel text and rodata with BATs Since commit 2b279c0348af ("powerpc/32s: Allow mapping with BATs with DEBUG_PAGEALLOC"), there is no real situation where mapping without BATs is required. In order to simplify memory handling, always map kernel text and rodata with BATs even when the "nobats" kernel parameter is set. Also fix the 603 TLB miss exceptions, which no longer require the kernel page table when DEBUG_PAGEALLOC is used.
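For background, BAT mapping boils down to programming a block address translation pair; a hedged sketch using the setbat() helper declared in mmu_decl.h (the index and block size here are illustrative, not what the kernel actually picks, which mmu_mapin_ram() computes from the memory layout):

	/* Sketch only: map the first 8M of kernel text through BAT0,
	 * virt PAGE_OFFSET -> phys 0, read-only plus execute. */
	setbat(0, PAGE_OFFSET, 0, SZ_8M, PAGE_KERNEL_TEXT);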
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/da51f7ec632825a4ce43290a904aad61648408c0.1606285013.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 4 ++-- arch/powerpc/mm/book3s32/mmu.c | 8 +++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index bbcc84c5cf5f..757aed0c5764 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -457,13 +457,13 @@ InstructionTLBMiss: */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_IMISS -#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) +#ifdef CONFIG_MODULES lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 #endif mfspr r2, SPRN_SPRG_PGDIR li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC -#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) +#ifdef CONFIG_MODULES bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index a59e7ec98180..5c60dcade90a 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -157,11 +157,9 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) unsigned long done; unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; - if (__map_without_bats) { - pr_debug("RAM mapped without BATs\n"); - return base; - } - if (debug_pagealloc_enabled()) { + + if (debug_pagealloc_enabled() || __map_without_bats) { + pr_debug_once("Read-Write memory mapped without BATs\n"); if (base >= border) return base; if (top >= border) From 79d1befe054ad4adb277fbd2d2756b1394eaf24e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 25 Nov 2020 07:10:47 +0000 Subject: [PATCH 167/304] powerpc/32s: Don't hash_preload() kernel text We now always map kernel text with BATs. There is no longer any need to preload the hash with kernel text addresses, nor to ensure they are never evicted. This is more or less a revert of commit ee4f2ea48674 ("[POWERPC] Fix 32-bit mm operations when not using BATs"). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0a0bab7fadd89aa829e33420fbc10d60c59040a7.1606285014.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/book3s32/hash_low.S | 18 +----------------- arch/powerpc/mm/book3s32/mmu.c | 2 +- arch/powerpc/mm/mmu_decl.h | 2 -- arch/powerpc/mm/pgtable_32.c | 4 ---- 4 files changed, 2 insertions(+), 24 deletions(-) diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index b2c912e517b9..48415c857d80 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -411,30 +411,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) * and we know there is a definite (although small) speed * advantage to putting the PTE in the primary PTEG, we always * put the PTE in the primary PTEG. - * - * In addition, we skip any slot that is mapping kernel text in - * order to avoid a deadlock when not using BAT mappings if - * trying to hash in the kernel hash code itself after it has - * already taken the hash table lock. This works in conjunction - * with pre-faulting of the kernel text.
- * - * If the hash table bucket is full of kernel text entries, we'll - * lockup here but that shouldn't happen */ -1: lis r4, (next_slot - PAGE_OFFSET)@ha /* get next evict slot */ + lis r4, (next_slot - PAGE_OFFSET)@ha /* get next evict slot */ lwz r6, (next_slot - PAGE_OFFSET)@l(r4) addi r6,r6,HPTE_SIZE /* search for candidate */ andi. r6,r6,7*HPTE_SIZE stw r6,next_slot@l(r4) add r4,r3,r6 - LDPTE r0,HPTE_SIZE/2(r4) /* get PTE second word */ - clrrwi r0,r0,12 - lis r6,etext@h - ori r6,r6,etext@l /* get etext */ - tophys(r6,r6) - cmpl cr0,r0,r6 /* compare and try again */ - blt 1b #ifndef CONFIG_SMP /* Store PTE in PTEG */ diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 5c60dcade90a..23f60e97196e 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -302,7 +302,7 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys, /* * Preload a translation in the hash table */ -void hash_preload(struct mm_struct *mm, unsigned long ea) +static void hash_preload(struct mm_struct *mm, unsigned long ea) { pmd_t *pmd; diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 1b6d39e9baed..0ad6d476d01d 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -91,8 +91,6 @@ void print_system_hash_info(void); #ifdef CONFIG_PPC32 -void hash_preload(struct mm_struct *mm, unsigned long ea); - extern void mapin_ram(void); extern void setbat(int index, unsigned long virt, phys_addr_t phys, unsigned int size, pgprot_t prot); diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 888b9713a316..e0ec67a16887 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -112,10 +112,6 @@ static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top) ktext = ((char *)v >= _stext && (char *)v < etext) || ((char *)v >= _sinittext && (char *)v < _einittext); map_kernel_page(v, p, ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL); -#ifdef CONFIG_PPC_BOOK3S_32 - if (ktext) - hash_preload(&init_mm, v); -#endif v += PAGE_SIZE; p += PAGE_SIZE; } From 7b107a71e732c298d684ee1bafd82f1a2be58d5e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 25 Nov 2020 07:10:48 +0000 Subject: [PATCH 168/304] powerpc/32s: Fix an FTR_SECTION_ELSE An FTR_SECTION_ELSE is in the middle of BEGIN_MMU_FTR_SECTION/ALT_MMU_FTR_SECTION_END_IFSET. Change it to MMU_FTR_SECTION_ELSE. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/61790f1a91692950a6bb5bb53d6d514d9bcdad74.1606285014.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 757aed0c5764..c965fd4be760 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -332,7 +332,7 @@ BEGIN_MMU_FTR_SECTION rlwinm r3, r5, 32 - 15, 21, 21 /* DSISR_STORE -> _PAGE_RW */ bl hash_page b handle_page_fault_tramp_1 -FTR_SECTION_ELSE +MMU_FTR_SECTION_ELSE b handle_page_fault_tramp_2 ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) #endif /* CONFIG_VMAP_STACK */ From 03d701c2d9b0091cf8e96cb49ab7d2a6a9f19937 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 25 Nov 2020 07:10:49 +0000 Subject: [PATCH 169/304] powerpc/32s: Don't use SPRN_SPRG_PGDIR in hash_page SPRN_SPRG_PGDIR is there mainly to speed up SW TLB miss handlers for powerpc 603.
We need to free SPRN_SPRG2 to reduce the mess with CONFIG_VMAP_STACK. In hash_page(), reading PGDIR from thread_struct will be in the noise performance-wise. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4adca19b7120cdf619956768ed09e74fc6a558f3.1606285014.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/book3s32/hash_low.S | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index 48415c857d80..aca353d1c5f4 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -65,13 +65,14 @@ _GLOBAL(hash_page) /* Get PTE (linux-style) and check access */ lis r0, TASK_SIZE@h /* check if kernel address */ cmplw 0,r4,r0 + mfspr r8,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */ - mfspr r5, SPRN_SPRG_PGDIR /* phys page-table root */ + lwz r5,PGDIR(r8) /* virt page-table root */ blt+ 112f /* assume user more likely */ - lis r5, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - addi r5 ,r5 ,(swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ + lis r5,swapper_pg_dir@ha /* if kernel address, use */ + addi r5,r5,swapper_pg_dir@l /* kernel page table */ rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */ -112: +112: tophys(r5, r5) #ifndef CONFIG_PTE_64BIT rlwimi r5,r4,12,20,29 /* insert top 10 bits of address */ lwz r8,0(r5) /* get pmd entry */ From c4a22611bf6ced73d86bdfc0604d7db8982a24a4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 25 Nov 2020 07:10:50 +0000 Subject: [PATCH 170/304] powerpc/603: Use SPRN_SDR1 to store the pgdir phys address On the 603, SDR1 is not used. In order to free SPRN_SPRG2, use SPRN_SDR1 to store the pgdir phys addr. But only some bits of SDR1 can be used (0xffff01ff). As the pgdir is 4k aligned, rotate it by 4 bits to the left.
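A worked example of the rotation may help: the pgdir physical address is 4k aligned, so its low 12 bits are zero, and only the bits in 0xffff01ff of SDR1 are available; rotating left by 4 moves the address entirely into those bits. A C model of the asm in the diff that follows (32-bit values assumed):

	u32 pack_sdr1(u32 pgdir_phys)	/* pgdir_phys & 0xfff == 0 */
	{
		/* rlwinm rX, rY, 4, 0xffff01ff: the result fits in
		 * 0xffff000f, which is within SDR1's usable bits */
		return (pgdir_phys << 4) | (pgdir_phys >> 28);
	}

	u32 unpack_pgdir(u32 sdr1)	/* as the TLB miss handlers do */
	{
		/* rlwinm rX, rY, 28, 0xfffff000: rotate right 4, mask */
		return ((sdr1 >> 4) | (sdr1 << 28)) & 0xfffff000;
	}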
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7370574b49d8476878ce5480726197993cb76108.1606285014.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/reg.h | 1 - arch/powerpc/kernel/head_book3s_32.S | 31 +++++++++++++++++++++------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 1fdbd7bd5f22..0978a9a0a0d1 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1204,7 +1204,6 @@ #ifdef CONFIG_PPC_BOOK3S_32 #define SPRN_SPRG_SCRATCH0 SPRN_SPRG0 #define SPRN_SPRG_SCRATCH1 SPRN_SPRG1 -#define SPRN_SPRG_PGDIR SPRN_SPRG2 #define SPRN_SPRG_603_LRU SPRN_SPRG4 #endif diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index c965fd4be760..8cc83ce61a2b 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -461,8 +461,9 @@ InstructionTLBMiss: lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 #endif - mfspr r2, SPRN_SPRG_PGDIR + mfspr r2, SPRN_SDR1 li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC + rlwinm r2, r2, 28, 0xfffff000 #ifdef CONFIG_MODULES bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ @@ -523,8 +524,9 @@ DataLoadTLBMiss: mfspr r3,SPRN_DMISS lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 - mfspr r2, SPRN_SPRG_PGDIR + mfspr r2, SPRN_SDR1 li r1, _PAGE_PRESENT | _PAGE_ACCESSED + rlwinm r2, r2, 28, 0xfffff000 bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ @@ -599,8 +601,9 @@ DataStoreTLBMiss: mfspr r3,SPRN_DMISS lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 - mfspr r2, SPRN_SPRG_PGDIR + mfspr r2, SPRN_SDR1 li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED + rlwinm r2, r2, 28, 0xfffff000 bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ @@ -893,9 +896,12 @@ __secondary_start: tophys(r4,r2) addi r4,r4,THREAD /* phys address of our thread_struct */ mtspr SPRN_SPRG_THREAD,r4 +BEGIN_MMU_FTR_SECTION lis r4, (swapper_pg_dir - PAGE_OFFSET)@h ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l - mtspr SPRN_SPRG_PGDIR, r4 + rlwinm r4, r4, 4, 0xffff01ff + mtspr SPRN_SDR1, r4 +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) /* enable MMU and jump to start_secondary */ li r4,MSR_KERNEL @@ -935,11 +941,13 @@ load_up_mmu: tlbia /* Clear all TLB entries */ sync /* wait for tlbia/tlbie to finish */ TLBSYNC /* ... on all CPUs */ +BEGIN_MMU_FTR_SECTION /* Load the SDR1 register (hash table base & size) */ lis r6,_SDR1@ha tophys(r6,r6) lwz r6,_SDR1@l(r6) mtspr SPRN_SDR1,r6 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) /* Load the BAT registers with the values set up by MMU_init. */ lis r3,BATS@ha @@ -995,9 +1003,12 @@ start_here: tophys(r4,r2) addi r4,r4,THREAD /* init task's THREAD */ mtspr SPRN_SPRG_THREAD,r4 +BEGIN_MMU_FTR_SECTION lis r4, (swapper_pg_dir - PAGE_OFFSET)@h ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l - mtspr SPRN_SPRG_PGDIR, r4 + rlwinm r4, r4, 4, 0xffff01ff + mtspr SPRN_SDR1, r4 +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) /* stack */ lis r1,init_thread_union@ha @@ -1077,16 +1088,22 @@ _ENTRY(switch_mmu_context) li r0,NUM_USER_SEGMENTS mtctr r0 - lwz r4, MM_PGD(r4) #ifdef CONFIG_BDI_SWITCH /* Context switch the PTE pointer for the Abatron BDI2000. 
* The PGDIR is passed as second argument. */ + lwz r4, MM_PGD(r4) lis r5, abatron_pteptrs@ha stw r4, abatron_pteptrs@l + 0x4(r5) +#endif +BEGIN_MMU_FTR_SECTION +#ifndef CONFIG_BDI_SWITCH + lwz r4, MM_PGD(r4) #endif tophys(r4, r4) - mtspr SPRN_SPRG_PGDIR, r4 + rlwinm r4, r4, 4, 0xffff01ff + mtspr SPRN_SDR1, r4 +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) li r4,0 isync 3: From 6285f9cff570bfd07b542840912c1d01bd5428e0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 25 Nov 2020 07:10:51 +0000 Subject: [PATCH 171/304] powerpc/32: Simplify EXCEPTION_PROLOG_1 macro Make code more readable with a clear CONFIG_VMAP_STACK section and a clear non CONFIG_VMAP_STACK section. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c0f16cf432d22fc80097264d94649460d3dd761d.1606285014.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 232000742c9a..30f0b162abfd 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -46,18 +46,16 @@ mfspr r1,SPRN_SPRG_THREAD lwz r1,TASK_STACK-THREAD(r1) addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE +1: + mtcrf 0x7f, r1 + bt 32 - THREAD_ALIGN_SHIFT, stack_overflow #else subi r11, r1, INT_FRAME_SIZE /* use r1 if kernel */ beq 1f mfspr r11,SPRN_SPRG_THREAD lwz r11,TASK_STACK-THREAD(r11) addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE -#endif -1: - tophys_novmstack r11, r11 -#ifdef CONFIG_VMAP_STACK - mtcrf 0x7f, r1 - bt 32 - THREAD_ALIGN_SHIFT, stack_overflow +1: tophys(r11, r11) #endif .endm From de1cd0790697e67b728de43e8657bb52f528bfb9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 25 Nov 2020 07:10:52 +0000 Subject: [PATCH 172/304] powerpc/32s: Use SPRN_SPRG_SCRATCH2 in DSI prolog Use SPRN_SPRG_SCRATCH2 as an alternative scratch register in the early part of DSI prolog in order to avoid clobbering SPRN_SPRG_SCRATCH0/1 used by other prologs. The 603 doesn't like a jump from DataLoadTLBMiss to the 10 nops that are now at the beginning of the DSI exception as a result of the feature section. To work around this, add a jump as an alternative. It also avoids fetching 10 nops for nothing.
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f9f8df2a2be93568768ef1ac793639f7914cf103.1606285014.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/kernel/head_book3s_32.S | 24 ++++++++---------------- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 0978a9a0a0d1..ee645e790446 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1204,6 +1204,7 @@ #ifdef CONFIG_PPC_BOOK3S_32 #define SPRN_SPRG_SCRATCH0 SPRN_SPRG0 #define SPRN_SPRG_SCRATCH1 SPRN_SPRG1 +#define SPRN_SPRG_SCRATCH2 SPRN_SPRG2 #define SPRN_SPRG_603_LRU SPRN_SPRG4 #endif diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 8cc83ce61a2b..a93c75ca8c8e 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -288,9 +288,9 @@ MachineCheck: DO_KVM 0x300 DataAccess: #ifdef CONFIG_VMAP_STACK - mtspr SPRN_SPRG_SCRATCH0,r10 - mfspr r10, SPRN_SPRG_THREAD BEGIN_MMU_FTR_SECTION + mtspr SPRN_SPRG_SCRATCH2,r10 + mfspr r10, SPRN_SPRG_THREAD stw r11, THR11(r10) mfspr r10, SPRN_DSISR mfcr r11 @@ -304,19 +304,11 @@ BEGIN_MMU_FTR_SECTION .Lhash_page_dsi_cont: mtcr r11 lwz r11, THR11(r10) -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) - mtspr SPRN_SPRG_SCRATCH1,r11 - mfspr r11, SPRN_DAR - stw r11, DAR(r10) - mfspr r11, SPRN_DSISR - stw r11, DSISR(r10) - mfspr r11, SPRN_SRR0 - stw r11, SRR0(r10) - mfspr r11, SPRN_SRR1 /* check whether user or kernel */ - stw r11, SRR1(r10) - mfcr r10 - andi. r11, r11, MSR_PR - + mfspr r10, SPRN_SPRG_SCRATCH2 +MMU_FTR_SECTION_ELSE + b 1f +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) +1: EXCEPTION_PROLOG_0 handle_dar_dsisr=1 EXCEPTION_PROLOG_1 b handle_page_fault_tramp_1 #else /* CONFIG_VMAP_STACK */ @@ -764,7 +756,7 @@ fast_hash_page_return: /* DSI */ mtcr r11 lwz r11, THR11(r10) - mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r10, SPRN_SPRG_SCRATCH2 rfi 1: /* ISI */ From d2e006036082e2dc394c5ec86c5bb88cc27c0749 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 25 Nov 2020 07:10:53 +0000 Subject: [PATCH 173/304] powerpc/32: Use SPRN_SPRG_SCRATCH2 in exception prologs Use SPRN_SPRG_SCRATCH2 as a third scratch register in exception prologs in order to simplify them and avoid data going back and forth from/to CR. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6f5c8a7faa8cc54acb89c55c20aa579a2f30a4e9.1606285014.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.h | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 30f0b162abfd..541664d95702 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -40,7 +40,7 @@ .macro EXCEPTION_PROLOG_1 for_rtas=0 #ifdef CONFIG_VMAP_STACK - mr r11, r1 + mtspr SPRN_SPRG_SCRATCH2,r1 subi r1, r1, INT_FRAME_SIZE /* use r1 if kernel */ beq 1f mfspr r1,SPRN_SPRG_THREAD @@ -61,15 +61,10 @@ .macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0 #ifdef CONFIG_VMAP_STACK - mtcr r10 - li r10, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ - mtmsr r10 + li r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ + mtmsr r11 isync -#else - stw r10,_CCR(r11) /* save registers */ -#endif - mfspr r10, SPRN_SPRG_SCRATCH0 -#ifdef CONFIG_VMAP_STACK + mfspr r11, SPRN_SPRG_SCRATCH2 stw r11,GPR1(r1) stw r11,0(r1) mr r11, r1 @@ -78,14 +73,12 @@ stw r1,0(r11) tovirt(r1, r11) /* set new kernel sp */ #endif + stw r10,_CCR(r11) /* save registers */ stw r12,GPR12(r11) stw r9,GPR9(r11) - stw r10,GPR10(r11) -#ifdef CONFIG_VMAP_STACK - mfcr r10 - stw r10, _CCR(r11) -#endif + mfspr r10,SPRN_SPRG_SCRATCH0 mfspr r12,SPRN_SPRG_SCRATCH1 + stw r10,GPR10(r11) stw r12,GPR11(r11) mflr r10 stw r10,_LINK(r11) @@ -99,7 +92,6 @@ stw r10, _DSISR(r11) .endif lwz r9, SRR1(r12) - andi. r10, r9, MSR_PR lwz r12, SRR0(r12) #else mfspr r12,SPRN_SRR0 From c3cb5dbd85dbd9ae51fadf867782dc34806f04d8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 1 Oct 2020 10:59:20 +0000 Subject: [PATCH 174/304] powerpc/time: Remove ifdef in get_vtb() SPRN_VTB and CPU_FTR_ARCH_207S are always defined, no need of an ifdef. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a0fc81cd85121407726bcf480fc9a0d8e7617fce.1601549933.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/time.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index a59f8030f020..8f789b597bae 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -41,10 +41,9 @@ struct div_result { static inline u64 get_vtb(void) { -#ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_ARCH_207S)) return mfspr(SPRN_VTB); -#endif + return 0; } From 17179aeb9d34cc81e1a4ae3f85e5b12b13a1f8d0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 10 Oct 2020 17:30:59 +0000 Subject: [PATCH 175/304] powerpc/mm: Fix verification of MMU_FTR_TYPE_44x MMU_FTR_TYPE_44x cannot be checked by cpu_has_feature() Use mmu_has_feature() instead Fixes: 23eb7f560a2a ("powerpc: Convert flush_icache_range & friends to C") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ceede82fadf37f3b8275e61fcf8cf29a3e2ec7fe.1602351011.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 8b946ec68d1b..49b668a85eb8 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -556,7 +556,7 @@ void __flush_dcache_icache(void *p) * space occurs, before returning to user space. 
*/ - if (cpu_has_feature(MMU_FTR_TYPE_44x)) + if (mmu_has_feature(MMU_FTR_TYPE_44x)) return; invalidate_icache_range(addr, addr + PAGE_SIZE); From 197493af414ee22427be3343637ac290a791925a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 12 Oct 2020 08:02:13 +0000 Subject: [PATCH 176/304] powerpc/feature: Add CPU_FTR_NOEXECUTE to G2_LE G2_LE has a 603 core, add CPU_FTR_NOEXECUTE. Fixes: 385e89d5b20f ("powerpc/mm: add exec protection on powerpc 603") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/39a530ee41d83f49747ab3af8e39c056450b9b4d.1602489653.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cputable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index e069a2d9f7c1..4e94184a1b09 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -369,7 +369,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX) #define CPU_FTRS_82XX (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_NOEXECUTE) #define CPU_FTRS_G2_LE (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \ - CPU_FTR_MAYBE_CAN_NAP) + CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NOEXECUTE) #define CPU_FTRS_E300 (CPU_FTR_MAYBE_CAN_DOZE | \ CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_COMMON | CPU_FTR_NOEXECUTE) From 1a1be322178ca8097abeee244262ce0da5b519a9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 12 Oct 2020 08:02:30 +0000 Subject: [PATCH 177/304] powerpc/mm: Remove useless #ifndef CPU_FTR_COHERENT_ICACHE in mem.c Since commit 10b35d9978ac ("[PATCH] powerpc: merged asm/cputable.h"), CPU_FTR_COHERENT_ICACHE has always been defined. Remove the #ifndef CPU_FTR_COHERENT_ICACHE block. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e26ddc1d6f6aca739dd8d2b7c67351ead559b084.1602489664.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/mem.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 49b668a85eb8..f17954516b2d 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -53,11 +53,6 @@ #include -#ifndef CPU_FTR_COHERENT_ICACHE -#define CPU_FTR_COHERENT_ICACHE 0 /* XXX for now */ -#define CPU_FTR_NOEXECUTE 0 -#endif - static DEFINE_MUTEX(linear_mapping_mutex); unsigned long long memory_limit; bool init_mem_is_free; From b68e3a3dff97bdc1cba79dc5f80cede8a2419cac Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 12 Oct 2020 08:05:49 +0000 Subject: [PATCH 178/304] powerpc/mm: MMU_FTR_NEED_DTLB_SW_LRU is only possible with CONFIG_PPC_83xx Only mpc83xx will set MMU_FTR_NEED_DTLB_SW_LRU and its definition is enclosed in #ifdef CONFIG_PPC_83xx. Make MMU_FTR_NEED_DTLB_SW_LRU possible only when CONFIG_PPC_83xx is set. 
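The reason gating the bit behind CONFIG_PPC_83xx helps is that feature checks are masked against a compile-time "possible" set, so the compiler can fold the test away entirely on other platforms. Roughly (a sketch of the idea, not the exact static-key implementation in asm/mmu.h):

	static inline bool mmu_has_feature_sketch(unsigned long feature)
	{
		if (!(MMU_FTRS_POSSIBLE & feature))
			return false;	/* constant-folded at compile time */
		return !!(cur_cpu_spec->mmu_features & feature);
	}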
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d01d7613664fafa43de1f1ae89924075bc24241c.1602489931.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/mmu.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index b6ab5edb644a..24bf77d68253 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -177,7 +177,10 @@ enum { MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL | #endif #ifdef CONFIG_PPC_BOOK3S_32 - MMU_FTR_USE_HIGH_BATS | MMU_FTR_NEED_DTLB_SW_LRU | + MMU_FTR_USE_HIGH_BATS | +#endif +#ifdef CONFIG_PPC_83xx + MMU_FTR_NEED_DTLB_SW_LRU | #endif #ifdef CONFIG_PPC_BOOK3E_64 MMU_FTR_USE_TLBRSRV | MMU_FTR_USE_PAIRED_MAS | From 0e8ff4f8d2faa2e3381e774c9e2fb975e8b4598f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 12 Oct 2020 08:04:24 +0000 Subject: [PATCH 179/304] powerpc/mm: Desintegrate MMU_FTR_PPCAS_ARCH_V2 MMU_FTR_PPCAS_ARCH_V2 is defined in cputable.h as MMU_FTR_TLBIEL | MMU_FTR_16M_PAGE. MMU_FTR_TLBIEL and MMU_FTR_16M_PAGE are defined in mmu.h. MMU_FTR_PPCAS_ARCH_V2 is used only in mmu.h and it is used only once. Remove MMU_FTR_PPCAS_ARCH_V2 and use MMU_FTR_TLBIEL | MMU_FTR_16M_PAGE directly. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/829ae1aed1d2fc6b5fc5818362e573dee5d6ecde.1602489852.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cputable.h | 2 -- arch/powerpc/include/asm/mmu.h | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 4e94184a1b09..47e87e5cacf8 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -221,8 +221,6 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN) -#define MMU_FTR_PPCAS_ARCH_V2 (MMU_FTR_TLBIEL | MMU_FTR_16M_PAGE) - /* We only set the altivec features if the kernel was compiled with altivec * support */ diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 24bf77d68253..77aeb2079ad4 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -130,8 +130,7 @@ #define MMU_FTR_1T_SEGMENT ASM_CONST(0x40000000) /* MMU feature bit sets for various CPUs */ -#define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \ - MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2 +#define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 (MMU_FTR_HPTE_TABLE | MMU_FTR_TLBIEL | MMU_FTR_16M_PAGE) #define MMU_FTRS_POWER MMU_FTRS_DEFAULT_HPTE_ARCH_V2 #define MMU_FTRS_PPC970 MMU_FTRS_POWER | MMU_FTR_TLBIE_CROP_VA #define MMU_FTRS_POWER5 MMU_FTRS_POWER | MMU_FTR_LOCKLESS_TLBIE From 7d47034551687eb6c15e8431d897a3758fc5f83e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 13 Oct 2020 11:11:21 +0000 Subject: [PATCH 180/304] powerpc/feature: Remove CPU_FTR_NODSISRALIGN CPU_FTR_NODSISRALIGN has not been used since commit 31bfdb036f12 ("powerpc: Use instruction emulation infrastructure to handle alignment faults"). Remove it.
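For orientation, the dt_cpu_ftrs.c table this patch edits is consumed by a name-matching loop of roughly this shape (a simplified sketch of the mechanism, not a verbatim copy of cpufeatures_process_feature()); dropping the "alignment-interrupt-dsisr" handler simply means the node is matched and ignored:

	/* Sketch: look up a discovered device-tree feature by name and
	 * run its enable() hook, which may set extra CPU feature bits. */
	for (i = 0; i < ARRAY_SIZE(dt_cpu_feature_match_table); i++) {
		struct dt_cpu_feature_match *m = &dt_cpu_feature_match_table[i];

		if (!strcmp(f->name, m->name)) {
			if (m->enable(f))
				cur_cpu_spec->cpu_features |= m->cpu_ftr_bit_mask;
			break;
		}
	}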
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/05d98136b24bbf11525445414bb18cffe2724f48.1602587470.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cputable.h | 23 +++++++++++------------ arch/powerpc/kernel/dt_cpu_ftrs.c | 9 +-------- arch/powerpc/kernel/prom.c | 1 - 3 files changed, 12 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 47e87e5cacf8..7becef14759f 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -137,7 +137,7 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTR_DBELL ASM_CONST(0x00000004) #define CPU_FTR_CAN_NAP ASM_CONST(0x00000008) #define CPU_FTR_DEBUG_LVL_EXC ASM_CONST(0x00000010) -#define CPU_FTR_NODSISRALIGN ASM_CONST(0x00000020) +// ASM_CONST(0x00000020) Free #define CPU_FTR_FPU_UNAVAILABLE ASM_CONST(0x00000040) #define CPU_FTR_LWSYNC ASM_CONST(0x00000080) #define CPU_FTR_NOEXECUTE ASM_CONST(0x00000100) @@ -219,7 +219,7 @@ static inline void cpu_feature_keys_init(void) { } #ifndef __ASSEMBLY__ -#define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN) +#define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_NOEXECUTE) /* We only set the altivec features if the kernel was compiled with altivec * support @@ -376,33 +376,33 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE | CPU_FTR_NOEXECUTE) #define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON) #define CPU_FTRS_8XX (CPU_FTR_NOEXECUTE) -#define CPU_FTRS_40X (CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) -#define CPU_FTRS_44X (CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) -#define CPU_FTRS_440x6 (CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE | \ +#define CPU_FTRS_40X (CPU_FTR_NOEXECUTE) +#define CPU_FTRS_44X (CPU_FTR_NOEXECUTE) +#define CPU_FTRS_440x6 (CPU_FTR_NOEXECUTE | \ CPU_FTR_INDEXED_DCR) #define CPU_FTRS_47X (CPU_FTRS_440x6) #define CPU_FTRS_E200 (CPU_FTR_SPE_COMP | \ - CPU_FTR_NODSISRALIGN | CPU_FTR_COHERENT_ICACHE | \ + CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_NOEXECUTE | \ CPU_FTR_DEBUG_LVL_EXC) #define CPU_FTRS_E500 (CPU_FTR_MAYBE_CAN_DOZE | \ - CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \ + CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_NOEXECUTE) #define CPU_FTRS_E500_2 (CPU_FTR_MAYBE_CAN_DOZE | \ CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | \ - CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) -#define CPU_FTRS_E500MC (CPU_FTR_NODSISRALIGN | \ + CPU_FTR_NOEXECUTE) +#define CPU_FTRS_E500MC ( \ CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ CPU_FTR_DBELL | CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV) /* * e5500/e6500 erratum A-006958 is a timebase bug that can use the * same workaround as CPU_FTR_CELL_TB_BUG. 
*/ -#define CPU_FTRS_E5500 (CPU_FTR_NODSISRALIGN | \ +#define CPU_FTRS_E5500 ( \ CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_CELL_TB_BUG) -#define CPU_FTRS_E6500 (CPU_FTR_NODSISRALIGN | \ +#define CPU_FTRS_E6500 ( \ CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_ALTIVEC_COMP | \ @@ -552,7 +552,6 @@ enum { #define CPU_FTRS_DT_CPU_BASE \ (CPU_FTR_LWSYNC | \ CPU_FTR_FPU_UNAVAILABLE | \ - CPU_FTR_NODSISRALIGN | \ CPU_FTR_NOEXECUTE | \ CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_STCX_CHECKS_ADDRESS | \ diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 9d079659b24d..bd8faa21d3dd 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -273,13 +273,6 @@ static int __init feat_enable_idle_nap(struct dt_cpu_feature *f) return 1; } -static int __init feat_enable_align_dsisr(struct dt_cpu_feature *f) -{ - cur_cpu_spec->cpu_features &= ~CPU_FTR_NODSISRALIGN; - - return 1; -} - static int __init feat_enable_idle_stop(struct dt_cpu_feature *f) { u64 lpcr; @@ -642,7 +635,7 @@ static struct dt_cpu_feature_match __initdata {"tm-suspend-hypervisor-assist", feat_enable, CPU_FTR_P9_TM_HV_ASSIST}, {"tm-suspend-xer-so-bug", feat_enable, CPU_FTR_P9_TM_XER_SO_BUG}, {"idle-nap", feat_enable_idle_nap, 0}, - {"alignment-interrupt-dsisr", feat_enable_align_dsisr, 0}, + /* alignment-interrupt-dsisr ignored */ {"idle-stop", feat_enable_idle_stop, 0}, {"machine-check-power8", feat_enable_mce_power8, 0}, {"performance-monitor-power8", feat_enable_pmu_power8, 0}, diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index c1545f22c077..ae3c41730367 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -165,7 +165,6 @@ static struct ibm_pa_feature { #ifdef CONFIG_PPC_RADIX_MMU { .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX | MMU_FTR_GTSE }, #endif - { .pabyte = 1, .pabit = 1, .invert = 1, .cpu_features = CPU_FTR_NODSISRALIGN }, { .pabyte = 5, .pabit = 0, .cpu_features = CPU_FTR_REAL_LE, .cpu_user_ftrs = PPC_FEATURE_TRUE_LE }, /* From 8b8319b181fd9d6821703fef1228b4dcde613a16 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 18 Oct 2020 17:25:17 +0000 Subject: [PATCH 181/304] powerpc/44x: Don't support 440 when CONFIG_PPC_47x is set As stated in platform/44x/Kconfig, CONFIG_PPC_47x is not compatible with 440 and 460 variants. This is confirmed in asm/cache.h as L1_CACHE_SHIFT is different for 47x, meaning a kernel built for 47x will not run correctly on a 440. In cputable, opt out all 440 and 460 variants when CONFIG_PPC_47x is set. Also add a default match dedicated to 470. 
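For readers unfamiliar with cputable matching: entries are scanned in order and the first one whose masked PVR matches wins, which is why a default (catch-all) entry uses a zero pvr_mask. Roughly, as a sketch of the identify_cpu() idea rather than a verbatim copy:

	/* Sketch: first matching entry wins; pvr_mask == 0 matches anything,
	 * so the default entry must come last in its section. */
	static struct cpu_spec *match_cpu_spec(struct cpu_spec *s, int n, u32 pvr)
	{
		int i;

		for (i = 0; i < n; i++)
			if ((pvr & s[i].pvr_mask) == s[i].pvr_value)
				return &s[i];
		return NULL;
	}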
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/822833ce3dc10634339818f7d1ab616edf63b0c6.1603041883.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cputable.h | 9 +++++---- arch/powerpc/include/asm/mmu.h | 7 +++---- arch/powerpc/kernel/cputable.c | 29 +++++++++++++++++++++-------- 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 7becef14759f..845a338c8d3f 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -524,11 +524,10 @@ enum { #ifdef CONFIG_40x CPU_FTRS_40X | #endif -#ifdef CONFIG_44x - CPU_FTRS_44X | CPU_FTRS_440x6 | -#endif #ifdef CONFIG_PPC_47x CPU_FTRS_47X | CPU_FTR_476_DD2 | +#elif defined(CONFIG_44x) + CPU_FTRS_44X | CPU_FTRS_440x6 | #endif #ifdef CONFIG_E200 CPU_FTRS_E200 | @@ -597,7 +596,9 @@ enum { #ifdef CONFIG_40x CPU_FTRS_40X & #endif -#ifdef CONFIG_44x +#ifdef CONFIG_PPC_47x + CPU_FTRS_47X & +#elif defined(CONFIG_44x) CPU_FTRS_44X & CPU_FTRS_440x6 & #endif #ifdef CONFIG_E200 diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 77aeb2079ad4..60aa420f414d 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -166,15 +166,14 @@ enum { #ifdef CONFIG_40x MMU_FTR_TYPE_40x | #endif -#ifdef CONFIG_44x +#ifdef CONFIG_PPC_47x + MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL | +#elif defined(CONFIG_44x) MMU_FTR_TYPE_44x | #endif #if defined(CONFIG_E200) || defined(CONFIG_E500) MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX | #endif -#ifdef CONFIG_PPC_47x - MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL | -#endif #ifdef CONFIG_PPC_BOOK3S_32 MMU_FTR_USE_HIGH_BATS | #endif diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 8fdb40ee86d1..b552e22dcddd 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1536,6 +1536,7 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif /* CONFIG_40x */ #ifdef CONFIG_44x +#ifndef CONFIG_PPC_47x { .pvr_mask = 0xf0000fff, .pvr_value = 0x40000850, @@ -1818,7 +1819,19 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_440A, .platform = "ppc440", }, -#ifdef CONFIG_PPC_47x + { /* default match */ + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = "(generic 44x PPC)", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc440", + } +#else /* CONFIG_PPC_47x */ { /* 476 DD2 core */ .pvr_mask = 0xffffffff, .pvr_value = 0x11a52080, @@ -1875,19 +1888,19 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_47x, .platform = "ppc470", }, -#endif /* CONFIG_PPC_47x */ { /* default match */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, - .cpu_name = "(generic 44x PPC)", - .cpu_features = CPU_FTRS_44X, + .cpu_name = "(generic 47x PPC)", + .cpu_features = CPU_FTRS_47X, .cpu_user_features = COMMON_USER_BOOKE, - .mmu_features = MMU_FTR_TYPE_44x, + .mmu_features = MMU_FTR_TYPE_47x, .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc440", + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", } +#endif /* CONFIG_PPC_47x */ #endif /* CONFIG_44x */ #ifdef CONFIG_E200 { /* e200z5 */ From 
1f69aa0b89240653fdf708aada6a3d968447cce7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 18 Oct 2020 17:25:18 +0000 Subject: [PATCH 182/304] powerpc/44x: Don't support 47x code and non 47x code at the same time 440/460 variants and 470 variants are not compatible, so there is no need to build code that supports both and selects between them at runtime using MMU features. Just use CONFIG_PPC_47x to decide what to build. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c3e64da3d5d068c69a201e03bbae7da055761e5b.1603041883.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/entry_32.S | 11 +++-------- arch/powerpc/mm/nohash/tlb_low.S | 31 ++++++++----------------------- 2 files changed, 11 insertions(+), 31 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index c7c28e8acc10..58177c71dfd4 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -448,15 +448,13 @@ syscall_exit_cont: andis. r10,r0,DBCR0_IDM@h bnel- load_dbcr0 #endif -#ifdef CONFIG_44x -BEGIN_MMU_FTR_SECTION +#ifdef CONFIG_PPC_47x lis r4,icache_44x_need_flush@ha lwz r5,icache_44x_need_flush@l(r4) cmplwi cr0,r5,0 bne- 2f +#endif /* CONFIG_PPC_47x */ 1: -END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_47x) -#endif /* CONFIG_44x */ BEGIN_FTR_SECTION lwarx r7,0,r1 END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) @@ -966,10 +964,7 @@ restore_kuap: /* interrupts are hard-disabled at this point */ restore: -#ifdef CONFIG_44x -BEGIN_MMU_FTR_SECTION - b 1f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) +#if defined(CONFIG_44x) && !defined(CONFIG_PPC_47x) lis r4,icache_44x_need_flush@ha lwz r5,icache_44x_need_flush@l(r4) cmplwi cr0,r5,0 diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S index eaeee402f96e..68797e072f55 100644 --- a/arch/powerpc/mm/nohash/tlb_low.S +++ b/arch/powerpc/mm/nohash/tlb_low.S @@ -92,36 +92,25 @@ _GLOBAL(__tlbil_va) tlbsx. r6,0,r3 bne 10f sync -BEGIN_MMU_FTR_SECTION - b 2f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) +#ifndef CONFIG_PPC_47x /* On 440 There are only 64 TLB entries, so r3 < 64, which means bit * 22, is clear. Since 22 is the V bit in the TLB_PAGEID, loading this * value will invalidate the TLB entry. */ tlbwe r6,r6,PPC44x_TLB_PAGEID - isync -10: wrtee r10 - blr -2: -#ifdef CONFIG_PPC_47x +#else oris r7,r6,0x8000 /* specify way explicitly */ clrrwi r4,r3,12 /* get an EPN for the hashing with V = 0 */ ori r4,r4,PPC47x_TLBE_SIZE tlbwe r4,r7,0 /* write it */ - isync - wrtee r10 - blr -#else /* CONFIG_PPC_47x */ -1: trap - EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0; #endif /* !CONFIG_PPC_47x */ + isync +10: wrtee r10 + blr _GLOBAL(_tlbil_all) _GLOBAL(_tlbil_pid) -BEGIN_MMU_FTR_SECTION - b 2f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) +#ifndef CONFIG_PPC_47x li r3,0 sync @@ -136,8 +125,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) isync blr -2: -#ifdef CONFIG_PPC_47x +#else /* 476 variant.
There's not simple way to do this, hopefully we'll * try to limit the amount of such full invalidates */ @@ -179,11 +167,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) b 1b /* Then loop */ 1: isync /* Sync shadows */ wrtee r11 -#else /* CONFIG_PPC_47x */ -1: trap - EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0; -#endif /* !CONFIG_PPC_47x */ blr +#endif /* !CONFIG_PPC_47x */ #ifdef CONFIG_PPC_47x From ed2bbd2b8581313ca18a7c586a947f6cdd93a52a Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Thu, 3 Dec 2020 15:28:07 +1100 Subject: [PATCH 183/304] powerpc: add security.config, enforcing lockdown=integrity It's sometimes handy to have a config that boots a bit like a system under secure boot (forcing lockdown=integrity, without needing any extra stuff like a command line option). This config file allows that, and also turns on a few assorted security and hardening options for good measure. Suggested-by: Michael Ellerman Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201203042807.1293655-1-dja@axtens.net --- arch/powerpc/configs/security.config | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 arch/powerpc/configs/security.config diff --git a/arch/powerpc/configs/security.config b/arch/powerpc/configs/security.config new file mode 100644 index 000000000000..1c91a35c6a73 --- /dev/null +++ b/arch/powerpc/configs/security.config @@ -0,0 +1,15 @@ +# This is the equivalent of booting with lockdown=integrity +CONFIG_SECURITY=y +CONFIG_SECURITYFS=y +CONFIG_SECURITY_LOCKDOWN_LSM=y +CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y +CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY=y + +# These are some general, reasonably inexpensive hardening options +CONFIG_HARDENED_USERCOPY=y +CONFIG_FORTIFY_SOURCE=y +CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y + +# UBSAN bounds checking is very cheap and good for hardening +CONFIG_UBSAN=y +# CONFIG_UBSAN_MISC is not set \ No newline at end of file From 450be4960a0fb89b931a6bb3c3f0bb538ac4c03c Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Wed, 2 Dec 2020 11:52:22 +1100 Subject: [PATCH 184/304] powerpc/pci: Remove LSI mappings on device teardown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a passthrough IO adapter is removed from a pseries machine using hash MMU and the XIVE interrupt mode, the POWER hypervisor expects the guest OS to clear all page table entries related to the adapter. If some are still present, the RTAS call which isolates the PCI slot returns error 9001 "valid outstanding translations" and the removal of the IO adapter fails. This is because when the PHBs are scanned, Linux maps automatically the INTx interrupts in the Linux interrupt number space but these are never removed. This problem can be fixed by adding the corresponding unmap operation when the device is removed. There's no pcibios_* hook for the remove case, but the same effect can be achieved using a bus notifier. Because INTx are shared among PHBs (and potentially across the system), this adds tracking of virq to unmap them only when the last user is gone. 
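For reference, the tracking scheme described in the last paragraph condenses to a standard kref-protected list; the fragment below is extracted from the diff that follows, with locking, allocation and error paths elided:

	struct pci_intx_virq {
		int virq;
		struct kref kref;
		struct list_head list_node;
	};

	/* map path (pci_read_irq_line): one tracking entry per unique virq */
	list_for_each_entry(vitmp, &intx_list, list_node)
		if (vitmp->virq == virq)
			kref_get(&vitmp->kref);	/* INTx shared with another device */

	/* teardown path (bus notifier): only the last put removes the mapping */
	kref_put(&vi->kref, ppc_pci_intx_release);	/* list_del() + irq_dispose_mapping() */
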
[aik: added refcounter] Signed-off-by: Oliver O'Halloran Signed-off-by: Alexey Kardashevskiy Tested-by: Cédric Le Goater Reviewed-by: Frederic Barrat Reviewed-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201202005222.5477-1-aik@ozlabs.ru --- arch/powerpc/kernel/pci-common.c | 82 ++++++++++++++++++++++++++++++-- 1 file changed, 78 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index be108616a721..2b555997b295 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -353,6 +353,55 @@ struct pci_controller *pci_find_controller_for_domain(int domain_nr) return NULL; } +struct pci_intx_virq { + int virq; + struct kref kref; + struct list_head list_node; +}; + +static LIST_HEAD(intx_list); +static DEFINE_MUTEX(intx_mutex); + +static void ppc_pci_intx_release(struct kref *kref) +{ + struct pci_intx_virq *vi = container_of(kref, struct pci_intx_virq, kref); + + list_del(&vi->list_node); + irq_dispose_mapping(vi->virq); + kfree(vi); +} + +static int ppc_pci_unmap_irq_line(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct pci_dev *pdev = to_pci_dev(data); + + if (action == BUS_NOTIFY_DEL_DEVICE) { + struct pci_intx_virq *vi; + + mutex_lock(&intx_mutex); + list_for_each_entry(vi, &intx_list, list_node) { + if (vi->virq == pdev->irq) { + kref_put(&vi->kref, ppc_pci_intx_release); + break; + } + } + mutex_unlock(&intx_mutex); + } + + return NOTIFY_DONE; +} + +static struct notifier_block ppc_pci_unmap_irq_notifier = { + .notifier_call = ppc_pci_unmap_irq_line, +}; + +static int ppc_pci_register_irq_notifier(void) +{ + return bus_register_notifier(&pci_bus_type, &ppc_pci_unmap_irq_notifier); +} +arch_initcall(ppc_pci_register_irq_notifier); + /* * Reads the interrupt pin to determine if interrupt is use by card. * If the interrupt is used, then gets the interrupt line from the @@ -361,6 +410,12 @@ struct pci_controller *pci_find_controller_for_domain(int domain_nr) static int pci_read_irq_line(struct pci_dev *pci_dev) { int virq; + struct pci_intx_virq *vi, *vitmp; + + /* Preallocate vi as rewind is complex if this fails after mapping */ + vi = kzalloc(sizeof(struct pci_intx_virq), GFP_KERNEL); + if (!vi) + return -1; pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev)); @@ -377,12 +432,12 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) * function. */ if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &pin)) - return -1; + goto error_exit; if (pin == 0) - return -1; + goto error_exit; if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_LINE, &line) || line == 0xff || line == 0) { - return -1; + goto error_exit; } pr_debug(" No map ! 
Using line %d (pin %d) from PCI config\n", line, pin); @@ -394,14 +449,33 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) if (!virq) { pr_debug(" Failed to map !\n"); - return -1; + goto error_exit; } pr_debug(" Mapped to linux irq %d\n", virq); pci_dev->irq = virq; + mutex_lock(&intx_mutex); + list_for_each_entry(vitmp, &intx_list, list_node) { + if (vitmp->virq == virq) { + kref_get(&vitmp->kref); + kfree(vi); + vi = NULL; + break; + } + } + if (vi) { + vi->virq = virq; + kref_init(&vi->kref); + list_add_tail(&vi->list_node, &intx_list); + } + mutex_unlock(&intx_mutex); + return 0; +error_exit: + kfree(vi); + return -1; } /* From 6c58b1b41b19c00099e4771ee55e21eb9aa245c1 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Thu, 9 Apr 2020 16:13:37 +1000 Subject: [PATCH 185/304] powernv/pci: Print an error when device enable is blocked If the platform decides to block enabling the device nothing is printed currently. This can lead to some confusion since the dmesg output will usually print an error with no context e.g. e1000e: probe of 0022:01:00.0 failed with error -22 This shouldn't be spammy since pci_enable_device() already prints a messages when it succeeds. Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200409061337.9187-1-oohall@gmail.com --- arch/powerpc/platforms/powernv/pci-ioda.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 2b4ceb5e6ce4..c4f72cdc9b51 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2613,8 +2613,10 @@ static bool pnv_pci_enable_device_hook(struct pci_dev *dev) return true; pdn = pci_get_pdn(dev); - if (!pdn || pdn->pe_number == IODA_INVALID_PE) + if (!pdn || pdn->pe_number == IODA_INVALID_PE) { + pci_err(dev, "pci_enable_device() blocked, no PE assigned.\n"); return false; + } return true; } From 3ba150fb21207e4a7f4b600eb2dbbe83f94571fe Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Mon, 30 Nov 2020 14:00:57 +0530 Subject: [PATCH 186/304] lkdtm/powerpc: Add SLB multihit test To check machine check handling, add support to inject slb multihit errors. 
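As with the other lkdtm crash types, the new test is driven from debugfs; assuming the standard lkdtm interface, writing the crash type name triggers it:

	echo PPC_SLB_MULTIHIT > /sys/kernel/debug/provoke-crash/DIRECT

On success the log shows "Injecting SLB multihit errors" followed by "Recovered from SLB multihit errors"; the "Recovered" string added to tests.txt below is the pattern the lkdtm selftest greps for.
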
Co-developed-by: Mahesh Salgaonkar Signed-off-by: Mahesh Salgaonkar Signed-off-by: Ganesh Goudar [mpe: Use CONFIG_PPC_BOOK3S_64 to fix compile errors reported by lkp@intel.com] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201130083057.135610-1-ganeshgr@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 28 +++- arch/powerpc/mm/book3s64/hash_utils.c | 1 + arch/powerpc/mm/book3s64/slb.c | 27 ---- drivers/misc/lkdtm/Makefile | 1 + drivers/misc/lkdtm/core.c | 3 + drivers/misc/lkdtm/lkdtm.h | 3 + drivers/misc/lkdtm/powerpc.c | 120 ++++++++++++++++++ tools/testing/selftests/lkdtm/tests.txt | 1 + 8 files changed, 156 insertions(+), 28 deletions(-) create mode 100644 drivers/misc/lkdtm/powerpc.c diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 9192cb05a6ab..066b1d34c7bc 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -843,6 +843,32 @@ static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize) unsigned htab_shift_for_mem_size(unsigned long mem_size); -#endif /* __ASSEMBLY__ */ +enum slb_index { + LINEAR_INDEX = 0, /* Kernel linear map (0xc000000000000000) */ + KSTACK_INDEX = 1, /* Kernel stack map */ +}; +#define slb_esid_mask(ssize) \ + (((ssize) == MMU_SEGSIZE_256M) ? ESID_MASK : ESID_MASK_1T) + +static inline unsigned long mk_esid_data(unsigned long ea, int ssize, + enum slb_index index) +{ + return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | index; +} + +static inline unsigned long __mk_vsid_data(unsigned long vsid, int ssize, + unsigned long flags) +{ + return (vsid << slb_vsid_shift(ssize)) | flags | + ((unsigned long)ssize << SLB_VSID_SSIZE_SHIFT); +} + +static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, + unsigned long flags) +{ + return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags); +} + +#endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_MMU_HASH_H_ */ diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index e0fe1a43e7b8..73b06adb6eeb 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -112,6 +112,7 @@ int mmu_linear_psize = MMU_PAGE_4K; EXPORT_SYMBOL_GPL(mmu_linear_psize); int mmu_virtual_psize = MMU_PAGE_4K; int mmu_vmalloc_psize = MMU_PAGE_4K; +EXPORT_SYMBOL_GPL(mmu_vmalloc_psize); #ifdef CONFIG_SPARSEMEM_VMEMMAP int mmu_vmemmap_psize = MMU_PAGE_4K; #endif diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index 6d720c1c08a4..584567970c11 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -28,35 +28,8 @@ #include "internal.h" -enum slb_index { - LINEAR_INDEX = 0, /* Kernel linear map (0xc000000000000000) */ - KSTACK_INDEX = 1, /* Kernel stack map */ -}; - static long slb_allocate_user(struct mm_struct *mm, unsigned long ea); -#define slb_esid_mask(ssize) \ - (((ssize) == MMU_SEGSIZE_256M)? 
ESID_MASK: ESID_MASK_1T) - -static inline unsigned long mk_esid_data(unsigned long ea, int ssize, - enum slb_index index) -{ - return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | index; -} - -static inline unsigned long __mk_vsid_data(unsigned long vsid, int ssize, - unsigned long flags) -{ - return (vsid << slb_vsid_shift(ssize)) | flags | - ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT); -} - -static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, - unsigned long flags) -{ - return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags); -} - bool stress_slb_enabled __initdata; static int __init parse_stress_slb(char *p) diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile index c70b3822013f..5a92c74eca92 100644 --- a/drivers/misc/lkdtm/Makefile +++ b/drivers/misc/lkdtm/Makefile @@ -10,6 +10,7 @@ lkdtm-$(CONFIG_LKDTM) += rodata_objcopy.o lkdtm-$(CONFIG_LKDTM) += usercopy.o lkdtm-$(CONFIG_LKDTM) += stackleak.o lkdtm-$(CONFIG_LKDTM) += cfi.o +lkdtm-$(CONFIG_PPC_BOOK3S_64) += powerpc.o KASAN_SANITIZE_stackleak.o := n KCOV_INSTRUMENT_rodata.o := n diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c index 97803f213d9d..1f612c76a61b 100644 --- a/drivers/misc/lkdtm/core.c +++ b/drivers/misc/lkdtm/core.c @@ -176,6 +176,9 @@ static const struct crashtype crashtypes[] = { #ifdef CONFIG_X86_32 CRASHTYPE(DOUBLE_FAULT), #endif +#ifdef CONFIG_PPC_BOOK3S_64 + CRASHTYPE(PPC_SLB_MULTIHIT), +#endif }; diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h index 6dec4c9b442f..79ec05c18dd1 100644 --- a/drivers/misc/lkdtm/lkdtm.h +++ b/drivers/misc/lkdtm/lkdtm.h @@ -102,4 +102,7 @@ void lkdtm_STACKLEAK_ERASING(void); /* cfi.c */ void lkdtm_CFI_FORWARD_PROTO(void); +/* powerpc.c */ +void lkdtm_PPC_SLB_MULTIHIT(void); + #endif diff --git a/drivers/misc/lkdtm/powerpc.c b/drivers/misc/lkdtm/powerpc.c new file mode 100644 index 000000000000..077c9f9ed8d0 --- /dev/null +++ b/drivers/misc/lkdtm/powerpc.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "lkdtm.h" +#include +#include +#include + +/* Inserts new slb entries */ +static void insert_slb_entry(unsigned long p, int ssize, int page_size) +{ + unsigned long flags; + + flags = SLB_VSID_KERNEL | mmu_psize_defs[page_size].sllp; + preempt_disable(); + + asm volatile("slbmte %0,%1" : + : "r" (mk_vsid_data(p, ssize, flags)), + "r" (mk_esid_data(p, ssize, SLB_NUM_BOLTED)) + : "memory"); + + asm volatile("slbmte %0,%1" : + : "r" (mk_vsid_data(p, ssize, flags)), + "r" (mk_esid_data(p, ssize, SLB_NUM_BOLTED + 1)) + : "memory"); + preempt_enable(); +} + +/* Inject slb multihit on vmalloc-ed address i.e 0xD00... */ +static int inject_vmalloc_slb_multihit(void) +{ + char *p; + + p = vmalloc(PAGE_SIZE); + if (!p) + return -ENOMEM; + + insert_slb_entry((unsigned long)p, MMU_SEGSIZE_1T, mmu_vmalloc_psize); + /* + * This triggers exception, If handled correctly we must recover + * from this error. + */ + p[0] = '!'; + vfree(p); + return 0; +} + +/* Inject slb multihit on kmalloc-ed address i.e 0xC00... */ +static int inject_kmalloc_slb_multihit(void) +{ + char *p; + + p = kmalloc(2048, GFP_KERNEL); + if (!p) + return -ENOMEM; + + insert_slb_entry((unsigned long)p, MMU_SEGSIZE_1T, mmu_linear_psize); + /* + * This triggers exception, If handled correctly we must recover + * from this error. + */ + p[0] = '!'; + kfree(p); + return 0; +} + +/* + * Few initial SLB entries are bolted. Add a test to inject + * multihit in bolted entry 0. 
+ */ +static void insert_dup_slb_entry_0(void) +{ + unsigned long test_address = PAGE_OFFSET, *test_ptr; + unsigned long esid, vsid; + unsigned long i = 0; + + test_ptr = (unsigned long *)test_address; + preempt_disable(); + + asm volatile("slbmfee %0,%1" : "=r" (esid) : "r" (i)); + asm volatile("slbmfev %0,%1" : "=r" (vsid) : "r" (i)); + + /* for i !=0 we would need to mask out the old entry number */ + asm volatile("slbmte %0,%1" : + : "r" (vsid), + "r" (esid | SLB_NUM_BOLTED) + : "memory"); + + asm volatile("slbmfee %0,%1" : "=r" (esid) : "r" (i)); + asm volatile("slbmfev %0,%1" : "=r" (vsid) : "r" (i)); + + /* for i !=0 we would need to mask out the old entry number */ + asm volatile("slbmte %0,%1" : + : "r" (vsid), + "r" (esid | (SLB_NUM_BOLTED + 1)) + : "memory"); + + pr_info("%s accessing test address 0x%lx: 0x%lx\n", + __func__, test_address, *test_ptr); + + preempt_enable(); +} + +void lkdtm_PPC_SLB_MULTIHIT(void) +{ + if (!radix_enabled()) { + pr_info("Injecting SLB multihit errors\n"); + /* + * These need not be separate tests, And they do pretty + * much same thing. In any case we must recover from the + * errors introduced by these functions, machine would not + * survive these tests in case of failure to handle. + */ + inject_vmalloc_slb_multihit(); + inject_kmalloc_slb_multihit(); + insert_dup_slb_entry_0(); + pr_info("Recovered from SLB multihit errors\n"); + } else { + pr_err("XFAIL: This test is for ppc64 and with hash mode MMU only\n"); + } +} diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt index 74a8d329a72c..18e4599863c0 100644 --- a/tools/testing/selftests/lkdtm/tests.txt +++ b/tools/testing/selftests/lkdtm/tests.txt @@ -68,3 +68,4 @@ USERCOPY_STACK_BEYOND USERCOPY_KERNEL STACKLEAK_ERASING OK: the rest of the thread stack is properly erased CFI_FORWARD_PROTO +PPC_SLB_MULTIHIT Recovered From b1198a88230f2ce50c271e22b82a8b8610b2eea9 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Sun, 22 Nov 2020 18:38:28 +1100 Subject: [PATCH 187/304] powerpc/powernv/npu: Do not attempt NPU2 setup on POWER8NVL NPU We execute certain NPU2 setup code (such as mapping an LPID to a device in NPU2) unconditionally if an Nvlink bridge is detected. However this cannot succeed on POWER8NVL machines and errors appear in dmesg. This is harmless, as skiboot returns an error and the only place we check it is vfio-pci, but that code does not get called on P8+ either. This adds a check that the pnv_npu2_xxx helpers are called on a machine with an NPU2, which initializes pnv_phb::npu in pnv_npu2_init(); pnv_phb::npu == NULL on POWER8/NVL (Naples). While at it, fix the NULL dereference in pnv_npu_peers_take_ownership/ pnv_npu_peers_release_ownership which occurs when GPUs on the mentioned P8s cause EEH, which happens if "vfio-pci" disables devices using the D3 power state; vfio-pci's disable_idle_d3 module parameter controls this and must be set on Naples. The EEH handling clears the entire pnv_ioda_pe struct in pnv_ioda_free_pe(), hence the NULL dereference. We cannot recover from that, but at least we stop crashing.
Tested on - POWER9 pvr=004e1201, Ubuntu 19.04 host, Ubuntu 18.04 vm, NVIDIA GV100 10de:1db1 driver 418.39 - POWER8 pvr=004c0100, RHEL 7.6 host, Ubuntu 16.10 vm, NVIDIA P100 10de:15f9 driver 396.47 Fixes: 1b785611e119 ("powerpc/powernv/npu: Add release_ownership hook") Cc: stable@vger.kernel.org # 5.0 Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201122073828.15446-1-aik@ozlabs.ru --- arch/powerpc/platforms/powernv/npu-dma.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index abeaa533b976..b711dc3262a3 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -385,7 +385,8 @@ static void pnv_npu_peers_take_ownership(struct iommu_table_group *table_group) for (i = 0; i < npucomp->pe_num; ++i) { struct pnv_ioda_pe *pe = npucomp->pe[i]; - if (!pe->table_group.ops->take_ownership) + if (!pe->table_group.ops || + !pe->table_group.ops->take_ownership) continue; pe->table_group.ops->take_ownership(&pe->table_group); } @@ -401,7 +402,8 @@ static void pnv_npu_peers_release_ownership( for (i = 0; i < npucomp->pe_num; ++i) { struct pnv_ioda_pe *pe = npucomp->pe[i]; - if (!pe->table_group.ops->release_ownership) + if (!pe->table_group.ops || + !pe->table_group.ops->release_ownership) continue; pe->table_group.ops->release_ownership(&pe->table_group); } @@ -623,6 +625,11 @@ int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid, return -ENODEV; hose = pci_bus_to_host(npdev->bus); + if (hose->npu == NULL) { + dev_info_once(&npdev->dev, "Nvlink1 does not support contexts"); + return 0; + } + nphb = hose->private_data; dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=%u\n", @@ -670,6 +677,11 @@ int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev) return -ENODEV; hose = pci_bus_to_host(npdev->bus); + if (hose->npu == NULL) { + dev_info_once(&npdev->dev, "Nvlink1 does not support contexts"); + return 0; + } + nphb = hose->private_data; dev_dbg(&gpdev->dev, "destroy context opalid=%llu\n", From 7c6c86b36a36dd4a13d30bba07718e767aa2e7a1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 4 Dec 2020 10:35:38 +0000 Subject: [PATCH 188/304] powerpc/xmon: Change printk() to pr_cont() Since some time now, printk() adds carriage return, leading to unusable xmon output if there is no udbg backend available: [ 54.288722] sysrq: Entering xmon [ 54.292209] Vector: 0 at [cace3d2c] [ 54.292274] pc: [ 54.292331] c0023650 [ 54.292468] : xmon+0x28/0x58 [ 54.292519] [ 54.292574] lr: [ 54.292630] c0023724 [ 54.292749] : sysrq_handle_xmon+0xa4/0xfc [ 54.292801] [ 54.292867] sp: cace3de8 [ 54.292931] msr: 9032 [ 54.292999] current = 0xc28d0000 [ 54.293072] pid = 377, comm = sh [ 54.293157] Linux version 5.10.0-rc6-s3k-dev-01364-gedf13f0ccd76-dirty (root@po17688vm.idsi0.si.c-s.fr) (powerpc64-linux-gcc (GCC) 10.1.0, GNU ld (GNU Binutils) 2.34) #4211 PREEMPT Fri Dec 4 09:32:11 UTC 2020 [ 54.293287] enter ? for help [ 54.293470] [cace3de8] [ 54.293532] c0023724 [ 54.293654] sysrq_handle_xmon+0xa4/0xfc [ 54.293711] (unreliable) ... [ 54.296002] [ 54.296159] --- Exception: c01 (System Call) at [ 54.296217] 0fd4e784 [ 54.296303] [ 54.296375] SP (7fca6ff0) is in userspace [ 54.296431] mon> [ 54.296484] Use pr_cont() instead. 
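The underlying issue: since that commit, each printk() call without KERN_CONT starts a new log record, and hence a new line. A minimal sketch of the difference being relied on here (not taken from the patch):

	printk("pc: ");		/* new record: "pc: " is terminated as its own line */
	printk("c0023650\n");	/* lands on the next line - the broken output above */

	pr_cont("pc: ");	/* KERN_CONT: appended to the current line */
	pr_cont("c0023650\n");	/* completes the same line, as xmon intends */
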
Fixes: 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines") Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Christophe Leroy [mpe: Mention that it only happens when udbg is not available] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c8a6ec704416ecd5ff2bd26213c9bc026bdd19de.1607077340.git.christophe.leroy@csgroup.eu --- arch/powerpc/xmon/nonstdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/xmon/nonstdio.c b/arch/powerpc/xmon/nonstdio.c index 5c1a50912229..9b0d85bff021 100644 --- a/arch/powerpc/xmon/nonstdio.c +++ b/arch/powerpc/xmon/nonstdio.c @@ -178,7 +178,7 @@ void xmon_printf(const char *format, ...) if (n && rc == 0) { /* No udbg hooks, fallback to printk() - dangerous */ - printk("%s", xmon_outbuf); + pr_cont("%s", xmon_outbuf); } } From d85be8a49e733dcd23674aa6202870d54bf5600d Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 22 Oct 2020 09:29:20 +0000 Subject: [PATCH 189/304] powerpc: Fix incorrect stw{, ux, u, x} instructions in __set_pte_at The placeholder for instruction selection should use the second argument's operand, which is %1, not %0. This could generate incorrect assembly code if the memory addressing of operand %0 is a different form from that of operand %1. Also remove the %Un placeholder because having %Un placeholders for two operands which are based on the same local var (ptep) doesn't make much sense. By the way, it doesn't change the current behaviour because "<>" constraint is missing for the associated "=m". [chleroy: revised commit log iaw segher's comments and removed %U0] Fixes: 9bf2b5cdc5fe ("powerpc: Fixes for CONFIG_PTE_64BIT for SMP support") Cc: # v2.6.28+ Signed-off-by: Mathieu Desnoyers Signed-off-by: Christophe Leroy Acked-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/96354bd77977a6a933fe9020da57629007fdb920.1603358942.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/pgtable.h | 4 ++-- arch/powerpc/include/asm/nohash/pgtable.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 36443cda8dcf..41d8bc6db303 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -522,9 +522,9 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, if (pte_val(*ptep) & _PAGE_HASHPTE) flush_hash_entry(mm, ptep, addr); __asm__ __volatile__("\ - stw%U0%X0 %2,%0\n\ + stw%X0 %2,%0\n\ eieio\n\ - stw%U0%X0 %L2,%1" + stw%X1 %L2,%1" : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4)) : "r" (pte) : "memory"); diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 6277e7596ae5..ac75f4ab0dba 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -192,9 +192,9 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, */ if (IS_ENABLED(CONFIG_PPC32) && IS_ENABLED(CONFIG_PTE_64BIT) && !percpu) { __asm__ __volatile__("\ - stw%U0%X0 %2,%0\n\ + stw%X0 %2,%0\n\ eieio\n\ - stw%U0%X0 %L2,%1" + stw%X1 %L2,%1" : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4)) : "r" (pte) : "memory"); return; From ff57698a9610fcf7d9c4469bf68c881eff22e2f8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Oct 2020 09:29:21 +0000 Subject: [PATCH 190/304] powerpc: Fix update form addressing in inline assembly In several places, inline assembly 
uses the "%Un" modifier to enable the use of instruction with update form addressing, but the associated "<>" constraint is missing. As mentioned in previous patch, this fails with gcc 4.9, so "<>" can't be used directly. Use UPD_CONSTR macro everywhere %Un modifier is used. Signed-off-by: Christophe Leroy Reviewed-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/62eab5ca595485c192de1765bdac099f633a21d0.1603358942.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/atomic.h | 9 +++++---- arch/powerpc/include/asm/io.h | 4 ++-- arch/powerpc/kvm/powerpc.c | 4 ++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 8a55eb8cc97b..61c6e8b200e8 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -10,6 +10,7 @@ #include #include #include +#include /* * Since *_return_relaxed and {cmp}xchg_relaxed are implemented with @@ -26,14 +27,14 @@ static __inline__ int atomic_read(const atomic_t *v) { int t; - __asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter)); + __asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : "m"UPD_CONSTR(v->counter)); return t; } static __inline__ void atomic_set(atomic_t *v, int i) { - __asm__ __volatile__("stw%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i)); + __asm__ __volatile__("stw%U0%X0 %1,%0" : "=m"UPD_CONSTR(v->counter) : "r"(i)); } #define ATOMIC_OP(op, asm_op) \ @@ -316,14 +317,14 @@ static __inline__ s64 atomic64_read(const atomic64_t *v) { s64 t; - __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter)); + __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m"UPD_CONSTR(v->counter)); return t; } static __inline__ void atomic64_set(atomic64_t *v, s64 i) { - __asm__ __volatile__("std%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i)); + __asm__ __volatile__("std%U0%X0 %1,%0" : "=m"UPD_CONSTR(v->counter) : "r"(i)); } #define ATOMIC64_OP(op, asm_op) \ diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 2469b46ac2c4..273edd208ec5 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -122,7 +122,7 @@ static inline u##size name(const volatile u##size __iomem *addr) \ { \ u##size ret; \ __asm__ __volatile__("sync;"#insn"%U1%X1 %0,%1;twi 0,%0,0;isync"\ - : "=r" (ret) : "m" (*addr) : "memory"); \ + : "=r" (ret) : "m"UPD_CONSTR (*addr) : "memory"); \ return ret; \ } @@ -130,7 +130,7 @@ static inline u##size name(const volatile u##size __iomem *addr) \ static inline void name(volatile u##size __iomem *addr, u##size val) \ { \ __asm__ __volatile__("sync;"#insn"%U0%X0 %1,%0" \ - : "=m" (*addr) : "r" (val) : "memory"); \ + : "=m"UPD_CONSTR (*addr) : "r" (val) : "memory"); \ mmiowb_set_pending(); \ } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 13999123b735..cf52d26f49cd 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1087,7 +1087,7 @@ static inline u64 sp_to_dp(u32 fprs) preempt_disable(); enable_kernel_fp(); - asm ("lfs%U1%X1 0,%1; stfd%U0%X0 0,%0" : "=m" (fprd) : "m" (fprs) + asm ("lfs%U1%X1 0,%1; stfd%U0%X0 0,%0" : "=m"UPD_CONSTR (fprd) : "m"UPD_CONSTR (fprs) : "fr0"); preempt_enable(); return fprd; @@ -1099,7 +1099,7 @@ static inline u32 dp_to_sp(u64 fprd) preempt_disable(); enable_kernel_fp(); - asm ("lfd%U1%X1 0,%1; stfs%U0%X0 0,%0" : "=m" (fprs) : "m" (fprd) + asm ("lfd%U1%X1 0,%1; stfs%U0%X0 0,%0" : "=m"UPD_CONSTR (fprs) : "m"UPD_CONSTR (fprd) : "fr0"); preempt_enable(); return fprs; 
From 39c8bf2b3cc166a2a75111e4941cc5f7efbddc35 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 17 Nov 2020 05:07:58 +0000 Subject: [PATCH 191/304] powerpc: Retire e200 core (mpc555x processor) There is no defconfig selecting CONFIG_E200, and no platform. e200 is an earlier version of booke, a predecessor of e500, with some particularities like an unified cache instead of both an instruction cache and a data cache. Remove it. Signed-off-by: Christophe Leroy Acked-by: Scott Wood Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/34ebc3ba2c768d97f363bd5f2deea2356e9ae127.1605589460.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cputable.h | 11 ----- arch/powerpc/include/asm/mmu.h | 2 +- arch/powerpc/include/asm/reg.h | 5 -- arch/powerpc/include/asm/reg_booke.h | 12 ----- arch/powerpc/kernel/cpu_setup_fsl_booke.S | 9 ---- arch/powerpc/kernel/cputable.c | 46 ------------------ arch/powerpc/kernel/head_booke.h | 3 +- arch/powerpc/kernel/head_fsl_booke.S | 57 +---------------------- arch/powerpc/kernel/setup_32.c | 2 - arch/powerpc/kernel/traps.c | 25 ---------- arch/powerpc/mm/nohash/fsl_booke.c | 12 ++--- arch/powerpc/platforms/Kconfig.cputype | 13 ++---- 12 files changed, 11 insertions(+), 186 deletions(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 845a338c8d3f..8a4e1ed8a4a2 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -41,7 +41,6 @@ extern int machine_check_4xx(struct pt_regs *regs); extern int machine_check_440A(struct pt_regs *regs); extern int machine_check_e500mc(struct pt_regs *regs); extern int machine_check_e500(struct pt_regs *regs); -extern int machine_check_e200(struct pt_regs *regs); extern int machine_check_47x(struct pt_regs *regs); int machine_check_8xx(struct pt_regs *regs); int machine_check_83xx(struct pt_regs *regs); @@ -381,10 +380,6 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTRS_440x6 (CPU_FTR_NOEXECUTE | \ CPU_FTR_INDEXED_DCR) #define CPU_FTRS_47X (CPU_FTRS_440x6) -#define CPU_FTRS_E200 (CPU_FTR_SPE_COMP | \ - CPU_FTR_COHERENT_ICACHE | \ - CPU_FTR_NOEXECUTE | \ - CPU_FTR_DEBUG_LVL_EXC) #define CPU_FTRS_E500 (CPU_FTR_MAYBE_CAN_DOZE | \ CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_NOEXECUTE) @@ -529,9 +524,6 @@ enum { #elif defined(CONFIG_44x) CPU_FTRS_44X | CPU_FTRS_440x6 | #endif -#ifdef CONFIG_E200 - CPU_FTRS_E200 | -#endif #ifdef CONFIG_E500 CPU_FTRS_E500 | CPU_FTRS_E500_2 | #endif @@ -601,9 +593,6 @@ enum { #elif defined(CONFIG_44x) CPU_FTRS_44X & CPU_FTRS_440x6 & #endif -#ifdef CONFIG_E200 - CPU_FTRS_E200 & -#endif #ifdef CONFIG_E500 CPU_FTRS_E500 & CPU_FTRS_E500_2 & #endif diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 60aa420f414d..620e8fe6f8fd 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -171,7 +171,7 @@ enum { #elif defined(CONFIG_44x) MMU_FTR_TYPE_44x | #endif -#if defined(CONFIG_E200) || defined(CONFIG_E500) +#ifdef CONFIG_E500 MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX | #endif #ifdef CONFIG_PPC_BOOK3S_32 diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index ee645e790446..b9492f2b0608 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1233,14 +1233,9 @@ #define SPRN_SPRG_WSCRATCH_MC SPRN_SPRG1 #define SPRN_SPRG_RSCRATCH4 SPRN_SPRG7R #define SPRN_SPRG_WSCRATCH4 SPRN_SPRG7W -#ifdef CONFIG_E200 -#define SPRN_SPRG_RSCRATCH_DBG SPRN_SPRG6R -#define 
SPRN_SPRG_WSCRATCH_DBG SPRN_SPRG6W -#else #define SPRN_SPRG_RSCRATCH_DBG SPRN_SPRG9 #define SPRN_SPRG_WSCRATCH_DBG SPRN_SPRG9 #endif -#endif #ifdef CONFIG_PPC_8xx #define SPRN_SPRG_SCRATCH0 SPRN_SPRG0 diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 29a948e0c0f2..262782f08fd4 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -281,18 +281,6 @@ #define MSRP_PMMP 0x00000004 /* Protect MSR[PMM] */ #endif -#ifdef CONFIG_E200 -#define MCSR_MCP 0x80000000UL /* Machine Check Input Pin */ -#define MCSR_CP_PERR 0x20000000UL /* Cache Push Parity Error */ -#define MCSR_CPERR 0x10000000UL /* Cache Parity Error */ -#define MCSR_EXCP_ERR 0x08000000UL /* ISI, ITLB, or Bus Error on 1st insn - fetch for an exception handler */ -#define MCSR_BUS_IRERR 0x00000010UL /* Read Bus Error on instruction fetch*/ -#define MCSR_BUS_DRERR 0x00000008UL /* Read Bus Error on data load */ -#define MCSR_BUS_WRERR 0x00000004UL /* Write Bus Error on buffered - store or cache line push */ -#endif - /* Bit definitions for the HID1 */ #ifdef CONFIG_E500 /* e500v1/v2 */ diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 1d308780e0d3..4bf33f1b4193 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -108,15 +108,6 @@ _GLOBAL(__setup_cpu_e6500) #endif /* CONFIG_PPC_E500MC */ #ifdef CONFIG_PPC32 -#ifdef CONFIG_E200 -_GLOBAL(__setup_cpu_e200) - /* enable dedicated debug exception handling resources (Debug APU) */ - mfspr r3,SPRN_HID0 - ori r3,r3,HID0_DAPUEN@l - mtspr SPRN_HID0,r3 - b __setup_e200_ivors -#endif /* CONFIG_E200 */ - #ifdef CONFIG_E500 #ifndef CONFIG_PPC_E500MC _GLOBAL(__setup_cpu_e500v1) diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index b552e22dcddd..f2fcd29aab23 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -36,7 +36,6 @@ const char *powerpc_base_platform; * and ppc64 */ #ifdef CONFIG_PPC32 -extern void __setup_cpu_e200(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_e500v1(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_e500v2(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_e500mc(unsigned long offset, struct cpu_spec* spec); @@ -1902,51 +1901,6 @@ static struct cpu_spec __initdata cpu_specs[] = { } #endif /* CONFIG_PPC_47x */ #endif /* CONFIG_44x */ -#ifdef CONFIG_E200 - { /* e200z5 */ - .pvr_mask = 0xfff00000, - .pvr_value = 0x81000000, - .cpu_name = "e200z5", - /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */ - .cpu_features = CPU_FTRS_E200, - .cpu_user_features = COMMON_USER_BOOKE | - PPC_FEATURE_HAS_EFP_SINGLE | - PPC_FEATURE_UNIFIED_CACHE, - .mmu_features = MMU_FTR_TYPE_FSL_E, - .dcache_bsize = 32, - .machine_check = machine_check_e200, - .platform = "ppc5554", - }, - { /* e200z6 */ - .pvr_mask = 0xfff00000, - .pvr_value = 0x81100000, - .cpu_name = "e200z6", - /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */ - .cpu_features = CPU_FTRS_E200, - .cpu_user_features = COMMON_USER_BOOKE | - PPC_FEATURE_HAS_SPE_COMP | - PPC_FEATURE_HAS_EFP_SINGLE_COMP | - PPC_FEATURE_UNIFIED_CACHE, - .mmu_features = MMU_FTR_TYPE_FSL_E, - .dcache_bsize = 32, - .machine_check = machine_check_e200, - .platform = "ppc5554", - }, - { /* default match */ - .pvr_mask = 0x00000000, - .pvr_value = 0x00000000, - .cpu_name = "(generic E200 PPC)", - .cpu_features = CPU_FTRS_E200, - .cpu_user_features = COMMON_USER_BOOKE | - 
PPC_FEATURE_HAS_EFP_SINGLE | - PPC_FEATURE_UNIFIED_CACHE, - .mmu_features = MMU_FTR_TYPE_FSL_E, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_e200, - .machine_check = machine_check_e200, - .platform = "ppc5554", - } -#endif /* CONFIG_E200 */ #endif /* CONFIG_PPC32 */ #ifdef CONFIG_E500 #ifdef CONFIG_PPC32 diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index e26d35de27e5..74e230c200fb 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -185,7 +185,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) * * On 40x critical is the only additional level * On 44x/e500 we have critical and machine check - * On e200 we have critical and debug (machine check occurs via critical) * * Additionally we reserve a SPRG for each priority level so we can free up a * GPR to use as the base for indirect access to the exception stacks. This @@ -201,7 +200,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #define MC_STACK_BASE mcheckirq_ctx #define CRIT_STACK_BASE critirq_ctx -/* only on e500mc/e200 */ +/* only on e500mc */ #define DBG_STACK_BASE dbgirq_ctx #define EXC_LVL_FRAME_OVERHEAD (THREAD_SIZE - INT_FRAME_SIZE - EXC_LVL_SIZE) diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 586a6ac501e9..fdd4d274c245 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -187,9 +187,6 @@ set_ivor: /* Setup the defaults for TLB entries */ li r2,(MAS4_TSIZED(BOOK3E_PAGESZ_4K))@l -#ifdef CONFIG_E200 - oris r2,r2,MAS4_TLBSELD(1)@h -#endif mtspr SPRN_MAS4, r2 #if !defined(CONFIG_BDI_SWITCH) @@ -362,13 +359,7 @@ interrupt_base: CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception) /* Machine Check Interrupt */ -#ifdef CONFIG_E200 - /* no RFMCI, MCSRRs on E200 */ - CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \ - machine_check_exception) -#else MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception) -#endif /* Data Storage Interrupt */ START_EXCEPTION(DataStorage) @@ -399,15 +390,9 @@ interrupt_base: /* Floating Point Unavailable Interrupt */ #ifdef CONFIG_PPC_FPU FP_UNAVAILABLE_EXCEPTION -#else -#ifdef CONFIG_E200 - /* E200 treats 'normal' floating point instructions as FP Unavail exception */ - EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ - program_check_exception, EXC_XFER_STD) #else EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ unknown_exception, EXC_XFER_STD) -#endif #endif /* System Call Interrupt */ @@ -625,7 +610,7 @@ END_BTB_FLUSH_SECTION mfspr r10, SPRN_SPRG_RSCRATCH0 b InstructionStorage -/* Define SPE handlers for e200 and e500v2 */ +/* Define SPE handlers for e500v2 */ #ifdef CONFIG_SPE /* SPE Unavailable */ START_EXCEPTION(SPEUnavailable) @@ -807,31 +792,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) #endif 3: mtspr SPRN_MAS2, r12 -#ifdef CONFIG_E200 - /* Round robin TLB1 entries assignment */ - mfspr r12, SPRN_MAS0 - - /* Extract TLB1CFG(NENTRY) */ - mfspr r11, SPRN_TLB1CFG - andi. r11, r11, 0xfff - - /* Extract MAS0(NV) */ - andi. 
r13, r12, 0xfff - addi r13, r13, 1 - cmpw 0, r13, r11 - addi r12, r12, 1 - - /* check if we need to wrap */ - blt 7f - - /* wrap back to first free tlbcam entry */ - lis r13, tlbcam_index@ha - lwz r13, tlbcam_index@l(r13) - rlwimi r12, r13, 0, 20, 31 -7: - mtspr SPRN_MAS0,r12 -#endif /* CONFIG_E200 */ - tlb_write_entry: tlbwe @@ -933,21 +893,6 @@ get_phys_addr: * Global functions */ -#ifdef CONFIG_E200 -/* Adjust or setup IVORs for e200 */ -_GLOBAL(__setup_e200_ivors) - li r3,DebugDebug@l - mtspr SPRN_IVOR15,r3 - li r3,SPEUnavailable@l - mtspr SPRN_IVOR32,r3 - li r3,SPEFloatingPointData@l - mtspr SPRN_IVOR33,r3 - li r3,SPEFloatingPointRound@l - mtspr SPRN_IVOR34,r3 - sync - blr -#endif - #ifdef CONFIG_E500 #ifndef CONFIG_PPC_E500MC /* Adjust or setup IVORs for e500v1/v2 */ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 057d6b8e9bb0..416e2c7a8b0a 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -223,6 +223,4 @@ __init void initialize_cache_info(void) dcache_bsize = cur_cpu_spec->dcache_bsize; icache_bsize = cur_cpu_spec->icache_bsize; ucache_bsize = 0; - if (IS_ENABLED(CONFIG_E200)) - ucache_bsize = icache_bsize = dcache_bsize; } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 46419ae4d17e..3ec7b443fe6b 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -751,31 +751,6 @@ int machine_check_generic(struct pt_regs *regs) { return 0; } -#elif defined(CONFIG_E200) -int machine_check_e200(struct pt_regs *regs) -{ - unsigned long reason = mfspr(SPRN_MCSR); - - printk("Machine check in kernel mode.\n"); - printk("Caused by (from MCSR=%lx): ", reason); - - if (reason & MCSR_MCP) - pr_cont("Machine Check Signal\n"); - if (reason & MCSR_CP_PERR) - pr_cont("Cache Push Parity Error\n"); - if (reason & MCSR_CPERR) - pr_cont("Cache Parity Error\n"); - if (reason & MCSR_EXCP_ERR) - pr_cont("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n"); - if (reason & MCSR_BUS_IRERR) - pr_cont("Bus - Read Bus Error on instruction fetch\n"); - if (reason & MCSR_BUS_DRERR) - pr_cont("Bus - Read Bus Error on data load\n"); - if (reason & MCSR_BUS_WRERR) - pr_cont("Bus - Write Bus Error on buffered store or cache line push\n"); - - return 0; -} #elif defined(CONFIG_PPC32) int machine_check_generic(struct pt_regs *regs) { diff --git a/arch/powerpc/mm/nohash/fsl_booke.c b/arch/powerpc/mm/nohash/fsl_booke.c index 36bda962d3b3..03dacbe940e5 100644 --- a/arch/powerpc/mm/nohash/fsl_booke.c +++ b/arch/powerpc/mm/nohash/fsl_booke.c @@ -223,15 +223,9 @@ void flush_instruction_cache(void) { unsigned long tmp; - if (IS_ENABLED(CONFIG_E200)) { - tmp = mfspr(SPRN_L1CSR0); - tmp |= L1CSR0_CFI | L1CSR0_CLFC; - mtspr(SPRN_L1CSR0, tmp); - } else { - tmp = mfspr(SPRN_L1CSR1); - tmp |= L1CSR1_ICFI | L1CSR1_ICLFR; - mtspr(SPRN_L1CSR1, tmp); - } + tmp = mfspr(SPRN_L1CSR1); + tmp |= L1CSR1_ICFI | L1CSR1_ICLFR; + mtspr(SPRN_L1CSR1, tmp); isync(); } diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 60162b65909c..45ce09db8f46 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -23,7 +23,7 @@ choice The most common ones are the desktop and server CPUs (603, 604, 740, 750, 74xx) CPUs from Freescale and IBM, with their embedded 512x/52xx/82xx/83xx/86xx counterparts. 
- The other embedded parts, namely 4xx, 8xx, e200 (55xx) and e500 + The other embedded parts, namely 4xx, 8xx and e500 (85xx) each form a family of their own that is not compatible with the others. @@ -66,9 +66,6 @@ config 44x select HAVE_PCI select PHYS_64BIT -config E200 - bool "Freescale e200" - endchoice choice @@ -258,12 +255,12 @@ config 4xx config BOOKE bool - depends on E200 || E500 || 44x || PPC_BOOK3E + depends on E500 || 44x || PPC_BOOK3E default y config FSL_BOOKE bool - depends on (E200 || E500) && PPC32 + depends on E500 && PPC32 default y # this is for common code between PPC32 & PPC64 FSL BOOKE @@ -328,7 +325,7 @@ config VSX config SPE_POSSIBLE def_bool y - depends on E200 || (E500 && !PPC_E500MC) + depends on E500 && !PPC_E500MC config SPE bool "SPE Support" @@ -480,7 +477,7 @@ config NR_CPUS config NOT_COHERENT_CACHE bool - depends on 4xx || PPC_8xx || E200 || PPC_MPC512x || \ + depends on 4xx || PPC_8xx || PPC_MPC512x || \ GAMECUBE_COMMON || AMIGAONE select ARCH_HAS_DMA_PREP_COHERENT select ARCH_HAS_SYNC_DMA_FOR_DEVICE From 8817aabb1bdd5811130f94ff6442bb19c9158a3a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 17 Nov 2020 05:07:59 +0000 Subject: [PATCH 192/304] powerpc: Remove ucache_bsize ppc601 and e200 were the users of ucache_bsize. ppc601 and e200 are now gone. Remove ucache_bsize. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/288b6048597c0fdc495b203fda57a223d89499d2.1605589460.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/elf.h | 2 +- arch/powerpc/kernel/setup-common.c | 4 ---- arch/powerpc/kernel/setup_32.c | 1 - 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 4ecc372c408e..b8425e3cfd81 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -168,7 +168,7 @@ do { \ /* Cache size items */ \ NEW_AUX_ENT(AT_DCACHEBSIZE, dcache_bsize); \ NEW_AUX_ENT(AT_ICACHEBSIZE, icache_bsize); \ - NEW_AUX_ENT(AT_UCACHEBSIZE, ucache_bsize); \ + NEW_AUX_ENT(AT_UCACHEBSIZE, 0); \ VDSO_AUX_ENT(AT_SYSINFO_EHDR, (unsigned long)current->mm->context.vdso);\ ARCH_DLINFO_CACHE_GEOMETRY; \ } while (0) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index da8c71f321ad..71f38e9248be 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -90,8 +90,6 @@ EXPORT_SYMBOL_GPL(boot_cpuid); */ int dcache_bsize; int icache_bsize; -int ucache_bsize; - unsigned long klimit = (unsigned long) _end; @@ -802,8 +800,6 @@ static __init void print_system_info(void) pr_info("dcache_bsize = 0x%x\n", dcache_bsize); pr_info("icache_bsize = 0x%x\n", icache_bsize); - if (ucache_bsize != 0) - pr_info("ucache_bsize = 0x%x\n", ucache_bsize); pr_info("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features); pr_info(" possible = 0x%016lx\n", diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 416e2c7a8b0a..8ba49a6bf515 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -222,5 +222,4 @@ __init void initialize_cache_info(void) */ dcache_bsize = cur_cpu_spec->dcache_bsize; icache_bsize = cur_cpu_spec->icache_bsize; - ucache_bsize = 0; } From 4bb3219837a3dcf58bce96c27db6e0cd48f3d9b2 Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Mon, 7 Dec 2020 12:05:18 +1100 Subject: [PATCH 193/304] powerpc/book3s64/kexec: Clear CIABR on kexec The value in CIABR persists across kexec which can lead to unintended results 
when the new kernel hits the old kernel's breakpoint. For example: 0:mon> bi $loadavg_proc_show 0:mon> b type address 1 inst c000000000519060 loadavg_proc_show+0x0/0x130 0:mon> x $ kexec -l /mnt/vmlinux --initrd=/mnt/rootfs.cpio.gz --append='xmon=off' $ kexec -e $ cat /proc/loadavg Trace/breakpoint trap Make sure CIABR is cleared so this does not happen. Signed-off-by: Jordan Niethe Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207010519.15597-1-jniethe5@gmail.com --- arch/powerpc/include/asm/book3s/64/kexec.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/include/asm/book3s/64/kexec.h b/arch/powerpc/include/asm/book3s/64/kexec.h index 6b5c3a248ba2..d4b9d476ecba 100644 --- a/arch/powerpc/include/asm/book3s/64/kexec.h +++ b/arch/powerpc/include/asm/book3s/64/kexec.h @@ -3,6 +3,7 @@ #ifndef _ASM_POWERPC_BOOK3S_64_KEXEC_H_ #define _ASM_POWERPC_BOOK3S_64_KEXEC_H_ +#include #define reset_sprs reset_sprs static inline void reset_sprs(void) @@ -14,6 +15,10 @@ static inline void reset_sprs(void) if (cpu_has_feature(CPU_FTR_ARCH_207S)) { mtspr(SPRN_IAMR, 0); + if (cpu_has_feature(CPU_FTR_HVMODE)) + mtspr(SPRN_CIABR, 0); + else + plpar_set_ciabr(0); } /* Do we need isync()? We are going via a kexec reset */ From 250ad7a45b1e58d580decfb935fc063c4cf56f91 Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Mon, 7 Dec 2020 12:05:19 +1100 Subject: [PATCH 194/304] powerpc/powernv/idle: Restore CIABR after idle for Power9 On Power9, CIABR is lost after idle. This means that instruction breakpoints set by xmon which use CIABR do not work. Fix this by restoring CIABR after idle. Signed-off-by: Jordan Niethe Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207010519.15597-2-jniethe5@gmail.com --- arch/powerpc/platforms/powernv/idle.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 1ed7c5286487..e6f461812856 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -589,6 +589,7 @@ struct p9_sprs { u64 spurr; u64 dscr; u64 wort; + u64 ciabr; u64 mmcra; u32 mmcr0; @@ -668,6 +669,7 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) sprs.spurr = mfspr(SPRN_SPURR); sprs.dscr = mfspr(SPRN_DSCR); sprs.wort = mfspr(SPRN_WORT); + sprs.ciabr = mfspr(SPRN_CIABR); sprs.mmcra = mfspr(SPRN_MMCRA); sprs.mmcr0 = mfspr(SPRN_MMCR0); @@ -785,6 +787,7 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) mtspr(SPRN_SPURR, sprs.spurr); mtspr(SPRN_DSCR, sprs.dscr); mtspr(SPRN_WORT, sprs.wort); + mtspr(SPRN_CIABR, sprs.ciabr); mtspr(SPRN_MMCRA, sprs.mmcra); mtspr(SPRN_MMCR0, sprs.mmcr0); From 475c8749d9542392d7e0855097d29ed14877ad0a Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 8 Dec 2020 08:45:39 +0530 Subject: [PATCH 195/304] powerpc/book3s64/kuap: Improve error reporting with KUAP This partially reverts commit eb232b162446 ("powerpc/book3s64/kuap: Improve error reporting with KUAP") and updates the fault handler to print [ 55.022514] Kernel attempted to access user page (7e6725b70000) - exploit attempt? (uid: 0) [ 55.022528] BUG: Unable to handle kernel data access on read at 0x7e6725b70000 [ 55.022533] Faulting instruction address: 0xc000000000e8b9bc [ 55.022540] Oops: Kernel access of bad area, sig: 11 [#1] .... when the kernel accesses a userspace address without unlocking the AMR.
bad_kuap_fault() is added as part of commit 5e5be3aed230 ("powerpc/mm: Detect bad KUAP faults") to catch userspace access incorrectly blocked by AMR. Hence retain the full stack dump there even with hash translation. Also, add a comment explaining the difference between hash and radix. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201208031539.84878-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/book3s/32/kup.h | 4 +-- arch/powerpc/include/asm/book3s/64/kup.h | 34 ++++++++++---------- arch/powerpc/include/asm/kup.h | 4 +-- arch/powerpc/include/asm/nohash/32/kup-8xx.h | 4 +-- arch/powerpc/mm/fault.c | 4 +-- 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index b18cd931e325..32fd4452e960 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -177,8 +177,8 @@ static inline void restore_user_access(unsigned long flags) allow_user_access(to, to, end - addr, KUAP_READ_WRITE); } -static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, - bool is_write, unsigned long error_code) +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { unsigned long begin = regs->kuap & 0xf0000000; unsigned long end = regs->kuap << 28; diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index f2e6dd78d5e2..7075c92c320c 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -353,29 +353,29 @@ static inline void set_kuap(unsigned long value) isync(); } -#define RADIX_KUAP_BLOCK_READ UL(0x4000000000000000) -#define RADIX_KUAP_BLOCK_WRITE UL(0x8000000000000000) - static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, - bool is_write, unsigned long error_code) + bool is_write) { if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) return false; - - if (radix_enabled()) { - /* - * Will be a storage protection fault. - * Only check the details of AMR[0] - */ - return WARN((regs->kuap & (is_write ? RADIX_KUAP_BLOCK_WRITE : RADIX_KUAP_BLOCK_READ)), - "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read"); - } /* - * We don't want to WARN here because userspace can setup - * keys such that a kernel access to user address can cause - * fault + * For radix this will be a storage protection fault (DSISR_PROTFAULT). + * For hash this will be a key fault (DSISR_KEYFAULT) */ - return !!(error_code & DSISR_KEYFAULT); + /* + * We do have exception table entry, but accessing the + * userspace results in fault. This could be because we + * didn't unlock the AMR or access is denied by userspace + * using a key value that blocks access. We are only interested + * in catching the use case of accessing without unlocking + * the AMR. Hence check for BLOCK_WRITE/READ against AMR. 
+ */ + if (is_write) { + return WARN(((regs->amr & AMR_KUAP_BLOCK_WRITE) == AMR_KUAP_BLOCK_WRITE), + "Bug: Write fault blocked by AMR!"); + } + return WARN(((regs->amr & AMR_KUAP_BLOCK_READ) == AMR_KUAP_BLOCK_READ), + "Bug: Read fault blocked by AMR!"); } static __always_inline void allow_user_access(void __user *to, const void __user *from, diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index f8ec679bd2de..5a9820c54da9 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -62,8 +62,8 @@ void setup_kuap(bool disabled); #else static inline void setup_kuap(bool disabled) { } -static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, - bool is_write, unsigned long error_code) +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { return false; } diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index 7bdd9e5b63ed..567cdc557402 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -60,8 +60,8 @@ static inline void restore_user_access(unsigned long flags) mtspr(SPRN_MD_AP, flags); } -static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, - bool is_write, unsigned long error_code) +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xff000000), "Bug: fault blocked by AP register !"); diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index c91621df0c61..b12595102525 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -210,7 +210,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, return true; } - if (!is_exec && address < TASK_SIZE && (error_code & DSISR_PROTFAULT) && + if (!is_exec && address < TASK_SIZE && (error_code & (DSISR_PROTFAULT | DSISR_KEYFAULT)) && !search_exception_tables(regs->nip)) { pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: %d)\n", address, @@ -227,7 +227,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, // Read/write fault in a valid region (the exception table search passed // above), but blocked by KUAP is bad, it can never succeed. - if (bad_kuap_fault(regs, address, is_write, error_code)) + if (bad_kuap_fault(regs, address, is_write)) return true; // What's left? Kernel fault on user in well defined regions (extable From de0f7349a0dd072e54b5fc04c305907b22d28a5f Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:33 -0600 Subject: [PATCH 196/304] powerpc/rtas: prevent suspend-related sys_rtas use on LE While drmgr has had work in some areas to make its RTAS syscall interactions endian-neutral, its code for performing partition migration via the syscall has never worked on LE. While it is able to complete ibm,suspend-me successfully, it crashes when attempting the subsequent ibm,update-nodes call. drmgr is the only known (or plausible) user of ibm,suspend-me, ibm,update-nodes, and ibm,update-properties, so allow them only in big-endian configurations. 
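The user-visible effect on LE kernels is that sys_rtas() now refuses these three calls before they ever reach firmware. A sketch of the gating, treating the helper name from the existing rtas filter code as an assumption:

	/* in sys_rtas(): rtas_filters is an allowlist, so with the
	 * entries compiled out on LE the lookup fails and the call
	 * is rejected up front */
	if (block_rtas_call(token, nargs, &args))
		return -EINVAL;
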
Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-2-nathanl@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 954f41676f69..4ed64aba37d6 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1050,9 +1050,11 @@ static struct rtas_filter rtas_filters[] __ro_after_init = { { "set-time-for-power-on", -1, -1, -1, -1, -1 }, { "ibm,set-system-parameter", -1, 1, -1, -1, -1 }, { "set-time-of-day", -1, -1, -1, -1, -1 }, +#ifdef CONFIG_CPU_BIG_ENDIAN { "ibm,suspend-me", -1, -1, -1, -1, -1 }, { "ibm,update-nodes", -1, 0, -1, -1, -1, 4096 }, { "ibm,update-properties", -1, 0, -1, -1, -1, 4096 }, +#endif { "ibm,physical-attestation", -1, 0, 1, -1, -1 }, }; From 970e453ea4ecdd7a16a46c229294547148d1c7b6 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:34 -0600 Subject: [PATCH 197/304] powerpc/rtas: complete ibm,suspend-me status codes We don't completely account for the possible return codes for ibm,suspend-me. Add definitions for these. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-3-nathanl@linux.ibm.com --- arch/powerpc/include/asm/rtas.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 55f9a154c95d..f060181a0d32 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -23,11 +23,16 @@ #define RTAS_RMOBUF_MAX (64 * 1024) /* RTAS return status codes */ -#define RTAS_NOT_SUSPENDABLE -9004 #define RTAS_BUSY -2 /* RTAS Busy */ #define RTAS_EXTENDED_DELAY_MIN 9900 #define RTAS_EXTENDED_DELAY_MAX 9905 +/* statuses specific to ibm,suspend-me */ +#define RTAS_SUSPEND_ABORTED 9000 /* Suspension aborted */ +#define RTAS_NOT_SUSPENDABLE -9004 /* Partition not suspendable */ +#define RTAS_THREADS_ACTIVE -9005 /* Multiple processor threads active */ +#define RTAS_OUTSTANDING_COPROC -9006 /* Outstanding coprocessor operations */ + /* * In general to call RTAS use rtas_token("string") to lookup * an RTAS token for the given string (e.g. "event-scan"). From 7049b288ea8c95f270ec8fe643e3c3187938d5af Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:35 -0600 Subject: [PATCH 198/304] powerpc/rtas: rtas_ibm_suspend_me -> rtas_ibm_suspend_me_unsafe The pseries partition suspend sequence requires that all active CPUs call H_JOIN, which suspends all but one of them with interrupts disabled. The "chosen" CPU is then to call ibm,suspend-me to complete the suspend. Upon returning from ibm,suspend-me, the chosen CPU is to use H_PROD to wake the joined CPUs. Using on_each_cpu() for this, as rtas_ibm_suspend_me() does to implement partition migration, is susceptible to deadlock with other users of on_each_cpu() and with users of stop_machine APIs. The callback passed to on_each_cpu() is not allowed to synchronize with other CPUs in the way it is used here. Complicating the fix is the fact that rtas_ibm_suspend_me() also occupies the function name that should be used to provide a more conventional wrapper for ibm,suspend-me. Rename rtas_ibm_suspend_me() to rtas_ibm_suspend_me_unsafe() to free up the name and indicate that it should not gain users. 
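In outline, the architected sequence reads like this (a sketch only; the robust implementation arrives later in this series on top of stop_machine()):

	/* Sketch: executed on every active CPU with interrupts hard-disabled. */
	long rc = plpar_hcall_norets(H_JOIN);

	if (rc == H_CONTINUE) {
		int cpu;

		/* Last thread to join: perform the suspend itself... */
		rtas_call(rtas_token("ibm,suspend-me"), 0, 1, NULL);
		/* ...then wake the threads still parked in H_JOIN. */
		for_each_online_cpu(cpu)
			plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu));
	} else if (rc == H_SUCCESS) {
		/* This thread was prodded; the suspend has completed. */
	}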
Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-4-nathanl@linux.ibm.com --- arch/powerpc/include/asm/rtas.h | 2 +- arch/powerpc/kernel/rtas.c | 6 +++--- arch/powerpc/platforms/pseries/mobility.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index f060181a0d32..8436ed01567b 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -257,7 +257,7 @@ extern int rtas_set_indicator_fast(int indicator, int index, int new_value); extern void rtas_progress(char *s, unsigned short hex); extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data); extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data); -extern int rtas_ibm_suspend_me(u64 handle); +int rtas_ibm_suspend_me_unsafe(u64 handle); struct rtc_time; extern time64_t rtas_get_boot_time(void); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 4ed64aba37d6..0a8e5dc2c108 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -843,7 +843,7 @@ static void rtas_percpu_suspend_me(void *info) __rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1); } -int rtas_ibm_suspend_me(u64 handle) +int rtas_ibm_suspend_me_unsafe(u64 handle) { long state; long rc; @@ -949,7 +949,7 @@ int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...) } #else /* CONFIG_PPC_PSERIES */ -int rtas_ibm_suspend_me(u64 handle) +int rtas_ibm_suspend_me_unsafe(u64 handle) { return -ENOSYS; } @@ -1185,7 +1185,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) int rc = 0; u64 handle = ((u64)be32_to_cpu(args.args[0]) << 32) | be32_to_cpu(args.args[1]); - rc = rtas_ibm_suspend_me(handle); + rc = rtas_ibm_suspend_me_unsafe(handle); if (rc == -EAGAIN) args.rets[0] = cpu_to_be32(RTAS_NOT_SUSPENDABLE); else if (rc == -EIO) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 2f73cb5bf12d..6ff642e84c6a 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -370,7 +370,7 @@ static ssize_t migration_store(struct class *class, return rc; do { - rc = rtas_ibm_suspend_me(streamid); + rc = rtas_ibm_suspend_me_unsafe(streamid); if (rc == -EAGAIN) ssleep(1); } while (rc == -EAGAIN); From 701ba68342412ae9be99a7c7f3badebf95271403 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:36 -0600 Subject: [PATCH 199/304] powerpc/rtas: add rtas_ibm_suspend_me() Now that the name is available, provide a simple wrapper for ibm,suspend-me which returns both a Linux errno and optionally the actual RTAS status to the caller. 
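A hypothetical caller, following the retry guidance in the kernel-doc added below:

	int fw_status;
	int ret;

	do {
		ret = rtas_ibm_suspend_me(&fw_status);
	} while (ret == -EAGAIN && fw_status == RTAS_THREADS_ACTIVE);

Passing NULL for @fw_status remains valid for callers that only care about the Linux errno.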
Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-5-nathanl@linux.ibm.com --- arch/powerpc/include/asm/rtas.h | 1 + arch/powerpc/kernel/rtas.c | 57 +++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 8436ed01567b..b43165fc6c2a 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -258,6 +258,7 @@ extern void rtas_progress(char *s, unsigned short hex); extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data); extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data); int rtas_ibm_suspend_me_unsafe(u64 handle); +int rtas_ibm_suspend_me(int *fw_status); struct rtc_time; extern time64_t rtas_get_boot_time(void); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 0a8e5dc2c108..8a618a3c4beb 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -684,6 +684,63 @@ int rtas_set_indicator_fast(int indicator, int index, int new_value) return rc; } +/** + * rtas_ibm_suspend_me() - Call ibm,suspend-me to suspend the LPAR. + * + * @fw_status: RTAS call status will be placed here if not NULL. + * + * rtas_ibm_suspend_me() should be called only on a CPU which has + * received H_CONTINUE from the H_JOIN hcall. All other active CPUs + * should be waiting to return from H_JOIN. + * + * rtas_ibm_suspend_me() may suspend execution of the OS + * indefinitely. Callers should take appropriate measures upon return, such as + * resetting watchdog facilities. + * + * Callers may choose to retry this call if @fw_status is + * %RTAS_THREADS_ACTIVE. + * + * Return: + * 0 - The partition has resumed from suspend, possibly after + * migration to a different host. + * -ECANCELED - The operation was aborted. + * -EAGAIN - There were other CPUs not in H_JOIN at the time of the call. + * -EBUSY - Some other condition prevented the suspend from succeeding. + * -EIO - Hardware/platform error. + */ +int rtas_ibm_suspend_me(int *fw_status) +{ + int fwrc; + int ret; + + fwrc = rtas_call(rtas_token("ibm,suspend-me"), 0, 1, NULL); + + switch (fwrc) { + case 0: + ret = 0; + break; + case RTAS_SUSPEND_ABORTED: + ret = -ECANCELED; + break; + case RTAS_THREADS_ACTIVE: + ret = -EAGAIN; + break; + case RTAS_NOT_SUSPENDABLE: + case RTAS_OUTSTANDING_COPROC: + ret = -EBUSY; + break; + case -1: + default: + ret = -EIO; + break; + } + + if (fw_status) + *fw_status = fwrc; + + return ret; +} + void __noreturn rtas_restart(char *cmd) { if (rtas_flash_term_hook) From 5f485a66f4d0693a535e4ab38ffc3538716d2c2b Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:37 -0600 Subject: [PATCH 200/304] powerpc/rtas: add rtas_activate_firmware() Provide a documented wrapper function for the ibm,activate-firmware service, which must be called after a partition migration or hibernation. If the function is absent or the call fails, the OS will continue to run normally with the current firmware, so there is no need to perform any recovery. Just log it and continue. 
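Since absence of the service and call failure are both handled and logged inside the helper, a resume path can invoke it unconditionally; a minimal sketch:

	/* After resuming from partition migration or hibernation: */
	rtas_activate_firmware();	/* returns void; nothing for the caller to recover */

Later in this series, post_mobility_fixup() becomes exactly such a caller.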
Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-6-nathanl@linux.ibm.com --- arch/powerpc/include/asm/rtas.h | 1 + arch/powerpc/kernel/rtas.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index b43165fc6c2a..fdefe6a974eb 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -247,6 +247,7 @@ extern void __noreturn rtas_restart(char *cmd); extern void rtas_power_off(void); extern void __noreturn rtas_halt(void); extern void rtas_os_term(char *str); +void rtas_activate_firmware(void); extern int rtas_get_sensor(int sensor, int index, int *state); extern int rtas_get_sensor_fast(int sensor, int index, int *state); extern int rtas_get_power_level(int powerdomain, int *level); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 8a618a3c4beb..3a740ae933f8 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -798,6 +798,36 @@ void rtas_os_term(char *str) printk(KERN_EMERG "ibm,os-term call failed %d\n", status); } +/** + * rtas_activate_firmware() - Activate a new version of firmware. + * + * Activate a new version of partition firmware. The OS must call this + * after resuming from a partition hibernation or migration in order + * to maintain the ability to perform live firmware updates. It's not + * catastrophic for this method to be absent or to fail; just log the + * condition in that case. + * + * Context: This function may sleep. + */ +void rtas_activate_firmware(void) +{ + int token; + int fwrc; + + token = rtas_token("ibm,activate-firmware"); + if (token == RTAS_UNKNOWN_SERVICE) { + pr_notice("ibm,activate-firmware method unavailable\n"); + return; + } + + do { + fwrc = rtas_call(token, 0, 1, NULL); + } while (rtas_busy_delay(fwrc)); + + if (fwrc) + pr_err("ibm,activate-firmware failed (%i)\n", fwrc); +} + static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE; #ifdef CONFIG_PPC_PSERIES static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_when_done) From 9bae89f528c041f3117f0a6c21878dda5a55af60 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:38 -0600 Subject: [PATCH 201/304] powerpc/hvcall: add token and codes for H_VASI_SIGNAL H_VASI_SIGNAL can be used by a partition to request cancellation of its migration. To be used in future changes. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-7-nathanl@linux.ibm.com --- arch/powerpc/include/asm/hvcall.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index c1fbccb04390..c98f5141e3fc 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -155,6 +155,14 @@ #define H_VASI_RESUMED 5 #define H_VASI_COMPLETED 6 +/* VASI signal codes. Only the Cancel code is valid for H_VASI_SIGNAL. 
*/ +#define H_VASI_SIGNAL_CANCEL 1 +#define H_VASI_SIGNAL_ABORT 2 +#define H_VASI_SIGNAL_SUSPEND 3 +#define H_VASI_SIGNAL_COMPLETE 4 +#define H_VASI_SIGNAL_ENABLE 5 +#define H_VASI_SIGNAL_FAILOVER 6 + /* Each control block has to be on a 4K boundary */ #define H_CB_ALIGNMENT 4096 @@ -261,6 +269,7 @@ #define H_ADD_CONN 0x284 #define H_DEL_CONN 0x288 #define H_JOIN 0x298 +#define H_VASI_SIGNAL 0x2A0 #define H_VASI_STATE 0x2A4 #define H_VIOCTL 0x2A8 #define H_ENABLE_CRQ 0x2B0 From b06a6717873560e9dd1c07357781fc2b27545701 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:39 -0600 Subject: [PATCH 202/304] powerpc/pseries/mobility: don't error on absence of ibm,update-nodes Treat the absence of the ibm,update-nodes function as benign instead of reporting an error. If the platform does not provide that facility, it's not a problem for Linux. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-8-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/mobility.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 6ff642e84c6a..e66359b00297 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -261,7 +261,7 @@ int pseries_devicetree_update(s32 scope) update_nodes_token = rtas_token("ibm,update-nodes"); if (update_nodes_token == RTAS_UNKNOWN_SERVICE) - return -EINVAL; + return 0; rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); if (!rtas_buf) From aa5e5c9b556a2e5f68a915e4b5dfa5c6bda47c64 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:40 -0600 Subject: [PATCH 203/304] powerpc/pseries/mobility: add missing break to default case update_dt_node() has a switch statement where the default case lacks a break statement. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-9-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/mobility.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index e66359b00297..527a64e2d89f 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -213,6 +213,7 @@ static int update_dt_node(__be32 phandle, s32 scope) } prop_data += vd; + break; } cond_resched(); From 2d5be6f16c4ba5c27d06704976daf55f3236a236 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:41 -0600 Subject: [PATCH 204/304] powerpc/pseries/mobility: error message improvements - Convert printk(KERN_ERR) to pr_err(). - Include errno in property update failure message. - Remove reference to "Post-mobility" from device tree update message: with pr_err() it will have a "mobility:" prefix (see the pr_fmt() sketch below). 
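The "mobility:" prefix is supplied by the file's pr_fmt() definition rather than by each call site; the usual idiom, roughly as mobility.c uses it, is:

	/* Must be defined before printk.h is pulled in to take effect. */
	#define pr_fmt(fmt) "mobility: " fmt

With that in place, every pr_err() in the file logs with the subsystem prefix and no format string needs to repeat it.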
Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-10-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/mobility.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 527a64e2d89f..31d81b7da961 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -208,8 +208,8 @@ static int update_dt_node(__be32 phandle, s32 scope) rc = update_dt_property(dn, &prop, prop_name, vd, prop_data); if (rc) { - printk(KERN_ERR "Could not update %s" - " property\n", prop_name); + pr_err("updating %s property failed: %d\n", + prop_name, rc); } prop_data += vd; @@ -343,8 +343,7 @@ void post_mobility_fixup(void) rc = pseries_devicetree_update(MIGRATION_SCOPE); if (rc) - printk(KERN_ERR "Post-mobility device tree update " - "failed: %d\n", rc); + pr_err("device tree update failed: %d\n", rc); cacheinfo_rebuild(); From c3ae9781d5a64093f161e6cc5dfefb0773106ca9 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:42 -0600 Subject: [PATCH 205/304] powerpc/pseries/mobility: use rtas_activate_firmware() on resume It's incorrect to abort post-suspend processing if ibm,activate-firmware isn't available. Use rtas_activate_firmware(), which logs this condition appropriately and allows us to proceed. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-11-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/mobility.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 31d81b7da961..01ac7c03558e 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -312,21 +312,8 @@ int pseries_devicetree_update(s32 scope) void post_mobility_fixup(void) { int rc; - int activate_fw_token; - activate_fw_token = rtas_token("ibm,activate-firmware"); - if (activate_fw_token == RTAS_UNKNOWN_SERVICE) { - printk(KERN_ERR "Could not make post-mobility " - "activate-fw call.\n"); - return; - } - - do { - rc = rtas_call(activate_fw_token, 0, 1, NULL); - } while (rtas_busy_delay(rc)); - - if (rc) - printk(KERN_ERR "Post-mobility activate-fw failed: %d\n", rc); + rtas_activate_firmware(); /* * We don't want CPUs to go online/offline while the device From d9213319b84ee8393475c38361c84151d5c33415 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:43 -0600 Subject: [PATCH 206/304] powerpc/pseries/mobility: extract VASI session polling logic The behavior of rtas_ibm_suspend_me_unsafe() is to return -EAGAIN to the caller until the specified VASI suspend session state makes the transition from H_VASI_ENABLED to H_VASI_SUSPENDING. In the interest of separating concerns to prepare for a new implementation of the join/suspend sequence, extract VASI session polling logic into a couple of local functions. Waiting for the session state to reach H_VASI_SUSPENDING before calling rtas_ibm_suspend_me_unsafe() ensures that we will never get an EAGAIN result necessitating a retry. No user-visible change in behavior is intended. 
Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-12-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/mobility.c | 69 +++++++++++++++++++++-- 1 file changed, 64 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 01ac7c03558e..573ed48b43d8 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -345,6 +345,66 @@ void post_mobility_fixup(void) return; } +static int poll_vasi_state(u64 handle, unsigned long *res) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long hvrc; + int ret; + + hvrc = plpar_hcall(H_VASI_STATE, retbuf, handle); + switch (hvrc) { + case H_SUCCESS: + ret = 0; + *res = retbuf[0]; + break; + case H_PARAMETER: + ret = -EINVAL; + break; + case H_FUNCTION: + ret = -EOPNOTSUPP; + break; + case H_HARDWARE: + default: + pr_err("unexpected H_VASI_STATE result %ld\n", hvrc); + ret = -EIO; + break; + } + return ret; +} + +static int wait_for_vasi_session_suspending(u64 handle) +{ + unsigned long state; + int ret; + + /* + * Wait for transition from H_VASI_ENABLED to + * H_VASI_SUSPENDING. Treat anything else as an error. + */ + while (true) { + ret = poll_vasi_state(handle, &state); + + if (ret != 0 || state == H_VASI_SUSPENDING) { + break; + } else if (state == H_VASI_ENABLED) { + ssleep(1); + } else { + pr_err("unexpected H_VASI_STATE result %lu\n", state); + ret = -EIO; + break; + } + } + + /* + * Proceed even if H_VASI_STATE is unavailable. If H_JOIN or + * ibm,suspend-me are also unimplemented, we'll recover then. + */ + if (ret == -EOPNOTSUPP) + ret = 0; + + return ret; +} + static ssize_t migration_store(struct class *class, struct class_attribute *attr, const char *buf, size_t count) @@ -356,12 +416,11 @@ static ssize_t migration_store(struct class *class, if (rc) return rc; - do { - rc = rtas_ibm_suspend_me_unsafe(streamid); - if (rc == -EAGAIN) - ssleep(1); - } while (rc == -EAGAIN); + rc = wait_for_vasi_session_suspending(streamid); + if (rc) + return rc; + rc = rtas_ibm_suspend_me_unsafe(streamid); if (rc) return rc; From 9327dc0aeef36a3cbb9d94f79b79cc4f91ff8a41 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:44 -0600 Subject: [PATCH 207/304] powerpc/pseries/mobility: use stop_machine for join/suspend The partition suspend sequence as specified in the platform architecture requires that all active processor threads call H_JOIN, which: - suspends the calling thread until it is the target of an H_PROD; or - immediately returns H_CONTINUE, if the calling thread is the last to call H_JOIN. This thread is expected to call ibm,suspend-me to completely suspend the partition. Upon returning from ibm,suspend-me the calling thread must wake all others using H_PROD. rtas_ibm_suspend_me_unsafe() uses on_each_cpu() to implement this protocol, but because of its synchronizing nature this is susceptible to deadlock versus users of stop_machine() or other callers of on_each_cpu(). Not only is stop_machine() intended for use cases like this, it handles error propagation and allows us to keep the data shared between CPUs minimal: a single atomic counter which ensures exactly one CPU will wake the others from their joined states. 
Switch the migration code to use stop_machine() and a less complex local implementation of the H_JOIN/ibm,suspend-me logic, which carries additional benefits: - more informative error reporting, appropriately ratelimited - resets the lockup detector / watchdog on resume to prevent lockup warnings when the OS has been suspended for a time exceeding the threshold. Fixes: 91dc182ca6e2 ("[PATCH] powerpc: special-case ibm,suspend-me RTAS call") Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-13-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/mobility.c | 132 ++++++++++++++++++++-- 1 file changed, 125 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 573ed48b43d8..5a3951626a96 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -12,9 +12,11 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -405,6 +407,128 @@ static int wait_for_vasi_session_suspending(u64 handle) return ret; } +static void prod_single(unsigned int target_cpu) +{ + long hvrc; + int hwid; + + hwid = get_hard_smp_processor_id(target_cpu); + hvrc = plpar_hcall_norets(H_PROD, hwid); + if (hvrc == H_SUCCESS) + return; + pr_err_ratelimited("H_PROD of CPU %u (hwid %d) error: %ld\n", + target_cpu, hwid, hvrc); +} + +static void prod_others(void) +{ + unsigned int cpu; + + for_each_online_cpu(cpu) { + if (cpu != smp_processor_id()) + prod_single(cpu); + } +} + +static u16 clamp_slb_size(void) +{ + u16 prev = mmu_slb_size; + + slb_set_size(SLB_MIN_SIZE); + + return prev; +} + +static int do_suspend(void) +{ + u16 saved_slb_size; + int status; + int ret; + + pr_info("calling ibm,suspend-me on CPU %i\n", smp_processor_id()); + + /* + * The destination processor model may have fewer SLB entries + * than the source. We reduce mmu_slb_size to a safe minimum + * before suspending in order to minimize the possibility of + * programming non-existent entries on the destination. If + * suspend fails, we restore it before returning. On success + * the OF reconfig path will update it from the new device + * tree after resuming on the destination. + */ + saved_slb_size = clamp_slb_size(); + + ret = rtas_ibm_suspend_me(&status); + if (ret != 0) { + pr_err("ibm,suspend-me error: %d\n", status); + slb_set_size(saved_slb_size); + } + + return ret; +} + +static int do_join(void *arg) +{ + atomic_t *counter = arg; + long hvrc; + int ret; + + /* Must ensure MSR.EE off for H_JOIN. */ + hard_irq_disable(); + hvrc = plpar_hcall_norets(H_JOIN); + + switch (hvrc) { + case H_CONTINUE: + /* + * All other CPUs are offline or in H_JOIN. This CPU + * attempts the suspend. + */ + ret = do_suspend(); + break; + case H_SUCCESS: + /* + * The suspend is complete and this cpu has received a + * prod. + */ + ret = 0; + break; + case H_BAD_MODE: + case H_HARDWARE: + default: + ret = -EIO; + pr_err_ratelimited("H_JOIN error %ld on CPU %i\n", + hvrc, smp_processor_id()); + break; + } + + if (atomic_inc_return(counter) == 1) { + pr_info("CPU %u waking all threads\n", smp_processor_id()); + prod_others(); + } + /* + * Execution may have been suspended for several seconds, so + * reset the watchdog. 
+ */ + touch_nmi_watchdog(); + return ret; +} + +static int pseries_migrate_partition(u64 handle) +{ + atomic_t counter = ATOMIC_INIT(0); + int ret; + + ret = wait_for_vasi_session_suspending(handle); + if (ret) + return ret; + + ret = stop_machine(do_join, &counter, cpu_online_mask); + if (ret == 0) + post_mobility_fixup(); + + return ret; +} + static ssize_t migration_store(struct class *class, struct class_attribute *attr, const char *buf, size_t count) @@ -416,16 +540,10 @@ static ssize_t migration_store(struct class *class, if (rc) return rc; - rc = wait_for_vasi_session_suspending(streamid); + rc = pseries_migrate_partition(streamid); if (rc) return rc; - rc = rtas_ibm_suspend_me_unsafe(streamid); - if (rc) - return rc; - - post_mobility_fixup(); - return count; } From 37cddc7d6cf4568a7fb69aeff6f26e4c8a3bc0f7 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:45 -0600 Subject: [PATCH 208/304] powerpc/pseries/mobility: signal suspend cancellation to platform If we're returning an error to user space, use H_VASI_SIGNAL to send a cancellation request to the platform. This isn't strictly required but it communicates that Linux will not attempt to complete the suspend, which allows the various entities involved to promptly end the operation in progress. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-14-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/mobility.c | 31 +++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 5a3951626a96..f234a7ed87aa 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -513,6 +513,35 @@ static int do_join(void *arg) return ret; } +/* + * Abort reason code byte 0. We use only the 'Migrating partition' value. + */ +enum vasi_aborting_entity { + ORCHESTRATOR = 1, + VSP_SOURCE = 2, + PARTITION_FIRMWARE = 3, + PLATFORM_FIRMWARE = 4, + VSP_TARGET = 5, + MIGRATING_PARTITION = 6, +}; + +static void pseries_cancel_migration(u64 handle, int err) +{ + u32 reason_code; + u32 detail; + u8 entity; + long hvrc; + + entity = MIGRATING_PARTITION; + detail = abs(err) & 0xffffff; + reason_code = (entity << 24) | detail; + + hvrc = plpar_hcall_norets(H_VASI_SIGNAL, handle, + H_VASI_SIGNAL_CANCEL, reason_code); + if (hvrc) + pr_err("H_VASI_SIGNAL error: %ld\n", hvrc); +} + static int pseries_migrate_partition(u64 handle) { atomic_t counter = ATOMIC_INIT(0); @@ -525,6 +554,8 @@ static int pseries_migrate_partition(u64 handle) ret = stop_machine(do_join, &counter, cpu_online_mask); if (ret == 0) post_mobility_fixup(); + else + pseries_cancel_migration(handle, ret); return ret; } From aeca35b9a52b0e0d019a5244fbaab699f753b443 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:46 -0600 Subject: [PATCH 209/304] powerpc/pseries/mobility: retry partition suspend after error This is a mitigation for the relatively rare occurrence where a virtual IOA can be in a transient state that prevents the suspend/migration from succeeding, resulting in an error from ibm,suspend-me. If the join/suspend sequence returns an error, it is acceptable to retry as long as the VASI suspend session state is still "Suspending" (i.e. the platform is still waiting for the OS to suspend). Retry a few times on suspend failure while this condition holds, progressively increasing the delay between attempts. 
We don't want to retry indefinitely because firmware emits an error log event on each unsuccessful attempt. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-15-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/mobility.c | 59 ++++++++++++++++++++++- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index f234a7ed87aa..fe7e35cdc9d5 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -542,16 +542,71 @@ static void pseries_cancel_migration(u64 handle, int err) pr_err("H_VASI_SIGNAL error: %ld\n", hvrc); } +static int pseries_suspend(u64 handle) +{ + const unsigned int max_attempts = 5; + unsigned int retry_interval_ms = 1; + unsigned int attempt = 1; + int ret; + + while (true) { + atomic_t counter = ATOMIC_INIT(0); + unsigned long vasi_state; + int vasi_err; + + ret = stop_machine(do_join, &counter, cpu_online_mask); + if (ret == 0) + break; + /* + * Encountered an error. If the VASI stream is still + * in Suspending state, it's likely a transient + * condition related to some device in the partition + * and we can retry in the hope that the cause has + * cleared after some delay. + * + * A better design would allow drivers etc to prepare + * for the suspend and avoid conditions which prevent + * the suspend from succeeding. For now, we have this + * mitigation. + */ + pr_notice("Partition suspend attempt %u of %u error: %d\n", + attempt, max_attempts, ret); + + if (attempt == max_attempts) + break; + + vasi_err = poll_vasi_state(handle, &vasi_state); + if (vasi_err == 0) { + if (vasi_state != H_VASI_SUSPENDING) { + pr_notice("VASI state %lu after failed suspend\n", + vasi_state); + break; + } + } else if (vasi_err != -EOPNOTSUPP) { + pr_err("VASI state poll error: %d", vasi_err); + break; + } + + pr_notice("Will retry partition suspend after %u ms\n", + retry_interval_ms); + + msleep(retry_interval_ms); + retry_interval_ms *= 10; + attempt++; + } + + return ret; +} + static int pseries_migrate_partition(u64 handle) { - atomic_t counter = ATOMIC_INIT(0); int ret; ret = wait_for_vasi_session_suspending(handle); if (ret) return ret; - ret = stop_machine(do_join, &counter, cpu_online_mask); + ret = pseries_suspend(handle); if (ret == 0) post_mobility_fixup(); else From 4d756894ba75f1afe7945ccafe9afebff50484b6 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:47 -0600 Subject: [PATCH 210/304] powerpc/rtas: dispatch partition migration requests to pseries sys_rtas() cannot call ibm,suspend-me directly in the same way it handles other inputs. Instead it must dispatch the request to code that can first perform the H_JOIN sequence before any call to ibm,suspend-me can succeed. Over time kernel/rtas.c has accreted a fair amount of platform-specific code to implement this. Since a different, more robust implementation of the suspend sequence is now in the pseries platform code, we want to dispatch the request there. Note that invoking ibm,suspend-me via the RTAS syscall is all but deprecated; this change preserves ABI compatibility for old programs while providing them the benefit of the new partition suspend implementation. This is a behavior change in that the kernel performs the device tree update and firmware activation before returning, but experimentation indicates this is tolerated fine by legacy user space. 
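The control-flow change, in outline (a sketch; function names as used in this series):

	/*
	 * Before: sys_rtas() -> rtas_ibm_suspend_me_unsafe(handle)
	 *   - returns once ibm,suspend-me completes;
	 *   - device tree update etc. left for userspace (drmgr) to drive.
	 *
	 * After:  sys_rtas() -> rtas_syscall_dispatch_ibm_suspend_me(handle)
	 *                    -> pseries_migrate_partition(handle)
	 *   - waits for the VASI session to reach Suspending;
	 *   - runs H_JOIN/ibm,suspend-me under stop_machine();
	 *   - performs post_mobility_fixup() (device tree update, firmware
	 *     activation) before the syscall returns to userspace.
	 */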
Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-16-nathanl@linux.ibm.com --- arch/powerpc/include/asm/rtas.h | 5 +++++ arch/powerpc/kernel/rtas.c | 2 +- arch/powerpc/platforms/pseries/mobility.c | 5 +++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index fdefe6a974eb..3b52d8574fcc 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -279,8 +279,13 @@ extern time64_t last_rtas_event; extern int clobbering_unread_rtas_event(void); extern int pseries_devicetree_update(s32 scope); extern void post_mobility_fixup(void); +int rtas_syscall_dispatch_ibm_suspend_me(u64 handle); #else static inline int clobbering_unread_rtas_event(void) { return 0; } +static inline int rtas_syscall_dispatch_ibm_suspend_me(u64 handle) +{ + return -EINVAL; +} #endif #ifdef CONFIG_PPC_RTAS_DAEMON diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 3a740ae933f8..d4b048571728 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1272,7 +1272,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) int rc = 0; u64 handle = ((u64)be32_to_cpu(args.args[0]) << 32) | be32_to_cpu(args.args[1]); - rc = rtas_ibm_suspend_me_unsafe(handle); + rc = rtas_syscall_dispatch_ibm_suspend_me(handle); if (rc == -EAGAIN) args.rets[0] = cpu_to_be32(RTAS_NOT_SUSPENDABLE); else if (rc == -EIO) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index fe7e35cdc9d5..e670180f311d 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -615,6 +615,11 @@ static int pseries_migrate_partition(u64 handle) return ret; } +int rtas_syscall_dispatch_ibm_suspend_me(u64 handle) +{ + return pseries_migrate_partition(handle); +} + static ssize_t migration_store(struct class *class, struct class_attribute *attr, const char *buf, size_t count) From 5f6665e400569de479733677e77862542aebb6cc Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:48 -0600 Subject: [PATCH 211/304] powerpc/rtas: remove rtas_ibm_suspend_me_unsafe() rtas_ibm_suspend_me_unsafe() is now unused; remove it and rtas_percpu_suspend_me() which becomes unused as a result. 
Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-17-nathanl@linux.ibm.com --- arch/powerpc/include/asm/rtas.h | 1 - arch/powerpc/kernel/rtas.c | 67 +-------------------------------- 2 files changed, 1 insertion(+), 67 deletions(-) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 3b52d8574fcc..9a6107ffe378 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -258,7 +258,6 @@ extern int rtas_set_indicator_fast(int indicator, int index, int new_value); extern void rtas_progress(char *s, unsigned short hex); extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data); extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data); -int rtas_ibm_suspend_me_unsafe(u64 handle); int rtas_ibm_suspend_me(int *fw_status); struct rtc_time; diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index d4b048571728..7e6024f570da 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -925,66 +925,6 @@ int rtas_suspend_cpu(struct rtas_suspend_me_data *data) return __rtas_suspend_cpu(data, 0); } -static void rtas_percpu_suspend_me(void *info) -{ - __rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1); -} - -int rtas_ibm_suspend_me_unsafe(u64 handle) -{ - long state; - long rc; - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - struct rtas_suspend_me_data data; - DECLARE_COMPLETION_ONSTACK(done); - - if (!rtas_service_present("ibm,suspend-me")) - return -ENOSYS; - - /* Make sure the state is valid */ - rc = plpar_hcall(H_VASI_STATE, retbuf, handle); - - state = retbuf[0]; - - if (rc) { - printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned %ld\n",rc); - return rc; - } else if (state == H_VASI_ENABLED) { - return -EAGAIN; - } else if (state != H_VASI_SUSPENDING) { - printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned state %ld\n", - state); - return -EIO; - } - - atomic_set(&data.working, 0); - atomic_set(&data.done, 0); - atomic_set(&data.error, 0); - data.token = rtas_token("ibm,suspend-me"); - data.complete = &done; - - lock_device_hotplug(); - - cpu_hotplug_disable(); - - /* Call function on all CPUs. One of us will make the - * rtas call - */ - on_each_cpu(rtas_percpu_suspend_me, &data, 0); - - wait_for_completion(&done); - - if (atomic_read(&data.error) != 0) - printk(KERN_ERR "Error doing global join\n"); - - - cpu_hotplug_enable(); - - unlock_device_hotplug(); - - return atomic_read(&data.error); -} - /** * rtas_call_reentrant() - Used for reentrant rtas calls * @token: Token for desired reentrant RTAS call @@ -1035,12 +975,7 @@ int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...) return ret; } -#else /* CONFIG_PPC_PSERIES */ -int rtas_ibm_suspend_me_unsafe(u64 handle) -{ - return -ENOSYS; -} -#endif +#endif /* CONFIG_PPC_PSERIES */ /** * Find a specific pseries error log in an RTAS extended event log. From 52719fce3f4c7a8ac9eaa191e8d75a697f9fbcbc Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:49 -0600 Subject: [PATCH 212/304] powerpc/pseries/hibernation: drop pseries_suspend_begin() from suspend ops There are three ways pseries_suspend_begin() can be reached: 1. When "mem" is written to /sys/power/state: kobj_attr_store() -> state_store() -> pm_suspend() -> suspend_devices_and_enter() -> pseries_suspend_begin() This never works because there is no way to supply a valid stream id using this interface, and H_VASI_STATE is called with a stream id of zero. 
So this call path is useless at best. 2. When a stream id is written to /sys/devices/system/power/hibernate. pseries_suspend_begin() is polled directly from store_hibernate() until the stream is in the "Suspending" state (i.e. the platform is ready for the OS to suspend execution): dev_attr_store() -> store_hibernate() -> pseries_suspend_begin() 3. When a stream id is written to /sys/devices/system/power/hibernate (continued). After #2, pseries_suspend_begin() is called once again from the pm core: dev_attr_store() -> store_hibernate() -> pm_suspend() -> suspend_devices_and_enter() -> pseries_suspend_begin() This is redundant because the VASI suspend state is already known to be Suspending. The begin() callback of platform_suspend_ops is optional, so we can simply remove that assignment with no loss of function. Fixes: 32d8ad4e621d ("powerpc/pseries: Partition hibernation support") Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-18-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/suspend.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 81e0ac58d620..3eaa9d59dc7a 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -187,7 +187,6 @@ static struct bus_type suspend_subsys = { static const struct platform_suspend_ops pseries_suspend_ops = { .valid = suspend_valid_only_mem, - .begin = pseries_suspend_begin, .prepare_late = pseries_prepare_late, .enter = pseries_suspend_enter, }; From a10a5a17f4ac4f84fcc26162d43b53e2a4e1009a Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:50 -0600 Subject: [PATCH 213/304] powerpc/pseries/hibernation: pass stream id via function arguments There is no need for the stream id to be a file-global variable; pass it from hibernate_store() to pseries_suspend_begin() for the H_VASI_STATE call. 
Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-19-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/suspend.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 3eaa9d59dc7a..232621f33510 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -15,7 +15,6 @@ #include #include "../../kernel/cacheinfo.h" -static u64 stream_id; static struct device suspend_dev; static DECLARE_COMPLETION(suspend_work); static struct rtas_suspend_me_data suspend_data; @@ -29,7 +28,7 @@ static atomic_t suspending; * Return value: * 0 on success / other on failure **/ -static int pseries_suspend_begin(suspend_state_t state) +static int pseries_suspend_begin(u64 stream_id) { long vasi_state, rc; unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; @@ -132,6 +131,7 @@ static ssize_t store_hibernate(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { + u64 stream_id; int rc; if (!capable(CAP_SYS_ADMIN)) @@ -140,7 +140,7 @@ static ssize_t store_hibernate(struct device *dev, stream_id = simple_strtoul(buf, NULL, 16); do { - rc = pseries_suspend_begin(PM_SUSPEND_MEM); + rc = pseries_suspend_begin(stream_id); if (rc == -EAGAIN) ssleep(1); } while (rc == -EAGAIN); @@ -148,8 +148,6 @@ static ssize_t store_hibernate(struct device *dev, if (!rc) rc = pm_suspend(PM_SUSPEND_MEM); - stream_id = 0; - if (!rc) rc = count; From ed22bb8d39fa7f3980afc6e16d2a891847367d33 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:51 -0600 Subject: [PATCH 214/304] powerpc/pseries/hibernation: remove pseries_suspend_cpu() Since commit 48f6e7f6d948 ("powerpc/pseries: remove cede offline state for CPUs"), ppc_md.suspend_disable_cpu() is no longer used and all CPUs (save one) are placed into true offline state as opposed to H_JOIN. So pseries_suspend_cpu() is effectively unused; remove it. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-20-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/suspend.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 232621f33510..3315d698d5ab 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -48,20 +48,6 @@ static int pseries_suspend_begin(u64 stream_id) vasi_state); return -EIO; } - - return 0; -} - -/** - * pseries_suspend_cpu - Suspend a single CPU - * - * Makes the H_JOIN call to suspend the CPU - * - **/ -static int pseries_suspend_cpu(void) -{ - if (atomic_read(&suspending)) - return rtas_suspend_cpu(&suspend_data); return 0; } @@ -235,7 +221,6 @@ static int __init pseries_suspend_init(void) if ((rc = pseries_suspend_sysfs_register(&suspend_dev))) return rc; - ppc_md.suspend_disable_cpu = pseries_suspend_cpu; ppc_md.suspend_enable_irqs = pseries_suspend_enable_irqs; suspend_set_ops(&pseries_suspend_ops); return 0; From 796f9247b4fa9bec320d6b47ffde2ecf86cc71c0 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:52 -0600 Subject: [PATCH 215/304] powerpc/machdep: remove suspend_disable_cpu() There are no users left of the suspend_disable_cpu() callback, remove it. 
Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-21-nathanl@linux.ibm.com --- arch/powerpc/include/asm/machdep.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 475687f24f4a..cf6ebbc16cb4 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -207,7 +207,6 @@ struct machdep_calls { void (*suspend_disable_irqs)(void); void (*suspend_enable_irqs)(void); #endif - int (*suspend_disable_cpu)(void); #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE ssize_t (*cpu_probe)(const char *, size_t); From 395b2c090907975c627902ba8fda0bdb04c7cad3 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:53 -0600 Subject: [PATCH 216/304] powerpc/rtas: remove rtas_suspend_cpu() rtas_suspend_cpu() no longer has users; remove it and __rtas_suspend_cpu() which now becomes unused as well. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-22-nathanl@linux.ibm.com --- arch/powerpc/include/asm/rtas.h | 1 - arch/powerpc/kernel/rtas.c | 52 --------------------------------- 2 files changed, 53 deletions(-) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 9a6107ffe378..97ccb40fb09f 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -256,7 +256,6 @@ extern bool rtas_indicator_present(int token, int *maxindex); extern int rtas_set_indicator(int indicator, int index, int new_value); extern int rtas_set_indicator_fast(int indicator, int index, int new_value); extern void rtas_progress(char *s, unsigned short hex); -extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data); extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data); int rtas_ibm_suspend_me(int *fw_status); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 7e6024f570da..aedd46967b99 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -873,58 +873,6 @@ int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data) return __rtas_suspend_last_cpu(data, 0); } -static int __rtas_suspend_cpu(struct rtas_suspend_me_data *data, int wake_when_done) -{ - long rc = H_SUCCESS; - unsigned long msr_save; - int cpu; - - atomic_inc(&data->working); - - /* really need to ensure MSR.EE is off for H_JOIN */ - msr_save = mfmsr(); - mtmsr(msr_save & ~(MSR_EE)); - - while (rc == H_SUCCESS && !atomic_read(&data->done) && !atomic_read(&data->error)) - rc = plpar_hcall_norets(H_JOIN); - - mtmsr(msr_save); - - if (rc == H_SUCCESS) { - /* This cpu was prodded and the suspend is complete. */ - goto out; - } else if (rc == H_CONTINUE) { - /* All other cpus are in H_JOIN, this cpu does - * the suspend. - */ - return __rtas_suspend_last_cpu(data, wake_when_done); - } else { - printk(KERN_ERR "H_JOIN on cpu %i failed with rc = %ld\n", - smp_processor_id(), rc); - atomic_set(&data->error, rc); - } - - if (wake_when_done) { - atomic_set(&data->done, 1); - - /* This cpu did the suspend or got an error; in either case, - * we need to prod all other other cpus out of join state. - * Extra prods are harmless. 
- */ - for_each_online_cpu(cpu) - plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu)); - } -out: - if (atomic_dec_return(&data->working) == 0) - complete(data->complete); - return rc; -} - -int rtas_suspend_cpu(struct rtas_suspend_me_data *data) -{ - return __rtas_suspend_cpu(data, 0); -} - /** * rtas_call_reentrant() - Used for reentrant rtas calls * @token: Token for desired reentrant RTAS call From 366fb13bf13b029c4d43bf19382f7aea69bfa4b7 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:54 -0600 Subject: [PATCH 217/304] powerpc/pseries/hibernation: switch to rtas_ibm_suspend_me() rtas_suspend_last_cpu() and related code perform a lot of work that isn't relevant to the hibernation workflow. All other CPUs are offline when called so there is no need to place them in H_JOIN or prod them on resume, nor is there need for retries or operations on shared state. Call the rtas_ibm_suspend_me() wrapper function directly from pseries_suspend_enter() instead of using rtas_suspend_last_cpu(). Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-23-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/suspend.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 3315d698d5ab..703728cb95ec 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -76,11 +76,7 @@ static void pseries_suspend_enable_irqs(void) **/ static int pseries_suspend_enter(suspend_state_t state) { - int rc = rtas_suspend_last_cpu(&suspend_data); - - atomic_set(&suspending, 0); - atomic_set(&suspend_data.done, 1); - return rc; + return rtas_ibm_suspend_me(NULL); } /** From 1b2488176ea56e299d2b084772daeb5ecbfc16d1 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:55 -0600 Subject: [PATCH 218/304] powerpc/rtas: remove unused rtas_suspend_last_cpu() rtas_suspend_last_cpu() is now unused, remove it and __rtas_suspend_last_cpu() which also becomes unused. 
Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-24-nathanl@linux.ibm.com --- arch/powerpc/include/asm/rtas.h | 1 - arch/powerpc/kernel/rtas.c | 43 --------------------------------- 2 files changed, 44 deletions(-) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 97ccb40fb09f..332e1000ca0f 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -256,7 +256,6 @@ extern bool rtas_indicator_present(int token, int *maxindex); extern int rtas_set_indicator(int indicator, int index, int new_value); extern int rtas_set_indicator_fast(int indicator, int index, int new_value); extern void rtas_progress(char *s, unsigned short hex); -extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data); int rtas_ibm_suspend_me(int *fw_status); struct rtc_time; diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index aedd46967b99..9a7d1bba3ef7 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -830,49 +830,6 @@ void rtas_activate_firmware(void) static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE; #ifdef CONFIG_PPC_PSERIES -static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_when_done) -{ - u16 slb_size = mmu_slb_size; - int rc = H_MULTI_THREADS_ACTIVE; - int cpu; - - slb_set_size(SLB_MIN_SIZE); - printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n", smp_processor_id()); - - while (rc == H_MULTI_THREADS_ACTIVE && !atomic_read(&data->done) && - !atomic_read(&data->error)) - rc = rtas_call(data->token, 0, 1, NULL); - - if (rc || atomic_read(&data->error)) { - printk(KERN_DEBUG "ibm,suspend-me returned %d\n", rc); - slb_set_size(slb_size); - } - - if (atomic_read(&data->error)) - rc = atomic_read(&data->error); - - atomic_set(&data->error, rc); - pSeries_coalesce_init(); - - if (wake_when_done) { - atomic_set(&data->done, 1); - - for_each_online_cpu(cpu) - plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu)); - } - - if (atomic_dec_return(&data->working) == 0) - complete(data->complete); - - return rc; -} - -int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data) -{ - atomic_inc(&data->working); - return __rtas_suspend_last_cpu(data, 0); -} - /** * rtas_call_reentrant() - Used for reentrant rtas calls * @token: Token for desired reentrant RTAS call From b866459489fe8ef0e92cde3cbd6bbb1af6c4e99b Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:56 -0600 Subject: [PATCH 219/304] powerpc/pseries/hibernation: remove redundant cacheinfo update Partitions with cache nodes in the device tree can encounter the following warning on resume: CPU 0 already accounted in PowerPC,POWER9@0(Data) WARNING: CPU: 0 PID: 3177 at arch/powerpc/kernel/cacheinfo.c:197 cacheinfo_cpu_online+0x640/0x820 These calls to cacheinfo_cpu_offline/online have been redundant since commit e610a466d16a ("powerpc/pseries/mobility: rebuild cacheinfo hierarchy post-migration"). 
Fixes: e610a466d16a ("powerpc/pseries/mobility: rebuild cacheinfo hierarchy post-migration") Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-25-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/suspend.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 703728cb95ec..6a94cc0deb88 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -13,7 +13,6 @@ #include #include #include -#include "../../kernel/cacheinfo.h" static struct device suspend_dev; static DECLARE_COMPLETION(suspend_work); @@ -63,9 +62,7 @@ static void pseries_suspend_enable_irqs(void) * Update configuration which can be modified based on device tree * changes during resume. */ - cacheinfo_cpu_offline(smp_processor_id()); post_mobility_fixup(); - cacheinfo_cpu_online(smp_processor_id()); } /** From fa53bcdb7413e7c40170106781f6b5bb9d74db84 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:57 -0600 Subject: [PATCH 220/304] powerpc/pseries/hibernation: perform post-suspend fixups later The pseries hibernate code calls post_mobility_fixup() which is sort of a dumping ground of fixups that need to run after resuming from suspend regardless of whether suspend was a hibernation or a migration. Calling post_mobility_fixup() from pseries_suspend_enable_irqs() runs this code early in resume with devices suspended and only one CPU up, while the much more commonly used migration case runs these fixups in a more typical process context. Call post_mobility_fixup() after the suspend core returns a success status to the hibernate sysfs store method and remove pseries_suspend_enable_irqs(). Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-26-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/suspend.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 6a94cc0deb88..589a91730db8 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -50,21 +50,6 @@ static int pseries_suspend_begin(u64 stream_id) return 0; } -/** - * pseries_suspend_enable_irqs - * - * Post suspend configuration updates - * - **/ -static void pseries_suspend_enable_irqs(void) -{ - /* - * Update configuration which can be modified based on device tree - * changes during resume. - */ - post_mobility_fixup(); -} - /** * pseries_suspend_enter - Final phase of hibernation * @@ -127,8 +112,11 @@ static ssize_t store_hibernate(struct device *dev, if (!rc) rc = pm_suspend(PM_SUSPEND_MEM); - if (!rc) + if (!rc) { rc = count; + post_mobility_fixup(); + } + return rc; } @@ -214,7 +202,6 @@ static int __init pseries_suspend_init(void) if ((rc = pseries_suspend_sysfs_register(&suspend_dev))) return rc; - ppc_md.suspend_enable_irqs = pseries_suspend_enable_irqs; suspend_set_ops(&pseries_suspend_ops); return 0; } From d102f8312e1ea5e8bf84fceebf99186f22d16fc6 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:58 -0600 Subject: [PATCH 221/304] powerpc/pseries/hibernation: remove prepare_late() callback The pseries hibernate code no longer calls into the original join/suspend code in kernel/rtas.c, so pseries_prepare_late() and related code don't accomplish anything now. 
Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-27-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/suspend.c | 25 ------------------------ 1 file changed, 25 deletions(-) diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 589a91730db8..1b902cbf85c5 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -15,9 +15,6 @@ #include static struct device suspend_dev; -static DECLARE_COMPLETION(suspend_work); -static struct rtas_suspend_me_data suspend_data; -static atomic_t suspending; /** * pseries_suspend_begin - First phase of hibernation @@ -61,23 +58,6 @@ static int pseries_suspend_enter(suspend_state_t state) return rtas_ibm_suspend_me(NULL); } -/** - * pseries_prepare_late - Prepare to suspend all other CPUs - * - * Return value: - * 0 on success / other on failure - **/ -static int pseries_prepare_late(void) -{ - atomic_set(&suspending, 1); - atomic_set(&suspend_data.working, 0); - atomic_set(&suspend_data.done, 0); - atomic_set(&suspend_data.error, 0); - suspend_data.complete = &suspend_work; - reinit_completion(&suspend_work); - return 0; -} - /** * store_hibernate - Initiate partition hibernation * @dev: subsys root device @@ -152,7 +132,6 @@ static struct bus_type suspend_subsys = { static const struct platform_suspend_ops pseries_suspend_ops = { .valid = suspend_valid_only_mem, - .prepare_late = pseries_prepare_late, .enter = pseries_suspend_enter, }; @@ -195,10 +174,6 @@ static int __init pseries_suspend_init(void) if (!firmware_has_feature(FW_FEATURE_LPAR)) return 0; - suspend_data.token = rtas_token("ibm,suspend-me"); - if (suspend_data.token == RTAS_UNKNOWN_SERVICE) - return 0; - if ((rc = pseries_suspend_sysfs_register(&suspend_dev))) return rc; From 87b57ea7e109520d3c6dfb01671a0cb134d3ccff Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:59 -0600 Subject: [PATCH 222/304] powerpc/rtas: remove unused rtas_suspend_me_data All code which used this type has been removed. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-28-nathanl@linux.ibm.com --- arch/powerpc/include/asm/rtas-types.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/powerpc/include/asm/rtas-types.h b/arch/powerpc/include/asm/rtas-types.h index aa420561bc10..8df6235d64d1 100644 --- a/arch/powerpc/include/asm/rtas-types.h +++ b/arch/powerpc/include/asm/rtas-types.h @@ -23,14 +23,6 @@ struct rtas_t { struct device_node *dev; /* virtual address pointer */ }; -struct rtas_suspend_me_data { - atomic_t working; /* number of cpus accessing this struct */ - atomic_t done; - int token; /* ibm,suspend-me */ - atomic_t error; - struct completion *complete; /* wait on this until working == 0 */ -}; - struct rtas_error_log { /* Byte 0 */ u8 byte0; /* Architectural version */ From 2efd7f6eb9b7107e469837d8452e750d7d080a5d Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:52:00 -0600 Subject: [PATCH 223/304] powerpc/pseries/mobility: refactor node lookup during DT update In pseries_devicetree_update(), with each call to ibm,update-nodes the partition firmware communicates the node to be deleted or updated by placing its phandle in the work buffer. Each of delete_dt_node(), update_dt_node(), and add_dt_node() have duplicate lookups using the phandle value and corresponding refcount management. 
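The duplicated pattern in each of those helpers looks like this (as seen in the removals below):

	dn = of_find_node_by_phandle(be32_to_cpu(phandle));
	if (!dn)
		return -ENOENT;
	/* ... operate on dn ... */
	of_node_put(dn);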
Move the lookup and of_node_put() into pseries_devicetree_update(), and emit a warning on any failed lookups. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-29-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/mobility.c | 49 ++++++++--------------- 1 file changed, 17 insertions(+), 32 deletions(-) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index e670180f311d..ea4d6a660e0d 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -61,18 +61,10 @@ static int mobility_rtas_call(int token, char *buf, s32 scope) return rc; } -static int delete_dt_node(__be32 phandle) +static int delete_dt_node(struct device_node *dn) { - struct device_node *dn; - - dn = of_find_node_by_phandle(be32_to_cpu(phandle)); - if (!dn) - return -ENOENT; - pr_debug("removing node %pOFfp\n", dn); - dlpar_detach_node(dn); - of_node_put(dn); return 0; } @@ -137,10 +129,9 @@ static int update_dt_property(struct device_node *dn, struct property **prop, return 0; } -static int update_dt_node(__be32 phandle, s32 scope) +static int update_dt_node(struct device_node *dn, s32 scope) { struct update_props_workarea *upwa; - struct device_node *dn; struct property *prop = NULL; int i, rc, rtas_rc; char *prop_data; @@ -157,14 +148,8 @@ static int update_dt_node(__be32 phandle, s32 scope) if (!rtas_buf) return -ENOMEM; - dn = of_find_node_by_phandle(be32_to_cpu(phandle)); - if (!dn) { - kfree(rtas_buf); - return -ENOENT; - } - upwa = (struct update_props_workarea *)&rtas_buf[0]; - upwa->phandle = phandle; + upwa->phandle = cpu_to_be32(dn->phandle); do { rtas_rc = mobility_rtas_call(update_properties_token, rtas_buf, @@ -224,26 +209,18 @@ static int update_dt_node(__be32 phandle, s32 scope) cond_resched(); } while (rtas_rc == 1); - of_node_put(dn); kfree(rtas_buf); return 0; } -static int add_dt_node(__be32 parent_phandle, __be32 drc_index) +static int add_dt_node(struct device_node *parent_dn, __be32 drc_index) { struct device_node *dn; - struct device_node *parent_dn; int rc; - parent_dn = of_find_node_by_phandle(be32_to_cpu(parent_phandle)); - if (!parent_dn) - return -ENOENT; - dn = dlpar_configure_connector(drc_index, parent_dn); - if (!dn) { - of_node_put(parent_dn); + if (!dn) return -ENOENT; - } rc = dlpar_attach_node(dn, parent_dn); if (rc) @@ -251,7 +228,6 @@ static int add_dt_node(__be32 parent_phandle, __be32 drc_index) pr_debug("added node %pOFfp\n", dn); - of_node_put(parent_dn); return rc; } @@ -284,22 +260,31 @@ int pseries_devicetree_update(s32 scope) data++; for (i = 0; i < node_count; i++) { + struct device_node *np; __be32 phandle = *data++; __be32 drc_index; + np = of_find_node_by_phandle(be32_to_cpu(phandle)); + if (!np) { + pr_warn("Failed lookup: phandle 0x%x for action 0x%x\n", + be32_to_cpu(phandle), action); + continue; + } + switch (action) { case DELETE_DT_NODE: - delete_dt_node(phandle); + delete_dt_node(np); break; case UPDATE_DT_NODE: - update_dt_node(phandle, scope); + update_dt_node(np, scope); break; case ADD_DT_NODE: drc_index = *data++; - add_dt_node(phandle, drc_index); + add_dt_node(np, drc_index); break; } + of_node_put(np); cond_resched(); } } From f8a4b277c3cf39ec8efe50114924a7743cc84800 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 7 Dec 2020 15:54:20 +0000 Subject: [PATCH 224/304] powerpc: fix spelling mistake in Kconfig "seleted" -> "selected" There is a spelling mistake in the help text of the Kconfig. 
Fix it. Signed-off-by: Colin Ian King Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207155420.172370-1-colin.king@canonical.com --- arch/powerpc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 9e679ba0811c..d0d16c3e1b4b 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -87,7 +87,7 @@ config PPC_WATCHDOG help This is a placeholder when the powerpc hardlockup detector watchdog is selected (arch/powerpc/kernel/watchdog.c). It is - seleted via the generic lockup detector menu which is why we + selected via the generic lockup detector menu which is why we have no standalone config option for it here. config STACKTRACE_SUPPORT From db972a3787d12b1ce9ba7a31ec376d8a79e04c47 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 8 Dec 2020 05:24:19 +0000 Subject: [PATCH 225/304] powerpc/powermac: Fix low_sleep_handler with CONFIG_VMAP_STACK low_sleep_handler() can't restore the context from standard stack because the stack can hardly be accessed with MMU OFF. Store everything in a global storage area instead of storing a pointer to the stack in that global storage area. To avoid a complete churn of the function, still use r1 as the pointer to the storage area during restore. Fixes: cd08f109e262 ("powerpc/32s: Enable CONFIG_VMAP_STACK") Reported-by: Giuseppe Sacco Signed-off-by: Christophe Leroy Tested-by: Giuseppe Sacco Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e3e0d8042a3ba75cb4a9546c19c408b5b5b28994.1607404931.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/Kconfig.cputype | 2 +- arch/powerpc/platforms/powermac/sleep.S | 132 +++++++++++------------- 2 files changed, 60 insertions(+), 74 deletions(-) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 45ce09db8f46..aa071663b9a9 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -36,7 +36,7 @@ config PPC_BOOK3S_6xx select PPC_HAVE_PMU_SUPPORT select PPC_HAVE_KUEP select PPC_HAVE_KUAP - select HAVE_ARCH_VMAP_STACK if !ADB_PMU + select HAVE_ARCH_VMAP_STACK config PPC_85xx bool "Freescale 85xx" diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S index 7e0f8ba6e54a..d497a60003d2 100644 --- a/arch/powerpc/platforms/powermac/sleep.S +++ b/arch/powerpc/platforms/powermac/sleep.S @@ -44,7 +44,8 @@ #define SL_TB 0xa0 #define SL_R2 0xa8 #define SL_CR 0xac -#define SL_R12 0xb0 /* r12 to r31 */ +#define SL_LR 0xb0 +#define SL_R12 0xb4 /* r12 to r31 */ #define SL_SIZE (SL_R12 + 80) .section .text @@ -63,105 +64,107 @@ _GLOBAL(low_sleep_handler) blr #else mflr r0 - stw r0,4(r1) - stwu r1,-SL_SIZE(r1) + lis r11,sleep_storage@ha + addi r11,r11,sleep_storage@l + stw r0,SL_LR(r11) mfcr r0 - stw r0,SL_CR(r1) - stw r2,SL_R2(r1) - stmw r12,SL_R12(r1) + stw r0,SL_CR(r11) + stw r1,SL_SP(r11) + stw r2,SL_R2(r11) + stmw r12,SL_R12(r11) /* Save MSR & SDR1 */ mfmsr r4 - stw r4,SL_MSR(r1) + stw r4,SL_MSR(r11) mfsdr1 r4 - stw r4,SL_SDR1(r1) + stw r4,SL_SDR1(r11) /* Get a stable timebase and save it */ 1: mftbu r4 - stw r4,SL_TB(r1) + stw r4,SL_TB(r11) mftb r5 - stw r5,SL_TB+4(r1) + stw r5,SL_TB+4(r11) mftbu r3 cmpw r3,r4 bne 1b /* Save SPRGs */ mfsprg r4,0 - stw r4,SL_SPRG0(r1) + stw r4,SL_SPRG0(r11) mfsprg r4,1 - stw r4,SL_SPRG0+4(r1) + stw r4,SL_SPRG0+4(r11) mfsprg r4,2 - stw r4,SL_SPRG0+8(r1) + stw r4,SL_SPRG0+8(r11) mfsprg r4,3 - stw r4,SL_SPRG0+12(r1) + stw r4,SL_SPRG0+12(r11) /* Save 
BATs */ mfdbatu r4,0 - stw r4,SL_DBAT0(r1) + stw r4,SL_DBAT0(r11) mfdbatl r4,0 - stw r4,SL_DBAT0+4(r1) + stw r4,SL_DBAT0+4(r11) mfdbatu r4,1 - stw r4,SL_DBAT1(r1) + stw r4,SL_DBAT1(r11) mfdbatl r4,1 - stw r4,SL_DBAT1+4(r1) + stw r4,SL_DBAT1+4(r11) mfdbatu r4,2 - stw r4,SL_DBAT2(r1) + stw r4,SL_DBAT2(r11) mfdbatl r4,2 - stw r4,SL_DBAT2+4(r1) + stw r4,SL_DBAT2+4(r11) mfdbatu r4,3 - stw r4,SL_DBAT3(r1) + stw r4,SL_DBAT3(r11) mfdbatl r4,3 - stw r4,SL_DBAT3+4(r1) + stw r4,SL_DBAT3+4(r11) mfibatu r4,0 - stw r4,SL_IBAT0(r1) + stw r4,SL_IBAT0(r11) mfibatl r4,0 - stw r4,SL_IBAT0+4(r1) + stw r4,SL_IBAT0+4(r11) mfibatu r4,1 - stw r4,SL_IBAT1(r1) + stw r4,SL_IBAT1(r11) mfibatl r4,1 - stw r4,SL_IBAT1+4(r1) + stw r4,SL_IBAT1+4(r11) mfibatu r4,2 - stw r4,SL_IBAT2(r1) + stw r4,SL_IBAT2(r11) mfibatl r4,2 - stw r4,SL_IBAT2+4(r1) + stw r4,SL_IBAT2+4(r11) mfibatu r4,3 - stw r4,SL_IBAT3(r1) + stw r4,SL_IBAT3(r11) mfibatl r4,3 - stw r4,SL_IBAT3+4(r1) + stw r4,SL_IBAT3+4(r11) BEGIN_MMU_FTR_SECTION mfspr r4,SPRN_DBAT4U - stw r4,SL_DBAT4(r1) + stw r4,SL_DBAT4(r11) mfspr r4,SPRN_DBAT4L - stw r4,SL_DBAT4+4(r1) + stw r4,SL_DBAT4+4(r11) mfspr r4,SPRN_DBAT5U - stw r4,SL_DBAT5(r1) + stw r4,SL_DBAT5(r11) mfspr r4,SPRN_DBAT5L - stw r4,SL_DBAT5+4(r1) + stw r4,SL_DBAT5+4(r11) mfspr r4,SPRN_DBAT6U - stw r4,SL_DBAT6(r1) + stw r4,SL_DBAT6(r11) mfspr r4,SPRN_DBAT6L - stw r4,SL_DBAT6+4(r1) + stw r4,SL_DBAT6+4(r11) mfspr r4,SPRN_DBAT7U - stw r4,SL_DBAT7(r1) + stw r4,SL_DBAT7(r11) mfspr r4,SPRN_DBAT7L - stw r4,SL_DBAT7+4(r1) + stw r4,SL_DBAT7+4(r11) mfspr r4,SPRN_IBAT4U - stw r4,SL_IBAT4(r1) + stw r4,SL_IBAT4(r11) mfspr r4,SPRN_IBAT4L - stw r4,SL_IBAT4+4(r1) + stw r4,SL_IBAT4+4(r11) mfspr r4,SPRN_IBAT5U - stw r4,SL_IBAT5(r1) + stw r4,SL_IBAT5(r11) mfspr r4,SPRN_IBAT5L - stw r4,SL_IBAT5+4(r1) + stw r4,SL_IBAT5+4(r11) mfspr r4,SPRN_IBAT6U - stw r4,SL_IBAT6(r1) + stw r4,SL_IBAT6(r11) mfspr r4,SPRN_IBAT6L - stw r4,SL_IBAT6+4(r1) + stw r4,SL_IBAT6+4(r11) mfspr r4,SPRN_IBAT7U - stw r4,SL_IBAT7(r1) + stw r4,SL_IBAT7(r11) mfspr r4,SPRN_IBAT7L - stw r4,SL_IBAT7+4(r1) + stw r4,SL_IBAT7+4(r11) END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) /* Backup various CPU config stuffs */ @@ -180,9 +183,9 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) lis r5,grackle_wake_up@ha addi r5,r5,grackle_wake_up@l tophys(r5,r5) - stw r5,SL_PC(r1) + stw r5,SL_PC(r11) lis r4,KERNELBASE@h - tophys(r5,r1) + tophys(r5,r11) addi r5,r5,SL_PC lis r6,MAGIC@ha addi r6,r6,MAGIC@l @@ -194,12 +197,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) tophys(r3,r3) stw r3,0x80(r4) stw r5,0x84(r4) - /* Store a pointer to our backup storage into - * a kernel global - */ - lis r3,sleep_storage@ha - addi r3,r3,sleep_storage@l - stw r5,0(r3) .globl low_cpu_offline_self low_cpu_offline_self: @@ -279,7 +276,7 @@ _GLOBAL(core99_wake_up) lis r3,sleep_storage@ha addi r3,r3,sleep_storage@l tophys(r3,r3) - lwz r1,0(r3) + addi r1,r3,SL_PC /* Pass thru to older resume code ... 
*/ _ASM_NOKPROBE_SYMBOL(core99_wake_up) @@ -399,13 +396,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) blt 1b sync - /* restore the MSR and turn on the MMU */ - lwz r3,SL_MSR(r1) - bl turn_on_mmu - - /* get back the stack pointer */ - tovirt(r1,r1) - /* Restore TB */ li r3,0 mttbl r3 @@ -419,28 +409,24 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) mtcr r0 lwz r2,SL_R2(r1) lmw r12,SL_R12(r1) - addi r1,r1,SL_SIZE - lwz r0,4(r1) - mtlr r0 - blr -_ASM_NOKPROBE_SYMBOL(grackle_wake_up) -turn_on_mmu: - mflr r4 - tovirt(r4,r4) + /* restore the MSR and SP and turn on the MMU and return */ + lwz r3,SL_MSR(r1) + lwz r4,SL_LR(r1) + lwz r1,SL_SP(r1) mtsrr0 r4 mtsrr1 r3 sync isync rfi -_ASM_NOKPROBE_SYMBOL(turn_on_mmu) +_ASM_NOKPROBE_SYMBOL(grackle_wake_up) #endif /* defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ) */ - .section .data + .section .bss .balign L1_CACHE_BYTES sleep_storage: - .long 0 + .space SL_SIZE .balign L1_CACHE_BYTES, 0 #endif /* CONFIG_PPC_BOOK3S_32 */ From f10881a46f8914428110d110140a455c66bdf27b Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Tue, 8 Dec 2020 13:54:34 -0600 Subject: [PATCH 226/304] powerpc/rtas: Fix typo of ibm,open-errinjct in RTAS filter Commit bd59380c5ba4 ("powerpc/rtas: Restrict RTAS requests from userspace") introduced the following error when invoking the errinjct userspace tool: [root@ltcalpine2-lp5 librtas]# errinjct open [327884.071171] sys_rtas: RTAS call blocked - exploit attempt? [327884.071186] sys_rtas: token=0x26, nargs=0 (called by errinjct) errinjct: Could not open RTAS error injection facility errinjct: librtas: open: Unexpected I/O error The entry for ibm,open-errinjct in rtas_filter array has a typo where the "j" is omitted in the rtas call name. After fixing this typo the errinjct tool functions again as expected. [root@ltcalpine2-lp5 linux]# errinjct open RTAS error injection facility open, token = 1 Fixes: bd59380c5ba4 ("powerpc/rtas: Restrict RTAS requests from userspace") Cc: stable@vger.kernel.org Signed-off-by: Tyrel Datwyler Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201208195434.8289-1-tyreld@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 9a7d1bba3ef7..d126d71ea5bd 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -957,7 +957,7 @@ static struct rtas_filter rtas_filters[] __ro_after_init = { { "ibm,display-message", -1, 0, -1, -1, -1 }, { "ibm,errinjct", -1, 2, -1, -1, -1, 1024 }, { "ibm,close-errinjct", -1, -1, -1, -1, -1 }, - { "ibm,open-errinct", -1, -1, -1, -1, -1 }, + { "ibm,open-errinjct", -1, -1, -1, -1, -1 }, { "ibm,get-config-addr-info2", -1, -1, -1, -1, -1 }, { "ibm,get-dynamic-sensor-state", -1, 1, -1, -1, -1 }, { "ibm,get-indices", -1, 2, 3, -1, -1 }, From f9158d58a4e1d91f21741e4e8ebe67f770b84e12 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Oct 2020 06:29:27 +0000 Subject: [PATCH 227/304] powerpc/mm: Add mask of always present MMU features On the same principle as commit 773edeadf672 ("powerpc/mm: Add mask of possible MMU features"), add mask for MMU features that are always there in order to optimise out dead branches. 
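To illustrate the effect with a minimal sketch (the caller and the two setup functions below are hypothetical, not part of this patch): on an 8xx build MMU_FTRS_ALWAYS contains MMU_FTR_TYPE_8xx, so the test folds to a compile-time constant and the dead branch is discarded:
	if (early_mmu_has_feature(MMU_FTR_TYPE_8xx))
		setup_8xx_tlb();	/* kept: condition folds to true */
	else
		setup_hash_table();	/* dead branch, optimised out */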
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4943775fbe91885eb3e09133b093aaf62e55c715.1603348103.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/mmu.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 620e8fe6f8fd..ebf50286a924 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -206,8 +206,30 @@ enum { 0, }; +#ifdef CONFIG_PPC_8xx +#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_8xx +#endif +#ifdef CONFIG_40x +#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_40x +#endif +#ifdef CONFIG_PPC_47x +#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_47x +#elif defined(CONFIG_44x) +#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_44x +#endif +#if defined(CONFIG_E200) || defined(CONFIG_E500) +#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_FSL_E +#endif + +#ifndef MMU_FTRS_ALWAYS +#define MMU_FTRS_ALWAYS 0 +#endif + static inline bool early_mmu_has_feature(unsigned long feature) { + if (MMU_FTRS_ALWAYS & feature) + return true; + return !!(MMU_FTRS_POSSIBLE & cur_cpu_spec->mmu_features & feature); } @@ -236,6 +258,9 @@ static __always_inline bool mmu_has_feature(unsigned long feature) } #endif + if (MMU_FTRS_ALWAYS & feature) + return true; + if (!(MMU_FTRS_POSSIBLE & feature)) return false; From a54d310856b9c1fe15ad67a2f8ee9edc02965a3a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Oct 2020 06:29:28 +0000 Subject: [PATCH 228/304] powerpc/mm: Remove flush_tlb_page_nohash() prototype. flush_tlb_page_nohash() was removed by commit 703b41ad1a87 ("powerpc/mm: remove flush_tlb_page_nohash") Remove stale prototype and comment. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4a58831da6d6ba4fe309b94aa1dd8f02982d46b2.1603348103.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/tlbflush.h | 1 - arch/powerpc/include/asm/nohash/tlbflush.h | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h index 068085b709fb..29e292be4f1b 100644 --- a/arch/powerpc/include/asm/book3s/32/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h @@ -8,7 +8,6 @@ */ extern void flush_tlb_mm(struct mm_struct *mm); extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -extern void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr); extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); diff --git a/arch/powerpc/include/asm/nohash/tlbflush.h b/arch/powerpc/include/asm/nohash/tlbflush.h index b1d8fec29169..1edb7243e515 100644 --- a/arch/powerpc/include/asm/nohash/tlbflush.h +++ b/arch/powerpc/include/asm/nohash/tlbflush.h @@ -10,7 +10,6 @@ * - local_flush_tlb_mm(mm, full) flushes the specified mm context on * the local processor * - local_flush_tlb_page(vma, vmaddr) flushes one page on the local processor - * - flush_tlb_page_nohash(vma, vmaddr) flushes one page if SW loaded TLB * - flush_tlb_range(vma, start, end) flushes a range of pages * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages * From 03d5b19c7243d6e605d360972dd7b701e2b1ba72 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Oct 2020 06:29:29 +0000 Subject: [PATCH 229/304] powerpc/32s: Make bat_addrs[] static This table is used only locally. Declare it static. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/054fec0c139fc4c0a306360b360784733c0a6e65.1603348103.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/book3s32/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 23f60e97196e..78c13ca540d4 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -40,7 +40,7 @@ static unsigned int hash_mb, hash_mb2; struct ppc_bat BATS[8][2]; /* 8 pairs of IBAT, DBAT */ -struct batrange { /* stores address ranges mapped by BATs */ +static struct batrange { /* stores address ranges mapped by BATs */ unsigned long start; unsigned long limit; phys_addr_t phys; From 4cc445b4ff456f3a3997c321d7a353360feea04f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Oct 2020 06:29:30 +0000 Subject: [PATCH 230/304] powerpc/32s: Use mmu_has_feature(MMU_FTR_HPTE_TABLE) instead of checking Hash var We now have an early hash table on hash MMU, so no need to check Hash var to know if the Hash table is set up or not. Use mmu_has_feature(MMU_FTR_HPTE_TABLE) instead. This will allow optimisation via jump_label. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f1766631a9e014b6433f1a3c12c726ddfce34220.1603348103.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/book3s32/mmu.c | 2 +- arch/powerpc/mm/book3s32/tlb.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 78c13ca540d4..98d08697f3a9 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -306,7 +306,7 @@ static void hash_preload(struct mm_struct *mm, unsigned long ea) { pmd_t *pmd; - if (!Hash) + if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) return; pmd = pmd_off(mm, ea); if (!pmd_none(*pmd)) diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c index b6c7427daa6f..ae5dbba95805 100644 --- a/arch/powerpc/mm/book3s32/tlb.c +++ b/arch/powerpc/mm/book3s32/tlb.c @@ -36,7 +36,7 @@ void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr) { unsigned long ptephys; - if (Hash) { + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) { ptephys = __pa(ptep) & PAGE_MASK; flush_hash_pages(mm->context.id, addr, ptephys, 1); } @@ -49,7 +49,7 @@ EXPORT_SYMBOL(flush_hash_entry); */ void tlb_flush(struct mmu_gather *tlb) { - if (!Hash) { + if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) { /* * 603 needs to flush the whole TLB here since * it doesn't use a hash table. @@ -80,7 +80,7 @@ static void flush_range(struct mm_struct *mm, unsigned long start, unsigned int ctx = mm->context.id; start &= PAGE_MASK; - if (!Hash) { + if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) { if (end - start <= PAGE_SIZE) _tlbie(start); else @@ -122,7 +122,7 @@ void flush_tlb_mm(struct mm_struct *mm) { struct vm_area_struct *mp; - if (!Hash) { + if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) { _tlbia(); return; } @@ -143,7 +143,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) struct mm_struct *mm; pmd_t *pmd; - if (!Hash) { + if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) { _tlbie(vmaddr); return; } From 4b74a35fc7e9b8efd9067b8a365bab0fefe889ff Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Oct 2020 06:29:31 +0000 Subject: [PATCH 231/304] powerpc/32s: Make Hash var static Hash var is used only locally in mmu.c now. No need to set it in head_32.S anymore.
Make it static and initialise it to the early hash table. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/786c82a89cdfdaabb32b72a44f7c312fa81d192b.1603348103.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 5 ----- arch/powerpc/mm/book3s32/mmu.c | 2 +- arch/powerpc/mm/mmu_decl.h | 1 - 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index a93c75ca8c8e..b102eca44874 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -155,10 +155,8 @@ __after_mmu_off: bl initial_bats bl load_segment_registers -BEGIN_MMU_FTR_SECTION bl reloc_offset bl early_hash_table -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) #if defined(CONFIG_BOOTX_TEXT) bl setup_disp_bat #endif @@ -922,9 +920,6 @@ early_hash_table: lis r6, early_hash - PAGE_OFFSET@h ori r6, r6, 3 /* 256kB table */ mtspr SPRN_SDR1, r6 - lis r6, early_hash@h - addis r3, r3, Hash@ha - stw r6, Hash@l(r3) blr load_up_mmu: diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 98d08697f3a9..4e01b7ec1dbe 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -33,7 +33,7 @@ u8 __initdata early_hash[SZ_256K] __aligned(SZ_256K) = {0}; -struct hash_pte *Hash; +static struct hash_pte *Hash = (struct hash_pte *)early_hash; static unsigned long Hash_size, Hash_mask; unsigned long _SDR1; static unsigned int hash_mb, hash_mb2; diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 0ad6d476d01d..8326eab2321c 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -99,7 +99,6 @@ extern int __map_without_bats; extern unsigned int rtas_data, rtas_size; struct hash_pte; -extern struct hash_pte *Hash; extern u8 early_hash[]; #endif /* CONFIG_PPC32 */ From 6e980b5c56a266de479fcd022a03e094574e9a03 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Oct 2020 06:29:32 +0000 Subject: [PATCH 232/304] powerpc/32s: Declare Hash related vars as __initdata Hash related vars are used at init only. Declare them as __initdata. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3878ea30706839fcff9196790ff3f99c128c3f6a.1603348103.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/book3s32/mmu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 4e01b7ec1dbe..081dd98c8232 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -33,10 +33,10 @@ u8 __initdata early_hash[SZ_256K] __aligned(SZ_256K) = {0}; -static struct hash_pte *Hash = (struct hash_pte *)early_hash; -static unsigned long Hash_size, Hash_mask; -unsigned long _SDR1; -static unsigned int hash_mb, hash_mb2; +static struct hash_pte __initdata *Hash = (struct hash_pte *)early_hash; +static unsigned long __initdata Hash_size, Hash_mask; +static unsigned int __initdata hash_mb, hash_mb2; +unsigned long __initdata _SDR1; struct ppc_bat BATS[8][2]; /* 8 pairs of IBAT, DBAT */ From cfe32ad0b3dc74df34ab6fea38ccb1e53f904a10 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Oct 2020 06:29:33 +0000 Subject: [PATCH 233/304] powerpc/32s: Move _tlbie() and _tlbia() prototypes to tlbflush.h In order to use _tlbie() and _tlbia() directly from asm/book3s/32/tlbflush.h, move their prototypes from mm/mmu_decl.h to there.
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/867587af929973ad65f8ef6972f2474a80c1737a.1603348103.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/tlbflush.h | 4 ++++ arch/powerpc/mm/mmu_decl.h | 3 --- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h index 29e292be4f1b..3043e7af70aa 100644 --- a/arch/powerpc/include/asm/book3s/32/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h @@ -11,6 +11,10 @@ extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); + +void _tlbie(unsigned long address); +void _tlbia(void); + static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 8326eab2321c..998810e68562 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -82,9 +82,6 @@ static inline void print_system_hash_info(void) {} #else /* CONFIG_PPC_MMU_NOHASH */ -extern void _tlbie(unsigned long address); -extern void _tlbia(void); - void print_system_hash_info(void); #endif /* CONFIG_PPC_MMU_NOHASH */ From b91280f3f36d64cc6f8022893af00935c99de197 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Oct 2020 06:29:34 +0000 Subject: [PATCH 234/304] powerpc/32s: Inline _tlbie() on non SMP On non SMP, _tlbie() is just a tlbie plus a sync instruction. Make it static inline. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/475136425541db5c7c8a0395d19d400525b251bc.1603348103.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/tlbflush.h | 7 +++++++ arch/powerpc/mm/book3s32/hash_low.S | 7 ++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h index 3043e7af70aa..f392a619138d 100644 --- a/arch/powerpc/include/asm/book3s/32/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h @@ -12,7 +12,14 @@ extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); +#ifdef CONFIG_SMP void _tlbie(unsigned long address); +#else +static inline void _tlbie(unsigned long address) +{ + asm volatile ("tlbie %0; sync" : : "r" (address) : "memory"); +} +#endif void _tlbia(void); static inline void local_flush_tlb_page(struct vm_area_struct *vma, diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index aca353d1c5f4..ce1459dd08ba 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -622,8 +622,8 @@ _ASM_NOKPROBE_SYMBOL(flush_hash_pages) /* * Flush an entry from the TLB */ -_GLOBAL(_tlbie) #ifdef CONFIG_SMP +_GLOBAL(_tlbie) lwz r8,TASK_CPU(r2) oris r8,r8,11 mfmsr r10 @@ -647,12 +647,9 @@ _GLOBAL(_tlbie) stw r0,0(r9) /* clear mmu_hash_lock */ mtmsr r10 isync -#else /* CONFIG_SMP */ - tlbie r3 - sync -#endif /* CONFIG_SMP */ blr _ASM_NOKPROBE_SYMBOL(_tlbie) +#endif /* CONFIG_SMP */ /* * Flush the entire TLB. 
603/603e only From f265512582a047e09390b1b41384f365d7dc806f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Oct 2020 06:29:35 +0000 Subject: [PATCH 235/304] powerpc/32s: Move _tlbie() and _tlbia() in a new file _tlbie() and _tlbia() are used only on 603 cores while the other functions are used only on cores having a hash table. Move them into a new file named nohash_low.S. As the mmu_hash_lock variable is used by both, it needs to go in a common file. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9a265b1b17a64153463d361280cb4b43eb1266a4.1603348103.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/book3s32/Makefile | 2 +- arch/powerpc/mm/book3s32/hash_low.S | 78 -------------------------- arch/powerpc/mm/book3s32/mmu.c | 4 ++ arch/powerpc/mm/book3s32/nohash_low.S | 80 +++++++++++++++++++++++++++ 4 files changed, 85 insertions(+), 79 deletions(-) create mode 100644 arch/powerpc/mm/book3s32/nohash_low.S diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile index 1732eaa740a9..3f972db17761 100644 --- a/arch/powerpc/mm/book3s32/Makefile +++ b/arch/powerpc/mm/book3s32/Makefile @@ -6,4 +6,4 @@ ifdef CONFIG_KASAN CFLAGS_mmu.o += -DDISABLE_BRANCH_PROFILING endif -obj-y += mmu.o hash_low.o mmu_context.o tlb.o +obj-y += mmu.o hash_low.o mmu_context.o tlb.o nohash_low.o diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index ce1459dd08ba..ceb90a6e3256 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -26,13 +26,6 @@ #include #include -#ifdef CONFIG_SMP - .section .bss - .align 2 -mmu_hash_lock: - .space 4 -#endif /* CONFIG_SMP */ - /* * Load a PTE into the hash table, if possible. * The address is in r4, and r3 contains an access flag: @@ -618,74 +611,3 @@ _GLOBAL(flush_hash_pages) .previous EXPORT_SYMBOL(flush_hash_pages) _ASM_NOKPROBE_SYMBOL(flush_hash_pages) - -/* - * Flush an entry from the TLB - */ -#ifdef CONFIG_SMP -_GLOBAL(_tlbie) - lwz r8,TASK_CPU(r2) - oris r8,r8,11 - mfmsr r10 - rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ - rlwinm r0,r0,0,28,26 /* clear DR */ - mtmsr r0 - isync - lis r9,mmu_hash_lock@h - ori r9,r9,mmu_hash_lock@l - tophys(r9,r9) -10: lwarx r7,0,r9 - cmpwi 0,r7,0 - bne- 10b - stwcx. r8,0,r9 - bne- 10b - eieio - tlbie r3 - sync - TLBSYNC - li r0,0 - stw r0,0(r9) /* clear mmu_hash_lock */ - mtmsr r10 - isync - blr -_ASM_NOKPROBE_SYMBOL(_tlbie) -#endif /* CONFIG_SMP */ - -/* - * Flush the entire TLB. 603/603e only - */ -_GLOBAL(_tlbia) -#if defined(CONFIG_SMP) - lwz r8,TASK_CPU(r2) - oris r8,r8,10 - mfmsr r10 - rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ - rlwinm r0,r0,0,28,26 /* clear DR */ - mtmsr r0 - isync - lis r9,mmu_hash_lock@h - ori r9,r9,mmu_hash_lock@l - tophys(r9,r9) -10: lwarx r7,0,r9 - cmpwi 0,r7,0 - bne- 10b - stwcx.
r8,0,r9 - bne- 10b -#endif /* CONFIG_SMP */ - li r5, 32 - lis r4, KERNELBASE@h - mtctr r5 - sync -0: tlbie r4 - addi r4, r4, 0x1000 - bdnz 0b - sync -#ifdef CONFIG_SMP - TLBSYNC - li r0,0 - stw r0,0(r9) /* clear mmu_hash_lock */ - mtmsr r10 - isync -#endif /* CONFIG_SMP */ - blr -_ASM_NOKPROBE_SYMBOL(_tlbia) diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 081dd98c8232..1e03607d7c78 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -46,6 +46,10 @@ static struct batrange { /* stores address ranges mapped by BATs */ phys_addr_t phys; } bat_addrs[8]; +#ifdef CONFIG_SMP +unsigned long mmu_hash_lock; +#endif + /* * Return PA for this VA if it is mapped by a BAT, or 0 */ diff --git a/arch/powerpc/mm/book3s32/nohash_low.S b/arch/powerpc/mm/book3s32/nohash_low.S new file mode 100644 index 000000000000..19f418b0ed2d --- /dev/null +++ b/arch/powerpc/mm/book3s32/nohash_low.S @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * This file contains low-level assembler routines for managing + * the PowerPC 603 tlb invalidation. + */ + +#include +#include +#include + +/* + * Flush an entry from the TLB + */ +#ifdef CONFIG_SMP +_GLOBAL(_tlbie) + lwz r8,TASK_CPU(r2) + oris r8,r8,11 + mfmsr r10 + rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ + rlwinm r0,r0,0,28,26 /* clear DR */ + mtmsr r0 + isync + lis r9,mmu_hash_lock@h + ori r9,r9,mmu_hash_lock@l + tophys(r9,r9) +10: lwarx r7,0,r9 + cmpwi 0,r7,0 + bne- 10b + stwcx. r8,0,r9 + bne- 10b + eieio + tlbie r3 + sync + TLBSYNC + li r0,0 + stw r0,0(r9) /* clear mmu_hash_lock */ + mtmsr r10 + isync + blr +_ASM_NOKPROBE_SYMBOL(_tlbie) +#endif /* CONFIG_SMP */ + +/* + * Flush the entire TLB. 603/603e only + */ +_GLOBAL(_tlbia) +#if defined(CONFIG_SMP) + lwz r8,TASK_CPU(r2) + oris r8,r8,10 + mfmsr r10 + rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ + rlwinm r0,r0,0,28,26 /* clear DR */ + mtmsr r0 + isync + lis r9,mmu_hash_lock@h + ori r9,r9,mmu_hash_lock@l + tophys(r9,r9) +10: lwarx r7,0,r9 + cmpwi 0,r7,0 + bne- 10b + stwcx. r8,0,r9 + bne- 10b +#endif /* CONFIG_SMP */ + li r5, 32 + lis r4, KERNELBASE@h + mtctr r5 + sync +0: tlbie r4 + addi r4, r4, 0x1000 + bdnz 0b + sync +#ifdef CONFIG_SMP + TLBSYNC + li r0,0 + stw r0,0(r9) /* clear mmu_hash_lock */ + mtmsr r10 + isync +#endif /* CONFIG_SMP */ + blr +_ASM_NOKPROBE_SYMBOL(_tlbia) From fd1b4b7f51d0d75b73eeda41ef459ea7791aaab2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Oct 2020 06:29:36 +0000 Subject: [PATCH 236/304] powerpc/32s: Split and inline flush_tlb_mm() and flush_tlb_page() flush_tlb_mm() and flush_tlb_page() handle both the MMU_FTR_HPTE_TABLE case and the other case. The non MMU_FTR_HPTE_TABLE case is trivial as it is only a call to _tlbie()/_tlbia() which is not worth a dedicated function. Make flush_tlb_mm() and flush_tlb_page() hash specific and call them from tlbflush.h based on mmu_has_feature(MMU_FTR_HPTE_TABLE). 
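For illustration (a sketch of the resulting behaviour; the call sites are hypothetical): the selection now happens inline at each caller:
	flush_tlb_page(vma, vmaddr);	/* hash MMU: hash__flush_tlb_page() */
					/* 603, no hash table: _tlbie(vmaddr) */
	flush_tlb_mm(mm);		/* hash MMU: hash__flush_tlb_mm() */
					/* 603, no hash table: _tlbia() */
With jump labels enabled, the inline mmu_has_feature() test compiles down to a static branch.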
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/11e932ded41ba6d9b251d89b7afa33cc060d3aa4.1603348103.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/tlbflush.h | 20 +++++++++++++++++-- arch/powerpc/mm/book3s32/tlb.c | 17 ++++------------ 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h index f392a619138d..542765944531 100644 --- a/arch/powerpc/include/asm/book3s/32/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h @@ -6,8 +6,8 @@ /* * TLB flushing for "classic" hash-MMU 32-bit CPUs, 6xx, 7xx, 7xxx */ -extern void flush_tlb_mm(struct mm_struct *mm); -extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); +void hash__flush_tlb_mm(struct mm_struct *mm); +void hash__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); @@ -22,6 +22,22 @@ static inline void _tlbie(unsigned long address) #endif void _tlbia(void); +static inline void flush_tlb_mm(struct mm_struct *mm) +{ + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) + hash__flush_tlb_mm(mm); + else + _tlbia(); +} + +static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) +{ + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) + hash__flush_tlb_page(vma, vmaddr); + else + _tlbie(vmaddr); +} + static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c index ae5dbba95805..65389bfe2eb8 100644 --- a/arch/powerpc/mm/book3s32/tlb.c +++ b/arch/powerpc/mm/book3s32/tlb.c @@ -118,15 +118,10 @@ EXPORT_SYMBOL(flush_tlb_kernel_range); /* * Flush all the (user) entries for the address space described by mm. */ -void flush_tlb_mm(struct mm_struct *mm) +void hash__flush_tlb_mm(struct mm_struct *mm) { struct vm_area_struct *mp; - if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) { - _tlbia(); - return; - } - /* * It is safe to go down the mm's list of vmas when called * from dup_mmap, holding mmap_lock. It would also be safe from @@ -136,23 +131,19 @@ void flush_tlb_mm(struct mm_struct *mm) for (mp = mm->mmap; mp != NULL; mp = mp->vm_next) flush_range(mp->vm_mm, mp->vm_start, mp->vm_end); } -EXPORT_SYMBOL(flush_tlb_mm); +EXPORT_SYMBOL(hash__flush_tlb_mm); -void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) +void hash__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { struct mm_struct *mm; pmd_t *pmd; - if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) { - _tlbie(vmaddr); - return; - } mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; pmd = pmd_off(mm, vmaddr); if (!pmd_none(*pmd)) flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); } -EXPORT_SYMBOL(flush_tlb_page); +EXPORT_SYMBOL(hash__flush_tlb_page); /* * For each address in the range, find the pte for the address From 1e83396f29d75aae8a1d365f597996fec87ca4d0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Oct 2020 06:29:37 +0000 Subject: [PATCH 237/304] powerpc/32s: Inline flush_tlb_range() and flush_tlb_kernel_range() flush_tlb_range() and flush_tlb_kernel_range() are trivial calls to flush_range(). Make flush_range() global and inline flush_tlb_range() and flush_tlb_kernel_range(). 
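For illustration (hypothetical call, not from this patch): a kernel range flush such as
	flush_tlb_kernel_range(start, end);
now simply expands inline to flush_range(&init_mm, start, end), with no exported wrapper in between.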
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c7029a78e78709ad9272d7a44260e06b649169b2.1603348103.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/tlbflush.h | 15 ++++++++-- arch/powerpc/mm/book3s32/tlb.c | 30 +++++-------------- 2 files changed, 19 insertions(+), 26 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h index 542765944531..2f480d184526 100644 --- a/arch/powerpc/include/asm/book3s/32/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h @@ -8,9 +8,7 @@ */ void hash__flush_tlb_mm(struct mm_struct *mm); void hash__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end); -extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); +void flush_range(struct mm_struct *mm, unsigned long start, unsigned long end); #ifdef CONFIG_SMP void _tlbie(unsigned long address); @@ -38,6 +36,17 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmad _tlbie(vmaddr); } +static inline void +flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) +{ + flush_range(vma->vm_mm, start, end); +} + +static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + flush_range(&init_mm, start, end); +} + static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c index 65389bfe2eb8..f9b8e1ce4371 100644 --- a/arch/powerpc/mm/book3s32/tlb.c +++ b/arch/powerpc/mm/book3s32/tlb.c @@ -71,8 +71,12 @@ void tlb_flush(struct mmu_gather *tlb) * -- Cort */ -static void flush_range(struct mm_struct *mm, unsigned long start, - unsigned long end) +/* + * For each address in the range, find the pte for the address + * and check _PAGE_HASHPTE bit; if it is set, find and destroy + * the corresponding HPTE. + */ +void flush_range(struct mm_struct *mm, unsigned long start, unsigned long end) { pmd_t *pmd; unsigned long pmd_end; @@ -105,15 +109,7 @@ static void flush_range(struct mm_struct *mm, unsigned long start, ++pmd; } } - -/* - * Flush kernel TLB entries in the given range - */ -void flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ - flush_range(&init_mm, start, end); -} -EXPORT_SYMBOL(flush_tlb_kernel_range); +EXPORT_SYMBOL(flush_range); /* * Flush all the (user) entries for the address space described by mm. @@ -145,18 +141,6 @@ void hash__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) } EXPORT_SYMBOL(hash__flush_tlb_page); -/* - * For each address in the range, find the pte for the address - * and check _PAGE_HASHPTE bit; if it is set, find and destroy - * the corresponding HPTE. - */ -void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) -{ - flush_range(vma->vm_mm, start, end); -} -EXPORT_SYMBOL(flush_tlb_range); - void __init early_init_mmu(void) { } From 91ec450f8d8c1e599a943c526ab1d2a4acf73c22 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Oct 2020 06:29:38 +0000 Subject: [PATCH 238/304] powerpc/32s: Split and inline flush_range() flush_range() handles both the MMU_FTR_HPTE_TABLE case and the other case. The non MMU_FTR_HPTE_TABLE case is trivial as it is only a call to _tlbie()/_tlbia() which is not worth a dedicated function.
Make flush_range() hash specific and call it from tlbflush.h based on mmu_has_feature(MMU_FTR_HPTE_TABLE). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/132ab19aae52abc8e06ab524ec86d4229b5b9c3d.1603348103.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/tlbflush.h | 13 ++++++++++++- arch/powerpc/mm/book3s32/tlb.c | 13 +++---------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h index 2f480d184526..42708c1719d6 100644 --- a/arch/powerpc/include/asm/book3s/32/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h @@ -8,7 +8,7 @@ */ void hash__flush_tlb_mm(struct mm_struct *mm); void hash__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -void flush_range(struct mm_struct *mm, unsigned long start, unsigned long end); +void hash__flush_range(struct mm_struct *mm, unsigned long start, unsigned long end); #ifdef CONFIG_SMP void _tlbie(unsigned long address); @@ -20,6 +20,17 @@ static inline void _tlbie(unsigned long address) #endif void _tlbia(void); +static inline void flush_range(struct mm_struct *mm, unsigned long start, unsigned long end) +{ + start &= PAGE_MASK; + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) + hash__flush_range(mm, start, end); + else if (end - start <= PAGE_SIZE) + _tlbie(start); + else + _tlbia(); +} + static inline void flush_tlb_mm(struct mm_struct *mm) { if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c index f9b8e1ce4371..f0edbad5966c 100644 --- a/arch/powerpc/mm/book3s32/tlb.c +++ b/arch/powerpc/mm/book3s32/tlb.c @@ -76,7 +76,7 @@ void tlb_flush(struct mmu_gather *tlb) * and check _PAGE_HASHPTE bit; if it is set, find and destroy * the corresponding HPTE. */ -void flush_range(struct mm_struct *mm, unsigned long start, unsigned long end) +void hash__flush_range(struct mm_struct *mm, unsigned long start, unsigned long end) { pmd_t *pmd; unsigned long pmd_end; @@ -84,13 +84,6 @@ void flush_range(struct mm_struct *mm, unsigned long start, unsigned long end) unsigned int ctx = mm->context.id; start &= PAGE_MASK; - if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) { - if (end - start <= PAGE_SIZE) - _tlbie(start); - else - _tlbia(); - return; - } if (start >= end) return; end = (end - 1) | ~PAGE_MASK; @@ -109,7 +102,7 @@ void flush_range(struct mm_struct *mm, unsigned long start, unsigned long end) ++pmd; } } -EXPORT_SYMBOL(flush_range); +EXPORT_SYMBOL(hash__flush_range); /* * Flush all the (user) entries for the address space described by mm. @@ -125,7 +118,7 @@ void hash__flush_tlb_mm(struct mm_struct *mm) * but it seems dup_mmap is the only SMP case which gets here. */ for (mp = mm->mmap; mp != NULL; mp = mp->vm_next) - flush_range(mp->vm_mm, mp->vm_start, mp->vm_end); + hash__flush_range(mp->vm_mm, mp->vm_start, mp->vm_end); } EXPORT_SYMBOL(hash__flush_tlb_mm); From ef08d95546ccea540f6a592b89822bb085bf09c6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Oct 2020 06:29:39 +0000 Subject: [PATCH 239/304] powerpc/32s: Inline tlb_flush() On book3s/32, tlb_flush() does nothing when the CPU has a hash table, it calls _tlbia() otherwise. Inline it.
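For context, a simplified sketch of where this hook runs in the generic mmu_gather flow (details elided):
	struct mmu_gather tlb;

	tlb_gather_mmu(&tlb, mm, start, end);
	/* ... unmap pages, accumulating TLB work ... */
	tlb_finish_mmu(&tlb, start, end);	/* ends up calling tlb_flush(&tlb) */
With the inline version, a hash MMU kernel has nothing left to do at this point, while a 603 build reduces to a single _tlbia() call.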
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ebc933d1c530a19ef3cf7983f6ae94814f6e92ac.1603348103.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/tlbflush.h | 11 +++++++++++ arch/powerpc/mm/book3s32/tlb.c | 15 --------------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h index 42708c1719d6..d941c06d4f2e 100644 --- a/arch/powerpc/include/asm/book3s/32/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h @@ -20,6 +20,17 @@ static inline void _tlbie(unsigned long address) #endif void _tlbia(void); +/* + * Called at the end of a mmu_gather operation to make sure the + * TLB flush is completely done. + */ +static inline void tlb_flush(struct mmu_gather *tlb) +{ + /* 603 needs to flush the whole TLB here since it doesn't use a hash table. */ + if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) + _tlbia(); +} + static inline void flush_range(struct mm_struct *mm, unsigned long start, unsigned long end) { start &= PAGE_MASK; diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c index f0edbad5966c..e7865a3f0231 100644 --- a/arch/powerpc/mm/book3s32/tlb.c +++ b/arch/powerpc/mm/book3s32/tlb.c @@ -43,21 +43,6 @@ void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr) } EXPORT_SYMBOL(flush_hash_entry); -/* - * Called at the end of a mmu_gather operation to make sure the - * TLB flush is completely done. - */ -void tlb_flush(struct mmu_gather *tlb) -{ - if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) { - /* - * 603 needs to flush the whole TLB here since - * it doesn't use a hash table. - */ - _tlbia(); - } -} - /* * TLB flushing: * From 80007a17fc59bc2766f7d5cb2f79b4c65651504b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Oct 2020 06:29:40 +0000 Subject: [PATCH 240/304] powerpc/32s: Inline flush_hash_entry() flush_hash_entry() is a simple function calling flush_hash_pages() if it's a hash MMU or doing nothing otherwise. Inline it. And use it also in __ptep_test_and_clear_young(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9af895be7d4b404d40e749a2659552fd138e62c4.1603348103.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/pgtable.h | 17 +++++++++++------ arch/powerpc/include/asm/tlb.h | 3 --- arch/powerpc/mm/book3s32/tlb.c | 14 -------------- 3 files changed, 11 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 41d8bc6db303..1d6a7dafd3dc 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -238,8 +238,14 @@ extern void add_hash_page(unsigned context, unsigned long va, unsigned long pmdval); /* Flush an entry from the TLB/hash table */ -extern void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, - unsigned long address); +static inline void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr) +{ + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) { + unsigned long ptephys = __pa(ptep) & PAGE_MASK; + + flush_hash_pages(mm->context.id, addr, ptephys, 1); + } +} /* * PTE updates. 
This function is called whenever an existing @@ -291,10 +297,9 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, { unsigned long old; old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0); - if (old & _PAGE_HASHPTE) { - unsigned long ptephys = __pa(ptep) & PAGE_MASK; - flush_hash_pages(mm->context.id, addr, ptephys, 1); - } + if (old & _PAGE_HASHPTE) + flush_hash_entry(mm, ptep, addr); + return (old & _PAGE_ACCESSED) != 0; } #define ptep_test_and_clear_young(__vma, __addr, __ptep) \ diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index d97f061fecac..160422a439aa 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -40,9 +40,6 @@ extern void tlb_flush(struct mmu_gather *tlb); /* Get the generic bits... */ #include -extern void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, - unsigned long address); - static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long address) { diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c index e7865a3f0231..0d412953fe58 100644 --- a/arch/powerpc/mm/book3s32/tlb.c +++ b/arch/powerpc/mm/book3s32/tlb.c @@ -29,20 +29,6 @@ #include -/* - * Called when unmapping pages to flush entries from the TLB/hash table. - */ -void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr) -{ - unsigned long ptephys; - - if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) { - ptephys = __pa(ptep) & PAGE_MASK; - flush_hash_pages(mm->context.id, addr, ptephys, 1); - } -} -EXPORT_SYMBOL(flush_hash_entry); - /* * TLB flushing: * From 068fdba10ea54b6ebc12c2b2d85020b2137316d1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Oct 2020 06:29:41 +0000 Subject: [PATCH 241/304] powerpc/32s: Move early_init_mmu() into mmu.c early_init_mmu() is independent of MMU type and not directly linked to tlb handling. In a following patch, tlb.c will be restricted to HASH mmu. Move early_init_mmu() into mmu.c, which is common. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e51b5e2fe6bca623b33116403043d3a1b5eaf826.1603348103.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/book3s32/mmu.c | 4 ++++ arch/powerpc/mm/book3s32/tlb.c | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 1e03607d7c78..859e5bd603ac 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -471,3 +471,7 @@ void __init setup_kuap(bool disabled) pr_warn("KUAP cannot be disabled yet on 6xx when compiled in\n"); } #endif + +void __init early_init_mmu(void) +{ +} diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c index 0d412953fe58..19f0ef950d77 100644 --- a/arch/powerpc/mm/book3s32/tlb.c +++ b/arch/powerpc/mm/book3s32/tlb.c @@ -104,7 +104,3 @@ void hash__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); } EXPORT_SYMBOL(hash__flush_tlb_page); - -void __init early_init_mmu(void) -{ -} From a6a50d8495d098b6459166c3707ab251d3dc9e06 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Oct 2020 06:29:42 +0000 Subject: [PATCH 242/304] powerpc/32s: Remove CONFIG_PPC_BOOK3S_6xx As 601 is gone, CONFIG_PPC_BOOK3S_6xx and CONFIG_PPC_BOOK3S_32 are redundant. Remove CONFIG_PPC_BOOK3S_6xx.
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f18c16af37f6f77b577bed8d9e12831b695617ae.1603348103.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/cputable.c | 4 ++-- arch/powerpc/platforms/Kconfig.cputype | 6 +----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index f2fcd29aab23..086eadd5c3c8 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -611,7 +611,7 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_PPC32 -#ifdef CONFIG_PPC_BOOK3S_6xx +#ifdef CONFIG_PPC_BOOK3S_32 { /* 603 */ .pvr_mask = 0xffff0000, .pvr_value = 0x00030000, @@ -1241,7 +1241,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "ppc603", }, -#endif /* CONFIG_PPC_BOOK3S_6xx */ +#endif /* CONFIG_PPC_BOOK3S_32 */ #ifdef CONFIG_PPC_8xx { /* 8xx */ .pvr_mask = 0xffff0000, diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index aa071663b9a9..14a8481f36a8 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -11,9 +11,6 @@ config PPC64 This option selects whether a 32-bit or a 64-bit kernel will be built. -config PPC_BOOK3S_32 - bool - menu "Processor support" choice prompt "Processor Type" @@ -29,9 +26,8 @@ choice If unsure, select 52xx/6xx/7xx/74xx/82xx/83xx/86xx. -config PPC_BOOK3S_6xx +config PPC_BOOK3S_32 bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx" - select PPC_BOOK3S_32 imply PPC_FPU select PPC_HAVE_PMU_SUPPORT select PPC_HAVE_KUEP From ad510e37e4b48f7da462650946aeaa078b977277 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Oct 2020 06:29:43 +0000 Subject: [PATCH 243/304] powerpc/32s: Regroup 603 based CPUs in cputable In order to selectively build the kernel for 603 SW TLB handling, regroup all 603 based CPUs together. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/45065263fdb9f5cc2a2d210ec2a762ac8bf5b2bc.1603348103.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cputable.h | 14 +++--- arch/powerpc/kernel/cputable.c | 78 ++++++++++++++--------------- 2 files changed, 47 insertions(+), 45 deletions(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 8a4e1ed8a4a2..b088c3443a99 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -503,15 +503,16 @@ static inline void cpu_feature_keys_init(void) { } enum { CPU_FTRS_POSSIBLE = #ifdef CONFIG_PPC_BOOK3S_32 - CPU_FTRS_603 | CPU_FTRS_604 | CPU_FTRS_740_NOTAU | + CPU_FTRS_604 | CPU_FTRS_740_NOTAU | CPU_FTRS_740 | CPU_FTRS_750 | CPU_FTRS_750FX1 | CPU_FTRS_750FX2 | CPU_FTRS_750FX | CPU_FTRS_750GX | CPU_FTRS_7400_NOTAU | CPU_FTRS_7400 | CPU_FTRS_7450_20 | CPU_FTRS_7450_21 | CPU_FTRS_7450_23 | CPU_FTRS_7455_1 | CPU_FTRS_7455_20 | CPU_FTRS_7455 | CPU_FTRS_7447_10 | - CPU_FTRS_7447 | CPU_FTRS_7447A | CPU_FTRS_82XX | - CPU_FTRS_G2_LE | CPU_FTRS_E300 | CPU_FTRS_E300C2 | + CPU_FTRS_7447 | CPU_FTRS_7447A | CPU_FTRS_CLASSIC32 | + CPU_FTRS_603 | CPU_FTRS_82XX | + CPU_FTRS_G2_LE | CPU_FTRS_E300 | CPU_FTRS_E300C2 | #endif #ifdef CONFIG_PPC_8xx CPU_FTRS_8XX | @@ -572,15 +573,16 @@ enum { enum { CPU_FTRS_ALWAYS = #ifdef CONFIG_PPC_BOOK3S_32 - CPU_FTRS_603 & CPU_FTRS_604 & CPU_FTRS_740_NOTAU & + CPU_FTRS_604 & CPU_FTRS_740_NOTAU & CPU_FTRS_740 & CPU_FTRS_750 & CPU_FTRS_750FX1 & CPU_FTRS_750FX2 & CPU_FTRS_750FX & CPU_FTRS_750GX & CPU_FTRS_7400_NOTAU & CPU_FTRS_7400 & CPU_FTRS_7450_20 & CPU_FTRS_7450_21 & CPU_FTRS_7450_23 & CPU_FTRS_7455_1 & CPU_FTRS_7455_20 & CPU_FTRS_7455 & CPU_FTRS_7447_10 & - CPU_FTRS_7447 & CPU_FTRS_7447A & CPU_FTRS_82XX & - CPU_FTRS_G2_LE & CPU_FTRS_E300 & CPU_FTRS_E300C2 & + CPU_FTRS_7447 & CPU_FTRS_7447A & CPU_FTRS_CLASSIC32 & + CPU_FTRS_603 & CPU_FTRS_82XX & + CPU_FTRS_G2_LE & CPU_FTRS_E300 & CPU_FTRS_E300C2 & #endif #ifdef CONFIG_PPC_8xx CPU_FTRS_8XX & diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 086eadd5c3c8..5bf971f7c325 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -612,45 +612,6 @@ static struct cpu_spec __initdata cpu_specs[] = { #ifdef CONFIG_PPC32 #ifdef CONFIG_PPC_BOOK3S_32 - { /* 603 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00030000, - .cpu_name = "603", - .cpu_features = CPU_FTRS_603, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = 0, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_generic, - .platform = "ppc603", - }, - { /* 603e */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00060000, - .cpu_name = "603e", - .cpu_features = CPU_FTRS_603, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = 0, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_generic, - .platform = "ppc603", - }, - { /* 603ev */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00070000, - .cpu_name = "603ev", - .cpu_features = CPU_FTRS_603, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = 0, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_generic, - .platform = "ppc603", - }, { /* 604 */ .pvr_mask = 0xffff0000, .pvr_value = 0x00040000, @@ -1140,6 +1101,45 @@ static struct cpu_spec __initdata cpu_specs[] = { 
.machine_check = machine_check_generic, .platform = "ppc7450", }, + { /* 603 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00030000, + .cpu_name = "603", + .cpu_features = CPU_FTRS_603, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = 0, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, + { /* 603e */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00060000, + .cpu_name = "603e", + .cpu_features = CPU_FTRS_603, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = 0, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, + { /* 603ev */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00070000, + .cpu_name = "603ev", + .cpu_features = CPU_FTRS_603, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = 0, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, { /* 82xx (8240, 8245, 8260 are all 603e cores) */ .pvr_mask = 0x7fff0000, .pvr_value = 0x00810000, From 44e9754d63c7b419874e4c18c0b5e7a770e058c6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Oct 2020 06:29:44 +0000 Subject: [PATCH 244/304] powerpc/32s: Make support for 603 and 604+ selectable book3s/32 has two main families: - CPUs with 603 cores that don't have a HASH PTE table and perform SW TLB loading. - Other CPUs based on 604+ cores that have a HASH PTE table. This leads to some complex logic and additional code to support both. This makes sense for distribution kernels that aim at running on any CPU, but when you are fine-tuning a kernel for an embedded 603-based board you don't need all the HASH logic. Allow selection of support for each family, in order to opt out of unneeded parts of the code. At least one must be selected. Note that some of the CPUs supporting HASH also support SW TLB loading, however it is not supported by the Linux kernel for the time being, because they do not have alternate registers in the TLB miss exception handlers.
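For illustration, a sketch of the pay-off assuming a 603-only configuration (CONFIG_PPC_BOOK3S_603=y, CONFIG_PPC_BOOK3S_604 not set): MMU_FTRS_POSSIBLE then no longer contains MMU_FTR_HPTE_TABLE, so mmu_has_feature(MMU_FTR_HPTE_TABLE) becomes a compile-time false and the hash side of the inline helpers introduced earlier in the series folds away:
	static inline void flush_tlb_mm(struct mm_struct *mm)
	{
		if (mmu_has_feature(MMU_FTR_HPTE_TABLE))	/* constant false */
			hash__flush_tlb_mm(mm);			/* discarded at build time */
		else
			_tlbia();				/* only this remains */
	}
Conversely, with only CONFIG_PPC_BOOK3S_604 selected, MMU_FTRS_ALWAYS contains MMU_FTR_HPTE_TABLE and the _tlbia() side is dropped instead.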
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8dde0cdb629a71abc29b0d85a52a86e920376cb6.1603348103.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cputable.h | 8 ++++++-- arch/powerpc/include/asm/mmu.h | 5 ++++- arch/powerpc/kernel/cputable.c | 6 ++++++ arch/powerpc/platforms/Kconfig.cputype | 16 ++++++++++++++++ 4 files changed, 32 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index b088c3443a99..5f21a5bab467 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -502,7 +502,7 @@ static inline void cpu_feature_keys_init(void) { } #else enum { CPU_FTRS_POSSIBLE = -#ifdef CONFIG_PPC_BOOK3S_32 +#ifdef CONFIG_PPC_BOOK3S_604 CPU_FTRS_604 | CPU_FTRS_740_NOTAU | CPU_FTRS_740 | CPU_FTRS_750 | CPU_FTRS_750FX1 | CPU_FTRS_750FX2 | CPU_FTRS_750FX | CPU_FTRS_750GX | @@ -511,6 +511,8 @@ enum { CPU_FTRS_7455_20 | CPU_FTRS_7455 | CPU_FTRS_7447_10 | CPU_FTRS_7447 | CPU_FTRS_7447A | CPU_FTRS_CLASSIC32 | +#endif +#ifdef CONFIG_PPC_BOOK3S_603 CPU_FTRS_603 | CPU_FTRS_82XX | CPU_FTRS_G2_LE | CPU_FTRS_E300 | CPU_FTRS_E300C2 | #endif @@ -572,7 +574,7 @@ enum { #else enum { CPU_FTRS_ALWAYS = -#ifdef CONFIG_PPC_BOOK3S_32 +#ifdef CONFIG_PPC_BOOK3S_604 CPU_FTRS_604 & CPU_FTRS_740_NOTAU & CPU_FTRS_740 & CPU_FTRS_750 & CPU_FTRS_750FX1 & CPU_FTRS_750FX2 & CPU_FTRS_750FX & CPU_FTRS_750GX & @@ -581,6 +583,8 @@ enum { CPU_FTRS_7455_20 & CPU_FTRS_7455 & CPU_FTRS_7447_10 & CPU_FTRS_7447 & CPU_FTRS_7447A & CPU_FTRS_CLASSIC32 & +#endif +#ifdef CONFIG_PPC_BOOK3S_603 CPU_FTRS_603 & CPU_FTRS_82XX & CPU_FTRS_G2_LE & CPU_FTRS_E300 & CPU_FTRS_E300C2 & #endif diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index ebf50286a924..80b27f5d9648 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -157,7 +157,7 @@ DECLARE_PER_CPU(int, next_tlbcam_idx); enum { MMU_FTRS_POSSIBLE = -#ifdef CONFIG_PPC_BOOK3S +#if defined(CONFIG_PPC_BOOK3S_64) || defined(CONFIG_PPC_BOOK3S_604) MMU_FTR_HPTE_TABLE | #endif #ifdef CONFIG_PPC_8xx @@ -206,6 +206,9 @@ enum { 0, }; +#if defined(CONFIG_PPC_BOOK3S_604) && !defined(CONFIG_PPC_BOOK3S_603) +#define MMU_FTRS_ALWAYS MMU_FTR_HPTE_TABLE +#endif #ifdef CONFIG_PPC_8xx #define MMU_FTRS_ALWAYS MMU_FTR_TYPE_8xx #endif diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 5bf971f7c325..65f35ec052d4 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -612,6 +612,7 @@ static struct cpu_spec __initdata cpu_specs[] = { #ifdef CONFIG_PPC32 #ifdef CONFIG_PPC_BOOK3S_32 +#ifdef CONFIG_PPC_BOOK3S_604 { /* 604 */ .pvr_mask = 0xffff0000, .pvr_value = 0x00040000, @@ -1101,6 +1102,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "ppc7450", }, +#endif /* CONFIG_PPC_BOOK3S_604 */ +#ifdef CONFIG_PPC_BOOK3S_603 { /* 603 */ .pvr_mask = 0xffff0000, .pvr_value = 0x00030000, @@ -1229,6 +1232,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .platform = "ppc603", }, #endif +#endif /* CONFIG_PPC_BOOK3S_603 */ +#ifdef CONFIG_PPC_BOOK3S_604 { /* default match, we assume split I/D cache & TB (non-601)... 
*/ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, @@ -1241,6 +1246,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "ppc603", }, +#endif /* CONFIG_PPC_BOOK3S_604 */ #endif /* CONFIG_PPC_BOOK3S_32 */ #ifdef CONFIG_PPC_8xx { /* 8xx */ diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 14a8481f36a8..3ce907523b1e 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -64,6 +64,22 @@ config 44x endchoice +config PPC_BOOK3S_603 + bool "Support for 603 SW loaded TLB" + depends on PPC_BOOK3S_32 + default y + help + Provide support for processors based on the 603 cores. Those + processors don't have a HASH MMU and provide SW TLB loading. + +config PPC_BOOK3S_604 + bool "Support for 604+ HASH MMU" if PPC_BOOK3S_603 + depends on PPC_BOOK3S_32 + default y + help + Provide support for processors not based on the 603 cores. + Those processors have a HASH MMU. + choice prompt "Processor Type" depends on PPC64 From 1b03e71ff6f2bd10b45a0128ce76e0e42014a44c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 16 Nov 2020 16:09:31 +0000 Subject: [PATCH 245/304] powerpc/32s: Handle PROTFAULT in hash_page() also for CONFIG_PPC_KUAP On hash 32 bits, handling minor protection faults like unsetting the dirty flag is heavy if done from the normal page_fault processing, because it implies a hash table software lookup for flushing the entry, and then a DSI is taken anyway to add the entry back. When KUAP was implemented, as explained in commit a68c31fc01ef ("powerpc/32s: Implement Kernel Userspace Access Protection"), protection faults were diverted from hash_page() because hash_page() was not able to identify a KUAP fault. Implement KUAP verification in hash_page(), by clearing write permission when the access is a kernel access and Ks is 1. This works regardless of the address because kernel segments always have Ks set to 0, while user segments have Ks set to 0 only when kernel writes to userspace are granted. Then protection faults can be handled by hash_page() even for KUAP. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8a4ffe4798e9ea32aaaccdf85e411bb1beed3500.1605542955.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 8 -------- arch/powerpc/mm/book3s32/hash_low.S | 13 +++++++++++-- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index b102eca44874..349bf3f0c3af 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -292,11 +292,7 @@ BEGIN_MMU_FTR_SECTION stw r11, THR11(r10) mfspr r10, SPRN_DSISR mfcr r11 -#ifdef CONFIG_PPC_KUAP - andis. r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h -#else andis. r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h -#endif mfspr r10, SPRN_SPRG_THREAD beq hash_page_dsi .Lhash_page_dsi_cont: @@ -313,11 +309,7 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) EXCEPTION_PROLOG handle_dar_dsisr=1 get_and_save_dar_dsisr_on_stack r4, r5, r11 BEGIN_MMU_FTR_SECTION -#ifdef CONFIG_PPC_KUAP - andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h -#else andis. 
r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h -#endif bne handle_page_fault_tramp_2 /* if not, try to put a PTE */ rlwinm r3, r5, 32 - 15, 21, 21 /* DSISR_STORE -> _PAGE_RW */ bl hash_page diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index ceb90a6e3256..98b8d8a5ad64 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -89,8 +89,6 @@ _GLOBAL(hash_page) #else rlwimi r8,r4,23,20,28 /* compute pte address */ #endif - rlwinm r0,r3,32-3,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */ - ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE /* * Update the linux PTE atomically. We do the lwarx up-front @@ -106,7 +104,18 @@ _GLOBAL(hash_page) #endif .Lretry: lwarx r6,0,r8 /* get linux-style pte, flag word */ +#ifdef CONFIG_PPC_KUAP + mfsrin r5,r4 + rlwinm r0,r9,28,_PAGE_RW /* MSR[PR] => _PAGE_RW */ + rlwinm r5,r5,12,_PAGE_RW /* Ks => _PAGE_RW */ + andc r5,r5,r0 /* Ks & ~MSR[PR] */ + andc r5,r6,r5 /* Clear _PAGE_RW when Ks = 1 && MSR[PR] = 0 */ + andc. r5,r3,r5 /* check access & ~permission */ +#else andc. r5,r3,r6 /* check access & ~permission */ +#endif + rlwinm r0,r3,32-3,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */ + ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE #ifdef CONFIG_SMP bne- .Lhash_page_out /* return if access not permitted */ #else From 1e78f723d6a52966bfe3804209dbf404fdc9d3bb Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 4 Dec 2020 10:11:34 +0000 Subject: [PATCH 246/304] powerpc/8xx: Fix early debug when SMC1 is relocated When SMC1 is relocated and early debug is selected, the board hangs in ppc_md.setup_arch(). This is because once the microcode has been loaded and SMC1 relocated, early debug writes land in the weeds. To allow smooth continuation, the SMC1 parameter RAM set up by the bootloader has to be copied into the new location.
Fixes: 43db76f41824 ("powerpc/8xx: Add microcode patch to move SMC parameter RAM.") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b2f71f39eca543f1e4ec06596f09a8b12235c701.1607076683.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cpm1.h | 1 + arch/powerpc/platforms/8xx/micropatch.c | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/arch/powerpc/include/asm/cpm1.h b/arch/powerpc/include/asm/cpm1.h index a116fe931789..3bdd74739cb8 100644 --- a/arch/powerpc/include/asm/cpm1.h +++ b/arch/powerpc/include/asm/cpm1.h @@ -68,6 +68,7 @@ extern void cpm_reset(void); #define PROFF_SPI ((uint)0x0180) #define PROFF_SCC3 ((uint)0x0200) #define PROFF_SMC1 ((uint)0x0280) +#define PROFF_DSP1 ((uint)0x02c0) #define PROFF_SCC4 ((uint)0x0300) #define PROFF_SMC2 ((uint)0x0380) diff --git a/arch/powerpc/platforms/8xx/micropatch.c b/arch/powerpc/platforms/8xx/micropatch.c index aed4bc75f352..aef179fcbd4f 100644 --- a/arch/powerpc/platforms/8xx/micropatch.c +++ b/arch/powerpc/platforms/8xx/micropatch.c @@ -360,6 +360,17 @@ void __init cpm_load_patch(cpm8xx_t *cp) if (IS_ENABLED(CONFIG_SMC_UCODE_PATCH)) { smc_uart_t *smp; + if (IS_ENABLED(CONFIG_PPC_EARLY_DEBUG_CPM)) { + int i; + + for (i = 0; i < sizeof(*smp); i += 4) { + u32 __iomem *src = (u32 __iomem *)&cp->cp_dparam[PROFF_SMC1 + i]; + u32 __iomem *dst = (u32 __iomem *)&cp->cp_dparam[PROFF_DSP1 + i]; + + out_be32(dst, in_be32(src)); + } + } + smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC1]; out_be16(&smp->smc_rpbase, 0x1ec0); smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC2]; From ad3ed15cd04b96de7c38204ba7c698cbccd8fe88 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 4 Dec 2020 10:12:51 +0000 Subject: [PATCH 247/304] powerpc/process: Remove target specific __set_dabr() The __set_dabr() variants are simple functions that can be inlined directly inside set_dabr(), using IS_ENABLED() instead of #ifdef. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c10b263668e137236c71d76648b03cf2cd1ee66f.1607076733.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/process.c | 37 ++++++++++++----------------------- 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 96bb10d00d9c..a66f435dabbf 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -806,29 +806,6 @@ static void switch_hw_breakpoint(struct task_struct *new) #endif /* !CONFIG_HAVE_HW_BREAKPOINT */ #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ -#ifdef CONFIG_PPC_ADV_DEBUG_REGS -static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) -{ - mtspr(SPRN_DAC1, dabr); - if (IS_ENABLED(CONFIG_PPC_47x)) - isync(); - return 0; -} -#elif defined(CONFIG_PPC_BOOK3S) -static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) -{ - mtspr(SPRN_DABR, dabr); - if (cpu_has_feature(CPU_FTR_DABRX)) - mtspr(SPRN_DABRX, dabrx); - return 0; -} -#else -static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) -{ - return -EINVAL; -} -#endif - static inline int set_dabr(struct arch_hw_breakpoint *brk) { unsigned long dabr, dabrx; @@ -839,7 +816,19 @@ static inline int set_dabr(struct arch_hw_breakpoint *brk) if (ppc_md.set_dabr) return ppc_md.set_dabr(dabr, dabrx); - return __set_dabr(dabr, dabrx); + if (IS_ENABLED(CONFIG_PPC_ADV_DEBUG_REGS)) { + mtspr(SPRN_DAC1, dabr); + if (IS_ENABLED(CONFIG_PPC_47x)) + isync(); + return 0; + } else if 
(IS_ENABLED(CONFIG_PPC_BOOK3S)) { + mtspr(SPRN_DABR, dabr); + if (cpu_has_feature(CPU_FTR_DABRX)) + mtspr(SPRN_DABRX, dabrx); + return 0; + } else { + return -EINVAL; + } } static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk) From 613df979da6c032cbe6fa273fb8ca21af022157e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 24 Nov 2020 15:24:54 +0000 Subject: [PATCH 248/304] powerpc/8xx: DEBUG_PAGEALLOC doesn't require an ITLB miss exception handler Since commit e611939fc8ec ("powerpc/mm: Ensure change_page_attr() doesn't invalidate pinned TLBs"), pinned TLBs are no longer invalidated by __kernel_map_pages() when CONFIG_DEBUG_PAGEALLOC is selected. Remove the dependency on CONFIG_DEBUG_PAGEALLOC. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e796c5fcb5898de827c803cf1ab8ba1d7a5d4b76.1606231483.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_8xx.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index ee0bfebc375f..66ee62f30d36 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -47,8 +47,7 @@ * - Either we have modules * - Or we have not pinned the first 8M */ -#if defined(CONFIG_MODULES) || !defined(CONFIG_PIN_TLB_TEXT) || \ - defined(CONFIG_DEBUG_PAGEALLOC) +#if defined(CONFIG_MODULES) || !defined(CONFIG_PIN_TLB_TEXT) #define ITLB_MISS_KERNEL 1 #endif From bccc58986a2f98e3af349c85c5f49aac7fb19ef2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 24 Nov 2020 15:24:55 +0000 Subject: [PATCH 249/304] powerpc/8xx: Always pin kernel text TLB There is no longer any big point in not pinning kernel text, as we can now keep pinned TLBs even with things like DEBUG_PAGEALLOC. Remove CONFIG_PIN_TLB_TEXT, so that kernel text is always pinned. Signed-off-by: Christophe Leroy [mpe: Drop ifdef around mmu_pin_tlb() to fix build errors] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/203b89de491e1379f1677a2685211b7c32adfff0.1606231483.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 3 +-- arch/powerpc/kernel/head_8xx.S | 22 +++------------------- arch/powerpc/mm/nohash/8xx.c | 3 +-- arch/powerpc/platforms/8xx/Kconfig | 7 ------- 4 files changed, 5 insertions(+), 30 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d0d16c3e1b4b..2b8e47d1aa91 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -808,8 +808,7 @@ config DATA_SHIFT_BOOL bool "Set custom data alignment" depends on ADVANCED_OPTIONS depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC - depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && \ - (!PIN_TLB_TEXT || !STRICT_KERNEL_RWX)) + depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && !STRICT_KERNEL_RWX) help This option allows you to set the kernel data alignment. When RAM is mapped by blocks, the alignment needs to fit the size and diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 66ee62f30d36..e5d57018e1a0 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -42,15 +42,6 @@ #endif .endm -/* - * We need an ITLB miss handler for kernel addresses if: - * - Either we have modules - * - Or we have not pinned the first 8M - */ -#if defined(CONFIG_MODULES) || !defined(CONFIG_PIN_TLB_TEXT) -#define ITLB_MISS_KERNEL 1 -#endif - /* * Value for the bits that have fixed value in RPN entries. * Also used for tagging DAR for DTLBerror. 
@@ -209,12 +200,12 @@ InstructionTLBMiss: mfspr r10, SPRN_SRR0 /* Get effective address of fault */ INVALIDATE_ADJACENT_PAGES_CPU15(r10) mtspr SPRN_MD_EPN, r10 -#ifdef ITLB_MISS_KERNEL +#ifdef CONFIG_MODULES mfcr r11 compare_to_kernel_boundary r10, r10 #endif mfspr r10, SPRN_M_TWB /* Get level 1 table */ -#ifdef ITLB_MISS_KERNEL +#ifdef CONFIG_MODULES blt+ 3f rlwinm r10, r10, 0, 20, 31 oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha @@ -618,10 +609,6 @@ start_here: lis r0, (MD_TWAM | MD_RSV4I)@h mtspr SPRN_MD_CTR, r0 #endif -#ifndef CONFIG_PIN_TLB_TEXT - li r0, 0 - mtspr SPRN_MI_CTR, r0 -#endif #if !defined(CONFIG_PIN_TLB_DATA) && !defined(CONFIG_PIN_TLB_IMMR) lis r0, MD_TWAM@h mtspr SPRN_MD_CTR, r0 @@ -717,7 +704,6 @@ initial_mmu: mtspr SPRN_DER, r8 blr -#ifdef CONFIG_PIN_TLB _GLOBAL(mmu_pin_tlb) lis r9, (1f - PAGE_OFFSET)@h ori r9, r9, (1f - PAGE_OFFSET)@l @@ -739,7 +725,6 @@ _GLOBAL(mmu_pin_tlb) mtspr SPRN_MD_CTR, r6 tlbia -#ifdef CONFIG_PIN_TLB_TEXT LOAD_REG_IMMEDIATE(r5, 28 << 8) LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED) @@ -760,7 +745,7 @@ _GLOBAL(mmu_pin_tlb) bdnzt lt, 2b lis r0, MI_RSV4I@h mtspr SPRN_MI_CTR, r0 -#endif + LOAD_REG_IMMEDIATE(r5, 28 << 8 | MD_TWAM) #ifdef CONFIG_PIN_TLB_DATA LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) @@ -818,7 +803,6 @@ _GLOBAL(mmu_pin_tlb) mtspr SPRN_SRR1, r10 mtspr SPRN_SRR0, r11 rfi -#endif /* CONFIG_PIN_TLB */ /* * We put a few things here that have to be page-aligned. diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 231ca95f9ffb..19a3eec1d8c5 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -186,8 +186,7 @@ void mmu_mark_initmem_nx(void) mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, false); mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false); - if (IS_ENABLED(CONFIG_PIN_TLB_TEXT)) - mmu_pin_tlb(block_mapped_ram, false); + mmu_pin_tlb(block_mapped_ram, false); } #ifdef CONFIG_STRICT_KERNEL_RWX diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index abb2b45b2789..60cc5b537a98 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -194,13 +194,6 @@ config PIN_TLB_IMMR CONFIG_PIN_TLB_DATA is also selected, it will reduce CONFIG_PIN_TLB_DATA to 24 Mbytes. -config PIN_TLB_TEXT - bool "Pinned TLB for TEXT" - depends on PIN_TLB - default y - help - This pins kernel text with 8M pages. - endmenu endmenu From 576e02bbf1062b9118d5bbb96a40ed3b6b359f22 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 24 Nov 2020 15:24:56 +0000 Subject: [PATCH 250/304] powerpc/8xx: Simplify INVALIDATE_ADJACENT_PAGES_CPU15 We now have r11 available as a scratch register so INVALIDATE_ADJACENT_PAGES_CPU15() can be simplified. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/bdafd651b4ac3a851fd09249f5f3699c50da29f2.1606231483.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_8xx.S | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index e5d57018e1a0..4d792a8fdc1b 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -180,14 +180,13 @@ SystemCall: */ #ifdef CONFIG_8xx_CPU15 -#define INVALIDATE_ADJACENT_PAGES_CPU15(addr) \ - addi addr, addr, PAGE_SIZE; \ - tlbie addr; \ - addi addr, addr, -(PAGE_SIZE << 1); \ - tlbie addr; \ - addi addr, addr, PAGE_SIZE +#define INVALIDATE_ADJACENT_PAGES_CPU15(addr, tmp) \ + addi tmp, addr, PAGE_SIZE; \ + tlbie tmp; \ + addi tmp, addr, -PAGE_SIZE; \ + tlbie tmp #else -#define INVALIDATE_ADJACENT_PAGES_CPU15(addr) +#define INVALIDATE_ADJACENT_PAGES_CPU15(addr, tmp) #endif InstructionTLBMiss: @@ -198,7 +197,7 @@ InstructionTLBMiss: * kernel page tables. */ mfspr r10, SPRN_SRR0 /* Get effective address of fault */ - INVALIDATE_ADJACENT_PAGES_CPU15(r10) + INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11) mtspr SPRN_MD_EPN, r10 #ifdef CONFIG_MODULES mfcr r11 From a314ea5abf6dbaf35f14c9bd1d93105260fb9336 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 24 Nov 2020 15:24:57 +0000 Subject: [PATCH 251/304] powerpc/8xx: Use SPRN_SPRG_SCRATCH2 in ITLB miss exception In order to re-enable MMU earlier, ensure ITLB miss exception cannot clobber SPRN_SPRG_SCRATCH0 and SPRN_SPRG_SCRATCH1. Do so by using SPRN_SPRG_SCRATCH2 and SPRN_M_TW instead, like the DTLB miss exception. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/abc78e8e9577d473691ebb9996c6413b37bfd9ca.1606231483.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_8xx.S | 12 ++++++------ arch/powerpc/perf/8xx-pmu.c | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 4d792a8fdc1b..ef451fb08d68 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -190,8 +190,8 @@ SystemCall: #endif InstructionTLBMiss: - mtspr SPRN_SPRG_SCRATCH0, r10 - mtspr SPRN_SPRG_SCRATCH1, r11 + mtspr SPRN_SPRG_SCRATCH2, r10 + mtspr SPRN_M_TW, r11 /* If we are faulting a kernel address, we have to use the * kernel page tables. 
@@ -230,8 +230,8 @@ InstructionTLBMiss: mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ /* Restore registers */ -0: mfspr r10, SPRN_SPRG_SCRATCH0 - mfspr r11, SPRN_SPRG_SCRATCH1 +0: mfspr r10, SPRN_SPRG_SCRATCH2 + mfspr r11, SPRN_M_TW rfi patch_site 0b, patch__itlbmiss_exit_1 @@ -240,8 +240,8 @@ InstructionTLBMiss: 0: lwz r10, (itlb_miss_counter - PAGE_OFFSET)@l(0) addi r10, r10, 1 stw r10, (itlb_miss_counter - PAGE_OFFSET)@l(0) - mfspr r10, SPRN_SPRG_SCRATCH0 - mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r10, SPRN_SPRG_SCRATCH2 + mfspr r11, SPRN_M_TW rfi #endif diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c index e53c3c161257..02db58c7427a 100644 --- a/arch/powerpc/perf/8xx-pmu.c +++ b/arch/powerpc/perf/8xx-pmu.c @@ -165,9 +165,9 @@ static void mpc8xx_pmu_del(struct perf_event *event, int flags) break; case PERF_8xx_ID_ITLB_LOAD_MISS: if (atomic_dec_return(&itlb_miss_ref) == 0) { - /* mfspr r10, SPRN_SPRG_SCRATCH0 */ + /* mfspr r10, SPRN_SPRG_SCRATCH2 */ struct ppc_inst insn = ppc_inst(PPC_INST_MFSPR | __PPC_RS(R10) | - __PPC_SPR(SPRN_SPRG_SCRATCH0)); + __PPC_SPR(SPRN_SPRG_SCRATCH2)); patch_instruction_site(&patch__itlbmiss_exit_1, insn); } From 89eecd938cab80f0da18abbd2ed997a521f83f01 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 24 Nov 2020 15:24:58 +0000 Subject: [PATCH 252/304] powerpc/8xx: Use SPRN_SPRG_SCRATCH2 in DTLB miss exception Use SPRN_SPRG_SCRATCH2 in DTLB miss exception instead of DAR in order to be similar to ITLB miss exception. This also simplifies mpc8xx_pmu_del() Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e3cc8f023ef40e1e8ae144e4dd1330a5ff022528.1606231483.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_8xx.S | 9 ++++----- arch/powerpc/perf/8xx-pmu.c | 19 +++++++------------ 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index ef451fb08d68..52702f3db6df 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -247,7 +247,7 @@ InstructionTLBMiss: . 
= 0x1200 DataStoreTLBMiss: - mtspr SPRN_DAR, r10 + mtspr SPRN_SPRG_SCRATCH2, r10 mtspr SPRN_M_TW, r11 mfcr r11 @@ -286,11 +286,11 @@ DataStoreTLBMiss: li r11, RPN_PATTERN rlwimi r10, r11, 0, 24, 27 /* Set 24-27 */ mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ + mtspr SPRN_DAR, r11 /* Tag DAR */ /* Restore registers */ -0: mfspr r10, SPRN_DAR - mtspr SPRN_DAR, r11 /* Tag DAR */ +0: mfspr r10, SPRN_SPRG_SCRATCH2 mfspr r11, SPRN_M_TW rfi patch_site 0b, patch__dtlbmiss_exit_1 @@ -300,8 +300,7 @@ DataStoreTLBMiss: 0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) addi r10, r10, 1 stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) - mfspr r10, SPRN_DAR - mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r10, SPRN_SPRG_SCRATCH2 mfspr r11, SPRN_M_TW rfi #endif diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c index 02db58c7427a..93004ee586a1 100644 --- a/arch/powerpc/perf/8xx-pmu.c +++ b/arch/powerpc/perf/8xx-pmu.c @@ -153,6 +153,11 @@ static void mpc8xx_pmu_read(struct perf_event *event) static void mpc8xx_pmu_del(struct perf_event *event, int flags) { + struct ppc_inst insn; + + /* mfspr r10, SPRN_SPRG_SCRATCH2 */ + insn = ppc_inst(PPC_INST_MFSPR | __PPC_RS(R10) | __PPC_SPR(SPRN_SPRG_SCRATCH2)); + mpc8xx_pmu_read(event); /* If it was the last user, stop counting to avoid useles overhead */ @@ -164,22 +169,12 @@ static void mpc8xx_pmu_del(struct perf_event *event, int flags) mtspr(SPRN_ICTRL, 7); break; case PERF_8xx_ID_ITLB_LOAD_MISS: - if (atomic_dec_return(&itlb_miss_ref) == 0) { - /* mfspr r10, SPRN_SPRG_SCRATCH2 */ - struct ppc_inst insn = ppc_inst(PPC_INST_MFSPR | __PPC_RS(R10) | - __PPC_SPR(SPRN_SPRG_SCRATCH2)); - + if (atomic_dec_return(&itlb_miss_ref) == 0) patch_instruction_site(&patch__itlbmiss_exit_1, insn); - } break; case PERF_8xx_ID_DTLB_LOAD_MISS: - if (atomic_dec_return(&dtlb_miss_ref) == 0) { - /* mfspr r10, SPRN_DAR */ - struct ppc_inst insn = ppc_inst(PPC_INST_MFSPR | __PPC_RS(R10) | - __PPC_SPR(SPRN_DAR)); - + if (atomic_dec_return(&dtlb_miss_ref) == 0) patch_instruction_site(&patch__dtlbmiss_exit_1, insn); - } break; } } From 70b588a068668dd7a92ed19cf0373ba92847957c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 24 Nov 2020 15:24:59 +0000 Subject: [PATCH 253/304] powerpc/ppc-opcode: Add PPC_RAW_MFSPR() Add PPC_RAW_MFSPR() to replace open coding done in 8xx-pmu.c Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e281e3a611eead8817c49cf06a60072a021af823.1606231483.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc-opcode.h | 3 ++- arch/powerpc/perf/8xx-pmu.c | 5 +---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index a6e3700c4566..da6f300e9788 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -230,7 +230,6 @@ #define PPC_INST_POPCNTB_MASK 0xfc0007fe #define PPC_INST_RFEBB 0x4c000124 #define PPC_INST_RFID 0x4c000024 -#define PPC_INST_MFSPR 0x7c0002a6 #define PPC_INST_MFSPR_DSCR 0x7c1102a6 #define PPC_INST_MFSPR_DSCR_MASK 0xfc1ffffe #define PPC_INST_MTSPR_DSCR 0x7c1103a6 @@ -507,6 +506,8 @@ #define PPC_RAW_NEG(d, a) (0x7c0000d0 | ___PPC_RT(d) | ___PPC_RA(a)) +#define PPC_RAW_MFSPR(d, spr) (0x7c0002a6 | ___PPC_RT(d) | __PPC_SPR(spr)) + /* Deal with instructions that older assemblers aren't aware of */ #define PPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH) #define PPC_CP_ABORT stringify_in_c(.long PPC_RAW_CP_ABORT) diff --git 
a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c index 93004ee586a1..f970d1510d3d 100644 --- a/arch/powerpc/perf/8xx-pmu.c +++ b/arch/powerpc/perf/8xx-pmu.c @@ -153,10 +153,7 @@ static void mpc8xx_pmu_read(struct perf_event *event) static void mpc8xx_pmu_del(struct perf_event *event, int flags) { - struct ppc_inst insn; - - /* mfspr r10, SPRN_SPRG_SCRATCH2 */ - insn = ppc_inst(PPC_INST_MFSPR | __PPC_RS(R10) | __PPC_SPR(SPRN_SPRG_SCRATCH2)); + struct ppc_inst insn = ppc_inst(PPC_RAW_MFSPR(10, SPRN_SPRG_SCRATCH2)); mpc8xx_pmu_read(event); From 7ceb40027e19567a0a066e3b380cc034cdd9a124 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 9 Dec 2020 05:29:21 +0000 Subject: [PATCH 254/304] powerpc/mm: sanity_check_fault() should work for all, not only BOOK3S The verification and message introduced by commit 374f3f5979f9 ("powerpc/mm/hash: Handle user access of kernel address gracefully") apply to all platforms; they should not be limited to BOOK3S. Make the BOOK3S version of sanity_check_fault() the one for all, and bail out earlier if not BOOK3S. Fixes: 374f3f5979f9 ("powerpc/mm/hash: Handle user access of kernel address gracefully") Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/fe199d5af3578d3bf80035d203a94d742a7a28af.1607491748.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/fault.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index b12595102525..f6ae56a0d7a3 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -303,7 +303,6 @@ static inline void cmo_account_page_fault(void) static inline void cmo_account_page_fault(void) { } #endif /* CONFIG_PPC_SMLPAR */ -#ifdef CONFIG_PPC_BOOK3S static void sanity_check_fault(bool is_write, bool is_user, unsigned long error_code, unsigned long address) { @@ -320,6 +319,9 @@ static void sanity_check_fault(bool is_write, bool is_user, return; } + if (!IS_ENABLED(CONFIG_PPC_BOOK3S)) + return; + /* * For hash translation mode, we should never get a * PROTFAULT. 
Any update to pte to reduce access will result in us @@ -354,10 +356,6 @@ static void sanity_check_fault(bool is_write, bool is_user, WARN_ON_ONCE(error_code & DSISR_PROTFAULT); } -#else -static void sanity_check_fault(bool is_write, bool is_user, - unsigned long error_code, unsigned long address) { } -#endif /* CONFIG_PPC_BOOK3S */ /* * Define the correct "is_write" bit in error_code based From 5250d026d241febfaf226d26cabe528fc478e225 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 9 Dec 2020 05:29:22 +0000 Subject: [PATCH 255/304] powerpc/fault: Unnest definition of page_fault_is_write() and page_fault_is_bad() To make it more readable, separate page_fault_is_write() and page_fault_is_bad() to avoid several levels of #ifdefs. Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6afaac2495248d68f94c438c5ec36b6010931de5.1607491748.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/fault.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index f6ae56a0d7a3..3fcd34c28e10 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -363,17 +363,19 @@ static void sanity_check_fault(bool is_write, bool is_user, */ #if (defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) #define page_fault_is_write(__err) ((__err) & ESR_DST) -#define page_fault_is_bad(__err) (0) #else #define page_fault_is_write(__err) ((__err) & DSISR_ISSTORE) -#if defined(CONFIG_PPC_8xx) +#endif + +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +#define page_fault_is_bad(__err) (0) +#elif defined(CONFIG_PPC_8xx) #define page_fault_is_bad(__err) ((__err) & DSISR_NOEXEC_OR_G) #elif defined(CONFIG_PPC64) #define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_64S) #else #define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_32S) #endif -#endif /* * For 600- and 800-family processors, the error_code parameter is DSISR From 3dc12dfe74300febc568c3b530c0f9bee01f2821 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 9 Dec 2020 05:29:23 +0000 Subject: [PATCH 256/304] powerpc/mm: Move the WARN() out of bad_kuap_fault() In order to prepare for the removal of calls to search_exception_tables() on the fast path, move the WARN() out of bad_kuap_fault(). 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9501311014bd6507e04b27a0c3035186ccf65cd5.1607491748.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/kup.h | 6 +----- arch/powerpc/include/asm/book3s/64/kup.h | 6 ++---- arch/powerpc/include/asm/nohash/32/kup-8xx.h | 3 +-- arch/powerpc/mm/fault.c | 2 +- 4 files changed, 5 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 32fd4452e960..a0117a9d5b06 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -183,11 +183,7 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) unsigned long begin = regs->kuap & 0xf0000000; unsigned long end = regs->kuap << 28; - if (!is_write) - return false; - - return WARN(address < begin || address >= end, - "Bug: write fault blocked by segment registers !"); + return is_write && (address < begin || address >= end); } #endif /* CONFIG_PPC_KUAP */ diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index 7075c92c320c..f50f72e535aa 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -371,11 +371,9 @@ static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, * the AMR. Hence check for BLOCK_WRITE/READ against AMR. */ if (is_write) { - return WARN(((regs->amr & AMR_KUAP_BLOCK_WRITE) == AMR_KUAP_BLOCK_WRITE), - "Bug: Write fault blocked by AMR!"); + return (regs->amr & AMR_KUAP_BLOCK_WRITE) == AMR_KUAP_BLOCK_WRITE; } - return WARN(((regs->amr & AMR_KUAP_BLOCK_READ) == AMR_KUAP_BLOCK_READ), - "Bug: Read fault blocked by AMR!"); + return (regs->amr & AMR_KUAP_BLOCK_READ) == AMR_KUAP_BLOCK_READ; } static __always_inline void allow_user_access(void __user *to, const void __user *from, diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index 567cdc557402..17a4a616436f 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -63,8 +63,7 @@ static inline void restore_user_access(unsigned long flags) static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { - return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xff000000), - "Bug: fault blocked by AP register !"); + return !((regs->kuap ^ MD_APG_KUAP) & 0xff000000); } #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 3fcd34c28e10..04505f938bbc 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -228,7 +228,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, // Read/write fault in a valid region (the exception table search passed // above), but blocked by KUAP is bad, it can never succeed. if (bad_kuap_fault(regs, address, is_write)) - return true; + return WARN(true, "Bug: %s fault blocked by KUAP!", is_write ? "Write" : "Read"); // What's left? Kernel fault on user in well defined regions (extable // matched), and allowed by KUAP in the faulting context. From cbd7e6ca0210db05c315a27bb5db5a482f2772ce Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 9 Dec 2020 05:29:24 +0000 Subject: [PATCH 257/304] powerpc/fault: Avoid heavy search_exception_tables() verification search_exception_tables() is a heavy operation, so we have to avoid it. When KUAP is selected, we'll know whether the fault has been blocked by KUAP. 
When it is blocked by KUAP, check whether we are in an expected userspace access place. If so, emit a warning to show that something is going wrong. Otherwise, just remain silent; it will likely Oops soon. When KUAP is not selected, it behaves just as if the address was already in the TLBs and no fault was generated. Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9870f01e293a5a76c4f4e4ddd4a6b0f63038c591.1607491748.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/fault.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 04505f938bbc..389a2a875262 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -210,28 +210,26 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, return true; } - if (!is_exec && address < TASK_SIZE && (error_code & (DSISR_PROTFAULT | DSISR_KEYFAULT)) && - !search_exception_tables(regs->nip)) { - pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: %d)\n", - address, - from_kuid(&init_user_ns, current_uid())); - } - // Kernel fault on kernel address is bad if (address >= TASK_SIZE) return true; - // Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad - if (!search_exception_tables(regs->nip)) - return true; + // Read/write fault blocked by KUAP is bad, it can never succeed. + if (bad_kuap_fault(regs, address, is_write)) { + pr_crit_ratelimited("Kernel attempted to %s user page (%lx) - exploit attempt? (uid: %d)\n", + is_write ? "write" : "read", address, + from_kuid(&init_user_ns, current_uid())); - // Read/write fault in a valid region (the exception table search passed - // above), but blocked by KUAP is bad, it can never succeed. - if (bad_kuap_fault(regs, address, is_write)) + // Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad + if (!search_exception_tables(regs->nip)) + return true; + + // Read/write fault in a valid region (the exception table search passed + // above), but blocked by KUAP is bad, it can never succeed. return WARN(true, "Bug: %s fault blocked by KUAP!", is_write ? "Write" : "Read"); + } - // What's left? Kernel fault on user in well defined regions (extable - // matched), and allowed by KUAP in the faulting context. + // What's left? Kernel fault on user and allowed by KUAP in the faulting context. return false; } From 5f1888a077069988218805534f56b983b6d5710c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 9 Dec 2020 05:29:25 +0000 Subject: [PATCH 258/304] powerpc/fault: Perform exception fixup in do_page_fault() Exception fixup doesn't require the heavy full regs saving; do it directly from do_page_fault(). For that, split bad_page_fault() into two parts. As bad_page_fault() can also be called from other places than handle_page_fault(), it will still perform exception fixup and fall back on __bad_page_fault(). 
handle_page_fault() directly calls __bad_page_fault() as the exception fixup will now be done by do_page_fault() Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/bd07d6fef9237614cd6d318d8f19faeeadaa816b.1607491748.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/bug.h | 1 + arch/powerpc/kernel/entry_32.S | 2 +- arch/powerpc/kernel/exceptions-64e.S | 2 +- arch/powerpc/kernel/exceptions-64s.S | 2 +- arch/powerpc/mm/fault.c | 33 ++++++++++++++++++++-------- 5 files changed, 28 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index ba0500872cce..464f8ca8a5c9 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -113,6 +113,7 @@ struct pt_regs; extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long); extern void bad_page_fault(struct pt_regs *, unsigned long, int); +void __bad_page_fault(struct pt_regs *regs, unsigned long address, int sig); extern void _exception(int, struct pt_regs *, int, unsigned long); extern void _exception_pkey(struct pt_regs *, unsigned long, int); extern void die(const char *, struct pt_regs *, long); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 58177c71dfd4..1c9b0ccc2172 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -684,7 +684,7 @@ handle_page_fault: mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD lwz r4,_DAR(r1) - bl bad_page_fault + bl __bad_page_fault b ret_from_except_full #ifdef CONFIG_PPC_BOOK3S_32 diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index f579ce46eef2..74d07dc0bb48 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -1023,7 +1023,7 @@ storage_fault_common: mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD ld r4,_DAR(r1) - bl bad_page_fault + bl __bad_page_fault b ret_from_except /* diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 1c8f1b90e174..e02ad6fefa46 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -3259,7 +3259,7 @@ handle_page_fault: mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD ld r4,_DAR(r1) - bl bad_page_fault + bl __bad_page_fault b interrupt_return /* We have a data breakpoint exception - handle it */ diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 389a2a875262..8961b44f350c 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -545,10 +545,20 @@ NOKPROBE_SYMBOL(__do_page_fault); int do_page_fault(struct pt_regs *regs, unsigned long address, unsigned long error_code) { + const struct exception_table_entry *entry; enum ctx_state prev_state = exception_enter(); int rc = __do_page_fault(regs, address, error_code); exception_exit(prev_state); - return rc; + if (likely(!rc)) + return 0; + + entry = search_exception_tables(regs->nip); + if (unlikely(!entry)) + return rc; + + instruction_pointer_set(regs, extable_fixup(entry)); + + return 0; } NOKPROBE_SYMBOL(do_page_fault); @@ -557,17 +567,10 @@ NOKPROBE_SYMBOL(do_page_fault); * It is called from the DSI and ISI handlers in head.S and from some * of the procedures in traps.c. 
*/ -void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) +void __bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) { - const struct exception_table_entry *entry; int is_write = page_fault_is_write(regs->dsisr); - /* Are we prepared to handle this fault? */ - if ((entry = search_exception_tables(regs->nip)) != NULL) { - regs->nip = extable_fixup(entry); - return; - } - /* kernel has accessed a bad area */ switch (TRAP(regs)) { @@ -601,3 +604,15 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) die("Kernel access of bad area", regs, sig); } + +void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) +{ + const struct exception_table_entry *entry; + + /* Are we prepared to handle this fault? */ + entry = search_exception_tables(instruction_pointer(regs)); + if (entry) + instruction_pointer_set(regs, extable_fixup(entry)); + else + __bad_page_fault(regs, address, sig); +} From 7bfe54b5f16561bb703de6482f880614ada8dbf2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 6 Nov 2020 13:20:54 +0000 Subject: [PATCH 259/304] powerpc/mm: Refactor the floor/ceiling check in hugetlb range freeing functions All hugetlb range freeing functions have a verification like the following, which only differs by the mask used, depending on the page table level. start &= MASK; if (start < floor) return; if (ceiling) { ceiling &= MASK; if (! ceiling) return; } if (end - 1 > ceiling - 1) return; Refactor that into a helper function which takes the mask as an argument, returning true when [start;end[ is not fully contained inside [floor;ceiling[ Signed-off-by: Christophe Leroy Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/16a571bb32eb6e8cd44bda484c8d81cd8a25e6d7.1604668827.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/hugetlbpage.c | 56 ++++++++++++----------------------- 1 file changed, 19 insertions(+), 37 deletions(-) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 36c3800769fb..f8d8a4988e15 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -294,6 +294,21 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte) static inline void hugepd_free(struct mmu_gather *tlb, void *hugepte) {} #endif +/* Return true when the entry to be freed maps more than the area being freed */ +static bool range_is_outside_limits(unsigned long start, unsigned long end, + unsigned long floor, unsigned long ceiling, + unsigned long mask) +{ + if ((start & mask) < floor) + return true; + if (ceiling) { + ceiling &= mask; + if (!ceiling) + return true; + } + return end - 1 > ceiling - 1; +} + static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift, unsigned long start, unsigned long end, unsigned long floor, unsigned long ceiling) @@ -309,15 +324,7 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif if (shift > pdshift) num_hugepd = 1 << (shift - pdshift); - start &= pdmask; - if (start < floor) - return; - if (ceiling) { - ceiling &= pdmask; - if (! 
ceiling) - return; - } - if (end - 1 > ceiling - 1) + if (range_is_outside_limits(start, end, floor, ceiling, pdmask)) return; for (i = 0; i < num_hugepd; i++, hpdp++) @@ -334,18 +341,9 @@ static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { - unsigned long start = addr; pgtable_t token = pmd_pgtable(*pmd); - start &= PMD_MASK; - if (start < floor) - return; - if (ceiling) { - ceiling &= PMD_MASK; - if (!ceiling) - return; - } - if (end - 1 > ceiling - 1) + if (range_is_outside_limits(addr, end, floor, ceiling, PMD_MASK)) return; pmd_clear(pmd); @@ -395,15 +393,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, addr, next, floor, ceiling); } while (addr = next, addr != end); - start &= PUD_MASK; - if (start < floor) - return; - if (ceiling) { - ceiling &= PUD_MASK; - if (!ceiling) - return; - } - if (end - 1 > ceiling - 1) + if (range_is_outside_limits(start, end, floor, ceiling, PUD_MASK)) return; pmd = pmd_offset(pud, start); @@ -446,15 +436,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d, } } while (addr = next, addr != end); - start &= PGDIR_MASK; - if (start < floor) - return; - if (ceiling) { - ceiling &= PGDIR_MASK; - if (!ceiling) - return; - } - if (end - 1 > ceiling - 1) + if (range_is_outside_limits(start, end, floor, ceiling, PGDIR_MASK)) return; pud = pud_offset(p4d, start); From c5ccb4e78968fbe64f938a5a012fc8ec25cafabf Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 24 Nov 2020 19:51:55 +0000 Subject: [PATCH 260/304] powerpc/32s: Remove unused counters incremented by create_hpte() primary_pteg_full and htab_hash_searches are not used. Remove them. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6470ab99e58c84a5445af43ce4d1d772b0dc3e93.1606247495.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/book3s32/hash_low.S | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index 98b8d8a5ad64..6c350c5e369e 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -353,11 +353,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) beq+ 10f /* no PTE: go look for an empty slot */ tlbie r4 - lis r4, (htab_hash_searches - PAGE_OFFSET)@ha - lwz r6, (htab_hash_searches - PAGE_OFFSET)@l(r4) - addi r6,r6,1 /* count how many searches we do */ - stw r6, (htab_hash_searches - PAGE_OFFSET)@l(r4) - /* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */ mtctr r0 addi r4,r3,-HPTE_SIZE @@ -387,12 +382,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ beq+ .Lfound_empty - /* update counter of times that the primary PTEG is full */ - lis r4, (primary_pteg_full - PAGE_OFFSET)@ha - lwz r6, (primary_pteg_full - PAGE_OFFSET)@l(r4) - addi r6,r6,1 - stw r6, (primary_pteg_full - PAGE_OFFSET)@l(r4) - patch_site 0f, patch__hash_page_C /* Search the secondary PTEG for an empty slot */ ori r5,r5,PTE_H /* set H (secondary hash) bit */ @@ -469,10 +458,6 @@ _ASM_NOKPROBE_SYMBOL(create_hpte) .align 2 next_slot: .space 4 -primary_pteg_full: - .space 4 -htab_hash_searches: - .space 4 .previous /* From fec6166b44ded68e68144144a02e498580118f1a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 24 Nov 2020 19:51:56 +0000 Subject: [PATCH 261/304] powerpc/32s: In add_hash_page(), calculate VSID later VSID is only for 
create_hpte(). When _PAGE_HASHPTE is already set, add_hash_page() bails out without calling create_hpte() and doesn't need the value of VSID. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3907199974c89b85a3441cf3f528751173b7649c.1606247495.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/book3s32/hash_low.S | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index 6c350c5e369e..b0bb9d193400 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -182,12 +182,6 @@ _GLOBAL(add_hash_page) mflr r0 stw r0,4(r1) - /* Convert context and va to VSID */ - mulli r3,r3,897*16 /* multiply context by context skew */ - rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */ - mulli r0,r0,0x111 /* multiply by ESID skew */ - add r3,r3,r0 /* note create_hpte trims to 24 bits */ - #ifdef CONFIG_SMP lwz r8,TASK_CPU(r2) /* to go in mmu_hash_lock */ oris r8,r8,12 @@ -251,6 +245,12 @@ _GLOBAL(add_hash_page) stwcx. r5,0,r8 bne- 1b + /* Convert context and va to VSID */ + mulli r3,r3,897*16 /* multiply context by context skew */ + rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */ + mulli r0,r0,0x111 /* multiply by ESID skew */ + add r3,r3,r0 /* note create_hpte trims to 24 bits */ + bl create_hpte 9: From da481c4fe0e485cdab5cf4d2761be8b8fb38d3d1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 24 Nov 2020 15:24:57 +0000 Subject: [PATCH 262/304] powerpc/32s: Cleanup around PTE_FLAGS_OFFSET in hash_low.S PTE_FLAGS_OFFSET is defined in asm/page_32.h and used only in hash_low.S, and whether PTE_FLAGS_OFFSET is zero depends on CONFIG_PTE_64BIT. Instead of tests like #if (PTE_FLAGS_OFFSET != 0), use CONFIG_PTE_64BIT-related code. Also move the definition of PTE_FLAGS_OFFSET directly into hash_low.S, which improves readability. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f5bc21db7a33dab55924734e6060c2e9daed562e.1606247495.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/page_32.h | 6 ------ arch/powerpc/mm/book3s32/hash_low.S | 23 +++++++++++++---------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h index d64dfe3ac712..56f217606327 100644 --- a/arch/powerpc/include/asm/page_32.h +++ b/arch/powerpc/include/asm/page_32.h @@ -16,12 +16,6 @@ #define ARCH_DMA_MINALIGN L1_CACHE_BYTES #endif -#ifdef CONFIG_PTE_64BIT -#define PTE_FLAGS_OFFSET 4 /* offset of PTE flags, in bytes */ -#else -#define PTE_FLAGS_OFFSET 0 -#endif - #if defined(CONFIG_PPC_256K_PAGES) || \ (defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES)) #define PTE_SHIFT (PAGE_SHIFT - PTE_T_LOG2 - 2) /* 1/4 of a page */ diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index b0bb9d193400..0e6dc830c38b 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -26,6 +26,12 @@ #include #include +#ifdef CONFIG_PTE_64BIT +#define PTE_FLAGS_OFFSET 4 /* offset of PTE flags, in bytes */ +#else +#define PTE_FLAGS_OFFSET 0 +#endif + /* * Load a PTE into the hash table, if possible. 
* The address is in r4, and r3 contains an access flag: @@ -88,6 +94,11 @@ _GLOBAL(hash_page) rlwimi r8,r4,22,20,29 /* insert next 10 bits of address */ #else rlwimi r8,r4,23,20,28 /* compute pte address */ + /* + * If PTE_64BIT is set, the low word is the flags word; use that + * word for locking since it contains all the interesting bits. + */ + addi r8,r8,PTE_FLAGS_OFFSET #endif /* @@ -95,13 +106,7 @@ _GLOBAL(hash_page) * because almost always, there won't be a permission violation * and there won't already be an HPTE, and thus we will have * to update the PTE to set _PAGE_HASHPTE. -- paulus. - * - * If PTE_64BIT is set, the low word is the flags word; use that - * word for locking since it contains all the interesting bits. */ -#if (PTE_FLAGS_OFFSET != 0) - addi r8,r8,PTE_FLAGS_OFFSET -#endif .Lretry: lwarx r6,0,r8 /* get linux-style pte, flag word */ #ifdef CONFIG_PPC_KUAP @@ -489,8 +494,9 @@ _GLOBAL(flush_hash_pages) rlwimi r5,r4,22,20,29 #else rlwimi r5,r4,23,20,28 + addi r5,r5,PTE_FLAGS_OFFSET #endif -1: lwz r0,PTE_FLAGS_OFFSET(r5) +1: lwz r0,0(r5) cmpwi cr1,r6,1 andi. r0,r0,_PAGE_HASHPTE bne 2f @@ -534,9 +540,6 @@ _GLOBAL(flush_hash_pages) * already clear, we're done (for this pte). If not, * clear it (atomically) and proceed. -- paulus. */ -#if (PTE_FLAGS_OFFSET != 0) - addi r5,r5,PTE_FLAGS_OFFSET -#endif 33: lwarx r8,0,r5 /* fetch the pte flags word */ andi. r0,r8,_PAGE_HASHPTE beq 8f /* done if HASHPTE is already clear */ From c33cd1ed60013ec2ae50f91fed260def5f1d9851 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 11 Nov 2020 21:07:22 +1000 Subject: [PATCH 263/304] powerpc/64s/iommu: Don't use atomic_ function on atomic64_t type Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201111110723.3148665-3-npiggin@gmail.com --- arch/powerpc/mm/book3s64/iommu_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c index 563faa10bb66..685d7bb3d26f 100644 --- a/arch/powerpc/mm/book3s64/iommu_api.c +++ b/arch/powerpc/mm/book3s64/iommu_api.c @@ -263,7 +263,7 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem) goto unlock_exit; /* Are there still mappings? */ - if (atomic_cmpxchg(&mem->mapped, 1, 0) != 1) { + if (atomic64_cmpxchg(&mem->mapped, 1, 0) != 1) { ++mem->used; ret = -EBUSY; goto unlock_exit; From e89a8ca94bf583f2577fe722483f0304b3390aa2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 6 Nov 2020 14:53:40 +1000 Subject: [PATCH 264/304] powerpc/64s: Remove MSR[ISF] bit No supported processor implements this mode. Setting the bit in MSR values can be a bit confusing (and would prevent the bit from ever being reused). Remove it. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201106045340.1935841-1-npiggin@gmail.com --- arch/powerpc/include/asm/reg.h | 5 +---- arch/powerpc/kernel/entry_64.S | 2 +- arch/powerpc/kernel/head_64.S | 3 +-- arch/powerpc/kvm/book3s_pr.c | 2 +- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index b9492f2b0608..e40a921d78f9 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -29,7 +29,6 @@ #include #define MSR_SF_LG 63 /* Enable 64 bit mode */ -#define MSR_ISF_LG 61 /* Interrupt 64b mode valid on 630 */ #define MSR_HV_LG 60 /* Hypervisor state */ #define MSR_TS_T_LG 34 /* Trans Mem state: Transactional */ #define MSR_TS_S_LG 33 /* Trans Mem state: Suspended */ @@ -69,13 +68,11 @@ #ifdef CONFIG_PPC64 #define MSR_SF __MASK(MSR_SF_LG) /* Enable 64 bit mode */ -#define MSR_ISF __MASK(MSR_ISF_LG) /* Interrupt 64b mode valid on 630 */ #define MSR_HV __MASK(MSR_HV_LG) /* Hypervisor state */ #define MSR_S __MASK(MSR_S_LG) /* Secure state */ #else /* so tests for these bits fail on 32-bit */ #define MSR_SF 0 -#define MSR_ISF 0 #define MSR_HV 0 #define MSR_S 0 #endif @@ -134,7 +131,7 @@ #define MSR_64BIT MSR_SF /* Server variant */ -#define __MSR (MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV) +#define __MSR (MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_HV) #ifdef __BIG_ENDIAN__ #define MSR_ __MSR #define MSR_IDLE (MSR_ME | MSR_SF | MSR_HV) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index c9d59450fba0..aa1af139d947 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -969,7 +969,7 @@ _GLOBAL(enter_prom) mtsrr1 r11 rfi #else /* CONFIG_PPC_BOOK3E */ - LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE) + LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_LE) andc r11,r11,r12 mtsrr1 r11 RFI_TO_KERNEL diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index f63d01c78398..ece7f97bafff 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -870,8 +870,7 @@ enable_64b_mode: oris r11,r11,0x8000 /* CM bit set, we'll set ICM later */ mtmsr r11 #else /* CONFIG_PPC_BOOK3E */ - li r12,(MSR_64BIT | MSR_ISF)@highest - sldi r12,r12,48 + LOAD_REG_IMMEDIATE(r12, MSR_64BIT) or r11,r11,r12 mtmsrd r11 isync diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index b1fefa63e125..913944dc3620 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -239,7 +239,7 @@ static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) smsr |= (guest_msr & vcpu->arch.guest_owned_ext); /* 64-bit Process MSR values */ #ifdef CONFIG_PPC_BOOK3S_64 - smsr |= MSR_ISF | MSR_HV; + smsr |= MSR_HV; #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* From 59d512e4374b2d8a6ad341475dc94c4a4bdec7d3 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 7 Nov 2020 11:43:36 +1000 Subject: [PATCH 265/304] powerpc/64: irq replay remove decrementer overflow check This check is a way to catch some cases of decrementer overflow, where the decrementer has underflowed an odd number of times while MSR[EE] was disabled. With a typical small decrementer, a timer that fires when MSR[EE] is disabled will be "lost" if MSR[EE] remains disabled for between 4.3 and 8.6 seconds after the timer expires. In any case, the decrementer interrupt would be taken at 8.6 seconds and the timer would be found at that point. 
So this check is for catching extreme latency events, and it prevents those latencies from being a further few seconds long. It's not obvious this is a good tradeoff. This is already a watchdog-magnitude event and that situation is not improved significantly by this check. For large decrementers, it's useless. Therefore, remove this check, which avoids an mftb when enabling hard disabled interrupts (e.g., when enabling after coming from hardware interrupt handlers). Perhaps more importantly, it also removes the clunky MSR[EE] vs PACA_IRQ_HARD_DIS incoherency in soft-interrupt replay, which simplifies the code. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201107014336.2337337-1-npiggin@gmail.com --- arch/powerpc/kernel/irq.c | 53 ++------------------------- arch/powerpc/kernel/time.c | 9 ++--- arch/powerpc/platforms/powernv/opal.c | 2 +- 3 files changed, 8 insertions(+), 56 deletions(-) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 7d0f7682d01d..6b1eca53e36c 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -102,14 +102,6 @@ static inline notrace unsigned long get_irq_happened(void) return happened; } -static inline notrace int decrementer_check_overflow(void) -{ - u64 now = get_tb(); - u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); - - return now >= *next_tb; -} - #ifdef CONFIG_PPC_BOOK3E /* This is called whenever we are re-enabling interrupts @@ -142,35 +134,6 @@ notrace unsigned int __check_irq_replay(void) trace_hardirqs_on(); trace_hardirqs_off(); - /* - * We are always hard disabled here, but PACA_IRQ_HARD_DIS may - * not be set, which means interrupts have only just been hard - * disabled as part of the local_irq_restore or interrupt return - * code. In that case, skip the decrementr check becaus it's - * expensive to read the TB. - * - * HARD_DIS then gets cleared here, but it's reconciled later. - * Either local_irq_disable will replay the interrupt and that - * will reconcile state like other hard interrupts. Or interrupt - * retur will replay the interrupt and in that case it sets - * PACA_IRQ_HARD_DIS by hand (see comments in entry_64.S). - */ - if (happened & PACA_IRQ_HARD_DIS) { - local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; - - /* - * We may have missed a decrementer interrupt if hard disabled. - * Check the decrementer register in case we had a rollover - * while hard disabled. - */ - if (!(happened & PACA_IRQ_DEC)) { - if (decrementer_check_overflow()) { - local_paca->irq_happened |= PACA_IRQ_DEC; - happened |= PACA_IRQ_DEC; - } - } - } - if (happened & PACA_IRQ_DEC) { local_paca->irq_happened &= ~PACA_IRQ_DEC; return 0x900; @@ -186,6 +149,9 @@ notrace unsigned int __check_irq_replay(void) return 0x280; } + if (happened & PACA_IRQ_HARD_DIS) + local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + /* There should be nothing left ! */ BUG_ON(local_paca->irq_happened != 0); @@ -229,18 +195,6 @@ void replay_soft_interrupts(void) if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) WARN_ON_ONCE(mfmsr() & MSR_EE); - if (happened & PACA_IRQ_HARD_DIS) { - /* - * We may have missed a decrementer interrupt if hard disabled. - * Check the decrementer register in case we had a rollover - * while hard disabled. - */ - if (!(happened & PACA_IRQ_DEC)) { - if (decrementer_check_overflow()) - happened |= PACA_IRQ_DEC; - } - } - /* * Force the delivery of pending soft-disabled interrupts on PS3. * Any HV call will have this side effect. 
@@ -345,6 +299,7 @@ notrace void arch_local_irq_restore(unsigned long mask) if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) WARN_ON_ONCE(!(mfmsr() & MSR_EE)); __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; } else { /* * We should already be hard disabled here. We had bugs diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 92481463f9dc..5f9c323049eb 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -553,14 +553,11 @@ void timer_interrupt(struct pt_regs *regs) struct pt_regs *old_regs; u64 now; - /* Some implementations of hotplug will get timer interrupts while - * offline, just ignore these and we also need to set - * decrementers_next_tb as MAX to make sure __check_irq_replay - * don't replay timer interrupt when return, otherwise we'll trap - * here infinitely :( + /* + * Some implementations of hotplug will get timer interrupts while + * offline, just ignore these. */ if (unlikely(!cpu_online(smp_processor_id()))) { - *next_tb = ~(u64)0; set_dec(decrementer_max); return; } diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index d95954ad4c0a..c61c3b62c8c6 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -731,7 +731,7 @@ int opal_hmi_exception_early2(struct pt_regs *regs) return 1; } -/* HMI exception handler called in virtual mode during check_irq_replay. */ +/* HMI exception handler called in virtual mode when irqs are next enabled. */ int opal_handle_hmi_exception(struct pt_regs *regs) { /* From aa8e21c053d72b6639ea5a7f1d3a1d0209534c94 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Wed, 25 Nov 2020 02:26:55 -0500 Subject: [PATCH 266/304] powerpc/perf: Exclude kernel samples while counting events in user space. Perf event attribute supports the exclude_kernel flag to avoid sampling/profiling in supervisor state (kernel). Based on this event attr flag, the Monitor Mode Control Register bit is set to freeze on supervisor state. But sometimes (due to hardware limitation), the Sampled Instruction Address Register (SIAR) locks on to a kernel address even when freeze on supervisor is set. This patch adds a check to drop those samples. Cc: stable@vger.kernel.org Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1606289215-1433-1-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/core-book3s.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index c0e5ea3d6b25..b027884173ee 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2158,6 +2158,16 @@ static void record_and_restart(struct perf_event *event, unsigned long val, local64_set(&event->hw.period_left, left); perf_event_update_userpage(event); + /* + * Due to hardware limitation, sometimes SIAR could sample a kernel + * address even when freeze on supervisor state (kernel) is set in + * MMCR2. Check attr.exclude_kernel and address to drop the sample in + * these cases. + */ + if (event->attr.exclude_kernel && record) + if (is_kernel_addr(mfspr(SPRN_SIAR))) + record = 0; + /* * Finally record data if requested.
*/ From 02b02ee1b05ef225525835b2d45faf31b3420bdd Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 11 Jul 2019 12:24:04 +1000 Subject: [PATCH 267/304] powerpc/64s: Remove idle workaround code from restore_cpu_cpufeatures Idle code no longer uses the .cpu_restore CPU operation to restore SPRs, so this workaround is no longer required. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190711022404.18132-2-npiggin@gmail.com --- arch/powerpc/kernel/dt_cpu_ftrs.c | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index bd8faa21d3dd..b5478b72c08c 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -69,7 +69,6 @@ static int hv_mode; static struct { u64 lpcr; - u64 lpcr_clear; u64 hfscr; u64 fscr; u64 pcr; @@ -79,24 +78,7 @@ static void (*init_pmu_registers)(void); static void __restore_cpu_cpufeatures(void) { - u64 lpcr; - - /* - * LPCR is restored by the power on engine already. It can be changed - * after early init e.g., by radix enable, and we have no unified API - * for saving and restoring such SPRs. - * - * This ->restore hook should really be removed from idle and register - * restore moved directly into the idle restore code, because this code - * doesn't know how idle is implemented or what it needs restored here. - * - * The best we can do to accommodate secondary boot and idle restore - * for now is "or" LPCR with existing. - */ - lpcr = mfspr(SPRN_LPCR); - lpcr |= system_registers.lpcr; - lpcr &= ~system_registers.lpcr_clear; - mtspr(SPRN_LPCR, lpcr); + mtspr(SPRN_LPCR, system_registers.lpcr); if (hv_mode) { mtspr(SPRN_LPID, 0); mtspr(SPRN_HFSCR, system_registers.hfscr); @@ -310,7 +292,6 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f) { u64 lpcr; - system_registers.lpcr_clear |= (LPCR_ISL | LPCR_UPRT | LPCR_HR); lpcr = mfspr(SPRN_LPCR); lpcr &= ~(LPCR_ISL | LPCR_UPRT | LPCR_HR); mtspr(SPRN_LPCR, lpcr); From ef6879f8c8053cc3b493f400a06d452d7fb13650 Mon Sep 17 00:00:00 2001 From: Balamuruhan S Date: Sun, 11 Oct 2020 10:39:04 +0530 Subject: [PATCH 268/304] powerpc/sstep: Emulate prefixed instructions only when CPU_FTR_ARCH_31 is set Unconditional emulation of prefixed instructions will allow emulation of them on Power10 predecessors which might cause issues. Restrict that. 
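As context for the hunks that follow (an illustrative sketch only, not part of the applied diff): prefixed instructions occupy primary opcode 1 in the prefix word, so the guard amounts to refusing that opcode on pre-ISA v3.1 CPUs. The helper name below is hypothetical; cpu_has_feature() and CPU_FTR_ARCH_31 are the kernel interfaces used in the diff:

	/*
	 * Sketch: bail out of emulation for prefixed (primary opcode 1)
	 * instruction words unless the CPU implements ISA v3.1, mirroring
	 * the checks added to analyse_instr() below.
	 */
	static bool can_emulate_insn_word(u32 word)
	{
		if (((word >> 26) & 0x3f) != 1)	/* not a prefix word */
			return true;
		return cpu_has_feature(CPU_FTR_ARCH_31);
	}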
Fixes: 3920742b92f5 ("powerpc sstep: Add support for prefixed fixed-point arithmetic") Fixes: 50b80a12e4cc ("powerpc sstep: Add support for prefixed load/stores") Signed-off-by: Balamuruhan S Signed-off-by: Ravi Bangoria Reviewed-by: Sandipan Das Reviewed-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201011050908.72173-2-ravi.bangoria@linux.ibm.com --- arch/powerpc/lib/sstep.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 855457ed09b5..bf2cd3d42125 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1346,6 +1346,9 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, switch (opcode) { #ifdef __powerpc64__ case 1: + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -1; + prefix_r = GET_PREFIX_R(word); ra = GET_PREFIX_RA(suffix); rd = (suffix >> 21) & 0x1f; @@ -2733,6 +2736,9 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, } break; case 1: /* Prefixed instructions */ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -1; + prefix_r = GET_PREFIX_R(word); ra = GET_PREFIX_RA(suffix); op->update_reg = ra; From 1817de2f141c718f1a0ae59927ec003e9b144349 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Sun, 11 Oct 2020 10:39:05 +0530 Subject: [PATCH 269/304] powerpc/sstep: Cover new VSX instructions under CONFIG_VSX Recently added Power10 prefixed VSX instructions are included unconditionally in the kernel. If they are executed on a machine without VSX support, it might create issues. Fix that. Also fix one mnemonic spelling mistake in a comment. Fixes: 50b80a12e4cc ("powerpc sstep: Add support for prefixed load/stores") Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201011050908.72173-3-ravi.bangoria@linux.ibm.com --- arch/powerpc/lib/sstep.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index bf2cd3d42125..b18bce1a209f 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -2757,6 +2757,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 41: /* plwa */ op->type = MKOP(LOAD, PREFIXED | SIGNEXT, 4); break; +#ifdef CONFIG_VSX case 42: /* plxsd */ op->reg = rd + 32; op->type = MKOP(LOAD_VSX, PREFIXED, 8); @@ -2797,13 +2798,14 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->element_size = 16; op->vsx_flags = VSX_CHECK_VEC; break; +#endif /* CONFIG_VSX */ case 56: /* plq */ op->type = MKOP(LOAD, PREFIXED, 16); break; case 57: /* pld */ op->type = MKOP(LOAD, PREFIXED, 8); break; - case 60: /* stq */ + case 60: /* pstq */ op->type = MKOP(STORE, PREFIXED, 16); break; case 61: /* pstd */ From af99da74333b06d97ea6a9a2cd8906244da4bb26 Mon Sep 17 00:00:00 2001 From: Balamuruhan S Date: Sun, 11 Oct 2020 10:39:06 +0530 Subject: [PATCH 270/304] powerpc/sstep: Support VSX vector paired storage access instructions VSX Vector Paired instructions load/store an octword (32 bytes) from/to storage into two sequential VSRs. Add emulation support for these new instructions: * Load VSX Vector Paired (lxvp) * Load VSX Vector Paired Indexed (lxvpx) * Prefixed Load VSX Vector Paired (plxvp) * Store VSX Vector Paired (stxvp) * Store VSX Vector Paired Indexed (stxvpx) * Prefixed Store VSX Vector Paired (pstxvp) [kernel test robot reported a build failure] Reported-by: kernel test robot Suggested-by: Naveen N.
Rao Signed-off-by: Balamuruhan S Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201011050908.72173-4-ravi.bangoria@linux.ibm.com --- arch/powerpc/lib/sstep.c | 150 +++++++++++++++++++++++++++++++++------ 1 file changed, 129 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index b18bce1a209f..bf7a7d62ae8b 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -32,6 +32,10 @@ extern char system_call_vectored_emulate[]; #define XER_OV32 0x00080000U #define XER_CA32 0x00040000U +#ifdef CONFIG_VSX +#define VSX_REGISTER_XTP(rd) ((((rd) & 1) << 5) | ((rd) & 0xfe)) +#endif + #ifdef CONFIG_PPC_FPU /* * Functions in ldstfp.S @@ -279,6 +283,19 @@ static nokprobe_inline void do_byte_reverse(void *ptr, int nb) up[1] = tmp; break; } + case 32: { + unsigned long *up = (unsigned long *)ptr; + unsigned long tmp; + + tmp = byterev_8(up[0]); + up[0] = byterev_8(up[3]); + up[3] = tmp; + tmp = byterev_8(up[2]); + up[2] = byterev_8(up[1]); + up[1] = tmp; + break; + } + #endif default: WARN_ON_ONCE(1); @@ -709,6 +726,8 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, reg->d[0] = reg->d[1] = 0; switch (op->element_size) { + case 32: + /* [p]lxvp[x] */ case 16: /* whole vector; lxv[x] or lxvl[l] */ if (size == 0) @@ -717,7 +736,7 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, if (IS_LE && (op->vsx_flags & VSX_LDLEFT)) rev = !rev; if (rev) - do_byte_reverse(reg, 16); + do_byte_reverse(reg, size); break; case 8: /* scalar loads, lxvd2x, lxvdsx */ @@ -793,6 +812,20 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, size = GETSIZE(op->type); switch (op->element_size) { + case 32: + /* [p]stxvp[x] */ + if (size == 0) + break; + if (rev) { + /* reverse 32 bytes */ + buf.d[0] = byterev_8(reg->d[3]); + buf.d[1] = byterev_8(reg->d[2]); + buf.d[2] = byterev_8(reg->d[1]); + buf.d[3] = byterev_8(reg->d[0]); + reg = &buf; + } + memcpy(mem, reg, size); + break; case 16: /* stxv, stxvx, stxvl, stxvll */ if (size == 0) @@ -861,28 +894,43 @@ static nokprobe_inline int do_vsx_load(struct instruction_op *op, bool cross_endian) { int reg = op->reg; - u8 mem[16]; - union vsx_reg buf; + int i, j, nr_vsx_regs; + u8 mem[32]; + union vsx_reg buf[2]; int size = GETSIZE(op->type); if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs)) return -EFAULT; - emulate_vsx_load(op, &buf, mem, cross_endian); + nr_vsx_regs = size / sizeof(__vector128); + emulate_vsx_load(op, buf, mem, cross_endian); preempt_disable(); if (reg < 32) { /* FP regs + extensions */ if (regs->msr & MSR_FP) { - load_vsrn(reg, &buf); + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + load_vsrn(reg + i, &buf[j].v); + } } else { - current->thread.fp_state.fpr[reg][0] = buf.d[0]; - current->thread.fp_state.fpr[reg][1] = buf.d[1]; + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + current->thread.fp_state.fpr[reg + i][0] = buf[j].d[0]; + current->thread.fp_state.fpr[reg + i][1] = buf[j].d[1]; + } } } else { - if (regs->msr & MSR_VEC) - load_vsrn(reg, &buf); - else - current->thread.vr_state.vr[reg - 32] = buf.v; + if (regs->msr & MSR_VEC) { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + load_vsrn(reg + i, &buf[j].v); + } + } else { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? 
nr_vsx_regs - i - 1 : i; + current->thread.vr_state.vr[reg - 32 + i] = buf[j].v; + } + } } preempt_enable(); return 0; @@ -893,30 +941,45 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op, bool cross_endian) { int reg = op->reg; - u8 mem[16]; - union vsx_reg buf; + int i, j, nr_vsx_regs; + u8 mem[32]; + union vsx_reg buf[2]; int size = GETSIZE(op->type); if (!address_ok(regs, ea, size)) return -EFAULT; + nr_vsx_regs = size / sizeof(__vector128); preempt_disable(); if (reg < 32) { /* FP regs + extensions */ if (regs->msr & MSR_FP) { - store_vsrn(reg, &buf); + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + store_vsrn(reg + i, &buf[j].v); + } } else { - buf.d[0] = current->thread.fp_state.fpr[reg][0]; - buf.d[1] = current->thread.fp_state.fpr[reg][1]; + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + buf[j].d[0] = current->thread.fp_state.fpr[reg + i][0]; + buf[j].d[1] = current->thread.fp_state.fpr[reg + i][1]; + } } } else { - if (regs->msr & MSR_VEC) - store_vsrn(reg, &buf); - else - buf.v = current->thread.vr_state.vr[reg - 32]; + if (regs->msr & MSR_VEC) { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + store_vsrn(reg + i, &buf[j].v); + } + } else { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + buf[j].v = current->thread.vr_state.vr[reg - 32 + i]; + } + } } preempt_enable(); - emulate_vsx_store(op, &buf, mem, cross_endian); + emulate_vsx_store(op, buf, mem, cross_endian); return copy_mem_out(mem, ea, size, regs); } #endif /* CONFIG_VSX */ @@ -2403,6 +2466,14 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->vsx_flags = VSX_SPLAT; break; + case 333: /* lxvpx */ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -1; + op->reg = VSX_REGISTER_XTP(rd); + op->type = MKOP(LOAD_VSX, 0, 32); + op->element_size = 32; + break; + case 364: /* lxvwsx */ op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 4); @@ -2431,6 +2502,13 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, VSX_CHECK_VEC; break; } + case 461: /* stxvpx */ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -1; + op->reg = VSX_REGISTER_XTP(rd); + op->type = MKOP(STORE_VSX, 0, 32); + op->element_size = 32; + break; case 524: /* lxsspx */ op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 4); @@ -2672,6 +2750,22 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #endif #ifdef CONFIG_VSX + case 6: + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -1; + op->ea = dqform_ea(word, regs); + op->reg = VSX_REGISTER_XTP(rd); + op->element_size = 32; + switch (word & 0xf) { + case 0: /* lxvp */ + op->type = MKOP(LOAD_VSX, 0, 32); + break; + case 1: /* stxvp */ + op->type = MKOP(STORE_VSX, 0, 32); + break; + } + break; + case 61: /* stfdp, lxv, stxsd, stxssp, stxv */ switch (word & 7) { case 0: /* stfdp with LSB of DS field = 0 */ @@ -2805,12 +2899,26 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 57: /* pld */ op->type = MKOP(LOAD, PREFIXED, 8); break; +#ifdef CONFIG_VSX + case 58: /* plxvp */ + op->reg = VSX_REGISTER_XTP(rd); + op->type = MKOP(LOAD_VSX, PREFIXED, 32); + op->element_size = 32; + break; +#endif /* CONFIG_VSX */ case 60: /* pstq */ op->type = MKOP(STORE, PREFIXED, 16); break; case 61: /* pstd */ op->type = MKOP(STORE, PREFIXED, 8); break; +#ifdef CONFIG_VSX + case 62: /* pstxvp */ + op->reg = VSX_REGISTER_XTP(rd); + op->type = MKOP(STORE_VSX, PREFIXED, 32); 
+ op->element_size = 32; + break; +#endif /* CONFIG_VSX */ } break; case 1: /* Type 01 Eight-Byte Register-to-Register */ From 6ce73ba7691555fd182bc68529dc03cbd4378f72 Mon Sep 17 00:00:00 2001 From: Balamuruhan S Date: Sun, 11 Oct 2020 10:39:07 +0530 Subject: [PATCH 271/304] powerpc/ppc-opcode: Add encoding macros for VSX vector paired instructions Add instruction encodings, DQ, D0, D1 immediate, XTP, XSP operands as macros for new VSX vector paired instructions, * Load VSX Vector Paired (lxvp) * Load VSX Vector Paired Indexed (lxvpx) * Prefixed Load VSX Vector Paired (plxvp) * Store VSX Vector Paired (stxvp) * Store VSX Vector Paired Indexed (stxvpx) * Prefixed Store VSX Vector Paired (pstxvp) Suggested-by: Naveen N. Rao Signed-off-by: Balamuruhan S Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201011050908.72173-5-ravi.bangoria@linux.ibm.com --- arch/powerpc/include/asm/ppc-opcode.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index da6f300e9788..ed161ef2b3ca 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -78,6 +78,9 @@ #define IMM_L(i) ((uintptr_t)(i) & 0xffff) #define IMM_DS(i) ((uintptr_t)(i) & 0xfffc) +#define IMM_DQ(i) ((uintptr_t)(i) & 0xfff0) +#define IMM_D0(i) (((uintptr_t)(i) >> 16) & 0x3ffff) +#define IMM_D1(i) IMM_L(i) /* * 16-bit immediate helper macros: HA() is for use with sign-extending instrs @@ -294,6 +297,8 @@ #define __PPC_XB(b) ((((b) & 0x1f) << 11) | (((b) & 0x20) >> 4)) #define __PPC_XS(s) ((((s) & 0x1f) << 21) | (((s) & 0x20) >> 5)) #define __PPC_XT(s) __PPC_XS(s) +#define __PPC_XSP(s) ((((s) & 0x1e) | (((s) >> 5) & 0x1)) << 21) +#define __PPC_XTP(s) __PPC_XSP(s) #define __PPC_T_TLB(t) (((t) & 0x3) << 21) #define __PPC_WC(w) (((w) & 0x3) << 21) #define __PPC_WS(w) (((w) & 0x1f) << 11) @@ -394,6 +399,14 @@ #define PPC_RAW_XVCPSGNDP(t, a, b) ((0xf0000780 | VSX_XX3((t), (a), (b)))) #define PPC_RAW_VPERMXOR(vrt, vra, vrb, vrc) \ ((0x1000002d | ___PPC_RT(vrt) | ___PPC_RA(vra) | ___PPC_RB(vrb) | (((vrc) & 0x1f) << 6))) +#define PPC_RAW_LXVP(xtp, a, i) (0x18000000 | __PPC_XTP(xtp) | ___PPC_RA(a) | IMM_DQ(i)) +#define PPC_RAW_STXVP(xsp, a, i) (0x18000001 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_DQ(i)) +#define PPC_RAW_LXVPX(xtp, a, b) (0x7c00029a | __PPC_XTP(xtp) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_STXVPX(xsp, a, b) (0x7c00039a | __PPC_XSP(xsp) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_PLXVP(xtp, i, a, pr) \ + ((PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i)) << 32 | (0xe8000000 | __PPC_XTP(xtp) | ___PPC_RA(a) | IMM_D1(i))) +#define PPC_RAW_PSTXVP(xsp, i, a, pr) \ + ((PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i)) << 32 | (0xf8000000 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_D1(i))) #define PPC_RAW_NAP (0x4c000364) #define PPC_RAW_SLEEP (0x4c0003a4) #define PPC_RAW_WINKLE (0x4c0003e4) From 35785b293da0070a8df19b0193f0e7de6c9eaecb Mon Sep 17 00:00:00 2001 From: Balamuruhan S Date: Sun, 11 Oct 2020 10:39:08 +0530 Subject: [PATCH 272/304] powerpc/sstep: Add testcases for VSX vector paired load/store instructions Add testcases for VSX vector paired load/store instructions. 
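For reference, the register operand 34 used throughout these testcases is the raw VSR pair number: per the comments in the tests, XTp = 32xTX + 2xTp, so TX=1 and Tp=1 selects the sequential pair {VSR34, VSR35}. A minimal sketch of that arithmetic, for illustration only:

	int tx = 1, tp = 1;
	int xtp = 32 * tx + 2 * tp;	/* = 34: the tests operate on VSRs 34 and 35 */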
Sample o/p: emulate_step_test: lxvp : PASS emulate_step_test: stxvp : PASS emulate_step_test: lxvpx : PASS emulate_step_test: stxvpx : PASS emulate_step_test: plxvp : PASS emulate_step_test: pstxvp : PASS Signed-off-by: Balamuruhan S Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201011050908.72173-6-ravi.bangoria@linux.ibm.com --- arch/powerpc/lib/test_emulate_step.c | 270 +++++++++++++++++++++++++++ 1 file changed, 270 insertions(+) diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index 0a201b771477..783d1b85ecfe 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ b/arch/powerpc/lib/test_emulate_step.c @@ -612,6 +612,273 @@ static void __init test_lxvd2x_stxvd2x(void) } #endif /* CONFIG_VSX */ +#ifdef CONFIG_VSX +static void __init test_lxvp_stxvp(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c[2]; + u32 cached_b[8]; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("lxvp", "SKIP (!CPU_FTR_ARCH_31)"); + show_result("stxvp", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + + /*** lxvp ***/ + + cached_b[0] = c[0].b[0] = 18233; + cached_b[1] = c[0].b[1] = 34863571; + cached_b[2] = c[0].b[2] = 834; + cached_b[3] = c[0].b[3] = 6138911; + cached_b[4] = c[1].b[0] = 1234; + cached_b[5] = c[1].b[1] = 5678; + cached_b[6] = c[1].b[2] = 91011; + cached_b[7] = c[1].b[3] = 121314; + + regs.gpr[4] = (unsigned long)&c[0].a; + + /* + * lxvp XTp,DQ(RA) + * XTp = 32xTX + 2xTp + * let TX=1 Tp=1 RA=4 DQ=0 + */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LXVP(34, 4, 0))); + + if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { + show_result("lxvp", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("lxvp", "PASS (!CPU_FTR_VSX)"); + else + show_result("lxvp", "FAIL"); + } + + /*** stxvp ***/ + + c[0].b[0] = 21379463; + c[0].b[1] = 87; + c[0].b[2] = 374234; + c[0].b[3] = 4; + c[1].b[0] = 90; + c[1].b[1] = 122; + c[1].b[2] = 555; + c[1].b[3] = 32144; + + /* + * stxvp XSp,DQ(RA) + * XSp = 32xSX + 2xSp + * let SX=1 Sp=1 RA=4 DQ=0 + */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STXVP(34, 4, 0))); + + if (stepped == 1 && cached_b[0] == c[0].b[0] && cached_b[1] == c[0].b[1] && + cached_b[2] == c[0].b[2] && cached_b[3] == c[0].b[3] && + cached_b[4] == c[1].b[0] && cached_b[5] == c[1].b[1] && + cached_b[6] == c[1].b[2] && cached_b[7] == c[1].b[3] && + cpu_has_feature(CPU_FTR_VSX)) { + show_result("stxvp", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("stxvp", "PASS (!CPU_FTR_VSX)"); + else + show_result("stxvp", "FAIL"); + } +} +#else +static void __init test_lxvp_stxvp(void) +{ + show_result("lxvp", "SKIP (CONFIG_VSX is not set)"); + show_result("stxvp", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + +#ifdef CONFIG_VSX +static void __init test_lxvpx_stxvpx(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c[2]; + u32 cached_b[8]; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("lxvpx", "SKIP (!CPU_FTR_ARCH_31)"); + show_result("stxvpx", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + + /*** lxvpx ***/ + + cached_b[0] = c[0].b[0] = 18233; + cached_b[1] = c[0].b[1] = 34863571; + cached_b[2] = c[0].b[2] = 834; + cached_b[3] = c[0].b[3] = 6138911; + cached_b[4] = c[1].b[0] = 1234; + cached_b[5] = c[1].b[1] = 5678; + cached_b[6] = c[1].b[2] = 91011; + cached_b[7] = c[1].b[3] = 121314; + + regs.gpr[3] = (unsigned long)&c[0].a; + 
regs.gpr[4] = 0; + + /* + * lxvpx XTp,RA,RB + * XTp = 32xTX + 2xTp + * let TX=1 Tp=1 RA=3 RB=4 + */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LXVPX(34, 3, 4))); + + if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { + show_result("lxvpx", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("lxvpx", "PASS (!CPU_FTR_VSX)"); + else + show_result("lxvpx", "FAIL"); + } + + /*** stxvpx ***/ + + c[0].b[0] = 21379463; + c[0].b[1] = 87; + c[0].b[2] = 374234; + c[0].b[3] = 4; + c[1].b[0] = 90; + c[1].b[1] = 122; + c[1].b[2] = 555; + c[1].b[3] = 32144; + + /* + * stxvpx XSp,RA,RB + * XSp = 32xSX + 2xSp + * let SX=1 Sp=1 RA=3 RB=4 + */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STXVPX(34, 3, 4))); + + if (stepped == 1 && cached_b[0] == c[0].b[0] && cached_b[1] == c[0].b[1] && + cached_b[2] == c[0].b[2] && cached_b[3] == c[0].b[3] && + cached_b[4] == c[1].b[0] && cached_b[5] == c[1].b[1] && + cached_b[6] == c[1].b[2] && cached_b[7] == c[1].b[3] && + cpu_has_feature(CPU_FTR_VSX)) { + show_result("stxvpx", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("stxvpx", "PASS (!CPU_FTR_VSX)"); + else + show_result("stxvpx", "FAIL"); + } +} +#else +static void __init test_lxvpx_stxvpx(void) +{ + show_result("lxvpx", "SKIP (CONFIG_VSX is not set)"); + show_result("stxvpx", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + +#ifdef CONFIG_VSX +static void __init test_plxvp_pstxvp(void) +{ + struct ppc_inst instr; + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c[2]; + u32 cached_b[8]; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("plxvp", "SKIP (!CPU_FTR_ARCH_31)"); + show_result("pstxvp", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + /*** plxvp ***/ + + cached_b[0] = c[0].b[0] = 18233; + cached_b[1] = c[0].b[1] = 34863571; + cached_b[2] = c[0].b[2] = 834; + cached_b[3] = c[0].b[3] = 6138911; + cached_b[4] = c[1].b[0] = 1234; + cached_b[5] = c[1].b[1] = 5678; + cached_b[6] = c[1].b[2] = 91011; + cached_b[7] = c[1].b[3] = 121314; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long)&c[0].a; + + /* + * plxvp XTp,D(RA),R + * XTp = 32xTX + 2xTp + * let RA=3 R=0 D=d0||d1=0 R=0 Tp=1 TX=1 + */ + instr = ppc_inst_prefix(PPC_RAW_PLXVP(34, 0, 3, 0) >> 32, + PPC_RAW_PLXVP(34, 0, 3, 0) & 0xffffffff); + + stepped = emulate_step(®s, instr); + if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { + show_result("plxvp", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("plxvp", "PASS (!CPU_FTR_VSX)"); + else + show_result("plxvp", "FAIL"); + } + + /*** pstxvp ***/ + + c[0].b[0] = 21379463; + c[0].b[1] = 87; + c[0].b[2] = 374234; + c[0].b[3] = 4; + c[1].b[0] = 90; + c[1].b[1] = 122; + c[1].b[2] = 555; + c[1].b[3] = 32144; + + /* + * pstxvp XSp,D(RA),R + * XSp = 32xSX + 2xSp + * let RA=3 D=d0||d1=0 R=0 Sp=1 SX=1 + */ + instr = ppc_inst_prefix(PPC_RAW_PSTXVP(34, 0, 3, 0) >> 32, + PPC_RAW_PSTXVP(34, 0, 3, 0) & 0xffffffff); + + stepped = emulate_step(®s, instr); + + if (stepped == 1 && cached_b[0] == c[0].b[0] && cached_b[1] == c[0].b[1] && + cached_b[2] == c[0].b[2] && cached_b[3] == c[0].b[3] && + cached_b[4] == c[1].b[0] && cached_b[5] == c[1].b[1] && + cached_b[6] == c[1].b[2] && cached_b[7] == c[1].b[3] && + cpu_has_feature(CPU_FTR_VSX)) { + show_result("pstxvp", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("pstxvp", "PASS (!CPU_FTR_VSX)"); + else + show_result("pstxvp", "FAIL"); + } +} +#else +static void __init test_plxvp_pstxvp(void) +{ + show_result("plxvp", "SKIP 
(CONFIG_VSX is not set)"); + show_result("pstxvp", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + static void __init run_tests_load_store(void) { test_ld(); @@ -628,6 +895,9 @@ static void __init run_tests_load_store(void) test_plfd_pstfd(); test_lvx_stvx(); test_lxvd2x_stxvd2x(); + test_lxvp_stxvp(); + test_lxvpx_stxvpx(); + test_plxvp_pstxvp(); } struct compute_test { From 3d2ffcdd2a982e8bbe65fa0f94fb21bf304c281e Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Fri, 6 Nov 2020 10:26:50 +0530 Subject: [PATCH 273/304] powerpc/watchpoint: Workaround P10 DD1 issue with VSX-32 byte instructions POWER10 DD1 has an issue where it generates watchpoint exceptions when it shouldn't. The conditions where this occurs are: - octword op - ending address of DAWR range is less than starting address of op - those addresses need to be in the same or in two consecutive 512B blocks - 'op address + 64B' generates an address that has a carry into bit 52 (crosses 2K boundary) Handle such spurious exceptions by considering them extraneous and emulating/single-stepping the instruction without generating an event. [ravi: Fixed build warning reported by lkp@intel.com] Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201106045650.278987-1-ravi.bangoria@linux.ibm.com --- arch/powerpc/kernel/hw_breakpoint.c | 67 ++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index f4e8f21046f5..8fc7a14e4d71 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -499,6 +499,11 @@ static bool is_larx_stcx_instr(int type) return type == LARX || type == STCX; } +static bool is_octword_vsx_instr(int type, int size) +{ + return ((type == LOAD_VSX || type == STORE_VSX) && size == 32); +} + /* * We've failed in reliably handling the hw-breakpoint. Unregister * it and throw a warning message to let the user know about it. @@ -549,6 +554,58 @@ static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, return true; } +static void handle_p10dd1_spurious_exception(struct arch_hw_breakpoint **info, + int *hit, unsigned long ea) +{ + int i; + unsigned long hw_end_addr; + + /* + * Handle spurious exception only when any bp_per_reg is set. + * Otherwise this might be created by xmon and not actually a + * spurious exception. + */ + for (i = 0; i < nr_wp_slots(); i++) { + if (!info[i]) + continue; + + hw_end_addr = ALIGN(info[i]->address + info[i]->len, HW_BREAKPOINT_SIZE); + + /* + * Ending address of DAWR range is less than starting + * address of op. + */ + if ((hw_end_addr - 1) >= ea) + continue; + + /* + * Those addresses need to be in the same or in two + * consecutive 512B blocks; + */ + if (((hw_end_addr - 1) >> 10) != (ea >> 10)) + continue; + + /* + * 'op address + 64B' generates an address that has a + * carry into bit 52 (crosses 2K boundary).
+ */ + if ((ea & 0x800) == ((ea + 64) & 0x800)) + continue; + + break; + } + + if (i == nr_wp_slots()) + return; + + for (i = 0; i < nr_wp_slots(); i++) { + if (info[i]) { + hit[i] = 1; + info[i]->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + } + } +} + int hw_breakpoint_handler(struct die_args *args) { bool err = false; @@ -607,8 +664,14 @@ int hw_breakpoint_handler(struct die_args *args) goto reset; if (!nr_hit) { - rc = NOTIFY_DONE; - goto out; + /* Workaround for Power10 DD1 */ + if (!IS_ENABLED(CONFIG_PPC_8xx) && mfspr(SPRN_PVR) == 0x800100 && + is_octword_vsx_instr(type, size)) { + handle_p10dd1_spurious_exception(info, hit, ea); + } else { + rc = NOTIFY_DONE; + goto out; + } } /* From 790a1662d3a26fe9fa5f691386d8fde6bb8b0dc2 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Thu, 10 Dec 2020 16:08:55 +0530 Subject: [PATCH 274/304] powerpc/smp: Parse ibm,thread-groups with multiple properties The "ibm,thread-groups" device-tree property is an array that is used to indicate if groups of threads within a core share certain properties. It provides details of which property is being shared by which groups of threads. This array can encode information about multiple properties being shared by different thread-groups within the core. Example: Suppose, "ibm,thread-groups" = [1,2,4,8,10,12,14,9,11,13,15,2,2,4,8,10,12,14,9,11,13,15] This can be decomposed into two consecutive arrays: a) [1,2,4,8,10,12,14,9,11,13,15] b) [2,2,4,8,10,12,14,9,11,13,15] wherein, a) provides information of Property "1" being shared by "2" groups, each with "4" threads. The "ibm,ppc-interrupt-server#s" of the first group is {8,10,12,14} and the "ibm,ppc-interrupt-server#s" of the second group is {9,11,13,15}. Property "1" is indicative of the threads in the group sharing L1 cache, translation cache and Instruction Data flow. b) provides information of Property "2" being shared by "2" groups, each group with "4" threads. The "ibm,ppc-interrupt-server#s" of the first group is {8,10,12,14} and the "ibm,ppc-interrupt-server#s" of the second group is {9,11,13,15}. Property "2" indicates that the threads in each group share the L2-cache. The existing code assumes that the "ibm,thread-groups" encodes information about only one property. Hence even on platforms which encode information about multiple properties being shared by the corresponding groups of threads, the current code will only pick the first one. (In the above example, it will only consider [1,2,4,8,10,12,14,9,11,13,15] but not [2,2,4,8,10,12,14,9,11,13,15]). This patch extends the parsing support to platforms which encode information about multiple properties being shared by the corresponding groups of threads. Signed-off-by: Gautham R.
Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1607596739-32439-2-git-send-email-ego@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 172 +++++++++++++++++++++++++------------- 1 file changed, 112 insertions(+), 60 deletions(-) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 8c2857cbd960..88d88ad907a7 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -106,6 +106,15 @@ struct thread_groups { unsigned int thread_list[MAX_THREAD_LIST_SIZE]; }; +/* Maximum number of properties that groups of threads within a core can share */ +#define MAX_THREAD_GROUP_PROPERTIES 1 + +struct thread_groups_list { + unsigned int nr_properties; + struct thread_groups property_tgs[MAX_THREAD_GROUP_PROPERTIES]; +}; + +static struct thread_groups_list tgl[NR_CPUS] __initdata; /* * On big-cores system, cpu_l1_cache_map for each CPU corresponds to * the set its siblings that share the L1-cache. @@ -695,81 +704,98 @@ static void or_cpumasks_related(int i, int j, struct cpumask *(*srcmask)(int), /* * parse_thread_groups: Parses the "ibm,thread-groups" device tree * property for the CPU device node @dn and stores - * the parsed output in the thread_groups - * structure @tg if the ibm,thread-groups[0] - * matches @property. + * the parsed output in the thread_groups_list + * structure @tglp. * * @dn: The device node of the CPU device. - * @tg: Pointer to a thread group structure into which the parsed + * @tglp: Pointer to a thread group list structure into which the parsed * output of "ibm,thread-groups" is stored. - * @property: The property of the thread-group that the caller is - * interested in. * * ibm,thread-groups[0..N-1] array defines which group of threads in * the CPU-device node can be grouped together based on the property. * - * ibm,thread-groups[0] tells us the property based on which the + * This array can represent thread groupings for multiple properties. + * + * ibm,thread-groups[i + 0] tells us the property based on which the * threads are being grouped together. If this value is 1, it implies * that the threads in the same group share L1, translation cache. * - * ibm,thread-groups[1] tells us how many such thread groups exist. + * ibm,thread-groups[i+1] tells us how many such thread groups exist for the + * property ibm,thread-groups[i] * - * ibm,thread-groups[2] tells us the number of threads in each such + * ibm,thread-groups[i+2] tells us the number of threads in each such * group. + * Suppose k = (ibm,thread-groups[i+1] * ibm,thread-groups[i+2]), then, * - * ibm,thread-groups[3..N-1] is the list of threads identified by + * ibm,thread-groups[i+3..i+k+2] (is the list of threads identified by * "ibm,ppc-interrupt-server#s" arranged as per their membership in * the grouping. * - * Example: If ibm,thread-groups = [1,2,4,5,6,7,8,9,10,11,12] it - * implies that there are 2 groups of 4 threads each, where each group - * of threads share L1, translation cache. + * Example: + * If "ibm,thread-groups" = [1,2,4,8,10,12,14,9,11,13,15,2,2,4,8,10,12,14,9,11,13,15] + * This can be decomposed up into two consecutive arrays: + * a) [1,2,4,8,10,12,14,9,11,13,15] + * b) [2,2,4,8,10,12,14,9,11,13,15] * - * The "ibm,ppc-interrupt-server#s" of the first group is {5,6,7,8} - * and the "ibm,ppc-interrupt-server#s" of the second group is {9, 10, - * 11, 12} structure + * where in, + * + * a) provides information of Property "1" being shared by "2" groups, + * each with "4" threads each. 
The "ibm,ppc-interrupt-server#s" of + * the first group is {8,10,12,14} and the + * "ibm,ppc-interrupt-server#s" of the second group is + * {9,11,13,15}. Property "1" is indicative of the thread in the + * group sharing L1 cache, translation cache and Instruction Data + * flow. + * + * b) provides information of Property "2" being shared by "2" groups, + * each group with "4" threads. The "ibm,ppc-interrupt-server#s" of + * the first group is {8,10,12,14} and the + * "ibm,ppc-interrupt-server#s" of the second group is + * {9,11,13,15}. Property "2" indicates that the threads in each + * group share the L2-cache. * * Returns 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. */ static int parse_thread_groups(struct device_node *dn, - struct thread_groups *tg, - unsigned int property) + struct thread_groups_list *tglp) { - int i; - u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE]; - u32 *thread_list; + unsigned int property_idx = 0; + u32 *thread_group_array; size_t total_threads; - int ret; + int ret = 0, count; + u32 *thread_list; + int i = 0; + count = of_property_count_u32_elems(dn, "ibm,thread-groups"); + thread_group_array = kcalloc(count, sizeof(u32), GFP_KERNEL); ret = of_property_read_u32_array(dn, "ibm,thread-groups", - thread_group_array, 3); + thread_group_array, count); if (ret) - return ret; + goto out_free; - tg->property = thread_group_array[0]; - tg->nr_groups = thread_group_array[1]; - tg->threads_per_group = thread_group_array[2]; - if (tg->property != property || - tg->nr_groups < 1 || - tg->threads_per_group < 1) - return -ENODATA; + while (i < count && property_idx < MAX_THREAD_GROUP_PROPERTIES) { + int j; + struct thread_groups *tg = &tglp->property_tgs[property_idx++]; - total_threads = tg->nr_groups * tg->threads_per_group; + tg->property = thread_group_array[i]; + tg->nr_groups = thread_group_array[i + 1]; + tg->threads_per_group = thread_group_array[i + 2]; + total_threads = tg->nr_groups * tg->threads_per_group; - ret = of_property_read_u32_array(dn, "ibm,thread-groups", - thread_group_array, - 3 + total_threads); - if (ret) - return ret; + thread_list = &thread_group_array[i + 3]; - thread_list = &thread_group_array[3]; + for (j = 0; j < total_threads; j++) + tg->thread_list[j] = thread_list[j]; + i = i + 3 + total_threads; + } - for (i = 0 ; i < total_threads; i++) - tg->thread_list[i] = thread_list[i]; + tglp->nr_properties = property_idx; - return 0; +out_free: + kfree(thread_group_array); + return ret; } /* @@ -805,50 +831,76 @@ static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg) return -1; } +static struct thread_groups *__init get_thread_groups(int cpu, + int group_property, + int *err) +{ + struct device_node *dn = of_get_cpu_node(cpu, NULL); + struct thread_groups_list *cpu_tgl = &tgl[cpu]; + struct thread_groups *tg = NULL; + int i; + *err = 0; + + if (!dn) { + *err = -ENODATA; + return NULL; + } + + if (!cpu_tgl->nr_properties) { + *err = parse_thread_groups(dn, cpu_tgl); + if (*err) + goto out; + } + + for (i = 0; i < cpu_tgl->nr_properties; i++) { + if (cpu_tgl->property_tgs[i].property == group_property) { + tg = &cpu_tgl->property_tgs[i]; + break; + } + } + + if (!tg) + *err = -EINVAL; +out: + of_node_put(dn); + return tg; +} + static int init_cpu_l1_cache_map(int cpu) { - struct device_node *dn = of_get_cpu_node(cpu, NULL); - struct thread_groups tg = {.property = 0, - .nr_groups = 0, - .threads_per_group = 0}; int 
first_thread = cpu_first_thread_sibling(cpu); int i, cpu_group_start = -1, err = 0; + struct thread_groups *tg = NULL; - if (!dn) - return -ENODATA; + tg = get_thread_groups(cpu, THREAD_GROUP_SHARE_L1, + &err); + if (!tg) + return err; - err = parse_thread_groups(dn, &tg, THREAD_GROUP_SHARE_L1); - if (err) - goto out; - - cpu_group_start = get_cpu_thread_group_start(cpu, &tg); + cpu_group_start = get_cpu_thread_group_start(cpu, tg); if (unlikely(cpu_group_start == -1)) { WARN_ON_ONCE(1); - err = -ENODATA; - goto out; + return -ENODATA; } zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu), GFP_KERNEL, cpu_to_node(cpu)); for (i = first_thread; i < first_thread + threads_per_core; i++) { - int i_group_start = get_cpu_thread_group_start(i, &tg); + int i_group_start = get_cpu_thread_group_start(i, tg); if (unlikely(i_group_start == -1)) { WARN_ON_ONCE(1); - err = -ENODATA; - goto out; + return -ENODATA; } if (i_group_start == cpu_group_start) cpumask_set_cpu(i, per_cpu(cpu_l1_cache_map, cpu)); } -out: - of_node_put(dn); - return err; + return 0; } static bool shared_caches; From 1fdc1d6632ff3f6813a2f15b65586bde8fe0f0ba Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Thu, 10 Dec 2020 16:08:56 +0530 Subject: [PATCH 275/304] powerpc/smp: Rename cpu_l1_cache_map as thread_group_l1_cache_map On platforms which have the "ibm,thread-groups" property, the per-cpu variable cpu_l1_cache_map keeps track of which group of threads within the same core share the L1 cache, Instruction and Data flow. This patch renames the variable to "thread_group_l1_cache_map" to make it consistent with a subsequent patch which will introduce thread_group_l2_cache_map. This patch introduces no functional change. Signed-off-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1607596739-32439-3-git-send-email-ego@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 88d88ad907a7..f3290d57fea6 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -116,10 +116,10 @@ struct thread_groups_list { static struct thread_groups_list tgl[NR_CPUS] __initdata; /* - * On big-cores system, cpu_l1_cache_map for each CPU corresponds to + * On big-cores system, thread_group_l1_cache_map for each CPU corresponds to * the set its siblings that share the L1-cache.
*/ -DEFINE_PER_CPU(cpumask_var_t, cpu_l1_cache_map); +DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); /* SMP operations for this machine */ struct smp_ops_t *smp_ops; @@ -866,7 +866,7 @@ static struct thread_groups *__init get_thread_groups(int cpu, return tg; } -static int init_cpu_l1_cache_map(int cpu) +static int init_thread_group_l1_cache_map(int cpu) { int first_thread = cpu_first_thread_sibling(cpu); @@ -885,7 +885,7 @@ static int init_cpu_l1_cache_map(int cpu) return -ENODATA; } - zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu), + zalloc_cpumask_var_node(&per_cpu(thread_group_l1_cache_map, cpu), GFP_KERNEL, cpu_to_node(cpu)); for (i = first_thread; i < first_thread + threads_per_core; i++) { @@ -897,7 +897,7 @@ static int init_cpu_l1_cache_map(int cpu) } if (i_group_start == cpu_group_start) - cpumask_set_cpu(i, per_cpu(cpu_l1_cache_map, cpu)); + cpumask_set_cpu(i, per_cpu(thread_group_l1_cache_map, cpu)); } return 0; @@ -976,7 +976,7 @@ static int init_big_cores(void) int cpu; for_each_possible_cpu(cpu) { - int err = init_cpu_l1_cache_map(cpu); + int err = init_thread_group_l1_cache_map(cpu); if (err) return err; @@ -1372,7 +1372,7 @@ static inline void add_cpu_to_smallcore_masks(int cpu) cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu)); - for_each_cpu(i, per_cpu(cpu_l1_cache_map, cpu)) { + for_each_cpu(i, per_cpu(thread_group_l1_cache_map, cpu)) { if (cpu_online(i)) set_cpus_related(i, cpu, cpu_smallcore_mask); } From fbd2b672e91d276b9fa5a729e4a823ba29fa2692 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Thu, 10 Dec 2020 16:08:57 +0530 Subject: [PATCH 276/304] powerpc/smp: Rename init_thread_group_l1_cache_map() to make it generic init_thread_group_l1_cache_map() initializes the per-cpu cpumask thread_group_l1_cache_map with the core-siblings which share L1 cache with the CPU. Make this function generic to the cache-property (L1 or L2) and update a suitable mask. This is a preparatory patch for the next patch where we will introduce discovery of thread-groups that share L2-cache. No functional change. Signed-off-by: Gautham R. 
Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1607596739-32439-4-git-send-email-ego@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index f3290d57fea6..9078b5b5d6e4 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -866,15 +866,18 @@ static struct thread_groups *__init get_thread_groups(int cpu, return tg; } -static int init_thread_group_l1_cache_map(int cpu) +static int __init init_thread_group_cache_map(int cpu, int cache_property) { int first_thread = cpu_first_thread_sibling(cpu); int i, cpu_group_start = -1, err = 0; struct thread_groups *tg = NULL; + cpumask_var_t *mask; - tg = get_thread_groups(cpu, THREAD_GROUP_SHARE_L1, - &err); + if (cache_property != THREAD_GROUP_SHARE_L1) + return -EINVAL; + + tg = get_thread_groups(cpu, cache_property, &err); if (!tg) return err; @@ -885,8 +888,8 @@ static int init_thread_group_l1_cache_map(int cpu) return -ENODATA; } - zalloc_cpumask_var_node(&per_cpu(thread_group_l1_cache_map, cpu), - GFP_KERNEL, cpu_to_node(cpu)); + mask = &per_cpu(thread_group_l1_cache_map, cpu); + zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu)); for (i = first_thread; i < first_thread + threads_per_core; i++) { int i_group_start = get_cpu_thread_group_start(i, tg); @@ -897,7 +900,7 @@ static int init_thread_group_l1_cache_map(int cpu) } if (i_group_start == cpu_group_start) - cpumask_set_cpu(i, per_cpu(thread_group_l1_cache_map, cpu)); + cpumask_set_cpu(i, *mask); } return 0; @@ -976,7 +979,7 @@ static int init_big_cores(void) int cpu; for_each_possible_cpu(cpu) { - int err = init_thread_group_l1_cache_map(cpu); + int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L1); if (err) return err; From 9538abee18cca70ffd03cef56027388b0c5084cc Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Thu, 10 Dec 2020 16:08:58 +0530 Subject: [PATCH 277/304] powerpc/smp: Add support detecting thread-groups sharing L2 cache On POWER systems, groups of threads within a core sharing the L2-cache can be indicated by the "ibm,thread-groups" property array with the identifier "2". This patch adds support for detecting this, and when present, populates the cpu_l2_cache_mask of every CPU with the core-siblings which share the L2 cache with the CPU, as specified by the "ibm,thread-groups" property array. On a platform with the following "ibm,thread-groups" configuration 00000001 00000002 00000004 00000000 00000002 00000004 00000006 00000001 00000003 00000005 00000007 00000002 00000002 00000004 00000000 00000002 00000004 00000006 00000001 00000003 00000005 00000007 Without this patch, the sched-domain hierarchy for CPUs 0,1 would be CPU0 attaching sched-domain(s): domain-0: span=0,2,4,6 level=SMT domain-1: span=0-7 level=CACHE domain-2: span=0-15,24-39,48-55 level=MC domain-3: span=0-55 level=DIE CPU1 attaching sched-domain(s): domain-0: span=1,3,5,7 level=SMT domain-1: span=0-7 level=CACHE domain-2: span=0-15,24-39,48-55 level=MC domain-3: span=0-55 level=DIE The CACHE domain at 0-7 is incorrect since the ibm,thread-groups sub-array [00000002 00000002 00000004 00000000 00000002 00000004 00000006 00000001 00000003 00000005 00000007] indicates that L2 (Property "2") is shared only between the threads of a single group. There are "2" groups of threads where each group contains "4" threads each. The groups being {0,2,4,6} and {1,3,5,7}.
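For illustration (not part of the patch), decoding one sub-array of the flattened property uses the same stride as parse_thread_groups() earlier in this series. The variable names below are hypothetical; tga points at the start of a sub-array:

	u32 property          = tga[0];  /* 1 = L1/translation, 2 = L2 */
	u32 nr_groups         = tga[1];  /* here: 2 groups */
	u32 threads_per_group = tga[2];  /* here: 4 threads per group */
	u32 *thread_list      = &tga[3]; /* nr_groups * threads_per_group entries */
	/* the next sub-array, if any, starts at &tga[3 + nr_groups * threads_per_group] */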
With this patch, the sched-domain hierarchy for CPUs 0,1 would be CPU0 attaching sched-domain(s): domain-0: span=0,2,4,6 level=SMT domain-1: span=0-15,24-39,48-55 level=MC domain-2: span=0-55 level=DIE CPU1 attaching sched-domain(s): domain-0: span=1,3,5,7 level=SMT domain-1: span=0-15,24-39,48-55 level=MC domain-2: span=0-55 level=DIE The CACHE domain with span=0,2,4,6 for CPU 0 (span=1,3,5,7 for CPU 1 resp.) gets degenerated into the SMT domain. Furthermore, the last-level-cache domain gets correctly set to the SMT sched-domain. Signed-off-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1607596739-32439-5-git-send-email-ego@linux.vnet.ibm.com --- arch/powerpc/include/asm/smp.h | 2 ++ arch/powerpc/kernel/smp.c | 58 +++++++++++++++++++++++++++++++--- 2 files changed, 55 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index b2035b2f57ce..035459ce6a1a 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -134,6 +134,7 @@ static inline struct cpumask *cpu_smallcore_mask(int cpu) extern int cpu_to_core_id(int cpu); extern bool has_big_cores; +extern bool thread_group_shares_l2; #define cpu_smt_mask cpu_smt_mask #ifdef CONFIG_SCHED_SMT @@ -187,6 +188,7 @@ extern void __cpu_die(unsigned int cpu); /* for UP */ #define hard_smp_processor_id() get_hard_smp_processor_id(0) #define smp_setup_cpu_maps() +#define thread_group_shares_l2 0 static inline void inhibit_secondary_onlining(void) {} static inline void uninhibit_secondary_onlining(void) {} static inline const struct cpumask *cpu_sibling_mask(int cpu) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9078b5b5d6e4..2b9b1bb4c5f2 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -76,6 +76,7 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 }; struct task_struct *secondary_current; bool has_big_cores; bool coregroup_enabled; +bool thread_group_shares_l2; DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map); @@ -99,6 +100,7 @@ enum { #define MAX_THREAD_LIST_SIZE 8 #define THREAD_GROUP_SHARE_L1 1 +#define THREAD_GROUP_SHARE_L2 2 struct thread_groups { unsigned int property; unsigned int nr_groups; @@ -107,7 +109,7 @@ struct thread_groups { }; /* Maximum number of properties that groups of threads within a core can share */ -#define MAX_THREAD_GROUP_PROPERTIES 1 +#define MAX_THREAD_GROUP_PROPERTIES 2 struct thread_groups_list { unsigned int nr_properties; @@ -121,6 +123,13 @@ static struct thread_groups_list tgl[NR_CPUS] __initdata; */ DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); +/* + * On some big-cores system, thread_group_l2_cache_map for each CPU + * corresponds to the set its siblings within the core that share the + * L2-cache. + */ +DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); + /* SMP operations for this machine */ struct smp_ops_t *smp_ops; @@ -718,7 +727,9 @@ static void or_cpumasks_related(int i, int j, struct cpumask *(*srcmask)(int), * * ibm,thread-groups[i + 0] tells us the property based on which the * threads are being grouped together. If this value is 1, it implies - * that the threads in the same group share L1, translation cache. + * that the threads in the same group share L1, translation cache. If + * the value is 2, it implies that the threads in the same group share + * the same L2 cache. 
* * ibm,thread-groups[i+1] tells us how many such thread groups exist for the * property ibm,thread-groups[i] @@ -872,9 +883,10 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property) int first_thread = cpu_first_thread_sibling(cpu); int i, cpu_group_start = -1, err = 0; struct thread_groups *tg = NULL; - cpumask_var_t *mask; + cpumask_var_t *mask = NULL; - if (cache_property != THREAD_GROUP_SHARE_L1) + if (cache_property != THREAD_GROUP_SHARE_L1 && + cache_property != THREAD_GROUP_SHARE_L2) return -EINVAL; tg = get_thread_groups(cpu, cache_property, &err); @@ -888,7 +900,11 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property) return -ENODATA; } - mask = &per_cpu(thread_group_l1_cache_map, cpu); + if (cache_property == THREAD_GROUP_SHARE_L1) + mask = &per_cpu(thread_group_l1_cache_map, cpu); + else if (cache_property == THREAD_GROUP_SHARE_L2) + mask = &per_cpu(thread_group_l2_cache_map, cpu); + zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu)); for (i = first_thread; i < first_thread + threads_per_core; i++) { @@ -990,6 +1006,16 @@ static int init_big_cores(void) } has_big_cores = true; + + for_each_possible_cpu(cpu) { + int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2); + + if (err) + return err; + } + + thread_group_shares_l2 = true; + pr_debug("L2 cache only shared by the threads in the small core\n"); return 0; } @@ -1304,6 +1330,28 @@ static bool update_mask_by_l2(int cpu, cpumask_var_t *mask) if (has_big_cores) submask_fn = cpu_smallcore_mask; + /* + * If the threads in a thread-group share L2 cache, then the + * L2-mask can be obtained from thread_group_l2_cache_map. + */ + if (thread_group_shares_l2) { + cpumask_set_cpu(cpu, cpu_l2_cache_mask(cpu)); + + for_each_cpu(i, per_cpu(thread_group_l2_cache_map, cpu)) { + if (cpu_online(i)) + set_cpus_related(i, cpu, cpu_l2_cache_mask); + } + + /* Verify that L1-cache siblings are a subset of L2 cache-siblings */ + if (!cpumask_equal(submask_fn(cpu), cpu_l2_cache_mask(cpu)) && + !cpumask_subset(submask_fn(cpu), cpu_l2_cache_mask(cpu))) { + pr_warn_once("CPU %d : Inconsistent L1 and L2 cache siblings\n", + cpu); + } + + return true; + } + l2_cache = cpu_to_l2cache(cpu); if (!l2_cache || !*mask) { /* Assume only core siblings share cache with this CPU */ From 0be47634db0baa9e91c7e635e7e73355d6a5cf43 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Thu, 10 Dec 2020 16:08:59 +0530 Subject: [PATCH 278/304] powerpc/cacheinfo: Print correct cache-sibling map/list for L2 cache On POWER platforms where only some groups of threads within a core share the L2-cache (indicated by the ibm,thread-groups device-tree property), we currently print the incorrect shared_cpu_map/list for L2-cache in the sysfs. This patch reports the correct shared_cpu_map/list on such platforms. Example: On a platform with "ibm,thread-groups" set to 00000001 00000002 00000004 00000000 00000002 00000004 00000006 00000001 00000003 00000005 00000007 00000002 00000002 00000004 00000000 00000002 00000004 00000006 00000001 00000003 00000005 00000007 This indicates that threads {0,2,4,6} in the core share the L2-cache and threads {1,3,5,7} in the core share the L2 cache. 
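As a quick check on the sysfs bitmaps quoted below, the two thread groups map directly onto the corrected shared_cpu_map values; a minimal sketch, for illustration only:

	/* threads {0,2,4,6}: bits 0,2,4,6 set */
	unsigned long even_mask = (1UL << 0) | (1UL << 2) | (1UL << 4) | (1UL << 6); /* 0x55 */
	/* threads {1,3,5,7}: bits 1,3,5,7 set */
	unsigned long odd_mask  = (1UL << 1) | (1UL << 3) | (1UL << 5) | (1UL << 7); /* 0xaa */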
However, without the patch, the shared_cpu_map/list for L2 for CPUs 0, 1 is reported in the sysfs as follows: /sys/devices/system/cpu/cpu0/cache/index2/shared_cpu_list:0-7 /sys/devices/system/cpu/cpu0/cache/index2/shared_cpu_map:000000,000000ff /sys/devices/system/cpu/cpu1/cache/index2/shared_cpu_list:0-7 /sys/devices/system/cpu/cpu1/cache/index2/shared_cpu_map:000000,000000ff With the patch, the shared_cpu_map/list for L2 cache for CPUs 0, 1 is correctly reported as follows: /sys/devices/system/cpu/cpu0/cache/index2/shared_cpu_list:0,2,4,6 /sys/devices/system/cpu/cpu0/cache/index2/shared_cpu_map:000000,00000055 /sys/devices/system/cpu/cpu1/cache/index2/shared_cpu_list:1,3,5,7 /sys/devices/system/cpu/cpu1/cache/index2/shared_cpu_map:000000,000000aa This patch also defines cpu_l2_cache_mask() for !CONFIG_SMP case. Signed-off-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1607596739-32439-6-git-send-email-ego@linux.vnet.ibm.com --- arch/powerpc/include/asm/smp.h | 4 ++++ arch/powerpc/kernel/cacheinfo.c | 30 ++++++++++++++++++++---------- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 035459ce6a1a..c4e2d53acd2b 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -201,6 +201,10 @@ static inline const struct cpumask *cpu_smallcore_mask(int cpu) return cpumask_of(cpu); } +static inline const struct cpumask *cpu_l2_cache_mask(int cpu) +{ + return cpumask_of(cpu); +} #endif /* CONFIG_SMP */ #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 65ab9fcebd31..6f903e9aa20b 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -655,11 +655,27 @@ static unsigned int index_dir_to_cpu(struct cache_index_dir *index) * On big-core systems, each core has two groups of CPUs each of which * has its own L1-cache. The thread-siblings which share l1-cache with * @cpu can be obtained via cpu_smallcore_mask(). + * + * On some big-core systems, the L2 cache is shared only between some + * groups of siblings. This is already parsed and encoded in + * cpu_l2_cache_mask(). + * + * TODO: cache_lookup_or_instantiate() needs to be made aware of the + * "ibm,thread-groups" property so that cache->shared_cpu_map + * reflects the correct siblings on platforms that have this + * device-tree property. This helper function is only a stop-gap + * solution so that we report the correct siblings to the + * userspace via sysfs. 
*/ -static const struct cpumask *get_big_core_shared_cpu_map(int cpu, struct cache *cache) +static const struct cpumask *get_shared_cpu_map(struct cache_index_dir *index, struct cache *cache) { - if (cache->level == 1) - return cpu_smallcore_mask(cpu); + if (has_big_cores) { + int cpu = index_dir_to_cpu(index); + if (cache->level == 1) + return cpu_smallcore_mask(cpu); + if (cache->level == 2 && thread_group_shares_l2) + return cpu_l2_cache_mask(cpu); + } return &cache->shared_cpu_map; } @@ -670,17 +686,11 @@ show_shared_cpumap(struct kobject *k, struct kobj_attribute *attr, char *buf, bo struct cache_index_dir *index; struct cache *cache; const struct cpumask *mask; - int cpu; index = kobj_to_cache_index_dir(k); cache = index->cache; - if (has_big_cores) { - cpu = index_dir_to_cpu(index); - mask = get_big_core_shared_cpu_map(cpu, cache); - } else { - mask = &cache->shared_cpu_map; - } + mask = get_shared_cpu_map(index, cache); return cpumap_print_to_pagebuf(list, buf, mask); } From 98983675008ab3ae9b37fc7a4bfa083998079215 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 10 Dec 2020 18:14:38 +0100 Subject: [PATCH 279/304] KVM: PPC: Book3S HV: XIVE: Show detailed configuration in debug output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is useful to track allocation of the HW resources on a per-guest basis. Making sure IPIs are local to the chip of the vCPUs reduces rerouting between interrupt controllers and gives better performance in case of pinning. Checking the distribution of VP structures on the chips also helps in reducing PowerBUS traffic. [ clg: resurrected show_sources and reworked output ] Signed-off-by: Greg Kurz Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201210171450.1933725-2-clg@kaod.org --- arch/powerpc/kvm/book3s_xive.c | 76 ++++++++++++++++++++++----- arch/powerpc/kvm/book3s_xive.h | 2 + arch/powerpc/kvm/book3s_xive_native.c | 21 ++++++-- 3 files changed, 82 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 85215e79db42..773e8e8c0015 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -2128,9 +2128,8 @@ int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu) if (!q->qpage && !xc->esc_virq[i]) continue; - seq_printf(m, " [q%d]: ", i); - if (q->qpage) { + seq_printf(m, " q[%d]: ", i); idx = q->idx; i0 = be32_to_cpup(q->qpage + idx); idx = (idx + 1) & q->msk; @@ -2144,16 +2143,54 @@ int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu) irq_data_get_irq_handler_data(d); u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET); - seq_printf(m, "E:%c%c I(%d:%llx:%llx)", - (pq & XIVE_ESB_VAL_P) ? 'P' : 'p', - (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q', - xc->esc_virq[i], pq, xd->eoi_page); + seq_printf(m, " ESC %d %c%c EOI @%llx", + xc->esc_virq[i], + (pq & XIVE_ESB_VAL_P) ? 'P' : '-', + (pq & XIVE_ESB_VAL_Q) ?
'Q' : '-', + xd->eoi_page); seq_puts(m, "\n"); } } return 0; } +void kvmppc_xive_debug_show_sources(struct seq_file *m, + struct kvmppc_xive_src_block *sb) +{ + int i; + + seq_puts(m, " LISN HW/CHIP TYPE PQ EISN CPU/PRIO\n"); + for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { + struct kvmppc_xive_irq_state *state = &sb->irq_state[i]; + struct xive_irq_data *xd; + u64 pq; + u32 hw_num; + + if (!state->valid) + continue; + + kvmppc_xive_select_irq(state, &hw_num, &xd); + + pq = xive_vm_esb_load(xd, XIVE_ESB_GET); + + seq_printf(m, "%08x %08x/%02x", state->number, hw_num, + xd->src_chip); + if (state->lsi) + seq_printf(m, " %cLSI", state->asserted ? '^' : ' '); + else + seq_puts(m, " MSI"); + + seq_printf(m, " %s %c%c %08x % 4d/%d", + state->ipi_number == hw_num ? "IPI" : " PT", + pq & XIVE_ESB_VAL_P ? 'P' : '-', + pq & XIVE_ESB_VAL_Q ? 'Q' : '-', + state->eisn, state->act_server, + state->act_priority); + + seq_puts(m, "\n"); + } +} + static int xive_debug_show(struct seq_file *m, void *private) { struct kvmppc_xive *xive = m->private; @@ -2174,7 +2211,7 @@ static int xive_debug_show(struct seq_file *m, void *private) if (!kvm) return 0; - seq_printf(m, "=========\nVCPU state\n=========\n"); + seq_puts(m, "=========\nVCPU state\n=========\n"); kvm_for_each_vcpu(i, vcpu, kvm) { struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; @@ -2182,11 +2219,12 @@ static int xive_debug_show(struct seq_file *m, void *private) if (!xc) continue; - seq_printf(m, "cpu server %#x VP:%#x CPPR:%#x HWCPPR:%#x" - " MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n", - xc->server_num, xc->vp_id, xc->cppr, xc->hw_cppr, - xc->mfrr, xc->pending, - xc->stat_rm_h_xirr, xc->stat_vm_h_xirr); + seq_printf(m, "VCPU %d: VP:%#x/%02x\n" + " CPPR:%#x HWCPPR:%#x MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n", + xc->server_num, xc->vp_id, xc->vp_chip_id, + xc->cppr, xc->hw_cppr, + xc->mfrr, xc->pending, + xc->stat_rm_h_xirr, xc->stat_vm_h_xirr); kvmppc_xive_debug_show_queues(m, vcpu); @@ -2202,13 +2240,25 @@ static int xive_debug_show(struct seq_file *m, void *private) t_vm_h_ipi += xc->stat_vm_h_ipi; } - seq_printf(m, "Hcalls totals\n"); + seq_puts(m, "Hcalls totals\n"); seq_printf(m, " H_XIRR R=%10lld V=%10lld\n", t_rm_h_xirr, t_vm_h_xirr); seq_printf(m, " H_IPOLL R=%10lld V=%10lld\n", t_rm_h_ipoll, t_vm_h_ipoll); seq_printf(m, " H_CPPR R=%10lld V=%10lld\n", t_rm_h_cppr, t_vm_h_cppr); seq_printf(m, " H_EOI R=%10lld V=%10lld\n", t_rm_h_eoi, t_vm_h_eoi); seq_printf(m, " H_IPI R=%10lld V=%10lld\n", t_rm_h_ipi, t_vm_h_ipi); + seq_puts(m, "=========\nSources\n=========\n"); + + for (i = 0; i <= xive->max_sbid; i++) { + struct kvmppc_xive_src_block *sb = xive->src_blocks[i]; + + if (sb) { + arch_spin_lock(&sb->lock); + kvmppc_xive_debug_show_sources(m, sb); + arch_spin_unlock(&sb->lock); + } + } + return 0; } diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index 382e3a56e789..d5d4fee7ac94 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -290,6 +290,8 @@ extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr); */ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu); int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu); +void kvmppc_xive_debug_show_sources(struct seq_file *m, + struct kvmppc_xive_src_block *sb); struct kvmppc_xive_src_block *kvmppc_xive_create_src_block( struct kvmppc_xive *xive, int irq); void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb); diff --git a/arch/powerpc/kvm/book3s_xive_native.c 
b/arch/powerpc/kvm/book3s_xive_native.c index a59a94f02733..7f120cf9c594 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -1219,18 +1219,31 @@ static int xive_native_debug_show(struct seq_file *m, void *private) if (!xc) continue; - seq_printf(m, "cpu server %#x VP=%#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n", - xc->server_num, xc->vp_id, + seq_printf(m, "VCPU %d: VP=%#x/%02x\n" + " NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n", + xc->server_num, xc->vp_id, xc->vp_chip_id, vcpu->arch.xive_saved_state.nsr, vcpu->arch.xive_saved_state.cppr, vcpu->arch.xive_saved_state.ipb, vcpu->arch.xive_saved_state.pipr, - vcpu->arch.xive_saved_state.w01, - (u32) vcpu->arch.xive_cam_word); + be64_to_cpu(vcpu->arch.xive_saved_state.w01), + be32_to_cpu(vcpu->arch.xive_cam_word)); kvmppc_xive_debug_show_queues(m, vcpu); } + seq_puts(m, "=========\nSources\n=========\n"); + + for (i = 0; i <= xive->max_sbid; i++) { + struct kvmppc_xive_src_block *sb = xive->src_blocks[i]; + + if (sb) { + arch_spin_lock(&sb->lock); + kvmppc_xive_debug_show_sources(m, sb); + arch_spin_unlock(&sb->lock); + } + } + return 0; } From 4f1c3f7b08187e6b97701c7fb2dc6f3749566c62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 10 Dec 2020 18:14:39 +0100 Subject: [PATCH 280/304] powerpc/xive: Rename XIVE_IRQ_NO_EOI to show its a flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a simple cleanup to identify easily all flags of the XIVE interrupt structure. The interrupts flagged with XIVE_IRQ_FLAG_NO_EOI are the escalations used to wake up vCPUs in KVM. They are handled very differently from the rest. Signed-off-by: Cédric Le Goater Reviewed-by: Greg Kurz Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201210171450.1933725-3-clg@kaod.org --- arch/powerpc/include/asm/xive.h | 2 +- arch/powerpc/kvm/book3s_xive.c | 4 ++-- arch/powerpc/sysdev/xive/common.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 309b4d65b74f..d332dd9a18de 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -66,7 +66,7 @@ struct xive_irq_data { #define XIVE_IRQ_FLAG_H_INT_ESB 0x20 /* Special flag set by KVM for excalation interrupts */ -#define XIVE_IRQ_NO_EOI 0x80 +#define XIVE_IRQ_FLAG_NO_EOI 0x80 #define XIVE_INVALID_CHIP_ID -1 diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 773e8e8c0015..7f60d1353d0e 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -219,7 +219,7 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, /* In single escalation mode, we grab the ESB MMIO of the * interrupt and mask it. Also populate the VCPU v/raddr * of the ESB page for use by asm entry/exit code. Finally - * set the XIVE_IRQ_NO_EOI flag which will prevent the + * set the XIVE_IRQ_FLAG_NO_EOI flag which will prevent the * core code from performing an EOI on the escalation * interrupt, thus leaving it effectively masked after * it fires once. 
@@ -231,7 +231,7 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01); vcpu->arch.xive_esc_raddr = xd->eoi_page; vcpu->arch.xive_esc_vaddr = (__force u64)xd->eoi_mmio; - xd->flags |= XIVE_IRQ_NO_EOI; + xd->flags |= XIVE_IRQ_FLAG_NO_EOI; } return 0; diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index a80440af491a..65af34ac1fa2 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -416,7 +416,7 @@ static void xive_irq_eoi(struct irq_data *d) * been passed-through to a KVM guest */ if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) && - !(xd->flags & XIVE_IRQ_NO_EOI)) + !(xd->flags & XIVE_IRQ_FLAG_NO_EOI)) xive_do_source_eoi(irqd_to_hwirq(d), xd); else xd->stale_p = true; From e2cf43d59525477cfd030378c3c808187952c531 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 10 Dec 2020 18:14:40 +0100 Subject: [PATCH 281/304] powerpc/xive: Introduce XIVE_IPI_HW_IRQ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The XIVE driver deals with CPU IPIs in a peculiar way. Each CPU has its own XIVE IPI interrupt allocated at the HW level, for PowerNV, or at the hypervisor level for pSeries. In practice, these interrupts are not always used. pSeries/PowerVM prefers local doorbells for local threads since they are faster. On PowerNV, global doorbells are also preferred for the same reason. The mapping in the Linux is reduced to a single interrupt using HW interrupt number 0 and a custom irq_chip to handle EOI. This can cause performance issues in some benchmark (ipistorm) on multichip systems. Clarify the use of the 0 value, it will help in improving multichip support. Signed-off-by: Cédric Le Goater Reviewed-by: Greg Kurz Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201210171450.1933725-4-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 10 +++++----- arch/powerpc/sysdev/xive/xive-internal.h | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 65af34ac1fa2..ee375daf8114 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1142,7 +1142,7 @@ static void __init xive_request_ipi(void) return; /* Initialize it */ - virq = irq_create_mapping(xive_irq_domain, 0); + virq = irq_create_mapping(xive_irq_domain, XIVE_IPI_HW_IRQ); xive_ipi_irq = virq; WARN_ON(request_irq(virq, xive_muxed_ipi_action, @@ -1242,7 +1242,7 @@ static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq, #ifdef CONFIG_SMP /* IPIs are special and come up with HW number 0 */ - if (hw == 0) { + if (hw == XIVE_IPI_HW_IRQ) { /* * IPIs are marked per-cpu. We use separate HW interrupts under * the hood but associated with the same "linux" interrupt @@ -1271,7 +1271,7 @@ static void xive_irq_domain_unmap(struct irq_domain *d, unsigned int virq) if (!data) return; hw_irq = (unsigned int)irqd_to_hwirq(data); - if (hw_irq) + if (hw_irq != XIVE_IPI_HW_IRQ) xive_irq_free_data(virq); } @@ -1421,7 +1421,7 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc) * Ignore anything that isn't a XIVE irq and ignore * IPIs, so can just be dropped. 
*/ - if (d->domain != xive_irq_domain || hw_irq == 0) + if (d->domain != xive_irq_domain || hw_irq == XIVE_IPI_HW_IRQ) continue; /* @@ -1655,7 +1655,7 @@ static int xive_core_debug_show(struct seq_file *m, void *private) hw_irq = (unsigned int)irqd_to_hwirq(d); /* IPIs are special (HW number 0) */ - if (hw_irq) + if (hw_irq != XIVE_IPI_HW_IRQ) xive_debug_show_irq(m, hw_irq, d); } return 0; diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h index b7b901da2168..d701af7fb48c 100644 --- a/arch/powerpc/sysdev/xive/xive-internal.h +++ b/arch/powerpc/sysdev/xive/xive-internal.h @@ -5,6 +5,8 @@ #ifndef __XIVE_INTERNAL_H #define __XIVE_INTERNAL_H +#define XIVE_IPI_HW_IRQ 0 /* interrupt source # for IPIs */ + /* * A "disabled" interrupt should never fire, to catch problems * we set its logical number to this From 9dfe4b14df93532da3dbf11952a17389ae3cdc67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 10 Dec 2020 18:14:42 +0100 Subject: [PATCH 282/304] powerpc/xive: Add a name to the IRQ domain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We hope one day to handle multiple irq_domain in the XIVE driver. Start simple by setting the name using the DT node. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201210171450.1933725-6-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 10 +++++----- arch/powerpc/sysdev/xive/native.c | 2 +- arch/powerpc/sysdev/xive/spapr.c | 2 +- arch/powerpc/sysdev/xive/xive-internal.h | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index ee375daf8114..8b1fe72a6a95 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1310,9 +1310,9 @@ static const struct irq_domain_ops xive_irq_domain_ops = { .xlate = xive_irq_domain_xlate, }; -static void __init xive_init_host(void) +static void __init xive_init_host(struct device_node *np) { - xive_irq_domain = irq_domain_add_nomap(NULL, XIVE_MAX_IRQ, + xive_irq_domain = irq_domain_add_nomap(np, XIVE_MAX_IRQ, &xive_irq_domain_ops, NULL); if (WARN_ON(xive_irq_domain == NULL)) return; @@ -1513,8 +1513,8 @@ void xive_shutdown(void) xive_ops->shutdown(); } -bool __init xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset, - u8 max_prio) +bool __init xive_core_init(struct device_node *np, const struct xive_ops *ops, + void __iomem *area, u32 offset, u8 max_prio) { xive_tima = area; xive_tima_offset = offset; @@ -1525,7 +1525,7 @@ bool __init xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 o __xive_enabled = true; pr_devel("Initializing host..\n"); - xive_init_host(); + xive_init_host(np); pr_devel("Initializing boot CPU..\n"); diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index cb58ec7ce77a..c3182ec9ed65 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -622,7 +622,7 @@ bool __init xive_native_init(void) xive_native_setup_pools(); /* Initialize XIVE core with our backend */ - if (!xive_core_init(&xive_native_ops, tima, TM_QW3_HV_PHYS, + if (!xive_core_init(np, &xive_native_ops, tima, TM_QW3_HV_PHYS, max_prio)) { opal_xive_reset(OPAL_XIVE_MODE_EMU); return false; diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 1e3674d7ea7b..6610e5149d5a 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ 
b/arch/powerpc/sysdev/xive/spapr.c @@ -857,7 +857,7 @@ bool __init xive_spapr_init(void) } /* Initialize XIVE core with our backend */ - if (!xive_core_init(&xive_spapr_ops, tima, TM_QW1_OS, max_prio)) + if (!xive_core_init(np, &xive_spapr_ops, tima, TM_QW1_OS, max_prio)) return false; pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10)); diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h index d701af7fb48c..c07fadb9d264 100644 --- a/arch/powerpc/sysdev/xive/xive-internal.h +++ b/arch/powerpc/sysdev/xive/xive-internal.h @@ -63,8 +63,8 @@ struct xive_ops { const char *name; }; -bool xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset, - u8 max_prio); +bool xive_core_init(struct device_node *np, const struct xive_ops *ops, + void __iomem *area, u32 offset, u8 max_prio); __be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift); int xive_core_debug_init(void); From a5021abc48a0f44083a15a37b3e61378519cb00d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 10 Dec 2020 18:14:43 +0100 Subject: [PATCH 283/304] powerpc/xive: Add a debug_show handler to the XIVE irq_domain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Full state of the Linux interrupt descriptors can be dumped under debugfs when compiled with CONFIG_GENERIC_IRQ_DEBUGFS. Add support for the XIVE interrupt controller. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201210171450.1933725-7-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 58 +++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 8b1fe72a6a95..61a5f08798e9 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1303,11 +1303,69 @@ static int xive_irq_domain_match(struct irq_domain *h, struct device_node *node, return xive_ops->match(node); } +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS +static const char * const esb_names[] = { "RESET", "OFF", "PENDING", "QUEUED" }; + +static const struct { + u64 mask; + char *name; +} xive_irq_flags[] = { + { XIVE_IRQ_FLAG_STORE_EOI, "STORE_EOI" }, + { XIVE_IRQ_FLAG_LSI, "LSI" }, + { XIVE_IRQ_FLAG_SHIFT_BUG, "SHIFT_BUG" }, + { XIVE_IRQ_FLAG_MASK_FW, "MASK_FW" }, + { XIVE_IRQ_FLAG_EOI_FW, "EOI_FW" }, + { XIVE_IRQ_FLAG_H_INT_ESB, "H_INT_ESB" }, + { XIVE_IRQ_FLAG_NO_EOI, "NO_EOI" }, +}; + +static void xive_irq_domain_debug_show(struct seq_file *m, struct irq_domain *d, + struct irq_data *irqd, int ind) +{ + struct xive_irq_data *xd; + u64 val; + int i; + + /* No IRQ domain level information. To be done */ + if (!irqd) + return; + + if (!is_xive_irq(irq_data_get_irq_chip(irqd))) + return; + + seq_printf(m, "%*sXIVE:\n", ind, ""); + ind++; + + xd = irq_data_get_irq_handler_data(irqd); + if (!xd) { + seq_printf(m, "%*snot assigned\n", ind, ""); + return; + } + + val = xive_esb_read(xd, XIVE_ESB_GET); + seq_printf(m, "%*sESB: %s\n", ind, "", esb_names[val & 0x3]); + seq_printf(m, "%*sPstate: %s %s\n", ind, "", xd->stale_p ? "stale" : "", + xd->saved_p ? 
"saved" : ""); + seq_printf(m, "%*sTarget: %d\n", ind, "", xd->target); + seq_printf(m, "%*sChip: %d\n", ind, "", xd->src_chip); + seq_printf(m, "%*sTrigger: 0x%016llx\n", ind, "", xd->trig_page); + seq_printf(m, "%*sEOI: 0x%016llx\n", ind, "", xd->eoi_page); + seq_printf(m, "%*sFlags: 0x%llx\n", ind, "", xd->flags); + for (i = 0; i < ARRAY_SIZE(xive_irq_flags); i++) { + if (xd->flags & xive_irq_flags[i].mask) + seq_printf(m, "%*s%s\n", ind + 12, "", xive_irq_flags[i].name); + } +} +#endif + static const struct irq_domain_ops xive_irq_domain_ops = { .match = xive_irq_domain_match, .map = xive_irq_domain_map, .unmap = xive_irq_domain_unmap, .xlate = xive_irq_domain_xlate, +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS + .debug_show = xive_irq_domain_debug_show, +#endif }; static void __init xive_init_host(struct device_node *np) From 7b3b3de3b04ecb7393cdfaa30a3468dd47b750cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 10 Dec 2020 18:14:44 +0100 Subject: [PATCH 284/304] powerpc: Increase NR_IRQS range to support more KVM guests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PowerNV systems can handle up to 4K guests and 1M interrupt numbers per chip. Increase the range of allowed interrupts to support a larger number of guests. Signed-off-by: Cédric Le Goater Reviewed-by: Greg Kurz Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201210171450.1933725-8-clg@kaod.org --- arch/powerpc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 2b8e47d1aa91..a5f59defa8bc 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -66,7 +66,7 @@ config NEED_PER_CPU_PAGE_FIRST_CHUNK config NR_IRQS int "Number of virtual interrupt numbers" - range 32 32768 + range 32 1048576 default "512" help This defines the number of virtual interrupt numbers the kernel From 4cc0e36df2c0a41fd38645ddde08d2bfba699b7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 10 Dec 2020 18:14:45 +0100 Subject: [PATCH 285/304] powerpc/xive: Remove P9 DD1 flag XIVE_IRQ_FLAG_SHIFT_BUG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This flag was used to support the PHB4 LSIs on P9 DD1 and we have stopped supporting this CPU when DD2 came out. 
See skiboot commit: https://github.com/open-power/skiboot/commit/0b0d15e3c170 Signed-off-by: Cédric Le Goater Reviewed-by: Greg Kurz Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201210171450.1933725-9-clg@kaod.org --- arch/powerpc/include/asm/opal-api.h | 2 +- arch/powerpc/include/asm/xive.h | 2 +- arch/powerpc/kvm/book3s_xive_native.c | 3 --- arch/powerpc/kvm/book3s_xive_template.c | 3 --- arch/powerpc/sysdev/xive/common.c | 9 --------- arch/powerpc/sysdev/xive/native.c | 2 -- 6 files changed, 2 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 1dffa3cb16ba..48ee604ca39a 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -1091,7 +1091,7 @@ enum { OPAL_XIVE_IRQ_TRIGGER_PAGE = 0x00000001, OPAL_XIVE_IRQ_STORE_EOI = 0x00000002, OPAL_XIVE_IRQ_LSI = 0x00000004, - OPAL_XIVE_IRQ_SHIFT_BUG = 0x00000008, + OPAL_XIVE_IRQ_SHIFT_BUG = 0x00000008, /* P9 DD1.0 workaround */ OPAL_XIVE_IRQ_MASK_VIA_FW = 0x00000010, OPAL_XIVE_IRQ_EOI_VIA_FW = 0x00000020, }; diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index d332dd9a18de..b3c039d0bb6e 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -60,7 +60,7 @@ struct xive_irq_data { }; #define XIVE_IRQ_FLAG_STORE_EOI 0x01 #define XIVE_IRQ_FLAG_LSI 0x02 -#define XIVE_IRQ_FLAG_SHIFT_BUG 0x04 +/* #define XIVE_IRQ_FLAG_SHIFT_BUG 0x04 */ /* P9 DD1.0 workaround */ #define XIVE_IRQ_FLAG_MASK_FW 0x08 #define XIVE_IRQ_FLAG_EOI_FW 0x10 #define XIVE_IRQ_FLAG_H_INT_ESB 0x20 diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 7f120cf9c594..76800c84f2a3 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -37,9 +37,6 @@ static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset) * ordering. 
*/ - if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) - offset |= offset << 4; - val = in_be64(xd->eoi_mmio + offset); return (u8)val; } diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index 4ad3c0279458..ece36e024a8f 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c @@ -61,9 +61,6 @@ static u8 GLUE(X_PFX,esb_load)(struct xive_irq_data *xd, u32 offset) if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI) offset |= XIVE_ESB_LD_ST_MO; - if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) - offset |= offset << 4; - val =__x_readq(__x_eoi_page(xd) + offset); #ifdef __LITTLE_ENDIAN__ val >>= 64-8; diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 61a5f08798e9..8499d0b24c1d 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -200,10 +200,6 @@ static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset) if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI) offset |= XIVE_ESB_LD_ST_MO; - /* Handle HW errata */ - if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) - offset |= offset << 4; - if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw) val = xive_ops->esb_rw(xd->hw_irq, offset, 0, 0); else @@ -214,10 +210,6 @@ static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset) static void xive_esb_write(struct xive_irq_data *xd, u32 offset, u64 data) { - /* Handle HW errata */ - if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) - offset |= offset << 4; - if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw) xive_ops->esb_rw(xd->hw_irq, offset, data, 1); else @@ -1312,7 +1304,6 @@ static const struct { } xive_irq_flags[] = { { XIVE_IRQ_FLAG_STORE_EOI, "STORE_EOI" }, { XIVE_IRQ_FLAG_LSI, "LSI" }, - { XIVE_IRQ_FLAG_SHIFT_BUG, "SHIFT_BUG" }, { XIVE_IRQ_FLAG_MASK_FW, "MASK_FW" }, { XIVE_IRQ_FLAG_EOI_FW, "EOI_FW" }, { XIVE_IRQ_FLAG_H_INT_ESB, "H_INT_ESB" }, diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index c3182ec9ed65..f501b1640068 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -64,8 +64,6 @@ int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) data->flags |= XIVE_IRQ_FLAG_STORE_EOI; if (opal_flags & OPAL_XIVE_IRQ_LSI) data->flags |= XIVE_IRQ_FLAG_LSI; - if (opal_flags & OPAL_XIVE_IRQ_SHIFT_BUG) - data->flags |= XIVE_IRQ_FLAG_SHIFT_BUG; if (opal_flags & OPAL_XIVE_IRQ_MASK_VIA_FW) data->flags |= XIVE_IRQ_FLAG_MASK_FW; if (opal_flags & OPAL_XIVE_IRQ_EOI_VIA_FW) From b5277d18c65e31ce51f6733ebdca3985a962fab5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 10 Dec 2020 18:14:46 +0100 Subject: [PATCH 286/304] powerpc/xive: Remove P9 DD1 flag XIVE_IRQ_FLAG_MASK_FW MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This flag was used to support the PHB4 LSIs on P9 DD1 and we have stopped supporting this CPU when DD2 came out. 
See skiboot commit: https://github.com/open-power/skiboot/commit/0b0d15e3c170 Signed-off-by: Cédric Le Goater Reviewed-by: Greg Kurz Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201210171450.1933725-10-clg@kaod.org --- arch/powerpc/include/asm/opal-api.h | 2 +- arch/powerpc/include/asm/xive.h | 2 +- arch/powerpc/kvm/book3s_xive.c | 56 +++++------------------------ arch/powerpc/sysdev/xive/common.c | 40 +-------------------- arch/powerpc/sysdev/xive/native.c | 2 -- 5 files changed, 12 insertions(+), 90 deletions(-) diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 48ee604ca39a..0455b679c050 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -1092,7 +1092,7 @@ enum { OPAL_XIVE_IRQ_STORE_EOI = 0x00000002, OPAL_XIVE_IRQ_LSI = 0x00000004, OPAL_XIVE_IRQ_SHIFT_BUG = 0x00000008, /* P9 DD1.0 workaround */ - OPAL_XIVE_IRQ_MASK_VIA_FW = 0x00000010, + OPAL_XIVE_IRQ_MASK_VIA_FW = 0x00000010, /* P9 DD1.0 workaround */ OPAL_XIVE_IRQ_EOI_VIA_FW = 0x00000020, }; diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index b3c039d0bb6e..8d5b0dcc253c 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -61,7 +61,7 @@ struct xive_irq_data { #define XIVE_IRQ_FLAG_STORE_EOI 0x01 #define XIVE_IRQ_FLAG_LSI 0x02 /* #define XIVE_IRQ_FLAG_SHIFT_BUG 0x04 */ /* P9 DD1.0 workaround */ -#define XIVE_IRQ_FLAG_MASK_FW 0x08 +/* #define XIVE_IRQ_FLAG_MASK_FW 0x08 */ /* P9 DD1.0 workaround */ #define XIVE_IRQ_FLAG_EOI_FW 0x10 #define XIVE_IRQ_FLAG_H_INT_ESB 0x20 diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 7f60d1353d0e..87535bbe1d74 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -419,37 +419,16 @@ static u8 xive_lock_and_mask(struct kvmppc_xive *xive, /* Get the right irq */ kvmppc_xive_select_irq(state, &hw_num, &xd); - /* - * If the interrupt is marked as needing masking via - * firmware, we do it here. Firmware masking however - * is "lossy", it won't return the old p and q bits - * and won't set the interrupt to a state where it will - * record queued ones. If this is an issue we should do - * lazy masking instead. - * - * For now, we work around this in unmask by forcing - * an interrupt whenever we unmask a non-LSI via FW - * (if ever). - */ - if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { - xive_native_configure_irq(hw_num, - kvmppc_xive_vp(xive, state->act_server), - MASKED, state->number); - /* set old_p so we can track if an H_EOI was done */ - state->old_p = true; - state->old_q = false; - } else { - /* Set PQ to 10, return old P and old Q and remember them */ - val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_10); - state->old_p = !!(val & 2); - state->old_q = !!(val & 1); + /* Set PQ to 10, return old P and old Q and remember them */ + val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_10); + state->old_p = !!(val & 2); + state->old_q = !!(val & 1); - /* - * Synchronize hardware to sensure the queues are updated - * when masking - */ - xive_native_sync_source(hw_num); - } + /* + * Synchronize hardware to sensure the queues are updated when + * masking + */ + xive_native_sync_source(hw_num); return old_prio; } @@ -483,23 +462,6 @@ static void xive_finish_unmask(struct kvmppc_xive *xive, /* Get the right irq */ kvmppc_xive_select_irq(state, &hw_num, &xd); - /* - * See comment in xive_lock_and_mask() concerning masking - * via firmware. 
- */ - if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { - xive_native_configure_irq(hw_num, - kvmppc_xive_vp(xive, state->act_server), - state->act_priority, state->number); - /* If an EOI is needed, do it here */ - if (!state->old_p) - xive_vm_source_eoi(hw_num, xd); - /* If this is not an LSI, force a trigger */ - if (!(xd->flags & OPAL_XIVE_IRQ_LSI)) - xive_irq_trigger(xd); - goto bail; - } - /* Old Q set, set PQ to 11 */ if (state->old_q) xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_11); diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 8499d0b24c1d..9165d4834b2d 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -424,9 +424,7 @@ static void xive_irq_eoi(struct irq_data *d) } /* - * Helper used to mask and unmask an interrupt source. This - * is only called for normal interrupts that do not require - * masking/unmasking via firmware. + * Helper used to mask and unmask an interrupt source. */ static void xive_do_source_set_mask(struct xive_irq_data *xd, bool mask) @@ -673,20 +671,6 @@ static void xive_irq_unmask(struct irq_data *d) pr_devel("xive_irq_unmask: irq %d data @%p\n", d->irq, xd); - /* - * This is a workaround for PCI LSI problems on P9, for - * these, we call FW to set the mask. The problems might - * be fixed by P9 DD2.0, if that is the case, firmware - * will no longer set that flag. - */ - if (xd->flags & XIVE_IRQ_FLAG_MASK_FW) { - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - xive_ops->configure_irq(hw_irq, - get_hard_smp_processor_id(xd->target), - xive_irq_priority, d->irq); - return; - } - xive_do_source_set_mask(xd, false); } @@ -696,20 +680,6 @@ static void xive_irq_mask(struct irq_data *d) pr_devel("xive_irq_mask: irq %d data @%p\n", d->irq, xd); - /* - * This is a workaround for PCI LSI problems on P9, for - * these, we call OPAL to set the mask. The problems might - * be fixed by P9 DD2.0, if that is the case, firmware - * will no longer set that flag. 
- */ - if (xd->flags & XIVE_IRQ_FLAG_MASK_FW) { - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - xive_ops->configure_irq(hw_irq, - get_hard_smp_processor_id(xd->target), - 0xff, d->irq); - return; - } - xive_do_source_set_mask(xd, true); } @@ -852,13 +822,6 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state) int rc; u8 pq; - /* - * We only support this on interrupts that do not require - * firmware calls for masking and unmasking - */ - if (xd->flags & XIVE_IRQ_FLAG_MASK_FW) - return -EIO; - /* * This is called by KVM with state non-NULL for enabling * pass-through or NULL for disabling it @@ -1304,7 +1267,6 @@ static const struct { } xive_irq_flags[] = { { XIVE_IRQ_FLAG_STORE_EOI, "STORE_EOI" }, { XIVE_IRQ_FLAG_LSI, "LSI" }, - { XIVE_IRQ_FLAG_MASK_FW, "MASK_FW" }, { XIVE_IRQ_FLAG_EOI_FW, "EOI_FW" }, { XIVE_IRQ_FLAG_H_INT_ESB, "H_INT_ESB" }, { XIVE_IRQ_FLAG_NO_EOI, "NO_EOI" }, diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index f501b1640068..6c04ac1f3a1f 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -64,8 +64,6 @@ int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) data->flags |= XIVE_IRQ_FLAG_STORE_EOI; if (opal_flags & OPAL_XIVE_IRQ_LSI) data->flags |= XIVE_IRQ_FLAG_LSI; - if (opal_flags & OPAL_XIVE_IRQ_MASK_VIA_FW) - data->flags |= XIVE_IRQ_FLAG_MASK_FW; if (opal_flags & OPAL_XIVE_IRQ_EOI_VIA_FW) data->flags |= XIVE_IRQ_FLAG_EOI_FW; data->eoi_page = be64_to_cpu(eoi_page); From cf58b746665d0177b86d42d18e60985fa1fdb909 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 10 Dec 2020 18:14:47 +0100 Subject: [PATCH 287/304] powerpc/xive: Remove P9 DD1 flag XIVE_IRQ_FLAG_EOI_FW MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This flag was used to support the P9 DD1 and we have stopped supporting this CPU when DD2 came out. See skiboot commit: https://github.com/open-power/skiboot/commit/0b0d15e3c170 Also, remove eoi handler which is now unused. 
Signed-off-by: Cédric Le Goater Reviewed-by: Greg Kurz Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201210171450.1933725-11-clg@kaod.org --- arch/powerpc/include/asm/opal-api.h | 2 +- arch/powerpc/include/asm/xive.h | 2 +- arch/powerpc/kvm/book3s_xive_template.c | 2 -- arch/powerpc/sysdev/xive/common.c | 14 +------------- arch/powerpc/sysdev/xive/native.c | 12 ------------ arch/powerpc/sysdev/xive/spapr.c | 6 ------ arch/powerpc/sysdev/xive/xive-internal.h | 1 - 7 files changed, 3 insertions(+), 36 deletions(-) diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 0455b679c050..0b63ba7d5917 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -1093,7 +1093,7 @@ enum { OPAL_XIVE_IRQ_LSI = 0x00000004, OPAL_XIVE_IRQ_SHIFT_BUG = 0x00000008, /* P9 DD1.0 workaround */ OPAL_XIVE_IRQ_MASK_VIA_FW = 0x00000010, /* P9 DD1.0 workaround */ - OPAL_XIVE_IRQ_EOI_VIA_FW = 0x00000020, + OPAL_XIVE_IRQ_EOI_VIA_FW = 0x00000020, /* P9 DD1.0 workaround */ }; /* Flags for OPAL_XIVE_GET/SET_QUEUE_INFO */ diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 8d5b0dcc253c..9a312b975ca8 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -62,7 +62,7 @@ struct xive_irq_data { #define XIVE_IRQ_FLAG_LSI 0x02 /* #define XIVE_IRQ_FLAG_SHIFT_BUG 0x04 */ /* P9 DD1.0 workaround */ /* #define XIVE_IRQ_FLAG_MASK_FW 0x08 */ /* P9 DD1.0 workaround */ -#define XIVE_IRQ_FLAG_EOI_FW 0x10 +/* #define XIVE_IRQ_FLAG_EOI_FW 0x10 */ /* P9 DD1.0 workaround */ #define XIVE_IRQ_FLAG_H_INT_ESB 0x20 /* Special flag set by KVM for excalation interrupts */ diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index ece36e024a8f..b0015e05d99a 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c @@ -74,8 +74,6 @@ static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd) /* If the XIVE supports the new "store EOI facility, use it */ if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI) __x_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI); - else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) - opal_int_eoi(hw_irq); else if (xd->flags & XIVE_IRQ_FLAG_LSI) { /* * For LSIs the HW EOI cycle is used rather than PQ bits, diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 9165d4834b2d..3e91ec4bb49e 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -354,18 +354,7 @@ static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd) /* If the XIVE supports the new "store EOI facility, use it */ if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI) xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0); - else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) { - /* - * The FW told us to call it. This happens for some - * interrupt sources that need additional HW whacking - * beyond the ESB manipulation. For example LPC interrupts - * on P9 DD1.0 needed a latch to be clared in the LPC bridge - * itself. The Firmware will take care of it. 
- */ - if (WARN_ON_ONCE(!xive_ops->eoi)) - return; - xive_ops->eoi(hw_irq); - } else { + else { u8 eoi_val; /* @@ -1267,7 +1256,6 @@ static const struct { } xive_irq_flags[] = { { XIVE_IRQ_FLAG_STORE_EOI, "STORE_EOI" }, { XIVE_IRQ_FLAG_LSI, "LSI" }, - { XIVE_IRQ_FLAG_EOI_FW, "EOI_FW" }, { XIVE_IRQ_FLAG_H_INT_ESB, "H_INT_ESB" }, { XIVE_IRQ_FLAG_NO_EOI, "NO_EOI" }, }; diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 6c04ac1f3a1f..e91519c42463 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -64,8 +64,6 @@ int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) data->flags |= XIVE_IRQ_FLAG_STORE_EOI; if (opal_flags & OPAL_XIVE_IRQ_LSI) data->flags |= XIVE_IRQ_FLAG_LSI; - if (opal_flags & OPAL_XIVE_IRQ_EOI_VIA_FW) - data->flags |= XIVE_IRQ_FLAG_EOI_FW; data->eoi_page = be64_to_cpu(eoi_page); data->trig_page = be64_to_cpu(trig_page); data->esb_shift = be32_to_cpu(esb_shift); @@ -380,15 +378,6 @@ static void xive_native_update_pending(struct xive_cpu *xc) } } -static void xive_native_eoi(u32 hw_irq) -{ - /* - * Not normally used except if specific interrupts need - * a workaround on EOI. - */ - opal_int_eoi(hw_irq); -} - static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc) { s64 rc; @@ -471,7 +460,6 @@ static const struct xive_ops xive_native_ops = { .match = xive_native_match, .shutdown = xive_native_shutdown, .update_pending = xive_native_update_pending, - .eoi = xive_native_eoi, .setup_cpu = xive_native_setup_cpu, .teardown_cpu = xive_native_teardown_cpu, .sync_source = xive_native_sync_source, diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 6610e5149d5a..01ccc0786ada 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -628,11 +628,6 @@ static void xive_spapr_update_pending(struct xive_cpu *xc) } } -static void xive_spapr_eoi(u32 hw_irq) -{ - /* Not used */; -} - static void xive_spapr_setup_cpu(unsigned int cpu, struct xive_cpu *xc) { /* Only some debug on the TIMA settings */ @@ -677,7 +672,6 @@ static const struct xive_ops xive_spapr_ops = { .match = xive_spapr_match, .shutdown = xive_spapr_shutdown, .update_pending = xive_spapr_update_pending, - .eoi = xive_spapr_eoi, .setup_cpu = xive_spapr_setup_cpu, .teardown_cpu = xive_spapr_teardown_cpu, .sync_source = xive_spapr_sync_source, diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h index c07fadb9d264..9cf57c722faa 100644 --- a/arch/powerpc/sysdev/xive/xive-internal.h +++ b/arch/powerpc/sysdev/xive/xive-internal.h @@ -52,7 +52,6 @@ struct xive_ops { void (*shutdown)(void); void (*update_pending)(struct xive_cpu *xc); - void (*eoi)(u32 hw_irq); void (*sync_source)(u32 hw_irq); u64 (*esb_rw)(u32 hw_irq, u32 offset, u64 data, bool write); #ifdef CONFIG_SMP From 614546d56296380b59e94484813eeef62a7d2b6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 10 Dec 2020 18:14:48 +0100 Subject: [PATCH 288/304] powerpc/xive: Simplify xive_do_source_eoi() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous patches removed the need for the first argument, which was a hack for firmware EOI. Remove it and flatten the routine, which has become simpler.
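For readers following the series, here is a minimal, illustrative sketch (not part of the patch) of the PQ decode the flattened routine relies on. The 0x2/0x1 bit values match the (val & 2)/(val & 1) decode used by xive_lock_and_mask() earlier in this series:

	#include <linux/types.h>

	/*
	 * Illustrative only: the two PQ bits returned by an ESB load.
	 * P set means an event is being presented to the CPU; Q set
	 * means another event arrived and was coalesced meanwhile.
	 */
	#define ESB_VAL_P	0x2
	#define ESB_VAL_Q	0x1

	static bool eoi_needs_retrigger(u8 old_pq)
	{
		/*
		 * After an EOI clears PQ to 00, a Q bit in the returned
		 * old value means an event was coalesced while P was
		 * set and would be lost without a software re-trigger.
		 */
		return old_pq & ESB_VAL_Q;
	}

This is exactly why the routine below writes to the trigger page when the old Q bit is set.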
Signed-off-by: Cédric Le Goater Reviewed-by: Greg Kurz Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201210171450.1933725-12-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 74 ++++++++++++++----------------- 1 file changed, 34 insertions(+), 40 deletions(-) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 3e91ec4bb49e..595310e056f4 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -348,39 +348,40 @@ static void xive_do_queue_eoi(struct xive_cpu *xc) * EOI an interrupt at the source. There are several methods * to do this depending on the HW version and source type */ -static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd) +static void xive_do_source_eoi(struct xive_irq_data *xd) { + u8 eoi_val; + xd->stale_p = false; + /* If the XIVE supports the new "store EOI facility, use it */ - if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI) + if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI) { xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0); - else { - u8 eoi_val; - - /* - * Otherwise for EOI, we use the special MMIO that does - * a clear of both P and Q and returns the old Q, - * except for LSIs where we use the "EOI cycle" special - * load. - * - * This allows us to then do a re-trigger if Q was set - * rather than synthesizing an interrupt in software - * - * For LSIs the HW EOI cycle is used rather than PQ bits, - * as they are automatically re-triggred in HW when still - * pending. - */ - if (xd->flags & XIVE_IRQ_FLAG_LSI) - xive_esb_read(xd, XIVE_ESB_LOAD_EOI); - else { - eoi_val = xive_esb_read(xd, XIVE_ESB_SET_PQ_00); - DBG_VERBOSE("eoi_val=%x\n", eoi_val); - - /* Re-trigger if needed */ - if ((eoi_val & XIVE_ESB_VAL_Q) && xd->trig_mmio) - out_be64(xd->trig_mmio, 0); - } + return; } + + /* + * For LSIs, we use the "EOI cycle" special load rather than + * PQ bits, as they are automatically re-triggered in HW when + * still pending. + */ + if (xd->flags & XIVE_IRQ_FLAG_LSI) { + xive_esb_read(xd, XIVE_ESB_LOAD_EOI); + return; + } + + /* + * Otherwise, we use the special MMIO that does a clear of + * both P and Q and returns the old Q. This allows us to then + * do a re-trigger if Q was set rather than synthesizing an + * interrupt in software + */ + eoi_val = xive_esb_read(xd, XIVE_ESB_SET_PQ_00); + DBG_VERBOSE("eoi_val=%x\n", eoi_val); + + /* Re-trigger if needed */ + if ((eoi_val & XIVE_ESB_VAL_Q) && xd->trig_mmio) + out_be64(xd->trig_mmio, 0); } /* irq_chip eoi callback, called with irq descriptor lock held */ @@ -398,7 +399,7 @@ static void xive_irq_eoi(struct irq_data *d) */ if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) && !(xd->flags & XIVE_IRQ_FLAG_NO_EOI)) - xive_do_source_eoi(irqd_to_hwirq(d), xd); + xive_do_source_eoi(xd); else xd->stale_p = true; @@ -788,14 +789,7 @@ static int xive_irq_retrigger(struct irq_data *d) * 11, then perform an EOI. */ xive_esb_read(xd, XIVE_ESB_SET_PQ_11); - - /* - * Note: We pass "0" to the hw_irq argument in order to - * avoid calling into the backend EOI code which we don't - * want to do in the case of a re-trigger. Backends typically - * only do EOI for LSIs anyway. - */ - xive_do_source_eoi(0, xd); + xive_do_source_eoi(xd); return 1; } @@ -910,7 +904,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state) * while masked, the generic code will re-mask it anyway. 
*/ if (!xd->saved_p) - xive_do_source_eoi(hw_irq, xd); + xive_do_source_eoi(xd); } return 0; @@ -1054,7 +1048,7 @@ static void xive_ipi_eoi(struct irq_data *d) DBG_VERBOSE("IPI eoi: irq=%d [0x%lx] (HW IRQ 0x%x) pending=%02x\n", d->irq, irqd_to_hwirq(d), xc->hw_ipi, xc->pending_prio); - xive_do_source_eoi(xc->hw_ipi, &xc->ipi_data); + xive_do_source_eoi(&xc->ipi_data); xive_do_queue_eoi(xc); } @@ -1445,7 +1439,7 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc) * still asserted. Otherwise do an MSI retrigger. */ if (xd->flags & XIVE_IRQ_FLAG_LSI) - xive_do_source_eoi(irqd_to_hwirq(d), xd); + xive_do_source_eoi(xd); else xive_irq_retrigger(d); From 07efbca11c1a985efa4d15bd76a637c6bffc253b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 10 Dec 2020 18:14:49 +0100 Subject: [PATCH 289/304] powerpc/xive: Improve error reporting of OPAL calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a vp_err() macro to standardize error reporting. Signed-off-by: Cédric Le Goater Reviewed-by: Greg Kurz Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201210171450.1933725-13-clg@kaod.org --- arch/powerpc/sysdev/xive/native.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index e91519c42463..05a800a3104e 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -122,6 +122,8 @@ static int xive_native_get_irq_config(u32 hw_irq, u32 *target, u8 *prio, return rc == 0 ? 0 : -ENXIO; } +#define vp_err(vp, fmt, ...) pr_err("VP[0x%x]: " fmt, vp, ##__VA_ARGS__) + /* This can be called multiple time to change a queue configuration */ int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio, __be32 *qpage, u32 order, bool can_escalate) @@ -149,7 +151,7 @@ int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio, &esc_irq_be, NULL); if (rc) { - pr_err("Error %lld getting queue info prio %d\n", rc, prio); + vp_err(vp_id, "Failed to get queue %d info : %lld\n", prio, rc); rc = -EIO; goto fail; } @@ -172,7 +174,7 @@ int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio, msleep(OPAL_BUSY_DELAY_MS); } if (rc) { - pr_err("Error %lld setting queue for prio %d\n", rc, prio); + vp_err(vp_id, "Failed to set queue %d info: %lld\n", prio, rc); rc = -EIO; } else { /* @@ -199,7 +201,7 @@ static void __xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio) msleep(OPAL_BUSY_DELAY_MS); } if (rc) - pr_err("Error %lld disabling queue for prio %d\n", rc, prio); + vp_err(vp_id, "Failed to disable queue %d : %lld\n", prio, rc); } void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio) @@ -698,6 +700,8 @@ int xive_native_enable_vp(u32 vp_id, bool single_escalation) break; msleep(OPAL_BUSY_DELAY_MS); } + if (rc) + vp_err(vp_id, "Failed to enable VP : %lld\n", rc); return rc ? -EIO : 0; } EXPORT_SYMBOL_GPL(xive_native_enable_vp); @@ -712,6 +716,8 @@ int xive_native_disable_vp(u32 vp_id) break; msleep(OPAL_BUSY_DELAY_MS); } + if (rc) + vp_err(vp_id, "Failed to disable VP : %lld\n", rc); return rc ? 
-EIO : 0; } EXPORT_SYMBOL_GPL(xive_native_disable_vp); @@ -723,8 +729,10 @@ int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id) s64 rc; rc = opal_xive_get_vp_info(vp_id, NULL, &vp_cam_be, NULL, &vp_chip_id_be); - if (rc) + if (rc) { + vp_err(vp_id, "Failed to get VP info : %lld\n", rc); return -EIO; + } *out_cam_id = be64_to_cpu(vp_cam_be) & 0xffffffffu; *out_chip_id = be32_to_cpu(vp_chip_id_be); @@ -755,8 +763,7 @@ int xive_native_get_queue_info(u32 vp_id, u32 prio, rc = opal_xive_get_queue_info(vp_id, prio, &qpage, &qsize, &qeoi_page, &escalate_irq, &qflags); if (rc) { - pr_err("OPAL failed to get queue info for VCPU %d/%d : %lld\n", - vp_id, prio, rc); + vp_err(vp_id, "failed to get queue %d info : %lld\n", prio, rc); return -EIO; } @@ -784,8 +791,7 @@ int xive_native_get_queue_state(u32 vp_id, u32 prio, u32 *qtoggle, u32 *qindex) rc = opal_xive_get_queue_state(vp_id, prio, &opal_qtoggle, &opal_qindex); if (rc) { - pr_err("OPAL failed to get queue state for VCPU %d/%d : %lld\n", - vp_id, prio, rc); + vp_err(vp_id, "failed to get queue %d state : %lld\n", prio, rc); return -EIO; } @@ -804,8 +810,7 @@ int xive_native_set_queue_state(u32 vp_id, u32 prio, u32 qtoggle, u32 qindex) rc = opal_xive_set_queue_state(vp_id, prio, qtoggle, qindex); if (rc) { - pr_err("OPAL failed to set queue state for VCPU %d/%d : %lld\n", - vp_id, prio, rc); + vp_err(vp_id, "failed to set queue %d state : %lld\n", prio, rc); return -EIO; } @@ -827,8 +832,7 @@ int xive_native_get_vp_state(u32 vp_id, u64 *out_state) rc = opal_xive_get_vp_state(vp_id, &state); if (rc) { - pr_err("OPAL failed to get vp state for VCPU %d : %lld\n", - vp_id, rc); + vp_err(vp_id, "failed to get vp state : %lld\n", rc); return -EIO; } From dddc4ef92d1ce92987da1d6926cdfa99e8acb622 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 10 Dec 2020 18:14:50 +0100 Subject: [PATCH 290/304] KVM: PPC: Book3S HV: XIVE: Add a comment regarding VP numbering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the XIVE resources are allocated at the HW level, the VP structures describing the vCPUs of a guest are distributed among the chips to optimize the PowerBUS usage. For best performance, the guest vCPUs can be pinned to match the VP structure distribution. Currently, the VP identifiers are deduced from the vCPU id using the kvmppc_pack_vcpu_id() routine, which is not incorrect but not optimal either. If VSMT is used, the result is not continuous and the constraints on HW resources described above cannot be met. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201210171450.1933725-14-clg@kaod.org --- arch/powerpc/kvm/book3s_xive.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index d5d4fee7ac94..86c24a4ad809 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -218,6 +218,17 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp return xive->src_blocks[bid]; } +/* + * When the XIVE resources are allocated at the HW level, the VP + * structures describing the vCPUs of a guest are distributed among + * the chips to optimize the PowerBUS usage. For best performance, the + * guest vCPUs can be pinned to match the VP structure distribution. 
+ * + * Currently, the VP identifiers are deduced from the vCPU id using + * the kvmppc_pack_vcpu_id() routine which is not incorrect but not + * optimal either. It VSMT is used, the result is not continuous and + * the constraints on HW resources described above can not be met. + */ static inline u32 kvmppc_xive_vp(struct kvmppc_xive *xive, u32 server) { return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server); From 44b4c4450f8d31296ba633d74be753a85fd627bd Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 14 Dec 2020 13:31:21 +0530 Subject: [PATCH 291/304] powerpc/64s: Mark the kuap/kuep functions non __init The kernel calls these functions on CPU online and hence they must not be marked __init. Otherwise if the memory they occupied has been reused the system can crash in various ways. Sachin reported it caused his LPAR to spontaneously restart with no other output. With xmon enabled it may drop into xmon with a dump like: cpu 0x1: Vector: 700 (Program Check) at [c000000003c5fcb0] pc: 00000000011e0a78 lr: 00000000011c51d4 sp: c000000003c5ff50 msr: 8000000000081001 current = 0xc000000002c12b00 paca = 0xc000000003cff280 irqmask: 0x03 irq_happened: 0x01 pid = 0, comm = swapper/1 ... [c000000003c5ff50] 0000000000087c38 (unreliable) [c000000003c5ff70] 000000000003870c [c000000003c5ff90] 000000000000d108 Fixes: 3b47b7549ead ("powerpc/book3s64/kuap: Move KUAP related function outside radix") Reported-by: Sachin Sant Signed-off-by: Aneesh Kumar K.V [mpe: Expand change log with details and xmon output] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201214080121.358567-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/book3s64/pkeys.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 2b7ded396db4..f1c6f264ed91 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -251,7 +251,7 @@ void __init pkey_early_init_devtree(void) } #ifdef CONFIG_PPC_KUEP -void __init setup_kuep(bool disabled) +void setup_kuep(bool disabled) { if (disabled) return; @@ -277,7 +277,7 @@ void __init setup_kuep(bool disabled) #endif #ifdef CONFIG_PPC_KUAP -void __init setup_kuap(bool disabled) +void setup_kuap(bool disabled) { if (disabled) return; From 1791ebd131c46539b024c0f2ebf12b6c88a265b9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 14 Dec 2020 21:56:16 +1100 Subject: [PATCH 292/304] powerpc: Inline setup_kup() setup_kup() is used by both 64-bit and 32-bit code. However on 64-bit it must not be __init, because it's used for CPU hotplug, whereas on 32-bit it should be __init because it calls setup_kuap/kuep() which are __init. We worked around that problem in the past by marking it __ref, see commit 67d53f30e23e ("powerpc/mm: fix section mismatch for setup_kup()"). Marking it __ref basically just omits it from section mismatch checking, which can lead to bugs, and in fact it did, see commit 44b4c4450f8d ("powerpc/64s: Mark the kuap/kuep functions non __init") We can avoid all these problems by just making it static inline. Because all it does is call other functions, making it inline actually shrinks the 32-bit vmlinux by ~76 bytes. Make it __always_inline as pointed out by Christophe. 
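As background, the hazard that __ref used to hide looks roughly like this hypothetical sketch (the function names here are invented for illustration and are not taken from the kernel):

	#include <linux/init.h>

	static void __init boot_only_setup(void)
	{
		/* placed in .init.text, which is freed after boot */
	}

	static int my_cpu_online(unsigned int cpu)
	{
		/*
		 * Fine while booting; once init memory has been freed
		 * this call jumps into reused memory and crashes, as in
		 * the report fixed by commit 44b4c4450f8d above.
		 */
		boot_only_setup();
		return 0;
	}

Section mismatch checking exists to flag exactly this pattern, which is why exempting a function from it with __ref is risky and inlining is the safer fix.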
Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201214123011.311024-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/kup.h | 8 ++++++-- arch/powerpc/mm/init-common.c | 6 ------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 5a9820c54da9..bf221a2a523e 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -49,8 +49,6 @@ extern bool disable_kuap; #include -void setup_kup(void); - #ifdef CONFIG_PPC_KUEP void setup_kuep(bool disabled); #else @@ -85,6 +83,12 @@ static inline void restore_user_access(unsigned long flags) { } #endif /* CONFIG_PPC_BOOK3S_64 */ #endif /* CONFIG_PPC_KUAP */ +static __always_inline void setup_kup(void) +{ + setup_kuep(disable_kuep); + setup_kuap(disable_kuap); +} + static inline void allow_read_from_user(const void __user *from, unsigned long size) { allow_user_access(NULL, from, size, KUAP_READ); diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index afdebb95bcae..3a82f89827a5 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -47,12 +47,6 @@ static int __init parse_nosmap(char *p) } early_param("nosmap", parse_nosmap); -void __ref setup_kup(void) -{ - setup_kuep(disable_kuep); - setup_kuap(disable_kuap); -} - #define CTOR(shift) static void ctor_##shift(void *addr) \ { \ memset(addr, 0, sizeof(void *) << (shift)); \ From 13751f8747519fe3bdc738fa6d802fbd94a85ac4 Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Sat, 7 Nov 2020 14:26:22 +0800 Subject: [PATCH 293/304] KVM: PPC: Book3S: Assign boolean values to a bool variable Fix the following coccinelle warnings: ./arch/powerpc/kvm/book3s_xics.c:476:3-15: WARNING: Assignment of 0/1 to bool variable ./arch/powerpc/kvm/book3s_xics.c:504:3-15: WARNING: Assignment of 0/1 to bool variable Reported-by: Tosk Robot Signed-off-by: Kaixu Xia Reviewed-by: Greg Kurz Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1604730382-5810-1-git-send-email-kaixuxia@tencent.com --- arch/powerpc/kvm/book3s_xics.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 5fee5a11550d..303e3cb096db 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -473,7 +473,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, arch_spin_unlock(&ics->lock); local_irq_restore(flags); new_irq = reject; - check_resend = 0; + check_resend = false; goto again; } } else { @@ -501,7 +501,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, state->resend = 0; arch_spin_unlock(&ics->lock); local_irq_restore(flags); - check_resend = 0; + check_resend = false; goto again; } } From a300bf8c5f24bdeaa84925d1e0ec6221cbdc7597 Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Sat, 7 Nov 2020 23:49:38 +0800 Subject: [PATCH 294/304] KVM: PPC: fix comparison to bool warning Fix the following coccicheck warning: ./arch/powerpc/kvm/booke.c:503:6-16: WARNING: Comparison to bool ./arch/powerpc/kvm/booke.c:505:6-17: WARNING: Comparison to bool ./arch/powerpc/kvm/booke.c:507:6-16: WARNING: Comparison to bool Reported-by: Tosk Robot Signed-off-by: Kaixu Xia Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1604764178-8087-1-git-send-email-kaixuxia@tencent.com --- arch/powerpc/kvm/booke.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff 
--git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index b1abcb816439..288a9820ec01 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -500,11 +500,11 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, vcpu->arch.regs.nip = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; - if (update_esr == true) + if (update_esr) kvmppc_set_esr(vcpu, vcpu->arch.queued_esr); - if (update_dear == true) + if (update_dear) kvmppc_set_dar(vcpu, vcpu->arch.queued_dear); - if (update_epr == true) { + if (update_epr) { if (vcpu->arch.epr_flags & KVMPPC_EPR_USER) kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); else if (vcpu->arch.epr_flags & KVMPPC_EPR_KERNEL) { From 87fb4978ef8f7e3d6f51ea8e259638c4e96f2fc0 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Tue, 8 Dec 2020 18:57:08 -0300 Subject: [PATCH 295/304] KVM: PPC: Book3S HV: Fix mask size for emulated msgsndp According to ISAv3.1 and ISAv3.0b, the msgsndp is described to split RB in: msgtype <- (RB) 32:36 payload <- (RB) 37:63 t <- (RB) 57:63 The current way of getting 'msgtype', and 't' is missing their MSB: msgtype: ((arg >> 27) & 0xf) : Gets (RB) 33:36, missing bit 32 t: (arg &= 0x3f) : Gets (RB) 58:63, missing bit 57 Fixes this by applying the correct mask. Signed-off-by: Leonardo Bras Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201208215707.31149-1-leobras.c@gmail.com --- arch/powerpc/kvm/book3s_hv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index cfaa91b27112..6f612d240392 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1241,9 +1241,9 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu) switch (get_xop(inst)) { case OP_31_XOP_MSGSNDP: arg = kvmppc_get_gpr(vcpu, rb); - if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER) + if (((arg >> 27) & 0x1f) != PPC_DBELL_SERVER) break; - arg &= 0x3f; + arg &= 0x7f; if (arg >= kvm->arch.emul_smt_mode) break; tvcpu = kvmppc_find_vcpu(kvm, vcpu->vcpu_id - thr + arg); @@ -1256,7 +1256,7 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu) break; case OP_31_XOP_MSGCLRP: arg = kvmppc_get_gpr(vcpu, rb); - if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER) + if (((arg >> 27) & 0x1f) != PPC_DBELL_SERVER) break; vcpu->arch.vcore->dpdes = 0; vcpu->arch.doorbell_request = 0; From 2198d4934ee8b81341a84c9ec8bb25b4b0d02522 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 12 Dec 2020 13:41:25 +0000 Subject: [PATCH 296/304] powerpc/mm: Fix hugetlb_free_pmd_range() and hugetlb_free_pud_range() Commit 7bfe54b5f165 ("powerpc/mm: Refactor the floor/ceiling check in hugetlb range freeing functions") inadvertely removed the mask applied to start parameter in those two functions, leading to the following crash on power9. LTP: starting hugemmap05_1 (hugemmap05 -m) ------------[ cut here ]------------ kernel BUG at arch/powerpc/mm/book3s64/pgtable.c:387! Oops: Exception in kernel mode, sig: 5 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=256 NUMA PowerNV ... 
CPU: 99 PID: 308 Comm: ksoftirqd/99 Tainted: G O 5.10.0-rc7-next-20201211 #1 NIP: c00000000005dbec LR: c0000000003352f4 CTR: 0000000000000000 REGS: c00020000bb6f830 TRAP: 0700 Tainted: G O (5.10.0-rc7-next-20201211) MSR: 900000000282b033 CR: 24002284 XER: 20040000 GPR00: c0000000003352f4 c00020000bb6fad0 c000000007f70b00 c0002000385b3ff0 GPR04: 0000000000000000 0000000000000003 c00020000bb6f8b4 0000000000000001 GPR08: 0000000000000001 0000000000000009 0000000000000008 0000000000000002 GPR12: 0000000024002488 c000201fff649c00 c000000007f2a20c 0000000000000000 GPR16: 0000000000000007 0000000000000000 c000000000194d10 c000000000194d10 GPR24: 0000000000000014 0000000000000015 c000201cc6e72398 c000000007fac4b4 GPR28: c000000007f2bf80 c000000007fac2f8 0000000000000008 c000200033870000 NIP [c00000000005dbec] __tlb_remove_table+0x1dc/0x1e0 pgtable_free at arch/powerpc/mm/book3s64/pgtable.c:387 (inlined by) __tlb_remove_table at arch/powerpc/mm/book3s64/pgtable.c:405 LR [c0000000003352f4] tlb_remove_table_rcu+0x54/0xa0 Call Trace: __tlb_remove_table+0x13c/0x1e0 (unreliable) tlb_remove_table_rcu+0x54/0xa0 __tlb_remove_table_free at mm/mmu_gather.c:101 (inlined by) tlb_remove_table_rcu at mm/mmu_gather.c:156 rcu_core+0x35c/0xbb0 rcu_do_batch at kernel/rcu/tree.c:2502 (inlined by) rcu_core at kernel/rcu/tree.c:2737 __do_softirq+0x480/0x704 run_ksoftirqd+0x74/0xd0 run_ksoftirqd at kernel/softirq.c:651 (inlined by) run_ksoftirqd at kernel/softirq.c:642 smpboot_thread_fn+0x278/0x320 kthread+0x1c4/0x1d0 ret_from_kernel_thread+0x5c/0x80 Properly apply the masks before calling pmd_free_tlb() and pud_free_tlb() respectively. Fixes: 7bfe54b5f165 ("powerpc/mm: Refactor the floor/ceiling check in hugetlb range freeing functions") Reported-by: Qian Cai Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/56feccd7b6fcd98e353361a233fa7bb8e67c3164.1607780469.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/hugetlbpage.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index f8d8a4988e15..8b3cc4d688e8 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -396,9 +396,9 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, if (range_is_outside_limits(start, end, floor, ceiling, PUD_MASK)) return; - pmd = pmd_offset(pud, start); + pmd = pmd_offset(pud, start & PUD_MASK); pud_clear(pud); - pmd_free_tlb(tlb, pmd, start); + pmd_free_tlb(tlb, pmd, start & PUD_MASK); mm_dec_nr_pmds(tlb->mm); } @@ -439,9 +439,9 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d, if (range_is_outside_limits(start, end, floor, ceiling, PGDIR_MASK)) return; - pud = pud_offset(p4d, start); + pud = pud_offset(p4d, start & PGDIR_MASK); p4d_clear(p4d); - pud_free_tlb(tlb, pud, start); + pud_free_tlb(tlb, pud, start & PGDIR_MASK); mm_dec_nr_puds(tlb->mm); } From ef0e3b650f8ddc54bb70868852f50642ee3ae765 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Tue, 15 Dec 2020 03:56:18 -0500 Subject: [PATCH 297/304] powerpc/perf: Fix Threshold Event Counter Multiplier width for P10 Threshold Event Counter Multiplier (TECM) is part of Monitor Mode Control Register A (MMCRA). This field along with Threshold Event Counter Exponent (TECE) is used to get the threshold counter value. In Power10 this is an 8-bit field, so fix the current code by modifying the MMCRA[TECM] extraction macro to handle this change.
ISA v3.1 says this is a 7-bit field, but on POWER10 it is actually 8 bits, which will hopefully be fixed in an ISA v3.1 update. Fixes: 170a315f41c6 ("powerpc/perf: Support to export MMCRA[TEC*] field to userspace") Signed-off-by: Madhavan Srinivasan Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1608022578-1532-1-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/isa207-common.c | 3 +++ arch/powerpc/perf/isa207-common.h | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 24f0a900a824..6ab5b272090a 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -247,6 +247,9 @@ void isa207_get_mem_weight(u64 *weight) u64 sier = mfspr(SPRN_SIER); u64 val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT; + if (cpu_has_feature(CPU_FTR_ARCH_31)) + mantissa = P10_MMCRA_THR_CTR_MANT(mmcra); + if (val == 0 || val == 7) *weight = 0; else diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 42087643c333..454b32c31440 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -231,6 +231,10 @@ #define MMCRA_THR_CTR_EXP(v) (((v) >> MMCRA_THR_CTR_EXP_SHIFT) &\ MMCRA_THR_CTR_EXP_MASK) +#define P10_MMCRA_THR_CTR_MANT_MASK 0xFFul +#define P10_MMCRA_THR_CTR_MANT(v) (((v) >> MMCRA_THR_CTR_MANT_SHIFT) &\ + P10_MMCRA_THR_CTR_MANT_MASK) + /* MMCRA Threshold Compare bit constant for power9 */ #define p9_MMCRA_THR_CMP_SHIFT 45 From 328e7e487a464aad024fbde6663b7859df082b7b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 15 Oct 2020 10:52:20 +0000 Subject: [PATCH 298/304] powerpc: force inlining of csum_partial() to avoid multiple csum_partial() with GCC10 ppc-linux-objdump -d vmlinux | grep -e "<csum_partial>" -e "<__csum_partial>" With gcc9 I get: c0017ef8 <__csum_partial>: c00182fc: 4b ff fb fd bl c0017ef8 <__csum_partial> c0018478: 4b ff fa 80 b c0017ef8 <__csum_partial> c03e8458: 4b c2 fa a0 b c0017ef8 <__csum_partial> c03e8518: 4b c2 f9 e1 bl c0017ef8 <__csum_partial> c03ef410: 4b c2 8a e9 bl c0017ef8 <__csum_partial> c03f0b24: 4b c2 73 d5 bl c0017ef8 <__csum_partial> c04279a4: 4b bf 05 55 bl c0017ef8 <__csum_partial> c0429820: 4b be e6 d9 bl c0017ef8 <__csum_partial> c0429944: 4b be e5 b5 bl c0017ef8 <__csum_partial> c042b478: 4b be ca 81 bl c0017ef8 <__csum_partial> c042b554: 4b be c9 a5 bl c0017ef8 <__csum_partial> c045f15c: 4b bb 8d 9d bl c0017ef8 <__csum_partial> c0492190: 4b b8 5d 69 bl c0017ef8 <__csum_partial> c0492310: 4b b8 5b e9 bl c0017ef8 <__csum_partial> c0495594: 4b b8 29 65 bl c0017ef8 <__csum_partial> c049c420: 4b b7 ba d9 bl c0017ef8 <__csum_partial> c049c870: 4b b7 b6 89 bl c0017ef8 <__csum_partial> c049c930: 4b b7 b5 c9 bl c0017ef8 <__csum_partial> c04a9ca0: 4b b6 e2 59 bl c0017ef8 <__csum_partial> c04bdde4: 4b b5 a1 15 bl c0017ef8 <__csum_partial> c04be480: 4b b5 9a 79 bl c0017ef8 <__csum_partial> c04be710: 4b b5 97 e9 bl c0017ef8 <__csum_partial> c04c969c: 4b b4 e8 5d bl c0017ef8 <__csum_partial> c04ca2fc: 4b b4 db fd bl c0017ef8 <__csum_partial> c04cf5bc: 4b b4 89 3d bl c0017ef8 <__csum_partial> c04d0440: 4b b4 7a b9 bl c0017ef8 <__csum_partial>
With gcc10 I get: c0018d08 <__csum_partial>: c0019020 <csum_partial>: c0019020: 4b ff fc e8 b c0018d08 <__csum_partial> c001914c: 4b ff fe d4 b c0019020 <csum_partial> c0019250: 4b ff fd d1 bl c0019020 <csum_partial> c03e404c <csum_partial>: c03e404c: 4b c3 4c bc b c0018d08 <__csum_partial> c03e4050: 4b ff ff fc b c03e404c <csum_partial> c03e40fc: 4b ff ff 51 bl c03e404c <csum_partial> c03e6680: 4b ff d9 cd bl c03e404c <csum_partial> c03e68c4: 4b ff d7 89 bl c03e404c <csum_partial> c03e7934: 4b ff c7 19 bl c03e404c <csum_partial> c03e7bf8: 4b ff c4 55 bl c03e404c <csum_partial> c03eb148: 4b ff 8f 05 bl c03e404c <csum_partial> c03ecf68: 4b c2 bd a1 bl c0018d08 <__csum_partial> c04275b8 <csum_partial>: c04275b8: 4b bf 17 50 b c0018d08 <__csum_partial> c0427884: 4b ff fd 35 bl c04275b8 <csum_partial> c0427b18: 4b ff fa a1 bl c04275b8 <csum_partial> c0427bd8: 4b ff f9 e1 bl c04275b8 <csum_partial> c0427cd4: 4b ff f8 e5 bl c04275b8 <csum_partial> c0427e34: 4b ff f7 85 bl c04275b8 <csum_partial> c045a1c0: 4b bb eb 49 bl c0018d08 <__csum_partial> c0489464 <csum_partial>: c0489464: 4b b8 f8 a4 b c0018d08 <__csum_partial> c04896b0: 4b ff fd b5 bl c0489464 <csum_partial> c048982c: 4b ff fc 39 bl c0489464 <csum_partial> c0490158: 4b b8 8b b1 bl c0018d08 <__csum_partial> c0492f0c <csum_partial>: c0492f0c: 4b b8 5d fc b c0018d08 <__csum_partial> c049326c: 4b ff fc a1 bl c0492f0c <csum_partial> c049333c: 4b ff fb d1 bl c0492f0c <csum_partial> c0493b18: 4b ff f3 f5 bl c0492f0c <csum_partial> c0493f50: 4b ff ef bd bl c0492f0c <csum_partial> c0493ffc: 4b ff ef 11 bl c0492f0c <csum_partial> c04a0f78: 4b b7 7d 91 bl c0018d08 <__csum_partial> c04b3e3c: 4b b6 4e cd bl c0018d08 <__csum_partial> c04b40d0 <csum_partial>: c04b40d0: 4b b6 4c 38 b c0018d08 <__csum_partial> c04b4448: 4b ff fc 89 bl c04b40d0 <csum_partial> c04b46f4: 4b ff f9 dd bl c04b40d0 <csum_partial> c04bf448: 4b b5 98 c0 b c0018d08 <__csum_partial> c04c5264: 4b b5 3a a5 bl c0018d08 <__csum_partial> c04c61e4: 4b b5 2b 25 bl c0018d08 <__csum_partial> gcc10 defines multiple versions of csum_partial() which are just an unconditional branch to __csum_partial(). To enforce inlining of that branch to __csum_partial(), mark csum_partial() as __always_inline. With this patch with gcc10: c0018d08 <__csum_partial>: c0019148: 4b ff fb c0 b c0018d08 <__csum_partial> c001924c: 4b ff fa bd bl c0018d08 <__csum_partial> c03e40ec: 4b c3 4c 1d bl c0018d08 <__csum_partial> c03e4120: 4b c3 4b e8 b c0018d08 <__csum_partial> c03eb004: 4b c2 dd 05 bl c0018d08 <__csum_partial> c03ecef4: 4b c2 be 15 bl c0018d08 <__csum_partial> c0427558: 4b bf 17 b1 bl c0018d08 <__csum_partial> c04286e4: 4b bf 06 25 bl c0018d08 <__csum_partial> c0428cd8: 4b bf 00 31 bl c0018d08 <__csum_partial> c0428d84: 4b be ff 85 bl c0018d08 <__csum_partial> c045a17c: 4b bb eb 8d bl c0018d08 <__csum_partial> c0489450: 4b b8 f8 b9 bl c0018d08 <__csum_partial> c0491860: 4b b8 74 a9 bl c0018d08 <__csum_partial> c0492eec: 4b b8 5e 1d bl c0018d08 <__csum_partial> c04a0eac: 4b b7 7e 5d bl c0018d08 <__csum_partial> c04b3e34: 4b b6 4e d5 bl c0018d08 <__csum_partial> c04b426c: 4b b6 4a 9d bl c0018d08 <__csum_partial> c04b463c: 4b b6 46 cd bl c0018d08 <__csum_partial> c04c004c: 4b b5 8c bd bl c0018d08 <__csum_partial> c04c0368: 4b b5 89 a1 bl c0018d08 <__csum_partial> c04c5254: 4b b5 3a b5 bl c0018d08 <__csum_partial> c04c60d4: 4b b5 2c 35 bl c0018d08 <__csum_partial> Signed-off-by: Christophe Leroy Reviewed-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a1d31f84ddb0926813b17fcd5cc7f3fa7b4deac2.1602759123.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/checksum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 82f099ba2411..d5da7ddbf0fc 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -163,7 +163,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) */ __wsum __csum_partial(const void *buff, int len, __wsum sum); -static inline __wsum csum_partial(const void *buff, int len, __wsum sum) +static __always_inline __wsum csum_partial(const void *buff, int len, __wsum sum) { if (__builtin_constant_p(len) && len <= 16 && (len & 1) == 0) {
if (len == 2) From d0edaa28a1f7830997131cbce87b6c52472825d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20Stehl=C3=A9?= Date: Sun, 13 Dec 2020 19:26:22 +0100 Subject: [PATCH 299/304] powerpc/ps3: use dma_mapping_error() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DMA address returned by dma_map_single() should be checked with dma_mapping_error(). Fix the ps3stor_setup() function accordingly. Fixes: 80071802cb9c ("[POWERPC] PS3: Storage Driver Core") Signed-off-by: Vincent Stehlé Reviewed-by: Geert Uytterhoeven Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201213182622.23047-1-vincent.stehle@laposte.net --- drivers/ps3/ps3stor_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ps3/ps3stor_lib.c b/drivers/ps3/ps3stor_lib.c index 333ba83006e4..a12a1ad9b5fe 100644 --- a/drivers/ps3/ps3stor_lib.c +++ b/drivers/ps3/ps3stor_lib.c @@ -189,7 +189,7 @@ int ps3stor_setup(struct ps3_storage_device *dev, irq_handler_t handler) dev->bounce_lpar = ps3_mm_phys_to_lpar(__pa(dev->bounce_buf)); dev->bounce_dma = dma_map_single(&dev->sbd.core, dev->bounce_buf, dev->bounce_size, DMA_BIDIRECTIONAL); - if (!dev->bounce_dma) { + if (dma_mapping_error(&dev->sbd.core, dev->bounce_dma)) { dev_err(&dev->sbd.core, "%s:%u: map DMA region failed\n", __func__, __LINE__); error = -ENODEV; From 20e9de85edae3a5866f29b6cce87c9ec66d62a1b Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Fri, 11 Dec 2020 15:59:54 +0100 Subject: [PATCH 300/304] powerpc/pseries/memhotplug: Quieten some DLPAR operations When attempting to remove a set of LMBs by index, a lot of messages are displayed on the console, even when everything goes fine: pseries-hotplug-mem: Attempting to hot-remove LMB, drc index 8000002d Offlined Pages 4096 pseries-hotplug-mem: Memory at 2d0000000 was hot-removed The 2 messages prefixed by "pseries-hotplug-mem" are not really helpful for the end user; they should be debug outputs. In case of error, because some of the LMB's pages couldn't be offlined, the following is displayed on the console: pseries-hotplug-mem: Attempting to hot-remove LMB, drc index 8000003e pseries-hotplug-mem: Failed to hot-remove memory at 3e0000000 dlpar: Could not handle DLPAR request "memory remove index 0x8000003e" Again, the 2 messages prefixed by "pseries-hotplug-mem" are useless, and the generic DLPAR prefixed message should be enough. These first two changes are mainly triggered by the changes introduced in drmgr: https://groups.google.com/g/powerpc-utils-devel/c/Y6ef4NB3EzM/m/9cu5JHRxAQAJ Also, when adding a bunch of LMBs, a message is displayed on the console per LMB, like these: pseries-hotplug-mem: Memory at 7e0000000 (drc index 8000007e) was hot-added pseries-hotplug-mem: Memory at 7f0000000 (drc index 8000007f) was hot-added pseries-hotplug-mem: Memory at 800000000 (drc index 80000080) was hot-added pseries-hotplug-mem: Memory at 810000000 (drc index 80000081) was hot-added When adding 1TB of memory with a 256MB LMB size, this leads to 4096 messages being displayed on the console. These messages are not really helpful for the end user, so move them to the DEBUG level.
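For context on the demotion: pr_debug() compiles to nothing unless DEBUG is defined for the file or CONFIG_DYNAMIC_DEBUG is enabled, in which case individual call sites can be switched on at runtime through /sys/kernel/debug/dynamic_debug/control. A minimal sketch of the pattern, assuming an illustrative helper name (report_lmb_result() is not a function from this patch):

#include <linux/printk.h>
#include <linux/types.h>

/*
 * Sketch only: pr_debug() is a no-op by default, so the hot-add and
 * hot-remove paths stay quiet unless the call sites are explicitly
 * enabled via dynamic debug.
 */
static void report_lmb_result(u64 base_addr, int rc)
{
	if (rc)
		pr_debug("Failed to hot-remove memory at %llx\n", base_addr);
	else
		pr_debug("Memory at %llx was hot-removed\n", base_addr);
}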
Signed-off-by: Laurent Dufour [mpe: Tweak change log wording] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201211145954.90143-1-ldufour@linux.ibm.com --- arch/powerpc/platforms/pseries/hotplug-memory.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 7efe6ec5d14a..8377f1f7c78e 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -479,7 +479,7 @@ static int dlpar_memory_remove_by_index(u32 drc_index) int lmb_found; int rc; - pr_info("Attempting to hot-remove LMB, drc index %x\n", drc_index); + pr_debug("Attempting to hot-remove LMB, drc index %x\n", drc_index); lmb_found = 0; for_each_drmem_lmb(lmb) { @@ -497,10 +497,10 @@ static int dlpar_memory_remove_by_index(u32 drc_index) rc = -EINVAL; if (rc) - pr_info("Failed to hot-remove memory at %llx\n", - lmb->base_addr); + pr_debug("Failed to hot-remove memory at %llx\n", + lmb->base_addr); else - pr_info("Memory at %llx was hot-removed\n", lmb->base_addr); + pr_debug("Memory at %llx was hot-removed\n", lmb->base_addr); return rc; } @@ -717,8 +717,8 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add) if (!drmem_lmb_reserved(lmb)) continue; - pr_info("Memory at %llx (drc index %x) was hot-added\n", - lmb->base_addr, lmb->drc_index); + pr_debug("Memory at %llx (drc index %x) was hot-added\n", + lmb->base_addr, lmb->drc_index); drmem_remove_lmb_reservation(lmb); } rc = 0; From c88017cf2af614409da69934c1738ed5ff2f7022 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Fri, 11 Dec 2020 13:11:41 +1100 Subject: [PATCH 301/304] powerpc/powernv: Rate limit opal-elog read failure message Sometimes we can't read an error log from OPAL, and we print an error message accordingly. But the OPAL userspace tools seem to like retrying a lot, in which case we flood the kernel log with a lot of messages. Change pr_err() to pr_err_ratelimited() to help with this. Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201211021140.28402-1-ajd@linux.ibm.com --- arch/powerpc/platforms/powernv/opal-elog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 37b380eef41a..5821b0fa8614 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -171,8 +171,8 @@ static ssize_t raw_attr_read(struct file *filep, struct kobject *kobj, opal_rc = opal_read_elog(__pa(elog->buffer), elog->size, elog->id); if (opal_rc != OPAL_SUCCESS) { - pr_err("ELOG: log read failed for log-id=%llx\n", - elog->id); + pr_err_ratelimited("ELOG: log read failed for log-id=%llx\n", + elog->id); kfree(elog->buffer); elog->buffer = NULL; return -EIO; From 5d82344795dbd3fcd74c974ab60b2845970dc5e3 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 25 Nov 2020 14:15:51 +1100 Subject: [PATCH 302/304] powerpc/configs: Add ppc64le_allnoconfig target Add a phony target for ppc64le_allnoconfig, which tests some combinations of CONFIG symbols that aren't covered by any of our defconfigs. 
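Background on the rate limiting in the opal-elog change above: pr_err_ratelimited() gives each call site its own ratelimit state, by default printing at most 10 messages per 5-second interval and dropping the excess with a "callbacks suppressed" summary. A roughly equivalent open-coded sketch, assuming an illustrative function name (report_elog_read_failure() is not from the patch):

#include <linux/printk.h>
#include <linux/ratelimit.h>
#include <linux/types.h>

/* Sketch: approximately what pr_err_ratelimited() expands to per call site. */
static void report_elog_read_failure(u64 log_id)
{
	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	if (__ratelimit(&rs))
		pr_err("ELOG: log read failed for log-id=%llx\n", log_id);
}

Because the state is per call site, a retry storm from the OPAL userspace tools exhausts only this site's budget without silencing other error paths.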
Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201125031551.2112715-1-mpe@ellerman.id.au --- arch/powerpc/Makefile | 5 +++++ arch/powerpc/configs/ppc64le.config | 2 ++ 2 files changed, 7 insertions(+) create mode 100644 arch/powerpc/configs/ppc64le.config diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index fde3dbe57bda..2bd509fa2487 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -375,6 +375,11 @@ ppc64le_allmodconfig: $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/le.config \ -f $(srctree)/Makefile allmodconfig +PHONY += ppc64le_allnoconfig +ppc64le_allnoconfig: + $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/ppc64le.config \ + -f $(srctree)/Makefile allnoconfig + PHONY += ppc64_book3e_allmodconfig ppc64_book3e_allmodconfig: $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/85xx-64bit.config \ diff --git a/arch/powerpc/configs/ppc64le.config b/arch/powerpc/configs/ppc64le.config new file mode 100644 index 000000000000..14dca1062c1b --- /dev/null +++ b/arch/powerpc/configs/ppc64le.config @@ -0,0 +1,2 @@ +CONFIG_PPC64=y +CONFIG_CPU_LITTLE_ENDIAN=y From c15d1f9d03a0f4f68bf52dffdd541c8054e6de35 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 23 Oct 2020 15:00:02 +1100 Subject: [PATCH 303/304] powerpc: Add config fragment for disabling -Werror This makes it easy to disable building with -Werror: $ make defconfig $ grep WERROR .config # CONFIG_PPC_DISABLE_WERROR is not set CONFIG_PPC_WERROR=y $ make disable-werror.config GEN Makefile Using .config as base Merging arch/powerpc/configs/disable-werror.config Value of CONFIG_PPC_DISABLE_WERROR is redefined by fragment arch/powerpc/configs/disable-werror.config: Previous value: # CONFIG_PPC_DISABLE_WERROR is not set New value: CONFIG_PPC_DISABLE_WERROR=y ... $ grep WERROR .config CONFIG_PPC_DISABLE_WERROR=y Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201023040002.3313371-1-mpe@ellerman.id.au --- arch/powerpc/configs/disable-werror.config | 1 + 1 file changed, 1 insertion(+) create mode 100644 arch/powerpc/configs/disable-werror.config diff --git a/arch/powerpc/configs/disable-werror.config b/arch/powerpc/configs/disable-werror.config new file mode 100644 index 000000000000..6ea12a12432c --- /dev/null +++ b/arch/powerpc/configs/disable-werror.config @@ -0,0 +1 @@ +CONFIG_PPC_DISABLE_WERROR=y From c1bea0a840ac75dca19bc6aa05575a33eb9fd058 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 15 Dec 2020 12:57:20 +1100 Subject: [PATCH 304/304] powerpc/32s: Fix cleanup_cpu_mmu_context() compile bug Currently pmac32_defconfig with SMP=y doesn't build: arch/powerpc/platforms/powermac/smp.c: error: implicit declaration of function 'cleanup_cpu_mmu_context' It would be nice for consistency if all platforms clear mm_cpumask and flush TLBs on unplug, but the TLB invalidation bug described in commit 01b0f0eae081 ("powerpc/64s: Trim offlined CPUs from mm_cpumasks") only applies to 64s and for now we only have the TLB flush code for that platform. So just add an empty version for 32-bit Book3S. 
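The fix below relies on a standard kernel idiom: where a hook is only meaningful on some platforms, the platform that implements it declares a real function and the others provide an empty static inline, so callers need no #ifdef. A generic sketch of the pattern (the config guard and the caller are simplified illustrations, not the exact powerpc headers):

/* In a platform header: real implementation on 64s, no-op elsewhere. */
#ifdef CONFIG_PPC_BOOK3S_64
void cleanup_cpu_mmu_context(void);	/* trims the offlined CPU from mm_cpumasks */
#else
static inline void cleanup_cpu_mmu_context(void) { }
#endif

/* Unplug code can then call it unconditionally: */
static inline void unplug_cpu_cleanup(void)
{
	cleanup_cpu_mmu_context();	/* compiles away on 32-bit Book3S */
}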
Fixes: 01b0f0eae081 ("powerpc/64s: Trim offlined CPUs from mm_cpumasks") Reported-by: Geert Uytterhoeven Signed-off-by: Nicholas Piggin [mpe: Change log based on comments from Nick] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/mmu-hash.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index 331187661236..685c589e723f 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -94,6 +94,7 @@ typedef struct { } mm_context_t; void update_bats(void); +static inline void cleanup_cpu_mmu_context(void) { }; /* patch sites */ extern s32 patch__hash_page_A0, patch__hash_page_A1, patch__hash_page_A2;
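A closing note on the mask arithmetic in the msgsndp fix (patch 295) above, since the ISA's big-endian bit numbering is easy to trip over: bit 0 is the most significant bit of the 64-bit register, so a field spanning ISA bits m:n is extracted by shifting right by (63 - n) and masking off (n - m + 1) bits. A minimal standalone sketch (isa_field() is an illustrative helper, not a kernel function):

#include <stdint.h>

/*
 * Extract the ISA-numbered bit field m:n (bit 0 = MSB, m <= n, field
 * narrower than 64 bits) from a 64-bit register value.
 */
static inline uint64_t isa_field(uint64_t v, unsigned int m, unsigned int n)
{
	return (v >> (63 - n)) & ((1ULL << (n - m + 1)) - 1);
}

/*
 * msgtype = RB bits 32:36: shift 63 - 36 = 27, width 5, mask 0x1f (not 0xf)
 * t       = RB bits 57:63: shift 0, width 7, mask 0x7f (not 0x3f)
 */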