arm64 fixes for -rc1

- Fix shadow call stack patching with LTO=full

- Fix voluntary preemption of the FPSIMD registers from assembly code

- Fix workaround for A520 CPU erratum #2966298 and extend to A510

- Fix SME issues that resulted in corruption of the register state

- Minor fixes (missing includes, formatting)
 -----BEGIN PGP SIGNATURE-----
 
 iQFEBAABCgAuFiEEPxTL6PPUbjXGY88ct6xw3ITBYzQFAmWqUgEQHHdpbGxAa2Vy
 bmVsLm9yZwAKCRC3rHDchMFjNB+7B/0VDHq2F8KtOhW02XqcKJaqiDk8QggTZn0D
 3JxZs6P6y9KP88xa6gr3G+PzLYjKV66aP871oKPECtsQAAIJzMUfhB7C7+zJzxPL
 kxrP3fTCwGUUkBlH7+dhyoX4hmV174c0xp70vp/2+hG5IixwtpFVi4284pgU6RcC
 El6LH0UrRiHUI7oP5vLArk3vp1X8yFXxGRCeFCmP9mOBB4Auf9q5F0YoESPz0LBS
 ohb9L8vZw1eBYJxoSNiGo819FX4Q2nximR75byLYMB1+M0wlqFo1Or/AbfpZGPzY
 q5plHckTU25NxPEMWVvzXlu/O1gBkAfsWcxb0TIDpVWGDrL1+6Qm
 =9pba
 -----END PGP SIGNATURE-----

Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 fixes from Will Deacon:
 "I think the main one is fixing the dynamic SCS patching when full LTO
  is enabled (clang was silently getting this horribly wrong), but it's
  all good stuff.

  Rob just pointed out that the fix to the workaround for erratum
  #2966298 might not be necessary, but in the worst case it's harmless
  and since the official description leaves a little to be desired here,
  I've left it in.

  Summary:

   - Fix shadow call stack patching with LTO=full

   - Fix voluntary preemption of the FPSIMD registers from assembly code

   - Fix workaround for A520 CPU erratum #2966298 and extend to A510

   - Fix SME issues that resulted in corruption of the register state

   - Minor fixes (missing includes, formatting)"

* tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
  arm64: Fix silcon-errata.rst formatting
  arm64/sme: Always exit sme_alloc() early with existing storage
  arm64/fpsimd: Remove spurious check for SVE support
  arm64/ptrace: Don't flush ZA/ZT storage when writing ZA via ptrace
  arm64: entry: simplify kernel_exit logic
  arm64: entry: fix ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
  arm64: errata: Add Cortex-A510 speculative unprivileged load workaround
  arm64: Rename ARM64_WORKAROUND_2966298
  arm64: fpsimd: Bring cond_yield asm macro in line with new rules
  arm64: scs: Work around full LTO issue with dynamic SCS
  arm64: irq: include <linux/cpumask.h>
Linus Torvalds 2024-01-19 13:36:15 -08:00
commit 18b5cb6cb8
11 changed files with 83 additions and 49 deletions

Documentation/arch/arm64/silicon-errata.rst

@@ -71,6 +71,8 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A510 | #2658417 | ARM64_ERRATUM_2658417 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A510 | #3117295 | ARM64_ERRATUM_3117295 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A520 | #2966298 | ARM64_ERRATUM_2966298 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 |
@@ -235,11 +237,9 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| Rockchip | RK3588 | #3588001 | ROCKCHIP_ERRATUM_3588001 |
+----------------+-----------------+-----------------+-----------------------------+
+----------------+-----------------+-----------------+-----------------------------+
| Fujitsu | A64FX | E#010001 | FUJITSU_ERRATUM_010001 |
+----------------+-----------------+-----------------+-----------------------------+
+----------------+-----------------+-----------------+-----------------------------+
| ASR | ASR8601 | #8601001 | N/A |
+----------------+-----------------+-----------------+-----------------------------+

arch/arm64/Kconfig

@@ -1039,8 +1039,12 @@ config ARM64_ERRATUM_2645198
If unsure, say Y.
config ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
bool
config ARM64_ERRATUM_2966298
bool "Cortex-A520: 2966298: workaround for speculatively executed unprivileged load"
select ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
default y
help
This option adds the workaround for ARM Cortex-A520 erratum 2966298.
@@ -1052,6 +1056,20 @@ config ARM64_ERRATUM_2966298
If unsure, say Y.
config ARM64_ERRATUM_3117295
bool "Cortex-A510: 3117295: workaround for speculatively executed unprivileged load"
select ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
default y
help
This option adds the workaround for ARM Cortex-A510 erratum 3117295.
On an affected Cortex-A510 core, a speculatively executed unprivileged
load might leak data from a privileged level via a cache side channel.
Work around this problem by executing a TLBI before returning to EL0.
If unsure, say Y.
config CAVIUM_ERRATUM_22375
bool "Cavium erratum 22375, 24313"
default y

arch/arm64/include/asm/assembler.h

@@ -760,32 +760,25 @@ alternative_endif
.endm
/*
* Check whether preempt/bh-disabled asm code should yield as soon as
* it is able. This is the case if we are currently running in task
* context, and either a softirq is pending, or the TIF_NEED_RESCHED
* flag is set and re-enabling preemption a single time would result in
* a preempt count of zero. (Note that the TIF_NEED_RESCHED flag is
* stored negated in the top word of the thread_info::preempt_count
* Check whether asm code should yield as soon as it is able. This is
* the case if we are currently running in task context, and the
* TIF_NEED_RESCHED flag is set. (Note that the TIF_NEED_RESCHED flag
* is stored negated in the top word of the thread_info::preempt_count
* field)
*/
.macro cond_yield, lbl:req, tmp:req, tmp2:req
.macro cond_yield, lbl:req, tmp:req, tmp2
#ifdef CONFIG_PREEMPT_VOLUNTARY
get_current_task \tmp
ldr \tmp, [\tmp, #TSK_TI_PREEMPT]
/*
* If we are serving a softirq, there is no point in yielding: the
* softirq will not be preempted no matter what we do, so we should
* run to completion as quickly as we can.
* run to completion as quickly as we can. The preempt_count field will
* have BIT(SOFTIRQ_SHIFT) set in this case, so the zero check will
* catch this case too.
*/
tbnz \tmp, #SOFTIRQ_SHIFT, .Lnoyield_\@
#ifdef CONFIG_PREEMPTION
sub \tmp, \tmp, #PREEMPT_DISABLE_OFFSET
cbz \tmp, \lbl
#endif
adr_l \tmp, irq_stat + IRQ_CPUSTAT_SOFTIRQ_PENDING
get_this_cpu_offset \tmp2
ldr w\tmp, [\tmp, \tmp2]
cbnz w\tmp, \lbl // yield on pending softirq in task context
.Lnoyield_\@:
.endm
/*

arch/arm64/include/asm/irq.h

@@ -4,6 +4,8 @@
#ifndef __ASSEMBLER__
#include <linux/cpumask.h>
#include <asm-generic/irq.h>
void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu);

arch/arm64/kernel/Makefile

@@ -73,7 +73,13 @@ obj-$(CONFIG_ARM64_MTE) += mte.o
obj-y += vdso-wrap.o
obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o
obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.o
CFLAGS_patch-scs.o += -mbranch-protection=none
# We need to prevent the SCS patching code from patching itself. Using
# -mbranch-protection=none here to avoid the patchable PAC opcodes from being
# generated triggers an issue with full LTO on Clang, which stops emitting PAC
# instructions altogether. So instead, omit the unwind tables used by the
# patching code, so it will not be able to locate its own PAC instructions.
CFLAGS_patch-scs.o += -fno-asynchronous-unwind-tables -fno-unwind-tables
# Force dependency (vdso*-wrap.S includes vdso.so through incbin)
$(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so

arch/arm64/kernel/asm-offsets.c

@@ -117,8 +117,6 @@ int main(void)
DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE);
BLANK();
DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET);
DEFINE(SOFTIRQ_SHIFT, SOFTIRQ_SHIFT);
DEFINE(IRQ_CPUSTAT_SOFTIRQ_PENDING, offsetof(irq_cpustat_t, __softirq_pending));
BLANK();
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
BLANK();

arch/arm64/kernel/cpu_errata.c

@@ -416,6 +416,19 @@ static struct midr_range broken_aarch32_aes[] = {
};
#endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */
#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
static const struct midr_range erratum_spec_unpriv_load_list[] = {
#ifdef CONFIG_ARM64_ERRATUM_3117295
MIDR_ALL_VERSIONS(MIDR_CORTEX_A510),
#endif
#ifdef CONFIG_ARM64_ERRATUM_2966298
/* Cortex-A520 r0p0 to r0p1 */
MIDR_REV_RANGE(MIDR_CORTEX_A520, 0, 0, 1),
#endif
{},
};
#endif
const struct arm64_cpu_capabilities arm64_errata[] = {
#ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
{
@@ -713,12 +726,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
MIDR_FIXED(MIDR_CPU_VAR_REV(1,1), BIT(25)),
},
#endif
#ifdef CONFIG_ARM64_ERRATUM_2966298
#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
{
.desc = "ARM erratum 2966298",
.capability = ARM64_WORKAROUND_2966298,
.desc = "ARM errata 2966298, 3117295",
.capability = ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD,
/* Cortex-A520 r0p0 - r0p1 */
ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A520, 0, 0, 1),
ERRATA_MIDR_RANGE_LIST(erratum_spec_unpriv_load_list),
},
#endif
#ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38

arch/arm64/kernel/entry.S

@@ -428,16 +428,9 @@ alternative_else_nop_endif
ldp x28, x29, [sp, #16 * 14]
.if \el == 0
alternative_if ARM64_WORKAROUND_2966298
tlbi vale1, xzr
dsb nsh
alternative_else_nop_endif
alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
ldr lr, [sp, #S_LR]
add sp, sp, #PT_REGS_SIZE // restore sp
eret
alternative_else_nop_endif
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
alternative_insn "b .L_skip_tramp_exit_\@", nop, ARM64_UNMAP_KERNEL_AT_EL0
msr far_el1, x29
ldr_this_cpu x30, this_cpu_vector, x29
@@ -446,16 +439,26 @@ alternative_else_nop_endif
ldr lr, [sp, #S_LR] // restore x30
add sp, sp, #PT_REGS_SIZE // restore sp
br x29
.L_skip_tramp_exit_\@:
#endif
.else
.endif
ldr lr, [sp, #S_LR]
add sp, sp, #PT_REGS_SIZE // restore sp
.if \el == 0
/* This must be after the last explicit memory access */
alternative_if ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
tlbi vale1, xzr
dsb nsh
alternative_else_nop_endif
.else
/* Ensure any device/NC reads complete */
alternative_insn nop, "dmb sy", ARM64_WORKAROUND_1508412
.endif
eret
.endif
sb
.endm

arch/arm64/kernel/fpsimd.c

@@ -898,10 +898,8 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
* allocate SVE now in case it is needed for use in streaming
* mode.
*/
if (system_supports_sve()) {
sve_free(task);
sve_alloc(task, true);
}
sve_free(task);
sve_alloc(task, true);
if (free_sme)
sme_free(task);
@@ -1219,8 +1217,10 @@ void fpsimd_release_task(struct task_struct *dead_task)
*/
void sme_alloc(struct task_struct *task, bool flush)
{
if (task->thread.sme_state && flush) {
memset(task->thread.sme_state, 0, sme_state_size(task));
if (task->thread.sme_state) {
if (flush)
memset(task->thread.sme_state, 0,
sme_state_size(task));
return;
}

arch/arm64/kernel/ptrace.c

@@ -1108,12 +1108,13 @@ static int za_set(struct task_struct *target,
}
}
/* Allocate/reinit ZA storage */
sme_alloc(target, true);
if (!target->thread.sme_state) {
ret = -ENOMEM;
goto out;
}
/*
* Only flush the storage if PSTATE.ZA was not already set,
* otherwise preserve any existing data.
*/
sme_alloc(target, !thread_za_enabled(&target->thread));
if (!target->thread.sme_state)
return -ENOMEM;
/* If there is no data then disable ZA */
if (!count) {

arch/arm64/tools/cpucaps

@@ -84,7 +84,6 @@ WORKAROUND_2077057
WORKAROUND_2457168
WORKAROUND_2645198
WORKAROUND_2658417
WORKAROUND_2966298
WORKAROUND_AMPERE_AC03_CPU_38
WORKAROUND_TRBE_OVERWRITE_FILL_MODE
WORKAROUND_TSB_FLUSH_FAILURE
@@ -100,3 +99,4 @@ WORKAROUND_NVIDIA_CARMEL_CNP
WORKAROUND_QCOM_FALKOR_E1003
WORKAROUND_REPEAT_TLBI
WORKAROUND_SPECULATIVE_AT
WORKAROUND_SPECULATIVE_UNPRIV_LOAD