linux/arch/arm64/kernel/efi.c
Mark Rutland bbbb65770b arm64: Avoid cpus_have_const_cap() for ARM64_HAS_BTI
In system_supports_bti() we use cpus_have_const_cap() to check for
ARM64_HAS_BTI, but this is not necessary and alternative_has_cap_*() or
cpus_have_final_*cap() would be preferable.

For historical reasons, cpus_have_const_cap() is more complicated than
it needs to be. Before cpucaps are finalized, it will perform a bitmap
test of the system_cpucaps bitmap, and once cpucaps are finalized it
will use an alternative branch. This used to be necessary to handle some
race conditions in the window between cpucap detection and the
subsequent patching of alternatives and static branches, where different
branches could be out-of-sync with one another (or w.r.t. alternative
sequences). Now that we use alternative branches instead of static
branches, these are all patched atomically w.r.t. one another, and there
are only a handful of cases that need special care in the window between
cpucap detection and alternative patching.

Due to the above, it would be nice to remove cpus_have_const_cap(), and
migrate callers over to alternative_has_cap_*(), cpus_have_final_cap(),
or cpus_have_cap() depending on when their requirements. This will
remove redundant instructions and improve code generation, and will make
it easier to determine how each callsite will behave before, during, and
after alternative patching.

When CONFIG_ARM64_BTI_KERNEL=y, the ARM64_HAS_BTI cpucap is a strict
boot cpu feature which is detected and patched early on the boot cpu.
All uses guarded by CONFIG_ARM64_BTI_KERNEL happen after the boot CPU
has detected ARM64_HAS_BTI and patched boot alternatives, and hence can
safely use alternative_has_cap_*() or cpus_have_final_boot_cap().

Regardless of CONFIG_ARM64_BTI_KERNEL, all other uses of ARM64_HAS_BTI
happen after system capabilities have been finalized and alternatives
have been patched. Hence these can safely use alternative_has_cap_*) or
cpus_have_final_cap().

This patch splits system_supports_bti() into system_supports_bti() and
system_supports_bti_kernel(), with the former handling where the cpucap
affects userspace functionality, and ther latter handling where the
cpucap affects kernel functionality. The use of cpus_have_const_cap() is
replaced by cpus_have_final_cap() in cpus_have_const_cap, and
cpus_have_final_boot_cap() in system_supports_bti_kernel(). This will
avoid generating code to test the system_cpucaps bitmap and should be
better for all subsequent calls at runtime. The use of
cpus_have_final_cap() and cpus_have_final_boot_cap() will make it easier
to spot if code is chaanged such that these run before the ARM64_HAS_BTI
cpucap is guaranteed to have been finalized.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2023-10-16 14:17:04 +01:00

223 lines
6 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* Extensible Firmware Interface
*
* Based on Extensible Firmware Interface Specification version 2.4
*
* Copyright (C) 2013, 2014 Linaro Ltd.
*/
#include <linux/efi.h>
#include <linux/init.h>
#include <linux/screen_info.h>
#include <asm/efi.h>
#include <asm/stacktrace.h>
static bool region_is_misaligned(const efi_memory_desc_t *md)
{
if (PAGE_SIZE == EFI_PAGE_SIZE)
return false;
return !PAGE_ALIGNED(md->phys_addr) ||
!PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT);
}
/*
* Only regions of type EFI_RUNTIME_SERVICES_CODE need to be
* executable, everything else can be mapped with the XN bits
* set. Also take the new (optional) RO/XP bits into account.
*/
static __init pteval_t create_mapping_protection(efi_memory_desc_t *md)
{
u64 attr = md->attribute;
u32 type = md->type;
if (type == EFI_MEMORY_MAPPED_IO)
return PROT_DEVICE_nGnRE;
if (region_is_misaligned(md)) {
static bool __initdata code_is_misaligned;
/*
* Regions that are not aligned to the OS page size cannot be
* mapped with strict permissions, as those might interfere
* with the permissions that are needed by the adjacent
* region's mapping. However, if we haven't encountered any
* misaligned runtime code regions so far, we can safely use
* non-executable permissions for non-code regions.
*/
code_is_misaligned |= (type == EFI_RUNTIME_SERVICES_CODE);
return code_is_misaligned ? pgprot_val(PAGE_KERNEL_EXEC)
: pgprot_val(PAGE_KERNEL);
}
/* R-- */
if ((attr & (EFI_MEMORY_XP | EFI_MEMORY_RO)) ==
(EFI_MEMORY_XP | EFI_MEMORY_RO))
return pgprot_val(PAGE_KERNEL_RO);
/* R-X */
if (attr & EFI_MEMORY_RO)
return pgprot_val(PAGE_KERNEL_ROX);
/* RW- */
if (((attr & (EFI_MEMORY_RP | EFI_MEMORY_WP | EFI_MEMORY_XP)) ==
EFI_MEMORY_XP) ||
type != EFI_RUNTIME_SERVICES_CODE)
return pgprot_val(PAGE_KERNEL);
/* RWX */
return pgprot_val(PAGE_KERNEL_EXEC);
}
/* we will fill this structure from the stub, so don't put it in .bss */
struct screen_info screen_info __section(".data");
EXPORT_SYMBOL(screen_info);
int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
{
pteval_t prot_val = create_mapping_protection(md);
bool page_mappings_only = (md->type == EFI_RUNTIME_SERVICES_CODE ||
md->type == EFI_RUNTIME_SERVICES_DATA);
/*
* If this region is not aligned to the page size used by the OS, the
* mapping will be rounded outwards, and may end up sharing a page
* frame with an adjacent runtime memory region. Given that the page
* table descriptor covering the shared page will be rewritten when the
* adjacent region gets mapped, we must avoid block mappings here so we
* don't have to worry about splitting them when that happens.
*/
if (region_is_misaligned(md))
page_mappings_only = true;
create_pgd_mapping(mm, md->phys_addr, md->virt_addr,
md->num_pages << EFI_PAGE_SHIFT,
__pgprot(prot_val | PTE_NG), page_mappings_only);
return 0;
}
struct set_perm_data {
const efi_memory_desc_t *md;
bool has_bti;
};
static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
{
struct set_perm_data *spd = data;
const efi_memory_desc_t *md = spd->md;
pte_t pte = READ_ONCE(*ptep);
if (md->attribute & EFI_MEMORY_RO)
pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
if (md->attribute & EFI_MEMORY_XP)
pte = set_pte_bit(pte, __pgprot(PTE_PXN));
else if (system_supports_bti_kernel() && spd->has_bti)
pte = set_pte_bit(pte, __pgprot(PTE_GP));
set_pte(ptep, pte);
return 0;
}
int __init efi_set_mapping_permissions(struct mm_struct *mm,
efi_memory_desc_t *md,
bool has_bti)
{
struct set_perm_data data = { md, has_bti };
BUG_ON(md->type != EFI_RUNTIME_SERVICES_CODE &&
md->type != EFI_RUNTIME_SERVICES_DATA);
if (region_is_misaligned(md))
return 0;
/*
* Calling apply_to_page_range() is only safe on regions that are
* guaranteed to be mapped down to pages. Since we are only called
* for regions that have been mapped using efi_create_mapping() above
* (and this is checked by the generic Memory Attributes table parsing
* routines), there is no need to check that again here.
*/
return apply_to_page_range(mm, md->virt_addr,
md->num_pages << EFI_PAGE_SHIFT,
set_permissions, &data);
}
/*
* UpdateCapsule() depends on the system being shutdown via
* ResetSystem().
*/
bool efi_poweroff_required(void)
{
return efi_enabled(EFI_RUNTIME_SERVICES);
}
asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f)
{
pr_err_ratelimited(FW_BUG "register x18 corrupted by EFI %s\n", f);
return s;
}
static DEFINE_RAW_SPINLOCK(efi_rt_lock);
void arch_efi_call_virt_setup(void)
{
efi_virtmap_load();
__efi_fpsimd_begin();
raw_spin_lock(&efi_rt_lock);
}
void arch_efi_call_virt_teardown(void)
{
raw_spin_unlock(&efi_rt_lock);
__efi_fpsimd_end();
efi_virtmap_unload();
}
asmlinkage u64 *efi_rt_stack_top __ro_after_init;
asmlinkage efi_status_t __efi_rt_asm_recover(void);
bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg)
{
/* Check whether the exception occurred while running the firmware */
if (!current_in_efi() || regs->pc >= TASK_SIZE_64)
return false;
pr_err(FW_BUG "Unable to handle %s in EFI runtime service\n", msg);
add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
regs->regs[0] = EFI_ABORTED;
regs->regs[30] = efi_rt_stack_top[-1];
regs->pc = (u64)__efi_rt_asm_recover;
if (IS_ENABLED(CONFIG_SHADOW_CALL_STACK))
regs->regs[18] = efi_rt_stack_top[-2];
return true;
}
/* EFI requires 8 KiB of stack space for runtime services */
static_assert(THREAD_SIZE >= SZ_8K);
static int __init arm64_efi_rt_init(void)
{
void *p;
if (!efi_enabled(EFI_RUNTIME_SERVICES))
return 0;
p = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, GFP_KERNEL,
NUMA_NO_NODE, &&l);
l: if (!p) {
pr_warn("Failed to allocate EFI runtime stack\n");
clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
return -ENOMEM;
}
efi_rt_stack_top = p + THREAD_SIZE;
return 0;
}
core_initcall(arm64_efi_rt_init);