linux/arch/x86/mm/pkeys.c
Linus Torvalds 1423e2660c Fixes and improvements for FPU handling on x86:
- Prevent sigaltstack out of bounds writes. The kernel unconditionally
     writes the FPU state to the alternate stack without checking whether
     the stack is large enough to accomodate it.
 
     Check the alternate stack size before doing so and in case it's too
     small force a SIGSEGV instead of silently corrupting user space data.
 
   - MINSIGSTKZ and SIGSTKSZ are constants in signal.h and have never been
     updated despite the fact that the FPU state which is stored on the
     signal stack has grown over time which causes trouble in the field
     when AVX512 is available on a CPU. The kernel does not expose the
     minimum requirements for the alternate stack size depending on the
     available and enabled CPU features.
 
     ARM already added an aux vector AT_MINSIGSTKSZ for the same reason.
     Add it to x86 as well
 
   - A major cleanup of the x86 FPU code. The recent discoveries of XSTATE
     related issues unearthed quite some inconsistencies, duplicated code
     and other issues.
 
     The fine granular overhaul addresses this, makes the code more robust
     and maintainable, which allows to integrate upcoming XSTATE related
     features in sane ways.
 -----BEGIN PGP SIGNATURE-----
 
 iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAmDlcpETHHRnbHhAbGlu
 dXRyb25peC5kZQAKCRCmGPVMDXSYoeP5D/4i+AgYYeiMLgGb+NS7iaKPfoWo6LIz
 y3qdTSA0DQaIYbYivWwRO/g0GYdDMXDWeZalFi7eGnVI8O3eOog+22Zrf/y0UINB
 KJHdYd4ApWHhs401022y5hexrWQvnV8w1yQCuj/zLm6eC+AVhdwt2AY+IBoRrdUj
 wqY97B/4rJNsBvvqTDn9EeDrJA2y0y0Suc7AhIp2BGMI+dpIdxys8RJDamXNWyDL
 gJf0YRgUoiIn3AHKb+fgv60AoxfC175NSg/5/y/scFNXqVlW0Up4YCb7pqG9o2Ga
 f3XvtWfbw1N5PmUYjFkALwEkzGUbM3v0RA3xLY2j2WlWm9fBPPy59dt+i/h/VKyA
 GrA7i7lcIqX8dfVH6XkrReZBkRDSB6t9SZTvV54jAz5fcIZO2Rg++UFUvI/R6GKK
 XCcxukYaArwo+IG62iqDszS3gfLGhcor/cviOeULRC5zMUIO4Jah+IhDnifmShtC
 M5s9QzrwIRD/XMewGRQmvkiN4kBfE7jFoBQr1J9leCXJKrM+2JQmMzVInuubTQIq
 SdlKOaAIn7xtekz+6XdFG9Gmhck0PCLMJMOLNvQkKWI3KqGLRZ+dAWKK0vsCizAx
 0BA7ZeB9w9lFT+D8mQCX77JvW9+VNwyfwIOLIrJRHk3VqVpS5qvoiFTLGJJBdZx4
 /TbbRZu7nXDN2w==
 =Mq1m
 -----END PGP SIGNATURE-----

Merge tag 'x86-fpu-2021-07-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fpu updates from Thomas Gleixner:
 "Fixes and improvements for FPU handling on x86:

   - Prevent sigaltstack out of bounds writes.

     The kernel unconditionally writes the FPU state to the alternate
     stack without checking whether the stack is large enough to
     accomodate it.

     Check the alternate stack size before doing so and in case it's too
     small force a SIGSEGV instead of silently corrupting user space
     data.

   - MINSIGSTKZ and SIGSTKSZ are constants in signal.h and have never
     been updated despite the fact that the FPU state which is stored on
     the signal stack has grown over time which causes trouble in the
     field when AVX512 is available on a CPU. The kernel does not expose
     the minimum requirements for the alternate stack size depending on
     the available and enabled CPU features.

     ARM already added an aux vector AT_MINSIGSTKSZ for the same reason.
     Add it to x86 as well.

   - A major cleanup of the x86 FPU code. The recent discoveries of
     XSTATE related issues unearthed quite some inconsistencies,
     duplicated code and other issues.

     The fine granular overhaul addresses this, makes the code more
     robust and maintainable, which allows to integrate upcoming XSTATE
     related features in sane ways"

* tag 'x86-fpu-2021-07-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (74 commits)
  x86/fpu/xstate: Clear xstate header in copy_xstate_to_uabi_buf() again
  x86/fpu/signal: Let xrstor handle the features to init
  x86/fpu/signal: Handle #PF in the direct restore path
  x86/fpu: Return proper error codes from user access functions
  x86/fpu/signal: Split out the direct restore code
  x86/fpu/signal: Sanitize copy_user_to_fpregs_zeroing()
  x86/fpu/signal: Sanitize the xstate check on sigframe
  x86/fpu/signal: Remove the legacy alignment check
  x86/fpu/signal: Move initial checks into fpu__restore_sig()
  x86/fpu: Mark init_fpstate __ro_after_init
  x86/pkru: Remove xstate fiddling from write_pkru()
  x86/fpu: Don't store PKRU in xstate in fpu_reset_fpstate()
  x86/fpu: Remove PKRU handling from switch_fpu_finish()
  x86/fpu: Mask PKRU from kernel XRSTOR[S] operations
  x86/fpu: Hook up PKRU into ptrace()
  x86/fpu: Add PKRU storage outside of task XSAVE buffer
  x86/fpu: Dont restore PKRU in fpregs_restore_userspace()
  x86/fpu: Rename xfeatures_mask_user() to xfeatures_mask_uabi()
  x86/fpu: Move FXSAVE_LEAK quirk info __copy_kernel_to_fpregs()
  x86/fpu: Rename __fpregs_load_activate() to fpregs_restore_userregs()
  ...
2021-07-07 11:12:01 -07:00

195 lines
5.3 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* Intel Memory Protection Keys management
* Copyright (c) 2015, Intel Corporation.
*/
#include <linux/debugfs.h> /* debugfs_create_u32() */
#include <linux/mm_types.h> /* mm_struct, vma, etc... */
#include <linux/pkeys.h> /* PKEY_* */
#include <uapi/asm-generic/mman-common.h>
#include <asm/cpufeature.h> /* boot_cpu_has, ... */
#include <asm/mmu_context.h> /* vma_pkey() */
int __execute_only_pkey(struct mm_struct *mm)
{
bool need_to_set_mm_pkey = false;
int execute_only_pkey = mm->context.execute_only_pkey;
int ret;
/* Do we need to assign a pkey for mm's execute-only maps? */
if (execute_only_pkey == -1) {
/* Go allocate one to use, which might fail */
execute_only_pkey = mm_pkey_alloc(mm);
if (execute_only_pkey < 0)
return -1;
need_to_set_mm_pkey = true;
}
/*
* We do not want to go through the relatively costly
* dance to set PKRU if we do not need to. Check it
* first and assume that if the execute-only pkey is
* write-disabled that we do not have to set it
* ourselves.
*/
if (!need_to_set_mm_pkey &&
!__pkru_allows_read(read_pkru(), execute_only_pkey)) {
return execute_only_pkey;
}
/*
* Set up PKRU so that it denies access for everything
* other than execution.
*/
ret = arch_set_user_pkey_access(current, execute_only_pkey,
PKEY_DISABLE_ACCESS);
/*
* If the PKRU-set operation failed somehow, just return
* 0 and effectively disable execute-only support.
*/
if (ret) {
mm_set_pkey_free(mm, execute_only_pkey);
return -1;
}
/* We got one, store it and use it from here on out */
if (need_to_set_mm_pkey)
mm->context.execute_only_pkey = execute_only_pkey;
return execute_only_pkey;
}
static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma)
{
/* Do this check first since the vm_flags should be hot */
if ((vma->vm_flags & VM_ACCESS_FLAGS) != VM_EXEC)
return false;
if (vma_pkey(vma) != vma->vm_mm->context.execute_only_pkey)
return false;
return true;
}
/*
* This is only called for *plain* mprotect calls.
*/
int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey)
{
/*
* Is this an mprotect_pkey() call? If so, never
* override the value that came from the user.
*/
if (pkey != -1)
return pkey;
/*
* The mapping is execute-only. Go try to get the
* execute-only protection key. If we fail to do that,
* fall through as if we do not have execute-only
* support in this mm.
*/
if (prot == PROT_EXEC) {
pkey = execute_only_pkey(vma->vm_mm);
if (pkey > 0)
return pkey;
} else if (vma_is_pkey_exec_only(vma)) {
/*
* Protections are *not* PROT_EXEC, but the mapping
* is using the exec-only pkey. This mapping was
* PROT_EXEC and will no longer be. Move back to
* the default pkey.
*/
return ARCH_DEFAULT_PKEY;
}
/*
* This is a vanilla, non-pkey mprotect (or we failed to
* setup execute-only), inherit the pkey from the VMA we
* are working on.
*/
return vma_pkey(vma);
}
#define PKRU_AD_KEY(pkey) (PKRU_AD_BIT << ((pkey) * PKRU_BITS_PER_PKEY))
/*
* Make the default PKRU value (at execve() time) as restrictive
* as possible. This ensures that any threads clone()'d early
* in the process's lifetime will not accidentally get access
* to data which is pkey-protected later on.
*/
u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) |
PKRU_AD_KEY( 4) | PKRU_AD_KEY( 5) | PKRU_AD_KEY( 6) |
PKRU_AD_KEY( 7) | PKRU_AD_KEY( 8) | PKRU_AD_KEY( 9) |
PKRU_AD_KEY(10) | PKRU_AD_KEY(11) | PKRU_AD_KEY(12) |
PKRU_AD_KEY(13) | PKRU_AD_KEY(14) | PKRU_AD_KEY(15);
static ssize_t init_pkru_read_file(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
{
char buf[32];
unsigned int len;
len = sprintf(buf, "0x%x\n", init_pkru_value);
return simple_read_from_buffer(user_buf, count, ppos, buf, len);
}
static ssize_t init_pkru_write_file(struct file *file,
const char __user *user_buf, size_t count, loff_t *ppos)
{
char buf[32];
ssize_t len;
u32 new_init_pkru;
len = min(count, sizeof(buf) - 1);
if (copy_from_user(buf, user_buf, len))
return -EFAULT;
/* Make the buffer a valid string that we can not overrun */
buf[len] = '\0';
if (kstrtouint(buf, 0, &new_init_pkru))
return -EINVAL;
/*
* Don't allow insane settings that will blow the system
* up immediately if someone attempts to disable access
* or writes to pkey 0.
*/
if (new_init_pkru & (PKRU_AD_BIT|PKRU_WD_BIT))
return -EINVAL;
WRITE_ONCE(init_pkru_value, new_init_pkru);
return count;
}
static const struct file_operations fops_init_pkru = {
.read = init_pkru_read_file,
.write = init_pkru_write_file,
.llseek = default_llseek,
};
static int __init create_init_pkru_value(void)
{
/* Do not expose the file if pkeys are not supported. */
if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
return 0;
debugfs_create_file("init_pkru", S_IRUSR | S_IWUSR,
arch_debugfs_dir, NULL, &fops_init_pkru);
return 0;
}
late_initcall(create_init_pkru_value);
static __init int setup_init_pkru(char *opt)
{
u32 new_init_pkru;
if (kstrtouint(opt, 0, &new_init_pkru))
return 1;
WRITE_ONCE(init_pkru_value, new_init_pkru);
return 1;
}
__setup("init_pkru=", setup_init_pkru);