Merge tag 'x86_core_for_v6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 core updates from Borislav Petkov:

 - Add the call depth tracking mitigation for Retbleed which has been
   long in the making. It is a lighter-weight, software-only fix for
   Skylake-based cores, where enabling IBRS is a big hammer and causes a
   significant performance impact.

   What it basically does is align all kernel functions to a 16-byte
   boundary and add 16 bytes of padding before each function. objtool
   collects all function locations and, when the mitigation gets
   applied, patches in a call accounting thunk which is used to track
   the call depth of the stack at any time.

   When that call depth reaches a magical, microarchitecture-specific
   value for the Return Stack Buffer, the code stuffs that RSB and
   avoids its underflow, which could otherwise lead to the Intel variant
   of Retbleed.

   This software-only solution brings a lot of the lost performance
   back, as benchmarks suggest:

       https://lore.kernel.org/all/20220915111039.092790446@infradead.org/

   The page above also contains a much more detailed explanation of the
   whole mechanism. A simplified model of the depth accounting is
   sketched below, after this list.

 - Implement a new control flow integrity scheme called FineIBT, which
   is based on the software kCFI implementation and uses hardware IBT
   support, where present, to annotate and track indirect branches,
   validating them with a hash (a toy model contrasting the two schemes
   also follows below)

 - Other misc fixes and cleanups
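
To make the call depth accounting easier to picture, here is a minimal,
purely illustrative user-space C model distilled from the comments in
this series. It is not kernel code: the helper names are invented and
the real implementation performs the shifts on a per-CPU variable from
patched asm thunks; only the constants (shift count 5, initial value
0x8000000000000000, a 16-entry stuffing loop) mirror the patches.

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Toy model of the call depth word: arithmetic shift right by 5 on
     * call, logical shift left by 5 on return.  Depth 0 is encoded as
     * 0x8000000000000000; the sign-extending shift saturates the value
     * to all ones after about a dozen nested calls.
     */
    static uint64_t depth = 0x8000000000000000ULL;

    static void on_call(void)
    {
            /* arithmetic (sign-extending) shift, as common compilers do */
            depth = (uint64_t)((int64_t)depth >> 5);
    }

    static void on_return(void)
    {
            depth <<= 5;                    /* logical shift */
            if (depth == 0) {
                    /*
                     * More returns seen than tracked calls: the RSB may
                     * be about to underflow, so refill ("stuff") it and
                     * credit roughly 12 calls worth of depth again.
                     */
                    printf("stuff RSB (16 entries)\n");
                    depth = ~0ULL;          /* credit */
            }
    }

    int main(void)
    {
            for (int i = 0; i < 14; i++) {
                    printf("call %2d: 0x%016llx\n", i,
                           (unsigned long long)depth);
                    on_call();
            }
            for (int i = 0; i < 16; i++)
                    on_return();            /* eventually triggers a stuff */
            return 0;
    }

The real thunks do the same thing with sarq/shlq on pcpu_hot.call_depth
and fall into a 16-iteration call loop (the "stuffing") when the shifted
value hits zero, crediting the counter afterwards.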
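
For the FineIBT item, the sketch below contrasts the two CFI schemes in
the same hedged, user-space C style. It only mirrors the idea from the
kCFI/FineIBT comparison comment in the series; the names are invented,
and the real scheme works on compiler-emitted preambles, the %r10d
register and boot-time code patching rather than C structures.

    #include <stdint.h>
    #include <stdlib.h>

    /* A callee as this toy model sees it: the 32-bit type hash that the
     * compiler emits around the function entry plus the entry point. */
    struct callee {
            uint32_t type_hash;
            void (*entry)(void);
    };

    /* kCFI: the *caller* reads the hash stored in front of the callee
     * and checks it against the expected constant before the call. */
    static void kcfi_indirect_call(struct callee *c, uint32_t expected)
    {
            if (c->type_hash != expected)
                    abort();        /* the real code raises #UD via ud2 */
            c->entry();
    }

    /* FineIBT: the caller only passes the expected hash (in %r10d in
     * the real scheme); the check runs in the callee's preamble, which
     * hardware IBT (ENDBR) guarantees cannot be branched over. */
    static uint32_t r10d;           /* stand-in for the register */

    static void fineibt_preamble(struct callee *c)
    {
            /* endbr64 would live here */
            if (r10d != c->type_hash)
                    abort();        /* ud2 in the real preamble */
            c->entry();
    }

    static void fineibt_indirect_call(struct callee *c, uint32_t expected)
    {
            r10d = expected;
            fineibt_preamble(c);
    }

    static void target(void) { }

    int main(void)
    {
            struct callee c = { .type_hash = 0x12345678, .entry = target };

            kcfi_indirect_call(&c, 0x12345678);     /* hash matches: calls target */
            fineibt_indirect_call(&c, 0x12345678);  /* same, checked in the callee */
            return 0;
    }

Moving the check into the callee is what lets the kernel pick either
scheme at boot (and randomize the hashes) by patching the preambles and
call sites, as the apply_fineibt()/cfi= plumbing in this series does.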

* tag 'x86_core_for_v6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (80 commits)
  x86/paravirt: Use common macro for creating simple asm paravirt functions
  x86/paravirt: Remove clobber bitmask from .parainstructions
  x86/debug: Include percpu.h in debugreg.h to get DECLARE_PER_CPU() et al
  x86/cpufeatures: Move X86_FEATURE_CALL_DEPTH from bit 18 to bit 19 of word 11, to leave space for WIP X86_FEATURE_SGX_EDECCSSA bit
  x86/Kconfig: Enable kernel IBT by default
  x86,pm: Force out-of-line memcpy()
  objtool: Fix weak hole vs prefix symbol
  objtool: Optimize elf_dirty_reloc_sym()
  x86/cfi: Add boot time hash randomization
  x86/cfi: Boot time selection of CFI scheme
  x86/ibt: Implement FineIBT
  objtool: Add --cfi to generate the .cfi_sites section
  x86: Add prefix symbols for function padding
  objtool: Add option to generate prefix symbols
  objtool: Avoid O(bloody terrible) behaviour -- an ode to libelf
  objtool: Slice up elf_create_section_symbol()
  kallsyms: Revert "Take callthunks into account"
  x86: Unconfuse CONFIG_ and X86_FEATURE_ namespaces
  x86/retpoline: Fix crash printing warning
  x86/paravirt: Fix a !PARAVIRT build warning
  ...
Linus Torvalds 2022-12-14 15:03:00 -08:00
commit 94a855111e
103 changed files with 2709 additions and 597 deletions

View file

@ -1006,8 +1006,8 @@ KBUILD_CFLAGS += $(CC_FLAGS_CFI)
export CC_FLAGS_CFI
endif
ifdef CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B
KBUILD_CFLAGS += -falign-functions=64
ifneq ($(CONFIG_FUNCTION_ALIGNMENT),0)
KBUILD_CFLAGS += -falign-functions=$(CONFIG_FUNCTION_ALIGNMENT)
endif
# arch Makefile may override CC so keep this after arch Makefile is included

View file

@ -1438,4 +1438,28 @@ source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"
config FUNCTION_ALIGNMENT_4B
bool
config FUNCTION_ALIGNMENT_8B
bool
config FUNCTION_ALIGNMENT_16B
bool
config FUNCTION_ALIGNMENT_32B
bool
config FUNCTION_ALIGNMENT_64B
bool
config FUNCTION_ALIGNMENT
int
default 64 if FUNCTION_ALIGNMENT_64B
default 32 if FUNCTION_ALIGNMENT_32B
default 16 if FUNCTION_ALIGNMENT_16B
default 8 if FUNCTION_ALIGNMENT_8B
default 4 if FUNCTION_ALIGNMENT_4B
default 0
endmenu

View file

@ -63,6 +63,7 @@ config IA64
select NUMA if !FLATMEM
select PCI_MSI_ARCH_FALLBACKS if PCI_MSI
select ZONE_DMA32
select FUNCTION_ALIGNMENT_32B
default y
help
The Itanium Processor Family is Intel's 64-bit successor to

View file

@ -23,7 +23,7 @@ KBUILD_AFLAGS_KERNEL := -mconstant-gp
EXTRA :=
cflags-y := -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f12-f15,f32-f127 \
-falign-functions=32 -frename-registers -fno-optimize-sibling-calls
-frename-registers -fno-optimize-sibling-calls
KBUILD_CFLAGS_KERNEL := -mconstant-gp
GAS_STATUS = $(shell $(srctree)/arch/ia64/scripts/check-gas "$(CC)" "$(OBJDUMP)")

View file

@ -444,6 +444,11 @@ void apply_returns(s32 *start, s32 *end)
{
}
void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
s32 *start_cfi, s32 *end_cfi)
{
}
void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
}

View file

@ -292,6 +292,8 @@ config X86
select X86_FEATURE_NAMES if PROC_FS
select PROC_PID_ARCH_STATUS if PROC_FS
select HAVE_ARCH_NODE_DEV_GROUP if X86_SGX
select FUNCTION_ALIGNMENT_16B if X86_64 || X86_ALIGNMENT_16
select FUNCTION_ALIGNMENT_4B
imply IMA_SECURE_AND_OR_TRUSTED_BOOT if EFI
select HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
@ -1855,7 +1857,7 @@ config CC_HAS_IBT
config X86_KERNEL_IBT
prompt "Indirect Branch Tracking"
bool
def_bool y
depends on X86_64 && CC_HAS_IBT && HAVE_OBJTOOL
# https://github.com/llvm/llvm-project/commit/9d7001eba9c4cb311e03cd8cdc231f9e579f2d0f
depends on !LD_IS_LLD || LLD_VERSION >= 140000
@ -2492,6 +2494,46 @@ config CC_HAS_SLS
config CC_HAS_RETURN_THUNK
def_bool $(cc-option,-mfunction-return=thunk-extern)
config CC_HAS_ENTRY_PADDING
def_bool $(cc-option,-fpatchable-function-entry=16,16)
config FUNCTION_PADDING_CFI
int
default 59 if FUNCTION_ALIGNMENT_64B
default 27 if FUNCTION_ALIGNMENT_32B
default 11 if FUNCTION_ALIGNMENT_16B
default 3 if FUNCTION_ALIGNMENT_8B
default 0
# Basically: FUNCTION_ALIGNMENT - 5*CFI_CLANG
# except Kconfig can't do arithmetic :/
config FUNCTION_PADDING_BYTES
int
default FUNCTION_PADDING_CFI if CFI_CLANG
default FUNCTION_ALIGNMENT
config CALL_PADDING
def_bool n
depends on CC_HAS_ENTRY_PADDING && OBJTOOL
select FUNCTION_ALIGNMENT_16B
config FINEIBT
def_bool y
depends on X86_KERNEL_IBT && CFI_CLANG && RETPOLINE
select CALL_PADDING
config HAVE_CALL_THUNKS
def_bool y
depends on CC_HAS_ENTRY_PADDING && RETHUNK && OBJTOOL
config CALL_THUNKS
def_bool n
select CALL_PADDING
config PREFIX_SYMBOLS
def_bool y
depends on CALL_PADDING && !CFI_CLANG
menuconfig SPECULATION_MITIGATIONS
bool "Mitigations for speculative execution vulnerabilities"
default y
@ -2543,6 +2585,37 @@ config CPU_UNRET_ENTRY
help
Compile the kernel with support for the retbleed=unret mitigation.
config CALL_DEPTH_TRACKING
bool "Mitigate RSB underflow with call depth tracking"
depends on CPU_SUP_INTEL && HAVE_CALL_THUNKS
select HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
select CALL_THUNKS
default y
help
Compile the kernel with call depth tracking to mitigate the Intel
SKL Return-Speculation-Buffer (RSB) underflow issue. The
mitigation is off by default and needs to be enabled on the
kernel command line via the retbleed=stuff option. For
non-affected systems the overhead of this option is marginal as
the call depth tracking uses run-time generated call thunks
in a compiler-generated padding area and call patching. This
increases text size by ~5%. For non-affected systems this space
is unused. On affected SKL systems this results in a significant
performance gain over the IBRS mitigation.
config CALL_THUNKS_DEBUG
bool "Enable call thunks and call depth tracking debugging"
depends on CALL_DEPTH_TRACKING
select FUNCTION_ALIGNMENT_32B
default n
help
Enable call/ret counters for imbalance detection and build in
a noisy dmesg about callthunks generation and call patching for
troubleshooting. The debug prints need to be enabled on the
kernel command line with 'debug-callthunks'.
Only enable this when you are debugging call thunks as this
creates a noticeable runtime overhead. If unsure say N.
config CPU_IBPB_ENTRY
bool "Enable IBPB on kernel entry"
depends on CPU_SUP_AMD && X86_64

View file

@ -208,6 +208,12 @@ ifdef CONFIG_SLS
KBUILD_CFLAGS += -mharden-sls=all
endif
ifdef CONFIG_CALL_PADDING
PADDING_CFLAGS := -fpatchable-function-entry=$(CONFIG_FUNCTION_PADDING_BYTES),$(CONFIG_FUNCTION_PADDING_BYTES)
KBUILD_CFLAGS += $(PADDING_CFLAGS)
export PADDING_CFLAGS
endif
KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE)
ifdef CONFIG_LTO_CLANG

View file

@ -37,6 +37,14 @@
#include <asm/trapnr.h>
#include "pgtable.h"
/*
* Fix alignment at 16 bytes. Following CONFIG_FUNCTION_ALIGNMENT will result
* in assembly errors due to trying to move .org backward due to the excessive
* alignment.
*/
#undef __ALIGN
#define __ALIGN .balign 16, 0x90
/*
* Locally defined symbols should be marked hidden:
*/

View file

@ -712,7 +712,6 @@ SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
.text
.align 8
SYM_FUNC_START_LOCAL(__camellia_enc_blk16)
/* input:
* %rdi: ctx, CTX
@ -799,7 +798,6 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk16)
jmp .Lenc_done;
SYM_FUNC_END(__camellia_enc_blk16)
.align 8
SYM_FUNC_START_LOCAL(__camellia_dec_blk16)
/* input:
* %rdi: ctx, CTX

View file

@ -221,7 +221,6 @@
* Size optimization... with inlined roundsm32 binary would be over 5 times
* larger and would only marginally faster.
*/
.align 8
SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
%ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15,
@ -229,7 +228,6 @@ SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_c
RET;
SYM_FUNC_END(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
.align 8
SYM_FUNC_START_LOCAL(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, %ymm3,
%ymm12, %ymm13, %ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11,
@ -748,7 +746,6 @@ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
.text
.align 8
SYM_FUNC_START_LOCAL(__camellia_enc_blk32)
/* input:
* %rdi: ctx, CTX
@ -835,7 +832,6 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk32)
jmp .Lenc_done;
SYM_FUNC_END(__camellia_enc_blk32)
.align 8
SYM_FUNC_START_LOCAL(__camellia_dec_blk32)
/* input:
* %rdi: ctx, CTX

View file

@ -208,7 +208,6 @@
.text
.align 16
SYM_FUNC_START_LOCAL(__cast5_enc_blk16)
/* input:
* %rdi: ctx
@ -282,7 +281,6 @@ SYM_FUNC_START_LOCAL(__cast5_enc_blk16)
RET;
SYM_FUNC_END(__cast5_enc_blk16)
.align 16
SYM_FUNC_START_LOCAL(__cast5_dec_blk16)
/* input:
* %rdi: ctx

View file

@ -94,7 +94,6 @@
#
# Assumes len >= 16.
#
.align 16
SYM_FUNC_START(crc_t10dif_pcl)
movdqa .Lbswap_mask(%rip), BSWAP_MASK

View file

@ -108,7 +108,6 @@ if (!$kernel) {
sub declare_function() {
my ($name, $align, $nargs) = @_;
if($kernel) {
$code .= ".align $align\n";
$code .= "SYM_FUNC_START($name)\n";
$code .= ".L$name:\n";
} else {

View file

@ -550,7 +550,6 @@
#define write_blocks(x0, x1, x2, x3, t0, t1, t2) \
transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
.align 8
SYM_FUNC_START_LOCAL(__serpent_enc_blk8_avx)
/* input:
* %rdi: ctx, CTX
@ -604,7 +603,6 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk8_avx)
RET;
SYM_FUNC_END(__serpent_enc_blk8_avx)
.align 8
SYM_FUNC_START_LOCAL(__serpent_dec_blk8_avx)
/* input:
* %rdi: ctx, CTX

View file

@ -550,7 +550,6 @@
#define write_blocks(x0, x1, x2, x3, t0, t1, t2) \
transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
.align 8
SYM_FUNC_START_LOCAL(__serpent_enc_blk16)
/* input:
* %rdi: ctx, CTX
@ -604,7 +603,6 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk16)
RET;
SYM_FUNC_END(__serpent_enc_blk16)
.align 8
SYM_FUNC_START_LOCAL(__serpent_dec_blk16)
/* input:
* %rdi: ctx, CTX

View file

@ -93,7 +93,6 @@
* numBlocks: Number of blocks to process
*/
.text
.align 32
SYM_TYPED_FUNC_START(sha1_ni_transform)
push %rbp
mov %rsp, %rbp

View file

@ -348,7 +348,6 @@ a = TMP_
########################################################################
.text
SYM_TYPED_FUNC_START(sha256_transform_avx)
.align 32
pushq %rbx
pushq %r12
pushq %r13

View file

@ -525,7 +525,6 @@ STACK_SIZE = _CTX + _CTX_SIZE
########################################################################
.text
SYM_TYPED_FUNC_START(sha256_transform_rorx)
.align 32
pushq %rbx
pushq %r12
pushq %r13

View file

@ -357,7 +357,6 @@ a = TMP_
########################################################################
.text
SYM_TYPED_FUNC_START(sha256_transform_ssse3)
.align 32
pushq %rbx
pushq %r12
pushq %r13

View file

@ -97,7 +97,6 @@
*/
.text
.align 32
SYM_TYPED_FUNC_START(sha256_ni_transform)
shl $6, NUM_BLKS /* convert to bytes */

View file

@ -328,7 +328,6 @@
* void sm3_transform_avx(struct sm3_state *state,
* const u8 *data, int nblocks);
*/
.align 16
SYM_TYPED_FUNC_START(sm3_transform_avx)
/* input:
* %rdi: ctx, CTX

View file

@ -140,13 +140,11 @@
.text
.align 16
/*
* void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst,
* const u8 *src, int nblocks)
*/
.align 8
SYM_FUNC_START(sm4_aesni_avx_crypt4)
/* input:
* %rdi: round key array, CTX
@ -250,7 +248,6 @@ SYM_FUNC_START(sm4_aesni_avx_crypt4)
RET;
SYM_FUNC_END(sm4_aesni_avx_crypt4)
.align 8
SYM_FUNC_START_LOCAL(__sm4_crypt_blk8)
/* input:
* %rdi: round key array, CTX
@ -364,7 +361,6 @@ SYM_FUNC_END(__sm4_crypt_blk8)
* void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst,
* const u8 *src, int nblocks)
*/
.align 8
SYM_FUNC_START(sm4_aesni_avx_crypt8)
/* input:
* %rdi: round key array, CTX
@ -420,7 +416,6 @@ SYM_FUNC_END(sm4_aesni_avx_crypt8)
* void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst,
* const u8 *src, u8 *iv)
*/
.align 8
SYM_TYPED_FUNC_START(sm4_aesni_avx_ctr_enc_blk8)
/* input:
* %rdi: round key array, CTX
@ -495,7 +490,6 @@ SYM_FUNC_END(sm4_aesni_avx_ctr_enc_blk8)
* void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst,
* const u8 *src, u8 *iv)
*/
.align 8
SYM_TYPED_FUNC_START(sm4_aesni_avx_cbc_dec_blk8)
/* input:
* %rdi: round key array, CTX
@ -545,7 +539,6 @@ SYM_FUNC_END(sm4_aesni_avx_cbc_dec_blk8)
* void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst,
* const u8 *src, u8 *iv)
*/
.align 8
SYM_TYPED_FUNC_START(sm4_aesni_avx_cfb_dec_blk8)
/* input:
* %rdi: round key array, CTX

View file

@ -154,9 +154,6 @@
.long 0xdeadbeef, 0xdeadbeef, 0xdeadbeef
.text
.align 16
.align 8
SYM_FUNC_START_LOCAL(__sm4_crypt_blk16)
/* input:
* %rdi: round key array, CTX
@ -282,7 +279,6 @@ SYM_FUNC_END(__sm4_crypt_blk16)
* void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst,
* const u8 *src, u8 *iv)
*/
.align 8
SYM_TYPED_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16)
/* input:
* %rdi: round key array, CTX
@ -395,7 +391,6 @@ SYM_FUNC_END(sm4_aesni_avx2_ctr_enc_blk16)
* void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst,
* const u8 *src, u8 *iv)
*/
.align 8
SYM_TYPED_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16)
/* input:
* %rdi: round key array, CTX
@ -449,7 +444,6 @@ SYM_FUNC_END(sm4_aesni_avx2_cbc_dec_blk16)
* void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst,
* const u8 *src, u8 *iv)
*/
.align 8
SYM_TYPED_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16)
/* input:
* %rdi: round key array, CTX

View file

@ -228,7 +228,6 @@
vpxor x2, wkey, x2; \
vpxor x3, wkey, x3;
.align 8
SYM_FUNC_START_LOCAL(__twofish_enc_blk8)
/* input:
* %rdi: ctx, CTX
@ -270,7 +269,6 @@ SYM_FUNC_START_LOCAL(__twofish_enc_blk8)
RET;
SYM_FUNC_END(__twofish_enc_blk8)
.align 8
SYM_FUNC_START_LOCAL(__twofish_dec_blk8)
/* input:
* %rdi: ctx, CTX

View file

@ -1181,7 +1181,7 @@ SYM_CODE_START(asm_exc_nmi)
* is using the thread stack right now, so it's safe for us to use it.
*/
movl %esp, %ebx
movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
movl PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %esp
call exc_nmi
movl %ebx, %esp
@ -1243,7 +1243,7 @@ SYM_CODE_START(rewind_stack_and_make_dead)
/* Prevent any naive code from trying to unwind to our caller. */
xorl %ebp, %ebp
movl PER_CPU_VAR(cpu_current_top_of_stack), %esi
movl PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %esi
leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp
call make_task_dead

View file

@ -92,7 +92,7 @@ SYM_CODE_START(entry_SYSCALL_64)
/* tss.sp2 is scratch space. */
movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
@ -252,7 +252,7 @@ SYM_FUNC_START(__switch_to_asm)
#ifdef CONFIG_STACKPROTECTOR
movq TASK_stack_canary(%rsi), %rbx
movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset
movq %rbx, PER_CPU_VAR(fixed_percpu_data) + FIXED_stack_canary
#endif
/*
@ -284,9 +284,11 @@ SYM_FUNC_END(__switch_to_asm)
* r12: kernel thread arg
*/
.pushsection .text, "ax"
SYM_CODE_START(ret_from_fork)
__FUNC_ALIGN
SYM_CODE_START_NOALIGN(ret_from_fork)
UNWIND_HINT_EMPTY
ANNOTATE_NOENDBR // copy_thread
CALL_DEPTH_ACCOUNT
movq %rax, %rdi
call schedule_tail /* rdi: 'prev' task parameter */
@ -326,11 +328,12 @@ SYM_CODE_END(ret_from_fork)
#endif
.endm
SYM_CODE_START_LOCAL(xen_error_entry)
SYM_CODE_START(xen_error_entry)
ANNOTATE_NOENDBR
UNWIND_HINT_FUNC
PUSH_AND_CLEAR_REGS save_ret=1
ENCODE_FRAME_POINTER 8
UNTRAIN_RET
UNTRAIN_RET_FROM_CALL
RET
SYM_CODE_END(xen_error_entry)
@ -600,13 +603,13 @@ SYM_CODE_END(\asmsym)
* shared between 32 and 64 bit and emit the __irqentry_text_* markers
* so the stacktrace boundary checks work.
*/
.align 16
__ALIGN
.globl __irqentry_text_start
__irqentry_text_start:
#include <asm/idtentry.h>
.align 16
__ALIGN
.globl __irqentry_text_end
__irqentry_text_end:
ANNOTATE_NOENDBR
@ -828,7 +831,8 @@ EXPORT_SYMBOL(asm_load_gs_index)
*
* C calling convention: exc_xen_hypervisor_callback(struct *pt_regs)
*/
SYM_CODE_START_LOCAL(exc_xen_hypervisor_callback)
__FUNC_ALIGN
SYM_CODE_START_LOCAL_NOALIGN(exc_xen_hypervisor_callback)
/*
* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
@ -856,7 +860,8 @@ SYM_CODE_END(exc_xen_hypervisor_callback)
* We distinguish between categories by comparing each saved segment register
* with its current contents: any discrepancy means we in category 1.
*/
SYM_CODE_START(xen_failsafe_callback)
__FUNC_ALIGN
SYM_CODE_START_NOALIGN(xen_failsafe_callback)
UNWIND_HINT_EMPTY
ENDBR
movl %ds, %ecx
@ -903,7 +908,8 @@ SYM_CODE_END(xen_failsafe_callback)
* R14 - old CR3
* R15 - old SPEC_CTRL
*/
SYM_CODE_START_LOCAL(paranoid_entry)
SYM_CODE_START(paranoid_entry)
ANNOTATE_NOENDBR
UNWIND_HINT_FUNC
PUSH_AND_CLEAR_REGS save_ret=1
ENCODE_FRAME_POINTER 8
@ -972,7 +978,7 @@ SYM_CODE_START_LOCAL(paranoid_entry)
* CR3 above, keep the old value in a callee saved register.
*/
IBRS_ENTER save_reg=%r15
UNTRAIN_RET
UNTRAIN_RET_FROM_CALL
RET
SYM_CODE_END(paranoid_entry)
@ -1038,7 +1044,8 @@ SYM_CODE_END(paranoid_exit)
/*
* Switch GS and CR3 if needed.
*/
SYM_CODE_START_LOCAL(error_entry)
SYM_CODE_START(error_entry)
ANNOTATE_NOENDBR
UNWIND_HINT_FUNC
PUSH_AND_CLEAR_REGS save_ret=1
@ -1056,14 +1063,11 @@ SYM_CODE_START_LOCAL(error_entry)
/* We have user CR3. Change to kernel CR3. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
IBRS_ENTER
UNTRAIN_RET
UNTRAIN_RET_FROM_CALL
leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */
.Lerror_entry_from_usermode_after_swapgs:
/* Put us onto the real thread stack. */
call sync_regs
RET
jmp sync_regs
/*
* There are two places in the kernel that can potentially fault with
@ -1094,6 +1098,7 @@ SYM_CODE_START_LOCAL(error_entry)
*/
.Lerror_entry_done_lfence:
FENCE_SWAPGS_KERNEL_ENTRY
CALL_DEPTH_ACCOUNT
leaq 8(%rsp), %rax /* return pt_regs pointer */
ANNOTATE_UNRET_END
RET
@ -1112,7 +1117,7 @@ SYM_CODE_START_LOCAL(error_entry)
FENCE_SWAPGS_USER_ENTRY
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
IBRS_ENTER
UNTRAIN_RET
UNTRAIN_RET_FROM_CALL
/*
* Pretend that the exception came from user mode: set up pt_regs
@ -1121,7 +1126,7 @@ SYM_CODE_START_LOCAL(error_entry)
leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */
call fixup_bad_iret
mov %rax, %rdi
jmp .Lerror_entry_from_usermode_after_swapgs
jmp sync_regs
SYM_CODE_END(error_entry)
SYM_CODE_START_LOCAL(error_return)
@ -1206,7 +1211,7 @@ SYM_CODE_START(asm_exc_nmi)
FENCE_SWAPGS_USER_ENTRY
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
movq %rsp, %rdx
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
UNWIND_HINT_IRET_REGS base=%rdx offset=8
pushq 5*8(%rdx) /* pt_regs->ss */
pushq 4*8(%rdx) /* pt_regs->rsp */
@ -1516,12 +1521,13 @@ SYM_CODE_END(ignore_sysret)
#endif
.pushsection .text, "ax"
SYM_CODE_START(rewind_stack_and_make_dead)
__FUNC_ALIGN
SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead)
UNWIND_HINT_FUNC
/* Prevent any naive code from trying to unwind to our caller. */
xorl %ebp, %ebp
movq PER_CPU_VAR(cpu_current_top_of_stack), %rax
movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rax
leaq -PTREGS_SIZE(%rax), %rsp
UNWIND_HINT_REGS

View file

@ -58,7 +58,7 @@ SYM_CODE_START(entry_SYSENTER_compat)
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
popq %rax
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
/* Construct struct pt_regs on stack */
pushq $__USER_DS /* pt_regs->ss */
@ -128,7 +128,6 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
popfq
jmp .Lsysenter_flags_fixed
SYM_INNER_LABEL(__end_entry_SYSENTER_compat, SYM_L_GLOBAL)
ANNOTATE_NOENDBR // is_sysenter_singlestep
SYM_CODE_END(entry_SYSENTER_compat)
/*
@ -191,7 +190,7 @@ SYM_CODE_START(entry_SYSCALL_compat)
SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
/* Switch to the kernel stack */
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
@ -332,7 +331,7 @@ SYM_CODE_START(entry_INT80_compat)
ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV
movq %rsp, %rax
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
pushq 5*8(%rax) /* regs->ss */
pushq 4*8(%rax) /* regs->rsp */

View file

@ -11,7 +11,7 @@
/* rdi: arg1 ... normal C conventions. rax is saved/restored. */
.macro THUNK name, func
SYM_FUNC_START_NOALIGN(\name)
SYM_FUNC_START(\name)
pushq %rbp
movq %rsp, %rbp
@ -36,7 +36,7 @@ SYM_FUNC_END(\name)
EXPORT_SYMBOL(preempt_schedule_thunk)
EXPORT_SYMBOL(preempt_schedule_notrace_thunk)
SYM_CODE_START_LOCAL_NOALIGN(__thunk_restore)
SYM_CODE_START_LOCAL(__thunk_restore)
popq %r11
popq %r10
popq %r9

View file

@ -33,11 +33,12 @@ vobjs32-y += vdso32/vclock_gettime.o
vobjs-$(CONFIG_X86_SGX) += vsgx.o
# files to link into kernel
obj-y += vma.o extable.o
KASAN_SANITIZE_vma.o := y
UBSAN_SANITIZE_vma.o := y
KCSAN_SANITIZE_vma.o := y
OBJECT_FILES_NON_STANDARD_vma.o := n
obj-y += vma.o extable.o
KASAN_SANITIZE_vma.o := y
UBSAN_SANITIZE_vma.o := y
KCSAN_SANITIZE_vma.o := y
OBJECT_FILES_NON_STANDARD_vma.o := n
OBJECT_FILES_NON_STANDARD_extable.o := n
# vDSO images to build
vdso_img-$(VDSO64-y) += 64
@ -94,7 +95,7 @@ ifneq ($(RETPOLINE_VDSO_CFLAGS),)
endif
endif
$(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
$(vobjs): KBUILD_CFLAGS := $(filter-out $(PADDING_CFLAGS) $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
$(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO
#
@ -157,6 +158,7 @@ KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_LTO),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_CFI),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out $(PADDING_CFLAGS),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic
KBUILD_CFLAGS_32 += -fno-stack-protector
KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)

View file

@ -78,8 +78,43 @@ extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
extern void apply_retpolines(s32 *start, s32 *end);
extern void apply_returns(s32 *start, s32 *end);
extern void apply_ibt_endbr(s32 *start, s32 *end);
extern void apply_fineibt(s32 *start_retpoline, s32 *end_retpoine,
s32 *start_cfi, s32 *end_cfi);
struct module;
struct paravirt_patch_site;
struct callthunk_sites {
s32 *call_start, *call_end;
struct paravirt_patch_site *pv_start, *pv_end;
};
#ifdef CONFIG_CALL_THUNKS
extern void callthunks_patch_builtin_calls(void);
extern void callthunks_patch_module_calls(struct callthunk_sites *sites,
struct module *mod);
extern void *callthunks_translate_call_dest(void *dest);
extern bool is_callthunk(void *addr);
extern int x86_call_depth_emit_accounting(u8 **pprog, void *func);
#else
static __always_inline void callthunks_patch_builtin_calls(void) {}
static __always_inline void
callthunks_patch_module_calls(struct callthunk_sites *sites,
struct module *mod) {}
static __always_inline void *callthunks_translate_call_dest(void *dest)
{
return dest;
}
static __always_inline bool is_callthunk(void *addr)
{
return false;
}
static __always_inline int x86_call_depth_emit_accounting(u8 **pprog,
void *func)
{
return 0;
}
#endif
#ifdef CONFIG_SMP
extern void alternatives_smp_module_add(struct module *mod, char *name,
@ -347,6 +382,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
#define old_len 141b-140b
#define new_len1 144f-143f
#define new_len2 145f-144f
#define new_len3 146f-145f
/*
* gas compatible max based on the idea from:
@ -354,7 +390,8 @@ static inline int alternatives_text_reserved(void *start, void *end)
*
* The additional "-" is needed because gas uses a "true" value of -1.
*/
#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
#define alt_max_2(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
#define alt_max_3(a, b, c) (alt_max_2(alt_max_2(a, b), c))
/*
@ -366,8 +403,8 @@ static inline int alternatives_text_reserved(void *start, void *end)
140:
\oldinstr
141:
.skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
(alt_max_short(new_len1, new_len2) - (old_len)),0x90
.skip -((alt_max_2(new_len1, new_len2) - (old_len)) > 0) * \
(alt_max_2(new_len1, new_len2) - (old_len)),0x90
142:
.pushsection .altinstructions,"a"
@ -384,6 +421,31 @@ static inline int alternatives_text_reserved(void *start, void *end)
.popsection
.endm
.macro ALTERNATIVE_3 oldinstr, newinstr1, feature1, newinstr2, feature2, newinstr3, feature3
140:
\oldinstr
141:
.skip -((alt_max_3(new_len1, new_len2, new_len3) - (old_len)) > 0) * \
(alt_max_3(new_len1, new_len2, new_len3) - (old_len)),0x90
142:
.pushsection .altinstructions,"a"
altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f
altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f
altinstruction_entry 140b,145f,\feature3,142b-140b,146f-145f
.popsection
.pushsection .altinstr_replacement,"ax"
143:
\newinstr1
144:
\newinstr2
145:
\newinstr3
146:
.popsection
.endm
/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \
ALTERNATIVE_2 oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \

View file

@ -305,8 +305,7 @@
#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* "" SGX EDECCSSA user leaf function */
#define X86_FEATURE_CALL_DEPTH (11*32+19) /* "" Call depth tracking for RSB stuffing */
#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */

View file

@ -3,16 +3,42 @@
#define _ASM_X86_CURRENT_H
#include <linux/compiler.h>
#include <asm/percpu.h>
#ifndef __ASSEMBLY__
#include <linux/cache.h>
#include <asm/percpu.h>
struct task_struct;
DECLARE_PER_CPU(struct task_struct *, current_task);
struct pcpu_hot {
union {
struct {
struct task_struct *current_task;
int preempt_count;
int cpu_number;
#ifdef CONFIG_CALL_DEPTH_TRACKING
u64 call_depth;
#endif
unsigned long top_of_stack;
void *hardirq_stack_ptr;
u16 softirq_pending;
#ifdef CONFIG_X86_64
bool hardirq_stack_inuse;
#else
void *softirq_stack_ptr;
#endif
};
u8 pad[64];
};
};
static_assert(sizeof(struct pcpu_hot) == 64);
DECLARE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot);
static __always_inline struct task_struct *get_current(void)
{
return this_cpu_read_stable(current_task);
return this_cpu_read_stable(pcpu_hot.current_task);
}
#define current get_current()

View file

@ -2,8 +2,8 @@
#ifndef _ASM_X86_DEBUGREG_H
#define _ASM_X86_DEBUGREG_H
#include <linux/bug.h>
#include <linux/percpu.h>
#include <uapi/asm/debugreg.h>
DECLARE_PER_CPU(unsigned long, cpu_dr7);

View file

@ -69,6 +69,12 @@
# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31))
#endif
#ifdef CONFIG_CALL_DEPTH_TRACKING
# define DISABLE_CALL_DEPTH_TRACKING 0
#else
# define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31))
#endif
#ifdef CONFIG_INTEL_IOMMU_SVM
# define DISABLE_ENQCMD 0
#else
@ -107,7 +113,8 @@
#define DISABLED_MASK8 (DISABLE_XENPV|DISABLE_TDX_GUEST)
#define DISABLED_MASK9 (DISABLE_SGX)
#define DISABLED_MASK10 0
#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET)
#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
DISABLE_CALL_DEPTH_TRACKING)
#define DISABLED_MASK12 0
#define DISABLED_MASK13 0
#define DISABLED_MASK14 0

View file

@ -3,9 +3,9 @@
#define _ASM_X86_HARDIRQ_H
#include <linux/threads.h>
#include <asm/current.h>
typedef struct {
u16 __softirq_pending;
#if IS_ENABLED(CONFIG_KVM_INTEL)
u8 kvm_cpu_l1tf_flush_l1d;
#endif
@ -60,6 +60,7 @@ extern u64 arch_irq_stat_cpu(unsigned int cpu);
extern u64 arch_irq_stat(void);
#define arch_irq_stat arch_irq_stat
#define local_softirq_pending_ref pcpu_hot.softirq_pending
#if IS_ENABLED(CONFIG_KVM_INTEL)
static inline void kvm_set_cpu_l1tf_flush_l1d(void)

View file

@ -116,7 +116,7 @@
ASM_CALL_ARG2
#define call_on_irqstack(func, asm_call, argconstr...) \
call_on_stack(__this_cpu_read(hardirq_stack_ptr), \
call_on_stack(__this_cpu_read(pcpu_hot.hardirq_stack_ptr), \
func, asm_call, argconstr)
/* Macros to assert type correctness for run_*_on_irqstack macros */
@ -135,7 +135,7 @@
* User mode entry and interrupt on the irq stack do not \
* switch stacks. If from user mode the task stack is empty. \
*/ \
if (user_mode(regs) || __this_cpu_read(hardirq_stack_inuse)) { \
if (user_mode(regs) || __this_cpu_read(pcpu_hot.hardirq_stack_inuse)) { \
irq_enter_rcu(); \
func(c_args); \
irq_exit_rcu(); \
@ -146,9 +146,9 @@
* places. Invoke the stack switch macro with the call \
* sequence which matches the above direct invocation. \
*/ \
__this_cpu_write(hardirq_stack_inuse, true); \
__this_cpu_write(pcpu_hot.hardirq_stack_inuse, true); \
call_on_irqstack(func, asm_call, constr); \
__this_cpu_write(hardirq_stack_inuse, false); \
__this_cpu_write(pcpu_hot.hardirq_stack_inuse, false); \
} \
}
@ -212,9 +212,9 @@
*/
#define do_softirq_own_stack() \
{ \
__this_cpu_write(hardirq_stack_inuse, true); \
__this_cpu_write(pcpu_hot.hardirq_stack_inuse, true); \
call_on_irqstack(__do_softirq, ASM_CALL_ARG0); \
__this_cpu_write(hardirq_stack_inuse, false); \
__this_cpu_write(pcpu_hot.hardirq_stack_inuse, false); \
}
#endif

View file

@ -12,13 +12,26 @@
#define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0)))
#endif /* CONFIG_X86_32 */
#ifdef __ASSEMBLY__
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16)
#define __ALIGN .p2align 4, 0x90
#define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT, 0x90;
#define __ALIGN_STR __stringify(__ALIGN)
#if defined(CONFIG_CALL_PADDING) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
#define FUNCTION_PADDING .skip CONFIG_FUNCTION_ALIGNMENT, 0x90;
#else
#define FUNCTION_PADDING
#endif
#if (CONFIG_FUNCTION_ALIGNMENT > 8) && !defined(__DISABLE_EXPORTS) && !defined(BULID_VDSO)
# define __FUNC_ALIGN __ALIGN; FUNCTION_PADDING
#else
# define __FUNC_ALIGN __ALIGN
#endif
#define ASM_FUNC_ALIGN __stringify(__FUNC_ALIGN)
#define SYM_F_ALIGN __FUNC_ALIGN
#ifdef __ASSEMBLY__
#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
#define RET jmp __x86_return_thunk
#else /* CONFIG_RETPOLINE */
@ -43,11 +56,45 @@
#endif /* __ASSEMBLY__ */
/*
* Depending on -fpatchable-function-entry=N,N usage (CONFIG_CALL_PADDING) the
* CFI symbol layout changes.
*
* Without CALL_THUNKS:
*
* .align FUNCTION_ALIGNMENT
* __cfi_##name:
* .skip FUNCTION_PADDING, 0x90
* .byte 0xb8
* .long __kcfi_typeid_##name
* name:
*
* With CALL_THUNKS:
*
* .align FUNCTION_ALIGNMENT
* __cfi_##name:
* .byte 0xb8
* .long __kcfi_typeid_##name
* .skip FUNCTION_PADDING, 0x90
* name:
*
* In both cases the whole thing is FUNCTION_ALIGNMENT aligned and sized.
*/
#ifdef CONFIG_CALL_PADDING
#define CFI_PRE_PADDING
#define CFI_POST_PADDING .skip CONFIG_FUNCTION_PADDING_BYTES, 0x90;
#else
#define CFI_PRE_PADDING .skip CONFIG_FUNCTION_PADDING_BYTES, 0x90;
#define CFI_POST_PADDING
#endif
#define __CFI_TYPE(name) \
SYM_START(__cfi_##name, SYM_L_LOCAL, SYM_A_NONE) \
.fill 11, 1, 0x90 ASM_NL \
CFI_PRE_PADDING \
.byte 0xb8 ASM_NL \
.long __kcfi_typeid_##name ASM_NL \
CFI_POST_PADDING \
SYM_FUNC_END(__cfi_##name)
/* SYM_TYPED_FUNC_START -- use for indirectly called globals, w/ CFI type */
@ -57,7 +104,7 @@
/* SYM_FUNC_START -- use for global functions */
#define SYM_FUNC_START(name) \
SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \
SYM_START(name, SYM_L_GLOBAL, SYM_F_ALIGN) \
ENDBR
/* SYM_FUNC_START_NOALIGN -- use for global functions, w/o alignment */
@ -67,7 +114,7 @@
/* SYM_FUNC_START_LOCAL -- use for local functions */
#define SYM_FUNC_START_LOCAL(name) \
SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) \
SYM_START(name, SYM_L_LOCAL, SYM_F_ALIGN) \
ENDBR
/* SYM_FUNC_START_LOCAL_NOALIGN -- use for local functions, w/o alignment */
@ -77,7 +124,7 @@
/* SYM_FUNC_START_WEAK -- use for weak functions */
#define SYM_FUNC_START_WEAK(name) \
SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN) \
SYM_START(name, SYM_L_WEAK, SYM_F_ALIGN) \
ENDBR
/* SYM_FUNC_START_WEAK_NOALIGN -- use for weak functions, w/o alignment */

View file

@ -12,8 +12,104 @@
#include <asm/msr-index.h>
#include <asm/unwind_hints.h>
#include <asm/percpu.h>
#include <asm/current.h>
#define RETPOLINE_THUNK_SIZE 32
/*
* Call depth tracking for Intel SKL CPUs to address the RSB underflow
* issue in software.
*
* The tracking does not use a counter. It uses arithmetic shift
* right on call entry and logical shift left on return.
*
* The depth tracking variable is initialized to 0x8000.... when the call
* depth is zero. The arithmetic shift right sign extends the MSB and
* saturates after the 12th call. The shift count is 5 for both directions
* so the tracking covers 12 nested calls.
*
* Call
* 0: 0x8000000000000000 0x0000000000000000
* 1: 0xfc00000000000000 0xf000000000000000
* ...
* 11: 0xfffffffffffffff8 0xfffffffffffffc00
* 12: 0xffffffffffffffff 0xffffffffffffffe0
*
* After a return buffer fill the depth is credited 12 calls before the
* next stuffing has to take place.
*
* There is an inaccuracy for situations like this:
*
* 10 calls
* 5 returns
* 3 calls
* 4 returns
* 3 calls
* ....
*
* The shift count might cause this to be off by one in either direction,
* but there is still a cushion vs. the RSB depth. The algorithm does not
* claim to be perfect and it can be speculated around by the CPU, but it
* is considered that it obfuscates the problem enough to make exploitation
* extremely difficult.
*/
#define RET_DEPTH_SHIFT 5
#define RSB_RET_STUFF_LOOPS 16
#define RET_DEPTH_INIT 0x8000000000000000ULL
#define RET_DEPTH_INIT_FROM_CALL 0xfc00000000000000ULL
#define RET_DEPTH_CREDIT 0xffffffffffffffffULL
#ifdef CONFIG_CALL_THUNKS_DEBUG
# define CALL_THUNKS_DEBUG_INC_CALLS \
incq %gs:__x86_call_count;
# define CALL_THUNKS_DEBUG_INC_RETS \
incq %gs:__x86_ret_count;
# define CALL_THUNKS_DEBUG_INC_STUFFS \
incq %gs:__x86_stuffs_count;
# define CALL_THUNKS_DEBUG_INC_CTXSW \
incq %gs:__x86_ctxsw_count;
#else
# define CALL_THUNKS_DEBUG_INC_CALLS
# define CALL_THUNKS_DEBUG_INC_RETS
# define CALL_THUNKS_DEBUG_INC_STUFFS
# define CALL_THUNKS_DEBUG_INC_CTXSW
#endif
#if defined(CONFIG_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS)
#include <asm/asm-offsets.h>
#define CREDIT_CALL_DEPTH \
movq $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth);
#define ASM_CREDIT_CALL_DEPTH \
movq $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth);
#define RESET_CALL_DEPTH \
mov $0x80, %rax; \
shl $56, %rax; \
movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth);
#define RESET_CALL_DEPTH_FROM_CALL \
mov $0xfc, %rax; \
shl $56, %rax; \
movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); \
CALL_THUNKS_DEBUG_INC_CALLS
#define INCREMENT_CALL_DEPTH \
sarq $5, %gs:pcpu_hot + X86_call_depth; \
CALL_THUNKS_DEBUG_INC_CALLS
#define ASM_INCREMENT_CALL_DEPTH \
sarq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth); \
CALL_THUNKS_DEBUG_INC_CALLS
#else
#define CREDIT_CALL_DEPTH
#define ASM_CREDIT_CALL_DEPTH
#define RESET_CALL_DEPTH
#define INCREMENT_CALL_DEPTH
#define ASM_INCREMENT_CALL_DEPTH
#define RESET_CALL_DEPTH_FROM_CALL
#endif
/*
* Fill the CPU return stack buffer.
@ -32,6 +128,7 @@
* from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
*/
#define RETPOLINE_THUNK_SIZE 32
#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
/*
@ -60,7 +157,9 @@
dec reg; \
jnz 771b; \
/* barrier for jnz misprediction */ \
lfence;
lfence; \
ASM_CREDIT_CALL_DEPTH \
CALL_THUNKS_DEBUG_INC_CTXSW
#else
/*
* i386 doesn't unconditionally have LFENCE, as such it can't
@ -185,11 +284,32 @@
* where we have a stack but before any RET instruction.
*/
.macro UNTRAIN_RET
#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY)
#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
defined(CONFIG_CALL_DEPTH_TRACKING)
ANNOTATE_UNRET_END
ALTERNATIVE_2 "", \
CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \
"call entry_ibpb", X86_FEATURE_ENTRY_IBPB
ALTERNATIVE_3 "", \
CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \
"call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \
__stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
#endif
.endm
.macro UNTRAIN_RET_FROM_CALL
#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
defined(CONFIG_CALL_DEPTH_TRACKING)
ANNOTATE_UNRET_END
ALTERNATIVE_3 "", \
CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \
"call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \
__stringify(RESET_CALL_DEPTH_FROM_CALL), X86_FEATURE_CALL_DEPTH
#endif
.endm
.macro CALL_DEPTH_ACCOUNT
#ifdef CONFIG_CALL_DEPTH_TRACKING
ALTERNATIVE "", \
__stringify(ASM_INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
#endif
.endm
@ -203,11 +323,45 @@
typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
extern retpoline_thunk_t __x86_indirect_thunk_array[];
extern retpoline_thunk_t __x86_indirect_call_thunk_array[];
extern retpoline_thunk_t __x86_indirect_jump_thunk_array[];
extern void __x86_return_thunk(void);
extern void zen_untrain_ret(void);
extern void entry_ibpb(void);
#ifdef CONFIG_CALL_THUNKS
extern void (*x86_return_thunk)(void);
#else
#define x86_return_thunk (&__x86_return_thunk)
#endif
#ifdef CONFIG_CALL_DEPTH_TRACKING
extern void __x86_return_skl(void);
static inline void x86_set_skl_return_thunk(void)
{
x86_return_thunk = &__x86_return_skl;
}
#define CALL_DEPTH_ACCOUNT \
ALTERNATIVE("", \
__stringify(INCREMENT_CALL_DEPTH), \
X86_FEATURE_CALL_DEPTH)
#ifdef CONFIG_CALL_THUNKS_DEBUG
DECLARE_PER_CPU(u64, __x86_call_count);
DECLARE_PER_CPU(u64, __x86_ret_count);
DECLARE_PER_CPU(u64, __x86_stuffs_count);
DECLARE_PER_CPU(u64, __x86_ctxsw_count);
#endif
#else
static inline void x86_set_skl_return_thunk(void) {}
#define CALL_DEPTH_ACCOUNT ""
#endif
#ifdef CONFIG_RETPOLINE
#define GEN(reg) \
@ -215,6 +369,16 @@ extern void entry_ibpb(void);
#include <asm/GEN-for-each-reg.h>
#undef GEN
#define GEN(reg) \
extern retpoline_thunk_t __x86_indirect_call_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN
#define GEN(reg) \
extern retpoline_thunk_t __x86_indirect_jump_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN
#ifdef CONFIG_X86_64
/*

View file

@ -4,13 +4,13 @@
/* Various instructions on x86 need to be replaced for
* para-virtualization: those hooks are defined here. */
#include <asm/paravirt_types.h>
#ifdef CONFIG_PARAVIRT
#include <asm/pgtable_types.h>
#include <asm/asm.h>
#include <asm/nospec-branch.h>
#include <asm/paravirt_types.h>
#ifndef __ASSEMBLY__
#include <linux/bug.h>
#include <linux/types.h>
@ -665,6 +665,7 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
asm(".pushsection " section ", \"ax\";" \
".globl " PV_THUNK_NAME(func) ";" \
".type " PV_THUNK_NAME(func) ", @function;" \
ASM_FUNC_ALIGN \
PV_THUNK_NAME(func) ":" \
ASM_ENDBR \
FRAME_BEGIN \
@ -730,6 +731,18 @@ static __always_inline unsigned long arch_local_irq_save(void)
#undef PVOP_VCALL4
#undef PVOP_CALL4
#define DEFINE_PARAVIRT_ASM(func, instr, sec) \
asm (".pushsection " #sec ", \"ax\"\n" \
".global " #func "\n\t" \
".type " #func ", @function\n\t" \
ASM_FUNC_ALIGN "\n" \
#func ":\n\t" \
ASM_ENDBR \
instr "\n\t" \
ASM_RET \
".size " #func ", . - " #func "\n\t" \
".popsection")
extern void default_banner(void);
#else /* __ASSEMBLY__ */

View file

@ -2,6 +2,24 @@
#ifndef _ASM_X86_PARAVIRT_TYPES_H
#define _ASM_X86_PARAVIRT_TYPES_H
#ifndef __ASSEMBLY__
/* These all sit in the .parainstructions section to tell us what to patch. */
struct paravirt_patch_site {
u8 *instr; /* original instructions */
u8 type; /* type of this instruction */
u8 len; /* length of original instruction */
};
/* Lazy mode for batching updates / context switch */
enum paravirt_lazy_mode {
PARAVIRT_LAZY_NONE,
PARAVIRT_LAZY_MMU,
PARAVIRT_LAZY_CPU,
};
#endif
#ifdef CONFIG_PARAVIRT
#ifndef __ASSEMBLY__
#include <asm/desc_defs.h>
@ -534,13 +552,6 @@ int paravirt_disable_iospace(void);
__PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
/* Lazy mode for batching updates / context switch */
enum paravirt_lazy_mode {
PARAVIRT_LAZY_NONE,
PARAVIRT_LAZY_MMU,
PARAVIRT_LAZY_CPU,
};
enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
void paravirt_start_context_switch(struct task_struct *prev);
void paravirt_end_context_switch(struct task_struct *next);
@ -556,16 +567,9 @@ unsigned long paravirt_ret0(void);
#define paravirt_nop ((void *)_paravirt_nop)
/* These all sit in the .parainstructions section to tell us what to patch. */
struct paravirt_patch_site {
u8 *instr; /* original instructions */
u8 type; /* type of this instruction */
u8 len; /* length of original instruction */
};
extern struct paravirt_patch_site __parainstructions[],
__parainstructions_end[];
#endif /* __ASSEMBLY__ */
#endif /* CONFIG_PARAVIRT */
#endif /* _ASM_X86_PARAVIRT_TYPES_H */

View file

@ -4,11 +4,11 @@
#include <asm/rmwcc.h>
#include <asm/percpu.h>
#include <asm/current.h>
#include <linux/thread_info.h>
#include <linux/static_call_types.h>
DECLARE_PER_CPU(int, __preempt_count);
/* We use the MSB mostly because its available */
#define PREEMPT_NEED_RESCHED 0x80000000
@ -24,7 +24,7 @@ DECLARE_PER_CPU(int, __preempt_count);
*/
static __always_inline int preempt_count(void)
{
return raw_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED;
return raw_cpu_read_4(pcpu_hot.preempt_count) & ~PREEMPT_NEED_RESCHED;
}
static __always_inline void preempt_count_set(int pc)
@ -32,10 +32,10 @@ static __always_inline void preempt_count_set(int pc)
int old, new;
do {
old = raw_cpu_read_4(__preempt_count);
old = raw_cpu_read_4(pcpu_hot.preempt_count);
new = (old & PREEMPT_NEED_RESCHED) |
(pc & ~PREEMPT_NEED_RESCHED);
} while (raw_cpu_cmpxchg_4(__preempt_count, old, new) != old);
} while (raw_cpu_cmpxchg_4(pcpu_hot.preempt_count, old, new) != old);
}
/*
@ -44,7 +44,7 @@ static __always_inline void preempt_count_set(int pc)
#define init_task_preempt_count(p) do { } while (0)
#define init_idle_preempt_count(p, cpu) do { \
per_cpu(__preempt_count, (cpu)) = PREEMPT_DISABLED; \
per_cpu(pcpu_hot.preempt_count, (cpu)) = PREEMPT_DISABLED; \
} while (0)
/*
@ -58,17 +58,17 @@ static __always_inline void preempt_count_set(int pc)
static __always_inline void set_preempt_need_resched(void)
{
raw_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED);
raw_cpu_and_4(pcpu_hot.preempt_count, ~PREEMPT_NEED_RESCHED);
}
static __always_inline void clear_preempt_need_resched(void)
{
raw_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED);
raw_cpu_or_4(pcpu_hot.preempt_count, PREEMPT_NEED_RESCHED);
}
static __always_inline bool test_preempt_need_resched(void)
{
return !(raw_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED);
return !(raw_cpu_read_4(pcpu_hot.preempt_count) & PREEMPT_NEED_RESCHED);
}
/*
@ -77,12 +77,12 @@ static __always_inline bool test_preempt_need_resched(void)
static __always_inline void __preempt_count_add(int val)
{
raw_cpu_add_4(__preempt_count, val);
raw_cpu_add_4(pcpu_hot.preempt_count, val);
}
static __always_inline void __preempt_count_sub(int val)
{
raw_cpu_add_4(__preempt_count, -val);
raw_cpu_add_4(pcpu_hot.preempt_count, -val);
}
/*
@ -92,7 +92,8 @@ static __always_inline void __preempt_count_sub(int val)
*/
static __always_inline bool __preempt_count_dec_and_test(void)
{
return GEN_UNARY_RMWcc("decl", __preempt_count, e, __percpu_arg([var]));
return GEN_UNARY_RMWcc("decl", pcpu_hot.preempt_count, e,
__percpu_arg([var]));
}
/*
@ -100,7 +101,7 @@ static __always_inline bool __preempt_count_dec_and_test(void)
*/
static __always_inline bool should_resched(int preempt_offset)
{
return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
return unlikely(raw_cpu_read_4(pcpu_hot.preempt_count) == preempt_offset);
}
#ifdef CONFIG_PREEMPTION

View file

@ -377,8 +377,6 @@ struct irq_stack {
char stack[IRQ_STACK_SIZE];
} __aligned(IRQ_STACK_SIZE);
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
#ifdef CONFIG_X86_64
struct fixed_percpu_data {
/*
@ -401,8 +399,6 @@ static inline unsigned long cpu_kernelmode_gs_base(int cpu)
return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
}
DECLARE_PER_CPU(void *, hardirq_stack_ptr);
DECLARE_PER_CPU(bool, hardirq_stack_inuse);
extern asmlinkage void ignore_sysret(void);
/* Save actual FS/GS selectors and bases to current->thread */
@ -411,8 +407,6 @@ void current_save_fsgs(void);
#ifdef CONFIG_STACKPROTECTOR
DECLARE_PER_CPU(unsigned long, __stack_chk_guard);
#endif
DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
#endif /* !X86_64 */
struct perf_event;
@ -517,7 +511,7 @@ static __always_inline unsigned long current_top_of_stack(void)
* and around vm86 mode and sp0 on x86_64 is special because of the
* entry trampoline.
*/
return this_cpu_read_stable(cpu_current_top_of_stack);
return this_cpu_read_stable(pcpu_hot.top_of_stack);
}
static __always_inline bool on_thread_stack(void)
@ -554,10 +548,9 @@ extern int sysenter_setup(void);
/* Defined in head.S */
extern struct desc_ptr early_gdt_descr;
extern void switch_to_new_gdt(int);
extern void switch_gdt_and_percpu_base(int);
extern void load_direct_gdt(int);
extern void load_fixmap_gdt(int);
extern void load_percpu_segment(int);
extern void cpu_init(void);
extern void cpu_init_secondary(void);
extern void cpu_init_exception_handling(void);

View file

@ -14,8 +14,6 @@
__PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text");
#define __pv_queued_spin_unlock __pv_queued_spin_unlock
#define PV_UNLOCK "__raw_callee_save___pv_queued_spin_unlock"
#define PV_UNLOCK_SLOWPATH "__raw_callee_save___pv_queued_spin_unlock_slowpath"
/*
* Optimized assembly version of __raw_callee_save___pv_queued_spin_unlock
@ -37,32 +35,27 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text");
* rsi = lockval (second argument)
* rdx = internal variable (set to 0)
*/
asm (".pushsection .spinlock.text, \"ax\";"
".globl " PV_UNLOCK ";"
".type " PV_UNLOCK ", @function;"
".align 4,0x90;"
PV_UNLOCK ": "
ASM_ENDBR
FRAME_BEGIN
"push %rdx;"
"mov $0x1,%eax;"
"xor %edx,%edx;"
LOCK_PREFIX "cmpxchg %dl,(%rdi);"
"cmp $0x1,%al;"
"jne .slowpath;"
"pop %rdx;"
#define PV_UNLOCK_ASM \
FRAME_BEGIN \
"push %rdx\n\t" \
"mov $0x1,%eax\n\t" \
"xor %edx,%edx\n\t" \
LOCK_PREFIX "cmpxchg %dl,(%rdi)\n\t" \
"cmp $0x1,%al\n\t" \
"jne .slowpath\n\t" \
"pop %rdx\n\t" \
FRAME_END \
ASM_RET \
".slowpath:\n\t" \
"push %rsi\n\t" \
"movzbl %al,%esi\n\t" \
"call __raw_callee_save___pv_queued_spin_unlock_slowpath\n\t" \
"pop %rsi\n\t" \
"pop %rdx\n\t" \
FRAME_END
ASM_RET
".slowpath: "
"push %rsi;"
"movzbl %al,%esi;"
"call " PV_UNLOCK_SLOWPATH ";"
"pop %rsi;"
"pop %rdx;"
FRAME_END
ASM_RET
".size " PV_UNLOCK ", .-" PV_UNLOCK ";"
".popsection");
DEFINE_PARAVIRT_ASM(__raw_callee_save___pv_queued_spin_unlock,
PV_UNLOCK_ASM, .spinlock.text);
#else /* CONFIG_64BIT */

View file

@ -3,10 +3,10 @@
#define _ASM_X86_SMP_H
#ifndef __ASSEMBLY__
#include <linux/cpumask.h>
#include <asm/percpu.h>
#include <asm/thread_info.h>
#include <asm/cpumask.h>
#include <asm/current.h>
#include <asm/thread_info.h>
extern int smp_num_siblings;
extern unsigned int num_processors;
@ -19,7 +19,6 @@ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);
DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id);
DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id);
DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid);
DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid);
@ -150,11 +149,10 @@ __visible void smp_call_function_single_interrupt(struct pt_regs *r);
/*
* This function is needed by all SMP systems. It must _always_ be valid
* from the initial startup. We map APIC_BASE very early in page_setup(),
* so this is correct in the x86 case.
* from the initial startup.
*/
#define raw_smp_processor_id() this_cpu_read(cpu_number)
#define __smp_processor_id() __this_cpu_read(cpu_number)
#define raw_smp_processor_id() this_cpu_read(pcpu_hot.cpu_number)
#define __smp_processor_id() __this_cpu_read(pcpu_hot.cpu_number)
#ifdef CONFIG_X86_32
extern int safe_smp_processor_id(void);

View file

@ -45,6 +45,7 @@ extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void text_poke_sync(void);
extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
extern void *text_poke_copy(void *addr, const void *opcode, size_t len);
extern void *text_poke_copy_locked(void *addr, const void *opcode, size_t len, bool core_ok);
extern void *text_poke_set(void *addr, int c, size_t len);
extern int poke_int3_handler(struct pt_regs *regs);
extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);

View file

@ -143,6 +143,8 @@ obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev.o
obj-$(CONFIG_CFI_CLANG) += cfi.o
obj-$(CONFIG_CALL_THUNKS) += callthunks.o
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)

View file

@ -116,6 +116,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
extern s32 __retpoline_sites[], __retpoline_sites_end[];
extern s32 __return_sites[], __return_sites_end[];
extern s32 __cfi_sites[], __cfi_sites_end[];
extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[];
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[];
@ -377,6 +378,56 @@ static int emit_indirect(int op, int reg, u8 *bytes)
return i;
}
static inline bool is_jcc32(struct insn *insn)
{
/* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */
return insn->opcode.bytes[0] == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80;
}
static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 *bytes)
{
u8 op = insn->opcode.bytes[0];
int i = 0;
/*
* Clang does 'weird' Jcc __x86_indirect_thunk_r11 conditional
* tail-calls. Deal with them.
*/
if (is_jcc32(insn)) {
bytes[i++] = op;
op = insn->opcode.bytes[1];
goto clang_jcc;
}
if (insn->length == 6)
bytes[i++] = 0x2e; /* CS-prefix */
switch (op) {
case CALL_INSN_OPCODE:
__text_gen_insn(bytes+i, op, addr+i,
__x86_indirect_call_thunk_array[reg],
CALL_INSN_SIZE);
i += CALL_INSN_SIZE;
break;
case JMP32_INSN_OPCODE:
clang_jcc:
__text_gen_insn(bytes+i, op, addr+i,
__x86_indirect_jump_thunk_array[reg],
JMP32_INSN_SIZE);
i += JMP32_INSN_SIZE;
break;
default:
WARN(1, "%pS %px %*ph\n", addr, addr, 6, addr);
return -1;
}
WARN_ON_ONCE(i != insn->length);
return i;
}
/*
* Rewrite the compiler generated retpoline thunk calls.
*
@ -409,8 +460,12 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
BUG_ON(reg == 4);
if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) &&
!cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE))
!cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
if (cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
return emit_call_track_retpoline(addr, insn, reg, bytes);
return -1;
}
op = insn->opcode.bytes[0];
@ -427,8 +482,7 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
* [ NOP ]
* 1:
*/
/* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */
if (op == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80) {
if (is_jcc32(insn)) {
cc = insn->opcode.bytes[1] & 0xf;
cc ^= 1; /* invert condition */
@ -518,6 +572,11 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
}
#ifdef CONFIG_RETHUNK
#ifdef CONFIG_CALL_THUNKS
void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk;
#endif
/*
* Rewrite the compiler generated return thunk tail-calls.
*
@ -533,14 +592,18 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes)
{
int i = 0;
if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
return -1;
if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
if (x86_return_thunk == __x86_return_thunk)
return -1;
bytes[i++] = RET_INSN_OPCODE;
i = JMP32_INSN_SIZE;
__text_gen_insn(bytes, JMP32_INSN_OPCODE, addr, x86_return_thunk, i);
} else {
bytes[i++] = RET_INSN_OPCODE;
}
for (; i < insn->length;)
bytes[i++] = INT3_INSN_OPCODE;
return i;
}
@ -594,6 +657,28 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
#ifdef CONFIG_X86_KERNEL_IBT
static void poison_endbr(void *addr, bool warn)
{
u32 endbr, poison = gen_endbr_poison();
if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr)))
return;
if (!is_endbr(endbr)) {
WARN_ON_ONCE(warn);
return;
}
DPRINTK("ENDBR at: %pS (%px)", addr, addr);
/*
* When we have IBT, the lack of ENDBR will trigger #CP
*/
DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr);
DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr);
text_poke_early(addr, &poison, 4);
}
/*
* Generated by: objtool --ibt
*/
@ -602,23 +687,11 @@ void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end)
s32 *s;
for (s = start; s < end; s++) {
u32 endbr, poison = gen_endbr_poison();
void *addr = (void *)s + *s;
if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr)))
continue;
if (WARN_ON_ONCE(!is_endbr(endbr)))
continue;
DPRINTK("ENDBR at: %pS (%px)", addr, addr);
/*
* When we have IBT, the lack of ENDBR will trigger #CP
*/
DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr);
DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr);
text_poke_early(addr, &poison, 4);
poison_endbr(addr, true);
if (IS_ENABLED(CONFIG_FINEIBT))
poison_endbr(addr - 16, false);
}
}
@ -628,6 +701,378 @@ void __init_or_module apply_ibt_endbr(s32 *start, s32 *end) { }
#endif /* CONFIG_X86_KERNEL_IBT */
#ifdef CONFIG_FINEIBT
enum cfi_mode {
CFI_DEFAULT,
CFI_OFF,
CFI_KCFI,
CFI_FINEIBT,
};
static enum cfi_mode cfi_mode __ro_after_init = CFI_DEFAULT;
static bool cfi_rand __ro_after_init = true;
static u32 cfi_seed __ro_after_init;
/*
* Re-hash the CFI hash with a boot-time seed while making sure the result is
* not a valid ENDBR instruction.
*/
static u32 cfi_rehash(u32 hash)
{
hash ^= cfi_seed;
while (unlikely(is_endbr(hash) || is_endbr(-hash))) {
bool lsb = hash & 1;
hash >>= 1;
if (lsb)
hash ^= 0x80200003;
}
return hash;
}
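
The loop above can be exercised on its own. Below is a minimal user-space sketch, assuming the usual ENDBR encodings (endbr64 is f3 0f 1e fa, i.e. 0xfa1e0ff3 read as a little-endian u32; endbr32 differs only in bit 24). Both the hash and its negation are checked because call sites carry the negated hash, see decode_caller_hash() further down. Everything here is illustrative and not kernel code:

    /* cfi_rehash_demo.c -- illustrative user-space model */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static bool is_endbr(uint32_t val)
    {
        val &= ~0x01000000U;            /* fold ENDBR32 onto ENDBR64 */
        return val == 0xfa1e0ff3U;      /* f3 0f 1e fa, little-endian */
    }

    static uint32_t cfi_rehash(uint32_t hash, uint32_t seed)
    {
        hash ^= seed;
        while (is_endbr(hash) || is_endbr(-hash)) {
            bool lsb = hash & 1;

            hash >>= 1;
            if (lsb)
                hash ^= 0x80200003;     /* same feedback constant as above */
        }
        return hash;
    }

    int main(void)
    {
        /* seed picked so the first XOR collides with the ENDBR64 pattern */
        uint32_t hash = 0x12345678, seed = 0x12345678 ^ 0xfa1e0ff3U;
        uint32_t out = cfi_rehash(hash, seed);

        printf("rehashed: %#x, endbr: %d\n", (unsigned int)out, is_endbr(out));
        return 0;
    }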
static __init int cfi_parse_cmdline(char *str)
{
if (!str)
return -EINVAL;
while (str) {
char *next = strchr(str, ',');
if (next) {
*next = 0;
next++;
}
if (!strcmp(str, "auto")) {
cfi_mode = CFI_DEFAULT;
} else if (!strcmp(str, "off")) {
cfi_mode = CFI_OFF;
cfi_rand = false;
} else if (!strcmp(str, "kcfi")) {
cfi_mode = CFI_KCFI;
} else if (!strcmp(str, "fineibt")) {
cfi_mode = CFI_FINEIBT;
} else if (!strcmp(str, "norand")) {
cfi_rand = false;
} else {
pr_err("Ignoring unknown cfi option (%s).", str);
}
str = next;
}
return 0;
}
early_param("cfi", cfi_parse_cmdline);
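
For reference, the parser above takes a comma-separated list, so plausible boot command lines look like the following (illustrative; only the option names come from the code above):

    cfi=off             # no kCFI and no FineIBT
    cfi=kcfi            # keep the compiler's kCFI checks even where IBT is available
    cfi=fineibt         # ask for FineIBT explicitly
    cfi=kcfi,norand     # kCFI without the boot-time hash re-seeding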
/*
* kCFI                                    FineIBT
*
* __cfi_\func:                            __cfi_\func:
*    movl $0x12345678,%eax        // 5        endbr64                    // 4
*    nop                                      subl $0x12345678,%r10d     // 7
*    nop                                      jz   1f                    // 2
*    nop                                      ud2                        // 2
*    nop                                  1:  nop                        // 1
*    nop
*    nop
*    nop
*    nop
*    nop
*    nop
*    nop
*
*
* caller:                                 caller:
*    movl $(-0x12345678),%r10d    // 6        movl $0x12345678,%r10d     // 6
*    addl $-15(%r11),%r10d        // 4        sub  $16,%r11              // 4
*    je   1f                      // 2        nop4                       // 4
*    ud2                          // 2
* 1: call __x86_indirect_thunk_r11 // 5       call *%r11; nop2;          // 5
*
*/
asm( ".pushsection .rodata \n"
"fineibt_preamble_start: \n"
" endbr64 \n"
" subl $0x12345678, %r10d \n"
" je fineibt_preamble_end \n"
" ud2 \n"
" nop \n"
"fineibt_preamble_end: \n"
".popsection\n"
);
extern u8 fineibt_preamble_start[];
extern u8 fineibt_preamble_end[];
#define fineibt_preamble_size (fineibt_preamble_end - fineibt_preamble_start)
#define fineibt_preamble_hash 7
asm( ".pushsection .rodata \n"
"fineibt_caller_start: \n"
" movl $0x12345678, %r10d \n"
" sub $16, %r11 \n"
ASM_NOP4
"fineibt_caller_end: \n"
".popsection \n"
);
extern u8 fineibt_caller_start[];
extern u8 fineibt_caller_end[];
#define fineibt_caller_size (fineibt_caller_end - fineibt_caller_start)
#define fineibt_caller_hash 2
#define fineibt_caller_jmp (fineibt_caller_size - 2)
static u32 decode_preamble_hash(void *addr)
{
u8 *p = addr;
/* b8 78 56 34 12 mov $0x12345678,%eax */
if (p[0] == 0xb8)
return *(u32 *)(addr + 1);
return 0; /* invalid hash value */
}
static u32 decode_caller_hash(void *addr)
{
u8 *p = addr;
/* 41 ba 78 56 34 12 mov $0x12345678,%r10d */
if (p[0] == 0x41 && p[1] == 0xba)
return -*(u32 *)(addr + 2);
/* eb 0c 78 56 34 12    jmp.d8 +12 */
if (p[0] == JMP8_INSN_OPCODE && p[1] == fineibt_caller_jmp)
return -*(u32 *)(addr + 2);
return 0; /* invalid hash value */
}
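
Both decoders feed the convention sketched in the layout comment above: with kCFI the caller holds the negated hash in %r10d and adds the immediate it reads out of the callee's preamble, with FineIBT the caller passes the positive hash and the preamble subtracts its own; either way a matching call ends up as zero. A stand-alone sketch of just that arithmetic (0x12345678 is the placeholder hash from the comments; the rest is illustrative user-space code):

    /* hash_check_demo.c -- models only the arithmetic of both schemes */
    #include <stdint.h>
    #include <stdio.h>

    /* kCFI: caller has -hash in %r10d, adds the hash found in the callee preamble */
    static int kcfi_ok(uint32_t callee_hash, uint32_t caller_hash)
    {
        uint32_t r10d = -caller_hash;                   /* movl $(-hash), %r10d */
        return (uint32_t)(r10d + callee_hash) == 0;     /* addl ...; je         */
    }

    /* FineIBT: caller has +hash in %r10d, the preamble subtracts its own hash */
    static int fineibt_ok(uint32_t callee_hash, uint32_t caller_hash)
    {
        uint32_t r10d = caller_hash;                    /* movl $hash, %r10d    */
        return (uint32_t)(r10d - callee_hash) == 0;     /* subl ...; jz         */
    }

    int main(void)
    {
        uint32_t h = 0x12345678;

        printf("kCFI    match/mismatch: %d %d\n", kcfi_ok(h, h), kcfi_ok(h, h ^ 1));
        printf("FineIBT match/mismatch: %d %d\n", fineibt_ok(h, h), fineibt_ok(h, h ^ 1));
        return 0;
    }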
/* .retpoline_sites */
static int cfi_disable_callers(s32 *start, s32 *end)
{
/*
* Disable kCFI by patching in a JMP.d8, this leaves the hash immediate
* intact for later usage. Also see decode_caller_hash() and
* cfi_rewrite_callers().
*/
const u8 jmp[] = { JMP8_INSN_OPCODE, fineibt_caller_jmp };
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
u32 hash;
addr -= fineibt_caller_size;
hash = decode_caller_hash(addr);
if (!hash) /* nocfi callers */
continue;
text_poke_early(addr, jmp, 2);
}
return 0;
}
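
Concretely, patching the first two bytes of the 14-byte caller sequence turns the 6-byte mov into a 2-byte short jump whose displacement (fineibt_caller_size - 2 = 12) skips the rest of the sequence, while the hash immediate stays in place for decode_caller_hash(). A byte-level illustration follows; the mov bytes are the ones from the comment above, the sub and nop encodings are the standard ones and only fill out the fake buffer:

    /* caller_patch_demo.c -- byte-level illustration only */
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        /* movl $0x12345678,%r10d ; sub $16,%r11 ; 4-byte nop  (14 bytes total) */
        unsigned char site[14] = {
            0x41, 0xba, 0x78, 0x56, 0x34, 0x12,     /* mov  $hash, %r10d */
            0x49, 0x83, 0xeb, 0x10,                 /* sub  $16, %r11    */
            0x0f, 0x1f, 0x40, 0x00,                 /* nopl 0x0(%rax)    */
        };
        const unsigned char jmp[2] = { 0xeb, sizeof(site) - 2 };  /* jmp.d8 +12 */

        memcpy(site, jmp, 2);           /* what cfi_disable_callers() does */

        for (size_t i = 0; i < sizeof(site); i++)
            printf("%02x ", (unsigned int)site[i]);
        printf("\n");   /* eb 0c 78 56 34 12 ... -- hash still recoverable */
        return 0;
    }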
static int cfi_enable_callers(s32 *start, s32 *end)
{
/*
* Re-enable kCFI, undo what cfi_disable_callers() did.
*/
const u8 mov[] = { 0x41, 0xba };
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
u32 hash;
addr -= fineibt_caller_size;
hash = decode_caller_hash(addr);
if (!hash) /* nocfi callers */
continue;
text_poke_early(addr, mov, 2);
}
return 0;
}
/* .cfi_sites */
static int cfi_rand_preamble(s32 *start, s32 *end)
{
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
u32 hash;
hash = decode_preamble_hash(addr);
if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n",
addr, addr, 5, addr))
return -EINVAL;
hash = cfi_rehash(hash);
text_poke_early(addr + 1, &hash, 4);
}
return 0;
}
static int cfi_rewrite_preamble(s32 *start, s32 *end)
{
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
u32 hash;
hash = decode_preamble_hash(addr);
if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n",
addr, addr, 5, addr))
return -EINVAL;
text_poke_early(addr, fineibt_preamble_start, fineibt_preamble_size);
WARN_ON(*(u32 *)(addr + fineibt_preamble_hash) != 0x12345678);
text_poke_early(addr + fineibt_preamble_hash, &hash, 4);
}
return 0;
}
/* .retpoline_sites */
static int cfi_rand_callers(s32 *start, s32 *end)
{
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
u32 hash;
addr -= fineibt_caller_size;
hash = decode_caller_hash(addr);
if (hash) {
hash = -cfi_rehash(hash);
text_poke_early(addr + 2, &hash, 4);
}
}
return 0;
}
static int cfi_rewrite_callers(s32 *start, s32 *end)
{
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
u32 hash;
addr -= fineibt_caller_size;
hash = decode_caller_hash(addr);
if (hash) {
text_poke_early(addr, fineibt_caller_start, fineibt_caller_size);
WARN_ON(*(u32 *)(addr + fineibt_caller_hash) != 0x12345678);
text_poke_early(addr + fineibt_caller_hash, &hash, 4);
}
/* rely on apply_retpolines() */
}
return 0;
}
static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
s32 *start_cfi, s32 *end_cfi, bool builtin)
{
int ret;
if (WARN_ONCE(fineibt_preamble_size != 16,
"FineIBT preamble wrong size: %ld", fineibt_preamble_size))
return;
if (cfi_mode == CFI_DEFAULT) {
cfi_mode = CFI_KCFI;
if (HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT))
cfi_mode = CFI_FINEIBT;
}
/*
* Rewrite the callers to not use the __cfi_ stubs, such that we might
* rewrite them. This disables all CFI. If this succeeds but any of the
* later stages fails, we're without CFI.
*/
ret = cfi_disable_callers(start_retpoline, end_retpoline);
if (ret)
goto err;
if (cfi_rand) {
if (builtin)
cfi_seed = get_random_u32();
ret = cfi_rand_preamble(start_cfi, end_cfi);
if (ret)
goto err;
ret = cfi_rand_callers(start_retpoline, end_retpoline);
if (ret)
goto err;
}
switch (cfi_mode) {
case CFI_OFF:
if (builtin)
pr_info("Disabling CFI\n");
return;
case CFI_KCFI:
ret = cfi_enable_callers(start_retpoline, end_retpoline);
if (ret)
goto err;
if (builtin)
pr_info("Using kCFI\n");
return;
case CFI_FINEIBT:
ret = cfi_rewrite_preamble(start_cfi, end_cfi);
if (ret)
goto err;
ret = cfi_rewrite_callers(start_retpoline, end_retpoline);
if (ret)
goto err;
if (builtin)
pr_info("Using FineIBT CFI\n");
return;
default:
break;
}
err:
pr_err("Something went horribly wrong trying to rewrite the CFI implementation.\n");
}
#else
static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
s32 *start_cfi, s32 *end_cfi, bool builtin)
{
}
#endif
void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
s32 *start_cfi, s32 *end_cfi)
{
return __apply_fineibt(start_retpoline, end_retpoline,
start_cfi, end_cfi,
/* .builtin = */ false);
}
#ifdef CONFIG_SMP
static void alternatives_smp_lock(const s32 *start, const s32 *end,
u8 *text, u8 *text_end)
@ -934,6 +1379,9 @@ void __init alternative_instructions(void)
*/
apply_paravirt(__parainstructions, __parainstructions_end);
__apply_fineibt(__retpoline_sites, __retpoline_sites_end,
__cfi_sites, __cfi_sites_end, true);
/*
* Rewrite the retpolines, must be done before alternatives since
* those can rewrite the retpoline thunks.
@ -947,6 +1395,12 @@ void __init alternative_instructions(void)
*/
apply_alternatives(__alt_instructions, __alt_instructions_end);
/*
* Now all calls are established. Apply the call thunks if
* required.
*/
callthunks_patch_builtin_calls();
apply_ibt_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end);
#ifdef CONFIG_SMP
@ -1236,6 +1690,27 @@ void *text_poke_kgdb(void *addr, const void *opcode, size_t len)
return __text_poke(text_poke_memcpy, addr, opcode, len);
}
void *text_poke_copy_locked(void *addr, const void *opcode, size_t len,
bool core_ok)
{
unsigned long start = (unsigned long)addr;
size_t patched = 0;
if (WARN_ON_ONCE(!core_ok && core_kernel_text(start)))
return NULL;
while (patched < len) {
unsigned long ptr = start + patched;
size_t s;
s = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(ptr), len - patched);
__text_poke(text_poke_memcpy, (void *)ptr, opcode + patched, s);
patched += s;
}
return addr;
}
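
The chunking above reflects the fact that the temporary poking mapping covers two consecutive pages, so a single __text_poke() may only span up to PAGE_SIZE * 2 - offset_in_page(ptr) bytes. A quick user-space check of how a copy gets split (PAGE_SIZE assumed to be 4096 here; addresses are made up):

    /* poke_chunks_demo.c -- shows the chunk sizes the loop above would use */
    #include <stddef.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define offset_in_page(p)   ((unsigned long)(p) & (PAGE_SIZE - 1))

    static void show_chunks(unsigned long start, size_t len)
    {
        size_t patched = 0;

        while (patched < len) {
            unsigned long ptr = start + patched;
            size_t s = PAGE_SIZE * 2 - offset_in_page(ptr);

            if (s > len - patched)
                s = len - patched;
            printf("copy %zu bytes at %#lx\n", s, ptr);
            patched += s;
        }
    }

    int main(void)
    {
        show_chunks(0x1000ff0, 0x40);   /* crosses a page boundary: one chunk   */
        show_chunks(0x1000ff0, 0x5000); /* larger than the window: three chunks */
        return 0;
    }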
/**
* text_poke_copy - Copy instructions into (an unused part of) RX memory
* @addr: address to modify
@ -1250,22 +1725,8 @@ void *text_poke_kgdb(void *addr, const void *opcode, size_t len)
*/
void *text_poke_copy(void *addr, const void *opcode, size_t len)
{
unsigned long start = (unsigned long)addr;
size_t patched = 0;
if (WARN_ON_ONCE(core_kernel_text(start)))
return NULL;
mutex_lock(&text_mutex);
while (patched < len) {
unsigned long ptr = start + patched;
size_t s;
s = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(ptr), len - patched);
__text_poke(text_poke_memcpy, (void *)ptr, opcode + patched, s);
patched += s;
}
addr = text_poke_copy_locked(addr, opcode, len, false);
mutex_unlock(&text_mutex);
return addr;
}


@ -107,4 +107,9 @@ static void __used common(void)
OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack);
#ifdef CONFIG_CALL_DEPTH_TRACKING
OFFSET(X86_call_depth, pcpu_hot, call_depth);
#endif
}


@ -57,7 +57,7 @@ int main(void)
BLANK();
#ifdef CONFIG_STACKPROTECTOR
DEFINE(stack_canary_offset, offsetof(struct fixed_percpu_data, stack_canary));
OFFSET(FIXED_stack_canary, fixed_percpu_data, stack_canary);
BLANK();
#endif
return 0;


@ -0,0 +1,388 @@
// SPDX-License-Identifier: GPL-2.0-only
#define pr_fmt(fmt) "callthunks: " fmt
#include <linux/debugfs.h>
#include <linux/kallsyms.h>
#include <linux/memory.h>
#include <linux/moduleloader.h>
#include <linux/static_call.h>
#include <asm/alternative.h>
#include <asm/asm-offsets.h>
#include <asm/cpu.h>
#include <asm/ftrace.h>
#include <asm/insn.h>
#include <asm/kexec.h>
#include <asm/nospec-branch.h>
#include <asm/paravirt.h>
#include <asm/sections.h>
#include <asm/switch_to.h>
#include <asm/sync_core.h>
#include <asm/text-patching.h>
#include <asm/xen/hypercall.h>
static int __initdata_or_module debug_callthunks;
#define prdbg(fmt, args...) \
do { \
if (debug_callthunks) \
printk(KERN_DEBUG pr_fmt(fmt), ##args); \
} while(0)
static int __init debug_thunks(char *str)
{
debug_callthunks = 1;
return 1;
}
__setup("debug-callthunks", debug_thunks);
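
That is, booting with the bare flag below (it is a __setup() switch, not an early_param, and takes no value) turns on the prdbg() output in this file, printed at KERN_DEBUG level. An illustrative command line fragment:

    ... debug-callthunks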
#ifdef CONFIG_CALL_THUNKS_DEBUG
DEFINE_PER_CPU(u64, __x86_call_count);
DEFINE_PER_CPU(u64, __x86_ret_count);
DEFINE_PER_CPU(u64, __x86_stuffs_count);
DEFINE_PER_CPU(u64, __x86_ctxsw_count);
EXPORT_SYMBOL_GPL(__x86_ctxsw_count);
EXPORT_SYMBOL_GPL(__x86_call_count);
#endif
extern s32 __call_sites[], __call_sites_end[];
struct thunk_desc {
void *template;
unsigned int template_size;
};
struct core_text {
unsigned long base;
unsigned long end;
const char *name;
};
static bool thunks_initialized __ro_after_init;
static const struct core_text builtin_coretext = {
.base = (unsigned long)_text,
.end = (unsigned long)_etext,
.name = "builtin",
};
asm (
".pushsection .rodata \n"
".global skl_call_thunk_template \n"
"skl_call_thunk_template: \n"
__stringify(INCREMENT_CALL_DEPTH)" \n"
".global skl_call_thunk_tail \n"
"skl_call_thunk_tail: \n"
".popsection \n"
);
extern u8 skl_call_thunk_template[];
extern u8 skl_call_thunk_tail[];
#define SKL_TMPL_SIZE \
((unsigned int)(skl_call_thunk_tail - skl_call_thunk_template))
extern void error_entry(void);
extern void xen_error_entry(void);
extern void paranoid_entry(void);
static inline bool within_coretext(const struct core_text *ct, void *addr)
{
unsigned long p = (unsigned long)addr;
return ct->base <= p && p < ct->end;
}
static inline bool within_module_coretext(void *addr)
{
bool ret = false;
#ifdef CONFIG_MODULES
struct module *mod;
preempt_disable();
mod = __module_address((unsigned long)addr);
if (mod && within_module_core((unsigned long)addr, mod))
ret = true;
preempt_enable();
#endif
return ret;
}
static bool is_coretext(const struct core_text *ct, void *addr)
{
if (ct && within_coretext(ct, addr))
return true;
if (within_coretext(&builtin_coretext, addr))
return true;
return within_module_coretext(addr);
}
static __init_or_module bool skip_addr(void *dest)
{
if (dest == error_entry)
return true;
if (dest == paranoid_entry)
return true;
if (dest == xen_error_entry)
return true;
/* Does FILL_RSB... */
if (dest == __switch_to_asm)
return true;
/* Accounts directly */
if (dest == ret_from_fork)
return true;
#ifdef CONFIG_HOTPLUG_CPU
if (dest == start_cpu0)
return true;
#endif
#ifdef CONFIG_FUNCTION_TRACER
if (dest == __fentry__)
return true;
#endif
#ifdef CONFIG_KEXEC_CORE
if (dest >= (void *)relocate_kernel &&
dest < (void*)relocate_kernel + KEXEC_CONTROL_CODE_MAX_SIZE)
return true;
#endif
#ifdef CONFIG_XEN
if (dest >= (void *)hypercall_page &&
dest < (void*)hypercall_page + PAGE_SIZE)
return true;
#endif
return false;
}
static __init_or_module void *call_get_dest(void *addr)
{
struct insn insn;
void *dest;
int ret;
ret = insn_decode_kernel(&insn, addr);
if (ret)
return ERR_PTR(ret);
/* Patched out call? */
if (insn.opcode.bytes[0] != CALL_INSN_OPCODE)
return NULL;
dest = addr + insn.length + insn.immediate.value;
if (skip_addr(dest))
return NULL;
return dest;
}
static const u8 nops[] = {
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
};
static __init_or_module void *patch_dest(void *dest, bool direct)
{
unsigned int tsize = SKL_TMPL_SIZE;
u8 *pad = dest - tsize;
/* Already patched? */
if (!bcmp(pad, skl_call_thunk_template, tsize))
return pad;
/* Ensure there are nops */
if (bcmp(pad, nops, tsize)) {
pr_warn_once("Invalid padding area for %pS\n", dest);
return NULL;
}
if (direct)
memcpy(pad, skl_call_thunk_template, tsize);
else
text_poke_copy_locked(pad, skl_call_thunk_template, tsize, true);
return pad;
}
static __init_or_module void patch_call(void *addr, const struct core_text *ct)
{
void *pad, *dest;
u8 bytes[8];
if (!within_coretext(ct, addr))
return;
dest = call_get_dest(addr);
if (!dest || WARN_ON_ONCE(IS_ERR(dest)))
return;
if (!is_coretext(ct, dest))
return;
pad = patch_dest(dest, within_coretext(ct, dest));
if (!pad)
return;
prdbg("Patch call at: %pS %px to %pS %px -> %px \n", addr, addr,
dest, dest, pad);
__text_gen_insn(bytes, CALL_INSN_OPCODE, addr, pad, CALL_INSN_SIZE);
text_poke_early(addr, bytes, CALL_INSN_SIZE);
}
static __init_or_module void
patch_call_sites(s32 *start, s32 *end, const struct core_text *ct)
{
s32 *s;
for (s = start; s < end; s++)
patch_call((void *)s + *s, ct);
}
static __init_or_module void
patch_paravirt_call_sites(struct paravirt_patch_site *start,
struct paravirt_patch_site *end,
const struct core_text *ct)
{
struct paravirt_patch_site *p;
for (p = start; p < end; p++)
patch_call(p->instr, ct);
}
static __init_or_module void
callthunks_setup(struct callthunk_sites *cs, const struct core_text *ct)
{
prdbg("Patching call sites %s\n", ct->name);
patch_call_sites(cs->call_start, cs->call_end, ct);
patch_paravirt_call_sites(cs->pv_start, cs->pv_end, ct);
prdbg("Patching call sites done%s\n", ct->name);
}
void __init callthunks_patch_builtin_calls(void)
{
struct callthunk_sites cs = {
.call_start = __call_sites,
.call_end = __call_sites_end,
.pv_start = __parainstructions,
.pv_end = __parainstructions_end
};
if (!cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
return;
pr_info("Setting up call depth tracking\n");
mutex_lock(&text_mutex);
callthunks_setup(&cs, &builtin_coretext);
static_call_force_reinit();
thunks_initialized = true;
mutex_unlock(&text_mutex);
}
void *callthunks_translate_call_dest(void *dest)
{
void *target;
lockdep_assert_held(&text_mutex);
if (!thunks_initialized || skip_addr(dest))
return dest;
if (!is_coretext(NULL, dest))
return dest;
target = patch_dest(dest, false);
return target ? : dest;
}
bool is_callthunk(void *addr)
{
unsigned int tmpl_size = SKL_TMPL_SIZE;
void *tmpl = skl_call_thunk_template;
unsigned long dest;
dest = roundup((unsigned long)addr, CONFIG_FUNCTION_ALIGNMENT);
if (!thunks_initialized || skip_addr((void *)dest))
return false;
return !bcmp((void *)(dest - tmpl_size), tmpl, tmpl_size);
}
#ifdef CONFIG_BPF_JIT
int x86_call_depth_emit_accounting(u8 **pprog, void *func)
{
unsigned int tmpl_size = SKL_TMPL_SIZE;
void *tmpl = skl_call_thunk_template;
if (!thunks_initialized)
return 0;
/* Is function call target a thunk? */
if (func && is_callthunk(func))
return 0;
memcpy(*pprog, tmpl, tmpl_size);
*pprog += tmpl_size;
return tmpl_size;
}
#endif
#ifdef CONFIG_MODULES
void noinline callthunks_patch_module_calls(struct callthunk_sites *cs,
struct module *mod)
{
struct core_text ct = {
.base = (unsigned long)mod->core_layout.base,
.end = (unsigned long)mod->core_layout.base + mod->core_layout.size,
.name = mod->name,
};
if (!thunks_initialized)
return;
mutex_lock(&text_mutex);
callthunks_setup(cs, &ct);
mutex_unlock(&text_mutex);
}
#endif /* CONFIG_MODULES */
#if defined(CONFIG_CALL_THUNKS_DEBUG) && defined(CONFIG_DEBUG_FS)
static int callthunks_debug_show(struct seq_file *m, void *p)
{
unsigned long cpu = (unsigned long)m->private;
seq_printf(m, "C: %16llu R: %16llu S: %16llu X: %16llu\n",
per_cpu(__x86_call_count, cpu),
per_cpu(__x86_ret_count, cpu),
per_cpu(__x86_stuffs_count, cpu),
per_cpu(__x86_ctxsw_count, cpu));
return 0;
}
static int callthunks_debug_open(struct inode *inode, struct file *file)
{
return single_open(file, callthunks_debug_show, inode->i_private);
}
static const struct file_operations dfs_ops = {
.open = callthunks_debug_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int __init callthunks_debugfs_init(void)
{
struct dentry *dir;
unsigned long cpu;
dir = debugfs_create_dir("callthunks", NULL);
for_each_possible_cpu(cpu) {
void *arg = (void *)cpu;
char name [10];
sprintf(name, "cpu%lu", cpu);
debugfs_create_file(name, 0644, dir, arg, &dfs_ops);
}
return 0;
}
__initcall(callthunks_debugfs_init);
#endif
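
When CONFIG_CALL_THUNKS_DEBUG and debugfs are both enabled, the per-CPU counters land in a 'callthunks' directory in debugfs; assuming the usual mount point (an assumption, the code above only creates the files), each per-CPU file prints one line in the 'C: ... R: ... S: ... X: ...' format produced by callthunks_debug_show():

    # cat /sys/kernel/debug/callthunks/cpu0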


@ -17,9 +17,6 @@ KMSAN_SANITIZE_common.o := n
# As above, instrumenting secondary CPU boot code causes boot hangs.
KCSAN_SANITIZE_common.o := n
# Make sure load_percpu_segment has no stackprotector
CFLAGS_common.o := -fno-stack-protector
obj-y := cacheinfo.o scattered.o topology.o
obj-y += common.o
obj-y += rdrand.o


@ -787,6 +787,7 @@ enum retbleed_mitigation {
RETBLEED_MITIGATION_IBPB,
RETBLEED_MITIGATION_IBRS,
RETBLEED_MITIGATION_EIBRS,
RETBLEED_MITIGATION_STUFF,
};
enum retbleed_mitigation_cmd {
@ -794,6 +795,7 @@ enum retbleed_mitigation_cmd {
RETBLEED_CMD_AUTO,
RETBLEED_CMD_UNRET,
RETBLEED_CMD_IBPB,
RETBLEED_CMD_STUFF,
};
static const char * const retbleed_strings[] = {
@ -802,6 +804,7 @@ static const char * const retbleed_strings[] = {
[RETBLEED_MITIGATION_IBPB] = "Mitigation: IBPB",
[RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS",
[RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS",
[RETBLEED_MITIGATION_STUFF] = "Mitigation: Stuffing",
};
static enum retbleed_mitigation retbleed_mitigation __ro_after_init =
@ -831,8 +834,12 @@ static int __init retbleed_parse_cmdline(char *str)
retbleed_cmd = RETBLEED_CMD_UNRET;
} else if (!strcmp(str, "ibpb")) {
retbleed_cmd = RETBLEED_CMD_IBPB;
} else if (!strcmp(str, "stuff")) {
retbleed_cmd = RETBLEED_CMD_STUFF;
} else if (!strcmp(str, "nosmt")) {
retbleed_nosmt = true;
} else if (!strcmp(str, "force")) {
setup_force_cpu_bug(X86_BUG_RETBLEED);
} else {
pr_err("Ignoring unknown retbleed option (%s).", str);
}
@ -879,6 +886,21 @@ static void __init retbleed_select_mitigation(void)
}
break;
case RETBLEED_CMD_STUFF:
if (IS_ENABLED(CONFIG_CALL_DEPTH_TRACKING) &&
spectre_v2_enabled == SPECTRE_V2_RETPOLINE) {
retbleed_mitigation = RETBLEED_MITIGATION_STUFF;
} else {
if (IS_ENABLED(CONFIG_CALL_DEPTH_TRACKING))
pr_err("WARNING: retbleed=stuff depends on spectre_v2=retpoline\n");
else
pr_err("WARNING: kernel not compiled with CALL_DEPTH_TRACKING.\n");
goto do_cmd_auto;
}
break;
do_cmd_auto:
case RETBLEED_CMD_AUTO:
default:
@ -916,6 +938,12 @@ static void __init retbleed_select_mitigation(void)
mitigate_smt = true;
break;
case RETBLEED_MITIGATION_STUFF:
setup_force_cpu_cap(X86_FEATURE_RETHUNK);
setup_force_cpu_cap(X86_FEATURE_CALL_DEPTH);
x86_set_skl_return_thunk();
break;
default:
break;
}
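
Putting the pieces together, the new stuffing mode is requested with something like the command line below; per the checks above it only sticks when the kernel was built with CONFIG_CALL_DEPTH_TRACKING and spectre_v2 resolves to retpoline mode, otherwise selection falls back to the auto path:

    retbleed=stuff spectre_v2=retpoline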
@ -926,7 +954,7 @@ static void __init retbleed_select_mitigation(void)
/*
* Let IBRS trump all on Intel without affecting the effects of the
* retbleed= cmdline option.
* retbleed= cmdline option except for call depth based stuffing
*/
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
switch (spectre_v2_enabled) {
@ -939,7 +967,8 @@ static void __init retbleed_select_mitigation(void)
retbleed_mitigation = RETBLEED_MITIGATION_EIBRS;
break;
default:
pr_err(RETBLEED_INTEL_MSG);
if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF)
pr_err(RETBLEED_INTEL_MSG);
}
}
@ -1413,6 +1442,7 @@ static void __init spectre_v2_select_mitigation(void)
if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) &&
boot_cpu_has_bug(X86_BUG_RETBLEED) &&
retbleed_cmd != RETBLEED_CMD_OFF &&
retbleed_cmd != RETBLEED_CMD_STUFF &&
boot_cpu_has(X86_FEATURE_IBRS) &&
boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
mode = SPECTRE_V2_IBRS;


@ -610,6 +610,7 @@ static __always_inline void setup_cet(struct cpuinfo_x86 *c)
if (!ibt_selftest()) {
pr_err("IBT selftest: Failed!\n");
wrmsrl(MSR_IA32_S_CET, 0);
setup_clear_cpu_cap(X86_FEATURE_IBT);
return;
}
@ -702,16 +703,6 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long));
__u32 cpu_caps_set[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long));
void load_percpu_segment(int cpu)
{
#ifdef CONFIG_X86_32
loadsegment(fs, __KERNEL_PERCPU);
#else
__loadsegment_simple(gs, 0);
wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu));
#endif
}
#ifdef CONFIG_X86_32
/* The 32-bit entry code needs to find cpu_entry_area. */
DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
@ -739,16 +730,45 @@ void load_fixmap_gdt(int cpu)
}
EXPORT_SYMBOL_GPL(load_fixmap_gdt);
/*
* Current gdt points %fs at the "master" per-cpu area: after this,
* it's on the real one.
/**
* switch_gdt_and_percpu_base - Switch to direct GDT and runtime per CPU base
* @cpu: The CPU number for which this is invoked
*
* Invoked during early boot to switch from early GDT and early per CPU to
* the direct GDT and the runtime per CPU area. On 32-bit the percpu base
* switch is implicit by loading the direct GDT. On 64-bit this requires
* updating GSBASE.
*/
void switch_to_new_gdt(int cpu)
void __init switch_gdt_and_percpu_base(int cpu)
{
/* Load the original GDT */
load_direct_gdt(cpu);
/* Reload the per-cpu base */
load_percpu_segment(cpu);
#ifdef CONFIG_X86_64
/*
* No need to load %gs. It is already correct.
*
* Writing %gs on 64-bit would zero GSBASE, which would make any per
* CPU operation fault up to the point of the wrmsrl().
*
* Set GSBASE to the new offset. Until the wrmsrl() happens the
* early mapping is still valid. That means the GSBASE update will
* lose any prior per CPU data which was not copied over in
* setup_per_cpu_areas().
*
* This works even with stackprotector enabled because the
* per CPU stack canary is 0 in both per CPU areas.
*/
wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu));
#else
/*
* %fs is already set to __KERNEL_PERCPU, but after switching GDT
* it is required to load FS again so that the 'hidden' part is
* updated from the new GDT. Up to this point the early per CPU
* translation is active. Any content of the early per CPU data
* which was not copied over in setup_per_cpu_areas() is lost.
*/
loadsegment(fs, __KERNEL_PERCPU);
#endif
}
static const struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
@ -1993,27 +2013,18 @@ static __init int setup_clearcpuid(char *arg)
}
__setup("clearcpuid=", setup_clearcpuid);
DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = {
.current_task = &init_task,
.preempt_count = INIT_PREEMPT_COUNT,
.top_of_stack = TOP_OF_INIT_STACK,
};
EXPORT_PER_CPU_SYMBOL(pcpu_hot);
#ifdef CONFIG_X86_64
DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
/*
* The following percpu variables are hot. Align current_task to
* cacheline size such that they fall in the same cacheline.
*/
DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
&init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
DEFINE_PER_CPU(void *, hardirq_stack_ptr);
DEFINE_PER_CPU(bool, hardirq_stack_inuse);
DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
EXPORT_PER_CPU_SYMBOL(__preempt_count);
DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) = TOP_OF_INIT_STACK;
static void wrmsrl_cstar(unsigned long val)
{
/*
@ -2064,20 +2075,6 @@ void syscall_init(void)
#else /* CONFIG_X86_64 */
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
EXPORT_PER_CPU_SYMBOL(__preempt_count);
/*
* On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
* the top of the kernel stack. Use an extra percpu variable to track the
* top of the kernel stack directly.
*/
DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
(unsigned long)&init_thread_union + THREAD_SIZE;
EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
#ifdef CONFIG_STACKPROTECTOR
DEFINE_PER_CPU(unsigned long, __stack_chk_guard);
EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
@ -2248,12 +2245,6 @@ void cpu_init(void)
boot_cpu_has(X86_FEATURE_TSC) || boot_cpu_has(X86_FEATURE_DE))
cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
/*
* Initialize the per-CPU GDT with the boot GDT,
* and set up the GDT descriptor:
*/
switch_to_new_gdt(cpu);
if (IS_ENABLED(CONFIG_X86_64)) {
loadsegment(fs, 0);
memset(cur->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);


@ -37,7 +37,7 @@ const char *stack_type_name(enum stack_type type)
static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
{
unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
unsigned long *begin = (unsigned long *)this_cpu_read(pcpu_hot.hardirq_stack_ptr);
unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
/*
@ -62,7 +62,7 @@ static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
{
unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack_ptr);
unsigned long *begin = (unsigned long *)this_cpu_read(pcpu_hot.softirq_stack_ptr);
unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
/*


@ -134,7 +134,7 @@ static __always_inline bool in_exception_stack(unsigned long *stack, struct stac
static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info *info)
{
unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
unsigned long *end = (unsigned long *)this_cpu_read(pcpu_hot.hardirq_stack_ptr);
unsigned long *begin;
/*


@ -69,6 +69,10 @@ static const char *ftrace_nop_replace(void)
static const char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
/*
* No need to translate into a callthunk. The trampoline does
* the depth accounting itself.
*/
return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr);
}
@ -317,7 +321,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
unsigned long size;
unsigned long *ptr;
void *trampoline;
void *ip;
void *ip, *dest;
/* 48 8b 15 <offset> is movq <offset>(%rip), %rdx */
unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 };
unsigned const char retq[] = { RET_INSN_OPCODE, INT3_INSN_OPCODE };
@ -359,7 +363,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
ip = trampoline + size;
if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
__text_gen_insn(ip, JMP32_INSN_OPCODE, ip, &__x86_return_thunk, JMP32_INSN_SIZE);
__text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE);
else
memcpy(ip, retq, sizeof(retq));
@ -404,17 +408,19 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
/* put in the call to the function */
mutex_lock(&text_mutex);
call_offset -= start_offset;
/*
* No need to translate into a callthunk. The trampoline does
* the depth accounting before the call already.
*/
dest = ftrace_ops_get_func(ops);
memcpy(trampoline + call_offset,
text_gen_insn(CALL_INSN_OPCODE,
trampoline + call_offset,
ftrace_ops_get_func(ops)), CALL_INSN_SIZE);
text_gen_insn(CALL_INSN_OPCODE, trampoline + call_offset, dest),
CALL_INSN_SIZE);
mutex_unlock(&text_mutex);
/* ALLOC_TRAMP flags lets us know we created it */
ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP;
set_vm_flush_reset_perms(trampoline);
if (likely(system_state != SYSTEM_BOOTING))
set_memory_ro((unsigned long)trampoline, npages);
set_memory_x((unsigned long)trampoline, npages);


@ -3,8 +3,9 @@
* Copyright (C) 2014 Steven Rostedt, Red Hat Inc
*/
#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
#include <asm/ftrace.h>
#include <asm/export.h>
@ -131,16 +132,19 @@
.endm
SYM_TYPED_FUNC_START(ftrace_stub)
CALL_DEPTH_ACCOUNT
RET
SYM_FUNC_END(ftrace_stub)
SYM_TYPED_FUNC_START(ftrace_stub_graph)
CALL_DEPTH_ACCOUNT
RET
SYM_FUNC_END(ftrace_stub_graph)
#ifdef CONFIG_DYNAMIC_FTRACE
SYM_FUNC_START(__fentry__)
CALL_DEPTH_ACCOUNT
RET
SYM_FUNC_END(__fentry__)
EXPORT_SYMBOL(__fentry__)
@ -149,6 +153,8 @@ SYM_FUNC_START(ftrace_caller)
/* save_mcount_regs fills in first two parameters */
save_mcount_regs
CALL_DEPTH_ACCOUNT
/* Stack - skipping return address of ftrace_caller */
leaq MCOUNT_REG_SIZE+8(%rsp), %rcx
movq %rcx, RSP(%rsp)
@ -164,6 +170,9 @@ SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL)
/* Only ops with REGS flag set should have CS register set */
movq $0, CS(%rsp)
/* Account for the function call below */
CALL_DEPTH_ACCOUNT
SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
call ftrace_stub
@ -193,6 +202,8 @@ SYM_FUNC_START(ftrace_regs_caller)
save_mcount_regs 8
/* save_mcount_regs fills in first two parameters */
CALL_DEPTH_ACCOUNT
SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
/* Load the ftrace_ops into the 3rd parameter */
@ -223,6 +234,9 @@ SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL)
/* regs go into 4th parameter */
leaq (%rsp), %rcx
/* Account for the function call below */
CALL_DEPTH_ACCOUNT
SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
call ftrace_stub
@ -275,7 +289,20 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
/* Restore flags */
popfq
UNWIND_HINT_FUNC
RET
/*
* The above left an extra return value on the stack; effectively
* doing a tail-call without using a register. This PUSH;RET
* pattern unbalances the RSB; inject a pointless CALL to rebalance.
*/
ANNOTATE_INTRA_FUNCTION_CALL
CALL .Ldo_rebalance
int3
.Ldo_rebalance:
add $8, %rsp
ALTERNATIVE __stringify(RET), \
__stringify(ANNOTATE_UNRET_SAFE; ret; int3), \
X86_FEATURE_CALL_DEPTH
SYM_FUNC_END(ftrace_regs_caller)
STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller)
@ -284,6 +311,8 @@ STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller)
#else /* ! CONFIG_DYNAMIC_FTRACE */
SYM_FUNC_START(__fentry__)
CALL_DEPTH_ACCOUNT
cmpq $ftrace_stub, ftrace_trace_function
jnz trace
RET
@ -337,6 +366,8 @@ SYM_CODE_START(return_to_handler)
int3
.Ldo_rop:
mov %rdi, (%rsp)
RET
ALTERNATIVE __stringify(RET), \
__stringify(ANNOTATE_UNRET_SAFE; ret; int3), \
X86_FEATURE_CALL_DEPTH
SYM_CODE_END(return_to_handler)
#endif


@ -370,6 +370,7 @@ SYM_CODE_END(secondary_startup_64)
* start_secondary() via .Ljump_to_C_code.
*/
SYM_CODE_START(start_cpu0)
ANNOTATE_NOENDBR
UNWIND_HINT_EMPTY
movq initial_stack(%rip), %rsp
jmp .Ljump_to_C_code


@ -52,9 +52,6 @@ static inline int check_stack_overflow(void) { return 0; }
static inline void print_stack_overflow(void) { }
#endif
DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
DEFINE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
static void call_on_stack(void *func, void *stack)
{
asm volatile("xchgl %%ebx,%%esp \n"
@ -77,7 +74,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
u32 *isp, *prev_esp, arg1;
curstk = (struct irq_stack *) current_stack();
irqstk = __this_cpu_read(hardirq_stack_ptr);
irqstk = __this_cpu_read(pcpu_hot.hardirq_stack_ptr);
/*
* this is where we switch to the IRQ stack. However, if we are
@ -115,7 +112,7 @@ int irq_init_percpu_irqstack(unsigned int cpu)
int node = cpu_to_node(cpu);
struct page *ph, *ps;
if (per_cpu(hardirq_stack_ptr, cpu))
if (per_cpu(pcpu_hot.hardirq_stack_ptr, cpu))
return 0;
ph = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER);
@ -127,8 +124,8 @@ int irq_init_percpu_irqstack(unsigned int cpu)
return -ENOMEM;
}
per_cpu(hardirq_stack_ptr, cpu) = page_address(ph);
per_cpu(softirq_stack_ptr, cpu) = page_address(ps);
per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = page_address(ph);
per_cpu(pcpu_hot.softirq_stack_ptr, cpu) = page_address(ps);
return 0;
}
@ -138,7 +135,7 @@ void do_softirq_own_stack(void)
struct irq_stack *irqstk;
u32 *isp, *prev_esp;
irqstk = __this_cpu_read(softirq_stack_ptr);
irqstk = __this_cpu_read(pcpu_hot.softirq_stack_ptr);
/* build the stack frame on the softirq stack */
isp = (u32 *) ((char *)irqstk + sizeof(*irqstk));


@ -50,7 +50,7 @@ static int map_irq_stack(unsigned int cpu)
return -ENOMEM;
/* Store actual TOS to avoid adjustment in the hotpath */
per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8;
per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8;
return 0;
}
#else
@ -63,14 +63,14 @@ static int map_irq_stack(unsigned int cpu)
void *va = per_cpu_ptr(&irq_stack_backing_store, cpu);
/* Store actual TOS to avoid adjustment in the hotpath */
per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8;
per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8;
return 0;
}
#endif
int irq_init_percpu_irqstack(unsigned int cpu)
{
if (per_cpu(hardirq_stack_ptr, cpu))
if (per_cpu(pcpu_hot.hardirq_stack_ptr, cpu))
return 0;
return map_irq_stack(cpu);
}


@ -414,7 +414,6 @@ void *alloc_insn_page(void)
if (!page)
return NULL;
set_vm_flush_reset_perms(page);
/*
* First make the page read-only, and only then make it executable to
* prevent it from being W+X in between.


@ -798,19 +798,13 @@ extern bool __raw_callee_save___kvm_vcpu_is_preempted(long);
* Hand-optimize version for x86-64 to avoid 8 64-bit register saving and
* restoring to/from the stack.
*/
asm(
".pushsection .text;"
".global __raw_callee_save___kvm_vcpu_is_preempted;"
".type __raw_callee_save___kvm_vcpu_is_preempted, @function;"
"__raw_callee_save___kvm_vcpu_is_preempted:"
ASM_ENDBR
"movq __per_cpu_offset(,%rdi,8), %rax;"
"cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
"setne %al;"
ASM_RET
".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;"
".popsection");
#define PV_VCPU_PREEMPTED_ASM \
"movq __per_cpu_offset(,%rdi,8), %rax\n\t" \
"cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax)\n\t" \
"setne %al\n\t"
DEFINE_PARAVIRT_ASM(__raw_callee_save___kvm_vcpu_is_preempted,
PV_VCPU_PREEMPTED_ASM, .text);
#endif
static void __init kvm_guest_init(void)


@ -74,10 +74,11 @@ void *module_alloc(unsigned long size)
return NULL;
p = __vmalloc_node_range(size, MODULE_ALIGN,
MODULES_VADDR + get_module_load_offset(),
MODULES_END, gfp_mask,
PAGE_KERNEL, VM_DEFER_KMEMLEAK, NUMA_NO_NODE,
__builtin_return_address(0));
MODULES_VADDR + get_module_load_offset(),
MODULES_END, gfp_mask, PAGE_KERNEL,
VM_FLUSH_RESET_PERMS | VM_DEFER_KMEMLEAK,
NUMA_NO_NODE, __builtin_return_address(0));
if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
vfree(p);
return NULL;
@ -253,7 +254,8 @@ int module_finalize(const Elf_Ehdr *hdr,
{
const Elf_Shdr *s, *alt = NULL, *locks = NULL,
*para = NULL, *orc = NULL, *orc_ip = NULL,
*retpolines = NULL, *returns = NULL, *ibt_endbr = NULL;
*retpolines = NULL, *returns = NULL, *ibt_endbr = NULL,
*calls = NULL, *cfi = NULL;
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
@ -271,6 +273,10 @@ int module_finalize(const Elf_Ehdr *hdr,
retpolines = s;
if (!strcmp(".return_sites", secstrings + s->sh_name))
returns = s;
if (!strcmp(".call_sites", secstrings + s->sh_name))
calls = s;
if (!strcmp(".cfi_sites", secstrings + s->sh_name))
cfi = s;
if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name))
ibt_endbr = s;
}
@ -283,6 +289,22 @@ int module_finalize(const Elf_Ehdr *hdr,
void *pseg = (void *)para->sh_addr;
apply_paravirt(pseg, pseg + para->sh_size);
}
if (retpolines || cfi) {
void *rseg = NULL, *cseg = NULL;
unsigned int rsize = 0, csize = 0;
if (retpolines) {
rseg = (void *)retpolines->sh_addr;
rsize = retpolines->sh_size;
}
if (cfi) {
cseg = (void *)cfi->sh_addr;
csize = cfi->sh_size;
}
apply_fineibt(rseg, rseg + rsize, cseg, cseg + csize);
}
if (retpolines) {
void *rseg = (void *)retpolines->sh_addr;
apply_retpolines(rseg, rseg + retpolines->sh_size);
@ -296,6 +318,21 @@ int module_finalize(const Elf_Ehdr *hdr,
void *aseg = (void *)alt->sh_addr;
apply_alternatives(aseg, aseg + alt->sh_size);
}
if (calls || para) {
struct callthunk_sites cs = {};
if (calls) {
cs.call_start = (void *)calls->sh_addr;
cs.call_end = (void *)calls->sh_addr + calls->sh_size;
}
if (para) {
cs.pv_start = (void *)para->sh_addr;
cs.pv_end = (void *)para->sh_addr + para->sh_size;
}
callthunks_patch_module_calls(&cs, me);
}
if (ibt_endbr) {
void *iseg = (void *)ibt_endbr->sh_addr;
apply_ibt_endbr(iseg, iseg + ibt_endbr->sh_size);


@ -37,27 +37,10 @@
* nop stub, which must not clobber anything *including the stack* to
* avoid confusing the entry prologues.
*/
extern void _paravirt_nop(void);
asm (".pushsection .entry.text, \"ax\"\n"
".global _paravirt_nop\n"
"_paravirt_nop:\n\t"
ASM_ENDBR
ASM_RET
".size _paravirt_nop, . - _paravirt_nop\n\t"
".type _paravirt_nop, @function\n\t"
".popsection");
DEFINE_PARAVIRT_ASM(_paravirt_nop, "", .entry.text);
/* stub always returning 0. */
asm (".pushsection .entry.text, \"ax\"\n"
".global paravirt_ret0\n"
"paravirt_ret0:\n\t"
ASM_ENDBR
"xor %" _ASM_AX ", %" _ASM_AX ";\n\t"
ASM_RET
".size paravirt_ret0, . - paravirt_ret0\n\t"
".type paravirt_ret0, @function\n\t"
".popsection");
DEFINE_PARAVIRT_ASM(paravirt_ret0, "xor %eax,%eax", .entry.text);
void __init default_banner(void)
{


@ -191,13 +191,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
arch_end_context_switch(next_p);
/*
* Reload esp0 and cpu_current_top_of_stack. This changes
* Reload esp0 and pcpu_hot.top_of_stack. This changes
* current_thread_info(). Refresh the SYSENTER configuration in
* case prev or next is vm86.
*/
update_task_stack(next_p);
refresh_sysenter_cs(next);
this_cpu_write(cpu_current_top_of_stack,
this_cpu_write(pcpu_hot.top_of_stack,
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE);
@ -207,7 +207,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
if (prev->gs | next->gs)
loadsegment(gs, next->gs);
this_cpu_write(current_task, next_p);
raw_cpu_write(pcpu_hot.current_task, next_p);
switch_fpu_finish();


@ -563,7 +563,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
int cpu = smp_processor_id();
WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
this_cpu_read(hardirq_stack_inuse));
this_cpu_read(pcpu_hot.hardirq_stack_inuse));
if (!test_thread_flag(TIF_NEED_FPU_LOAD))
switch_fpu_prepare(prev_fpu, cpu);
@ -617,8 +617,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/*
* Switch the PDA and FPU contexts.
*/
this_cpu_write(current_task, next_p);
this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
raw_cpu_write(pcpu_hot.current_task, next_p);
raw_cpu_write(pcpu_hot.top_of_stack, task_top_of_stack(next_p));
switch_fpu_finish();


@ -41,6 +41,7 @@
.text
.align PAGE_SIZE
.code64
SYM_CODE_START_NOALIGN(relocate_range)
SYM_CODE_START_NOALIGN(relocate_kernel)
UNWIND_HINT_EMPTY
ANNOTATE_NOENDBR
@ -312,5 +313,5 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
int3
SYM_CODE_END(swap_pages)
.globl kexec_control_code_size
.set kexec_control_code_size, . - relocate_kernel
.skip KEXEC_CONTROL_CODE_MAX_SIZE - (. - relocate_kernel), 0xcc
SYM_CODE_END(relocate_range);


@ -23,9 +23,6 @@
#include <asm/cpumask.h>
#include <asm/cpu.h>
DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
EXPORT_PER_CPU_SYMBOL(cpu_number);
#ifdef CONFIG_X86_64
#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
#else
@ -172,7 +169,7 @@ void __init setup_per_cpu_areas(void)
for_each_possible_cpu(cpu) {
per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu];
per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
per_cpu(cpu_number, cpu) = cpu;
per_cpu(pcpu_hot.cpu_number, cpu) = cpu;
setup_percpu_segment(cpu);
/*
* Copy data used in early init routines from the
@ -211,7 +208,7 @@ void __init setup_per_cpu_areas(void)
* area. Reload any changed state for the boot CPU.
*/
if (!cpu)
switch_to_new_gdt(cpu);
switch_gdt_and_percpu_base(cpu);
}
/* indicate the early static arrays will soon be gone */


@ -1048,7 +1048,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
/* Just in case we booted with a single CPU. */
alternatives_enable_smp();
per_cpu(current_task, cpu) = idle;
per_cpu(pcpu_hot.current_task, cpu) = idle;
cpu_init_stack_canary(cpu, idle);
/* Initialize the interrupt stack(s) */
@ -1058,7 +1058,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
per_cpu(pcpu_hot.top_of_stack, cpu) = task_top_of_stack(idle);
#else
initial_gs = per_cpu_offset(cpu);
#endif
@ -1453,7 +1453,11 @@ void arch_thaw_secondary_cpus_end(void)
void __init native_smp_prepare_boot_cpu(void)
{
int me = smp_processor_id();
switch_to_new_gdt(me);
/* SMP handles this from setup_per_cpu_areas() */
if (!IS_ENABLED(CONFIG_SMP))
switch_gdt_and_percpu_base(me);
/* already set me in cpu_online_mask in boot_cpu_init() */
cpumask_set_cpu(me, cpu_callout_mask);
cpu_set_state_online(me);


@ -34,6 +34,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type,
switch (type) {
case CALL:
func = callthunks_translate_call_dest(func);
code = text_gen_insn(CALL_INSN_OPCODE, insn, func);
if (func == &__static_call_return0) {
emulate = code;
@ -52,7 +53,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type,
case RET:
if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk);
code = text_gen_insn(JMP32_INSN_OPCODE, insn, x86_return_thunk);
else
code = &retinsn;
break;


@ -858,7 +858,7 @@ DEFINE_IDTENTRY_RAW(exc_int3)
*/
asmlinkage __visible noinstr struct pt_regs *sync_regs(struct pt_regs *eregs)
{
struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
struct pt_regs *regs = (struct pt_regs *)this_cpu_read(pcpu_hot.top_of_stack) - 1;
if (regs != eregs)
*regs = *eregs;
return regs;
@ -876,7 +876,7 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r
* trust it and switch to the current kernel stack
*/
if (ip_within_syscall_gap(regs)) {
sp = this_cpu_read(cpu_current_top_of_stack);
sp = this_cpu_read(pcpu_hot.top_of_stack);
goto sync;
}


@ -136,6 +136,21 @@ static struct orc_entry null_orc_entry = {
.type = UNWIND_HINT_TYPE_CALL
};
#ifdef CONFIG_CALL_THUNKS
static struct orc_entry *orc_callthunk_find(unsigned long ip)
{
if (!is_callthunk((void *)ip))
return NULL;
return &null_orc_entry;
}
#else
static struct orc_entry *orc_callthunk_find(unsigned long ip)
{
return NULL;
}
#endif
/* Fake frame pointer entry -- used as a fallback for generated code */
static struct orc_entry orc_fp_entry = {
.type = UNWIND_HINT_TYPE_CALL,
@ -189,7 +204,11 @@ static struct orc_entry *orc_find(unsigned long ip)
if (orc)
return orc;
return orc_ftrace_find(ip);
orc = orc_ftrace_find(ip);
if (orc)
return orc;
return orc_callthunk_find(ip);
}
#ifdef CONFIG_MODULES


@ -132,18 +132,19 @@ SECTIONS
CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
ALIGN_ENTRY_TEXT_BEGIN
ENTRY_TEXT
ALIGN_ENTRY_TEXT_END
SOFTIRQENTRY_TEXT
STATIC_CALL_TEXT
*(.gnu.warning)
#ifdef CONFIG_RETPOLINE
__indirect_thunk_start = .;
*(.text.__x86.*)
__indirect_thunk_end = .;
#endif
STATIC_CALL_TEXT
ALIGN_ENTRY_TEXT_BEGIN
ENTRY_TEXT
ALIGN_ENTRY_TEXT_END
*(.gnu.warning)
} :text =0xcccc
/* End of text section, which should occupy whole number of pages */
@ -290,6 +291,13 @@ SECTIONS
*(.return_sites)
__return_sites_end = .;
}
. = ALIGN(8);
.call_sites : AT(ADDR(.call_sites) - LOAD_OFFSET) {
__call_sites = .;
*(.call_sites)
__call_sites_end = .;
}
#endif
#ifdef CONFIG_X86_KERNEL_IBT
@ -301,6 +309,15 @@ SECTIONS
}
#endif
#ifdef CONFIG_FINEIBT
. = ALIGN(8);
.cfi_sites : AT(ADDR(.cfi_sites) - LOAD_OFFSET) {
__cfi_sites = .;
*(.cfi_sites)
__cfi_sites_end = .;
}
#endif
/*
* struct alt_inst entries. From the header (alternative.h):
* "Alternative instructions for different CPU types or capabilities"
@ -493,11 +510,3 @@ INIT_PER_CPU(irq_stack_backing_store);
#endif
#endif /* CONFIG_X86_64 */
#ifdef CONFIG_KEXEC_CORE
#include <asm/kexec.h>
. = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
"kexec control code size is too big");
#endif


@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/bitsperlong.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/nospec-branch.h>


@ -11,6 +11,7 @@ asm(
".text\n"
".type just_return_func, @function\n"
".globl just_return_func\n"
ASM_FUNC_ALIGN
"just_return_func:\n"
ANNOTATE_NOENDBR
ASM_RET


@ -47,8 +47,6 @@ SYM_FUNC_START(__put_user_1)
LOAD_TASK_SIZE_MINUS_N(0)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
SYM_INNER_LABEL(__put_user_nocheck_1, SYM_L_GLOBAL)
ENDBR
ASM_STAC
1: movb %al,(%_ASM_CX)
xor %ecx,%ecx
@ -56,54 +54,87 @@ SYM_INNER_LABEL(__put_user_nocheck_1, SYM_L_GLOBAL)
RET
SYM_FUNC_END(__put_user_1)
EXPORT_SYMBOL(__put_user_1)
SYM_FUNC_START(__put_user_nocheck_1)
ENDBR
ASM_STAC
2: movb %al,(%_ASM_CX)
xor %ecx,%ecx
ASM_CLAC
RET
SYM_FUNC_END(__put_user_nocheck_1)
EXPORT_SYMBOL(__put_user_nocheck_1)
SYM_FUNC_START(__put_user_2)
LOAD_TASK_SIZE_MINUS_N(1)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
SYM_INNER_LABEL(__put_user_nocheck_2, SYM_L_GLOBAL)
ENDBR
ASM_STAC
2: movw %ax,(%_ASM_CX)
3: movw %ax,(%_ASM_CX)
xor %ecx,%ecx
ASM_CLAC
RET
SYM_FUNC_END(__put_user_2)
EXPORT_SYMBOL(__put_user_2)
SYM_FUNC_START(__put_user_nocheck_2)
ENDBR
ASM_STAC
4: movw %ax,(%_ASM_CX)
xor %ecx,%ecx
ASM_CLAC
RET
SYM_FUNC_END(__put_user_nocheck_2)
EXPORT_SYMBOL(__put_user_nocheck_2)
SYM_FUNC_START(__put_user_4)
LOAD_TASK_SIZE_MINUS_N(3)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
SYM_INNER_LABEL(__put_user_nocheck_4, SYM_L_GLOBAL)
ENDBR
ASM_STAC
3: movl %eax,(%_ASM_CX)
5: movl %eax,(%_ASM_CX)
xor %ecx,%ecx
ASM_CLAC
RET
SYM_FUNC_END(__put_user_4)
EXPORT_SYMBOL(__put_user_4)
SYM_FUNC_START(__put_user_nocheck_4)
ENDBR
ASM_STAC
6: movl %eax,(%_ASM_CX)
xor %ecx,%ecx
ASM_CLAC
RET
SYM_FUNC_END(__put_user_nocheck_4)
EXPORT_SYMBOL(__put_user_nocheck_4)
SYM_FUNC_START(__put_user_8)
LOAD_TASK_SIZE_MINUS_N(7)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
SYM_INNER_LABEL(__put_user_nocheck_8, SYM_L_GLOBAL)
ENDBR
ASM_STAC
4: mov %_ASM_AX,(%_ASM_CX)
7: mov %_ASM_AX,(%_ASM_CX)
#ifdef CONFIG_X86_32
5: movl %edx,4(%_ASM_CX)
8: movl %edx,4(%_ASM_CX)
#endif
xor %ecx,%ecx
ASM_CLAC
RET
SYM_FUNC_END(__put_user_8)
EXPORT_SYMBOL(__put_user_8)
SYM_FUNC_START(__put_user_nocheck_8)
ENDBR
ASM_STAC
9: mov %_ASM_AX,(%_ASM_CX)
#ifdef CONFIG_X86_32
10: movl %edx,4(%_ASM_CX)
#endif
xor %ecx,%ecx
ASM_CLAC
RET
SYM_FUNC_END(__put_user_nocheck_8)
EXPORT_SYMBOL(__put_user_nocheck_8)
SYM_CODE_START_LOCAL(.Lbad_put_user_clac)
@ -117,6 +148,11 @@ SYM_CODE_END(.Lbad_put_user_clac)
_ASM_EXTABLE_UA(2b, .Lbad_put_user_clac)
_ASM_EXTABLE_UA(3b, .Lbad_put_user_clac)
_ASM_EXTABLE_UA(4b, .Lbad_put_user_clac)
#ifdef CONFIG_X86_32
_ASM_EXTABLE_UA(5b, .Lbad_put_user_clac)
_ASM_EXTABLE_UA(6b, .Lbad_put_user_clac)
_ASM_EXTABLE_UA(7b, .Lbad_put_user_clac)
_ASM_EXTABLE_UA(9b, .Lbad_put_user_clac)
#ifdef CONFIG_X86_32
_ASM_EXTABLE_UA(8b, .Lbad_put_user_clac)
_ASM_EXTABLE_UA(10b, .Lbad_put_user_clac)
#endif


@ -5,24 +5,27 @@
#include <asm/dwarf2.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/asm-offsets.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
#include <asm/percpu.h>
#include <asm/frame.h>
.section .text.__x86.indirect_thunk
.macro RETPOLINE reg
.macro POLINE reg
ANNOTATE_INTRA_FUNCTION_CALL
call .Ldo_rop_\@
.Lspec_trap_\@:
UNWIND_HINT_EMPTY
pause
lfence
jmp .Lspec_trap_\@
int3
.Ldo_rop_\@:
mov %\reg, (%_ASM_SP)
UNWIND_HINT_FUNC
.endm
.macro RETPOLINE reg
POLINE \reg
RET
.endm
@ -52,7 +55,6 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
*/
#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
.align RETPOLINE_THUNK_SIZE
SYM_CODE_START(__x86_indirect_thunk_array)
@ -64,10 +66,65 @@ SYM_CODE_START(__x86_indirect_thunk_array)
.align RETPOLINE_THUNK_SIZE
SYM_CODE_END(__x86_indirect_thunk_array)
#define GEN(reg) EXPORT_THUNK(reg)
#define GEN(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN
#ifdef CONFIG_CALL_DEPTH_TRACKING
.macro CALL_THUNK reg
.align RETPOLINE_THUNK_SIZE
SYM_INNER_LABEL(__x86_indirect_call_thunk_\reg, SYM_L_GLOBAL)
UNWIND_HINT_EMPTY
ANNOTATE_NOENDBR
CALL_DEPTH_ACCOUNT
POLINE \reg
ANNOTATE_UNRET_SAFE
ret
int3
.endm
.align RETPOLINE_THUNK_SIZE
SYM_CODE_START(__x86_indirect_call_thunk_array)
#define GEN(reg) CALL_THUNK reg
#include <asm/GEN-for-each-reg.h>
#undef GEN
.align RETPOLINE_THUNK_SIZE
SYM_CODE_END(__x86_indirect_call_thunk_array)
#define GEN(reg) __EXPORT_THUNK(__x86_indirect_call_thunk_ ## reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN
.macro JUMP_THUNK reg
.align RETPOLINE_THUNK_SIZE
SYM_INNER_LABEL(__x86_indirect_jump_thunk_\reg, SYM_L_GLOBAL)
UNWIND_HINT_EMPTY
ANNOTATE_NOENDBR
POLINE \reg
ANNOTATE_UNRET_SAFE
ret
int3
.endm
.align RETPOLINE_THUNK_SIZE
SYM_CODE_START(__x86_indirect_jump_thunk_array)
#define GEN(reg) JUMP_THUNK reg
#include <asm/GEN-for-each-reg.h>
#undef GEN
.align RETPOLINE_THUNK_SIZE
SYM_CODE_END(__x86_indirect_jump_thunk_array)
#define GEN(reg) __EXPORT_THUNK(__x86_indirect_jump_thunk_ ## reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN
#endif
/*
* This function name is magical and is used by -mfunction-return=thunk-extern
* for the compiler to generate JMPs to it.
@ -140,3 +197,37 @@ __EXPORT_THUNK(zen_untrain_ret)
EXPORT_SYMBOL(__x86_return_thunk)
#endif /* CONFIG_RETHUNK */
#ifdef CONFIG_CALL_DEPTH_TRACKING
.align 64
SYM_FUNC_START(__x86_return_skl)
ANNOTATE_NOENDBR
/*
* Keep the hotpath in a 16byte I-fetch for the non-debug
* case.
*/
CALL_THUNKS_DEBUG_INC_RETS
shlq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth)
jz 1f
ANNOTATE_UNRET_SAFE
ret
int3
1:
CALL_THUNKS_DEBUG_INC_STUFFS
.rept 16
ANNOTATE_INTRA_FUNCTION_CALL
call 2f
int3
2:
.endr
add $(8*16), %rsp
CREDIT_CALL_DEPTH
ANNOTATE_UNRET_SAFE
ret
int3
SYM_FUNC_END(__x86_return_skl)
#endif /* CONFIG_CALL_DEPTH_TRACKING */
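
The thunk above is the consumer side of the depth counter: every return shifts the per-CPU value left by 5, and a zero result means the tracked depth has run out, so the 16-call loop re-stuffs the RSB and the counter is re-credited. A minimal user-space model of that bookkeeping follows; the shift width and the 16 stuffing calls come from the asm above, while the initial/credit values and the "calls shift the other way" half are assumptions about INCREMENT_CALL_DEPTH / CREDIT_CALL_DEPTH, which are not shown in this diff:

    /* call_depth_model.c -- a toy model of the Skylake call depth accounting */
    #include <stdint.h>
    #include <stdio.h>

    #define DEPTH_SHIFT         5       /* shlq $5 in __x86_return_skl          */
    #define RSB_STUFF_LOOPS     16      /* .rept 16 stuffing loop               */
    #define DEPTH_CREDIT        (~0ULL) /* assumed: fully credited counter      */

    static uint64_t call_depth = ~0ULL; /* assumed boot-time value              */
    static unsigned long stuff_events;

    static void model_call(void)
    {
        /*
         * Assumed: calls do an arithmetic right shift (sarq), replenishing
         * credit; relies on the usual two's-complement shift behaviour.
         */
        call_depth = (uint64_t)((int64_t)call_depth >> DEPTH_SHIFT);
    }

    static void model_return(void)
    {
        call_depth <<= DEPTH_SHIFT;     /* shlq $5 ; jz -> stuff                */
        if (!call_depth) {
            stuff_events++;             /* real thunk does RSB_STUFF_LOOPS calls */
            call_depth = DEPTH_CREDIT;  /* CREDIT_CALL_DEPTH                    */
        }
    }

    int main(void)
    {
        int i;

        for (i = 0; i < 4; i++) {       /* shallow call/return pairs never underflow */
            model_call();
            model_return();
        }
        printf("after shallow pairs: %lu stuffs\n", stuff_events);

        for (i = 0; i < 64; i++)        /* a long unwinding return chain */
            model_return();
        printf("after 64 plain returns: %lu stuffs\n", stuff_events);
        return 0;
    }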


@ -12,6 +12,7 @@
#include <linux/memory.h>
#include <linux/sort.h>
#include <asm/extable.h>
#include <asm/ftrace.h>
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
#include <asm/text-patching.h>
@ -340,6 +341,13 @@ static int emit_call(u8 **pprog, void *func, void *ip)
return emit_patch(pprog, func, ip, 0xE8);
}
static int emit_rsb_call(u8 **pprog, void *func, void *ip)
{
OPTIMIZER_HIDE_VAR(func);
x86_call_depth_emit_accounting(pprog, func);
return emit_patch(pprog, func, ip, 0xE8);
}
static int emit_jump(u8 **pprog, void *func, void *ip)
{
return emit_patch(pprog, func, ip, 0xE9);
@ -417,7 +425,10 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
EMIT2(0xFF, 0xE0 + reg);
} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
OPTIMIZER_HIDE_VAR(reg);
emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
if (cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
emit_jump(&prog, &__x86_indirect_jump_thunk_array[reg], ip);
else
emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
} else {
EMIT2(0xFF, 0xE0 + reg); /* jmp *%\reg */
if (IS_ENABLED(CONFIG_RETPOLINE) || IS_ENABLED(CONFIG_SLS))
@ -432,7 +443,7 @@ static void emit_return(u8 **pprog, u8 *ip)
u8 *prog = *pprog;
if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
emit_jump(&prog, &__x86_return_thunk, ip);
emit_jump(&prog, x86_return_thunk, ip);
} else {
EMIT1(0xC3); /* ret */
if (IS_ENABLED(CONFIG_SLS))
@ -1514,19 +1525,26 @@ st: if (is_imm8(insn->off))
break;
/* call */
case BPF_JMP | BPF_CALL:
case BPF_JMP | BPF_CALL: {
int offs;
func = (u8 *) __bpf_call_base + imm32;
if (tail_call_reachable) {
/* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
EMIT3_off32(0x48, 0x8B, 0x85,
-round_up(bpf_prog->aux->stack_depth, 8) - 8);
if (!imm32 || emit_call(&prog, func, image + addrs[i - 1] + 7))
if (!imm32)
return -EINVAL;
offs = 7 + x86_call_depth_emit_accounting(&prog, func);
} else {
if (!imm32 || emit_call(&prog, func, image + addrs[i - 1]))
if (!imm32)
return -EINVAL;
offs = x86_call_depth_emit_accounting(&prog, func);
}
if (emit_call(&prog, func, image + addrs[i - 1] + offs))
return -EINVAL;
break;
}
case BPF_JMP | BPF_TAIL_CALL:
if (imm32)
@ -1917,7 +1935,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
/* arg2: lea rsi, [rbp - ctx_cookie_off] */
EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
if (emit_call(&prog, bpf_trampoline_enter(p), prog))
if (emit_rsb_call(&prog, bpf_trampoline_enter(p), prog))
return -EINVAL;
/* remember prog start time returned by __bpf_prog_enter */
emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
@ -1938,7 +1956,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
(long) p->insnsi >> 32,
(u32) (long) p->insnsi);
/* call JITed bpf program or interpreter */
if (emit_call(&prog, p->bpf_func, prog))
if (emit_rsb_call(&prog, p->bpf_func, prog))
return -EINVAL;
/*
@ -1962,7 +1980,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
/* arg3: lea rdx, [rbp - run_ctx_off] */
EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
if (emit_call(&prog, bpf_trampoline_exit(p), prog))
if (emit_rsb_call(&prog, bpf_trampoline_exit(p), prog))
return -EINVAL;
*pprog = prog;
@ -2184,6 +2202,11 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
prog = image;
EMIT_ENDBR();
/*
* This is the direct-call trampoline, as such it needs accounting
* for the __fentry__ call.
*/
x86_call_depth_emit_accounting(&prog, NULL);
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
@ -2210,7 +2233,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
if (flags & BPF_TRAMP_F_CALL_ORIG) {
/* arg1: mov rdi, im */
emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im);
if (emit_call(&prog, __bpf_tramp_enter, prog)) {
if (emit_rsb_call(&prog, __bpf_tramp_enter, prog)) {
ret = -EINVAL;
goto cleanup;
}
@ -2242,7 +2265,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
EMIT2(0xff, 0xd0); /* call *rax */
} else {
/* call original function */
if (emit_call(&prog, orig_call, prog)) {
if (emit_rsb_call(&prog, orig_call, prog)) {
ret = -EINVAL;
goto cleanup;
}
@ -2286,7 +2309,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
im->ip_epilogue = prog;
/* arg1: mov rdi, im */
emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im);
if (emit_call(&prog, __bpf_tramp_exit, prog)) {
if (emit_rsb_call(&prog, __bpf_tramp_exit, prog)) {
ret = -EINVAL;
goto cleanup;
}


@ -159,7 +159,7 @@ int relocate_restore_code(void)
if (!relocated_restore_code)
return -ENOMEM;
memcpy((void *)relocated_restore_code, core_restore_code, PAGE_SIZE);
__memcpy((void *)relocated_restore_code, core_restore_code, PAGE_SIZE);
/* Make the page containing the relocated code executable */
pgd = (pgd_t *)__va(read_cr3_pa()) +


@ -1210,7 +1210,7 @@ static void __init xen_setup_gdt(int cpu)
pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry_boot;
pv_ops.cpu.load_gdt = xen_load_gdt_boot;
switch_to_new_gdt(cpu);
switch_gdt_and_percpu_base(cpu);
pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry;
pv_ops.cpu.load_gdt = xen_load_gdt;


@ -81,8 +81,8 @@
#define RO_EXCEPTION_TABLE
#endif
/* Align . to a 8 byte boundary equals to maximum function alignment. */
#define ALIGN_FUNCTION() . = ALIGN(8)
/* Align . to the function alignment. */
#define ALIGN_FUNCTION() . = ALIGN(CONFIG_FUNCTION_ALIGNMENT)
/*
* LD_DEAD_CODE_DATA_ELIMINATION option enables -fdata-sections, which


@ -69,8 +69,8 @@
#endif
#ifndef __ALIGN
#define __ALIGN .align 4,0x90
#define __ALIGN_STR ".align 4,0x90"
#define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT
#define __ALIGN_STR __stringify(__ALIGN)
#endif
#ifdef __ASSEMBLY__

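For concreteness, with CONFIG_FUNCTION_ALIGNMENT=16, the alignment the cover text above describes for the call depth tracking configuration, the generic fallback now expands as follows (illustration only, not part of the patch):

	__ALIGN      ->  .balign 16
	__ALIGN_STR  ->  ".balign 16"

whereas the old fallback was the hard-coded ".align 4,0x90".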

@ -162,6 +162,8 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func, bool
extern int __init static_call_init(void);
extern void static_call_force_reinit(void);
struct static_call_mod {
struct static_call_mod *next;
struct module *mod; /* for vmlinux, mod == NULL */


@ -15,7 +15,18 @@ extern struct static_call_site __start_static_call_sites[],
extern struct static_call_tramp_key __start_static_call_tramp_key[],
__stop_static_call_tramp_key[];
static bool static_call_initialized;
static int static_call_initialized;
/*
* Must be called before early_initcall() to be effective.
*/
void static_call_force_reinit(void)
{
if (WARN_ON_ONCE(!static_call_initialized))
return;
static_call_initialized++;
}
/* mutex to protect key modules/sites */
static DEFINE_MUTEX(static_call_mutex);
@ -475,7 +486,8 @@ int __init static_call_init(void)
{
int ret;
if (static_call_initialized)
/* See static_call_force_reinit(). */
if (static_call_initialized == 1)
return 0;
cpus_read_lock();
@ -490,11 +502,12 @@ int __init static_call_init(void)
BUG();
}
static_call_initialized = true;
#ifdef CONFIG_MODULES
register_module_notifier(&static_call_module_nb);
if (!static_call_initialized)
register_module_notifier(&static_call_module_nb);
#endif
static_call_initialized = 1;
return 0;
}
early_initcall(static_call_init);
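
For orientation: static_call_initialized is now a small counter rather than a bool. 0 means static_call_init() has not run yet, 1 means it has, and anything above 1 means static_call_force_reinit() asked for all sites to be processed again. A hedged sketch of a caller driving that sequence; the function name is made up, nothing in this hunk names the real user:

static int __init redo_static_call_sites(void)
{
	/* 1 -> 2; WARNs if static_call_init() never ran */
	static_call_force_reinit();

	/*
	 * Sees static_call_initialized != 1, so all sites are transformed
	 * again; the module notifier is not registered a second time.
	 */
	return static_call_init();
}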


@ -785,7 +785,14 @@ static struct fgraph_ops fgraph_ops __initdata = {
};
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
noinline __noclone static void trace_direct_tramp(void) { }
#ifndef CALL_DEPTH_ACCOUNT
#define CALL_DEPTH_ACCOUNT ""
#endif
noinline __noclone static void trace_direct_tramp(void)
{
asm(CALL_DEPTH_ACCOUNT);
}
#endif
/*


@ -469,6 +469,7 @@ config SECTION_MISMATCH_WARN_ONLY
config DEBUG_FORCE_FUNCTION_ALIGN_64B
bool "Force all function address 64B aligned"
depends on EXPERT && (X86_64 || ARM64 || PPC32 || PPC64 || ARC)
select FUNCTION_ALIGNMENT_64B
help
There are cases where a commit from one domain changes the function
address alignment of other domains, and causes magic performance


@ -3,6 +3,7 @@
#include <linux/kthread.h>
#include <linux/ftrace.h>
#include <asm/asm-offsets.h>
#include <asm/nospec-branch.h>
extern void my_direct_func1(void);
extern void my_direct_func2(void);
@ -34,6 +35,7 @@ asm (
ASM_ENDBR
" pushq %rbp\n"
" movq %rsp, %rbp\n"
CALL_DEPTH_ACCOUNT
" call my_direct_func1\n"
" leave\n"
" .size my_tramp1, .-my_tramp1\n"
@ -45,6 +47,7 @@ asm (
ASM_ENDBR
" pushq %rbp\n"
" movq %rsp, %rbp\n"
CALL_DEPTH_ACCOUNT
" call my_direct_func2\n"
" leave\n"
ASM_RET


@ -3,6 +3,7 @@
#include <linux/kthread.h>
#include <linux/ftrace.h>
#include <asm/asm-offsets.h>
#include <asm/nospec-branch.h>
extern void my_direct_func1(unsigned long ip);
extern void my_direct_func2(unsigned long ip);
@ -32,6 +33,7 @@ asm (
ASM_ENDBR
" pushq %rbp\n"
" movq %rsp, %rbp\n"
CALL_DEPTH_ACCOUNT
" pushq %rdi\n"
" movq 8(%rbp), %rdi\n"
" call my_direct_func1\n"
@ -46,6 +48,7 @@ asm (
ASM_ENDBR
" pushq %rbp\n"
" movq %rsp, %rbp\n"
CALL_DEPTH_ACCOUNT
" pushq %rdi\n"
" movq 8(%rbp), %rdi\n"
" call my_direct_func2\n"


@ -5,6 +5,7 @@
#include <linux/ftrace.h>
#include <linux/sched/stat.h>
#include <asm/asm-offsets.h>
#include <asm/nospec-branch.h>
extern void my_direct_func(unsigned long ip);
@ -27,6 +28,7 @@ asm (
ASM_ENDBR
" pushq %rbp\n"
" movq %rsp, %rbp\n"
CALL_DEPTH_ACCOUNT
" pushq %rdi\n"
" movq 8(%rbp), %rdi\n"
" call my_direct_func\n"


@ -4,6 +4,7 @@
#include <linux/mm.h> /* for handle_mm_fault() */
#include <linux/ftrace.h>
#include <asm/asm-offsets.h>
#include <asm/nospec-branch.h>
extern void my_direct_func(struct vm_area_struct *vma,
unsigned long address, unsigned int flags);
@ -29,6 +30,7 @@ asm (
ASM_ENDBR
" pushq %rbp\n"
" movq %rsp, %rbp\n"
CALL_DEPTH_ACCOUNT
" pushq %rdi\n"
" pushq %rsi\n"
" pushq %rdx\n"


@ -4,6 +4,7 @@
#include <linux/sched.h> /* for wake_up_process() */
#include <linux/ftrace.h>
#include <asm/asm-offsets.h>
#include <asm/nospec-branch.h>
extern void my_direct_func(struct task_struct *p);
@ -26,6 +27,7 @@ asm (
ASM_ENDBR
" pushq %rbp\n"
" movq %rsp, %rbp\n"
CALL_DEPTH_ACCOUNT
" pushq %rdi\n"
" call my_direct_func\n"
" popq %rdi\n"


@ -254,7 +254,9 @@ objtool := $(objtree)/tools/objtool/objtool
objtool-args-$(CONFIG_HAVE_JUMP_LABEL_HACK) += --hacks=jump_label
objtool-args-$(CONFIG_HAVE_NOINSTR_HACK) += --hacks=noinstr
objtool-args-$(CONFIG_CALL_DEPTH_TRACKING) += --hacks=skylake
objtool-args-$(CONFIG_X86_KERNEL_IBT) += --ibt
objtool-args-$(CONFIG_FINEIBT) += --cfi
objtool-args-$(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL) += --mcount
objtool-args-$(CONFIG_UNWINDER_ORC) += --orc
objtool-args-$(CONFIG_RETPOLINE) += --retpoline
@ -264,6 +266,7 @@ objtool-args-$(CONFIG_STACK_VALIDATION) += --stackval
objtool-args-$(CONFIG_HAVE_STATIC_CALL_INLINE) += --static-call
objtool-args-$(CONFIG_HAVE_UACCESS_VALIDATION) += --uaccess
objtool-args-$(CONFIG_GCOV_KERNEL) += --no-unreachable
objtool-args-$(CONFIG_PREFIX_SYMBOLS) += --prefix=$(CONFIG_FUNCTION_PADDING_BYTES)
objtool-args = $(objtool-args-y) \
$(if $(delay-objtool), --link) \


@ -0,0 +1,187 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
Interval Trees
(C) 2012 Michel Lespinasse <walken@google.com>
include/linux/interval_tree_generic.h
*/
#include <linux/rbtree_augmented.h>
/*
* Template for implementing interval trees
*
* ITSTRUCT: struct type of the interval tree nodes
* ITRB: name of struct rb_node field within ITSTRUCT
* ITTYPE: type of the interval endpoints
* ITSUBTREE: name of ITTYPE field within ITSTRUCT holding last-in-subtree
* ITSTART(n): start endpoint of ITSTRUCT node n
* ITLAST(n): last endpoint of ITSTRUCT node n
* ITSTATIC: 'static' or empty
* ITPREFIX: prefix to use for the inline tree definitions
*
* Note - before using this, please consider if generic version
* (interval_tree.h) would work for you...
*/
#define INTERVAL_TREE_DEFINE(ITSTRUCT, ITRB, ITTYPE, ITSUBTREE, \
ITSTART, ITLAST, ITSTATIC, ITPREFIX) \
\
/* Callbacks for augmented rbtree insert and remove */ \
\
RB_DECLARE_CALLBACKS_MAX(static, ITPREFIX ## _augment, \
ITSTRUCT, ITRB, ITTYPE, ITSUBTREE, ITLAST) \
\
/* Insert / remove interval nodes from the tree */ \
\
ITSTATIC void ITPREFIX ## _insert(ITSTRUCT *node, \
struct rb_root_cached *root) \
{ \
struct rb_node **link = &root->rb_root.rb_node, *rb_parent = NULL; \
ITTYPE start = ITSTART(node), last = ITLAST(node); \
ITSTRUCT *parent; \
bool leftmost = true; \
\
while (*link) { \
rb_parent = *link; \
parent = rb_entry(rb_parent, ITSTRUCT, ITRB); \
if (parent->ITSUBTREE < last) \
parent->ITSUBTREE = last; \
if (start < ITSTART(parent)) \
link = &parent->ITRB.rb_left; \
else { \
link = &parent->ITRB.rb_right; \
leftmost = false; \
} \
} \
\
node->ITSUBTREE = last; \
rb_link_node(&node->ITRB, rb_parent, link); \
rb_insert_augmented_cached(&node->ITRB, root, \
leftmost, &ITPREFIX ## _augment); \
} \
\
ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node, \
struct rb_root_cached *root) \
{ \
rb_erase_augmented_cached(&node->ITRB, root, &ITPREFIX ## _augment); \
} \
\
/* \
* Iterate over intervals intersecting [start;last] \
* \
* Note that a node's interval intersects [start;last] iff: \
* Cond1: ITSTART(node) <= last \
* and \
* Cond2: start <= ITLAST(node) \
*/ \
\
static ITSTRUCT * \
ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \
{ \
while (true) { \
/* \
* Loop invariant: start <= node->ITSUBTREE \
* (Cond2 is satisfied by one of the subtree nodes) \
*/ \
if (node->ITRB.rb_left) { \
ITSTRUCT *left = rb_entry(node->ITRB.rb_left, \
ITSTRUCT, ITRB); \
if (start <= left->ITSUBTREE) { \
/* \
* Some nodes in left subtree satisfy Cond2. \
* Iterate to find the leftmost such node N. \
* If it also satisfies Cond1, that's the \
* match we are looking for. Otherwise, there \
* is no matching interval as nodes to the \
* right of N can't satisfy Cond1 either. \
*/ \
node = left; \
continue; \
} \
} \
if (ITSTART(node) <= last) { /* Cond1 */ \
if (start <= ITLAST(node)) /* Cond2 */ \
return node; /* node is leftmost match */ \
if (node->ITRB.rb_right) { \
node = rb_entry(node->ITRB.rb_right, \
ITSTRUCT, ITRB); \
if (start <= node->ITSUBTREE) \
continue; \
} \
} \
return NULL; /* No match */ \
} \
} \
\
ITSTATIC ITSTRUCT * \
ITPREFIX ## _iter_first(struct rb_root_cached *root, \
ITTYPE start, ITTYPE last) \
{ \
ITSTRUCT *node, *leftmost; \
\
if (!root->rb_root.rb_node) \
return NULL; \
\
/* \
* Fastpath range intersection/overlap between A: [a0, a1] and \
* B: [b0, b1] is given by: \
* \
* a0 <= b1 && b0 <= a1 \
* \
* ... where A holds the lock range and B holds the smallest \
* 'start' and largest 'last' in the tree. For the latter, we \
* rely on the root node, which by augmented interval tree \
* property, holds the largest value in its last-in-subtree. \
* This allows mitigating some of the tree walk overhead for \
* non-intersecting ranges, maintained and consulted in O(1). \
*/ \
node = rb_entry(root->rb_root.rb_node, ITSTRUCT, ITRB); \
if (node->ITSUBTREE < start) \
return NULL; \
\
leftmost = rb_entry(root->rb_leftmost, ITSTRUCT, ITRB); \
if (ITSTART(leftmost) > last) \
return NULL; \
\
return ITPREFIX ## _subtree_search(node, start, last); \
} \
\
ITSTATIC ITSTRUCT * \
ITPREFIX ## _iter_next(ITSTRUCT *node, ITTYPE start, ITTYPE last) \
{ \
struct rb_node *rb = node->ITRB.rb_right, *prev; \
\
while (true) { \
/* \
* Loop invariants: \
* Cond1: ITSTART(node) <= last \
* rb == node->ITRB.rb_right \
* \
* First, search right subtree if suitable \
*/ \
if (rb) { \
ITSTRUCT *right = rb_entry(rb, ITSTRUCT, ITRB); \
if (start <= right->ITSUBTREE) \
return ITPREFIX ## _subtree_search(right, \
start, last); \
} \
\
/* Move up the tree until we come from a node's left child */ \
do { \
rb = rb_parent(&node->ITRB); \
if (!rb) \
return NULL; \
prev = &node->ITRB; \
node = rb_entry(rb, ITSTRUCT, ITRB); \
rb = node->ITRB.rb_right; \
} while (prev == rb); \
\
/* Check if the node intersects [start;last] */ \
if (last < ITSTART(node)) /* !Cond1 */ \
return NULL; \
else if (start <= ITLAST(node)) /* Cond2 */ \
return node; \
} \
}
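
For a quick orientation, here is a hedged sketch of how the template is meant to be instantiated. The struct and names below are made up for illustration, but they mirror the objtool symbol-tree instantiation that appears later in this diff:

struct range_node {
	struct rb_node	rb;			/* ITRB */
	unsigned long	start, last;		/* interval endpoints, inclusive */
	unsigned long	subtree_last;		/* ITSUBTREE, maintained by the tree */
};

static inline unsigned long range_start(struct range_node *n) { return n->start; }
static inline unsigned long range_last(struct range_node *n)  { return n->last; }

INTERVAL_TREE_DEFINE(struct range_node, rb, unsigned long, subtree_last,
		     range_start, range_last, static, range_it)

The expansion then provides range_it_insert(), range_it_remove(), range_it_iter_first() and range_it_iter_next(), all operating on a struct rb_root_cached.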


@ -73,6 +73,30 @@ unsigned long arch_jump_destination(struct instruction *insn)
return insn->offset + insn->len + insn->immediate;
}
bool arch_pc_relative_reloc(struct reloc *reloc)
{
/*
* All relocation types where P (the address of the target)
* is included in the computation.
*/
switch (reloc->type) {
case R_X86_64_PC8:
case R_X86_64_PC16:
case R_X86_64_PC32:
case R_X86_64_PC64:
case R_X86_64_PLT32:
case R_X86_64_GOTPC32:
case R_X86_64_GOTPCREL:
return true;
default:
break;
}
return false;
}
#define ADD_OP(op) \
if (!(op = calloc(1, sizeof(*op)))) \
return -1; \


@ -57,12 +57,17 @@ static int parse_hacks(const struct option *opt, const char *str, int unset)
found = true;
}
if (!str || strstr(str, "skylake")) {
opts.hack_skylake = true;
found = true;
}
return found ? 0 : -1;
}
const struct option check_options[] = {
OPT_GROUP("Actions:"),
OPT_CALLBACK_OPTARG('h', "hacks", NULL, NULL, "jump_label,noinstr", "patch toolchain bugs/limitations", parse_hacks),
OPT_CALLBACK_OPTARG('h', "hacks", NULL, NULL, "jump_label,noinstr,skylake", "patch toolchain bugs/limitations", parse_hacks),
OPT_BOOLEAN('i', "ibt", &opts.ibt, "validate and annotate IBT"),
OPT_BOOLEAN('m', "mcount", &opts.mcount, "annotate mcount/fentry calls for ftrace"),
OPT_BOOLEAN('n', "noinstr", &opts.noinstr, "validate noinstr rules"),
@ -70,10 +75,12 @@ const struct option check_options[] = {
OPT_BOOLEAN('r', "retpoline", &opts.retpoline, "validate and annotate retpoline usage"),
OPT_BOOLEAN(0, "rethunk", &opts.rethunk, "validate and annotate rethunk usage"),
OPT_BOOLEAN(0, "unret", &opts.unret, "validate entry unret placement"),
OPT_INTEGER(0, "prefix", &opts.prefix, "generate prefix symbols"),
OPT_BOOLEAN('l', "sls", &opts.sls, "validate straight-line-speculation mitigations"),
OPT_BOOLEAN('s', "stackval", &opts.stackval, "validate frame pointer rules"),
OPT_BOOLEAN('t', "static-call", &opts.static_call, "annotate static calls"),
OPT_BOOLEAN('u', "uaccess", &opts.uaccess, "validate uaccess rules for SMAP"),
OPT_BOOLEAN(0 , "cfi", &opts.cfi, "annotate kernel control flow integrity (kCFI) function preambles"),
OPT_CALLBACK_OPTARG(0, "dump", NULL, NULL, "orc", "dump metadata", parse_dump),
OPT_GROUP("Options:"),


@ -62,12 +62,12 @@ static struct instruction *next_insn_same_func(struct objtool_file *file,
struct instruction *insn)
{
struct instruction *next = list_next_entry(insn, list);
struct symbol *func = insn->func;
struct symbol *func = insn_func(insn);
if (!func)
return NULL;
if (&next->list != &file->insn_list && next->func == func)
if (&next->list != &file->insn_list && insn_func(next) == func)
return next;
/* Check if we're already in the subfunction: */
@ -83,7 +83,7 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file,
{
struct instruction *prev = list_prev_entry(insn, list);
if (&prev->list != &file->insn_list && prev->func == insn->func)
if (&prev->list != &file->insn_list && insn_func(prev) == insn_func(insn))
return prev;
return NULL;
@ -129,16 +129,13 @@ static bool is_jump_table_jump(struct instruction *insn)
static bool is_sibling_call(struct instruction *insn)
{
/*
* Assume only ELF functions can make sibling calls. This ensures
* sibling call detection consistency between vmlinux.o and individual
* objects.
* Assume only STT_FUNC calls have jump-tables.
*/
if (!insn->func)
return false;
/* An indirect jump is either a sibling call or a jump to a table. */
if (insn->type == INSN_JUMP_DYNAMIC)
return !is_jump_table_jump(insn);
if (insn_func(insn)) {
/* An indirect jump is either a sibling call or a jump to a table. */
if (insn->type == INSN_JUMP_DYNAMIC)
return !is_jump_table_jump(insn);
}
/* add_jump_destinations() sets insn->call_dest for sibling calls. */
return (is_static_jump(insn) && insn->call_dest);
@ -207,7 +204,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
return false;
insn = find_insn(file, func->sec, func->offset);
if (!insn->func)
if (!insn_func(insn))
return false;
func_for_each_insn(file, func, insn) {
@ -243,7 +240,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
return false;
}
return __dead_end_function(file, dest->func, recursion+1);
return __dead_end_function(file, insn_func(dest), recursion+1);
}
}
@ -382,6 +379,15 @@ static int decode_instructions(struct objtool_file *file)
!strncmp(sec->name, ".text.__x86.", 12))
sec->noinstr = true;
/*
* .init.text code is run before userspace and thus doesn't
* strictly need retpolines, except for modules which are
* loaded late, they very much do need retpoline in their
* .init.text
*/
if (!strcmp(sec->name, ".init.text") && !opts.module)
sec->init = true;
for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) {
insn = malloc(sizeof(*insn));
if (!insn) {
@ -418,7 +424,10 @@ static int decode_instructions(struct objtool_file *file)
}
list_for_each_entry(func, &sec->symbol_list, list) {
if (func->type != STT_FUNC || func->alias != func)
if (func->type != STT_NOTYPE && func->type != STT_FUNC)
continue;
if (func->return_thunk || func->alias != func)
continue;
if (!find_insn(file, sec, func->offset)) {
@ -428,9 +437,11 @@ static int decode_instructions(struct objtool_file *file)
}
sym_for_each_insn(file, func, insn) {
insn->func = func;
if (insn->type == INSN_ENDBR && list_empty(&insn->call_node)) {
if (insn->offset == insn->func->offset) {
insn->sym = func;
if (func->type == STT_FUNC &&
insn->type == INSN_ENDBR &&
list_empty(&insn->call_node)) {
if (insn->offset == func->offset) {
list_add_tail(&insn->call_node, &file->endbr_list);
file->nr_endbr++;
} else {
@ -850,6 +861,68 @@ static int create_ibt_endbr_seal_sections(struct objtool_file *file)
return 0;
}
static int create_cfi_sections(struct objtool_file *file)
{
struct section *sec, *s;
struct symbol *sym;
unsigned int *loc;
int idx;
sec = find_section_by_name(file->elf, ".cfi_sites");
if (sec) {
INIT_LIST_HEAD(&file->call_list);
WARN("file already has .cfi_sites section, skipping");
return 0;
}
idx = 0;
for_each_sec(file, s) {
if (!s->text)
continue;
list_for_each_entry(sym, &s->symbol_list, list) {
if (sym->type != STT_FUNC)
continue;
if (strncmp(sym->name, "__cfi_", 6))
continue;
idx++;
}
}
sec = elf_create_section(file->elf, ".cfi_sites", 0, sizeof(unsigned int), idx);
if (!sec)
return -1;
idx = 0;
for_each_sec(file, s) {
if (!s->text)
continue;
list_for_each_entry(sym, &s->symbol_list, list) {
if (sym->type != STT_FUNC)
continue;
if (strncmp(sym->name, "__cfi_", 6))
continue;
loc = (unsigned int *)sec->data->d_buf + idx;
memset(loc, 0, sizeof(unsigned int));
if (elf_add_reloc_to_insn(file->elf, sec,
idx * sizeof(unsigned int),
R_X86_64_PC32,
s, sym->offset))
return -1;
idx++;
}
}
return 0;
}
static int create_mcount_loc_sections(struct objtool_file *file)
{
struct section *sec;
@ -893,6 +966,49 @@ static int create_mcount_loc_sections(struct objtool_file *file)
return 0;
}
static int create_direct_call_sections(struct objtool_file *file)
{
struct instruction *insn;
struct section *sec;
unsigned int *loc;
int idx;
sec = find_section_by_name(file->elf, ".call_sites");
if (sec) {
INIT_LIST_HEAD(&file->call_list);
WARN("file already has .call_sites section, skipping");
return 0;
}
if (list_empty(&file->call_list))
return 0;
idx = 0;
list_for_each_entry(insn, &file->call_list, call_node)
idx++;
sec = elf_create_section(file->elf, ".call_sites", 0, sizeof(unsigned int), idx);
if (!sec)
return -1;
idx = 0;
list_for_each_entry(insn, &file->call_list, call_node) {
loc = (unsigned int *)sec->data->d_buf + idx;
memset(loc, 0, sizeof(unsigned int));
if (elf_add_reloc_to_insn(file->elf, sec,
idx * sizeof(unsigned int),
R_X86_64_PC32,
insn->sec, insn->offset))
return -1;
idx++;
}
return 0;
}
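
The section built here is just an array of 32-bit entries, each resolved via R_X86_64_PC32 so that it ends up holding the distance from the entry itself to the annotated call instruction. A hedged sketch of how a consumer typically turns such an entry back into an address; the linker symbols and the helper name below are illustrative, not taken from this series:

extern s32 __call_sites[], __call_sites_end[];	/* illustrative section bounds */

static void for_each_call_site(void (*fn)(void *addr))
{
	s32 *s;

	for (s = __call_sites; s < __call_sites_end; s++) {
		/* entry value == target - &entry, per R_X86_64_PC32 */
		void *addr = (void *)s + *s;

		fn(addr);
	}
}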
/*
* Warnings shouldn't be reported for ignored functions.
*/
@ -1280,6 +1396,9 @@ static void annotate_call_site(struct objtool_file *file,
return;
}
if (insn->type == INSN_CALL && !insn->sec->init)
list_add_tail(&insn->call_node, &file->call_list);
if (!sibling && dead_end_function(file, sym))
insn->dead_end = true;
}
@ -1350,27 +1469,50 @@ static void add_return_call(struct objtool_file *file, struct instruction *insn,
list_add_tail(&insn->call_node, &file->return_thunk_list);
}
static bool same_function(struct instruction *insn1, struct instruction *insn2)
static bool is_first_func_insn(struct objtool_file *file,
struct instruction *insn, struct symbol *sym)
{
return insn1->func->pfunc == insn2->func->pfunc;
}
static bool is_first_func_insn(struct objtool_file *file, struct instruction *insn)
{
if (insn->offset == insn->func->offset)
if (insn->offset == sym->offset)
return true;
/* Allow direct CALL/JMP past ENDBR */
if (opts.ibt) {
struct instruction *prev = prev_insn_same_sym(file, insn);
if (prev && prev->type == INSN_ENDBR &&
insn->offset == insn->func->offset + prev->len)
insn->offset == sym->offset + prev->len)
return true;
}
return false;
}
/*
* A sibling call is a tail-call to another symbol -- to differentiate from a
* recursive tail-call which is to the same symbol.
*/
static bool jump_is_sibling_call(struct objtool_file *file,
struct instruction *from, struct instruction *to)
{
struct symbol *fs = from->sym;
struct symbol *ts = to->sym;
/* Not a sibling call if from/to a symbol hole */
if (!fs || !ts)
return false;
/* Not a sibling call if not targeting the start of a symbol. */
if (!is_first_func_insn(file, to, ts))
return false;
/* Disallow sibling calls into STT_NOTYPE */
if (ts->type == STT_NOTYPE)
return false;
/* Must not be self to be a sibling */
return fs->pfunc != ts->pfunc;
}
/*
* Find the destination instructions for all jumps.
*/
@ -1405,7 +1547,7 @@ static int add_jump_destinations(struct objtool_file *file)
} else if (reloc->sym->return_thunk) {
add_return_call(file, insn, true);
continue;
} else if (insn->func) {
} else if (insn_func(insn)) {
/*
* External sibling call or internal sibling call with
* STT_FUNC reloc.
@ -1447,8 +1589,8 @@ static int add_jump_destinations(struct objtool_file *file)
/*
* Cross-function jump.
*/
if (insn->func && jump_dest->func &&
insn->func != jump_dest->func) {
if (insn_func(insn) && insn_func(jump_dest) &&
insn_func(insn) != insn_func(jump_dest)) {
/*
* For GCC 8+, create parent/child links for any cold
@ -1465,22 +1607,22 @@ static int add_jump_destinations(struct objtool_file *file)
* case where the parent function's only reference to a
* subfunction is through a jump table.
*/
if (!strstr(insn->func->name, ".cold") &&
strstr(jump_dest->func->name, ".cold")) {
insn->func->cfunc = jump_dest->func;
jump_dest->func->pfunc = insn->func;
} else if (!same_function(insn, jump_dest) &&
is_first_func_insn(file, jump_dest)) {
/*
* Internal sibling call without reloc or with
* STT_SECTION reloc.
*/
add_call_dest(file, insn, jump_dest->func, true);
continue;
if (!strstr(insn_func(insn)->name, ".cold") &&
strstr(insn_func(jump_dest)->name, ".cold")) {
insn_func(insn)->cfunc = insn_func(jump_dest);
insn_func(jump_dest)->pfunc = insn_func(insn);
}
}
if (jump_is_sibling_call(file, insn, jump_dest)) {
/*
* Internal sibling call without reloc or with
* STT_SECTION reloc.
*/
add_call_dest(file, insn, insn_func(jump_dest), true);
continue;
}
insn->jump_dest = jump_dest;
}
@ -1527,7 +1669,7 @@ static int add_call_destinations(struct objtool_file *file)
return -1;
}
if (insn->func && insn->call_dest->type != STT_FUNC) {
if (insn_func(insn) && insn->call_dest->type != STT_FUNC) {
WARN_FUNC("unsupported call to non-function",
insn->sec, insn->offset);
return -1;
@ -1623,7 +1765,7 @@ static int handle_group_alt(struct objtool_file *file,
nop->offset = special_alt->new_off + special_alt->new_len;
nop->len = special_alt->orig_len - special_alt->new_len;
nop->type = INSN_NOP;
nop->func = orig_insn->func;
nop->sym = orig_insn->sym;
nop->alt_group = new_alt_group;
nop->ignore = orig_insn->ignore_alts;
}
@ -1643,7 +1785,7 @@ static int handle_group_alt(struct objtool_file *file,
last_new_insn = insn;
insn->ignore = orig_insn->ignore_alts;
insn->func = orig_insn->func;
insn->sym = orig_insn->sym;
insn->alt_group = new_alt_group;
/*
@ -1655,7 +1797,7 @@ static int handle_group_alt(struct objtool_file *file,
* accordingly.
*/
alt_reloc = insn_reloc(file, insn);
if (alt_reloc &&
if (alt_reloc && arch_pc_relative_reloc(alt_reloc) &&
!arch_support_alt_relocation(special_alt, insn, alt_reloc)) {
WARN_FUNC("unsupported relocation in alternatives section",
@ -1837,7 +1979,7 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
struct reloc *reloc = table;
struct instruction *dest_insn;
struct alternative *alt;
struct symbol *pfunc = insn->func->pfunc;
struct symbol *pfunc = insn_func(insn)->pfunc;
unsigned int prev_offset = 0;
/*
@ -1864,7 +2006,7 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
break;
/* Make sure the destination is in the same function: */
if (!dest_insn->func || dest_insn->func->pfunc != pfunc)
if (!insn_func(dest_insn) || insn_func(dest_insn)->pfunc != pfunc)
break;
alt = malloc(sizeof(*alt));
@ -1904,7 +2046,7 @@ static struct reloc *find_jump_table(struct objtool_file *file,
* it.
*/
for (;
insn && insn->func && insn->func->pfunc == func;
insn && insn_func(insn) && insn_func(insn)->pfunc == func;
insn = insn->first_jump_src ?: prev_insn_same_sym(file, insn)) {
if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC)
@ -1921,7 +2063,7 @@ static struct reloc *find_jump_table(struct objtool_file *file,
if (!table_reloc)
continue;
dest_insn = find_insn(file, table_reloc->sym->sec, table_reloc->addend);
if (!dest_insn || !dest_insn->func || dest_insn->func->pfunc != func)
if (!dest_insn || !insn_func(dest_insn) || insn_func(dest_insn)->pfunc != func)
continue;
return table_reloc;
@ -2370,6 +2512,13 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
/*
* Must be before add_{jump_call}_destination.
*/
ret = classify_symbols(file);
if (ret)
return ret;
ret = decode_instructions(file);
if (ret)
return ret;
@ -2388,13 +2537,6 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
/*
* Must be before add_{jump_call}_destination.
*/
ret = classify_symbols(file);
if (ret)
return ret;
/*
* Must be before add_jump_destinations(), which depends on 'func'
* being set for alternatives, to enable proper sibling call detection.
@ -2603,7 +2745,7 @@ static int update_cfi_state(struct instruction *insn,
/* stack operations don't make sense with an undefined CFA */
if (cfa->base == CFI_UNDEFINED) {
if (insn->func) {
if (insn_func(insn)) {
WARN_FUNC("undefined stack state", insn->sec, insn->offset);
return -1;
}
@ -2949,7 +3091,7 @@ static int update_cfi_state(struct instruction *insn,
}
/* detect when asm code uses rbp as a scratch register */
if (opts.stackval && insn->func && op->src.reg == CFI_BP &&
if (opts.stackval && insn_func(insn) && op->src.reg == CFI_BP &&
cfa->base != CFI_BP)
cfi->bp_scratch = true;
break;
@ -3259,7 +3401,7 @@ static int validate_sibling_call(struct objtool_file *file,
struct instruction *insn,
struct insn_state *state)
{
if (has_modified_stack_frame(insn, state)) {
if (insn_func(insn) && has_modified_stack_frame(insn, state)) {
WARN_FUNC("sibling call from callable instruction with modified stack frame",
insn->sec, insn->offset);
return 1;
@ -3345,13 +3487,14 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
while (1) {
next_insn = next_insn_to_validate(file, insn);
if (func && insn->func && func != insn->func->pfunc) {
if (func && insn_func(insn) && func != insn_func(insn)->pfunc) {
/* Ignore KCFI type preambles, which always fall through */
if (!strncmp(func->name, "__cfi_", 6))
if (!strncmp(func->name, "__cfi_", 6) ||
!strncmp(func->name, "__pfx_", 6))
return 0;
WARN("%s() falls through to next function %s()",
func->name, insn->func->name);
func->name, insn_func(insn)->name);
return 1;
}
@ -3593,7 +3736,7 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec)
while (&insn->list != &file->insn_list && (!sec || insn->sec == sec)) {
if (insn->hint && !insn->visited && !insn->ignore) {
ret = validate_branch(file, insn->func, insn, state);
ret = validate_branch(file, insn_func(insn), insn, state);
if (ret && opts.backtrace)
BT_FUNC("<=== (hint)", insn);
warnings += ret;
@ -3758,13 +3901,7 @@ static int validate_retpoline(struct objtool_file *file)
if (insn->retpoline_safe)
continue;
/*
* .init.text code is ran before userspace and thus doesn't
* strictly need retpolines, except for modules which are
* loaded late, they very much do need retpoline in their
* .init.text
*/
if (!strcmp(insn->sec->name, ".init.text") && !opts.module)
if (insn->sec->init)
continue;
if (insn->type == INSN_RETURN) {
@ -3822,7 +3959,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
* In this case we'll find a piece of code (whole function) that is not
* covered by a !section symbol. Ignore them.
*/
if (opts.link && !insn->func) {
if (opts.link && !insn_func(insn)) {
int size = find_symbol_hole_containing(insn->sec, insn->offset);
unsigned long end = insn->offset + size;
@ -3846,10 +3983,10 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
/*
* If this hole jumps to a .cold function, mark it ignore too.
*/
if (insn->jump_dest && insn->jump_dest->func &&
strstr(insn->jump_dest->func->name, ".cold")) {
if (insn->jump_dest && insn_func(insn->jump_dest) &&
strstr(insn_func(insn->jump_dest)->name, ".cold")) {
struct instruction *dest = insn->jump_dest;
func_for_each_insn(file, dest->func, dest)
func_for_each_insn(file, insn_func(dest), dest)
dest->ignore = true;
}
}
@ -3857,10 +3994,10 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
return false;
}
if (!insn->func)
if (!insn_func(insn))
return false;
if (insn->func->static_call_tramp)
if (insn_func(insn)->static_call_tramp)
return true;
/*
@ -3891,7 +4028,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
if (insn->type == INSN_JUMP_UNCONDITIONAL) {
if (insn->jump_dest &&
insn->jump_dest->func == insn->func) {
insn_func(insn->jump_dest) == insn_func(insn)) {
insn = insn->jump_dest;
continue;
}
@ -3899,7 +4036,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
break;
}
if (insn->offset + insn->len >= insn->func->offset + insn->func->len)
if (insn->offset + insn->len >= insn_func(insn)->offset + insn_func(insn)->len)
break;
insn = list_next_entry(insn, list);
@ -3908,6 +4045,54 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
return false;
}
static int add_prefix_symbol(struct objtool_file *file, struct symbol *func,
struct instruction *insn)
{
if (!opts.prefix)
return 0;
for (;;) {
struct instruction *prev = list_prev_entry(insn, list);
u64 offset;
if (&prev->list == &file->insn_list)
break;
if (prev->type != INSN_NOP)
break;
offset = func->offset - prev->offset;
if (offset >= opts.prefix) {
if (offset == opts.prefix) {
/*
* Since the sec->symbol_list is ordered by
* offset (see elf_add_symbol()) the added
* symbol will not be seen by the iteration in
* validate_section().
*
* Hence the lack of list_for_each_entry_safe()
* there.
*
* The direct consequence is that prefix symbols
* don't get visited (because pointless), except
* for the logic in ignore_unreachable_insn()
* that needs the terminating insn to be visited,
* otherwise it will report the hole.
*
* Hence mark the first instruction of the
* prefix symbol as visited.
*/
prev->visited |= VISITED_BRANCH;
elf_create_prefix_symbol(file->elf, func, opts.prefix);
}
break;
}
insn = prev;
}
return 0;
}
static int validate_symbol(struct objtool_file *file, struct section *sec,
struct symbol *sym, struct insn_state *state)
{
@ -3926,9 +4111,11 @@ static int validate_symbol(struct objtool_file *file, struct section *sec,
if (!insn || insn->ignore || insn->visited)
return 0;
add_prefix_symbol(file, sym, insn);
state->uaccess = sym->uaccess_safe;
ret = validate_branch(file, insn->func, insn, *state);
ret = validate_branch(file, insn_func(insn), insn, *state);
if (ret && opts.backtrace)
BT_FUNC("<=== (sym)", insn);
return ret;
@ -3994,6 +4181,24 @@ static void mark_endbr_used(struct instruction *insn)
list_del_init(&insn->call_node);
}
static bool noendbr_range(struct objtool_file *file, struct instruction *insn)
{
struct symbol *sym = find_symbol_containing(insn->sec, insn->offset-1);
struct instruction *first;
if (!sym)
return false;
first = find_insn(file, sym->sec, sym->offset);
if (!first)
return false;
if (first->type != INSN_ENDBR && !first->noendbr)
return false;
return insn->offset == sym->offset + sym->len;
}
static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn)
{
struct instruction *dest;
@ -4047,7 +4252,7 @@ static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn
continue;
}
if (dest->func && dest->func == insn->func) {
if (insn_func(dest) && insn_func(dest) == insn_func(insn)) {
/*
* Anything from->to self is either _THIS_IP_ or
* IRET-to-self.
@ -4066,9 +4271,19 @@ static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn
continue;
}
/*
* Accept anything ANNOTATE_NOENDBR.
*/
if (dest->noendbr)
continue;
/*
* Accept if this is the instruction after a symbol
* that is (no)endbr -- typical code-range usage.
*/
if (noendbr_range(file, dest))
continue;
WARN_FUNC("relocation to !ENDBR: %s",
insn->sec, insn->offset,
offstr(dest->sec, dest->offset));
@ -4307,11 +4522,25 @@ int check(struct objtool_file *file)
warnings += ret;
}
if (opts.cfi) {
ret = create_cfi_sections(file);
if (ret < 0)
goto out;
warnings += ret;
}
if (opts.rethunk) {
ret = create_return_sites_sections(file);
if (ret < 0)
goto out;
warnings += ret;
if (opts.hack_skylake) {
ret = create_direct_call_sections(file);
if (ret < 0)
goto out;
warnings += ret;
}
}
if (opts.mcount) {


@ -16,6 +16,7 @@
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <linux/interval_tree_generic.h>
#include <objtool/builtin.h>
#include <objtool/elf.h>
@ -50,39 +51,23 @@ static inline u32 str_hash(const char *str)
__elf_table(name); \
})
static bool symbol_to_offset(struct rb_node *a, const struct rb_node *b)
static inline unsigned long __sym_start(struct symbol *s)
{
struct symbol *sa = rb_entry(a, struct symbol, node);
struct symbol *sb = rb_entry(b, struct symbol, node);
if (sa->offset < sb->offset)
return true;
if (sa->offset > sb->offset)
return false;
if (sa->len < sb->len)
return true;
if (sa->len > sb->len)
return false;
sa->alias = sb;
return false;
return s->offset;
}
static int symbol_by_offset(const void *key, const struct rb_node *node)
static inline unsigned long __sym_last(struct symbol *s)
{
const struct symbol *s = rb_entry(node, struct symbol, node);
const unsigned long *o = key;
if (*o < s->offset)
return -1;
if (*o >= s->offset + s->len)
return 1;
return 0;
return s->offset + s->len - 1;
}
INTERVAL_TREE_DEFINE(struct symbol, node, unsigned long, __subtree_last,
__sym_start, __sym_last, static, __sym)
#define __sym_for_each(_iter, _tree, _start, _end) \
for (_iter = __sym_iter_first((_tree), (_start), (_end)); \
_iter; _iter = __sym_iter_next(_iter, (_start), (_end)))
struct symbol_hole {
unsigned long key;
const struct symbol *sym;
@ -147,13 +132,12 @@ static struct symbol *find_symbol_by_index(struct elf *elf, unsigned int idx)
struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset)
{
struct rb_node *node;
struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree;
struct symbol *iter;
rb_for_each(node, &offset, &sec->symbol_tree, symbol_by_offset) {
struct symbol *s = rb_entry(node, struct symbol, node);
if (s->offset == offset && s->type != STT_SECTION)
return s;
__sym_for_each(iter, tree, offset, offset) {
if (iter->offset == offset && iter->type != STT_SECTION)
return iter;
}
return NULL;
@ -161,13 +145,12 @@ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset)
struct symbol *find_func_by_offset(struct section *sec, unsigned long offset)
{
struct rb_node *node;
struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree;
struct symbol *iter;
rb_for_each(node, &offset, &sec->symbol_tree, symbol_by_offset) {
struct symbol *s = rb_entry(node, struct symbol, node);
if (s->offset == offset && s->type == STT_FUNC)
return s;
__sym_for_each(iter, tree, offset, offset) {
if (iter->offset == offset && iter->type == STT_FUNC)
return iter;
}
return NULL;
@ -175,13 +158,12 @@ struct symbol *find_func_by_offset(struct section *sec, unsigned long offset)
struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset)
{
struct rb_node *node;
struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree;
struct symbol *iter;
rb_for_each(node, &offset, &sec->symbol_tree, symbol_by_offset) {
struct symbol *s = rb_entry(node, struct symbol, node);
if (s->type != STT_SECTION)
return s;
__sym_for_each(iter, tree, offset, offset) {
if (iter->type != STT_SECTION)
return iter;
}
return NULL;
@ -202,7 +184,7 @@ int find_symbol_hole_containing(const struct section *sec, unsigned long offset)
/*
* Find the rightmost symbol for which @offset is after it.
*/
n = rb_find(&hole, &sec->symbol_tree, symbol_hole_by_offset);
n = rb_find(&hole, &sec->symbol_tree.rb_root, symbol_hole_by_offset);
/* found a symbol that contains @offset */
if (n)
@ -224,13 +206,12 @@ int find_symbol_hole_containing(const struct section *sec, unsigned long offset)
struct symbol *find_func_containing(struct section *sec, unsigned long offset)
{
struct rb_node *node;
struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree;
struct symbol *iter;
rb_for_each(node, &offset, &sec->symbol_tree, symbol_by_offset) {
struct symbol *s = rb_entry(node, struct symbol, node);
if (s->type == STT_FUNC)
return s;
__sym_for_each(iter, tree, offset, offset) {
if (iter->type == STT_FUNC)
return iter;
}
return NULL;
@ -373,7 +354,9 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym)
{
struct list_head *entry;
struct rb_node *pnode;
struct symbol *iter;
INIT_LIST_HEAD(&sym->reloc_list);
INIT_LIST_HEAD(&sym->pv_target);
sym->alias = sym;
@ -386,7 +369,12 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym)
sym->offset = sym->sym.st_value;
sym->len = sym->sym.st_size;
rb_add(&sym->node, &sym->sec->symbol_tree, symbol_to_offset);
__sym_for_each(iter, &sym->sec->symbol_tree, sym->offset, sym->offset) {
if (iter->offset == sym->offset && iter->type == sym->type)
iter->alias = sym;
}
__sym_insert(sym, &sym->sec->symbol_tree);
pnode = rb_prev(&sym->node);
if (pnode)
entry = &rb_entry(pnode, struct symbol, node)->list;
@ -401,7 +389,7 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym)
* can exist within a function, confusing the sorting.
*/
if (!sym->len)
rb_erase(&sym->node, &sym->sec->symbol_tree);
__sym_remove(sym, &sym->sec->symbol_tree);
}
static int read_symbols(struct elf *elf)
@ -570,6 +558,7 @@ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
reloc->sym = sym;
reloc->addend = addend;
list_add_tail(&reloc->sym_reloc_entry, &sym->reloc_list);
list_add_tail(&reloc->list, &sec->reloc->reloc_list);
elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc));
@ -586,21 +575,10 @@ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
*/
static void elf_dirty_reloc_sym(struct elf *elf, struct symbol *sym)
{
struct section *sec;
struct reloc *reloc;
list_for_each_entry(sec, &elf->sections, list) {
struct reloc *reloc;
if (sec->changed)
continue;
list_for_each_entry(reloc, &sec->reloc_list, list) {
if (reloc->sym == sym) {
sec->changed = true;
break;
}
}
}
list_for_each_entry(reloc, &sym->reloc_list, sym_reloc_entry)
reloc->sec->changed = true;
}
/*
@ -647,6 +625,12 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab,
/* end-of-list */
if (!symtab_data) {
/*
* Over-allocate to avoid O(n^2) symbol creation
* behaviour. The down side is that libelf doesn't
* like this; see elf_truncate_section() for the fixup.
*/
int num = max(1U, sym->idx/3);
void *buf;
if (idx) {
@ -660,28 +644,34 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab,
if (t)
shndx_data = elf_newdata(t);
buf = calloc(1, entsize);
buf = calloc(num, entsize);
if (!buf) {
WARN("malloc");
return -1;
}
symtab_data->d_buf = buf;
symtab_data->d_size = entsize;
symtab_data->d_size = num * entsize;
symtab_data->d_align = 1;
symtab_data->d_type = ELF_T_SYM;
symtab->sh.sh_size += entsize;
symtab->changed = true;
symtab->truncate = true;
if (t) {
shndx_data->d_buf = &sym->sec->idx;
shndx_data->d_size = sizeof(Elf32_Word);
buf = calloc(num, sizeof(Elf32_Word));
if (!buf) {
WARN("malloc");
return -1;
}
shndx_data->d_buf = buf;
shndx_data->d_size = num * sizeof(Elf32_Word);
shndx_data->d_align = sizeof(Elf32_Word);
shndx_data->d_type = ELF_T_WORD;
symtab_shndx->sh.sh_size += sizeof(Elf32_Word);
symtab_shndx->changed = true;
symtab_shndx->truncate = true;
}
break;
@ -730,11 +720,11 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab,
}
static struct symbol *
elf_create_section_symbol(struct elf *elf, struct section *sec)
__elf_create_symbol(struct elf *elf, struct symbol *sym)
{
struct section *symtab, *symtab_shndx;
Elf32_Word first_non_local, new_idx;
struct symbol *sym, *old;
struct symbol *old;
symtab = find_section_by_name(elf, ".symtab");
if (symtab) {
@ -744,27 +734,16 @@ elf_create_section_symbol(struct elf *elf, struct section *sec)
return NULL;
}
sym = calloc(1, sizeof(*sym));
if (!sym) {
perror("malloc");
return NULL;
}
new_idx = symtab->sh.sh_size / symtab->sh.sh_entsize;
sym->name = sec->name;
sym->sec = sec;
// st_name 0
sym->sym.st_info = GELF_ST_INFO(STB_LOCAL, STT_SECTION);
// st_other 0
// st_value 0
// st_size 0
if (GELF_ST_BIND(sym->sym.st_info) != STB_LOCAL)
goto non_local;
/*
* Move the first global symbol, as per sh_info, into a new, higher
* symbol index. This frees up a spot for a new local symbol.
*/
first_non_local = symtab->sh.sh_info;
new_idx = symtab->sh.sh_size / symtab->sh.sh_entsize;
old = find_symbol_by_index(elf, first_non_local);
if (old) {
old->idx = new_idx;
@ -782,18 +761,82 @@ elf_create_section_symbol(struct elf *elf, struct section *sec)
new_idx = first_non_local;
}
/*
* Either way, we will add a LOCAL symbol.
*/
symtab->sh.sh_info += 1;
non_local:
sym->idx = new_idx;
if (elf_update_symbol(elf, symtab, symtab_shndx, sym)) {
WARN("elf_update_symbol");
return NULL;
}
/*
* Either way, we added a LOCAL symbol.
*/
symtab->sh.sh_info += 1;
symtab->sh.sh_size += symtab->sh.sh_entsize;
symtab->changed = true;
elf_add_symbol(elf, sym);
if (symtab_shndx) {
symtab_shndx->sh.sh_size += sizeof(Elf32_Word);
symtab_shndx->changed = true;
}
return sym;
}
static struct symbol *
elf_create_section_symbol(struct elf *elf, struct section *sec)
{
struct symbol *sym = calloc(1, sizeof(*sym));
if (!sym) {
perror("malloc");
return NULL;
}
sym->name = sec->name;
sym->sec = sec;
// st_name 0
sym->sym.st_info = GELF_ST_INFO(STB_LOCAL, STT_SECTION);
// st_other 0
// st_value 0
// st_size 0
sym = __elf_create_symbol(elf, sym);
if (sym)
elf_add_symbol(elf, sym);
return sym;
}
static int elf_add_string(struct elf *elf, struct section *strtab, char *str);
struct symbol *
elf_create_prefix_symbol(struct elf *elf, struct symbol *orig, long size)
{
struct symbol *sym = calloc(1, sizeof(*sym));
size_t namelen = strlen(orig->name) + sizeof("__pfx_");
char *name = malloc(namelen);
if (!sym || !name) {
perror("malloc");
return NULL;
}
snprintf(name, namelen, "__pfx_%s", orig->name);
sym->name = name;
sym->sec = orig->sec;
sym->sym.st_name = elf_add_string(elf, NULL, name);
sym->sym.st_info = orig->sym.st_info;
sym->sym.st_value = orig->sym.st_value - size;
sym->sym.st_size = size;
sym = __elf_create_symbol(elf, sym);
if (sym)
elf_add_symbol(elf, sym);
return sym;
}
@ -850,11 +893,12 @@ static int read_rela_reloc(struct section *sec, int i, struct reloc *reloc, unsi
static int read_relocs(struct elf *elf)
{
unsigned long nr_reloc, max_reloc = 0, tot_reloc = 0;
struct section *sec;
struct reloc *reloc;
int i;
unsigned int symndx;
unsigned long nr_reloc, max_reloc = 0, tot_reloc = 0;
struct symbol *sym;
int i;
if (!elf_alloc_hash(reloc, elf->text_size / 16))
return -1;
@ -895,13 +939,14 @@ static int read_relocs(struct elf *elf)
reloc->sec = sec;
reloc->idx = i;
reloc->sym = find_symbol_by_index(elf, symndx);
reloc->sym = sym = find_symbol_by_index(elf, symndx);
if (!reloc->sym) {
WARN("can't find reloc entry symbol %d for %s",
symndx, sec->name);
return -1;
}
list_add_tail(&reloc->sym_reloc_entry, &sym->reloc_list);
list_add_tail(&reloc->list, &sec->reloc_list);
elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc));
@ -1285,6 +1330,60 @@ int elf_write_reloc(struct elf *elf, struct reloc *reloc)
return 0;
}
/*
* When Elf_Scn::sh_size is smaller than the combined Elf_Data::d_size
* do you:
*
* A) adhere to the section header and truncate the data, or
* B) ignore the section header and write out all the data you've got?
*
* Yes, libelf sucks and we need to manually truncate if we over-allocate data.
*/
static int elf_truncate_section(struct elf *elf, struct section *sec)
{
u64 size = sec->sh.sh_size;
bool truncated = false;
Elf_Data *data = NULL;
Elf_Scn *s;
s = elf_getscn(elf->elf, sec->idx);
if (!s) {
WARN_ELF("elf_getscn");
return -1;
}
for (;;) {
/* get next data descriptor for the relevant section */
data = elf_getdata(s, data);
if (!data) {
if (size) {
WARN("end of section data but non-zero size left\n");
return -1;
}
return 0;
}
if (truncated) {
/* when we remove symbols */
WARN("truncated; but more data\n");
return -1;
}
if (!data->d_size) {
WARN("zero size data");
return -1;
}
if (data->d_size > size) {
truncated = true;
data->d_size = size;
}
size -= data->d_size;
}
}
int elf_write(struct elf *elf)
{
struct section *sec;
@ -1295,6 +1394,9 @@ int elf_write(struct elf *elf)
/* Update changed relocation sections and section headers: */
list_for_each_entry(sec, &elf->sections, list) {
if (sec->truncate)
elf_truncate_section(elf, sec);
if (sec->changed) {
s = elf_getscn(elf->elf, sec->idx);
if (!s) {


@ -93,4 +93,6 @@ bool arch_is_rethunk(struct symbol *sym);
int arch_rewrite_retpolines(struct objtool_file *file);
bool arch_pc_relative_reloc(struct reloc *reloc);
#endif /* _ARCH_H */


@ -14,6 +14,7 @@ struct opts {
bool dump_orc;
bool hack_jump_label;
bool hack_noinstr;
bool hack_skylake;
bool ibt;
bool mcount;
bool noinstr;
@ -25,6 +26,8 @@ struct opts {
bool stackval;
bool static_call;
bool uaccess;
int prefix;
bool cfi;
/* options: */
bool backtrace;


@ -67,11 +67,21 @@ struct instruction {
struct reloc *jump_table;
struct reloc *reloc;
struct list_head alts;
struct symbol *func;
struct symbol *sym;
struct list_head stack_ops;
struct cfi_state *cfi;
};
static inline struct symbol *insn_func(struct instruction *insn)
{
struct symbol *sym = insn->sym;
if (sym && sym->type != STT_FUNC)
sym = NULL;
return sym;
}
#define VISITED_BRANCH 0x01
#define VISITED_BRANCH_UACCESS 0x02
#define VISITED_BRANCH_MASK 0x03

Some files were not shown because too many files have changed in this diff.