linux/kernel/stackleak.c
Mark Rutland 9ec79840d6 stackleak: rework stack low bound handling
In stackleak_task_init(), stackleak_track_stack(), and
__stackleak_erase(), we open-code skipping the STACK_END_MAGIC at the
bottom of the stack. Each case is implemented slightly differently, and
only the __stackleak_erase() case is commented.

In stackleak_task_init() and stackleak_track_stack() we unconditionally
add sizeof(unsigned long) to the lowest stack address. In
stackleak_task_init() we use end_of_stack() for this, and in
stackleak_track_stack() we use task_stack_page(). In __stackleak_erase()
we handle this by detecting if `kstack_ptr` has hit the stack end
boundary, and if so, conditionally moving it above the magic.

This patch adds a new stackleak_task_low_bound() helper which is used in
all three cases, which unconditionally adds sizeof(unsigned long) to the
lowest address on the task stack, with commentary as to why. This uses
end_of_stack() as stackleak_task_init() did prior to this patch, as this
is consistent with the code in kernel/fork.c which initializes the
STACK_END_MAGIC value.

In __stackleak_erase() we no longer need to check whether we've spilled
into the STACK_END_MAGIC value, as stackleak_track_stack() ensures that
`current->lowest_stack` stops immediately above this, and similarly the
poison scan will stop immediately above this.

For stackleak_task_init() and stackleak_track_stack() this results in no
change to code generation. For __stackleak_erase() the generated
assembly is slightly simpler and shorter.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Popov <alex.popov@linux.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20220427173128.2603085-5-mark.rutland@arm.com
2022-05-08 01:33:08 -07:00

143 lines
3.8 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* This code fills the used part of the kernel stack with a poison value
* before returning to userspace. It's part of the STACKLEAK feature
* ported from grsecurity/PaX.
*
* Author: Alexander Popov <alex.popov@linux.com>
*
* STACKLEAK reduces the information which kernel stack leak bugs can
* reveal and blocks some uninitialized stack variable attacks.
*/
#include <linux/stackleak.h>
#include <linux/kprobes.h>
#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
#include <linux/jump_label.h>
#include <linux/sysctl.h>
#include <linux/init.h>
static DEFINE_STATIC_KEY_FALSE(stack_erasing_bypass);
#ifdef CONFIG_SYSCTL
static int stack_erasing_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret = 0;
int state = !static_branch_unlikely(&stack_erasing_bypass);
int prev_state = state;
table->data = &state;
table->maxlen = sizeof(int);
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
state = !!state;
if (ret || !write || state == prev_state)
return ret;
if (state)
static_branch_disable(&stack_erasing_bypass);
else
static_branch_enable(&stack_erasing_bypass);
pr_warn("stackleak: kernel stack erasing is %s\n",
state ? "enabled" : "disabled");
return ret;
}
static struct ctl_table stackleak_sysctls[] = {
{
.procname = "stack_erasing",
.data = NULL,
.maxlen = sizeof(int),
.mode = 0600,
.proc_handler = stack_erasing_sysctl,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
{}
};
static int __init stackleak_sysctls_init(void)
{
register_sysctl_init("kernel", stackleak_sysctls);
return 0;
}
late_initcall(stackleak_sysctls_init);
#endif /* CONFIG_SYSCTL */
#define skip_erasing() static_branch_unlikely(&stack_erasing_bypass)
#else
#define skip_erasing() false
#endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */
static __always_inline void __stackleak_erase(void)
{
const unsigned long task_stack_low = stackleak_task_low_bound(current);
/* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */
unsigned long kstack_ptr = current->lowest_stack;
unsigned long boundary = task_stack_low;
unsigned int poison_count = 0;
const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);
/* Search for the poison value in the kernel stack */
while (kstack_ptr > boundary && poison_count <= depth) {
if (*(unsigned long *)kstack_ptr == STACKLEAK_POISON)
poison_count++;
else
poison_count = 0;
kstack_ptr -= sizeof(unsigned long);
}
#ifdef CONFIG_STACKLEAK_METRICS
current->prev_lowest_stack = kstack_ptr;
#endif
/*
* Now write the poison value to the kernel stack. Start from
* 'kstack_ptr' and move up till the new 'boundary'. We assume that
* the stack pointer doesn't change when we write poison.
*/
if (on_thread_stack())
boundary = current_stack_pointer;
else
boundary = current_top_of_stack();
while (kstack_ptr < boundary) {
*(unsigned long *)kstack_ptr = STACKLEAK_POISON;
kstack_ptr += sizeof(unsigned long);
}
/* Reset the 'lowest_stack' value for the next syscall */
current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64;
}
asmlinkage void noinstr stackleak_erase(void)
{
if (skip_erasing())
return;
__stackleak_erase();
}
void __used __no_caller_saved_registers noinstr stackleak_track_stack(void)
{
unsigned long sp = current_stack_pointer;
/*
* Having CONFIG_STACKLEAK_TRACK_MIN_SIZE larger than
* STACKLEAK_SEARCH_DEPTH makes the poison search in
* stackleak_erase() unreliable. Let's prevent that.
*/
BUILD_BUG_ON(CONFIG_STACKLEAK_TRACK_MIN_SIZE > STACKLEAK_SEARCH_DEPTH);
/* 'lowest_stack' should be aligned on the register width boundary */
sp = ALIGN(sp, sizeof(unsigned long));
if (sp < current->lowest_stack &&
sp >= stackleak_task_low_bound(current)) {
current->lowest_stack = sp;
}
}
EXPORT_SYMBOL(stackleak_track_stack);