diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index 29f9f0554f7d..7a35a6e71d44 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -355,3 +355,4 @@ 346 i386 setns sys_setns 347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv 348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev +349 i386 kcmp sys_kcmp diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index dd29a9ea27c5..51171aeff0dc 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -318,6 +318,8 @@ 309 common getcpu sys_getcpu 310 64 process_vm_readv sys_process_vm_readv 311 64 process_vm_writev sys_process_vm_writev +312 64 kcmp sys_kcmp + # # x32-specific system call numbers start at 512 to avoid cache impact # for native 64-bit operation. diff --git a/include/linux/kcmp.h b/include/linux/kcmp.h new file mode 100644 index 000000000000..2dcd1b3aafc8 --- /dev/null +++ b/include/linux/kcmp.h @@ -0,0 +1,17 @@ +#ifndef _LINUX_KCMP_H +#define _LINUX_KCMP_H + +/* Comparison type */ +enum kcmp_type { + KCMP_FILE, + KCMP_VM, + KCMP_FILES, + KCMP_FS, + KCMP_SIGHAND, + KCMP_IO, + KCMP_SYSVSEM, + + KCMP_TYPES, +}; + +#endif /* _LINUX_KCMP_H */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3de3acb84a95..19439c75c5b2 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -858,4 +858,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, unsigned long riovcnt, unsigned long flags); +asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, + unsigned long idx1, unsigned long idx2); #endif diff --git a/kernel/Makefile b/kernel/Makefile index 6c07f30fa9b7..80be6ca0cc75 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -25,6 +25,9 @@ endif obj-y += sched/ obj-y += power/ +ifeq ($(CONFIG_CHECKPOINT_RESTORE),y) +obj-$(CONFIG_X86) += kcmp.o +endif obj-$(CONFIG_FREEZER) += freezer.o obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_STACKTRACE) += stacktrace.o diff --git a/kernel/kcmp.c b/kernel/kcmp.c new file mode 100644 index 000000000000..30b7b225306c --- /dev/null +++ b/kernel/kcmp.c @@ -0,0 +1,196 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* + * We don't expose the real in-memory order of objects for security reasons. + * But still the comparison results should be suitable for sorting. So we + * obfuscate kernel pointers values and compare the production instead. + * + * The obfuscation is done in two steps. First we xor the kernel pointer with + * a random value, which puts pointer into a new position in a reordered space. + * Secondly we multiply the xor production with a large odd random number to + * permute its bits even more (the odd multiplier guarantees that the product + * is unique ever after the high bits are truncated, since any odd number is + * relative prime to 2^n). + * + * Note also that the obfuscation itself is invisible to userspace and if needed + * it can be changed to an alternate scheme. + */ +static unsigned long cookies[KCMP_TYPES][2] __read_mostly; + +static long kptr_obfuscate(long v, int type) +{ + return (v ^ cookies[type][0]) * cookies[type][1]; +} + +/* + * 0 - equal, i.e. v1 = v2 + * 1 - less than, i.e. v1 < v2 + * 2 - greater than, i.e. v1 > v2 + * 3 - not equal but ordering unavailable (reserved for future) + */ +static int kcmp_ptr(void *v1, void *v2, enum kcmp_type type) +{ + long ret; + + ret = kptr_obfuscate((long)v1, type) - kptr_obfuscate((long)v2, type); + + return (ret < 0) | ((ret > 0) << 1); +} + +/* The caller must have pinned the task */ +static struct file * +get_file_raw_ptr(struct task_struct *task, unsigned int idx) +{ + struct file *file = NULL; + + task_lock(task); + rcu_read_lock(); + + if (task->files) + file = fcheck_files(task->files, idx); + + rcu_read_unlock(); + task_unlock(task); + + return file; +} + +static void kcmp_unlock(struct mutex *m1, struct mutex *m2) +{ + if (likely(m2 != m1)) + mutex_unlock(m2); + mutex_unlock(m1); +} + +static int kcmp_lock(struct mutex *m1, struct mutex *m2) +{ + int err; + + if (m2 > m1) + swap(m1, m2); + + err = mutex_lock_killable(m1); + if (!err && likely(m1 != m2)) { + err = mutex_lock_killable_nested(m2, SINGLE_DEPTH_NESTING); + if (err) + mutex_unlock(m1); + } + + return err; +} + +SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type, + unsigned long, idx1, unsigned long, idx2) +{ + struct task_struct *task1, *task2; + int ret; + + rcu_read_lock(); + + /* + * Tasks are looked up in caller's PID namespace only. + */ + task1 = find_task_by_vpid(pid1); + task2 = find_task_by_vpid(pid2); + if (!task1 || !task2) + goto err_no_task; + + get_task_struct(task1); + get_task_struct(task2); + + rcu_read_unlock(); + + /* + * One should have enough rights to inspect task details. + */ + ret = kcmp_lock(&task1->signal->cred_guard_mutex, + &task2->signal->cred_guard_mutex); + if (ret) + goto err; + if (!ptrace_may_access(task1, PTRACE_MODE_READ) || + !ptrace_may_access(task2, PTRACE_MODE_READ)) { + ret = -EPERM; + goto err_unlock; + } + + switch (type) { + case KCMP_FILE: { + struct file *filp1, *filp2; + + filp1 = get_file_raw_ptr(task1, idx1); + filp2 = get_file_raw_ptr(task2, idx2); + + if (filp1 && filp2) + ret = kcmp_ptr(filp1, filp2, KCMP_FILE); + else + ret = -EBADF; + break; + } + case KCMP_VM: + ret = kcmp_ptr(task1->mm, task2->mm, KCMP_VM); + break; + case KCMP_FILES: + ret = kcmp_ptr(task1->files, task2->files, KCMP_FILES); + break; + case KCMP_FS: + ret = kcmp_ptr(task1->fs, task2->fs, KCMP_FS); + break; + case KCMP_SIGHAND: + ret = kcmp_ptr(task1->sighand, task2->sighand, KCMP_SIGHAND); + break; + case KCMP_IO: + ret = kcmp_ptr(task1->io_context, task2->io_context, KCMP_IO); + break; + case KCMP_SYSVSEM: +#ifdef CONFIG_SYSVIPC + ret = kcmp_ptr(task1->sysvsem.undo_list, + task2->sysvsem.undo_list, + KCMP_SYSVSEM); +#else + ret = -EOPNOTSUPP; +#endif + break; + default: + ret = -EINVAL; + break; + } + +err_unlock: + kcmp_unlock(&task1->signal->cred_guard_mutex, + &task2->signal->cred_guard_mutex); +err: + put_task_struct(task1); + put_task_struct(task2); + + return ret; + +err_no_task: + rcu_read_unlock(); + return -ESRCH; +} + +static __init int kcmp_cookies_init(void) +{ + int i; + + get_random_bytes(cookies, sizeof(cookies)); + + for (i = 0; i < KCMP_TYPES; i++) + cookies[i][1] |= (~(~0UL >> 1) | 1); + + return 0; +} +arch_initcall(kcmp_cookies_init); diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 47bfa16430d7..dbff751e4086 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -203,3 +203,6 @@ cond_syscall(sys_fanotify_mark); cond_syscall(sys_name_to_handle_at); cond_syscall(sys_open_by_handle_at); cond_syscall(compat_sys_open_by_handle_at); + +/* compare kernel pointers */ +cond_syscall(sys_kcmp); diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 14972017a43e..a4162e15c25f 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -1,4 +1,4 @@ -TARGETS = breakpoints mqueue vm +TARGETS = breakpoints kcmp mqueue vm all: for TARGET in $(TARGETS); do \ diff --git a/tools/testing/selftests/kcmp/Makefile b/tools/testing/selftests/kcmp/Makefile new file mode 100644 index 000000000000..dc79b86ea65c --- /dev/null +++ b/tools/testing/selftests/kcmp/Makefile @@ -0,0 +1,29 @@ +uname_M := $(shell uname -m 2>/dev/null || echo not) +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/) +ifeq ($(ARCH),i386) + ARCH := X86 + CFLAGS := -DCONFIG_X86_32 -D__i386__ +endif +ifeq ($(ARCH),x86_64) + ARCH := X86 + CFLAGS := -DCONFIG_X86_64 -D__x86_64__ +endif + +CFLAGS += -I../../../../arch/x86/include/generated/ +CFLAGS += -I../../../../include/ +CFLAGS += -I../../../../usr/include/ +CFLAGS += -I../../../../arch/x86/include/ + +all: +ifeq ($(ARCH),X86) + gcc $(CFLAGS) kcmp_test.c -o run_test +else + echo "Not an x86 target, can't build kcmp selftest" +endif + +run-tests: all + ./kcmp_test + +clean: + rm -fr ./run_test + rm -fr ./test-file diff --git a/tools/testing/selftests/kcmp/kcmp_test.c b/tools/testing/selftests/kcmp/kcmp_test.c new file mode 100644 index 000000000000..358cc6bfa35d --- /dev/null +++ b/tools/testing/selftests/kcmp/kcmp_test.c @@ -0,0 +1,94 @@ +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +static long sys_kcmp(int pid1, int pid2, int type, int fd1, int fd2) +{ + return syscall(__NR_kcmp, pid1, pid2, type, fd1, fd2); +} + +int main(int argc, char **argv) +{ + const char kpath[] = "kcmp-test-file"; + int pid1, pid2; + int fd1, fd2; + int status; + + fd1 = open(kpath, O_RDWR | O_CREAT | O_TRUNC, 0644); + pid1 = getpid(); + + if (fd1 < 0) { + perror("Can't create file"); + exit(1); + } + + pid2 = fork(); + if (pid2 < 0) { + perror("fork failed"); + exit(1); + } + + if (!pid2) { + int pid2 = getpid(); + int ret; + + fd2 = open(kpath, O_RDWR, 0644); + if (fd2 < 0) { + perror("Can't open file"); + exit(1); + } + + /* An example of output and arguments */ + printf("pid1: %6d pid2: %6d FD: %2ld FILES: %2ld VM: %2ld " + "FS: %2ld SIGHAND: %2ld IO: %2ld SYSVSEM: %2ld " + "INV: %2ld\n", + pid1, pid2, + sys_kcmp(pid1, pid2, KCMP_FILE, fd1, fd2), + sys_kcmp(pid1, pid2, KCMP_FILES, 0, 0), + sys_kcmp(pid1, pid2, KCMP_VM, 0, 0), + sys_kcmp(pid1, pid2, KCMP_FS, 0, 0), + sys_kcmp(pid1, pid2, KCMP_SIGHAND, 0, 0), + sys_kcmp(pid1, pid2, KCMP_IO, 0, 0), + sys_kcmp(pid1, pid2, KCMP_SYSVSEM, 0, 0), + + /* This one should fail */ + sys_kcmp(pid1, pid2, KCMP_TYPES + 1, 0, 0)); + + /* This one should return same fd */ + ret = sys_kcmp(pid1, pid2, KCMP_FILE, fd1, fd1); + if (ret) { + printf("FAIL: 0 expected but %d returned\n", ret); + ret = -1; + } else + printf("PASS: 0 returned as expected\n"); + + /* Compare with self */ + ret = sys_kcmp(pid1, pid1, KCMP_VM, 0, 0); + if (ret) { + printf("FAIL: 0 expected but %li returned\n", ret); + ret = -1; + } else + printf("PASS: 0 returned as expected\n"); + + exit(ret); + } + + waitpid(pid2, &status, P_ALL); + + return 0; +}