From 5fec25f2cb959cb5f189d7f6127bee3efc782530 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 24 Jun 2020 16:34:57 -0500 Subject: [PATCH 01/23] umh: Capture the pid in umh_pipe_setup The pid in struct subprocess_info is only used by umh_clean_and_save_pid to write the pid into umh_info. Instead always capture the pid on struct umh_info in umh_pipe_setup, removing code that is specific to user mode drivers from the common user path of user mode helpers. v1: https://lkml.kernel.org/r/87h7uygf9i.fsf_-_@x220.int.ebiederm.org v2: https://lkml.kernel.org/r/875zb97iix.fsf_-_@x220.int.ebiederm.org Link: https://lkml.kernel.org/r/20200702164140.4468-1-ebiederm@xmission.com Reviewed-by: Greg Kroah-Hartman Acked-by: Alexei Starovoitov Tested-by: Alexei Starovoitov Signed-off-by: "Eric W. Biederman" --- include/linux/umh.h | 1 - kernel/umh.c | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/include/linux/umh.h b/include/linux/umh.h index 0c08de356d0d..aae16a0ebd0f 100644 --- a/include/linux/umh.h +++ b/include/linux/umh.h @@ -25,7 +25,6 @@ struct subprocess_info { struct file *file; int wait; int retval; - pid_t pid; int (*init)(struct subprocess_info *info, struct cred *new); void (*cleanup)(struct subprocess_info *info); void *data; diff --git a/kernel/umh.c b/kernel/umh.c index 79f139a7ca03..c2a582b3a2bf 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -102,7 +102,6 @@ static int call_usermodehelper_exec_async(void *data) commit_creds(new); - sub_info->pid = task_pid_nr(current); if (sub_info->file) { retval = do_execve_file(sub_info->file, sub_info->argv, sub_info->envp); @@ -468,6 +467,7 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) umh_info->pipe_to_umh = to_umh[1]; umh_info->pipe_from_umh = from_umh[0]; + umh_info->pid = task_pid_nr(current); return 0; } @@ -476,13 +476,12 @@ static void umh_clean_and_save_pid(struct subprocess_info *info) struct umh_info *umh_info = info->data; /* cleanup if umh_pipe_setup() was successful but exec failed */ - if (info->pid && info->retval) { + if (info->retval) { fput(umh_info->pipe_to_umh); fput(umh_info->pipe_from_umh); } argv_free(info->argv); - umh_info->pid = info->pid; } /** From b044fa2ae50d52d8c9f9d130055c2aea032e7475 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 25 Jun 2020 10:04:25 -0500 Subject: [PATCH 02/23] umh: Move setting PF_UMH into umh_pipe_setup I am separating the code specific to user mode drivers from the code for ordinary user space helpers. Move setting of PF_UMH from call_usermodehelper_exec_async which is core user mode helper code into umh_pipe_setup which is user mode driver code. The code is equally as easy to write in one location as the other and the movement minimizes the impact of the user mode driver code on the core of the user mode helper code. Setting PF_UMH unconditionally is harmless as an action will only happen if it is paired with an entry on umh_list. v1: https://lkml.kernel.org/r/87bll6gf8t.fsf_-_@x220.int.ebiederm.org v2: https://lkml.kernel.org/r/87zh8l63xs.fsf_-_@x220.int.ebiederm.org Link: https://lkml.kernel.org/r/20200702164140.4468-2-ebiederm@xmission.com Reviewed-by: Greg Kroah-Hartman Acked-by: Alexei Starovoitov Tested-by: Alexei Starovoitov Signed-off-by: "Eric W. Biederman" --- kernel/umh.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/kernel/umh.c b/kernel/umh.c index c2a582b3a2bf..e6b9d6636850 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -102,12 +102,10 @@ static int call_usermodehelper_exec_async(void *data) commit_creds(new); - if (sub_info->file) { + if (sub_info->file) retval = do_execve_file(sub_info->file, sub_info->argv, sub_info->envp); - if (!retval) - current->flags |= PF_UMH; - } else + else retval = do_execve(getname_kernel(sub_info->path), (const char __user *const __user *)sub_info->argv, (const char __user *const __user *)sub_info->envp); @@ -468,6 +466,7 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) umh_info->pipe_to_umh = to_umh[1]; umh_info->pipe_from_umh = from_umh[0]; umh_info->pid = task_pid_nr(current); + current->flags |= PF_UMH; return 0; } From 3a171042aeab8702391c311d84633a6c267d566c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 25 Jun 2020 10:10:03 -0500 Subject: [PATCH 03/23] umh: Rename the user mode driver helpers for clarity Now that the functionality of umh_setup_pipe and umh_clean_and_save_pid has changed their names are too specific and don't make much sense. Instead name them umd_setup and umd_cleanup for the functional role in setting up user mode drivers. v1: https://lkml.kernel.org/r/875zbegf82.fsf_-_@x220.int.ebiederm.org v2: https://lkml.kernel.org/r/87tuyt63x3.fsf_-_@x220.int.ebiederm.org Link: https://lkml.kernel.org/r/20200702164140.4468-3-ebiederm@xmission.com Reviewed-by: Greg Kroah-Hartman Acked-by: Alexei Starovoitov Tested-by: Alexei Starovoitov Signed-off-by: "Eric W. Biederman" --- kernel/umh.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/umh.c b/kernel/umh.c index e6b9d6636850..26c3d493f168 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -429,7 +429,7 @@ struct subprocess_info *call_usermodehelper_setup_file(struct file *file, return sub_info; } -static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) +static int umd_setup(struct subprocess_info *info, struct cred *new) { struct umh_info *umh_info = info->data; struct file *from_umh[2]; @@ -470,11 +470,11 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) return 0; } -static void umh_clean_and_save_pid(struct subprocess_info *info) +static void umd_cleanup(struct subprocess_info *info) { struct umh_info *umh_info = info->data; - /* cleanup if umh_pipe_setup() was successful but exec failed */ + /* cleanup if umh_setup() was successful but exec failed */ if (info->retval) { fput(umh_info->pipe_to_umh); fput(umh_info->pipe_from_umh); @@ -520,8 +520,8 @@ int fork_usermode_blob(void *data, size_t len, struct umh_info *info) } err = -ENOMEM; - sub_info = call_usermodehelper_setup_file(file, umh_pipe_setup, - umh_clean_and_save_pid, info); + sub_info = call_usermodehelper_setup_file(file, umd_setup, umd_cleanup, + info); if (!sub_info) goto out; From 21d598280675c463ea1b264fab06e9614aacd1e1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 24 Jun 2020 17:01:18 -0500 Subject: [PATCH 04/23] umh: Remove call_usermodehelper_setup_file. The only caller of call_usermodehelper_setup_file is fork_usermode_blob. In fork_usermode_blob replace call_usermodehelper_setup_file with call_usermodehelper_setup and delete fork_usermodehelper_setup_file. For this to work the argv_free is moved from umh_clean_and_save_pid to fork_usermode_blob. v1: https://lkml.kernel.org/r/87zh8qf0mp.fsf_-_@x220.int.ebiederm.org v2: https://lkml.kernel.org/r/87o8p163u1.fsf_-_@x220.int.ebiederm.org Link: https://lkml.kernel.org/r/20200702164140.4468-4-ebiederm@xmission.com Reviewed-by: Greg Kroah-Hartman Acked-by: Alexei Starovoitov Tested-by: Alexei Starovoitov Signed-off-by: "Eric W. Biederman" --- include/linux/umh.h | 3 --- kernel/umh.c | 42 +++++++++++------------------------------- 2 files changed, 11 insertions(+), 34 deletions(-) diff --git a/include/linux/umh.h b/include/linux/umh.h index aae16a0ebd0f..de08af00c68a 100644 --- a/include/linux/umh.h +++ b/include/linux/umh.h @@ -39,9 +39,6 @@ call_usermodehelper_setup(const char *path, char **argv, char **envp, int (*init)(struct subprocess_info *info, struct cred *new), void (*cleanup)(struct subprocess_info *), void *data); -struct subprocess_info *call_usermodehelper_setup_file(struct file *file, - int (*init)(struct subprocess_info *info, struct cred *new), - void (*cleanup)(struct subprocess_info *), void *data); struct umh_info { const char *cmdline; struct file *pipe_to_umh; diff --git a/kernel/umh.c b/kernel/umh.c index 26c3d493f168..b8fa9b99b366 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -402,33 +402,6 @@ struct subprocess_info *call_usermodehelper_setup(const char *path, char **argv, } EXPORT_SYMBOL(call_usermodehelper_setup); -struct subprocess_info *call_usermodehelper_setup_file(struct file *file, - int (*init)(struct subprocess_info *info, struct cred *new), - void (*cleanup)(struct subprocess_info *info), void *data) -{ - struct subprocess_info *sub_info; - struct umh_info *info = data; - const char *cmdline = (info->cmdline) ? info->cmdline : "usermodehelper"; - - sub_info = kzalloc(sizeof(struct subprocess_info), GFP_KERNEL); - if (!sub_info) - return NULL; - - sub_info->argv = argv_split(GFP_KERNEL, cmdline, NULL); - if (!sub_info->argv) { - kfree(sub_info); - return NULL; - } - - INIT_WORK(&sub_info->work, call_usermodehelper_exec_work); - sub_info->path = "none"; - sub_info->file = file; - sub_info->init = init; - sub_info->cleanup = cleanup; - sub_info->data = data; - return sub_info; -} - static int umd_setup(struct subprocess_info *info, struct cred *new) { struct umh_info *umh_info = info->data; @@ -479,8 +452,6 @@ static void umd_cleanup(struct subprocess_info *info) fput(umh_info->pipe_to_umh); fput(umh_info->pipe_from_umh); } - - argv_free(info->argv); } /** @@ -501,7 +472,9 @@ static void umd_cleanup(struct subprocess_info *info) */ int fork_usermode_blob(void *data, size_t len, struct umh_info *info) { + const char *cmdline = (info->cmdline) ? info->cmdline : "usermodehelper"; struct subprocess_info *sub_info; + char **argv = NULL; struct file *file; ssize_t written; loff_t pos = 0; @@ -520,11 +493,16 @@ int fork_usermode_blob(void *data, size_t len, struct umh_info *info) } err = -ENOMEM; - sub_info = call_usermodehelper_setup_file(file, umd_setup, umd_cleanup, - info); + argv = argv_split(GFP_KERNEL, cmdline, NULL); + if (!argv) + goto out; + + sub_info = call_usermodehelper_setup("none", argv, NULL, GFP_KERNEL, + umd_setup, umd_cleanup, info); if (!sub_info) goto out; + sub_info->file = file; err = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC); if (!err) { mutex_lock(&umh_list_lock); @@ -532,6 +510,8 @@ int fork_usermode_blob(void *data, size_t len, struct umh_info *info) mutex_unlock(&umh_list_lock); } out: + if (argv) + argv_free(argv); fput(file); return err; } From 884c5e683b67dbc52892e24c29eed864f330ec08 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 26 Jun 2020 12:23:00 -0500 Subject: [PATCH 05/23] umh: Separate the user mode driver and the user mode helper support This makes it clear which code is part of the core user mode helper support and which code is needed to implement user mode drivers. This makes the kernel smaller for everyone who does not use a usermode driver. v1: https://lkml.kernel.org/r/87tuyyf0ln.fsf_-_@x220.int.ebiederm.org v2: https://lkml.kernel.org/r/87imf963s6.fsf_-_@x220.int.ebiederm.org Link: https://lkml.kernel.org/r/20200702164140.4468-5-ebiederm@xmission.com Reviewed-by: Greg Kroah-Hartman Acked-by: Alexei Starovoitov Tested-by: Alexei Starovoitov Signed-off-by: "Eric W. Biederman" --- include/linux/bpfilter.h | 2 +- include/linux/sched.h | 8 -- include/linux/umh.h | 10 --- include/linux/usermode_driver.h | 30 +++++++ kernel/Makefile | 1 + kernel/exit.c | 1 + kernel/umh.c | 139 ------------------------------ kernel/usermode_driver.c | 146 ++++++++++++++++++++++++++++++++ 8 files changed, 179 insertions(+), 158 deletions(-) create mode 100644 include/linux/usermode_driver.h create mode 100644 kernel/usermode_driver.c diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h index d815622cd31e..d6d6206052a6 100644 --- a/include/linux/bpfilter.h +++ b/include/linux/bpfilter.h @@ -3,7 +3,7 @@ #define _LINUX_BPFILTER_H #include -#include +#include struct sock; int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval, diff --git a/include/linux/sched.h b/include/linux/sched.h index b62e6aaf28f0..59d1e92bb88e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2020,14 +2020,6 @@ static inline void rseq_execve(struct task_struct *t) #endif -void __exit_umh(struct task_struct *tsk); - -static inline void exit_umh(struct task_struct *tsk) -{ - if (unlikely(tsk->flags & PF_UMH)) - __exit_umh(tsk); -} - #ifdef CONFIG_DEBUG_RSEQ void rseq_syscall(struct pt_regs *regs); diff --git a/include/linux/umh.h b/include/linux/umh.h index de08af00c68a..73173c4a07e5 100644 --- a/include/linux/umh.h +++ b/include/linux/umh.h @@ -39,16 +39,6 @@ call_usermodehelper_setup(const char *path, char **argv, char **envp, int (*init)(struct subprocess_info *info, struct cred *new), void (*cleanup)(struct subprocess_info *), void *data); -struct umh_info { - const char *cmdline; - struct file *pipe_to_umh; - struct file *pipe_from_umh; - struct list_head list; - void (*cleanup)(struct umh_info *info); - pid_t pid; -}; -int fork_usermode_blob(void *data, size_t len, struct umh_info *info); - extern int call_usermodehelper_exec(struct subprocess_info *info, int wait); diff --git a/include/linux/usermode_driver.h b/include/linux/usermode_driver.h new file mode 100644 index 000000000000..c5f6dc950227 --- /dev/null +++ b/include/linux/usermode_driver.h @@ -0,0 +1,30 @@ +#ifndef __LINUX_USERMODE_DRIVER_H__ +#define __LINUX_USERMODE_DRIVER_H__ + +#include + +#ifdef CONFIG_BPFILTER +void __exit_umh(struct task_struct *tsk); + +static inline void exit_umh(struct task_struct *tsk) +{ + if (unlikely(tsk->flags & PF_UMH)) + __exit_umh(tsk); +} +#else +static inline void exit_umh(struct task_struct *tsk) +{ +} +#endif + +struct umh_info { + const char *cmdline; + struct file *pipe_to_umh; + struct file *pipe_from_umh; + struct list_head list; + void (*cleanup)(struct umh_info *info); + pid_t pid; +}; +int fork_usermode_blob(void *data, size_t len, struct umh_info *info); + +#endif /* __LINUX_USERMODE_DRIVER_H__ */ diff --git a/kernel/Makefile b/kernel/Makefile index f3218bc5ec69..43928759893a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -12,6 +12,7 @@ obj-y = fork.o exec_domain.o panic.o \ notifier.o ksysfs.o cred.o reboot.o \ async.o range.o smpboot.o ucount.o +obj-$(CONFIG_BPFILTER) += usermode_driver.o obj-$(CONFIG_MODULES) += kmod.o obj-$(CONFIG_MULTIUSER) += groups.o diff --git a/kernel/exit.c b/kernel/exit.c index 727150f28103..a081deea52ca 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include diff --git a/kernel/umh.c b/kernel/umh.c index b8fa9b99b366..3e4e453d45c8 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -26,8 +26,6 @@ #include #include #include -#include -#include #include @@ -38,8 +36,6 @@ static kernel_cap_t usermodehelper_bset = CAP_FULL_SET; static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET; static DEFINE_SPINLOCK(umh_sysctl_lock); static DECLARE_RWSEM(umhelper_sem); -static LIST_HEAD(umh_list); -static DEFINE_MUTEX(umh_list_lock); static void call_usermodehelper_freeinfo(struct subprocess_info *info) { @@ -402,121 +398,6 @@ struct subprocess_info *call_usermodehelper_setup(const char *path, char **argv, } EXPORT_SYMBOL(call_usermodehelper_setup); -static int umd_setup(struct subprocess_info *info, struct cred *new) -{ - struct umh_info *umh_info = info->data; - struct file *from_umh[2]; - struct file *to_umh[2]; - int err; - - /* create pipe to send data to umh */ - err = create_pipe_files(to_umh, 0); - if (err) - return err; - err = replace_fd(0, to_umh[0], 0); - fput(to_umh[0]); - if (err < 0) { - fput(to_umh[1]); - return err; - } - - /* create pipe to receive data from umh */ - err = create_pipe_files(from_umh, 0); - if (err) { - fput(to_umh[1]); - replace_fd(0, NULL, 0); - return err; - } - err = replace_fd(1, from_umh[1], 0); - fput(from_umh[1]); - if (err < 0) { - fput(to_umh[1]); - replace_fd(0, NULL, 0); - fput(from_umh[0]); - return err; - } - - umh_info->pipe_to_umh = to_umh[1]; - umh_info->pipe_from_umh = from_umh[0]; - umh_info->pid = task_pid_nr(current); - current->flags |= PF_UMH; - return 0; -} - -static void umd_cleanup(struct subprocess_info *info) -{ - struct umh_info *umh_info = info->data; - - /* cleanup if umh_setup() was successful but exec failed */ - if (info->retval) { - fput(umh_info->pipe_to_umh); - fput(umh_info->pipe_from_umh); - } -} - -/** - * fork_usermode_blob - fork a blob of bytes as a usermode process - * @data: a blob of bytes that can be do_execv-ed as a file - * @len: length of the blob - * @info: information about usermode process (shouldn't be NULL) - * - * If info->cmdline is set it will be used as command line for the - * user process, else "usermodehelper" is used. - * - * Returns either negative error or zero which indicates success - * in executing a blob of bytes as a usermode process. In such - * case 'struct umh_info *info' is populated with two pipes - * and a pid of the process. The caller is responsible for health - * check of the user process, killing it via pid, and closing the - * pipes when user process is no longer needed. - */ -int fork_usermode_blob(void *data, size_t len, struct umh_info *info) -{ - const char *cmdline = (info->cmdline) ? info->cmdline : "usermodehelper"; - struct subprocess_info *sub_info; - char **argv = NULL; - struct file *file; - ssize_t written; - loff_t pos = 0; - int err; - - file = shmem_kernel_file_setup("", len, 0); - if (IS_ERR(file)) - return PTR_ERR(file); - - written = kernel_write(file, data, len, &pos); - if (written != len) { - err = written; - if (err >= 0) - err = -ENOMEM; - goto out; - } - - err = -ENOMEM; - argv = argv_split(GFP_KERNEL, cmdline, NULL); - if (!argv) - goto out; - - sub_info = call_usermodehelper_setup("none", argv, NULL, GFP_KERNEL, - umd_setup, umd_cleanup, info); - if (!sub_info) - goto out; - - sub_info->file = file; - err = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC); - if (!err) { - mutex_lock(&umh_list_lock); - list_add(&info->list, &umh_list); - mutex_unlock(&umh_list_lock); - } -out: - if (argv) - argv_free(argv); - fput(file); - return err; -} -EXPORT_SYMBOL_GPL(fork_usermode_blob); - /** * call_usermodehelper_exec - start a usermode application * @sub_info: information about the subprocessa @@ -678,26 +559,6 @@ static int proc_cap_handler(struct ctl_table *table, int write, return 0; } -void __exit_umh(struct task_struct *tsk) -{ - struct umh_info *info; - pid_t pid = tsk->pid; - - mutex_lock(&umh_list_lock); - list_for_each_entry(info, &umh_list, list) { - if (info->pid == pid) { - list_del(&info->list); - mutex_unlock(&umh_list_lock); - goto out; - } - } - mutex_unlock(&umh_list_lock); - return; -out: - if (info->cleanup) - info->cleanup(info); -} - struct ctl_table usermodehelper_table[] = { { .procname = "bset", diff --git a/kernel/usermode_driver.c b/kernel/usermode_driver.c new file mode 100644 index 000000000000..5b05863af855 --- /dev/null +++ b/kernel/usermode_driver.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * umd - User mode driver support + */ +#include +#include +#include + +static LIST_HEAD(umh_list); +static DEFINE_MUTEX(umh_list_lock); + +static int umd_setup(struct subprocess_info *info, struct cred *new) +{ + struct umh_info *umh_info = info->data; + struct file *from_umh[2]; + struct file *to_umh[2]; + int err; + + /* create pipe to send data to umh */ + err = create_pipe_files(to_umh, 0); + if (err) + return err; + err = replace_fd(0, to_umh[0], 0); + fput(to_umh[0]); + if (err < 0) { + fput(to_umh[1]); + return err; + } + + /* create pipe to receive data from umh */ + err = create_pipe_files(from_umh, 0); + if (err) { + fput(to_umh[1]); + replace_fd(0, NULL, 0); + return err; + } + err = replace_fd(1, from_umh[1], 0); + fput(from_umh[1]); + if (err < 0) { + fput(to_umh[1]); + replace_fd(0, NULL, 0); + fput(from_umh[0]); + return err; + } + + umh_info->pipe_to_umh = to_umh[1]; + umh_info->pipe_from_umh = from_umh[0]; + umh_info->pid = task_pid_nr(current); + current->flags |= PF_UMH; + return 0; +} + +static void umd_cleanup(struct subprocess_info *info) +{ + struct umh_info *umh_info = info->data; + + /* cleanup if umh_setup() was successful but exec failed */ + if (info->retval) { + fput(umh_info->pipe_to_umh); + fput(umh_info->pipe_from_umh); + } +} + +/** + * fork_usermode_blob - fork a blob of bytes as a usermode process + * @data: a blob of bytes that can be do_execv-ed as a file + * @len: length of the blob + * @info: information about usermode process (shouldn't be NULL) + * + * If info->cmdline is set it will be used as command line for the + * user process, else "usermodehelper" is used. + * + * Returns either negative error or zero which indicates success + * in executing a blob of bytes as a usermode process. In such + * case 'struct umh_info *info' is populated with two pipes + * and a pid of the process. The caller is responsible for health + * check of the user process, killing it via pid, and closing the + * pipes when user process is no longer needed. + */ +int fork_usermode_blob(void *data, size_t len, struct umh_info *info) +{ + const char *cmdline = (info->cmdline) ? info->cmdline : "usermodehelper"; + struct subprocess_info *sub_info; + char **argv = NULL; + struct file *file; + ssize_t written; + loff_t pos = 0; + int err; + + file = shmem_kernel_file_setup("", len, 0); + if (IS_ERR(file)) + return PTR_ERR(file); + + written = kernel_write(file, data, len, &pos); + if (written != len) { + err = written; + if (err >= 0) + err = -ENOMEM; + goto out; + } + + err = -ENOMEM; + argv = argv_split(GFP_KERNEL, cmdline, NULL); + if (!argv) + goto out; + + sub_info = call_usermodehelper_setup("none", argv, NULL, GFP_KERNEL, + umd_setup, umd_cleanup, info); + if (!sub_info) + goto out; + + sub_info->file = file; + err = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC); + if (!err) { + mutex_lock(&umh_list_lock); + list_add(&info->list, &umh_list); + mutex_unlock(&umh_list_lock); + } +out: + if (argv) + argv_free(argv); + fput(file); + return err; +} +EXPORT_SYMBOL_GPL(fork_usermode_blob); + +void __exit_umh(struct task_struct *tsk) +{ + struct umh_info *info; + pid_t pid = tsk->pid; + + mutex_lock(&umh_list_lock); + list_for_each_entry(info, &umh_list, list) { + if (info->pid == pid) { + list_del(&info->list); + mutex_unlock(&umh_list_lock); + goto out; + } + } + mutex_unlock(&umh_list_lock); + return; +out: + if (info->cleanup) + info->cleanup(info); +} + From 74be2d3b80af1bb264c3b9905b52c15efc03c0fe Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 26 Jun 2020 11:16:06 -0500 Subject: [PATCH 06/23] umd: For clarity rename umh_info umd_info This structure is only used for user mode drivers so change the prefix from umh to umd to make that clear. v1: https://lkml.kernel.org/r/87o8p6f0kw.fsf_-_@x220.int.ebiederm.org v2: https://lkml.kernel.org/r/878sg563po.fsf_-_@x220.int.ebiederm.org Link: https://lkml.kernel.org/r/20200702164140.4468-6-ebiederm@xmission.com Reviewed-by: Greg Kroah-Hartman Acked-by: Alexei Starovoitov Tested-by: Alexei Starovoitov Signed-off-by: "Eric W. Biederman" --- include/linux/bpfilter.h | 2 +- include/linux/usermode_driver.h | 6 +++--- kernel/usermode_driver.c | 20 ++++++++++---------- net/ipv4/bpfilter/sockopt.c | 2 +- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h index d6d6206052a6..ec9972d822e0 100644 --- a/include/linux/bpfilter.h +++ b/include/linux/bpfilter.h @@ -11,7 +11,7 @@ int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval, int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen); struct bpfilter_umh_ops { - struct umh_info info; + struct umd_info info; /* since ip_getsockopt() can run in parallel, serialize access to umh */ struct mutex lock; int (*sockopt)(struct sock *sk, int optname, diff --git a/include/linux/usermode_driver.h b/include/linux/usermode_driver.h index c5f6dc950227..7131ea611bab 100644 --- a/include/linux/usermode_driver.h +++ b/include/linux/usermode_driver.h @@ -17,14 +17,14 @@ static inline void exit_umh(struct task_struct *tsk) } #endif -struct umh_info { +struct umd_info { const char *cmdline; struct file *pipe_to_umh; struct file *pipe_from_umh; struct list_head list; - void (*cleanup)(struct umh_info *info); + void (*cleanup)(struct umd_info *info); pid_t pid; }; -int fork_usermode_blob(void *data, size_t len, struct umh_info *info); +int fork_usermode_blob(void *data, size_t len, struct umd_info *info); #endif /* __LINUX_USERMODE_DRIVER_H__ */ diff --git a/kernel/usermode_driver.c b/kernel/usermode_driver.c index 5b05863af855..e73550e946d6 100644 --- a/kernel/usermode_driver.c +++ b/kernel/usermode_driver.c @@ -11,7 +11,7 @@ static DEFINE_MUTEX(umh_list_lock); static int umd_setup(struct subprocess_info *info, struct cred *new) { - struct umh_info *umh_info = info->data; + struct umd_info *umd_info = info->data; struct file *from_umh[2]; struct file *to_umh[2]; int err; @@ -43,21 +43,21 @@ static int umd_setup(struct subprocess_info *info, struct cred *new) return err; } - umh_info->pipe_to_umh = to_umh[1]; - umh_info->pipe_from_umh = from_umh[0]; - umh_info->pid = task_pid_nr(current); + umd_info->pipe_to_umh = to_umh[1]; + umd_info->pipe_from_umh = from_umh[0]; + umd_info->pid = task_pid_nr(current); current->flags |= PF_UMH; return 0; } static void umd_cleanup(struct subprocess_info *info) { - struct umh_info *umh_info = info->data; + struct umd_info *umd_info = info->data; /* cleanup if umh_setup() was successful but exec failed */ if (info->retval) { - fput(umh_info->pipe_to_umh); - fput(umh_info->pipe_from_umh); + fput(umd_info->pipe_to_umh); + fput(umd_info->pipe_from_umh); } } @@ -72,12 +72,12 @@ static void umd_cleanup(struct subprocess_info *info) * * Returns either negative error or zero which indicates success * in executing a blob of bytes as a usermode process. In such - * case 'struct umh_info *info' is populated with two pipes + * case 'struct umd_info *info' is populated with two pipes * and a pid of the process. The caller is responsible for health * check of the user process, killing it via pid, and closing the * pipes when user process is no longer needed. */ -int fork_usermode_blob(void *data, size_t len, struct umh_info *info) +int fork_usermode_blob(void *data, size_t len, struct umd_info *info) { const char *cmdline = (info->cmdline) ? info->cmdline : "usermodehelper"; struct subprocess_info *sub_info; @@ -126,7 +126,7 @@ EXPORT_SYMBOL_GPL(fork_usermode_blob); void __exit_umh(struct task_struct *tsk) { - struct umh_info *info; + struct umd_info *info; pid_t pid = tsk->pid; mutex_lock(&umh_list_lock); diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c index 0480918bfc7c..c0dbcc86fcdb 100644 --- a/net/ipv4/bpfilter/sockopt.c +++ b/net/ipv4/bpfilter/sockopt.c @@ -12,7 +12,7 @@ struct bpfilter_umh_ops bpfilter_ops; EXPORT_SYMBOL_GPL(bpfilter_ops); -static void bpfilter_umh_cleanup(struct umh_info *info) +static void bpfilter_umh_cleanup(struct umd_info *info) { mutex_lock(&bpfilter_ops.lock); bpfilter_ops.stop = true; From 1199c6c3da5197e9924a906b9de71b8d0ac62a01 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 25 Jun 2020 11:38:08 -0500 Subject: [PATCH 07/23] umd: Rename umd_info.cmdline umd_info.driver_name The only thing supplied in the cmdline today is the driver name so rename the field to clarify the code. As this value is always supplied stop trying to handle the case of a NULL cmdline. Additionally since we now have a name we can count on use the driver_name any place where the code is looking for a name of the binary. v1: https://lkml.kernel.org/r/87imfef0k3.fsf_-_@x220.int.ebiederm.org v2: https://lkml.kernel.org/r/87366d63os.fsf_-_@x220.int.ebiederm.org Link: https://lkml.kernel.org/r/20200702164140.4468-7-ebiederm@xmission.com Reviewed-by: Greg Kroah-Hartman Acked-by: Alexei Starovoitov Tested-by: Alexei Starovoitov Signed-off-by: "Eric W. Biederman" --- include/linux/usermode_driver.h | 2 +- kernel/usermode_driver.c | 11 ++++------- net/ipv4/bpfilter/sockopt.c | 2 +- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/include/linux/usermode_driver.h b/include/linux/usermode_driver.h index 7131ea611bab..48cf25e3145d 100644 --- a/include/linux/usermode_driver.h +++ b/include/linux/usermode_driver.h @@ -18,7 +18,7 @@ static inline void exit_umh(struct task_struct *tsk) #endif struct umd_info { - const char *cmdline; + const char *driver_name; struct file *pipe_to_umh; struct file *pipe_from_umh; struct list_head list; diff --git a/kernel/usermode_driver.c b/kernel/usermode_driver.c index e73550e946d6..46d60d855e93 100644 --- a/kernel/usermode_driver.c +++ b/kernel/usermode_driver.c @@ -67,9 +67,6 @@ static void umd_cleanup(struct subprocess_info *info) * @len: length of the blob * @info: information about usermode process (shouldn't be NULL) * - * If info->cmdline is set it will be used as command line for the - * user process, else "usermodehelper" is used. - * * Returns either negative error or zero which indicates success * in executing a blob of bytes as a usermode process. In such * case 'struct umd_info *info' is populated with two pipes @@ -79,7 +76,6 @@ static void umd_cleanup(struct subprocess_info *info) */ int fork_usermode_blob(void *data, size_t len, struct umd_info *info) { - const char *cmdline = (info->cmdline) ? info->cmdline : "usermodehelper"; struct subprocess_info *sub_info; char **argv = NULL; struct file *file; @@ -87,7 +83,7 @@ int fork_usermode_blob(void *data, size_t len, struct umd_info *info) loff_t pos = 0; int err; - file = shmem_kernel_file_setup("", len, 0); + file = shmem_kernel_file_setup(info->driver_name, len, 0); if (IS_ERR(file)) return PTR_ERR(file); @@ -100,11 +96,12 @@ int fork_usermode_blob(void *data, size_t len, struct umd_info *info) } err = -ENOMEM; - argv = argv_split(GFP_KERNEL, cmdline, NULL); + argv = argv_split(GFP_KERNEL, info->driver_name, NULL); if (!argv) goto out; - sub_info = call_usermodehelper_setup("none", argv, NULL, GFP_KERNEL, + sub_info = call_usermodehelper_setup(info->driver_name, argv, NULL, + GFP_KERNEL, umd_setup, umd_cleanup, info); if (!sub_info) goto out; diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c index c0dbcc86fcdb..5050de28333d 100644 --- a/net/ipv4/bpfilter/sockopt.c +++ b/net/ipv4/bpfilter/sockopt.c @@ -70,7 +70,7 @@ static int __init bpfilter_sockopt_init(void) { mutex_init(&bpfilter_ops.lock); bpfilter_ops.stop = true; - bpfilter_ops.info.cmdline = "bpfilter_umh"; + bpfilter_ops.info.driver_name = "bpfilter_umh"; bpfilter_ops.info.cleanup = &bpfilter_umh_cleanup; return 0; From e2dc9bf3f5275ca372001541e5f26af572976e65 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 25 Jun 2020 13:12:59 -0500 Subject: [PATCH 08/23] umd: Transform fork_usermode_blob into fork_usermode_driver Instead of loading a binary blob into a temporary file with shmem_kernel_file_setup load a binary blob into a temporary tmpfs filesystem. This means that the blob can be stored in an init section and discared, and it means the binary blob will have a filename so can be executed normally. The only tricky thing about this code is that in the helper function blob_to_mnt __fput_sync is used. That is because a file can not be executed if it is still open for write, and the ordinary delayed close for kernel threads does not happen soon enough, which causes the following exec to fail. The function umd_load_blob is not called with any locks so this should be safe. Executing the blob normally winds up correcting several problems with the user mode driver code discovered by Tetsuo Handa[1]. By passing an ordinary filename into the exec, it is no longer necessary to figure out how to turn a O_RDWR file descriptor into a properly referende counted O_EXEC file descriptor that forbids all writes. For path based LSMs there are no new special cases. [1] https://lore.kernel.org/linux-fsdevel/2a8775b4-1dd5-9d5c-aa42-9872445e0942@i-love.sakura.ne.jp/ v1: https://lkml.kernel.org/r/87d05mf0j9.fsf_-_@x220.int.ebiederm.org v2: https://lkml.kernel.org/r/87wo3p4p35.fsf_-_@x220.int.ebiederm.org Link: https://lkml.kernel.org/r/20200702164140.4468-8-ebiederm@xmission.com Reviewed-by: Greg Kroah-Hartman Acked-by: Alexei Starovoitov Tested-by: Alexei Starovoitov Signed-off-by: "Eric W. Biederman" --- include/linux/usermode_driver.h | 6 +- kernel/usermode_driver.c | 126 ++++++++++++++++++++++++-------- net/bpfilter/bpfilter_kern.c | 14 +++- 3 files changed, 113 insertions(+), 33 deletions(-) diff --git a/include/linux/usermode_driver.h b/include/linux/usermode_driver.h index 48cf25e3145d..97c919b7147c 100644 --- a/include/linux/usermode_driver.h +++ b/include/linux/usermode_driver.h @@ -2,6 +2,7 @@ #define __LINUX_USERMODE_DRIVER_H__ #include +#include #ifdef CONFIG_BPFILTER void __exit_umh(struct task_struct *tsk); @@ -23,8 +24,11 @@ struct umd_info { struct file *pipe_from_umh; struct list_head list; void (*cleanup)(struct umd_info *info); + struct path wd; pid_t pid; }; -int fork_usermode_blob(void *data, size_t len, struct umd_info *info); +int umd_load_blob(struct umd_info *info, const void *data, size_t len); +int umd_unload_blob(struct umd_info *info); +int fork_usermode_driver(struct umd_info *info); #endif /* __LINUX_USERMODE_DRIVER_H__ */ diff --git a/kernel/usermode_driver.c b/kernel/usermode_driver.c index 46d60d855e93..a86798759f83 100644 --- a/kernel/usermode_driver.c +++ b/kernel/usermode_driver.c @@ -4,11 +4,98 @@ */ #include #include +#include +#include +#include #include static LIST_HEAD(umh_list); static DEFINE_MUTEX(umh_list_lock); +static struct vfsmount *blob_to_mnt(const void *data, size_t len, const char *name) +{ + struct file_system_type *type; + struct vfsmount *mnt; + struct file *file; + ssize_t written; + loff_t pos = 0; + + type = get_fs_type("tmpfs"); + if (!type) + return ERR_PTR(-ENODEV); + + mnt = kern_mount(type); + put_filesystem(type); + if (IS_ERR(mnt)) + return mnt; + + file = file_open_root(mnt->mnt_root, mnt, name, O_CREAT | O_WRONLY, 0700); + if (IS_ERR(file)) { + mntput(mnt); + return ERR_CAST(file); + } + + written = kernel_write(file, data, len, &pos); + if (written != len) { + int err = written; + if (err >= 0) + err = -ENOMEM; + filp_close(file, NULL); + mntput(mnt); + return ERR_PTR(err); + } + + fput(file); + + /* Flush delayed fput so exec can open the file read-only */ + flush_delayed_fput(); + task_work_run(); + return mnt; +} + +/** + * umd_load_blob - Remember a blob of bytes for fork_usermode_driver + * @info: information about usermode driver + * @data: a blob of bytes that can be executed as a file + * @len: The lentgh of the blob + * + */ +int umd_load_blob(struct umd_info *info, const void *data, size_t len) +{ + struct vfsmount *mnt; + + if (WARN_ON_ONCE(info->wd.dentry || info->wd.mnt)) + return -EBUSY; + + mnt = blob_to_mnt(data, len, info->driver_name); + if (IS_ERR(mnt)) + return PTR_ERR(mnt); + + info->wd.mnt = mnt; + info->wd.dentry = mnt->mnt_root; + return 0; +} +EXPORT_SYMBOL_GPL(umd_load_blob); + +/** + * umd_unload_blob - Disassociate @info from a previously loaded blob + * @info: information about usermode driver + * + */ +int umd_unload_blob(struct umd_info *info) +{ + if (WARN_ON_ONCE(!info->wd.mnt || + !info->wd.dentry || + info->wd.mnt->mnt_root != info->wd.dentry)) + return -EINVAL; + + kern_unmount(info->wd.mnt); + info->wd.mnt = NULL; + info->wd.dentry = NULL; + return 0; +} +EXPORT_SYMBOL_GPL(umd_unload_blob); + static int umd_setup(struct subprocess_info *info, struct cred *new) { struct umd_info *umd_info = info->data; @@ -43,6 +130,7 @@ static int umd_setup(struct subprocess_info *info, struct cred *new) return err; } + set_fs_pwd(current->fs, &umd_info->wd); umd_info->pipe_to_umh = to_umh[1]; umd_info->pipe_from_umh = from_umh[0]; umd_info->pid = task_pid_nr(current); @@ -62,39 +150,21 @@ static void umd_cleanup(struct subprocess_info *info) } /** - * fork_usermode_blob - fork a blob of bytes as a usermode process - * @data: a blob of bytes that can be do_execv-ed as a file - * @len: length of the blob - * @info: information about usermode process (shouldn't be NULL) + * fork_usermode_driver - fork a usermode driver + * @info: information about usermode driver (shouldn't be NULL) * - * Returns either negative error or zero which indicates success - * in executing a blob of bytes as a usermode process. In such - * case 'struct umd_info *info' is populated with two pipes - * and a pid of the process. The caller is responsible for health - * check of the user process, killing it via pid, and closing the - * pipes when user process is no longer needed. + * Returns either negative error or zero which indicates success in + * executing a usermode driver. In such case 'struct umd_info *info' + * is populated with two pipes and a pid of the process. The caller is + * responsible for health check of the user process, killing it via + * pid, and closing the pipes when user process is no longer needed. */ -int fork_usermode_blob(void *data, size_t len, struct umd_info *info) +int fork_usermode_driver(struct umd_info *info) { struct subprocess_info *sub_info; char **argv = NULL; - struct file *file; - ssize_t written; - loff_t pos = 0; int err; - file = shmem_kernel_file_setup(info->driver_name, len, 0); - if (IS_ERR(file)) - return PTR_ERR(file); - - written = kernel_write(file, data, len, &pos); - if (written != len) { - err = written; - if (err >= 0) - err = -ENOMEM; - goto out; - } - err = -ENOMEM; argv = argv_split(GFP_KERNEL, info->driver_name, NULL); if (!argv) @@ -106,7 +176,6 @@ int fork_usermode_blob(void *data, size_t len, struct umd_info *info) if (!sub_info) goto out; - sub_info->file = file; err = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC); if (!err) { mutex_lock(&umh_list_lock); @@ -116,10 +185,9 @@ int fork_usermode_blob(void *data, size_t len, struct umd_info *info) out: if (argv) argv_free(argv); - fput(file); return err; } -EXPORT_SYMBOL_GPL(fork_usermode_blob); +EXPORT_SYMBOL_GPL(fork_usermode_driver); void __exit_umh(struct task_struct *tsk) { diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c index c0f0990f30b6..28883b00609d 100644 --- a/net/bpfilter/bpfilter_kern.c +++ b/net/bpfilter/bpfilter_kern.c @@ -77,9 +77,7 @@ static int start_umh(void) int err; /* fork usermode process */ - err = fork_usermode_blob(&bpfilter_umh_start, - &bpfilter_umh_end - &bpfilter_umh_start, - &bpfilter_ops.info); + err = fork_usermode_driver(&bpfilter_ops.info); if (err) return err; bpfilter_ops.stop = false; @@ -98,6 +96,12 @@ static int __init load_umh(void) { int err; + err = umd_load_blob(&bpfilter_ops.info, + &bpfilter_umh_start, + &bpfilter_umh_end - &bpfilter_umh_start); + if (err) + return err; + mutex_lock(&bpfilter_ops.lock); if (!bpfilter_ops.stop) { err = -EFAULT; @@ -110,6 +114,8 @@ static int __init load_umh(void) } out: mutex_unlock(&bpfilter_ops.lock); + if (err) + umd_unload_blob(&bpfilter_ops.info); return err; } @@ -122,6 +128,8 @@ static void __exit fini_umh(void) bpfilter_ops.sockopt = NULL; } mutex_unlock(&bpfilter_ops.lock); + + umd_unload_blob(&bpfilter_ops.info); } module_init(load_umh); module_exit(fini_umh); From 55e6074e3fa67e1fb9ec140904db7e6cae6eda4b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 25 Jun 2020 13:52:50 -0500 Subject: [PATCH 09/23] umh: Stop calling do_execve_file With the user mode driver code changed to not set subprocess_info.file there are no more users of subproces_info.file. Remove this field from struct subprocess_info and remove the only user in call_usermodehelper_exec_async that would call do_execve_file instead of do_execve if file was set. v1: https://lkml.kernel.org/r/877dvuf0i7.fsf_-_@x220.int.ebiederm.org v2: https://lkml.kernel.org/r/87r1tx4p2a.fsf_-_@x220.int.ebiederm.org Link: https://lkml.kernel.org/r/20200702164140.4468-9-ebiederm@xmission.com Reviewed-by: Greg Kroah-Hartman Acked-by: Alexei Starovoitov Tested-by: Alexei Starovoitov Signed-off-by: "Eric W. Biederman" --- include/linux/umh.h | 1 - kernel/umh.c | 10 +++------- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/include/linux/umh.h b/include/linux/umh.h index 73173c4a07e5..244aff638220 100644 --- a/include/linux/umh.h +++ b/include/linux/umh.h @@ -22,7 +22,6 @@ struct subprocess_info { const char *path; char **argv; char **envp; - struct file *file; int wait; int retval; int (*init)(struct subprocess_info *info, struct cred *new); diff --git a/kernel/umh.c b/kernel/umh.c index 3e4e453d45c8..6ca2096298b9 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -98,13 +98,9 @@ static int call_usermodehelper_exec_async(void *data) commit_creds(new); - if (sub_info->file) - retval = do_execve_file(sub_info->file, - sub_info->argv, sub_info->envp); - else - retval = do_execve(getname_kernel(sub_info->path), - (const char __user *const __user *)sub_info->argv, - (const char __user *const __user *)sub_info->envp); + retval = do_execve(getname_kernel(sub_info->path), + (const char __user *const __user *)sub_info->argv, + (const char __user *const __user *)sub_info->envp); out: sub_info->retval = retval; /* From 25cf336de51b51a3e440e1893751f9532095eff0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 25 Jun 2020 13:56:40 -0500 Subject: [PATCH 10/23] exec: Remove do_execve_file Now that the last callser has been removed remove this code from exec. For anyone thinking of resurrecing do_execve_file please note that the code was buggy in several fundamental ways. - It did not ensure the file it was passed was read-only and that deny_write_access had been called on it. Which subtlely breaks invaniants in exec. - The caller of do_execve_file was expected to hold and put a reference to the file, but an extra reference for use by exec was not taken so that when exec put it's reference to the file an underflow occured on the file reference count. - The point of the interface was so that a pathname did not need to exist. Which breaks pathname based LSMs. Tetsuo Handa originally reported these issues[1]. While it was clear that deny_write_access was missing the fundamental incompatibility with the passed in O_RDWR filehandle was not immediately recognized. All of these issues were fixed by modifying the usermode driver code to have a path, so it did not need this hack. Reported-by: Tetsuo Handa [1] https://lore.kernel.org/linux-fsdevel/2a8775b4-1dd5-9d5c-aa42-9872445e0942@i-love.sakura.ne.jp/ v1: https://lkml.kernel.org/r/871rm2f0hi.fsf_-_@x220.int.ebiederm.org v2: https://lkml.kernel.org/r/87lfk54p0m.fsf_-_@x220.int.ebiederm.org Link: https://lkml.kernel.org/r/20200702164140.4468-10-ebiederm@xmission.com Reviewed-by: Greg Kroah-Hartman Acked-by: Alexei Starovoitov Tested-by: Alexei Starovoitov Signed-off-by: "Eric W. Biederman" --- fs/exec.c | 38 +++++++++----------------------------- include/linux/binfmts.h | 1 - 2 files changed, 9 insertions(+), 30 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index e6e8a9a70327..23dfbb820626 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1818,13 +1818,14 @@ static int exec_binprm(struct linux_binprm *bprm) /* * sys_execve() executes a new program. */ -static int __do_execve_file(int fd, struct filename *filename, - struct user_arg_ptr argv, - struct user_arg_ptr envp, - int flags, struct file *file) +static int do_execveat_common(int fd, struct filename *filename, + struct user_arg_ptr argv, + struct user_arg_ptr envp, + int flags) { char *pathbuf = NULL; struct linux_binprm *bprm; + struct file *file; struct files_struct *displaced; int retval; @@ -1863,8 +1864,7 @@ static int __do_execve_file(int fd, struct filename *filename, check_unsafe_exec(bprm); current->in_execve = 1; - if (!file) - file = do_open_execat(fd, filename, flags); + file = do_open_execat(fd, filename, flags); retval = PTR_ERR(file); if (IS_ERR(file)) goto out_unmark; @@ -1872,9 +1872,7 @@ static int __do_execve_file(int fd, struct filename *filename, sched_exec(); bprm->file = file; - if (!filename) { - bprm->filename = "none"; - } else if (fd == AT_FDCWD || filename->name[0] == '/') { + if (fd == AT_FDCWD || filename->name[0] == '/') { bprm->filename = filename->name; } else { if (filename->name[0] == '\0') @@ -1935,8 +1933,7 @@ static int __do_execve_file(int fd, struct filename *filename, task_numa_free(current, false); free_bprm(bprm); kfree(pathbuf); - if (filename) - putname(filename); + putname(filename); if (displaced) put_files_struct(displaced); return retval; @@ -1967,27 +1964,10 @@ static int __do_execve_file(int fd, struct filename *filename, if (displaced) reset_files_struct(displaced); out_ret: - if (filename) - putname(filename); + putname(filename); return retval; } -static int do_execveat_common(int fd, struct filename *filename, - struct user_arg_ptr argv, - struct user_arg_ptr envp, - int flags) -{ - return __do_execve_file(fd, filename, argv, envp, flags, NULL); -} - -int do_execve_file(struct file *file, void *__argv, void *__envp) -{ - struct user_arg_ptr argv = { .ptr.native = __argv }; - struct user_arg_ptr envp = { .ptr.native = __envp }; - - return __do_execve_file(AT_FDCWD, NULL, argv, envp, 0, file); -} - int do_execve(struct filename *filename, const char __user *const __user *__argv, const char __user *const __user *__envp) diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 4a20b7517dd0..7c27d7b57871 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -141,6 +141,5 @@ extern int do_execveat(int, struct filename *, const char __user * const __user *, const char __user * const __user *, int); -int do_execve_file(struct file *file, void *__argv, void *__envp); #endif /* _LINUX_BINFMTS_H */ From 0fe3c63148ef5df1b3cb067e4b4d45d45d0c0fdc Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 25 Jun 2020 16:08:46 -0500 Subject: [PATCH 11/23] bpfilter: Move bpfilter_umh back into init data To allow for restarts 61fbf5933d42 ("net: bpfilter: restart bpfilter_umh when error occurred") moved the blob holding the userspace binary out of the init sections. Now that loading the blob into a filesystem is separate from executing the blob the blob no longer needs to live .rodata to allow for restarting. So move the blob back to .init.rodata. v1: https://lkml.kernel.org/r/87sgeidlvq.fsf_-_@x220.int.ebiederm.org v2: https://lkml.kernel.org/r/87ftad4ozc.fsf_-_@x220.int.ebiederm.org Link: https://lkml.kernel.org/r/20200702164140.4468-11-ebiederm@xmission.com Reviewed-by: Greg Kroah-Hartman Acked-by: Alexei Starovoitov Tested-by: Alexei Starovoitov Signed-off-by: "Eric W. Biederman" --- net/bpfilter/bpfilter_umh_blob.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bpfilter/bpfilter_umh_blob.S b/net/bpfilter/bpfilter_umh_blob.S index 9ea6100dca87..40311d10d2f2 100644 --- a/net/bpfilter/bpfilter_umh_blob.S +++ b/net/bpfilter/bpfilter_umh_blob.S @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ - .section .rodata, "a" + .section .init.rodata, "a" .global bpfilter_umh_start bpfilter_umh_start: .incbin "net/bpfilter/bpfilter_umh" From 1c340ead18ee4b4a84357abdef6d4f39ee08328b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 25 Jun 2020 16:48:26 -0500 Subject: [PATCH 12/23] umd: Track user space drivers with struct pid Use struct pid instead of user space pid values that are prone to wrap araound. In addition track the entire thread group instead of just the first thread that is started by exec. There are no multi-threaded user mode drivers today but there is nothing preclucing user drivers from being multi-threaded, so it is just a good idea to track the entire process. Take a reference count on the tgid's in question to make it possible to remove exit_umh in a future change. As a struct pid is available directly use kill_pid_info. The prior process signalling code was iffy in using a userspace pid known to be in the initial pid namespace and then looking up it's task in whatever the current pid namespace is. It worked only because kernel threads always run in the initial pid namespace. As the tgid is now refcounted verify the tgid is NULL at the start of fork_usermode_driver to avoid the possibility of silent pid leaks. v1: https://lkml.kernel.org/r/87mu4qdlv2.fsf_-_@x220.int.ebiederm.org v2: https://lkml.kernel.org/r/a70l4oy8.fsf_-_@x220.int.ebiederm.org Link: https://lkml.kernel.org/r/20200702164140.4468-12-ebiederm@xmission.com Reviewed-by: Greg Kroah-Hartman Acked-by: Alexei Starovoitov Tested-by: Alexei Starovoitov Signed-off-by: "Eric W. Biederman" --- include/linux/usermode_driver.h | 2 +- kernel/exit.c | 3 ++- kernel/usermode_driver.c | 15 ++++++++++----- net/bpfilter/bpfilter_kern.c | 13 +++++-------- net/ipv4/bpfilter/sockopt.c | 3 ++- 5 files changed, 20 insertions(+), 16 deletions(-) diff --git a/include/linux/usermode_driver.h b/include/linux/usermode_driver.h index 97c919b7147c..45adbffb31d9 100644 --- a/include/linux/usermode_driver.h +++ b/include/linux/usermode_driver.h @@ -25,7 +25,7 @@ struct umd_info { struct list_head list; void (*cleanup)(struct umd_info *info); struct path wd; - pid_t pid; + struct pid *tgid; }; int umd_load_blob(struct umd_info *info, const void *data, size_t len); int umd_unload_blob(struct umd_info *info); diff --git a/kernel/exit.c b/kernel/exit.c index a081deea52ca..d3294b611df1 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -805,7 +805,8 @@ void __noreturn do_exit(long code) exit_task_namespaces(tsk); exit_task_work(tsk); exit_thread(tsk); - exit_umh(tsk); + if (group_dead) + exit_umh(tsk); /* * Flush inherited counters to the parent - before the parent diff --git a/kernel/usermode_driver.c b/kernel/usermode_driver.c index a86798759f83..f77f8d7ce9e3 100644 --- a/kernel/usermode_driver.c +++ b/kernel/usermode_driver.c @@ -133,7 +133,7 @@ static int umd_setup(struct subprocess_info *info, struct cred *new) set_fs_pwd(current->fs, &umd_info->wd); umd_info->pipe_to_umh = to_umh[1]; umd_info->pipe_from_umh = from_umh[0]; - umd_info->pid = task_pid_nr(current); + umd_info->tgid = get_pid(task_tgid(current)); current->flags |= PF_UMH; return 0; } @@ -146,6 +146,8 @@ static void umd_cleanup(struct subprocess_info *info) if (info->retval) { fput(umd_info->pipe_to_umh); fput(umd_info->pipe_from_umh); + put_pid(umd_info->tgid); + umd_info->tgid = NULL; } } @@ -155,9 +157,9 @@ static void umd_cleanup(struct subprocess_info *info) * * Returns either negative error or zero which indicates success in * executing a usermode driver. In such case 'struct umd_info *info' - * is populated with two pipes and a pid of the process. The caller is + * is populated with two pipes and a tgid of the process. The caller is * responsible for health check of the user process, killing it via - * pid, and closing the pipes when user process is no longer needed. + * tgid, and closing the pipes when user process is no longer needed. */ int fork_usermode_driver(struct umd_info *info) { @@ -165,6 +167,9 @@ int fork_usermode_driver(struct umd_info *info) char **argv = NULL; int err; + if (WARN_ON_ONCE(info->tgid)) + return -EBUSY; + err = -ENOMEM; argv = argv_split(GFP_KERNEL, info->driver_name, NULL); if (!argv) @@ -192,11 +197,11 @@ EXPORT_SYMBOL_GPL(fork_usermode_driver); void __exit_umh(struct task_struct *tsk) { struct umd_info *info; - pid_t pid = tsk->pid; + struct pid *tgid = task_tgid(tsk); mutex_lock(&umh_list_lock); list_for_each_entry(info, &umh_list, list) { - if (info->pid == pid) { + if (info->tgid == tgid) { list_del(&info->list); mutex_unlock(&umh_list_lock); goto out; diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c index 28883b00609d..08ea77c2b137 100644 --- a/net/bpfilter/bpfilter_kern.c +++ b/net/bpfilter/bpfilter_kern.c @@ -15,16 +15,13 @@ extern char bpfilter_umh_end; static void shutdown_umh(void) { - struct task_struct *tsk; + struct umd_info *info = &bpfilter_ops.info; + struct pid *tgid = info->tgid; if (bpfilter_ops.stop) return; - tsk = get_pid_task(find_vpid(bpfilter_ops.info.pid), PIDTYPE_PID); - if (tsk) { - send_sig(SIGKILL, tsk, 1); - put_task_struct(tsk); - } + kill_pid(tgid, SIGKILL, 1); } static void __stop_umh(void) @@ -48,7 +45,7 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname, req.cmd = optname; req.addr = (long __force __user)optval; req.len = optlen; - if (!bpfilter_ops.info.pid) + if (!bpfilter_ops.info.tgid) goto out; n = __kernel_write(bpfilter_ops.info.pipe_to_umh, &req, sizeof(req), &pos); @@ -81,7 +78,7 @@ static int start_umh(void) if (err) return err; bpfilter_ops.stop = false; - pr_info("Loaded bpfilter_umh pid %d\n", bpfilter_ops.info.pid); + pr_info("Loaded bpfilter_umh pid %d\n", pid_nr(bpfilter_ops.info.tgid)); /* health check that usermode process started correctly */ if (__bpfilter_process_sockopt(NULL, 0, NULL, 0, 0) != 0) { diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c index 5050de28333d..56cbc43145f6 100644 --- a/net/ipv4/bpfilter/sockopt.c +++ b/net/ipv4/bpfilter/sockopt.c @@ -18,7 +18,8 @@ static void bpfilter_umh_cleanup(struct umd_info *info) bpfilter_ops.stop = true; fput(info->pipe_to_umh); fput(info->pipe_from_umh); - info->pid = 0; + put_pid(info->tgid); + info->tgid = NULL; mutex_unlock(&bpfilter_ops.lock); } From 38fd525a4c61e7ecdc9ad4dcbf7b767d0a007962 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 1 Jul 2020 07:30:06 -0500 Subject: [PATCH 13/23] exit: Factor thread_group_exited out of pidfd_poll Create an independent helper thread_group_exited which returns true when all threads have passed exit_notify in do_exit. AKA all of the threads are at least zombies and might be dead or completely gone. Create this helper by taking the logic out of pidfd_poll where it is already tested, and adding a READ_ONCE on the read of task->exit_state. I will be changing the user mode driver code to use this same logic to know when a user mode driver needs to be restarted. Place the new helper thread_group_exited in kernel/exit.c and EXPORT it so it can be used by modules. Link: https://lkml.kernel.org/r/20200702164140.4468-13-ebiederm@xmission.com Acked-by: Christian Brauner Acked-by: Alexei Starovoitov Tested-by: Alexei Starovoitov Signed-off-by: "Eric W. Biederman" --- include/linux/sched/signal.h | 2 ++ kernel/exit.c | 24 ++++++++++++++++++++++++ kernel/fork.c | 6 +----- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 0ee5e696c5d8..1bad18a1d8ba 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -674,6 +674,8 @@ static inline int thread_group_empty(struct task_struct *p) #define delay_group_leader(p) \ (thread_group_leader(p) && !thread_group_empty(p)) +extern bool thread_group_exited(struct pid *pid); + extern struct sighand_struct *__lock_task_sighand(struct task_struct *task, unsigned long *flags); diff --git a/kernel/exit.c b/kernel/exit.c index d3294b611df1..dee246c0866f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1713,6 +1713,30 @@ COMPAT_SYSCALL_DEFINE5(waitid, } #endif +/** + * thread_group_exited - check that a thread group has exited + * @pid: tgid of thread group to be checked. + * + * Test if the thread group represented by tgid has exited (all + * threads are zombies, dead or completely gone). + * + * Return: true if the thread group has exited. false otherwise. + */ +bool thread_group_exited(struct pid *pid) +{ + struct task_struct *task; + bool exited; + + rcu_read_lock(); + task = pid_task(pid, PIDTYPE_PID); + exited = !task || + (READ_ONCE(task->exit_state) && thread_group_empty(task)); + rcu_read_unlock(); + + return exited; +} +EXPORT_SYMBOL(thread_group_exited); + __weak void abort(void) { BUG(); diff --git a/kernel/fork.c b/kernel/fork.c index 142b23645d82..bf215af7a904 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1787,22 +1787,18 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) */ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) { - struct task_struct *task; struct pid *pid = file->private_data; __poll_t poll_flags = 0; poll_wait(file, &pid->wait_pidfd, pts); - rcu_read_lock(); - task = pid_task(pid, PIDTYPE_PID); /* * Inform pollers only when the whole thread group exits. * If the thread group leader exits before all other threads in the * group, then poll(2) should block, similar to the wait(2) family. */ - if (!task || (task->exit_state && thread_group_empty(task))) + if (thread_group_exited(pid)) poll_flags = EPOLLIN | EPOLLRDNORM; - rcu_read_unlock(); return poll_flags; } From e80eb1dc868bc1ed93602389d54b27f170ca770c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 25 Jun 2020 17:23:22 -0500 Subject: [PATCH 14/23] bpfilter: Take advantage of the facilities of struct pid Instead of relying on the exit_umh cleanup callback use the fact a struct pid can be tested to see if a process still exists, and that struct pid has a wait queue that notifies when the process dies. v1: https://lkml.kernel.org/r/87h7uydlu9.fsf_-_@x220.int.ebiederm.org v2: https://lkml.kernel.org/r/874kqt4owu.fsf_-_@x220.int.ebiederm.org Link: https://lkml.kernel.org/r/20200702164140.4468-14-ebiederm@xmission.com Reviewed-by: Greg Kroah-Hartman Acked-by: Alexei Starovoitov Tested-by: Alexei Starovoitov Signed-off-by: "Eric W. Biederman" --- include/linux/bpfilter.h | 3 ++- net/bpfilter/bpfilter_kern.c | 15 +++++---------- net/ipv4/bpfilter/sockopt.c | 15 ++++++++------- 3 files changed, 15 insertions(+), 18 deletions(-) diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h index ec9972d822e0..9b114c718a76 100644 --- a/include/linux/bpfilter.h +++ b/include/linux/bpfilter.h @@ -10,6 +10,8 @@ int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen); int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen); +void bpfilter_umh_cleanup(struct umd_info *info); + struct bpfilter_umh_ops { struct umd_info info; /* since ip_getsockopt() can run in parallel, serialize access to umh */ @@ -18,7 +20,6 @@ struct bpfilter_umh_ops { char __user *optval, unsigned int optlen, bool is_set); int (*start)(void); - bool stop; }; extern struct bpfilter_umh_ops bpfilter_ops; #endif diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c index 08ea77c2b137..9616fb7defeb 100644 --- a/net/bpfilter/bpfilter_kern.c +++ b/net/bpfilter/bpfilter_kern.c @@ -18,10 +18,11 @@ static void shutdown_umh(void) struct umd_info *info = &bpfilter_ops.info; struct pid *tgid = info->tgid; - if (bpfilter_ops.stop) - return; - - kill_pid(tgid, SIGKILL, 1); + if (tgid) { + kill_pid(tgid, SIGKILL, 1); + wait_event(tgid->wait_pidfd, thread_group_exited(tgid)); + bpfilter_umh_cleanup(info); + } } static void __stop_umh(void) @@ -77,7 +78,6 @@ static int start_umh(void) err = fork_usermode_driver(&bpfilter_ops.info); if (err) return err; - bpfilter_ops.stop = false; pr_info("Loaded bpfilter_umh pid %d\n", pid_nr(bpfilter_ops.info.tgid)); /* health check that usermode process started correctly */ @@ -100,16 +100,11 @@ static int __init load_umh(void) return err; mutex_lock(&bpfilter_ops.lock); - if (!bpfilter_ops.stop) { - err = -EFAULT; - goto out; - } err = start_umh(); if (!err && IS_ENABLED(CONFIG_INET)) { bpfilter_ops.sockopt = &__bpfilter_process_sockopt; bpfilter_ops.start = &start_umh; } -out: mutex_unlock(&bpfilter_ops.lock); if (err) umd_unload_blob(&bpfilter_ops.info); diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c index 56cbc43145f6..9063c6767d34 100644 --- a/net/ipv4/bpfilter/sockopt.c +++ b/net/ipv4/bpfilter/sockopt.c @@ -12,16 +12,14 @@ struct bpfilter_umh_ops bpfilter_ops; EXPORT_SYMBOL_GPL(bpfilter_ops); -static void bpfilter_umh_cleanup(struct umd_info *info) +void bpfilter_umh_cleanup(struct umd_info *info) { - mutex_lock(&bpfilter_ops.lock); - bpfilter_ops.stop = true; fput(info->pipe_to_umh); fput(info->pipe_from_umh); put_pid(info->tgid); info->tgid = NULL; - mutex_unlock(&bpfilter_ops.lock); } +EXPORT_SYMBOL_GPL(bpfilter_umh_cleanup); static int bpfilter_mbox_request(struct sock *sk, int optname, char __user *optval, @@ -39,7 +37,11 @@ static int bpfilter_mbox_request(struct sock *sk, int optname, goto out; } } - if (bpfilter_ops.stop) { + if (bpfilter_ops.info.tgid && + thread_group_exited(bpfilter_ops.info.tgid)) + bpfilter_umh_cleanup(&bpfilter_ops.info); + + if (!bpfilter_ops.info.tgid) { err = bpfilter_ops.start(); if (err) goto out; @@ -70,9 +72,8 @@ int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, static int __init bpfilter_sockopt_init(void) { mutex_init(&bpfilter_ops.lock); - bpfilter_ops.stop = true; + bpfilter_ops.info.tgid = NULL; bpfilter_ops.info.driver_name = "bpfilter_umh"; - bpfilter_ops.info.cleanup = &bpfilter_umh_cleanup; return 0; } From 8c2f52663973e643c617663d826e2b0daa008b38 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 25 Jun 2020 17:40:40 -0500 Subject: [PATCH 15/23] umd: Remove exit_umh The bpfilter code no longer uses the umd_info.cleanup callback. This callback is what exit_umh exists to call. So remove exit_umh and all of it's associated booking. v1: https://lkml.kernel.org/r/87bll6dlte.fsf_-_@x220.int.ebiederm.org v2: https://lkml.kernel.org/r/87y2o53abg.fsf_-_@x220.int.ebiederm.org Link: https://lkml.kernel.org/r/20200702164140.4468-15-ebiederm@xmission.com Reviewed-by: Greg Kroah-Hartman Acked-by: Alexei Starovoitov Tested-by: Alexei Starovoitov Signed-off-by: "Eric W. Biederman" --- include/linux/sched.h | 1 - include/linux/usermode_driver.h | 16 ---------------- kernel/exit.c | 3 --- kernel/usermode_driver.c | 28 ---------------------------- 4 files changed, 48 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 59d1e92bb88e..edb2020875ad 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1511,7 +1511,6 @@ extern struct pid *cad_pid; #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ -#define PF_UMH 0x02000000 /* I'm an Usermodehelper process */ #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */ diff --git a/include/linux/usermode_driver.h b/include/linux/usermode_driver.h index 45adbffb31d9..073a9e0ec07d 100644 --- a/include/linux/usermode_driver.h +++ b/include/linux/usermode_driver.h @@ -4,26 +4,10 @@ #include #include -#ifdef CONFIG_BPFILTER -void __exit_umh(struct task_struct *tsk); - -static inline void exit_umh(struct task_struct *tsk) -{ - if (unlikely(tsk->flags & PF_UMH)) - __exit_umh(tsk); -} -#else -static inline void exit_umh(struct task_struct *tsk) -{ -} -#endif - struct umd_info { const char *driver_name; struct file *pipe_to_umh; struct file *pipe_from_umh; - struct list_head list; - void (*cleanup)(struct umd_info *info); struct path wd; struct pid *tgid; }; diff --git a/kernel/exit.c b/kernel/exit.c index dee246c0866f..39226a018ed7 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -63,7 +63,6 @@ #include #include #include -#include #include #include @@ -805,8 +804,6 @@ void __noreturn do_exit(long code) exit_task_namespaces(tsk); exit_task_work(tsk); exit_thread(tsk); - if (group_dead) - exit_umh(tsk); /* * Flush inherited counters to the parent - before the parent diff --git a/kernel/usermode_driver.c b/kernel/usermode_driver.c index f77f8d7ce9e3..cd136f86f799 100644 --- a/kernel/usermode_driver.c +++ b/kernel/usermode_driver.c @@ -9,9 +9,6 @@ #include #include -static LIST_HEAD(umh_list); -static DEFINE_MUTEX(umh_list_lock); - static struct vfsmount *blob_to_mnt(const void *data, size_t len, const char *name) { struct file_system_type *type; @@ -134,7 +131,6 @@ static int umd_setup(struct subprocess_info *info, struct cred *new) umd_info->pipe_to_umh = to_umh[1]; umd_info->pipe_from_umh = from_umh[0]; umd_info->tgid = get_pid(task_tgid(current)); - current->flags |= PF_UMH; return 0; } @@ -182,11 +178,6 @@ int fork_usermode_driver(struct umd_info *info) goto out; err = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC); - if (!err) { - mutex_lock(&umh_list_lock); - list_add(&info->list, &umh_list); - mutex_unlock(&umh_list_lock); - } out: if (argv) argv_free(argv); @@ -194,23 +185,4 @@ int fork_usermode_driver(struct umd_info *info) } EXPORT_SYMBOL_GPL(fork_usermode_driver); -void __exit_umh(struct task_struct *tsk) -{ - struct umd_info *info; - struct pid *tgid = task_tgid(tsk); - - mutex_lock(&umh_list_lock); - list_for_each_entry(info, &umh_list, list) { - if (info->tgid == tgid) { - list_del(&info->list); - mutex_unlock(&umh_list_lock); - goto out; - } - } - mutex_unlock(&umh_list_lock); - return; -out: - if (info->cleanup) - info->cleanup(info); -} From 33c326014fe69304244868cc793c2c77be533125 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 29 Jun 2020 08:28:33 -0500 Subject: [PATCH 16/23] umd: Stop using split_argv There is exactly one argument so there is nothing to split. All split_argv does now is cause confusion and avoid the need for a cast when passing a "const char *" string to call_usermodehelper_setup. So avoid confusion and the possibility of an odd driver name causing problems by just using a fixed argv array with a cast in the call to call_usermodehelper_setup. v1: https://lkml.kernel.org/r/87sged3a9n.fsf_-_@x220.int.ebiederm.org Link: https://lkml.kernel.org/r/20200702164140.4468-16-ebiederm@xmission.com Acked-by: Alexei Starovoitov Tested-by: Alexei Starovoitov Signed-off-by: "Eric W. Biederman" --- kernel/usermode_driver.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/kernel/usermode_driver.c b/kernel/usermode_driver.c index cd136f86f799..0b35212ffc3d 100644 --- a/kernel/usermode_driver.c +++ b/kernel/usermode_driver.c @@ -160,27 +160,21 @@ static void umd_cleanup(struct subprocess_info *info) int fork_usermode_driver(struct umd_info *info) { struct subprocess_info *sub_info; - char **argv = NULL; + const char *argv[] = { info->driver_name, NULL }; int err; if (WARN_ON_ONCE(info->tgid)) return -EBUSY; err = -ENOMEM; - argv = argv_split(GFP_KERNEL, info->driver_name, NULL); - if (!argv) - goto out; - - sub_info = call_usermodehelper_setup(info->driver_name, argv, NULL, - GFP_KERNEL, + sub_info = call_usermodehelper_setup(info->driver_name, + (char **)argv, NULL, GFP_KERNEL, umd_setup, umd_cleanup, info); if (!sub_info) goto out; err = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC); out: - if (argv) - argv_free(argv); return err; } EXPORT_SYMBOL_GPL(fork_usermode_driver); From 9746c9be0bb5860592e048468b37974be4c59d44 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 11 Jul 2020 06:45:36 -0500 Subject: [PATCH 17/23] exec: Remove unnecessary spaces from binfmts.h The general convention in the linux kernel is to define a pointer member as "type *name". The declaration of struct linux_binprm has several pointer defined as "type * name". Update them to the form of "type *name" for consistency. Suggested-by: Kees Cook Reviewed-by: Kees Cook Reviewed-by: Christoph Hellwig Link: https://lkml.kernel.org/r/87v9iq6x9x.fsf@x220.int.ebiederm.org Signed-off-by: "Eric W. Biederman" --- include/linux/binfmts.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 7c27d7b57871..eb5cb8df5485 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -45,15 +45,15 @@ struct linux_binprm { #ifdef __alpha__ unsigned int taso:1; #endif - struct file * executable; /* Executable to pass to the interpreter */ - struct file * interpreter; - struct file * file; + struct file *executable; /* Executable to pass to the interpreter */ + struct file *interpreter; + struct file *file; struct cred *cred; /* new credentials */ int unsafe; /* how unsafe this exec is (mask of LSM_UNSAFE_*) */ unsigned int per_clear; /* bits to clear in current->personality */ int argc, envc; - const char * filename; /* Name of binary as seen by procps */ - const char * interp; /* Name of the binary really executed. Most + const char *filename; /* Name of binary as seen by procps */ + const char *interp; /* Name of the binary really executed. Most of the time same as filename, but could be different for binfmt_{misc,script} */ unsigned interp_flags; From 0a8f36eb48f64006dde86e2a58230d5a599eef7d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 10 Jul 2020 15:39:45 -0500 Subject: [PATCH 18/23] exec: Factor out alloc_bprm Currently it is necessary for the usermode helper code and the code that launches init to use set_fs so that pages coming from the kernel look like they are coming from userspace. To allow that usage of set_fs to be removed cleanly the argument copying from userspace needs to happen earlier. Move the allocation of the bprm into it's own function (alloc_bprm) and move the call of alloc_bprm before unshare_files so that bprm can ultimately be allocated, the arguments can be placed on the new stack, and then the bprm can be passed into the core of exec. Neither the allocation of struct binprm nor the unsharing depend upon each other so swapping the order in which they are called is trivially safe. To keep things consistent the order of cleanup at the end of do_execve_common swapped to match the order of initialization. Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/87pn8y6x9a.fsf@x220.int.ebiederm.org Signed-off-by: "Eric W. Biederman" --- fs/exec.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 23dfbb820626..526156d6461d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1560,6 +1560,14 @@ static void free_bprm(struct linux_binprm *bprm) kfree(bprm); } +static struct linux_binprm *alloc_bprm(void) +{ + struct linux_binprm *bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); + if (!bprm) + return ERR_PTR(-ENOMEM); + return bprm; +} + int bprm_change_interp(const char *interp, struct linux_binprm *bprm) { /* If a binfmt changed the interp, free it first. */ @@ -1848,18 +1856,19 @@ static int do_execveat_common(int fd, struct filename *filename, * further execve() calls fail. */ current->flags &= ~PF_NPROC_EXCEEDED; + bprm = alloc_bprm(); + if (IS_ERR(bprm)) { + retval = PTR_ERR(bprm); + goto out_ret; + } + retval = unshare_files(&displaced); if (retval) - goto out_ret; - - retval = -ENOMEM; - bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); - if (!bprm) - goto out_files; + goto out_free; retval = prepare_bprm_creds(bprm); if (retval) - goto out_free; + goto out_files; check_unsafe_exec(bprm); current->in_execve = 1; @@ -1956,13 +1965,13 @@ static int do_execveat_common(int fd, struct filename *filename, current->fs->in_exec = 0; current->in_execve = 0; +out_files: + if (displaced) + reset_files_struct(displaced); out_free: free_bprm(bprm); kfree(pathbuf); -out_files: - if (displaced) - reset_files_struct(displaced); out_ret: putname(filename); return retval; From 60d9ad1d1d7f15964d23f6e71a7adcf1bde0e18e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 11 Jul 2020 08:16:15 -0500 Subject: [PATCH 19/23] exec: Move initialization of bprm->filename into alloc_bprm Currently it is necessary for the usermode helper code and the code that launches init to use set_fs so that pages coming from the kernel look like they are coming from userspace. To allow that usage of set_fs to be removed cleanly the argument copying from userspace needs to happen earlier. Move the computation of bprm->filename and possible allocation of a name in the case of execveat into alloc_bprm to make that possible. The exectuable name, the arguments, and the environment are copied into the new usermode stack which is stored in bprm until exec passes the point of no return. As the executable name is copied first onto the usermode stack it needs to be known. As there are no dependencies to computing the executable name, compute it early in alloc_bprm. As an implementation detail if the filename needs to be generated because it embeds a file descriptor store that filename in a new field bprm->fdpath, and free it in free_bprm. Previously this was done in an independent variable pathbuf. I have renamed pathbuf fdpath because fdpath is more suggestive of what kind of path is in the variable. I moved fdpath into struct linux_binprm because it is tightly tied to the other variables in struct linux_binprm, and as such is needed to allow the call alloc_binprm to move. Reviewed-by: Kees Cook Reviewed-by: Christoph Hellwig Link: https://lkml.kernel.org/r/87k0z66x8f.fsf@x220.int.ebiederm.org Signed-off-by: "Eric W. Biederman" --- fs/exec.c | 61 ++++++++++++++++++++++------------------- include/linux/binfmts.h | 1 + 2 files changed, 34 insertions(+), 28 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 526156d6461d..7e8af27dd199 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1557,15 +1557,37 @@ static void free_bprm(struct linux_binprm *bprm) /* If a binfmt changed the interp, free it. */ if (bprm->interp != bprm->filename) kfree(bprm->interp); + kfree(bprm->fdpath); kfree(bprm); } -static struct linux_binprm *alloc_bprm(void) +static struct linux_binprm *alloc_bprm(int fd, struct filename *filename) { struct linux_binprm *bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); + int retval = -ENOMEM; if (!bprm) - return ERR_PTR(-ENOMEM); + goto out; + + if (fd == AT_FDCWD || filename->name[0] == '/') { + bprm->filename = filename->name; + } else { + if (filename->name[0] == '\0') + bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd); + else + bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s", + fd, filename->name); + if (!bprm->fdpath) + goto out_free; + + bprm->filename = bprm->fdpath; + } + bprm->interp = bprm->filename; return bprm; + +out_free: + free_bprm(bprm); +out: + return ERR_PTR(retval); } int bprm_change_interp(const char *interp, struct linux_binprm *bprm) @@ -1831,7 +1853,6 @@ static int do_execveat_common(int fd, struct filename *filename, struct user_arg_ptr envp, int flags) { - char *pathbuf = NULL; struct linux_binprm *bprm; struct file *file; struct files_struct *displaced; @@ -1856,7 +1877,7 @@ static int do_execveat_common(int fd, struct filename *filename, * further execve() calls fail. */ current->flags &= ~PF_NPROC_EXCEEDED; - bprm = alloc_bprm(); + bprm = alloc_bprm(fd, filename); if (IS_ERR(bprm)) { retval = PTR_ERR(bprm); goto out_ret; @@ -1881,28 +1902,14 @@ static int do_execveat_common(int fd, struct filename *filename, sched_exec(); bprm->file = file; - if (fd == AT_FDCWD || filename->name[0] == '/') { - bprm->filename = filename->name; - } else { - if (filename->name[0] == '\0') - pathbuf = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd); - else - pathbuf = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s", - fd, filename->name); - if (!pathbuf) { - retval = -ENOMEM; - goto out_unmark; - } - /* - * Record that a name derived from an O_CLOEXEC fd will be - * inaccessible after exec. Relies on having exclusive access to - * current->files (due to unshare_files above). - */ - if (close_on_exec(fd, rcu_dereference_raw(current->files->fdt))) - bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE; - bprm->filename = pathbuf; - } - bprm->interp = bprm->filename; + /* + * Record that a name derived from an O_CLOEXEC fd will be + * inaccessible after exec. Relies on having exclusive access to + * current->files (due to unshare_files above). + */ + if (bprm->fdpath && + close_on_exec(fd, rcu_dereference_raw(current->files->fdt))) + bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE; retval = bprm_mm_init(bprm); if (retval) @@ -1941,7 +1948,6 @@ static int do_execveat_common(int fd, struct filename *filename, acct_update_integrals(current); task_numa_free(current, false); free_bprm(bprm); - kfree(pathbuf); putname(filename); if (displaced) put_files_struct(displaced); @@ -1970,7 +1976,6 @@ static int do_execveat_common(int fd, struct filename *filename, reset_files_struct(displaced); out_free: free_bprm(bprm); - kfree(pathbuf); out_ret: putname(filename); diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index eb5cb8df5485..8e9e1b0c8eb8 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -56,6 +56,7 @@ struct linux_binprm { const char *interp; /* Name of the binary really executed. Most of the time same as filename, but could be different for binfmt_{misc,script} */ + const char *fdpath; /* generated filename for execveat */ unsigned interp_flags; int execfd; /* File descriptor of the executable */ unsigned long loader, exec; From f18ac551e5035a52fd8edadc45e0f8eb4cec8d3e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 10 Jul 2020 15:54:54 -0500 Subject: [PATCH 20/23] exec: Move bprm_mm_init into alloc_bprm Currently it is necessary for the usermode helper code and the code that launches init to use set_fs so that pages coming from the kernel look like they are coming from userspace. To allow that usage of set_fs to be removed cleanly the argument copying from userspace needs to happen earlier. Move the allocation and initialization of bprm->mm into alloc_bprm so that the bprm->mm is available early to store the new user stack into. This is a prerequisite for copying argv and envp into the new user stack early before ther rest of exec. To keep the things consistent the cleanup of bprm->mm is moved into free_bprm. So that bprm->mm will be cleaned up whenever bprm->mm is allocated and free_bprm are called. Moving bprm_mm_init earlier is safe as it does not depend on any files, current->in_execve, current->fs->in_exec, bprm->unsafe, or the if the file table is shared. (AKA bprm_mm_init does not depend on any of the code that happens between alloc_bprm and where it was previously called.) This moves bprm->mm cleanup after current->fs->in_exec is set to 0. This is safe because current->fs->in_exec is only used to preventy taking an additional reference on the fs_struct. This moves bprm->mm cleanup after current->in_execve is set to 0. This is safe because current->in_execve is only used by the lsms (apparmor and tomoyou) and always for LSM specific functions, never for anything to do with the mm. This adds bprm->mm cleanup into the successful return path. This is safe because being on the successful return path implies that begin_new_exec succeeded and set brpm->mm to NULL. As bprm->mm is NULL bprm cleanup I am moving into free_bprm will do nothing. Reviewed-by: Kees Cook Reviewed-by: Christoph Hellwig Link: https://lkml.kernel.org/r/87eepe6x7p.fsf@x220.int.ebiederm.org Signed-off-by: "Eric W. Biederman" --- fs/exec.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 7e8af27dd199..afb168bf5e23 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1543,6 +1543,10 @@ static int prepare_bprm_creds(struct linux_binprm *bprm) static void free_bprm(struct linux_binprm *bprm) { + if (bprm->mm) { + acct_arg_size(bprm, 0); + mmput(bprm->mm); + } free_arg_pages(bprm); if (bprm->cred) { mutex_unlock(¤t->signal->cred_guard_mutex); @@ -1582,6 +1586,10 @@ static struct linux_binprm *alloc_bprm(int fd, struct filename *filename) bprm->filename = bprm->fdpath; } bprm->interp = bprm->filename; + + retval = bprm_mm_init(bprm); + if (retval) + goto out_free; return bprm; out_free: @@ -1911,10 +1919,6 @@ static int do_execveat_common(int fd, struct filename *filename, close_on_exec(fd, rcu_dereference_raw(current->files->fdt))) bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE; - retval = bprm_mm_init(bprm); - if (retval) - goto out_unmark; - retval = prepare_arg_pages(bprm, argv, envp); if (retval < 0) goto out; @@ -1962,10 +1966,6 @@ static int do_execveat_common(int fd, struct filename *filename, */ if (bprm->point_of_no_return && !fatal_signal_pending(current)) force_sigsegv(SIGSEGV); - if (bprm->mm) { - acct_arg_size(bprm, 0); - mmput(bprm->mm); - } out_unmark: current->fs->in_exec = 0; From 0c9cdff054aec0836bb38a449e860793849c3f84 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 12 Jul 2020 07:17:50 -0500 Subject: [PATCH 21/23] exec: Factor bprm_execve out of do_execve_common Currently it is necessary for the usermode helper code and the code that launches init to use set_fs so that pages coming from the kernel look like they are coming from userspace. To allow that usage of set_fs to be removed cleanly the argument copying from userspace needs to happen earlier. Factor bprm_execve out of do_execve_common to separate out the copying of arguments to the newe stack, and the rest of exec. In separating bprm_execve from do_execve_common the copying of the arguments onto the new stack happens earlier. As the copying of the arguments does not depend any security hooks, files, the file table, current->in_execve, current->fs->in_exec, bprm->unsafe, or creds this is safe. Likewise the security hook security_creds_for_exec does not depend upon preventing the argument copying from happening. In addition to making it possible to implement kernel_execve that performs the copying differently, this separation of bprm_execve from do_execve_common makes for a nice separation of responsibilities making the exec code easier to navigate. Reviewed-by: Kees Cook Reviewed-by: Christoph Hellwig Link: https://lkml.kernel.org/r/878sfm6x6x.fsf@x220.int.ebiederm.org Signed-off-by: "Eric W. Biederman" --- fs/exec.c | 154 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 81 insertions(+), 73 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index afb168bf5e23..50508892fa71 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1856,14 +1856,87 @@ static int exec_binprm(struct linux_binprm *bprm) /* * sys_execve() executes a new program. */ +static int bprm_execve(struct linux_binprm *bprm, + int fd, struct filename *filename, int flags) +{ + struct file *file; + struct files_struct *displaced; + int retval; + + retval = unshare_files(&displaced); + if (retval) + return retval; + + retval = prepare_bprm_creds(bprm); + if (retval) + goto out_files; + + check_unsafe_exec(bprm); + current->in_execve = 1; + + file = do_open_execat(fd, filename, flags); + retval = PTR_ERR(file); + if (IS_ERR(file)) + goto out_unmark; + + sched_exec(); + + bprm->file = file; + /* + * Record that a name derived from an O_CLOEXEC fd will be + * inaccessible after exec. Relies on having exclusive access to + * current->files (due to unshare_files above). + */ + if (bprm->fdpath && + close_on_exec(fd, rcu_dereference_raw(current->files->fdt))) + bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE; + + /* Set the unchanging part of bprm->cred */ + retval = security_bprm_creds_for_exec(bprm); + if (retval) + goto out; + + retval = exec_binprm(bprm); + if (retval < 0) + goto out; + + /* execve succeeded */ + current->fs->in_exec = 0; + current->in_execve = 0; + rseq_execve(current); + acct_update_integrals(current); + task_numa_free(current, false); + if (displaced) + put_files_struct(displaced); + return retval; + +out: + /* + * If past the point of no return ensure the the code never + * returns to the userspace process. Use an existing fatal + * signal if present otherwise terminate the process with + * SIGSEGV. + */ + if (bprm->point_of_no_return && !fatal_signal_pending(current)) + force_sigsegv(SIGSEGV); + +out_unmark: + current->fs->in_exec = 0; + current->in_execve = 0; + +out_files: + if (displaced) + reset_files_struct(displaced); + + return retval; +} + static int do_execveat_common(int fd, struct filename *filename, struct user_arg_ptr argv, struct user_arg_ptr envp, int flags) { struct linux_binprm *bprm; - struct file *file; - struct files_struct *displaced; int retval; if (IS_ERR(filename)) @@ -1891,89 +1964,24 @@ static int do_execveat_common(int fd, struct filename *filename, goto out_ret; } - retval = unshare_files(&displaced); - if (retval) - goto out_free; - - retval = prepare_bprm_creds(bprm); - if (retval) - goto out_files; - - check_unsafe_exec(bprm); - current->in_execve = 1; - - file = do_open_execat(fd, filename, flags); - retval = PTR_ERR(file); - if (IS_ERR(file)) - goto out_unmark; - - sched_exec(); - - bprm->file = file; - /* - * Record that a name derived from an O_CLOEXEC fd will be - * inaccessible after exec. Relies on having exclusive access to - * current->files (due to unshare_files above). - */ - if (bprm->fdpath && - close_on_exec(fd, rcu_dereference_raw(current->files->fdt))) - bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE; - retval = prepare_arg_pages(bprm, argv, envp); if (retval < 0) - goto out; - - /* Set the unchanging part of bprm->cred */ - retval = security_bprm_creds_for_exec(bprm); - if (retval) - goto out; + goto out_free; retval = copy_string_kernel(bprm->filename, bprm); if (retval < 0) - goto out; - + goto out_free; bprm->exec = bprm->p; + retval = copy_strings(bprm->envc, envp, bprm); if (retval < 0) - goto out; + goto out_free; retval = copy_strings(bprm->argc, argv, bprm); if (retval < 0) - goto out; + goto out_free; - retval = exec_binprm(bprm); - if (retval < 0) - goto out; - - /* execve succeeded */ - current->fs->in_exec = 0; - current->in_execve = 0; - rseq_execve(current); - acct_update_integrals(current); - task_numa_free(current, false); - free_bprm(bprm); - putname(filename); - if (displaced) - put_files_struct(displaced); - return retval; - -out: - /* - * If past the point of no return ensure the the code never - * returns to the userspace process. Use an existing fatal - * signal if present otherwise terminate the process with - * SIGSEGV. - */ - if (bprm->point_of_no_return && !fatal_signal_pending(current)) - force_sigsegv(SIGSEGV); - -out_unmark: - current->fs->in_exec = 0; - current->in_execve = 0; - -out_files: - if (displaced) - reset_files_struct(displaced); + retval = bprm_execve(bprm, fd, filename, flags); out_free: free_bprm(bprm); From d8b9cd549ecf0f3dc8da42ada5f0ce73e8ed5f1e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 12 Jul 2020 08:23:54 -0500 Subject: [PATCH 22/23] exec: Factor bprm_stack_limits out of prepare_arg_pages In preparation for implementiong kernel_execve (which will take kernel pointers not userspace pointers) factor out bprm_stack_limits out of prepare_arg_pages. This separates the counting which depends upon the getting data from userspace from the calculations of the stack limits which is usable in kernel_execve. The remove prepare_args_pages and compute bprm->argc and bprm->envc directly in do_execveat_common, before bprm_stack_limits is called. Reviewed-by: Kees Cook Reviewed-by: Christoph Hellwig Link: https://lkml.kernel.org/r/87365u6x60.fsf@x220.int.ebiederm.org Signed-off-by: "Eric W. Biederman" --- fs/exec.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 50508892fa71..f8135dc149b3 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -448,19 +448,10 @@ static int count(struct user_arg_ptr argv, int max) return i; } -static int prepare_arg_pages(struct linux_binprm *bprm, - struct user_arg_ptr argv, struct user_arg_ptr envp) +static int bprm_stack_limits(struct linux_binprm *bprm) { unsigned long limit, ptr_size; - bprm->argc = count(argv, MAX_ARG_STRINGS); - if (bprm->argc < 0) - return bprm->argc; - - bprm->envc = count(envp, MAX_ARG_STRINGS); - if (bprm->envc < 0) - return bprm->envc; - /* * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM * (whichever is smaller) for the argv+env strings. @@ -1964,7 +1955,17 @@ static int do_execveat_common(int fd, struct filename *filename, goto out_ret; } - retval = prepare_arg_pages(bprm, argv, envp); + retval = count(argv, MAX_ARG_STRINGS); + if (retval < 0) + goto out_free; + bprm->argc = retval; + + retval = count(envp, MAX_ARG_STRINGS); + if (retval < 0) + goto out_free; + bprm->envc = retval; + + retval = bprm_stack_limits(bprm); if (retval < 0) goto out_free; From be619f7f063a49c656f620a46af4f8ea3e759e91 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 13 Jul 2020 12:06:48 -0500 Subject: [PATCH 23/23] exec: Implement kernel_execve To allow the kernel not to play games with set_fs to call exec implement kernel_execve. The function kernel_execve takes pointers into kernel memory and copies the values pointed to onto the new userspace stack. The calls with arguments from kernel space of do_execve are replaced with calls to kernel_execve. The calls do_execve and do_execveat are made static as there are now no callers outside of exec. The comments that mention do_execve are updated to refer to kernel_execve or execve depending on the circumstances. In addition to correcting the comments, this makes it easy to grep for do_execve and verify it is not used. Inspired-by: https://lkml.kernel.org/r/20200627072704.2447163-1-hch@lst.de Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/87wo365ikj.fsf@x220.int.ebiederm.org Signed-off-by: "Eric W. Biederman" --- arch/x86/entry/entry_32.S | 2 +- arch/x86/entry/entry_64.S | 2 +- arch/x86/kernel/unwind_frame.c | 2 +- fs/exec.c | 88 +++++++++++++++++++++++++++++++++- include/linux/binfmts.h | 9 +--- init/main.c | 4 +- kernel/umh.c | 6 +-- security/tomoyo/common.h | 2 +- security/tomoyo/domain.c | 4 +- security/tomoyo/tomoyo.c | 4 +- 10 files changed, 100 insertions(+), 23 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 024d7d276cd4..8f4e085ee06d 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -854,7 +854,7 @@ SYM_CODE_START(ret_from_fork) CALL_NOSPEC ebx /* * A kernel thread is allowed to return here after successfully - * calling do_execve(). Exit to userspace to complete the execve() + * calling kernel_execve(). Exit to userspace to complete the execve() * syscall. */ movl $0, PT_EAX(%esp) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index d2a00c97e53f..73c7e255256b 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -293,7 +293,7 @@ SYM_CODE_START(ret_from_fork) CALL_NOSPEC rbx /* * A kernel thread is allowed to return here after successfully - * calling do_execve(). Exit to userspace to complete the execve() + * calling kernel_execve(). Exit to userspace to complete the execve() * syscall. */ movq $0, RAX(%rsp) diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 722a85f3b2dd..e40b4942157f 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -275,7 +275,7 @@ bool unwind_next_frame(struct unwind_state *state) * This user_mode() check is slightly broader than a PF_KTHREAD * check because it also catches the awkward situation where a * newly forked kthread transitions into a user task by calling - * do_execve(), which eventually clears PF_KTHREAD. + * kernel_execve(), which eventually clears PF_KTHREAD. */ if (!user_mode(regs)) goto the_end; diff --git a/fs/exec.c b/fs/exec.c index f8135dc149b3..3698252719a3 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -448,6 +448,23 @@ static int count(struct user_arg_ptr argv, int max) return i; } +static int count_strings_kernel(const char *const *argv) +{ + int i; + + if (!argv) + return 0; + + for (i = 0; argv[i]; ++i) { + if (i >= MAX_ARG_STRINGS) + return -E2BIG; + if (fatal_signal_pending(current)) + return -ERESTARTNOHAND; + cond_resched(); + } + return i; +} + static int bprm_stack_limits(struct linux_binprm *bprm) { unsigned long limit, ptr_size; @@ -624,6 +641,20 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm) } EXPORT_SYMBOL(copy_string_kernel); +static int copy_strings_kernel(int argc, const char *const *argv, + struct linux_binprm *bprm) +{ + while (argc-- > 0) { + int ret = copy_string_kernel(argv[argc], bprm); + if (ret < 0) + return ret; + if (fatal_signal_pending(current)) + return -ERESTARTNOHAND; + cond_resched(); + } + return 0; +} + #ifdef CONFIG_MMU /* @@ -1991,7 +2022,60 @@ static int do_execveat_common(int fd, struct filename *filename, return retval; } -int do_execve(struct filename *filename, +int kernel_execve(const char *kernel_filename, + const char *const *argv, const char *const *envp) +{ + struct filename *filename; + struct linux_binprm *bprm; + int fd = AT_FDCWD; + int retval; + + filename = getname_kernel(kernel_filename); + if (IS_ERR(filename)) + return PTR_ERR(filename); + + bprm = alloc_bprm(fd, filename); + if (IS_ERR(bprm)) { + retval = PTR_ERR(bprm); + goto out_ret; + } + + retval = count_strings_kernel(argv); + if (retval < 0) + goto out_free; + bprm->argc = retval; + + retval = count_strings_kernel(envp); + if (retval < 0) + goto out_free; + bprm->envc = retval; + + retval = bprm_stack_limits(bprm); + if (retval < 0) + goto out_free; + + retval = copy_string_kernel(bprm->filename, bprm); + if (retval < 0) + goto out_free; + bprm->exec = bprm->p; + + retval = copy_strings_kernel(bprm->envc, envp, bprm); + if (retval < 0) + goto out_free; + + retval = copy_strings_kernel(bprm->argc, argv, bprm); + if (retval < 0) + goto out_free; + + retval = bprm_execve(bprm, fd, filename, 0); +out_free: + free_bprm(bprm); +out_ret: + putname(filename); + return retval; +} + +static int do_execve(struct filename *filename, const char __user *const __user *__argv, const char __user *const __user *__envp) { @@ -2000,7 +2084,7 @@ int do_execve(struct filename *filename, return do_execveat_common(AT_FDCWD, filename, argv, envp, 0); } -int do_execveat(int fd, struct filename *filename, +static int do_execveat(int fd, struct filename *filename, const char __user *const __user *__argv, const char __user *const __user *__envp, int flags) diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 8e9e1b0c8eb8..0571701ab1c5 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -135,12 +135,7 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm); extern void set_binfmt(struct linux_binfmt *new); extern ssize_t read_code(struct file *, unsigned long, loff_t, size_t); -extern int do_execve(struct filename *, - const char __user * const __user *, - const char __user * const __user *); -extern int do_execveat(int, struct filename *, - const char __user * const __user *, - const char __user * const __user *, - int); +int kernel_execve(const char *filename, + const char *const *argv, const char *const *envp); #endif /* _LINUX_BINFMTS_H */ diff --git a/init/main.c b/init/main.c index 0ead83e86b5a..78ccec5c28f3 100644 --- a/init/main.c +++ b/init/main.c @@ -1329,9 +1329,7 @@ static int run_init_process(const char *init_filename) pr_debug(" with environment:\n"); for (p = envp_init; *p; p++) pr_debug(" %s\n", *p); - return do_execve(getname_kernel(init_filename), - (const char __user *const __user *)argv_init, - (const char __user *const __user *)envp_init); + return kernel_execve(init_filename, argv_init, envp_init); } static int try_to_run_init_process(const char *init_filename) diff --git a/kernel/umh.c b/kernel/umh.c index 6ca2096298b9..a25433f9cd9a 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -98,9 +98,9 @@ static int call_usermodehelper_exec_async(void *data) commit_creds(new); - retval = do_execve(getname_kernel(sub_info->path), - (const char __user *const __user *)sub_info->argv, - (const char __user *const __user *)sub_info->envp); + retval = kernel_execve(sub_info->path, + (const char *const *)sub_info->argv, + (const char *const *)sub_info->envp); out: sub_info->retval = retval; /* diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h index 050473df5809..85246b9df7ca 100644 --- a/security/tomoyo/common.h +++ b/security/tomoyo/common.h @@ -425,7 +425,7 @@ struct tomoyo_request_info { struct tomoyo_obj_info *obj; /* * For holding parameters specific to execve() request. - * NULL if not dealing do_execve(). + * NULL if not dealing execve(). */ struct tomoyo_execve *ee; struct tomoyo_domain_info *domain; diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c index 7869d6a9980b..53b3e1f5f227 100644 --- a/security/tomoyo/domain.c +++ b/security/tomoyo/domain.c @@ -767,7 +767,7 @@ int tomoyo_find_next_domain(struct linux_binprm *bprm) /* * Check for domain transition preference if "file execute" matched. - * If preference is given, make do_execve() fail if domain transition + * If preference is given, make execve() fail if domain transition * has failed, for domain transition preference should be used with * destination domain defined. */ @@ -810,7 +810,7 @@ int tomoyo_find_next_domain(struct linux_binprm *bprm) snprintf(ee->tmp, TOMOYO_EXEC_TMPSIZE - 1, "<%s>", candidate->name); /* - * Make do_execve() fail if domain transition across namespaces + * Make execve() fail if domain transition across namespaces * has failed. */ reject_on_transition_failure = true; diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index f9adddc42ac8..1f3cd432d830 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -93,7 +93,7 @@ static int tomoyo_bprm_check_security(struct linux_binprm *bprm) struct tomoyo_task *s = tomoyo_task(current); /* - * Execute permission is checked against pathname passed to do_execve() + * Execute permission is checked against pathname passed to execve() * using current domain. */ if (!s->old_domain_info) { @@ -307,7 +307,7 @@ static int tomoyo_file_fcntl(struct file *file, unsigned int cmd, */ static int tomoyo_file_open(struct file *f) { - /* Don't check read permission here if called from do_execve(). */ + /* Don't check read permission here if called from execve(). */ if (current->in_execve) return 0; return tomoyo_check_open_permission(tomoyo_domain(), &f->f_path,