diff --git a/Documentation/filesystems/files.rst b/Documentation/filesystems/files.rst index cbf8e57376bf..bcf84459917f 100644 --- a/Documentation/filesystems/files.rst +++ b/Documentation/filesystems/files.rst @@ -62,7 +62,7 @@ the fdtable structure - be held. 4. To look up the file structure given an fd, a reader - must use either fcheck() or fcheck_files() APIs. These + must use either lookup_fd_rcu() or files_lookup_fd_rcu() APIs. These take care of barrier requirements due to lock-free lookup. An example:: @@ -70,7 +70,7 @@ the fdtable structure - struct file *file; rcu_read_lock(); - file = fcheck(fd); + file = lookup_fd_rcu(fd); if (file) { ... } @@ -84,7 +84,7 @@ the fdtable structure - on ->f_count:: rcu_read_lock(); - file = fcheck_files(files, fd); + file = files_lookup_fd_rcu(files, fd); if (file) { if (atomic_long_inc_not_zero(&file->f_count)) *fput_needed = 1; @@ -104,7 +104,7 @@ the fdtable structure - lock-free, they must be installed using rcu_assign_pointer() API. If they are looked up lock-free, rcu_dereference() must be used. However it is advisable to use files_fdtable() - and fcheck()/fcheck_files() which take care of these issues. + and lookup_fd_rcu()/files_lookup_fd_rcu() which take care of these issues. 7. While updating, the fdtable pointer must be looked up while holding files->file_lock. If ->file_lock is dropped, then diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index 026c181a98c5..60b5583e9eaf 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -74,7 +74,7 @@ static struct spu_context *coredump_next_context(int *fd) *fd = n - 1; rcu_read_lock(); - file = fcheck(*fd); + file = lookup_fd_rcu(*fd); ctx = SPUFS_I(file_inode(file))->i_ctx; get_spu_context(ctx); rcu_read_unlock(); diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 1338209f9f86..c119736ca56a 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1836,7 +1836,7 @@ static void binder_deferred_fd_close(int fd) if (!twcb) return; init_task_work(&twcb->twork, binder_do_fd_close); - __close_fd_get_file(fd, &twcb->file); + close_fd_get_file(fd, &twcb->file); if (twcb->file) { filp_close(twcb->file, current->files); task_work_add(current, &twcb->twork, TWA_RESUME); diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c index 322b7dfb4ea0..5bf781ea6d67 100644 --- a/fs/autofs/dev-ioctl.c +++ b/fs/autofs/dev-ioctl.c @@ -4,9 +4,10 @@ * Copyright 2008 Ian Kent */ +#include #include #include -#include +#include #include #include @@ -289,7 +290,7 @@ static int autofs_dev_ioctl_closemount(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { - return ksys_close(param->ioctlfd); + return close_fd(param->ioctlfd); } /* diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index ac0b5fc30ea6..950bc177238a 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -2198,6 +2198,7 @@ static int elf_core_dump(struct coredump_params *cprm) { size_t sz = get_note_info_size(&info); + /* For cell spufs */ sz += elf_coredump_extra_notes_size(); phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL); @@ -2261,6 +2262,7 @@ static int elf_core_dump(struct coredump_params *cprm) if (!write_note_info(&info, cprm)) goto end_coredump; + /* For cell spufs */ if (elf_coredump_extra_notes_write(cprm)) goto end_coredump; diff --git a/fs/coredump.c b/fs/coredump.c index c6acfc694f65..a2f6ecc8e345 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -586,7 +586,6 @@ void do_coredump(const kernel_siginfo_t *siginfo) int ispipe; size_t *argv = NULL; int argc = 0; - struct files_struct *displaced; /* require nonrelative corefile path and be extra careful */ bool need_suid_safe = false; bool core_dumped = false; @@ -792,11 +791,10 @@ void do_coredump(const kernel_siginfo_t *siginfo) } /* get us an unshared descriptor table; almost always a no-op */ - retval = unshare_files(&displaced); + /* The cell spufs coredump code reads the file descriptor tables */ + retval = unshare_files(); if (retval) goto close_fail; - if (displaced) - put_files_struct(displaced); if (!dump_interrupted()) { /* * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would diff --git a/fs/exec.c b/fs/exec.c index aee36e5733ce..81b85f70e9f3 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1258,6 +1258,16 @@ int begin_new_exec(struct linux_binprm * bprm) if (retval) goto out; + /* + * Cancel any io_uring activity across execve + */ + io_uring_task_cancel(); + + /* Ensure the files table is not shared. */ + retval = unshare_files(); + if (retval) + goto out; + /* * Must be called _before_ exec_mmap() as bprm->mm is * not visibile until then. This also enables the update @@ -1779,21 +1789,11 @@ static int bprm_execve(struct linux_binprm *bprm, int fd, struct filename *filename, int flags) { struct file *file; - struct files_struct *displaced; int retval; - /* - * Cancel any io_uring activity across execve - */ - io_uring_task_cancel(); - - retval = unshare_files(&displaced); - if (retval) - return retval; - retval = prepare_bprm_creds(bprm); if (retval) - goto out_files; + return retval; check_unsafe_exec(bprm); current->in_execve = 1; @@ -1808,11 +1808,14 @@ static int bprm_execve(struct linux_binprm *bprm, bprm->file = file; /* * Record that a name derived from an O_CLOEXEC fd will be - * inaccessible after exec. Relies on having exclusive access to - * current->files (due to unshare_files above). + * inaccessible after exec. This allows the code in exec to + * choose to fail when the executable is not mmaped into the + * interpreter and an open file descriptor is not passed to + * the interpreter. This makes for a better user experience + * than having the interpreter start and then immediately fail + * when it finds the executable is inaccessible. */ - if (bprm->fdpath && - close_on_exec(fd, rcu_dereference_raw(current->files->fdt))) + if (bprm->fdpath && get_close_on_exec(fd)) bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE; /* Set the unchanging part of bprm->cred */ @@ -1830,8 +1833,6 @@ static int bprm_execve(struct linux_binprm *bprm, rseq_execve(current); acct_update_integrals(current); task_numa_free(current, false); - if (displaced) - put_files_struct(displaced); return retval; out: @@ -1848,10 +1849,6 @@ static int bprm_execve(struct linux_binprm *bprm, current->fs->in_exec = 0; current->in_execve = 0; -out_files: - if (displaced) - reset_files_struct(displaced); - return retval; } diff --git a/fs/file.c b/fs/file.c index e08e4daccac3..8434e0afecc7 100644 --- a/fs/file.c +++ b/fs/file.c @@ -158,7 +158,7 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr) spin_unlock(&files->file_lock); new_fdt = alloc_fdtable(nr); - /* make sure all __fd_install() have seen resize_in_progress + /* make sure all fd_install() have seen resize_in_progress * or have finished their rcu_read_lock_sched() section. */ if (atomic_read(&files->count) > 1) @@ -181,7 +181,7 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr) rcu_assign_pointer(files->fdt, new_fdt); if (cur_fdt != &files->fdtab) call_rcu(&cur_fdt->rcu, free_fdtable_rcu); - /* coupled with smp_rmb() in __fd_install() */ + /* coupled with smp_rmb() in fd_install() */ smp_wmb(); return 1; } @@ -411,19 +411,6 @@ static struct fdtable *close_files(struct files_struct * files) return fdt; } -struct files_struct *get_files_struct(struct task_struct *task) -{ - struct files_struct *files; - - task_lock(task); - files = task->files; - if (files) - atomic_inc(&files->count); - task_unlock(task); - - return files; -} - void put_files_struct(struct files_struct *files) { if (atomic_dec_and_test(&files->count)) { @@ -436,18 +423,6 @@ void put_files_struct(struct files_struct *files) } } -void reset_files_struct(struct files_struct *files) -{ - struct task_struct *tsk = current; - struct files_struct *old; - - old = tsk->files; - task_lock(tsk); - tsk->files = files; - task_unlock(tsk); - put_files_struct(old); -} - void exit_files(struct task_struct *tsk) { struct files_struct * files = tsk->files; @@ -492,9 +467,9 @@ static unsigned int find_next_fd(struct fdtable *fdt, unsigned int start) /* * allocate a file descriptor, mark it busy. */ -int __alloc_fd(struct files_struct *files, - unsigned start, unsigned end, unsigned flags) +static int alloc_fd(unsigned start, unsigned end, unsigned flags) { + struct files_struct *files = current->files; unsigned int fd; int error; struct fdtable *fdt; @@ -550,14 +525,9 @@ int __alloc_fd(struct files_struct *files, return error; } -static int alloc_fd(unsigned start, unsigned flags) -{ - return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags); -} - int __get_unused_fd_flags(unsigned flags, unsigned long nofile) { - return __alloc_fd(current->files, 0, nofile, flags); + return alloc_fd(0, nofile, flags); } int get_unused_fd_flags(unsigned flags) @@ -596,17 +566,13 @@ EXPORT_SYMBOL(put_unused_fd); * It should never happen - if we allow dup2() do it, _really_ bad things * will follow. * - * NOTE: __fd_install() variant is really, really low-level; don't - * use it unless you are forced to by truly lousy API shoved down - * your throat. 'files' *MUST* be either current->files or obtained - * by get_files_struct(current) done by whoever had given it to you, - * or really bad things will happen. Normally you want to use - * fd_install() instead. + * This consumes the "file" refcount, so callers should treat it + * as if they had called fput(file). */ -void __fd_install(struct files_struct *files, unsigned int fd, - struct file *file) +void fd_install(unsigned int fd, struct file *file) { + struct files_struct *files = current->files; struct fdtable *fdt; rcu_read_lock_sched(); @@ -628,15 +594,6 @@ void __fd_install(struct files_struct *files, unsigned int fd, rcu_read_unlock_sched(); } -/* - * This consumes the "file" refcount, so callers should treat it - * as if they had called fput(file). - */ -void fd_install(unsigned int fd, struct file *file) -{ - __fd_install(current->files, fd, file); -} - EXPORT_SYMBOL(fd_install); static struct file *pick_file(struct files_struct *files, unsigned fd) @@ -659,11 +616,9 @@ static struct file *pick_file(struct files_struct *files, unsigned fd) return file; } -/* - * The same warnings as for __alloc_fd()/__fd_install() apply here... - */ -int __close_fd(struct files_struct *files, unsigned fd) +int close_fd(unsigned fd) { + struct files_struct *files = current->files; struct file *file; file = pick_file(files, fd); @@ -672,7 +627,7 @@ int __close_fd(struct files_struct *files, unsigned fd) return filp_close(file, files); } -EXPORT_SYMBOL(__close_fd); /* for ksys_close() */ +EXPORT_SYMBOL(close_fd); /* for ksys_close() */ static inline void __range_cloexec(struct files_struct *cur_fds, unsigned int fd, unsigned int max_fd) @@ -777,11 +732,11 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags) } /* - * variant of __close_fd that gets a ref on the file for later fput. + * variant of close_fd that gets a ref on the file for later fput. * The caller must ensure that filp_close() called on the file, and then * an fput(). */ -int __close_fd_get_file(unsigned int fd, struct file **res) +int close_fd_get_file(unsigned int fd, struct file **res) { struct files_struct *files = current->files; struct file *file; @@ -850,7 +805,7 @@ static struct file *__fget_files(struct files_struct *files, unsigned int fd, rcu_read_lock(); loop: - file = fcheck_files(files, fd); + file = files_lookup_fd_rcu(files, fd); if (file) { /* File object ref couldn't be taken. * dup2() atomicity guarantee is the reason @@ -901,6 +856,42 @@ struct file *fget_task(struct task_struct *task, unsigned int fd) return file; } +struct file *task_lookup_fd_rcu(struct task_struct *task, unsigned int fd) +{ + /* Must be called with rcu_read_lock held */ + struct files_struct *files; + struct file *file = NULL; + + task_lock(task); + files = task->files; + if (files) + file = files_lookup_fd_rcu(files, fd); + task_unlock(task); + + return file; +} + +struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *ret_fd) +{ + /* Must be called with rcu_read_lock held */ + struct files_struct *files; + unsigned int fd = *ret_fd; + struct file *file = NULL; + + task_lock(task); + files = task->files; + if (files) { + for (; fd < files_fdtable(files)->max_fds; fd++) { + file = files_lookup_fd_rcu(files, fd); + if (file) + break; + } + } + task_unlock(task); + *ret_fd = fd; + return file; +} + /* * Lightweight file lookup - no refcnt increment if fd table isn't shared. * @@ -923,7 +914,7 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask) struct file *file; if (atomic_read(&files->count) == 1) { - file = __fcheck_files(files, fd); + file = files_lookup_fd_raw(files, fd); if (!file || unlikely(file->f_mode & mask)) return 0; return (unsigned long)file; @@ -1045,7 +1036,7 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags) struct files_struct *files = current->files; if (!file) - return __close_fd(files, fd); + return close_fd(fd); if (fd >= rlimit(RLIMIT_NOFILE)) return -EBADF; @@ -1134,7 +1125,7 @@ static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags) spin_lock(&files->file_lock); err = expand_files(files, newfd); - file = fcheck(oldfd); + file = files_lookup_fd_locked(files, oldfd); if (unlikely(!file)) goto Ebadf; if (unlikely(err < 0)) { @@ -1163,7 +1154,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) int retval = oldfd; rcu_read_lock(); - if (!fcheck_files(files, oldfd)) + if (!files_lookup_fd_rcu(files, oldfd)) retval = -EBADF; rcu_read_unlock(); return retval; @@ -1188,10 +1179,11 @@ SYSCALL_DEFINE1(dup, unsigned int, fildes) int f_dupfd(unsigned int from, struct file *file, unsigned flags) { + unsigned long nofile = rlimit(RLIMIT_NOFILE); int err; - if (from >= rlimit(RLIMIT_NOFILE)) + if (from >= nofile) return -EINVAL; - err = alloc_fd(from, flags); + err = alloc_fd(from, nofile, flags); if (err >= 0) { get_file(file); fd_install(err, file); diff --git a/fs/io_uring.c b/fs/io_uring.c index b3544ecfe305..2b588bd5494c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4236,7 +4236,7 @@ static int io_close(struct io_kiocb *req, bool force_nonblock, /* might be already done during nonblock submission */ if (!close->put_file) { - ret = __close_fd_get_file(close->fd, &close->put_file); + ret = close_fd_get_file(close->fd, &close->put_file); if (ret < 0) return (ret == -ENOENT) ? -EBADF : ret; } diff --git a/fs/locks.c b/fs/locks.c index ed95cd813e10..99ca97e81b7a 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2539,14 +2539,15 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, */ if (!error && file_lock->fl_type != F_UNLCK && !(file_lock->fl_flags & FL_OFDLCK)) { + struct files_struct *files = current->files; /* * We need that spin_lock here - it prevents reordering between * update of i_flctx->flc_posix and check for it done in * close(). rcu_read_lock() wouldn't do. */ - spin_lock(¤t->files->file_lock); - f = fcheck(fd); - spin_unlock(¤t->files->file_lock); + spin_lock(&files->file_lock); + f = files_lookup_fd_locked(files, fd); + spin_unlock(&files->file_lock); if (f != filp) { file_lock->fl_type = F_UNLCK; error = do_lock_file_wait(filp, cmd, file_lock); @@ -2670,14 +2671,15 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, */ if (!error && file_lock->fl_type != F_UNLCK && !(file_lock->fl_flags & FL_OFDLCK)) { + struct files_struct *files = current->files; /* * We need that spin_lock here - it prevents reordering between * update of i_flctx->flc_posix and check for it done in * close(). rcu_read_lock() wouldn't do. */ - spin_lock(¤t->files->file_lock); - f = fcheck(fd); - spin_unlock(¤t->files->file_lock); + spin_lock(&files->file_lock); + f = files_lookup_fd_locked(files, fd); + spin_unlock(&files->file_lock); if (f != filp) { file_lock->fl_type = F_UNLCK; error = do_lock_file_wait(filp, cmd, file_lock); diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 5dcda8f20c04..5486aaca60b0 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -327,7 +327,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) } rcu_read_lock(); - f = fcheck(fd); + f = lookup_fd_rcu(fd); rcu_read_unlock(); /* if (f != filp) means that we lost a race and another task/thread diff --git a/fs/open.c b/fs/open.c index 4d7537ae59df..1e06e443a565 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1296,7 +1296,7 @@ EXPORT_SYMBOL(filp_close); */ SYSCALL_DEFINE1(close, unsigned int, fd) { - int retval = __close_fd(current->files, fd); + int retval = close_fd(fd); /* can't restart close syscall because file table entry was cleared */ if (unlikely(retval == -ERESTARTSYS || diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 81882a13212d..cb51763ed554 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -28,14 +28,13 @@ static int seq_show(struct seq_file *m, void *v) if (!task) return -ENOENT; - files = get_files_struct(task); - put_task_struct(task); - + task_lock(task); + files = task->files; if (files) { unsigned int fd = proc_fd(m->private); spin_lock(&files->file_lock); - file = fcheck_files(files, fd); + file = files_lookup_fd_locked(files, fd); if (file) { struct fdtable *fdt = files_fdtable(files); @@ -47,8 +46,9 @@ static int seq_show(struct seq_file *m, void *v) ret = 0; } spin_unlock(&files->file_lock); - put_files_struct(files); } + task_unlock(task); + put_task_struct(task); if (ret) return ret; @@ -57,6 +57,7 @@ static int seq_show(struct seq_file *m, void *v) (long long)file->f_pos, f_flags, real_mount(file->f_path.mnt)->mnt_id); + /* show_fd_locks() never deferences files so a stale value is safe */ show_fd_locks(m, file, files); if (seq_has_overflowed(m)) goto out; @@ -83,18 +84,13 @@ static const struct file_operations proc_fdinfo_file_operations = { static bool tid_fd_mode(struct task_struct *task, unsigned fd, fmode_t *mode) { - struct files_struct *files = get_files_struct(task); struct file *file; - if (!files) - return false; - rcu_read_lock(); - file = fcheck_files(files, fd); + file = task_lookup_fd_rcu(task, fd); if (file) *mode = file->f_mode; rcu_read_unlock(); - put_files_struct(files); return !!file; } @@ -146,29 +142,22 @@ static const struct dentry_operations tid_fd_dentry_operations = { static int proc_fd_link(struct dentry *dentry, struct path *path) { - struct files_struct *files = NULL; struct task_struct *task; int ret = -ENOENT; task = get_proc_task(d_inode(dentry)); if (task) { - files = get_files_struct(task); - put_task_struct(task); - } - - if (files) { unsigned int fd = proc_fd(d_inode(dentry)); struct file *fd_file; - spin_lock(&files->file_lock); - fd_file = fcheck_files(files, fd); + fd_file = fget_task(task, fd); if (fd_file) { *path = fd_file->f_path; path_get(&fd_file->f_path); ret = 0; + fput(fd_file); } - spin_unlock(&files->file_lock); - put_files_struct(files); + put_task_struct(task); } return ret; @@ -229,7 +218,6 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx, instantiate_t instantiate) { struct task_struct *p = get_proc_task(file_inode(file)); - struct files_struct *files; unsigned int fd; if (!p) @@ -237,22 +225,18 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx, if (!dir_emit_dots(file, ctx)) goto out; - files = get_files_struct(p); - if (!files) - goto out; rcu_read_lock(); - for (fd = ctx->pos - 2; - fd < files_fdtable(files)->max_fds; - fd++, ctx->pos++) { + for (fd = ctx->pos - 2;; fd++) { struct file *f; struct fd_data data; char name[10 + 1]; unsigned int len; - f = fcheck_files(files, fd); + f = task_lookup_next_fd_rcu(p, &fd); + ctx->pos = fd + 2LL; if (!f) - continue; + break; data.mode = f->f_mode; rcu_read_unlock(); data.fd = fd; @@ -261,13 +245,11 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx, if (!proc_fill_cache(file, ctx, name, len, instantiate, p, &data)) - goto out_fd_loop; + goto out; cond_resched(); rcu_read_lock(); } rcu_read_unlock(); -out_fd_loop: - put_files_struct(files); out: put_task_struct(p); return 0; diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index a32bf47c593e..d0e78174874a 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -80,7 +80,7 @@ struct dentry; /* * The caller must ensure that fd table isn't shared or hold rcu or file lock */ -static inline struct file *__fcheck_files(struct files_struct *files, unsigned int fd) +static inline struct file *files_lookup_fd_raw(struct files_struct *files, unsigned int fd) { struct fdtable *fdt = rcu_dereference_raw(files->fdt); @@ -91,39 +91,41 @@ static inline struct file *__fcheck_files(struct files_struct *files, unsigned i return NULL; } -static inline struct file *fcheck_files(struct files_struct *files, unsigned int fd) +static inline struct file *files_lookup_fd_locked(struct files_struct *files, unsigned int fd) { - RCU_LOCKDEP_WARN(!rcu_read_lock_held() && - !lockdep_is_held(&files->file_lock), + RCU_LOCKDEP_WARN(!lockdep_is_held(&files->file_lock), "suspicious rcu_dereference_check() usage"); - return __fcheck_files(files, fd); + return files_lookup_fd_raw(files, fd); } -/* - * Check whether the specified fd has an open file. - */ -#define fcheck(fd) fcheck_files(current->files, fd) +static inline struct file *files_lookup_fd_rcu(struct files_struct *files, unsigned int fd) +{ + RCU_LOCKDEP_WARN(!rcu_read_lock_held(), + "suspicious rcu_dereference_check() usage"); + return files_lookup_fd_raw(files, fd); +} + +static inline struct file *lookup_fd_rcu(unsigned int fd) +{ + return files_lookup_fd_rcu(current->files, fd); +} + +struct file *task_lookup_fd_rcu(struct task_struct *task, unsigned int fd); +struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *fd); struct task_struct; -struct files_struct *get_files_struct(struct task_struct *); void put_files_struct(struct files_struct *fs); -void reset_files_struct(struct files_struct *); -int unshare_files(struct files_struct **); +int unshare_files(void); struct files_struct *dup_fd(struct files_struct *, unsigned, int *) __latent_entropy; void do_close_on_exec(struct files_struct *); int iterate_fd(struct files_struct *, unsigned, int (*)(const void *, struct file *, unsigned), const void *); -extern int __alloc_fd(struct files_struct *files, - unsigned start, unsigned end, unsigned flags); -extern void __fd_install(struct files_struct *files, - unsigned int fd, struct file *file); -extern int __close_fd(struct files_struct *files, - unsigned int fd); +extern int close_fd(unsigned int fd); extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags); -extern int __close_fd_get_file(unsigned int fd, struct file **res); +extern int close_fd_get_file(unsigned int fd, struct file **res); extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds, struct files_struct **new_fdp); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 37bea07c12f2..0f72f380db72 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1295,18 +1295,6 @@ static inline long ksys_ftruncate(unsigned int fd, loff_t length) return do_sys_ftruncate(fd, length, 1); } -extern int __close_fd(struct files_struct *files, unsigned int fd); - -/* - * In contrast to sys_close(), this stub does not check whether the syscall - * should or should not be restarted, but returns the raw error codes from - * __close_fd(). - */ -static inline int ksys_close(unsigned int fd) -{ - return __close_fd(current->files, fd); -} - extern long do_sys_truncate(const char __user *pathname, loff_t length); static inline long ksys_truncate(const char __user *pathname, loff_t length) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 287be337d5f6..4caf06fe4152 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3874,7 +3874,6 @@ static int bpf_task_fd_query(const union bpf_attr *attr, pid_t pid = attr->task_fd_query.pid; u32 fd = attr->task_fd_query.fd; const struct perf_event *event; - struct files_struct *files; struct task_struct *task; struct file *file; int err; @@ -3892,23 +3891,11 @@ static int bpf_task_fd_query(const union bpf_attr *attr, if (!task) return -ENOENT; - files = get_files_struct(task); - put_task_struct(task); - if (!files) - return -ENOENT; - err = 0; - spin_lock(&files->file_lock); - file = fcheck_files(files, fd); + file = fget_task(task, fd); + put_task_struct(task); if (!file) - err = -EBADF; - else - get_file(file); - spin_unlock(&files->file_lock); - put_files_struct(files); - - if (err) - goto out; + return -EBADF; if (file->f_op == &bpf_link_fops) { struct bpf_link *link = file->private_data; @@ -3948,7 +3935,6 @@ static int bpf_task_fd_query(const union bpf_attr *attr, err = -ENOTSUPP; put_file: fput(file); -out: return err; } diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 0458a40edf10..e73c07593024 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -130,7 +130,6 @@ struct bpf_iter_seq_task_file_info { */ struct bpf_iter_seq_task_common common; struct task_struct *task; - struct files_struct *files; u32 tid; u32 fd; }; @@ -139,37 +138,26 @@ static struct file * task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info) { struct pid_namespace *ns = info->common.ns; - u32 curr_tid = info->tid, max_fds; - struct files_struct *curr_files; + u32 curr_tid = info->tid; struct task_struct *curr_task; - int curr_fd = info->fd; + unsigned int curr_fd = info->fd; /* If this function returns a non-NULL file object, - * it held a reference to the task/files_struct/file. + * it held a reference to the task/file. * Otherwise, it does not hold any reference. */ again: if (info->task) { curr_task = info->task; - curr_files = info->files; curr_fd = info->fd; } else { curr_task = task_seq_get_next(ns, &curr_tid, true); if (!curr_task) { info->task = NULL; - info->files = NULL; return NULL; } - curr_files = get_files_struct(curr_task); - if (!curr_files) { - put_task_struct(curr_task); - curr_tid = ++(info->tid); - info->fd = 0; - goto again; - } - - info->files = curr_files; + /* set info->task and info->tid */ info->task = curr_task; if (curr_tid == info->tid) { curr_fd = info->fd; @@ -180,13 +168,11 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info) } rcu_read_lock(); - max_fds = files_fdtable(curr_files)->max_fds; - for (; curr_fd < max_fds; curr_fd++) { + for (;; curr_fd++) { struct file *f; - - f = fcheck_files(curr_files, curr_fd); + f = task_lookup_next_fd_rcu(curr_task, &curr_fd); if (!f) - continue; + break; if (!get_file_rcu(f)) continue; @@ -198,10 +184,8 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info) /* the current task is done, go to the next task */ rcu_read_unlock(); - put_files_struct(curr_files); put_task_struct(curr_task); info->task = NULL; - info->files = NULL; info->fd = 0; curr_tid = ++(info->tid); goto again; @@ -213,7 +197,6 @@ static void *task_file_seq_start(struct seq_file *seq, loff_t *pos) struct file *file; info->task = NULL; - info->files = NULL; file = task_file_seq_get_next(info); if (file && *pos == 0) ++*pos; @@ -275,9 +258,7 @@ static void task_file_seq_stop(struct seq_file *seq, void *v) (void)__task_file_seq_show(seq, v, true); } else { fput((struct file *)v); - put_files_struct(info->files); put_task_struct(info->task); - info->files = NULL; info->task = NULL; } } diff --git a/kernel/fork.c b/kernel/fork.c index 7425b3224891..4f44d87b82ef 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -3031,21 +3031,21 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) * the exec layer of the kernel. */ -int unshare_files(struct files_struct **displaced) +int unshare_files(void) { struct task_struct *task = current; - struct files_struct *copy = NULL; + struct files_struct *old, *copy = NULL; int error; error = unshare_fd(CLONE_FILES, NR_OPEN_MAX, ©); - if (error || !copy) { - *displaced = NULL; + if (error || !copy) return error; - } - *displaced = task->files; + + old = task->files; task_lock(task); task->files = copy; task_unlock(task); + put_files_struct(old); return 0; } diff --git a/kernel/kcmp.c b/kernel/kcmp.c index b3ff9288c6cc..36e58eb5a11d 100644 --- a/kernel/kcmp.c +++ b/kernel/kcmp.c @@ -61,16 +61,11 @@ static int kcmp_ptr(void *v1, void *v2, enum kcmp_type type) static struct file * get_file_raw_ptr(struct task_struct *task, unsigned int idx) { - struct file *file = NULL; + struct file *file; - task_lock(task); rcu_read_lock(); - - if (task->files) - file = fcheck_files(task->files, idx); - + file = task_lookup_fd_rcu(task, idx); rcu_read_unlock(); - task_unlock(task); return file; } @@ -107,7 +102,6 @@ static int kcmp_epoll_target(struct task_struct *task1, { struct file *filp, *filp_epoll, *filp_tgt; struct kcmp_epoll_slot slot; - struct files_struct *files; if (copy_from_user(&slot, uslot, sizeof(slot))) return -EFAULT; @@ -116,23 +110,12 @@ static int kcmp_epoll_target(struct task_struct *task1, if (!filp) return -EBADF; - files = get_files_struct(task2); - if (!files) + filp_epoll = fget_task(task2, slot.efd); + if (!filp_epoll) return -EBADF; - spin_lock(&files->file_lock); - filp_epoll = fcheck_files(files, slot.efd); - if (filp_epoll) - get_file(filp_epoll); - else - filp_tgt = ERR_PTR(-EBADF); - spin_unlock(&files->file_lock); - put_files_struct(files); - - if (filp_epoll) { - filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff); - fput(filp_epoll); - } + filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff); + fput(filp_epoll); if (IS_ERR(filp_tgt)) return PTR_ERR(filp_tgt);