diff --git a/io_uring/Makefile b/io_uring/Makefile index ed0bf42db4ae..2db085cdedad 100644 --- a/io_uring/Makefile +++ b/io_uring/Makefile @@ -6,5 +6,5 @@ obj-$(CONFIG_IO_URING) += io_uring.o xattr.o nop.o fs.o splice.o \ sync.o advise.o filetable.o \ openclose.o uring_cmd.o epoll.o \ statx.o net.o msg_ring.o timeout.o \ - sqpoll.o fdinfo.o + sqpoll.o fdinfo.o tctx.o obj-$(CONFIG_IO_WQ) += io-wq.o diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 95c2083a0da8..d7336e6c9f23 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -92,6 +92,7 @@ #include "io_uring_types.h" #include "io_uring.h" #include "refs.h" +#include "tctx.h" #include "sqpoll.h" #include "fdinfo.h" @@ -208,30 +209,6 @@ struct io_buffer { #define BGID_ARRAY 64 -/* - * Arbitrary limit, can be raised if need be - */ -#define IO_RINGFD_REG_MAX 16 - -struct io_uring_task { - /* submission side */ - int cached_refs; - struct xarray xa; - struct wait_queue_head wait; - const struct io_ring_ctx *last; - struct io_wq *io_wq; - struct percpu_counter inflight; - atomic_t inflight_tracked; - atomic_t in_idle; - - spinlock_t task_lock; - struct io_wq_work_list task_list; - struct io_wq_work_list prio_task_list; - struct callback_head task_work; - struct file **registered_rings; - bool task_running; -}; - /* * First field must be the file pointer in all the * iocb unions! See also 'struct kiocb' in @@ -311,12 +288,6 @@ enum { IO_CHECK_CQ_DROPPED_BIT, }; -struct io_tctx_node { - struct list_head ctx_node; - struct task_struct *task; - struct io_ring_ctx *ctx; -}; - struct io_defer_entry { struct list_head list; struct io_kiocb *req; @@ -361,7 +332,6 @@ static const struct io_op_def io_op_defs[]; #define IO_DISARM_MASK (REQ_F_ARM_LTIMEOUT | REQ_F_LINK_TIMEOUT | REQ_F_FAIL) #define IO_REQ_LINK_FLAGS (REQ_F_LINK | REQ_F_HARDLINK) -static void io_uring_del_tctx_node(unsigned long index); static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, struct task_struct *task, bool cancel_all); @@ -1677,7 +1647,7 @@ static void handle_tw_list(struct io_wq_work_node *node, } while (node); } -static void tctx_task_work(struct callback_head *cb) +void tctx_task_work(struct callback_head *cb) { bool uring_locked = false; struct io_ring_ctx *ctx = NULL; @@ -4725,7 +4695,7 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) return 0; } -static struct io_wq_work *io_wq_free_work(struct io_wq_work *work) +struct io_wq_work *io_wq_free_work(struct io_wq_work *work) { struct io_kiocb *req = container_of(work, struct io_kiocb, work); @@ -4733,7 +4703,7 @@ static struct io_wq_work *io_wq_free_work(struct io_wq_work *work) return req ? &req->work : NULL; } -static void io_wq_submit_work(struct io_wq_work *work) +void io_wq_submit_work(struct io_wq_work *work) { struct io_kiocb *req = container_of(work, struct io_kiocb, work); const struct io_op_def *def = &io_op_defs[req->opcode]; @@ -6089,97 +6059,6 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, return done ? done : err; } -static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx, - struct task_struct *task) -{ - struct io_wq_hash *hash; - struct io_wq_data data; - unsigned int concurrency; - - mutex_lock(&ctx->uring_lock); - hash = ctx->hash_map; - if (!hash) { - hash = kzalloc(sizeof(*hash), GFP_KERNEL); - if (!hash) { - mutex_unlock(&ctx->uring_lock); - return ERR_PTR(-ENOMEM); - } - refcount_set(&hash->refs, 1); - init_waitqueue_head(&hash->wait); - ctx->hash_map = hash; - } - mutex_unlock(&ctx->uring_lock); - - data.hash = hash; - data.task = task; - data.free_work = io_wq_free_work; - data.do_work = io_wq_submit_work; - - /* Do QD, or 4 * CPUS, whatever is smallest */ - concurrency = min(ctx->sq_entries, 4 * num_online_cpus()); - - return io_wq_create(concurrency, &data); -} - -__cold int io_uring_alloc_task_context(struct task_struct *task, - struct io_ring_ctx *ctx) -{ - struct io_uring_task *tctx; - int ret; - - tctx = kzalloc(sizeof(*tctx), GFP_KERNEL); - if (unlikely(!tctx)) - return -ENOMEM; - - tctx->registered_rings = kcalloc(IO_RINGFD_REG_MAX, - sizeof(struct file *), GFP_KERNEL); - if (unlikely(!tctx->registered_rings)) { - kfree(tctx); - return -ENOMEM; - } - - ret = percpu_counter_init(&tctx->inflight, 0, GFP_KERNEL); - if (unlikely(ret)) { - kfree(tctx->registered_rings); - kfree(tctx); - return ret; - } - - tctx->io_wq = io_init_wq_offload(ctx, task); - if (IS_ERR(tctx->io_wq)) { - ret = PTR_ERR(tctx->io_wq); - percpu_counter_destroy(&tctx->inflight); - kfree(tctx->registered_rings); - kfree(tctx); - return ret; - } - - xa_init(&tctx->xa); - init_waitqueue_head(&tctx->wait); - atomic_set(&tctx->in_idle, 0); - atomic_set(&tctx->inflight_tracked, 0); - task->io_uring = tctx; - spin_lock_init(&tctx->task_lock); - INIT_WQ_LIST(&tctx->task_list); - INIT_WQ_LIST(&tctx->prio_task_list); - init_task_work(&tctx->task_work, tctx_task_work); - return 0; -} - -void __io_uring_free(struct task_struct *tsk) -{ - struct io_uring_task *tctx = tsk->io_uring; - - WARN_ON_ONCE(!xa_empty(&tctx->xa)); - WARN_ON_ONCE(tctx->io_wq); - WARN_ON_ONCE(tctx->cached_refs); - - kfree(tctx->registered_rings); - percpu_counter_destroy(&tctx->inflight); - kfree(tctx); - tsk->io_uring = NULL; -} - static inline void __io_unaccount_mem(struct user_struct *user, unsigned long nr_pages) { @@ -7179,107 +7058,6 @@ static __cold void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, } } -static int __io_uring_add_tctx_node(struct io_ring_ctx *ctx) -{ - struct io_uring_task *tctx = current->io_uring; - struct io_tctx_node *node; - int ret; - - if (unlikely(!tctx)) { - ret = io_uring_alloc_task_context(current, ctx); - if (unlikely(ret)) - return ret; - - tctx = current->io_uring; - if (ctx->iowq_limits_set) { - unsigned int limits[2] = { ctx->iowq_limits[0], - ctx->iowq_limits[1], }; - - ret = io_wq_max_workers(tctx->io_wq, limits); - if (ret) - return ret; - } - } - if (!xa_load(&tctx->xa, (unsigned long)ctx)) { - node = kmalloc(sizeof(*node), GFP_KERNEL); - if (!node) - return -ENOMEM; - node->ctx = ctx; - node->task = current; - - ret = xa_err(xa_store(&tctx->xa, (unsigned long)ctx, - node, GFP_KERNEL)); - if (ret) { - kfree(node); - return ret; - } - - mutex_lock(&ctx->uring_lock); - list_add(&node->ctx_node, &ctx->tctx_list); - mutex_unlock(&ctx->uring_lock); - } - tctx->last = ctx; - return 0; -} - -/* - * Note that this task has used io_uring. We use it for cancelation purposes. - */ -static inline int io_uring_add_tctx_node(struct io_ring_ctx *ctx) -{ - struct io_uring_task *tctx = current->io_uring; - - if (likely(tctx && tctx->last == ctx)) - return 0; - return __io_uring_add_tctx_node(ctx); -} - -/* - * Remove this io_uring_file -> task mapping. - */ -static __cold void io_uring_del_tctx_node(unsigned long index) -{ - struct io_uring_task *tctx = current->io_uring; - struct io_tctx_node *node; - - if (!tctx) - return; - node = xa_erase(&tctx->xa, index); - if (!node) - return; - - WARN_ON_ONCE(current != node->task); - WARN_ON_ONCE(list_empty(&node->ctx_node)); - - mutex_lock(&node->ctx->uring_lock); - list_del(&node->ctx_node); - mutex_unlock(&node->ctx->uring_lock); - - if (tctx->last == node->ctx) - tctx->last = NULL; - kfree(node); -} - -static __cold void io_uring_clean_tctx(struct io_uring_task *tctx) -{ - struct io_wq *wq = tctx->io_wq; - struct io_tctx_node *node; - unsigned long index; - - xa_for_each(&tctx->xa, index, node) { - io_uring_del_tctx_node(index); - cond_resched(); - } - if (wq) { - /* - * Must be after io_uring_del_tctx_node() (removes nodes under - * uring_lock) to avoid race with io_uring_try_cancel_iowq(). - */ - io_wq_put_and_exit(wq); - tctx->io_wq = NULL; - } -} - static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked) { if (tracked) @@ -7361,144 +7139,6 @@ void __io_uring_cancel(bool cancel_all) io_uring_cancel_generic(cancel_all, NULL); } -void io_uring_unreg_ringfd(void) -{ - struct io_uring_task *tctx = current->io_uring; - int i; - - for (i = 0; i < IO_RINGFD_REG_MAX; i++) { - if (tctx->registered_rings[i]) { - fput(tctx->registered_rings[i]); - tctx->registered_rings[i] = NULL; - } - } -} - -static int io_ring_add_registered_fd(struct io_uring_task *tctx, int fd, - int start, int end) -{ - struct file *file; - int offset; - - for (offset = start; offset < end; offset++) { - offset = array_index_nospec(offset, IO_RINGFD_REG_MAX); - if (tctx->registered_rings[offset]) - continue; - - file = fget(fd); - if (!file) { - return -EBADF; - } else if (!io_is_uring_fops(file)) { - fput(file); - return -EOPNOTSUPP; - } - tctx->registered_rings[offset] = file; - return offset; - } - - return -EBUSY; -} - -/* - * Register a ring fd to avoid fdget/fdput for each io_uring_enter() - * invocation. User passes in an array of struct io_uring_rsrc_update - * with ->data set to the ring_fd, and ->offset given for the desired - * index. If no index is desired, application may set ->offset == -1U - * and we'll find an available index. Returns number of entries - * successfully processed, or < 0 on error if none were processed. - */ -static int io_ringfd_register(struct io_ring_ctx *ctx, void __user *__arg, - unsigned nr_args) -{ - struct io_uring_rsrc_update __user *arg = __arg; - struct io_uring_rsrc_update reg; - struct io_uring_task *tctx; - int ret, i; - - if (!nr_args || nr_args > IO_RINGFD_REG_MAX) - return -EINVAL; - - mutex_unlock(&ctx->uring_lock); - ret = io_uring_add_tctx_node(ctx); - mutex_lock(&ctx->uring_lock); - if (ret) - return ret; - - tctx = current->io_uring; - for (i = 0; i < nr_args; i++) { - int start, end; - - if (copy_from_user(®, &arg[i], sizeof(reg))) { - ret = -EFAULT; - break; - } - - if (reg.resv) { - ret = -EINVAL; - break; - } - - if (reg.offset == -1U) { - start = 0; - end = IO_RINGFD_REG_MAX; - } else { - if (reg.offset >= IO_RINGFD_REG_MAX) { - ret = -EINVAL; - break; - } - start = reg.offset; - end = start + 1; - } - - ret = io_ring_add_registered_fd(tctx, reg.data, start, end); - if (ret < 0) - break; - - reg.offset = ret; - if (copy_to_user(&arg[i], ®, sizeof(reg))) { - fput(tctx->registered_rings[reg.offset]); - tctx->registered_rings[reg.offset] = NULL; - ret = -EFAULT; - break; - } - } - - return i ? i : ret; -} - -static int io_ringfd_unregister(struct io_ring_ctx *ctx, void __user *__arg, - unsigned nr_args) -{ - struct io_uring_rsrc_update __user *arg = __arg; - struct io_uring_task *tctx = current->io_uring; - struct io_uring_rsrc_update reg; - int ret = 0, i; - - if (!nr_args || nr_args > IO_RINGFD_REG_MAX) - return -EINVAL; - if (!tctx) - return 0; - - for (i = 0; i < nr_args; i++) { - if (copy_from_user(®, &arg[i], sizeof(reg))) { - ret = -EFAULT; - break; - } - if (reg.resv || reg.data || reg.offset >= IO_RINGFD_REG_MAX) { - ret = -EINVAL; - break; - } - - reg.offset = array_index_nospec(reg.offset, IO_RINGFD_REG_MAX); - if (tctx->registered_rings[reg.offset]) { - fput(tctx->registered_rings[reg.offset]); - tctx->registered_rings[reg.offset] = NULL; - } - } - - return i ? i : ret; -} - static void *io_uring_validate_mmap_request(struct file *file, loff_t pgoff, size_t sz) { diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 1da8e66507a3..60678e88a9b9 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -128,6 +128,7 @@ void io_req_task_work_add(struct io_kiocb *req); void io_req_tw_post_queue(struct io_kiocb *req, s32 res, u32 cflags); void io_req_task_complete(struct io_kiocb *req, bool *locked); void io_req_task_queue_fail(struct io_kiocb *req, int ret); +void tctx_task_work(struct callback_head *cb); int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd); __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd); int io_uring_alloc_task_context(struct task_struct *task, @@ -136,6 +137,9 @@ int io_uring_alloc_task_context(struct task_struct *task, int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr); int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin); +struct io_wq_work *io_wq_free_work(struct io_wq_work *work); +void io_wq_submit_work(struct io_wq_work *work); + void io_free_req(struct io_kiocb *req); void io_queue_next(struct io_kiocb *req); diff --git a/io_uring/tctx.c b/io_uring/tctx.c new file mode 100644 index 000000000000..3f7e9feb6ca2 --- /dev/null +++ b/io_uring/tctx.c @@ -0,0 +1,332 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "io_uring_types.h" +#include "io_uring.h" +#include "tctx.h" + +static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx, + struct task_struct *task) +{ + struct io_wq_hash *hash; + struct io_wq_data data; + unsigned int concurrency; + + mutex_lock(&ctx->uring_lock); + hash = ctx->hash_map; + if (!hash) { + hash = kzalloc(sizeof(*hash), GFP_KERNEL); + if (!hash) { + mutex_unlock(&ctx->uring_lock); + return ERR_PTR(-ENOMEM); + } + refcount_set(&hash->refs, 1); + init_waitqueue_head(&hash->wait); + ctx->hash_map = hash; + } + mutex_unlock(&ctx->uring_lock); + + data.hash = hash; + data.task = task; + data.free_work = io_wq_free_work; + data.do_work = io_wq_submit_work; + + /* Do QD, or 4 * CPUS, whatever is smallest */ + concurrency = min(ctx->sq_entries, 4 * num_online_cpus()); + + return io_wq_create(concurrency, &data); +} + +void __io_uring_free(struct task_struct *tsk) +{ + struct io_uring_task *tctx = tsk->io_uring; + + WARN_ON_ONCE(!xa_empty(&tctx->xa)); + WARN_ON_ONCE(tctx->io_wq); + WARN_ON_ONCE(tctx->cached_refs); + + kfree(tctx->registered_rings); + percpu_counter_destroy(&tctx->inflight); + kfree(tctx); + tsk->io_uring = NULL; +} + +__cold int io_uring_alloc_task_context(struct task_struct *task, + struct io_ring_ctx *ctx) +{ + struct io_uring_task *tctx; + int ret; + + tctx = kzalloc(sizeof(*tctx), GFP_KERNEL); + if (unlikely(!tctx)) + return -ENOMEM; + + tctx->registered_rings = kcalloc(IO_RINGFD_REG_MAX, + sizeof(struct file *), GFP_KERNEL); + if (unlikely(!tctx->registered_rings)) { + kfree(tctx); + return -ENOMEM; + } + + ret = percpu_counter_init(&tctx->inflight, 0, GFP_KERNEL); + if (unlikely(ret)) { + kfree(tctx->registered_rings); + kfree(tctx); + return ret; + } + + tctx->io_wq = io_init_wq_offload(ctx, task); + if (IS_ERR(tctx->io_wq)) { + ret = PTR_ERR(tctx->io_wq); + percpu_counter_destroy(&tctx->inflight); + kfree(tctx->registered_rings); + kfree(tctx); + return ret; + } + + xa_init(&tctx->xa); + init_waitqueue_head(&tctx->wait); + atomic_set(&tctx->in_idle, 0); + atomic_set(&tctx->inflight_tracked, 0); + task->io_uring = tctx; + spin_lock_init(&tctx->task_lock); + INIT_WQ_LIST(&tctx->task_list); + INIT_WQ_LIST(&tctx->prio_task_list); + init_task_work(&tctx->task_work, tctx_task_work); + return 0; +} + +int __io_uring_add_tctx_node(struct io_ring_ctx *ctx) +{ + struct io_uring_task *tctx = current->io_uring; + struct io_tctx_node *node; + int ret; + + if (unlikely(!tctx)) { + ret = io_uring_alloc_task_context(current, ctx); + if (unlikely(ret)) + return ret; + + tctx = current->io_uring; + if (ctx->iowq_limits_set) { + unsigned int limits[2] = { ctx->iowq_limits[0], + ctx->iowq_limits[1], }; + + ret = io_wq_max_workers(tctx->io_wq, limits); + if (ret) + return ret; + } + } + if (!xa_load(&tctx->xa, (unsigned long)ctx)) { + node = kmalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + node->ctx = ctx; + node->task = current; + + ret = xa_err(xa_store(&tctx->xa, (unsigned long)ctx, + node, GFP_KERNEL)); + if (ret) { + kfree(node); + return ret; + } + + mutex_lock(&ctx->uring_lock); + list_add(&node->ctx_node, &ctx->tctx_list); + mutex_unlock(&ctx->uring_lock); + } + tctx->last = ctx; + return 0; +} + +/* + * Remove this io_uring_file -> task mapping. + */ +__cold void io_uring_del_tctx_node(unsigned long index) +{ + struct io_uring_task *tctx = current->io_uring; + struct io_tctx_node *node; + + if (!tctx) + return; + node = xa_erase(&tctx->xa, index); + if (!node) + return; + + WARN_ON_ONCE(current != node->task); + WARN_ON_ONCE(list_empty(&node->ctx_node)); + + mutex_lock(&node->ctx->uring_lock); + list_del(&node->ctx_node); + mutex_unlock(&node->ctx->uring_lock); + + if (tctx->last == node->ctx) + tctx->last = NULL; + kfree(node); +} + +__cold void io_uring_clean_tctx(struct io_uring_task *tctx) +{ + struct io_wq *wq = tctx->io_wq; + struct io_tctx_node *node; + unsigned long index; + + xa_for_each(&tctx->xa, index, node) { + io_uring_del_tctx_node(index); + cond_resched(); + } + if (wq) { + /* + * Must be after io_uring_del_tctx_node() (removes nodes under + * uring_lock) to avoid race with io_uring_try_cancel_iowq(). + */ + io_wq_put_and_exit(wq); + tctx->io_wq = NULL; + } +} + +void io_uring_unreg_ringfd(void) +{ + struct io_uring_task *tctx = current->io_uring; + int i; + + for (i = 0; i < IO_RINGFD_REG_MAX; i++) { + if (tctx->registered_rings[i]) { + fput(tctx->registered_rings[i]); + tctx->registered_rings[i] = NULL; + } + } +} + +static int io_ring_add_registered_fd(struct io_uring_task *tctx, int fd, + int start, int end) +{ + struct file *file; + int offset; + + for (offset = start; offset < end; offset++) { + offset = array_index_nospec(offset, IO_RINGFD_REG_MAX); + if (tctx->registered_rings[offset]) + continue; + + file = fget(fd); + if (!file) { + return -EBADF; + } else if (!io_is_uring_fops(file)) { + fput(file); + return -EOPNOTSUPP; + } + tctx->registered_rings[offset] = file; + return offset; + } + + return -EBUSY; +} + +/* + * Register a ring fd to avoid fdget/fdput for each io_uring_enter() + * invocation. User passes in an array of struct io_uring_rsrc_update + * with ->data set to the ring_fd, and ->offset given for the desired + * index. If no index is desired, application may set ->offset == -1U + * and we'll find an available index. Returns number of entries + * successfully processed, or < 0 on error if none were processed. + */ +int io_ringfd_register(struct io_ring_ctx *ctx, void __user *__arg, + unsigned nr_args) +{ + struct io_uring_rsrc_update __user *arg = __arg; + struct io_uring_rsrc_update reg; + struct io_uring_task *tctx; + int ret, i; + + if (!nr_args || nr_args > IO_RINGFD_REG_MAX) + return -EINVAL; + + mutex_unlock(&ctx->uring_lock); + ret = io_uring_add_tctx_node(ctx); + mutex_lock(&ctx->uring_lock); + if (ret) + return ret; + + tctx = current->io_uring; + for (i = 0; i < nr_args; i++) { + int start, end; + + if (copy_from_user(®, &arg[i], sizeof(reg))) { + ret = -EFAULT; + break; + } + + if (reg.resv) { + ret = -EINVAL; + break; + } + + if (reg.offset == -1U) { + start = 0; + end = IO_RINGFD_REG_MAX; + } else { + if (reg.offset >= IO_RINGFD_REG_MAX) { + ret = -EINVAL; + break; + } + start = reg.offset; + end = start + 1; + } + + ret = io_ring_add_registered_fd(tctx, reg.data, start, end); + if (ret < 0) + break; + + reg.offset = ret; + if (copy_to_user(&arg[i], ®, sizeof(reg))) { + fput(tctx->registered_rings[reg.offset]); + tctx->registered_rings[reg.offset] = NULL; + ret = -EFAULT; + break; + } + } + + return i ? i : ret; +} + +int io_ringfd_unregister(struct io_ring_ctx *ctx, void __user *__arg, + unsigned nr_args) +{ + struct io_uring_rsrc_update __user *arg = __arg; + struct io_uring_task *tctx = current->io_uring; + struct io_uring_rsrc_update reg; + int ret = 0, i; + + if (!nr_args || nr_args > IO_RINGFD_REG_MAX) + return -EINVAL; + if (!tctx) + return 0; + + for (i = 0; i < nr_args; i++) { + if (copy_from_user(®, &arg[i], sizeof(reg))) { + ret = -EFAULT; + break; + } + if (reg.resv || reg.data || reg.offset >= IO_RINGFD_REG_MAX) { + ret = -EINVAL; + break; + } + + reg.offset = array_index_nospec(reg.offset, IO_RINGFD_REG_MAX); + if (tctx->registered_rings[reg.offset]) { + fput(tctx->registered_rings[reg.offset]); + tctx->registered_rings[reg.offset] = NULL; + } + } + + return i ? i : ret; +} diff --git a/io_uring/tctx.h b/io_uring/tctx.h new file mode 100644 index 000000000000..f4964e40d07e --- /dev/null +++ b/io_uring/tctx.h @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Arbitrary limit, can be raised if need be + */ +#define IO_RINGFD_REG_MAX 16 + +struct io_uring_task { + /* submission side */ + int cached_refs; + struct xarray xa; + struct wait_queue_head wait; + const struct io_ring_ctx *last; + struct io_wq *io_wq; + struct percpu_counter inflight; + atomic_t inflight_tracked; + atomic_t in_idle; + + spinlock_t task_lock; + struct io_wq_work_list task_list; + struct io_wq_work_list prio_task_list; + struct callback_head task_work; + struct file **registered_rings; + bool task_running; +}; + +struct io_tctx_node { + struct list_head ctx_node; + struct task_struct *task; + struct io_ring_ctx *ctx; +}; + +int io_uring_alloc_task_context(struct task_struct *task, + struct io_ring_ctx *ctx); +void io_uring_del_tctx_node(unsigned long index); +int __io_uring_add_tctx_node(struct io_ring_ctx *ctx); +void io_uring_clean_tctx(struct io_uring_task *tctx); + +void io_uring_unreg_ringfd(void); +int io_ringfd_register(struct io_ring_ctx *ctx, void __user *__arg, + unsigned nr_args); +int io_ringfd_unregister(struct io_ring_ctx *ctx, void __user *__arg, + unsigned nr_args); + +/* + * Note that this task has used io_uring. We use it for cancelation purposes. + */ +static inline int io_uring_add_tctx_node(struct io_ring_ctx *ctx) +{ + struct io_uring_task *tctx = current->io_uring; + + if (likely(tctx && tctx->last == ctx)) + return 0; + return __io_uring_add_tctx_node(ctx); +}