From aa38e19f05c3a5ae64dff84f44e1aa31281a5b14 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi
Date: Thu, 5 Mar 2020 17:08:05 +0000
Subject: [PATCH] aio-posix: support userspace polling of fd monitoring

Unlike ppoll(2) and epoll(7), Linux io_uring completions can be polled
from userspace. Previously userspace polling was only allowed when all
AioHandlers had an ->io_poll() callback. This prevented starvation of
fds by userspace pollable handlers.

Add the FDMonOps->need_wait() callback that enables userspace polling
even when some AioHandlers lack ->io_poll().

For example, it's now possible to do userspace polling when a TCP/IP
socket is monitored thanks to Linux io_uring.

Signed-off-by: Stefan Hajnoczi
Link: https://lore.kernel.org/r/20200305170806.1313245-7-stefanha@redhat.com
Message-Id: <20200305170806.1313245-7-stefanha@redhat.com>
---
 include/block/aio.h   | 19 +++++++++++++++++++
 util/aio-posix.c      | 11 ++++++++---
 util/fdmon-epoll.c    |  1 +
 util/fdmon-io_uring.c |  6 ++++++
 util/fdmon-poll.c     |  1 +
 5 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/include/block/aio.h b/include/block/aio.h
index 83fc9b844d..f07ebb76b8 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -55,6 +55,9 @@ struct ThreadPool;
 struct LinuxAioState;
 struct LuringState;
 
+/* Is polling disabled? */
+bool aio_poll_disabled(AioContext *ctx);
+
 /* Callbacks for file descriptor monitoring implementations */
 typedef struct {
     /*
@@ -84,6 +87,22 @@ typedef struct {
      * Returns: number of ready file descriptors.
      */
     int (*wait)(AioContext *ctx, AioHandlerList *ready_list, int64_t timeout);
+
+    /*
+     * need_wait:
+     * @ctx: the AioContext
+     *
+     * Tell aio_poll() when to stop userspace polling early because ->wait()
+     * has fds ready.
+     *
+     * File descriptor monitoring implementations that cannot poll fd readiness
+     * from userspace should use aio_poll_disabled() here. This ensures that
+     * file descriptors are not starved by handlers that frequently make
+     * progress via userspace polling.
+     *
+     * Returns: true if ->wait() should be called, false otherwise.
+     */
+    bool (*need_wait)(AioContext *ctx);
 } FDMonOps;
 
 /*
diff --git a/util/aio-posix.c b/util/aio-posix.c
index ffd9cc381b..759989b45b 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -22,6 +22,11 @@
 #include "trace.h"
 #include "aio-posix.h"
 
+bool aio_poll_disabled(AioContext *ctx)
+{
+    return atomic_read(&ctx->poll_disable_cnt);
+}
+
 void aio_add_ready_handler(AioHandlerList *ready_list,
                            AioHandler *node,
                            int revents)
@@ -423,7 +428,7 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout)
         elapsed_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start_time;
         max_ns = qemu_soonest_timeout(*timeout, max_ns);
         assert(!(max_ns && progress));
-    } while (elapsed_time < max_ns && !atomic_read(&ctx->poll_disable_cnt));
+    } while (elapsed_time < max_ns && !ctx->fdmon_ops->need_wait(ctx));
 
     /* If time has passed with no successful polling, adjust *timeout to
      * keep the same ending time.
@@ -451,7 +456,7 @@ static bool try_poll_mode(AioContext *ctx, int64_t *timeout)
 {
     int64_t max_ns = qemu_soonest_timeout(*timeout, ctx->poll_ns);
 
-    if (max_ns && !atomic_read(&ctx->poll_disable_cnt)) {
+    if (max_ns && !ctx->fdmon_ops->need_wait(ctx)) {
         poll_set_started(ctx, true);
 
         if (run_poll_handlers(ctx, max_ns, timeout)) {
@@ -501,7 +506,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
     /* If polling is allowed, non-blocking aio_poll does not need the
      * system call---a single round of run_poll_handlers_once suffices.
      */
-    if (timeout || atomic_read(&ctx->poll_disable_cnt)) {
+    if (timeout || ctx->fdmon_ops->need_wait(ctx)) {
         ret = ctx->fdmon_ops->wait(ctx, &ready_list, timeout);
     }
 
diff --git a/util/fdmon-epoll.c b/util/fdmon-epoll.c
index d56b69468b..fcd989d47d 100644
--- a/util/fdmon-epoll.c
+++ b/util/fdmon-epoll.c
@@ -100,6 +100,7 @@ out:
 static const FDMonOps fdmon_epoll_ops = {
     .update = fdmon_epoll_update,
     .wait = fdmon_epoll_wait,
+    .need_wait = aio_poll_disabled,
 };
 
 static bool fdmon_epoll_try_enable(AioContext *ctx)
diff --git a/util/fdmon-io_uring.c b/util/fdmon-io_uring.c
index fb99b4b61e..893b79b622 100644
--- a/util/fdmon-io_uring.c
+++ b/util/fdmon-io_uring.c
@@ -288,9 +288,15 @@ static int fdmon_io_uring_wait(AioContext *ctx, AioHandlerList *ready_list,
     return process_cq_ring(ctx, ready_list);
 }
 
+static bool fdmon_io_uring_need_wait(AioContext *ctx)
+{
+    return io_uring_cq_ready(&ctx->fdmon_io_uring);
+}
+
 static const FDMonOps fdmon_io_uring_ops = {
     .update = fdmon_io_uring_update,
     .wait = fdmon_io_uring_wait,
+    .need_wait = fdmon_io_uring_need_wait,
 };
 
 bool fdmon_io_uring_setup(AioContext *ctx)
diff --git a/util/fdmon-poll.c b/util/fdmon-poll.c
index 28114a0f39..488067b679 100644
--- a/util/fdmon-poll.c
+++ b/util/fdmon-poll.c
@@ -103,4 +103,5 @@ static void fdmon_poll_update(AioContext *ctx,
 const FDMonOps fdmon_poll_ops = {
     .update = fdmon_poll_update,
     .wait = fdmon_poll_wait,
+    .need_wait = aio_poll_disabled,
 };