linux/kernel/sched/completion.c
Andrei Vagin 6f63904c8f sched: add a few helpers to wake up tasks on the current cpu
Add complete_on_current_cpu, wake_up_poll_on_current_cpu helpers to wake
up tasks on the current CPU.

These two helpers are useful when the task needs to make a synchronous context
switch to another task. In this context, synchronous means it wakes up the
target task and falls asleep right after that.

One example of such workloads is seccomp user notifies. This mechanism allows
the  supervisor process handles system calls on behalf of a target process.
While the supervisor is handling an intercepted system call, the target process
will be blocked in the kernel, waiting for a response to come back.

On-CPU context switches are much faster than regular ones.

Signed-off-by: Andrei Vagin <avagin@google.com>
Acked-by: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230308073201.3102738-4-avagin@google.com
Signed-off-by: Kees Cook <keescook@chromium.org>
2023-07-17 16:08:08 -07:00

354 lines
10 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Generic wait-for-completion handler;
*
* It differs from semaphores in that their default case is the opposite,
* wait_for_completion default blocks whereas semaphore default non-block. The
* interface also makes it easy to 'complete' multiple waiting threads,
* something which isn't entirely natural for semaphores.
*
* But more importantly, the primitive documents the usage. Semaphores would
* typically be used for exclusion which gives rise to priority inversion.
* Waiting for completion is a typically sync point, but not an exclusion point.
*/
static void complete_with_flags(struct completion *x, int wake_flags)
{
unsigned long flags;
raw_spin_lock_irqsave(&x->wait.lock, flags);
if (x->done != UINT_MAX)
x->done++;
swake_up_locked(&x->wait, wake_flags);
raw_spin_unlock_irqrestore(&x->wait.lock, flags);
}
void complete_on_current_cpu(struct completion *x)
{
return complete_with_flags(x, WF_CURRENT_CPU);
}
/**
* complete: - signals a single thread waiting on this completion
* @x: holds the state of this particular completion
*
* This will wake up a single thread waiting on this completion. Threads will be
* awakened in the same order in which they were queued.
*
* See also complete_all(), wait_for_completion() and related routines.
*
* If this function wakes up a task, it executes a full memory barrier before
* accessing the task state.
*/
void complete(struct completion *x)
{
complete_with_flags(x, 0);
}
EXPORT_SYMBOL(complete);
/**
* complete_all: - signals all threads waiting on this completion
* @x: holds the state of this particular completion
*
* This will wake up all threads waiting on this particular completion event.
*
* If this function wakes up a task, it executes a full memory barrier before
* accessing the task state.
*
* Since complete_all() sets the completion of @x permanently to done
* to allow multiple waiters to finish, a call to reinit_completion()
* must be used on @x if @x is to be used again. The code must make
* sure that all waiters have woken and finished before reinitializing
* @x. Also note that the function completion_done() can not be used
* to know if there are still waiters after complete_all() has been called.
*/
void complete_all(struct completion *x)
{
unsigned long flags;
lockdep_assert_RT_in_threaded_ctx();
raw_spin_lock_irqsave(&x->wait.lock, flags);
x->done = UINT_MAX;
swake_up_all_locked(&x->wait);
raw_spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete_all);
static inline long __sched
do_wait_for_common(struct completion *x,
long (*action)(long), long timeout, int state)
{
if (!x->done) {
DECLARE_SWAITQUEUE(wait);
do {
if (signal_pending_state(state, current)) {
timeout = -ERESTARTSYS;
break;
}
__prepare_to_swait(&x->wait, &wait);
__set_current_state(state);
raw_spin_unlock_irq(&x->wait.lock);
timeout = action(timeout);
raw_spin_lock_irq(&x->wait.lock);
} while (!x->done && timeout);
__finish_swait(&x->wait, &wait);
if (!x->done)
return timeout;
}
if (x->done != UINT_MAX)
x->done--;
return timeout ?: 1;
}
static inline long __sched
__wait_for_common(struct completion *x,
long (*action)(long), long timeout, int state)
{
might_sleep();
complete_acquire(x);
raw_spin_lock_irq(&x->wait.lock);
timeout = do_wait_for_common(x, action, timeout, state);
raw_spin_unlock_irq(&x->wait.lock);
complete_release(x);
return timeout;
}
static long __sched
wait_for_common(struct completion *x, long timeout, int state)
{
return __wait_for_common(x, schedule_timeout, timeout, state);
}
static long __sched
wait_for_common_io(struct completion *x, long timeout, int state)
{
return __wait_for_common(x, io_schedule_timeout, timeout, state);
}
/**
* wait_for_completion: - waits for completion of a task
* @x: holds the state of this particular completion
*
* This waits to be signaled for completion of a specific task. It is NOT
* interruptible and there is no timeout.
*
* See also similar routines (i.e. wait_for_completion_timeout()) with timeout
* and interrupt capability. Also see complete().
*/
void __sched wait_for_completion(struct completion *x)
{
wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(wait_for_completion);
/**
* wait_for_completion_timeout: - waits for completion of a task (w/timeout)
* @x: holds the state of this particular completion
* @timeout: timeout value in jiffies
*
* This waits for either a completion of a specific task to be signaled or for a
* specified timeout to expire. The timeout is in jiffies. It is not
* interruptible.
*
* Return: 0 if timed out, and positive (at least 1, or number of jiffies left
* till timeout) if completed.
*/
unsigned long __sched
wait_for_completion_timeout(struct completion *x, unsigned long timeout)
{
return wait_for_common(x, timeout, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(wait_for_completion_timeout);
/**
* wait_for_completion_io: - waits for completion of a task
* @x: holds the state of this particular completion
*
* This waits to be signaled for completion of a specific task. It is NOT
* interruptible and there is no timeout. The caller is accounted as waiting
* for IO (which traditionally means blkio only).
*/
void __sched wait_for_completion_io(struct completion *x)
{
wait_for_common_io(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(wait_for_completion_io);
/**
* wait_for_completion_io_timeout: - waits for completion of a task (w/timeout)
* @x: holds the state of this particular completion
* @timeout: timeout value in jiffies
*
* This waits for either a completion of a specific task to be signaled or for a
* specified timeout to expire. The timeout is in jiffies. It is not
* interruptible. The caller is accounted as waiting for IO (which traditionally
* means blkio only).
*
* Return: 0 if timed out, and positive (at least 1, or number of jiffies left
* till timeout) if completed.
*/
unsigned long __sched
wait_for_completion_io_timeout(struct completion *x, unsigned long timeout)
{
return wait_for_common_io(x, timeout, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(wait_for_completion_io_timeout);
/**
* wait_for_completion_interruptible: - waits for completion of a task (w/intr)
* @x: holds the state of this particular completion
*
* This waits for completion of a specific task to be signaled. It is
* interruptible.
*
* Return: -ERESTARTSYS if interrupted, 0 if completed.
*/
int __sched wait_for_completion_interruptible(struct completion *x)
{
long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
if (t == -ERESTARTSYS)
return t;
return 0;
}
EXPORT_SYMBOL(wait_for_completion_interruptible);
/**
* wait_for_completion_interruptible_timeout: - waits for completion (w/(to,intr))
* @x: holds the state of this particular completion
* @timeout: timeout value in jiffies
*
* This waits for either a completion of a specific task to be signaled or for a
* specified timeout to expire. It is interruptible. The timeout is in jiffies.
*
* Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
* or number of jiffies left till timeout) if completed.
*/
long __sched
wait_for_completion_interruptible_timeout(struct completion *x,
unsigned long timeout)
{
return wait_for_common(x, timeout, TASK_INTERRUPTIBLE);
}
EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
/**
* wait_for_completion_killable: - waits for completion of a task (killable)
* @x: holds the state of this particular completion
*
* This waits to be signaled for completion of a specific task. It can be
* interrupted by a kill signal.
*
* Return: -ERESTARTSYS if interrupted, 0 if completed.
*/
int __sched wait_for_completion_killable(struct completion *x)
{
long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
if (t == -ERESTARTSYS)
return t;
return 0;
}
EXPORT_SYMBOL(wait_for_completion_killable);
int __sched wait_for_completion_state(struct completion *x, unsigned int state)
{
long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, state);
if (t == -ERESTARTSYS)
return t;
return 0;
}
EXPORT_SYMBOL(wait_for_completion_state);
/**
* wait_for_completion_killable_timeout: - waits for completion of a task (w/(to,killable))
* @x: holds the state of this particular completion
* @timeout: timeout value in jiffies
*
* This waits for either a completion of a specific task to be
* signaled or for a specified timeout to expire. It can be
* interrupted by a kill signal. The timeout is in jiffies.
*
* Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
* or number of jiffies left till timeout) if completed.
*/
long __sched
wait_for_completion_killable_timeout(struct completion *x,
unsigned long timeout)
{
return wait_for_common(x, timeout, TASK_KILLABLE);
}
EXPORT_SYMBOL(wait_for_completion_killable_timeout);
/**
* try_wait_for_completion - try to decrement a completion without blocking
* @x: completion structure
*
* Return: 0 if a decrement cannot be done without blocking
* 1 if a decrement succeeded.
*
* If a completion is being used as a counting completion,
* attempt to decrement the counter without blocking. This
* enables us to avoid waiting if the resource the completion
* is protecting is not available.
*/
bool try_wait_for_completion(struct completion *x)
{
unsigned long flags;
bool ret = true;
/*
* Since x->done will need to be locked only
* in the non-blocking case, we check x->done
* first without taking the lock so we can
* return early in the blocking case.
*/
if (!READ_ONCE(x->done))
return false;
raw_spin_lock_irqsave(&x->wait.lock, flags);
if (!x->done)
ret = false;
else if (x->done != UINT_MAX)
x->done--;
raw_spin_unlock_irqrestore(&x->wait.lock, flags);
return ret;
}
EXPORT_SYMBOL(try_wait_for_completion);
/**
* completion_done - Test to see if a completion has any waiters
* @x: completion structure
*
* Return: 0 if there are waiters (wait_for_completion() in progress)
* 1 if there are no waiters.
*
* Note, this will always return true if complete_all() was called on @X.
*/
bool completion_done(struct completion *x)
{
unsigned long flags;
if (!READ_ONCE(x->done))
return false;
/*
* If ->done, we need to wait for complete() to release ->wait.lock
* otherwise we can end up freeing the completion before complete()
* is done referencing it.
*/
raw_spin_lock_irqsave(&x->wait.lock, flags);
raw_spin_unlock_irqrestore(&x->wait.lock, flags);
return true;
}
EXPORT_SYMBOL(completion_done);