sched/psi: Rename existing poll members in preparation

Rename the existing poll members in the PSI implementation to make a
clear distinction between the privileged and unprivileged trigger code
that will be implemented in the next patch.

Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Link: https://lore.kernel.org/r/20230330105418.77061-3-cerasuolodomenico@gmail.com
This commit is contained in:
Domenico Cerasuolo 2023-03-30 12:54:16 +02:00 committed by Peter Zijlstra
parent 7fab21fa0d
commit 65457b74aa
2 changed files with 98 additions and 97 deletions

View file

@ -175,26 +175,26 @@ struct psi_group {
u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1]; u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1];
unsigned long avg[NR_PSI_STATES - 1][3]; unsigned long avg[NR_PSI_STATES - 1][3];
/* Monitor work control */ /* Monitor RT polling work control */
struct task_struct __rcu *poll_task; struct task_struct __rcu *rtpoll_task;
struct timer_list poll_timer; struct timer_list rtpoll_timer;
wait_queue_head_t poll_wait; wait_queue_head_t rtpoll_wait;
atomic_t poll_wakeup; atomic_t rtpoll_wakeup;
atomic_t poll_scheduled; atomic_t rtpoll_scheduled;
/* Protects data used by the monitor */ /* Protects data used by the monitor */
struct mutex trigger_lock; struct mutex rtpoll_trigger_lock;
/* Configured polling triggers */ /* Configured RT polling triggers */
struct list_head triggers; struct list_head rtpoll_triggers;
u32 nr_triggers[NR_PSI_STATES - 1]; u32 rtpoll_nr_triggers[NR_PSI_STATES - 1];
u32 poll_states; u32 rtpoll_states;
u64 poll_min_period; u64 rtpoll_min_period;
/* Total stall times at the start of monitor activation */ /* Total stall times at the start of RT polling monitor activation */
u64 polling_total[NR_PSI_STATES - 1]; u64 rtpoll_total[NR_PSI_STATES - 1];
u64 polling_next_update; u64 rtpoll_next_update;
u64 polling_until; u64 rtpoll_until;
}; };
#else /* CONFIG_PSI */ #else /* CONFIG_PSI */

View file

@ -189,14 +189,14 @@ static void group_init(struct psi_group *group)
INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work); INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work);
mutex_init(&group->avgs_lock); mutex_init(&group->avgs_lock);
/* Init trigger-related members */ /* Init trigger-related members */
atomic_set(&group->poll_scheduled, 0); atomic_set(&group->rtpoll_scheduled, 0);
mutex_init(&group->trigger_lock); mutex_init(&group->rtpoll_trigger_lock);
INIT_LIST_HEAD(&group->triggers); INIT_LIST_HEAD(&group->rtpoll_triggers);
group->poll_min_period = U32_MAX; group->rtpoll_min_period = U32_MAX;
group->polling_next_update = ULLONG_MAX; group->rtpoll_next_update = ULLONG_MAX;
init_waitqueue_head(&group->poll_wait); init_waitqueue_head(&group->rtpoll_wait);
timer_setup(&group->poll_timer, poll_timer_fn, 0); timer_setup(&group->rtpoll_timer, poll_timer_fn, 0);
rcu_assign_pointer(group->poll_task, NULL); rcu_assign_pointer(group->rtpoll_task, NULL);
} }
void __init psi_init(void) void __init psi_init(void)
@ -440,11 +440,11 @@ static u64 update_triggers(struct psi_group *group, u64 now)
* On subsequent updates, calculate growth deltas and let * On subsequent updates, calculate growth deltas and let
* watchers know when their specified thresholds are exceeded. * watchers know when their specified thresholds are exceeded.
*/ */
list_for_each_entry(t, &group->triggers, node) { list_for_each_entry(t, &group->rtpoll_triggers, node) {
u64 growth; u64 growth;
bool new_stall; bool new_stall;
new_stall = group->polling_total[t->state] != total[t->state]; new_stall = group->rtpoll_total[t->state] != total[t->state];
/* Check for stall activity or a previous threshold breach */ /* Check for stall activity or a previous threshold breach */
if (!new_stall && !t->pending_event) if (!new_stall && !t->pending_event)
@ -486,10 +486,10 @@ static u64 update_triggers(struct psi_group *group, u64 now)
} }
if (update_total) if (update_total)
memcpy(group->polling_total, total, memcpy(group->rtpoll_total, total,
sizeof(group->polling_total)); sizeof(group->rtpoll_total));
return now + group->poll_min_period; return now + group->rtpoll_min_period;
} }
static u64 update_averages(struct psi_group *group, u64 now) static u64 update_averages(struct psi_group *group, u64 now)
@ -582,53 +582,53 @@ static void init_triggers(struct psi_group *group, u64 now)
{ {
struct psi_trigger *t; struct psi_trigger *t;
list_for_each_entry(t, &group->triggers, node) list_for_each_entry(t, &group->rtpoll_triggers, node)
window_reset(&t->win, now, window_reset(&t->win, now,
group->total[PSI_POLL][t->state], 0); group->total[PSI_POLL][t->state], 0);
memcpy(group->polling_total, group->total[PSI_POLL], memcpy(group->rtpoll_total, group->total[PSI_POLL],
sizeof(group->polling_total)); sizeof(group->rtpoll_total));
group->polling_next_update = now + group->poll_min_period; group->rtpoll_next_update = now + group->rtpoll_min_period;
} }
/* Schedule polling if it's not already scheduled or forced. */ /* Schedule polling if it's not already scheduled or forced. */
static void psi_schedule_poll_work(struct psi_group *group, unsigned long delay, static void psi_schedule_rtpoll_work(struct psi_group *group, unsigned long delay,
bool force) bool force)
{ {
struct task_struct *task; struct task_struct *task;
/* /*
* atomic_xchg should be called even when !force to provide a * atomic_xchg should be called even when !force to provide a
* full memory barrier (see the comment inside psi_poll_work). * full memory barrier (see the comment inside psi_rtpoll_work).
*/ */
if (atomic_xchg(&group->poll_scheduled, 1) && !force) if (atomic_xchg(&group->rtpoll_scheduled, 1) && !force)
return; return;
rcu_read_lock(); rcu_read_lock();
task = rcu_dereference(group->poll_task); task = rcu_dereference(group->rtpoll_task);
/* /*
* kworker might be NULL in case psi_trigger_destroy races with * kworker might be NULL in case psi_trigger_destroy races with
* psi_task_change (hotpath) which can't use locks * psi_task_change (hotpath) which can't use locks
*/ */
if (likely(task)) if (likely(task))
mod_timer(&group->poll_timer, jiffies + delay); mod_timer(&group->rtpoll_timer, jiffies + delay);
else else
atomic_set(&group->poll_scheduled, 0); atomic_set(&group->rtpoll_scheduled, 0);
rcu_read_unlock(); rcu_read_unlock();
} }
static void psi_poll_work(struct psi_group *group) static void psi_rtpoll_work(struct psi_group *group)
{ {
bool force_reschedule = false; bool force_reschedule = false;
u32 changed_states; u32 changed_states;
u64 now; u64 now;
mutex_lock(&group->trigger_lock); mutex_lock(&group->rtpoll_trigger_lock);
now = sched_clock(); now = sched_clock();
if (now > group->polling_until) { if (now > group->rtpoll_until) {
/* /*
* We are either about to start or might stop polling if no * We are either about to start or might stop polling if no
* state change was recorded. Resetting poll_scheduled leaves * state change was recorded. Resetting poll_scheduled leaves
@ -638,7 +638,7 @@ static void psi_poll_work(struct psi_group *group)
* should be negligible and polling_next_update still keeps * should be negligible and polling_next_update still keeps
* updates correctly on schedule. * updates correctly on schedule.
*/ */
atomic_set(&group->poll_scheduled, 0); atomic_set(&group->rtpoll_scheduled, 0);
/* /*
* A task change can race with the poll worker that is supposed to * A task change can race with the poll worker that is supposed to
* report on it. To avoid missing events, ensure ordering between * report on it. To avoid missing events, ensure ordering between
@ -667,9 +667,9 @@ static void psi_poll_work(struct psi_group *group)
collect_percpu_times(group, PSI_POLL, &changed_states); collect_percpu_times(group, PSI_POLL, &changed_states);
if (changed_states & group->poll_states) { if (changed_states & group->rtpoll_states) {
/* Initialize trigger windows when entering polling mode */ /* Initialize trigger windows when entering polling mode */
if (now > group->polling_until) if (now > group->rtpoll_until)
init_triggers(group, now); init_triggers(group, now);
/* /*
@ -677,50 +677,50 @@ static void psi_poll_work(struct psi_group *group)
* minimum tracking window as long as monitor states are * minimum tracking window as long as monitor states are
* changing. * changing.
*/ */
group->polling_until = now + group->rtpoll_until = now +
group->poll_min_period * UPDATES_PER_WINDOW; group->rtpoll_min_period * UPDATES_PER_WINDOW;
} }
if (now > group->polling_until) { if (now > group->rtpoll_until) {
group->polling_next_update = ULLONG_MAX; group->rtpoll_next_update = ULLONG_MAX;
goto out; goto out;
} }
if (now >= group->polling_next_update) if (now >= group->rtpoll_next_update)
group->polling_next_update = update_triggers(group, now); group->rtpoll_next_update = update_triggers(group, now);
psi_schedule_poll_work(group, psi_schedule_rtpoll_work(group,
nsecs_to_jiffies(group->polling_next_update - now) + 1, nsecs_to_jiffies(group->rtpoll_next_update - now) + 1,
force_reschedule); force_reschedule);
out: out:
mutex_unlock(&group->trigger_lock); mutex_unlock(&group->rtpoll_trigger_lock);
} }
static int psi_poll_worker(void *data) static int psi_rtpoll_worker(void *data)
{ {
struct psi_group *group = (struct psi_group *)data; struct psi_group *group = (struct psi_group *)data;
sched_set_fifo_low(current); sched_set_fifo_low(current);
while (true) { while (true) {
wait_event_interruptible(group->poll_wait, wait_event_interruptible(group->rtpoll_wait,
atomic_cmpxchg(&group->poll_wakeup, 1, 0) || atomic_cmpxchg(&group->rtpoll_wakeup, 1, 0) ||
kthread_should_stop()); kthread_should_stop());
if (kthread_should_stop()) if (kthread_should_stop())
break; break;
psi_poll_work(group); psi_rtpoll_work(group);
} }
return 0; return 0;
} }
static void poll_timer_fn(struct timer_list *t) static void poll_timer_fn(struct timer_list *t)
{ {
struct psi_group *group = from_timer(group, t, poll_timer); struct psi_group *group = from_timer(group, t, rtpoll_timer);
atomic_set(&group->poll_wakeup, 1); atomic_set(&group->rtpoll_wakeup, 1);
wake_up_interruptible(&group->poll_wait); wake_up_interruptible(&group->rtpoll_wait);
} }
static void record_times(struct psi_group_cpu *groupc, u64 now) static void record_times(struct psi_group_cpu *groupc, u64 now)
@ -851,8 +851,8 @@ static void psi_group_change(struct psi_group *group, int cpu,
write_seqcount_end(&groupc->seq); write_seqcount_end(&groupc->seq);
if (state_mask & group->poll_states) if (state_mask & group->rtpoll_states)
psi_schedule_poll_work(group, 1, false); psi_schedule_rtpoll_work(group, 1, false);
if (wake_clock && !delayed_work_pending(&group->avgs_work)) if (wake_clock && !delayed_work_pending(&group->avgs_work))
schedule_delayed_work(&group->avgs_work, PSI_FREQ); schedule_delayed_work(&group->avgs_work, PSI_FREQ);
@ -1005,8 +1005,8 @@ void psi_account_irqtime(struct task_struct *task, u32 delta)
write_seqcount_end(&groupc->seq); write_seqcount_end(&groupc->seq);
if (group->poll_states & (1 << PSI_IRQ_FULL)) if (group->rtpoll_states & (1 << PSI_IRQ_FULL))
psi_schedule_poll_work(group, 1, false); psi_schedule_rtpoll_work(group, 1, false);
} while ((group = group->parent)); } while ((group = group->parent));
} }
#endif #endif
@ -1101,7 +1101,7 @@ void psi_cgroup_free(struct cgroup *cgroup)
cancel_delayed_work_sync(&cgroup->psi->avgs_work); cancel_delayed_work_sync(&cgroup->psi->avgs_work);
free_percpu(cgroup->psi->pcpu); free_percpu(cgroup->psi->pcpu);
/* All triggers must be removed by now */ /* All triggers must be removed by now */
WARN_ONCE(cgroup->psi->poll_states, "psi: trigger leak\n"); WARN_ONCE(cgroup->psi->rtpoll_states, "psi: trigger leak\n");
kfree(cgroup->psi); kfree(cgroup->psi);
} }
@ -1302,29 +1302,29 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
init_waitqueue_head(&t->event_wait); init_waitqueue_head(&t->event_wait);
t->pending_event = false; t->pending_event = false;
mutex_lock(&group->trigger_lock); mutex_lock(&group->rtpoll_trigger_lock);
if (!rcu_access_pointer(group->poll_task)) { if (!rcu_access_pointer(group->rtpoll_task)) {
struct task_struct *task; struct task_struct *task;
task = kthread_create(psi_poll_worker, group, "psimon"); task = kthread_create(psi_rtpoll_worker, group, "psimon");
if (IS_ERR(task)) { if (IS_ERR(task)) {
kfree(t); kfree(t);
mutex_unlock(&group->trigger_lock); mutex_unlock(&group->rtpoll_trigger_lock);
return ERR_CAST(task); return ERR_CAST(task);
} }
atomic_set(&group->poll_wakeup, 0); atomic_set(&group->rtpoll_wakeup, 0);
wake_up_process(task); wake_up_process(task);
rcu_assign_pointer(group->poll_task, task); rcu_assign_pointer(group->rtpoll_task, task);
} }
list_add(&t->node, &group->triggers); list_add(&t->node, &group->rtpoll_triggers);
group->poll_min_period = min(group->poll_min_period, group->rtpoll_min_period = min(group->rtpoll_min_period,
div_u64(t->win.size, UPDATES_PER_WINDOW)); div_u64(t->win.size, UPDATES_PER_WINDOW));
group->nr_triggers[t->state]++; group->rtpoll_nr_triggers[t->state]++;
group->poll_states |= (1 << t->state); group->rtpoll_states |= (1 << t->state);
mutex_unlock(&group->trigger_lock); mutex_unlock(&group->rtpoll_trigger_lock);
return t; return t;
} }
@ -1349,51 +1349,52 @@ void psi_trigger_destroy(struct psi_trigger *t)
*/ */
wake_up_pollfree(&t->event_wait); wake_up_pollfree(&t->event_wait);
mutex_lock(&group->trigger_lock); mutex_lock(&group->rtpoll_trigger_lock);
if (!list_empty(&t->node)) { if (!list_empty(&t->node)) {
struct psi_trigger *tmp; struct psi_trigger *tmp;
u64 period = ULLONG_MAX; u64 period = ULLONG_MAX;
list_del(&t->node); list_del(&t->node);
group->nr_triggers[t->state]--; group->rtpoll_nr_triggers[t->state]--;
if (!group->nr_triggers[t->state]) if (!group->rtpoll_nr_triggers[t->state])
group->poll_states &= ~(1 << t->state); group->rtpoll_states &= ~(1 << t->state);
/* reset min update period for the remaining triggers */ /* reset min update period for the remaining triggers */
list_for_each_entry(tmp, &group->triggers, node) list_for_each_entry(tmp, &group->rtpoll_triggers, node)
period = min(period, div_u64(tmp->win.size, period = min(period, div_u64(tmp->win.size,
UPDATES_PER_WINDOW)); UPDATES_PER_WINDOW));
group->poll_min_period = period; group->rtpoll_min_period = period;
/* Destroy poll_task when the last trigger is destroyed */ /* Destroy rtpoll_task when the last trigger is destroyed */
if (group->poll_states == 0) { if (group->rtpoll_states == 0) {
group->polling_until = 0; group->rtpoll_until = 0;
task_to_destroy = rcu_dereference_protected( task_to_destroy = rcu_dereference_protected(
group->poll_task, group->rtpoll_task,
lockdep_is_held(&group->trigger_lock)); lockdep_is_held(&group->rtpoll_trigger_lock));
rcu_assign_pointer(group->poll_task, NULL); rcu_assign_pointer(group->rtpoll_task, NULL);
del_timer(&group->poll_timer); del_timer(&group->rtpoll_timer);
} }
} }
mutex_unlock(&group->trigger_lock); mutex_unlock(&group->rtpoll_trigger_lock);
/* /*
* Wait for psi_schedule_poll_work RCU to complete its read-side * Wait for psi_schedule_rtpoll_work RCU to complete its read-side
* critical section before destroying the trigger and optionally the * critical section before destroying the trigger and optionally the
* poll_task. * rtpoll_task.
*/ */
synchronize_rcu(); synchronize_rcu();
/* /*
* Stop kthread 'psimon' after releasing trigger_lock to prevent a * Stop kthread 'psimon' after releasing rtpoll_trigger_lock to prevent
* deadlock while waiting for psi_poll_work to acquire trigger_lock * a deadlock while waiting for psi_rtpoll_work to acquire
* rtpoll_trigger_lock
*/ */
if (task_to_destroy) { if (task_to_destroy) {
/* /*
* After the RCU grace period has expired, the worker * After the RCU grace period has expired, the worker
* can no longer be found through group->poll_task. * can no longer be found through group->rtpoll_task.
*/ */
kthread_stop(task_to_destroy); kthread_stop(task_to_destroy);
atomic_set(&group->poll_scheduled, 0); atomic_set(&group->rtpoll_scheduled, 0);
} }
kfree(t); kfree(t);
} }