Mirror of https://github.com/torvalds/linux, synced 2024-11-02 18:48:59 +00:00
620a30fa0b
Reading the idle/IO sleep time (e.g. from /proc/stat) can race with idle exit updates because the state machine handling the stats is not atomic and requires a coherent read batch. As a result, reading the sleep time may report irrelevant or backward values.

Fix this by protecting the simple state machine with a seqcount. This is expected to be cheap enough not to add a measurable performance impact on the idle path.

Note this only partially fixes the reader vs. writer race. A race remains that involves remote updates of the CPU iowait task counter, and it can hardly be fixed.

Reported-by: Yu Liao <liaoyu15@huawei.com>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230222144649.624380-4-frederic@kernel.org
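The write side of the seqcount pattern described above looks roughly like the sketch below: the idle-exit accounting folds the slept time into either the idle or the iowait sum inside a write_seqcount_begin()/write_seqcount_end() pair, so readers always see a coherent batch. This is an illustrative fragment, not the actual kernel/time/tick-sched.c change; the helper name sleeptime_account_idle() and the exact accounting details are simplified assumptions.

#include <linux/ktime.h>
#include <linux/sched/stat.h>	/* nr_iowait_cpu() */
#include <linux/seqlock.h>
#include <linux/smp.h>

#include "tick-sched.h"		/* struct tick_sched, shown below */

/*
 * Illustrative idle-exit update (not the real tick-sched.c code): fold
 * the time slept since ts->idle_entrytime into the idle or iowait sum.
 * The write side of ts->idle_sleeptime_seq makes this multi-field
 * update appear atomic to readers; the writer itself is serialized by
 * running in the local CPU's idle/tick context.
 */
static void sleeptime_account_idle(struct tick_sched *ts, ktime_t now)
{
	ktime_t delta = ktime_sub(now, ts->idle_entrytime);

	write_seqcount_begin(&ts->idle_sleeptime_seq);
	if (nr_iowait_cpu(smp_processor_id()) > 0)
		ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
	else
		ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
	ts->idle_entrytime = now;
	ts->idle_active = 0;
	write_seqcount_end(&ts->idle_sleeptime_seq);
}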
122 lines
3.9 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _TICK_SCHED_H
#define _TICK_SCHED_H

#include <linux/hrtimer.h>

enum tick_device_mode {
	TICKDEV_MODE_PERIODIC,
	TICKDEV_MODE_ONESHOT,
};

struct tick_device {
	struct clock_event_device *evtdev;
	enum tick_device_mode mode;
};

enum tick_nohz_mode {
	NOHZ_MODE_INACTIVE,
	NOHZ_MODE_LOWRES,
	NOHZ_MODE_HIGHRES,
};

/**
 * struct tick_sched - sched tick emulation and no idle tick control/stats
 *
 * @inidle:		Indicator that the CPU is in the tick idle mode
 * @tick_stopped:	Indicator that the idle tick has been stopped
 * @idle_active:	Indicator that the CPU is actively in the tick idle mode;
 *			it is reset during irq handling phases.
 * @do_timer_last:	CPU was the last one doing do_timer before going idle
 * @got_idle_tick:	Tick timer function has run with @inidle set
 * @stalled_jiffies:	Number of stalled jiffies detected across ticks
 * @last_tick_jiffies:	Value of jiffies seen on last tick
 * @sched_timer:	hrtimer to schedule the periodic tick in high
 *			resolution mode
 * @last_tick:		Store the last tick expiry time when the tick
 *			timer is modified for nohz sleeps. This is necessary
 *			to resume the tick timer operation in the timeline
 *			when the CPU returns from nohz sleep.
 * @next_tick:		Next tick to be fired when in dynticks mode.
 * @idle_jiffies:	jiffies at the entry to idle for idle time accounting
 * @idle_waketime:	Time when the idle was interrupted
 * @idle_sleeptime_seq:	Sequence counter protecting reads of @idle_sleeptime
 *			and @iowait_sleeptime against idle exit updates
 * @idle_entrytime:	Time when the idle call was entered
 * @nohz_mode:		Mode - one state of tick_nohz_mode
 * @last_jiffies:	Base jiffies snapshot when next event was last computed
 * @timer_expires_base:	Base time clock monotonic for @timer_expires
 * @timer_expires:	Anticipated timer expiration time (in case sched tick is stopped)
 * @next_timer:		Expiry time of next expiring timer for debugging purpose only
 * @idle_expires:	Next tick in idle, for debugging purpose only
 * @idle_calls:		Total number of idle calls
 * @idle_sleeps:	Number of idle calls, where the sched tick was stopped
 * @idle_exittime:	Time when the idle state was left
 * @idle_sleeptime:	Sum of the time slept in idle with sched tick stopped
 * @iowait_sleeptime:	Sum of the time slept in idle with sched tick stopped, with IO outstanding
 * @tick_dep_mask:	Tick dependency mask - is set, if someone needs the tick
 * @check_clocks:	Notification mechanism about clocksource changes
 */
struct tick_sched {
	/* Common flags */
	unsigned int			inidle		: 1;
	unsigned int			tick_stopped	: 1;
	unsigned int			idle_active	: 1;
	unsigned int			do_timer_last	: 1;
	unsigned int			got_idle_tick	: 1;

	/* Tick handling: jiffies stall check */
	unsigned int			stalled_jiffies;
	unsigned long			last_tick_jiffies;

	/* Tick handling */
	struct hrtimer			sched_timer;
	ktime_t				last_tick;
	ktime_t				next_tick;
	unsigned long			idle_jiffies;
	ktime_t				idle_waketime;

	/* Idle entry */
	seqcount_t			idle_sleeptime_seq;
	ktime_t				idle_entrytime;

	/* Tick stop */
	enum tick_nohz_mode		nohz_mode;
	unsigned long			last_jiffies;
	u64				timer_expires_base;
	u64				timer_expires;
	u64				next_timer;
	ktime_t				idle_expires;
	unsigned long			idle_calls;
	unsigned long			idle_sleeps;

	/* Idle exit */
	ktime_t				idle_exittime;
	ktime_t				idle_sleeptime;
	ktime_t				iowait_sleeptime;

	/* Full dynticks handling */
	atomic_t			tick_dep_mask;

	/* Clocksource changes */
	unsigned long			check_clocks;
};

extern struct tick_sched *tick_get_tick_sched(int cpu);

extern void tick_setup_sched_timer(void);
#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
extern void tick_cancel_sched_timer(int cpu);
#else
static inline void tick_cancel_sched_timer(int cpu) { }
#endif

#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
extern int __tick_broadcast_oneshot_control(enum tick_broadcast_state state);
#else
static inline int
__tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
	return -EBUSY;
}
#endif

#endif
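For the read side, a consumer of struct tick_sched above (such as the /proc/stat path mentioned in the commit message) can retry on @idle_sleeptime_seq until it gets a stable snapshot of both sums. This is a minimal sketch, not the real get_cpu_idle_time_us()/get_cpu_iowait_time_us() implementation; the helper name sleeptime_snapshot() and the omission of the "idle currently active" adjustment are simplifying assumptions.

#include <linux/ktime.h>
#include <linux/seqlock.h>

#include "tick-sched.h"		/* struct tick_sched (shown above) */

/*
 * Illustrative reader side: snapshot both sleep time sums so they are
 * coherent with each other even when a concurrent idle exit is
 * updating them, instead of racing and reading backward values.
 */
static void sleeptime_snapshot(struct tick_sched *ts,
			       ktime_t *idle, ktime_t *iowait)
{
	unsigned int seq;

	do {
		seq = read_seqcount_begin(&ts->idle_sleeptime_seq);
		*idle = ts->idle_sleeptime;
		*iowait = ts->iowait_sleeptime;
	} while (read_seqcount_retry(&ts->idle_sleeptime_seq, seq));
}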