From 3f0b0966b30982e843950b170b7a9ddfd8094428 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 31 Jul 2023 20:56:35 +0200 Subject: [PATCH 01/13] cpuidle: teo: Update idle duration estimate when choosing shallower state The TEO governor takes CPU utilization into account by refining idle state selection when the utilization is above a certain threshold. This is done by choosing an idle state shallower than the previously selected one. However, when doing this, the idle duration estimate needs to be adjusted so as to prevent the scheduler tick from being stopped when the candidate idle state is shallow, which may lead to excessive energy usage if the CPU is not woken up quickly enough going forward. Moreover, if the scheduler tick has been stopped already and the new idle duration estimate is too small, the replacement candidate state cannot be used. Modify the relevant code to take the above observations into account. Fixes: 9ce0f7c4bc64 ("cpuidle: teo: Introduce util-awareness") Link: https://lore.kernel.org/linux-pm/CAJZ5v0jJxHj65r2HXBTd3wfbZtsg=_StzwO1kA5STDnaPe_dWA@mail.gmail.com Signed-off-by: Rafael J. Wysocki Reviewed-and-tested-by: Kajetan Puchalski --- drivers/cpuidle/governors/teo.c | 40 ++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 987fc5f3997d..2cdc711679a5 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -397,13 +397,23 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * the shallowest non-polling state and exit. */ if (drv->state_count < 3 && cpu_data->utilized) { - for (i = 0; i < drv->state_count; ++i) { - if (!dev->states_usage[i].disable && - !(drv->states[i].flags & CPUIDLE_FLAG_POLLING)) { - idx = i; - goto end; - } - } + /* The CPU is utilized, so assume a short idle duration. */ + duration_ns = teo_middle_of_bin(0, drv); + /* + * If state 0 is enabled and it is not a polling one, select it + * right away unless the scheduler tick has been stopped, in + * which case care needs to be taken to leave the CPU in a deep + * enough state in case it is not woken up any time soon after + * all. If state 1 is disabled, though, state 0 must be used + * anyway. + */ + if ((!idx && !(drv->states[0].flags & CPUIDLE_FLAG_POLLING) && + teo_time_ok(duration_ns)) || dev->states_usage[1].disable) + idx = 0; + else /* Assume that state 1 is not a polling one and use it. */ + idx = 1; + + goto end; } /* @@ -539,10 +549,20 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, /* * If the CPU is being utilized over the threshold, choose a shallower - * non-polling state to improve latency + * non-polling state to improve latency, unless the scheduler tick has + * been stopped already and the shallower state's target residency is + * not sufficiently large. */ - if (cpu_data->utilized) - idx = teo_find_shallower_state(drv, dev, idx, duration_ns, true); + if (cpu_data->utilized) { + s64 span_ns; + + i = teo_find_shallower_state(drv, dev, idx, duration_ns, true); + span_ns = teo_middle_of_bin(i, drv); + if (teo_time_ok(span_ns)) { + idx = i; + duration_ns = span_ns; + } + } end: /* From 04bae4e2267d49eb9cfec403acd0462b8d00d637 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 31 Jul 2023 21:03:09 +0200 Subject: [PATCH 02/13] cpuidle: teo: Avoid stopping the tick unnecessarily when bailing out When teo_select() is going to return early in some special cases, make it avoid stopping the tick if the idle state to be returned is shallow. In particular, never stop the tick if state 0 is to be returned. Link: https://lore.kernel.org/linux-pm/CAJZ5v0jJxHj65r2HXBTd3wfbZtsg=_StzwO1kA5STDnaPe_dWA@mail.gmail.com Signed-off-by: Rafael J. Wysocki Reviewed-and-tested-by: Kajetan Puchalski --- drivers/cpuidle/governors/teo.c | 56 +++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 2cdc711679a5..1b76db3689d0 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -382,12 +382,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, /* Check if there is any choice in the first place. */ if (drv->state_count < 2) { idx = 0; - goto end; + goto out_tick; } + if (!dev->states_usage[0].disable) { idx = 0; if (drv->states[1].target_residency_ns > duration_ns) - goto end; + goto out_tick; } cpu_data->utilized = teo_cpu_is_utilized(dev->cpu, cpu_data); @@ -408,11 +409,12 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * anyway. */ if ((!idx && !(drv->states[0].flags & CPUIDLE_FLAG_POLLING) && - teo_time_ok(duration_ns)) || dev->states_usage[1].disable) + teo_time_ok(duration_ns)) || dev->states_usage[1].disable) { idx = 0; - else /* Assume that state 1 is not a polling one and use it. */ - idx = 1; - + goto out_tick; + } + /* Assume that state 1 is not a polling one and use it. */ + idx = 1; goto end; } @@ -459,8 +461,15 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, /* Avoid unnecessary overhead. */ if (idx < 0) { idx = 0; /* No states enabled, must use 0. */ - goto end; - } else if (idx == idx0) { + goto out_tick; + } + + if (idx == idx0) { + /* + * This is the first enabled idle state, so use it, but do not + * allow the tick to be stopped it is shallow enough. + */ + duration_ns = drv->states[idx].target_residency_ns; goto end; } @@ -566,24 +575,25 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, end: /* - * Don't stop the tick if the selected state is a polling one or if the - * expected idle duration is shorter than the tick period length. + * Allow the tick to be stopped unless the selected state is a polling + * one or the expected idle duration is shorter than the tick period + * length. */ - if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) || - duration_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) { - *stop_tick = false; + if ((!(drv->states[idx].flags & CPUIDLE_FLAG_POLLING) && + duration_ns >= TICK_NSEC) || tick_nohz_tick_stopped()) + return idx; - /* - * The tick is not going to be stopped, so if the target - * residency of the state to be returned is not within the time - * till the closest timer including the tick, try to correct - * that. - */ - if (idx > idx0 && - drv->states[idx].target_residency_ns > delta_tick) - idx = teo_find_shallower_state(drv, dev, idx, delta_tick, false); - } + /* + * The tick is not going to be stopped, so if the target residency of + * the state to be returned is not within the time till the closest + * timer including the tick, try to correct that. + */ + if (idx > idx0 && + drv->states[idx].target_residency_ns > delta_tick) + idx = teo_find_shallower_state(drv, dev, idx, delta_tick, false); +out_tick: + *stop_tick = false; return idx; } From 9a41e16f11103c37263271ff2a433f2f3885fced Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 31 Jul 2023 21:04:41 +0200 Subject: [PATCH 03/13] cpuidle: teo: Drop utilized from struct teo_cpu Because the utilized field in struct teo_cpu is only used locally in teo_select(), replace it with a local variable in that function. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-and-tested-by: Kajetan Puchalski --- drivers/cpuidle/governors/teo.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 1b76db3689d0..3915e5639790 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -187,7 +187,6 @@ struct teo_bin { * @next_recent_idx: Index of the next @recent_idx entry to update. * @recent_idx: Indices of bins corresponding to recent "intercepts". * @util_threshold: Threshold above which the CPU is considered utilized - * @utilized: Whether the last sleep on the CPU happened while utilized */ struct teo_cpu { s64 time_span_ns; @@ -197,7 +196,6 @@ struct teo_cpu { int next_recent_idx; int recent_idx[NR_RECENT]; unsigned long util_threshold; - bool utilized; }; static DEFINE_PER_CPU(struct teo_cpu, teo_cpus); @@ -366,6 +364,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, int idx0 = 0, idx = -1; bool alt_intercepts, alt_recent; ktime_t delta_tick; + bool cpu_utilized; s64 duration_ns; int i; @@ -391,13 +390,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, goto out_tick; } - cpu_data->utilized = teo_cpu_is_utilized(dev->cpu, cpu_data); + cpu_utilized = teo_cpu_is_utilized(dev->cpu, cpu_data); /* * If the CPU is being utilized over the threshold and there are only 2 * states to choose from, the metrics need not be considered, so choose * the shallowest non-polling state and exit. */ - if (drv->state_count < 3 && cpu_data->utilized) { + if (drv->state_count < 3 && cpu_utilized) { /* The CPU is utilized, so assume a short idle duration. */ duration_ns = teo_middle_of_bin(0, drv); /* @@ -562,7 +561,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * been stopped already and the shallower state's target residency is * not sufficiently large. */ - if (cpu_data->utilized) { + if (cpu_utilized) { s64 span_ns; i = teo_find_shallower_state(drv, dev, idx, duration_ns, true); From 21d28cd2fa5fc01ee83e64df838ecd72112c09b7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 3 Aug 2023 23:07:41 +0200 Subject: [PATCH 04/13] cpuidle: teo: Do not call tick_nohz_get_sleep_length() upfront Because the cost of calling tick_nohz_get_sleep_length() may increase in the future, reorder the code in teo_select() so it first uses the statistics to pick up a candidate idle state and applies the utilization heuristic to it and only then calls tick_nohz_get_sleep_length() to obtain the sleep length value and refine the selection if necessary. This change by itself does not cause tick_nohz_get_sleep_length() to be called less often, but it prepares the code for subsequent changes that will do so. Signed-off-by: Rafael J. Wysocki Tested-by: Kajetan Puchalski Tested-by: Anna-Maria Behnsen --- drivers/cpuidle/governors/teo.c | 107 ++++++++++++++------------------ 1 file changed, 45 insertions(+), 62 deletions(-) diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 3915e5639790..4ef76cdfc5c8 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -306,15 +306,10 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) cpu_data->total += PULSE; } -static bool teo_time_ok(u64 interval_ns) +static bool teo_state_ok(int i, struct cpuidle_driver *drv) { - return !tick_nohz_tick_stopped() || interval_ns >= TICK_NSEC; -} - -static s64 teo_middle_of_bin(int idx, struct cpuidle_driver *drv) -{ - return (drv->states[idx].target_residency_ns + - drv->states[idx+1].target_residency_ns) / 2; + return !tick_nohz_tick_stopped() || + drv->states[i].target_residency_ns >= TICK_NSEC; } /** @@ -354,6 +349,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, { struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); s64 latency_req = cpuidle_governor_latency_req(dev->cpu); + ktime_t delta_tick = TICK_NSEC / 2; unsigned int idx_intercept_sum = 0; unsigned int intercept_sum = 0; unsigned int idx_recent_sum = 0; @@ -363,7 +359,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, int constraint_idx = 0; int idx0 = 0, idx = -1; bool alt_intercepts, alt_recent; - ktime_t delta_tick; bool cpu_utilized; s64 duration_ns; int i; @@ -374,9 +369,11 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, } cpu_data->time_span_ns = local_clock(); - - duration_ns = tick_nohz_get_sleep_length(&delta_tick); - cpu_data->sleep_length_ns = duration_ns; + /* + * Set the expected sleep length to infinity in case of an early + * return. + */ + cpu_data->sleep_length_ns = KTIME_MAX; /* Check if there is any choice in the first place. */ if (drv->state_count < 2) { @@ -384,11 +381,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, goto out_tick; } - if (!dev->states_usage[0].disable) { + if (!dev->states_usage[0].disable) idx = 0; - if (drv->states[1].target_residency_ns > duration_ns) - goto out_tick; - } cpu_utilized = teo_cpu_is_utilized(dev->cpu, cpu_data); /* @@ -397,8 +391,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * the shallowest non-polling state and exit. */ if (drv->state_count < 3 && cpu_utilized) { - /* The CPU is utilized, so assume a short idle duration. */ - duration_ns = teo_middle_of_bin(0, drv); /* * If state 0 is enabled and it is not a polling one, select it * right away unless the scheduler tick has been stopped, in @@ -408,22 +400,17 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * anyway. */ if ((!idx && !(drv->states[0].flags & CPUIDLE_FLAG_POLLING) && - teo_time_ok(duration_ns)) || dev->states_usage[1].disable) { + teo_state_ok(0, drv)) || dev->states_usage[1].disable) { idx = 0; goto out_tick; } /* Assume that state 1 is not a polling one and use it. */ idx = 1; + duration_ns = drv->states[1].target_residency_ns; goto end; } - /* - * Find the deepest idle state whose target residency does not exceed - * the current sleep length and the deepest idle state not deeper than - * the former whose exit latency does not exceed the current latency - * constraint. Compute the sums of metrics for early wakeup pattern - * detection. - */ + /* Compute the sums of metrics for early wakeup pattern detection. */ for (i = 1; i < drv->state_count; i++) { struct teo_bin *prev_bin = &cpu_data->state_bins[i-1]; struct cpuidle_state *s = &drv->states[i]; @@ -439,19 +426,15 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, if (dev->states_usage[i].disable) continue; - if (idx < 0) { - idx = i; /* first enabled state */ - idx0 = i; - } - - if (s->target_residency_ns > duration_ns) - break; + if (idx < 0) + idx0 = i; /* first enabled state */ idx = i; if (s->exit_latency_ns <= latency_req) constraint_idx = i; + /* Save the sums for the current state. */ idx_intercept_sum = intercept_sum; idx_hit_sum = hit_sum; idx_recent_sum = recent_sum; @@ -465,7 +448,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, if (idx == idx0) { /* - * This is the first enabled idle state, so use it, but do not + * Only one idle state is enabled, so use it, but do not * allow the tick to be stopped it is shallow enough. */ duration_ns = drv->states[idx].target_residency_ns; @@ -479,13 +462,11 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * all of the deeper states, or the sum of the numbers of recent * intercepts over all of the states shallower than the candidate one * is greater than a half of the number of recent events taken into - * account, the CPU is likely to wake up early, so find an alternative - * idle state to select. + * account, a shallower idle state is likely to be a better choice. */ alt_intercepts = 2 * idx_intercept_sum > cpu_data->total - idx_hit_sum; alt_recent = idx_recent_sum > NR_RECENT / 2; if (alt_recent || alt_intercepts) { - s64 first_suitable_span_ns = duration_ns; int first_suitable_idx = idx; /* @@ -494,44 +475,39 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * cases (both with respect to intercepts overall and with * respect to the recent intercepts only) in the past. * - * Take the possible latency constraint and duration limitation - * present if the tick has been stopped already into account. + * Take the possible duration limitation present if the tick + * has been stopped already into account. */ intercept_sum = 0; recent_sum = 0; for (i = idx - 1; i >= 0; i--) { struct teo_bin *bin = &cpu_data->state_bins[i]; - s64 span_ns; intercept_sum += bin->intercepts; recent_sum += bin->recent; - span_ns = teo_middle_of_bin(i, drv); - if ((!alt_recent || 2 * recent_sum > idx_recent_sum) && (!alt_intercepts || 2 * intercept_sum > idx_intercept_sum)) { - if (teo_time_ok(span_ns) && - !dev->states_usage[i].disable) { + /* + * Use the current state unless it is too + * shallow or disabled, in which case take the + * first enabled state that is deep enough. + */ + if (teo_state_ok(i, drv) && + !dev->states_usage[i].disable) idx = i; - duration_ns = span_ns; - } else { - /* - * The current state is too shallow or - * disabled, so take the first enabled - * deeper state with suitable time span. - */ + else idx = first_suitable_idx; - duration_ns = first_suitable_span_ns; - } + break; } if (dev->states_usage[i].disable) continue; - if (!teo_time_ok(span_ns)) { + if (!teo_state_ok(i, drv)) { /* * The current state is too shallow, but if an * alternative candidate state has been found, @@ -543,7 +519,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, break; } - first_suitable_span_ns = span_ns; first_suitable_idx = i; } } @@ -562,14 +537,22 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * not sufficiently large. */ if (cpu_utilized) { - s64 span_ns; - - i = teo_find_shallower_state(drv, dev, idx, duration_ns, true); - span_ns = teo_middle_of_bin(i, drv); - if (teo_time_ok(span_ns)) { + i = teo_find_shallower_state(drv, dev, idx, KTIME_MAX, true); + if (teo_state_ok(i, drv)) + idx = i; + } + + duration_ns = tick_nohz_get_sleep_length(&delta_tick); + cpu_data->sleep_length_ns = duration_ns; + + /* + * If the closest expected timer is before the terget residency of the + * candidate state, a shallower one needs to be found. + */ + if (drv->states[idx].target_residency_ns > duration_ns) { + i = teo_find_shallower_state(drv, dev, idx, duration_ns, false); + if (teo_state_ok(i, drv)) idx = i; - duration_ns = span_ns; - } } end: From 6da8f9ba5a87f8aecf2cd4441cc6024a77e5b645 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 3 Aug 2023 23:09:18 +0200 Subject: [PATCH 05/13] cpuidle: teo: Skip tick_nohz_get_sleep_length() call in some cases Make teo_select() avoid calling tick_nohz_get_sleep_length() if the candidate idle state to return is state 0 or if state 0 is a polling one and the target residency of the current candidate one is below a certain threshold, in which cases it may be assumed that the CPU will be woken up immediately by a non-timer wakeup source and the timers are not likely to matter. Signed-off-by: Rafael J. Wysocki Tested-by: Kajetan Puchalski Tested-by: Anna-Maria Behnsen --- drivers/cpuidle/governors/teo.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 4ef76cdfc5c8..8248492cad3a 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -166,6 +166,12 @@ */ #define NR_RECENT 9 +/* + * Idle state target residency threshold used for deciding whether or not to + * check the time till the closest expected timer event. + */ +#define RESIDENCY_THRESHOLD_NS (15 * NSEC_PER_USEC) + /** * struct teo_bin - Metrics used by the TEO cpuidle governor. * @intercepts: The "intercepts" metric. @@ -542,6 +548,22 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, idx = i; } + /* + * Skip the timers check if state 0 is the current candidate one, + * because an immediate non-timer wakeup is expected in that case. + */ + if (!idx) + goto out_tick; + + /* + * If state 0 is a polling one, check if the target residency of + * the current candidate state is low enough and skip the timers + * check in that case too. + */ + if ((drv->states[0].flags & CPUIDLE_FLAG_POLLING) && + drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS) + goto out_tick; + duration_ns = tick_nohz_get_sleep_length(&delta_tick); cpu_data->sleep_length_ns = duration_ns; From 2662342079f54b8a940f7094c197c99458caeb0d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 3 Aug 2023 23:11:40 +0200 Subject: [PATCH 06/13] cpuidle: teo: Gather statistics regarding whether or not to stop the tick Currently, if the target residency of the deepest idle state is less than the tick period length, which is quite likely for HZ=100, and the deepest idle state is about to be selected by the TEO idle governor, the decision on whether or not to stop the scheduler tick is based entirely on the time till the closest timer. This is often insufficient, because timers may not be in heavy use and there may be a plenty of other CPU wakeup events between the deepest idle state's target residency and the closest tick. Allow the governor to count those events by making the deepest idle state's bin effectively end at TICK_NSEC and introducing an additional "bin" for collecting "hit" events (ie. the ones in which the measured idle duration falls into the same bin as the time till the closest timer) with idle duration values past TICK_NSEC. This way the "intercepts" metric for the deepest idle state's bin becomes nonzero in general, and so it can influence the decision on whether or not to stop the tick possibly increasing the governor's accuracy in that respect. Signed-off-by: Rafael J. Wysocki Tested-by: Kajetan Puchalski Tested-by: Anna-Maria Behnsen --- drivers/cpuidle/governors/teo.c | 41 ++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 8248492cad3a..70846b669255 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -192,6 +192,7 @@ struct teo_bin { * @total: Grand total of the "intercepts" and "hits" metrics for all bins. * @next_recent_idx: Index of the next @recent_idx entry to update. * @recent_idx: Indices of bins corresponding to recent "intercepts". + * @tick_hits: Number of "hits" after TICK_NSEC. * @util_threshold: Threshold above which the CPU is considered utilized */ struct teo_cpu { @@ -201,6 +202,7 @@ struct teo_cpu { unsigned int total; int next_recent_idx; int recent_idx[NR_RECENT]; + unsigned int tick_hits; unsigned long util_threshold; }; @@ -232,6 +234,7 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) { struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); int i, idx_timer = 0, idx_duration = 0; + s64 target_residency_ns; u64 measured_ns; if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) { @@ -272,7 +275,6 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) * fall into. */ for (i = 0; i < drv->state_count; i++) { - s64 target_residency_ns = drv->states[i].target_residency_ns; struct teo_bin *bin = &cpu_data->state_bins[i]; bin->hits -= bin->hits >> DECAY_SHIFT; @@ -280,6 +282,8 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) cpu_data->total += bin->hits + bin->intercepts; + target_residency_ns = drv->states[i].target_residency_ns; + if (target_residency_ns <= cpu_data->sleep_length_ns) { idx_timer = i; if (target_residency_ns <= measured_ns) @@ -294,6 +298,26 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) if (cpu_data->recent_idx[i] >= 0) cpu_data->state_bins[cpu_data->recent_idx[i]].recent--; + /* + * If the deepest state's target residency is below the tick length, + * make a record of it to help teo_select() decide whether or not + * to stop the tick. This effectively adds an extra hits-only bin + * beyond the last state-related one. + */ + if (target_residency_ns < TICK_NSEC) { + cpu_data->tick_hits -= cpu_data->tick_hits >> DECAY_SHIFT; + + cpu_data->total += cpu_data->tick_hits; + + if (TICK_NSEC <= cpu_data->sleep_length_ns) { + idx_timer = drv->state_count; + if (TICK_NSEC <= measured_ns) { + cpu_data->tick_hits += PULSE; + goto end; + } + } + } + /* * If the measured idle duration falls into the same bin as the sleep * length, this is a "hit", so update the "hits" metric for that bin. @@ -309,6 +333,7 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) cpu_data->recent_idx[i] = idx_duration; } +end: cpu_data->total += PULSE; } @@ -356,6 +381,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); s64 latency_req = cpuidle_governor_latency_req(dev->cpu); ktime_t delta_tick = TICK_NSEC / 2; + unsigned int tick_intercept_sum = 0; unsigned int idx_intercept_sum = 0; unsigned int intercept_sum = 0; unsigned int idx_recent_sum = 0; @@ -429,6 +455,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, hit_sum += prev_bin->hits; recent_sum += prev_bin->recent; + tick_intercept_sum = intercept_sum; + if (dev->states_usage[i].disable) continue; @@ -461,6 +489,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, goto end; } + tick_intercept_sum += cpu_data->state_bins[drv->state_count-1].intercepts; + /* * If the sum of the intercepts metric for all of the idle states * shallower than the current candidate one (idx) is greater than the @@ -577,6 +607,15 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, idx = i; } + /* + * If the selected state's target residency is below the tick length + * and intercepts occurring before the tick length are the majority of + * total wakeup events, do not stop the tick. + */ + if (drv->states[idx].target_residency_ns < TICK_NSEC && + tick_intercept_sum > cpu_data->total / 2 + cpu_data->total / 8) + duration_ns = TICK_NSEC / 2; + end: /* * Allow the tick to be stopped unless the selected state is a polling From 5484e31bbbff285f9505c4766373f840ffb746e5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 10 Aug 2023 20:36:36 +0200 Subject: [PATCH 07/13] cpuidle: menu: Skip tick_nohz_get_sleep_length() call in some cases Because the cost of calling tick_nohz_get_sleep_length() may increase in the future, reorder the code in menu_select() so it first uses the statistics to determine the expected idle duration. If that value is higher than RESIDENCY_THRESHOLD_NS, tick_nohz_get_sleep_length() will be called to obtain the time till the closest timer and refine the idle duration prediction if necessary. This causes the governor to always take the full overhead of get_typical_interval() with the assumption that the cost will be amortized by skipping the tick_nohz_get_sleep_length() call in the cases when the predicted idle duration is relatively very small. Signed-off-by: Rafael J. Wysocki Tested-by: Doug Smythies --- drivers/cpuidle/governors/gov.h | 14 +++++++ drivers/cpuidle/governors/menu.c | 65 +++++++++++++++++++------------- drivers/cpuidle/governors/teo.c | 9 +---- 3 files changed, 54 insertions(+), 34 deletions(-) create mode 100644 drivers/cpuidle/governors/gov.h diff --git a/drivers/cpuidle/governors/gov.h b/drivers/cpuidle/governors/gov.h new file mode 100644 index 000000000000..99e067d9668c --- /dev/null +++ b/drivers/cpuidle/governors/gov.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* Common definitions for cpuidle governors. */ + +#ifndef __CPUIDLE_GOVERNOR_H +#define __CPUIDLE_GOVERNOR_H + +/* + * Idle state target residency threshold used for deciding whether or not to + * check the time till the closest expected timer event. + */ +#define RESIDENCY_THRESHOLD_NS (15 * NSEC_PER_USEC) + +#endif /* __CPUIDLE_GOVERNOR_H */ diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index c4922684f305..b96e3da0fedd 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -19,6 +19,8 @@ #include #include +#include "gov.h" + #define BUCKETS 12 #define INTERVAL_SHIFT 3 #define INTERVALS (1UL << INTERVAL_SHIFT) @@ -166,8 +168,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev); * of points is below a threshold. If it is... then use the * average of these 8 points as the estimated value. */ -static unsigned int get_typical_interval(struct menu_device *data, - unsigned int predicted_us) +static unsigned int get_typical_interval(struct menu_device *data) { int i, divisor; unsigned int min, max, thresh, avg; @@ -195,11 +196,7 @@ static unsigned int get_typical_interval(struct menu_device *data, } } - /* - * If the result of the computation is going to be discarded anyway, - * avoid the computation altogether. - */ - if (min >= predicted_us) + if (!max) return UINT_MAX; if (divisor == INTERVALS) @@ -267,7 +264,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, { struct menu_device *data = this_cpu_ptr(&menu_devices); s64 latency_req = cpuidle_governor_latency_req(dev->cpu); - unsigned int predicted_us; u64 predicted_ns; u64 interactivity_req; unsigned int nr_iowaiters; @@ -279,16 +275,41 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, data->needs_update = 0; } - /* determine the expected residency time, round up */ - delta = tick_nohz_get_sleep_length(&delta_tick); - if (unlikely(delta < 0)) { - delta = 0; - delta_tick = 0; - } - data->next_timer_ns = delta; - nr_iowaiters = nr_iowait_cpu(dev->cpu); - data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters); + + /* Find the shortest expected idle interval. */ + predicted_ns = get_typical_interval(data) * NSEC_PER_USEC; + if (predicted_ns > RESIDENCY_THRESHOLD_NS) { + unsigned int timer_us; + + /* Determine the time till the closest timer. */ + delta = tick_nohz_get_sleep_length(&delta_tick); + if (unlikely(delta < 0)) { + delta = 0; + delta_tick = 0; + } + + data->next_timer_ns = delta; + data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters); + + /* Round up the result for half microseconds. */ + timer_us = div_u64((RESOLUTION * DECAY * NSEC_PER_USEC) / 2 + + data->next_timer_ns * + data->correction_factor[data->bucket], + RESOLUTION * DECAY * NSEC_PER_USEC); + /* Use the lowest expected idle interval to pick the idle state. */ + predicted_ns = min((u64)timer_us * NSEC_PER_USEC, predicted_ns); + } else { + /* + * Because the next timer event is not going to be determined + * in this case, assume that without the tick the closest timer + * will be in distant future and that the closest tick will occur + * after 1/2 of the tick period. + */ + data->next_timer_ns = KTIME_MAX; + delta_tick = TICK_NSEC / 2; + data->bucket = which_bucket(KTIME_MAX, nr_iowaiters); + } if (unlikely(drv->state_count <= 1 || latency_req == 0) || ((data->next_timer_ns < drv->states[1].target_residency_ns || @@ -303,16 +324,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, return 0; } - /* Round up the result for half microseconds. */ - predicted_us = div_u64(data->next_timer_ns * - data->correction_factor[data->bucket] + - (RESOLUTION * DECAY * NSEC_PER_USEC) / 2, - RESOLUTION * DECAY * NSEC_PER_USEC); - /* Use the lowest expected idle interval to pick the idle state. */ - predicted_ns = (u64)min(predicted_us, - get_typical_interval(data, predicted_us)) * - NSEC_PER_USEC; - if (tick_nohz_tick_stopped()) { /* * If the tick is already stopped, the cost of possible short diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 70846b669255..31050fdc633f 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -140,6 +140,8 @@ #include #include +#include "gov.h" + /* * The number of bits to shift the CPU's capacity by in order to determine * the utilized threshold. @@ -152,7 +154,6 @@ */ #define UTIL_THRESHOLD_SHIFT 6 - /* * The PULSE value is added to metrics when they grow and the DECAY_SHIFT value * is used for decreasing metrics on a regular basis. @@ -166,12 +167,6 @@ */ #define NR_RECENT 9 -/* - * Idle state target residency threshold used for deciding whether or not to - * check the time till the closest expected timer event. - */ -#define RESIDENCY_THRESHOLD_NS (15 * NSEC_PER_USEC) - /** * struct teo_bin - Metrics used by the TEO cpuidle governor. * @intercepts: The "intercepts" metric. From ed9571647eff3dd7a53068f83338530959c7ace4 Mon Sep 17 00:00:00 2001 From: Liao Chang Date: Fri, 18 Aug 2023 09:50:00 +0000 Subject: [PATCH 08/13] cpufreq: stats: Improve the performance of cpufreq_stats_create_table() In the worst case, the freq_table of policy data is not sorted and contains duplicate frequencies, this means that it needs to iterate through the entire freq_table of policy to ensure each frequency is unique in the freq_table of stats data, this has a time complexity of O(N^2), where N is the number of frequencies in the freq_table of policy. However, if the policy.freq_table is already sorted and contains no duplicate frequencies, it can reduce the time complexity of creating stats.freq_table to O(N), the 'freq_table_sorted' field of policy data can be used to indicate whether the policy.freq_table is sorted. Signed-off-by: Liao Chang Acked-by: Viresh Kumar Reviewed-by: Dhruva Gole [ rjw: Fix typo in changelog, remove redundant parens ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_stats.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 55c7ffd37d1c..a33df3c66c88 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -243,7 +243,8 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy) /* Find valid-unique entries */ cpufreq_for_each_valid_entry(pos, policy->freq_table) - if (freq_table_get_index(stats, pos->frequency) == -1) + if (policy->freq_table_sorted != CPUFREQ_TABLE_UNSORTED || + freq_table_get_index(stats, pos->frequency) == -1) stats->freq_table[i++] = pos->frequency; stats->state_num = i; From d51847acb018d83186e4af67bc93f9a00a8644f7 Mon Sep 17 00:00:00 2001 From: Doug Smythies Date: Sun, 20 Aug 2023 13:46:49 -0700 Subject: [PATCH 09/13] cpufreq: intel_pstate: set stale CPU frequency to minimum The intel_pstate CPU frequency scaling driver does not use policy->cur and it is 0. When the CPU frequency is outdated arch_freq_get_on_cpu() will default to the nominal clock frequency when its call to cpufreq_quick_getpolicy_cur returns the never updated 0. Thus, the listed frequency might be outside of currently set limits. Some users are complaining about the high reported frequency, albeit stale, when their system is idle and/or it is above the reduced maximum they have set. This patch will maintain policy_cur for the intel_pstate driver at the current minimum CPU frequency. Reported-by: Yang Jie Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217597 Signed-off-by: Doug Smythies [ rjw: White space damage fixes and comment adjustment ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 8ca2bce4341a..dc50c9fb488d 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2609,6 +2609,11 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) intel_pstate_clear_update_util_hook(policy->cpu); intel_pstate_hwp_set(policy->cpu); } + /* + * policy->cur is never updated with the intel_pstate driver, but it + * is used as a stale frequency value. So, keep it within limits. + */ + policy->cur = policy->min; mutex_unlock(&intel_pstate_limits_lock); From a436ae9434ec491eefb4ed4c77bdb9c762925976 Mon Sep 17 00:00:00 2001 From: Liao Chang Date: Wed, 16 Aug 2023 01:58:53 +0000 Subject: [PATCH 10/13] cpufreq: Use clamp() helper macro to improve the code readability The valid values of policy.{min, max} should be between 'min' and 'max', so use clamp() helper macro to makes cpufreq_verify_within_limits() easier to follow. Signed-off-by: Liao Chang [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 172ff51c1b2a..9bf94ae08158 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -19,6 +19,7 @@ #include #include #include +#include /********************************************************************* * CPUFREQ INTERFACE * @@ -467,17 +468,8 @@ static inline void cpufreq_verify_within_limits(struct cpufreq_policy_data *poli unsigned int min, unsigned int max) { - if (policy->min < min) - policy->min = min; - if (policy->max < min) - policy->max = min; - if (policy->min > max) - policy->min = max; - if (policy->max > max) - policy->max = max; - if (policy->min > policy->max) - policy->min = policy->max; - return; + policy->max = clamp(policy->max, min, max); + policy->min = clamp(policy->min, min, policy->max); } static inline void From 8d6e5e8268e89979d86501dbb8385ce2e6154de1 Mon Sep 17 00:00:00 2001 From: Swapnil Sapkal Date: Fri, 18 Aug 2023 11:44:51 +0000 Subject: [PATCH 11/13] cpufreq: amd-pstate-ut: Remove module parameter access In amd-pstate-ut, shared memory-based systems call get_shared_mem() as part of amd_pstate_ut_check_enabled() function. This function was written when CONFIG_X86_AMD_PSTATE was tristate config and amd_pstate can be built as a module. Currently CONFIG_X86_AMD_PSTATE is a boolean config and module parameter shared_mem is removed. But amd-pstate-ut code still accesses this module parameter. Remove those accesses. Fixes: 456ca88d8a52 ("cpufreq: amd-pstate: change amd-pstate driver to be built-in type") Reviewed-by: Mario Limonciello Reviewed-by: Meng Li Reviewed-by: Wyes Karny Suggested-by: Wyes Karny Signed-off-by: Swapnil Sapkal [ rjw: Subject edits ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/amd-pstate-ut.c | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c index 7f3fe2048981..cf07ee77d3cc 100644 --- a/drivers/cpufreq/amd-pstate-ut.c +++ b/drivers/cpufreq/amd-pstate-ut.c @@ -64,27 +64,9 @@ static struct amd_pstate_ut_struct amd_pstate_ut_cases[] = { static bool get_shared_mem(void) { bool result = false; - char path[] = "/sys/module/amd_pstate/parameters/shared_mem"; - char buf[5] = {0}; - struct file *filp = NULL; - loff_t pos = 0; - ssize_t ret; - if (!boot_cpu_has(X86_FEATURE_CPPC)) { - filp = filp_open(path, O_RDONLY, 0); - if (IS_ERR(filp)) - pr_err("%s unable to open %s file!\n", __func__, path); - else { - ret = kernel_read(filp, &buf, sizeof(buf), &pos); - if (ret < 0) - pr_err("%s read %s file fail ret=%ld!\n", - __func__, path, (long)ret); - filp_close(filp, NULL); - } - - if ('Y' == *buf) - result = true; - } + if (!boot_cpu_has(X86_FEATURE_CPPC)) + result = true; return result; } From 60dd283804479c4a52f995b713f448e2cd65b8c8 Mon Sep 17 00:00:00 2001 From: Swapnil Sapkal Date: Fri, 18 Aug 2023 11:44:52 +0000 Subject: [PATCH 12/13] cpufreq: amd-pstate-ut: Fix kernel panic when loading the driver After loading the amd-pstate-ut driver, amd_pstate_ut_check_perf() and amd_pstate_ut_check_freq() use cpufreq_cpu_get() to get the policy of the CPU and mark it as busy. In these functions, cpufreq_cpu_put() should be used to release the policy, but it is not, so any other entity trying to access the policy is blocked indefinitely. One such scenario is when amd_pstate mode is changed, leading to the following splat: [ 1332.103727] INFO: task bash:2929 blocked for more than 120 seconds. [ 1332.110001] Not tainted 6.5.0-rc2-amd-pstate-ut #5 [ 1332.115315] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 1332.123140] task:bash state:D stack:0 pid:2929 ppid:2873 flags:0x00004006 [ 1332.123143] Call Trace: [ 1332.123145] [ 1332.123148] __schedule+0x3c1/0x16a0 [ 1332.123154] ? _raw_read_lock_irqsave+0x2d/0x70 [ 1332.123157] schedule+0x6f/0x110 [ 1332.123160] schedule_timeout+0x14f/0x160 [ 1332.123162] ? preempt_count_add+0x86/0xd0 [ 1332.123165] __wait_for_common+0x92/0x190 [ 1332.123168] ? __pfx_schedule_timeout+0x10/0x10 [ 1332.123170] wait_for_completion+0x28/0x30 [ 1332.123173] cpufreq_policy_put_kobj+0x4d/0x90 [ 1332.123177] cpufreq_policy_free+0x157/0x1d0 [ 1332.123178] ? preempt_count_add+0x58/0xd0 [ 1332.123180] cpufreq_remove_dev+0xb6/0x100 [ 1332.123182] subsys_interface_unregister+0x114/0x120 [ 1332.123185] ? preempt_count_add+0x58/0xd0 [ 1332.123187] ? __pfx_amd_pstate_change_driver_mode+0x10/0x10 [ 1332.123190] cpufreq_unregister_driver+0x3b/0xd0 [ 1332.123192] amd_pstate_change_driver_mode+0x1e/0x50 [ 1332.123194] store_status+0xe9/0x180 [ 1332.123197] dev_attr_store+0x1b/0x30 [ 1332.123199] sysfs_kf_write+0x42/0x50 [ 1332.123202] kernfs_fop_write_iter+0x143/0x1d0 [ 1332.123204] vfs_write+0x2df/0x400 [ 1332.123208] ksys_write+0x6b/0xf0 [ 1332.123210] __x64_sys_write+0x1d/0x30 [ 1332.123213] do_syscall_64+0x60/0x90 [ 1332.123216] ? fpregs_assert_state_consistent+0x2e/0x50 [ 1332.123219] ? exit_to_user_mode_prepare+0x49/0x1a0 [ 1332.123223] ? irqentry_exit_to_user_mode+0xd/0x20 [ 1332.123225] ? irqentry_exit+0x3f/0x50 [ 1332.123226] ? exc_page_fault+0x8e/0x190 [ 1332.123228] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [ 1332.123232] RIP: 0033:0x7fa74c514a37 [ 1332.123234] RSP: 002b:00007ffe31dd0788 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1332.123238] RAX: ffffffffffffffda RBX: 0000000000000008 RCX: 00007fa74c514a37 [ 1332.123239] RDX: 0000000000000008 RSI: 000055e27c447aa0 RDI: 0000000000000001 [ 1332.123241] RBP: 000055e27c447aa0 R08: 00007fa74c5d1460 R09: 000000007fffffff [ 1332.123242] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000008 [ 1332.123244] R13: 00007fa74c61a780 R14: 00007fa74c616600 R15: 00007fa74c615a00 [ 1332.123247] Fix this by calling cpufreq_cpu_put() wherever necessary. Fixes: 14eb1c96e3a3 ("cpufreq: amd-pstate: Add test module for amd-pstate driver") Reviewed-by: Mario Limonciello Reviewed-by: Meng Li Reviewed-by: Wyes Karny Suggested-by: Wyes Karny Signed-off-by: Swapnil Sapkal [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/amd-pstate-ut.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c index cf07ee77d3cc..502d494499ae 100644 --- a/drivers/cpufreq/amd-pstate-ut.c +++ b/drivers/cpufreq/amd-pstate-ut.c @@ -140,7 +140,7 @@ static void amd_pstate_ut_check_perf(u32 index) if (ret) { amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cppc_get_perf_caps ret=%d error!\n", __func__, ret); - return; + goto skip_test; } nominal_perf = cppc_perf.nominal_perf; @@ -151,7 +151,7 @@ static void amd_pstate_ut_check_perf(u32 index) if (ret) { amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s read CPPC_CAP1 ret=%d error!\n", __func__, ret); - return; + goto skip_test; } nominal_perf = AMD_CPPC_NOMINAL_PERF(cap1); @@ -169,7 +169,7 @@ static void amd_pstate_ut_check_perf(u32 index) nominal_perf, cpudata->nominal_perf, lowest_nonlinear_perf, cpudata->lowest_nonlinear_perf, lowest_perf, cpudata->lowest_perf); - return; + goto skip_test; } if (!((highest_perf >= nominal_perf) && @@ -180,11 +180,15 @@ static void amd_pstate_ut_check_perf(u32 index) pr_err("%s cpu%d highest=%d >= nominal=%d > lowest_nonlinear=%d > lowest=%d > 0, the formula is incorrect!\n", __func__, cpu, highest_perf, nominal_perf, lowest_nonlinear_perf, lowest_perf); - return; + goto skip_test; } + cpufreq_cpu_put(policy); } amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; + return; +skip_test: + cpufreq_cpu_put(policy); } /* @@ -212,14 +216,14 @@ static void amd_pstate_ut_check_freq(u32 index) pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n", __func__, cpu, cpudata->max_freq, cpudata->nominal_freq, cpudata->lowest_nonlinear_freq, cpudata->min_freq); - return; + goto skip_test; } if (cpudata->min_freq != policy->min) { amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cpu%d cpudata_min_freq=%d policy_min=%d, they should be equal!\n", __func__, cpu, cpudata->min_freq, policy->min); - return; + goto skip_test; } if (cpudata->boost_supported) { @@ -231,16 +235,20 @@ static void amd_pstate_ut_check_freq(u32 index) pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n", __func__, cpu, policy->max, cpudata->max_freq, cpudata->nominal_freq); - return; + goto skip_test; } } else { amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cpu%d must support boost!\n", __func__, cpu); - return; + goto skip_test; } + cpufreq_cpu_put(policy); } amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; + return; +skip_test: + cpufreq_cpu_put(policy); } static int __init amd_pstate_ut_init(void) From 78aabcb3211aa4b7e8f8a1d5d4bdad699a42d4ba Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 22 Aug 2023 13:28:02 +0200 Subject: [PATCH 13/13] cpuidle: teo: Avoid unnecessary variable assignments Notice that it is not necessary to assign tick_intercept_sum in every iteration of the first loop over idle states in teo_select(), because the intercept_sum value does not change after the assignment in a given iteration of the loop, so its value after the last iteration of the loop can be used for computing the tick_intercept_sum value directly. Modify the code accordingly. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/teo.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 31050fdc633f..7244f71c59c5 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -450,8 +450,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, hit_sum += prev_bin->hits; recent_sum += prev_bin->recent; - tick_intercept_sum = intercept_sum; - if (dev->states_usage[i].disable) continue; @@ -484,7 +482,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, goto end; } - tick_intercept_sum += cpu_data->state_bins[drv->state_count-1].intercepts; + tick_intercept_sum = intercept_sum + + cpu_data->state_bins[drv->state_count-1].intercepts; /* * If the sum of the intercepts metric for all of the idle states