linux/kernel/power/process.c
Peter Zijlstra 50e7663233 sched/cpuset/pm: Fix cpuset vs. suspend-resume bugs
Cpusets vs. suspend-resume is _completely_ broken. And it got noticed
because it now resulted in non-cpuset usage breaking too.

On suspend cpuset_cpu_inactive() doesn't call into
cpuset_update_active_cpus() because it doesn't want to move tasks about,
there is no need, all tasks are frozen and won't run again until after
we've resumed everything.

But this means that when we finally do call into
cpuset_update_active_cpus() after resuming the last frozen cpu in
cpuset_cpu_active(), the top_cpuset will not have any difference with
the cpu_active_mask and this it will not in fact do _anything_.

So the cpuset configuration will not be restored. This was largely
hidden because we would unconditionally create identity domains and
mobile users would not in fact use cpusets much. And servers what do use
cpusets tend to not suspend-resume much.

An addition problem is that we'd not in fact wait for the cpuset work to
finish before resuming the tasks, allowing spurious migrations outside
of the specified domains.

Fix the rebuild by introducing cpuset_force_rebuild() and fix the
ordering with cpuset_wait_for_hotplug().

Reported-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <stable@vger.kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: deb7aa308e ("cpuset: reorganize CPU / memory hotplug handling")
Link: http://lkml.kernel.org/r/20170907091338.orwxrqkbfkki3c24@hirez.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-09-07 11:45:21 +02:00

245 lines
5.5 KiB
C

/*
* drivers/power/process.c - Functions for starting/stopping processes on
* suspend transitions.
*
* Originally from swsusp.
*/
#undef DEBUG
#include <linux/interrupt.h>
#include <linux/oom.h>
#include <linux/suspend.h>
#include <linux/module.h>
#include <linux/sched/debug.h>
#include <linux/sched/task.h>
#include <linux/syscalls.h>
#include <linux/freezer.h>
#include <linux/delay.h>
#include <linux/workqueue.h>
#include <linux/kmod.h>
#include <trace/events/power.h>
#include <linux/cpuset.h>
/*
* Timeout for stopping processes
*/
unsigned int __read_mostly freeze_timeout_msecs = 20 * MSEC_PER_SEC;
static int try_to_freeze_tasks(bool user_only)
{
struct task_struct *g, *p;
unsigned long end_time;
unsigned int todo;
bool wq_busy = false;
ktime_t start, end, elapsed;
unsigned int elapsed_msecs;
bool wakeup = false;
int sleep_usecs = USEC_PER_MSEC;
start = ktime_get_boottime();
end_time = jiffies + msecs_to_jiffies(freeze_timeout_msecs);
if (!user_only)
freeze_workqueues_begin();
while (true) {
todo = 0;
read_lock(&tasklist_lock);
for_each_process_thread(g, p) {
if (p == current || !freeze_task(p))
continue;
if (!freezer_should_skip(p))
todo++;
}
read_unlock(&tasklist_lock);
if (!user_only) {
wq_busy = freeze_workqueues_busy();
todo += wq_busy;
}
if (!todo || time_after(jiffies, end_time))
break;
if (pm_wakeup_pending()) {
wakeup = true;
break;
}
/*
* We need to retry, but first give the freezing tasks some
* time to enter the refrigerator. Start with an initial
* 1 ms sleep followed by exponential backoff until 8 ms.
*/
usleep_range(sleep_usecs / 2, sleep_usecs);
if (sleep_usecs < 8 * USEC_PER_MSEC)
sleep_usecs *= 2;
}
end = ktime_get_boottime();
elapsed = ktime_sub(end, start);
elapsed_msecs = ktime_to_ms(elapsed);
if (todo) {
pr_cont("\n");
pr_err("Freezing of tasks %s after %d.%03d seconds "
"(%d tasks refusing to freeze, wq_busy=%d):\n",
wakeup ? "aborted" : "failed",
elapsed_msecs / 1000, elapsed_msecs % 1000,
todo - wq_busy, wq_busy);
if (wq_busy)
show_workqueue_state();
if (!wakeup) {
read_lock(&tasklist_lock);
for_each_process_thread(g, p) {
if (p != current && !freezer_should_skip(p)
&& freezing(p) && !frozen(p))
sched_show_task(p);
}
read_unlock(&tasklist_lock);
}
} else {
pr_cont("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000,
elapsed_msecs % 1000);
}
return todo ? -EBUSY : 0;
}
/**
* freeze_processes - Signal user space processes to enter the refrigerator.
* The current thread will not be frozen. The same process that calls
* freeze_processes must later call thaw_processes.
*
* On success, returns 0. On failure, -errno and system is fully thawed.
*/
int freeze_processes(void)
{
int error;
error = __usermodehelper_disable(UMH_FREEZING);
if (error)
return error;
/* Make sure this task doesn't get frozen */
current->flags |= PF_SUSPEND_TASK;
if (!pm_freezing)
atomic_inc(&system_freezing_cnt);
pm_wakeup_clear(true);
pr_info("Freezing user space processes ... ");
pm_freezing = true;
error = try_to_freeze_tasks(true);
if (!error) {
__usermodehelper_set_disable_depth(UMH_DISABLED);
pr_cont("done.");
}
pr_cont("\n");
BUG_ON(in_atomic());
/*
* Now that the whole userspace is frozen we need to disbale
* the OOM killer to disallow any further interference with
* killable tasks. There is no guarantee oom victims will
* ever reach a point they go away we have to wait with a timeout.
*/
if (!error && !oom_killer_disable(msecs_to_jiffies(freeze_timeout_msecs)))
error = -EBUSY;
if (error)
thaw_processes();
return error;
}
/**
* freeze_kernel_threads - Make freezable kernel threads go to the refrigerator.
*
* On success, returns 0. On failure, -errno and only the kernel threads are
* thawed, so as to give a chance to the caller to do additional cleanups
* (if any) before thawing the userspace tasks. So, it is the responsibility
* of the caller to thaw the userspace tasks, when the time is right.
*/
int freeze_kernel_threads(void)
{
int error;
pr_info("Freezing remaining freezable tasks ... ");
pm_nosig_freezing = true;
error = try_to_freeze_tasks(false);
if (!error)
pr_cont("done.");
pr_cont("\n");
BUG_ON(in_atomic());
if (error)
thaw_kernel_threads();
return error;
}
void thaw_processes(void)
{
struct task_struct *g, *p;
struct task_struct *curr = current;
trace_suspend_resume(TPS("thaw_processes"), 0, true);
if (pm_freezing)
atomic_dec(&system_freezing_cnt);
pm_freezing = false;
pm_nosig_freezing = false;
oom_killer_enable();
pr_info("Restarting tasks ... ");
__usermodehelper_set_disable_depth(UMH_FREEZING);
thaw_workqueues();
cpuset_wait_for_hotplug();
read_lock(&tasklist_lock);
for_each_process_thread(g, p) {
/* No other threads should have PF_SUSPEND_TASK set */
WARN_ON((p != curr) && (p->flags & PF_SUSPEND_TASK));
__thaw_task(p);
}
read_unlock(&tasklist_lock);
WARN_ON(!(curr->flags & PF_SUSPEND_TASK));
curr->flags &= ~PF_SUSPEND_TASK;
usermodehelper_enable();
schedule();
pr_cont("done.\n");
trace_suspend_resume(TPS("thaw_processes"), 0, false);
}
void thaw_kernel_threads(void)
{
struct task_struct *g, *p;
pm_nosig_freezing = false;
pr_info("Restarting kernel threads ... ");
thaw_workqueues();
read_lock(&tasklist_lock);
for_each_process_thread(g, p) {
if (p->flags & (PF_KTHREAD | PF_WQ_WORKER))
__thaw_task(p);
}
read_unlock(&tasklist_lock);
schedule();
pr_cont("done.\n");
}