linux/arch/x86/kernel/acpi/cstate.c
Qian Cai 696ac2e3bf x86: ACPI: fix CPU hotplug deadlock
Similar to commit 0266d81e9b ("acpi/processor: Prevent cpu hotplug
deadlock") except this is for acpi_processor_ffh_cstate_probe():

"The problem is that the work is scheduled on the current CPU from the
hotplug thread associated with that CPU.

It's not required to invoke these functions via the workqueue because
the hotplug thread runs on the target CPU already.

Check whether current is a per cpu thread pinned on the target CPU and
invoke the function directly to avoid the workqueue."

 WARNING: possible circular locking dependency detected
 ------------------------------------------------------
 cpuhp/1/15 is trying to acquire lock:
 ffffc90003447a28 ((work_completion)(&wfc.work)){+.+.}-{0:0}, at: __flush_work+0x4c6/0x630

 but task is already holding lock:
 ffffffffafa1c0e8 (cpuidle_lock){+.+.}-{3:3}, at: cpuidle_pause_and_lock+0x17/0x20

 which lock already depends on the new lock.

 the existing dependency chain (in reverse order) is:

 -> #1 (cpu_hotplug_lock){++++}-{0:0}:
 cpus_read_lock+0x3e/0xc0
 irq_calc_affinity_vectors+0x5f/0x91
 __pci_enable_msix_range+0x10f/0x9a0
 pci_alloc_irq_vectors_affinity+0x13e/0x1f0
 pci_alloc_irq_vectors_affinity at drivers/pci/msi.c:1208
 pqi_ctrl_init+0x72f/0x1618 [smartpqi]
 pqi_pci_probe.cold.63+0x882/0x892 [smartpqi]
 local_pci_probe+0x7a/0xc0
 work_for_cpu_fn+0x2e/0x50
 process_one_work+0x57e/0xb90
 worker_thread+0x363/0x5b0
 kthread+0x1f4/0x220
 ret_from_fork+0x27/0x50

 -> #0 ((work_completion)(&wfc.work)){+.+.}-{0:0}:
 __lock_acquire+0x2244/0x32a0
 lock_acquire+0x1a2/0x680
 __flush_work+0x4e6/0x630
 work_on_cpu+0x114/0x160
 acpi_processor_ffh_cstate_probe+0x129/0x250
 acpi_processor_evaluate_cst+0x4c8/0x580
 acpi_processor_get_power_info+0x86/0x740
 acpi_processor_hotplug+0xc3/0x140
 acpi_soft_cpu_online+0x102/0x1d0
 cpuhp_invoke_callback+0x197/0x1120
 cpuhp_thread_fun+0x252/0x2f0
 smpboot_thread_fn+0x255/0x440
 kthread+0x1f4/0x220
 ret_from_fork+0x27/0x50

 other info that might help us debug this:

 Chain exists of:
 (work_completion)(&wfc.work) --> cpuhp_state-up --> cpuidle_lock

 Possible unsafe locking scenario:

 CPU0                    CPU1
 ----                    ----
 lock(cpuidle_lock);
                         lock(cpuhp_state-up);
                         lock(cpuidle_lock);
 lock((work_completion)(&wfc.work));

 *** DEADLOCK ***

 3 locks held by cpuhp/1/15:
 #0: ffffffffaf51ab10 (cpu_hotplug_lock){++++}-{0:0}, at: cpuhp_thread_fun+0x69/0x2f0
 #1: ffffffffaf51ad40 (cpuhp_state-up){+.+.}-{0:0}, at: cpuhp_thread_fun+0x69/0x2f0
 #2: ffffffffafa1c0e8 (cpuidle_lock){+.+.}-{3:3}, at: cpuidle_pause_and_lock+0x17/0x20

 Call Trace:
 dump_stack+0xa0/0xea
 print_circular_bug.cold.52+0x147/0x14c
 check_noncircular+0x295/0x2d0
 __lock_acquire+0x2244/0x32a0
 lock_acquire+0x1a2/0x680
 __flush_work+0x4e6/0x630
 work_on_cpu+0x114/0x160
 acpi_processor_ffh_cstate_probe+0x129/0x250
 acpi_processor_evaluate_cst+0x4c8/0x580
 acpi_processor_get_power_info+0x86/0x740
 acpi_processor_hotplug+0xc3/0x140
 acpi_soft_cpu_online+0x102/0x1d0
 cpuhp_invoke_callback+0x197/0x1120
 cpuhp_thread_fun+0x252/0x2f0
 smpboot_thread_fn+0x255/0x440
 kthread+0x1f4/0x220
 ret_from_fork+0x27/0x50

Signed-off-by: Qian Cai <cai@lca.pw>
Tested-by: Borislav Petkov <bp@suse.de>
[ rjw: Subject ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2020-04-04 16:28:24 +02:00

215 lines
6 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2005 Intel Corporation
* Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
* - Added _PDC for SMP C-states on Intel CPUs
*/
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/acpi.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <acpi/processor.h>
#include <asm/mwait.h>
#include <asm/special_insns.h>
/*
* Initialize bm_flags based on the CPU cache properties
* On SMP it depends on cache configuration
* - When cache is not shared among all CPUs, we flush cache
* before entering C3.
* - When cache is shared among all CPUs, we use bm_check
* mechanism as in UP case
*
* This routine is called only after all the CPUs are online
*/
void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
unsigned int cpu)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
flags->bm_check = 0;
if (num_online_cpus() == 1)
flags->bm_check = 1;
else if (c->x86_vendor == X86_VENDOR_INTEL) {
/*
* Today all MP CPUs that support C3 share cache.
* And caches should not be flushed by software while
* entering C3 type state.
*/
flags->bm_check = 1;
}
/*
* On all recent Intel platforms, ARB_DISABLE is a nop.
* So, set bm_control to zero to indicate that ARB_DISABLE
* is not required while entering C3 type state on
* P4, Core and beyond CPUs
*/
if (c->x86_vendor == X86_VENDOR_INTEL &&
(c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 0x0f)))
flags->bm_control = 0;
/*
* For all recent Centaur CPUs, the ucode will make sure that each
* core can keep cache coherence with each other while entering C3
* type state. So, set bm_check to 1 to indicate that the kernel
* doesn't need to execute a cache flush operation (WBINVD) when
* entering C3 type state.
*/
if (c->x86_vendor == X86_VENDOR_CENTAUR) {
if (c->x86 > 6 || (c->x86 == 6 && c->x86_model == 0x0f &&
c->x86_stepping >= 0x0e))
flags->bm_check = 1;
}
if (c->x86_vendor == X86_VENDOR_ZHAOXIN) {
/*
* All Zhaoxin CPUs that support C3 share cache.
* And caches should not be flushed by software while
* entering C3 type state.
*/
flags->bm_check = 1;
/*
* On all recent Zhaoxin platforms, ARB_DISABLE is a nop.
* So, set bm_control to zero to indicate that ARB_DISABLE
* is not required while entering C3 type state.
*/
flags->bm_control = 0;
}
}
EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
/* The code below handles cstate entry with monitor-mwait pair on Intel*/
struct cstate_entry {
struct {
unsigned int eax;
unsigned int ecx;
} states[ACPI_PROCESSOR_MAX_POWER];
};
static struct cstate_entry __percpu *cpu_cstate_entry; /* per CPU ptr */
static short mwait_supported[ACPI_PROCESSOR_MAX_POWER];
#define NATIVE_CSTATE_BEYOND_HALT (2)
static long acpi_processor_ffh_cstate_probe_cpu(void *_cx)
{
struct acpi_processor_cx *cx = _cx;
long retval;
unsigned int eax, ebx, ecx, edx;
unsigned int edx_part;
unsigned int cstate_type; /* C-state type and not ACPI C-state type */
unsigned int num_cstate_subtype;
cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
/* Check whether this particular cx_type (in CST) is supported or not */
cstate_type = ((cx->address >> MWAIT_SUBSTATE_SIZE) &
MWAIT_CSTATE_MASK) + 1;
edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE);
num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK;
retval = 0;
/* If the HW does not support any sub-states in this C-state */
if (num_cstate_subtype == 0) {
pr_warn(FW_BUG "ACPI MWAIT C-state 0x%x not supported by HW (0x%x)\n",
cx->address, edx_part);
retval = -1;
goto out;
}
/* mwait ecx extensions INTERRUPT_BREAK should be supported for C2/C3 */
if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
!(ecx & CPUID5_ECX_INTERRUPT_BREAK)) {
retval = -1;
goto out;
}
if (!mwait_supported[cstate_type]) {
mwait_supported[cstate_type] = 1;
printk(KERN_DEBUG
"Monitor-Mwait will be used to enter C-%d state\n",
cx->type);
}
snprintf(cx->desc,
ACPI_CX_DESC_LEN, "ACPI FFH MWAIT 0x%x",
cx->address);
out:
return retval;
}
int acpi_processor_ffh_cstate_probe(unsigned int cpu,
struct acpi_processor_cx *cx, struct acpi_power_register *reg)
{
struct cstate_entry *percpu_entry;
struct cpuinfo_x86 *c = &cpu_data(cpu);
long retval;
if (!cpu_cstate_entry || c->cpuid_level < CPUID_MWAIT_LEAF)
return -1;
if (reg->bit_offset != NATIVE_CSTATE_BEYOND_HALT)
return -1;
percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu);
percpu_entry->states[cx->index].eax = 0;
percpu_entry->states[cx->index].ecx = 0;
/* Make sure we are running on right CPU */
retval = call_on_cpu(cpu, acpi_processor_ffh_cstate_probe_cpu, cx,
false);
if (retval == 0) {
/* Use the hint in CST */
percpu_entry->states[cx->index].eax = cx->address;
percpu_entry->states[cx->index].ecx = MWAIT_ECX_INTERRUPT_BREAK;
}
/*
* For _CST FFH on Intel, if GAS.access_size bit 1 is cleared,
* then we should skip checking BM_STS for this C-state.
* ref: "Intel Processor Vendor-Specific ACPI Interface Specification"
*/
if ((c->x86_vendor == X86_VENDOR_INTEL) && !(reg->access_size & 0x2))
cx->bm_sts_skip = 1;
return retval;
}
EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe);
void __cpuidle acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx)
{
unsigned int cpu = smp_processor_id();
struct cstate_entry *percpu_entry;
percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu);
mwait_idle_with_hints(percpu_entry->states[cx->index].eax,
percpu_entry->states[cx->index].ecx);
}
EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_enter);
static int __init ffh_cstate_init(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
if (c->x86_vendor != X86_VENDOR_INTEL &&
c->x86_vendor != X86_VENDOR_AMD)
return -1;
cpu_cstate_entry = alloc_percpu(struct cstate_entry);
return 0;
}
static void __exit ffh_cstate_exit(void)
{
free_percpu(cpu_cstate_entry);
cpu_cstate_entry = NULL;
}
arch_initcall(ffh_cstate_init);
__exitcall(ffh_cstate_exit);