Merge branches 'idle-remove-statedata', 'pm_idle' and 'idle-hsw-turbostat' into release

This commit is contained in:
Len Brown 2013-02-18 00:25:16 -05:00
commit 2e7d0f60d8
26 changed files with 284 additions and 255 deletions

View file

@ -172,14 +172,9 @@ static void default_idle(void)
local_irq_enable();
}
void (*pm_idle)(void) = default_idle;
EXPORT_SYMBOL(pm_idle);
/*
* The idle thread, has rather strange semantics for calling pm_idle,
* but this is what x86 does and we need to do the same, so that
* things like cpuidle get called in the same way. The only difference
* is that we always respect 'hlt_counter' to prevent low power idle.
* The idle thread.
* We always respect 'hlt_counter' to prevent low power idle.
*/
void cpu_idle(void)
{
@ -210,10 +205,10 @@ void cpu_idle(void)
} else if (!need_resched()) {
stop_critical_timings();
if (cpuidle_idle_call())
pm_idle();
default_idle();
start_critical_timings();
/*
* pm_idle functions must always
* default_idle functions must always
* return with IRQs enabled.
*/
WARN_ON(irqs_disabled());

View file

@ -97,14 +97,9 @@ static void default_idle(void)
local_irq_enable();
}
void (*pm_idle)(void) = default_idle;
EXPORT_SYMBOL_GPL(pm_idle);
/*
* The idle thread, has rather strange semantics for calling pm_idle,
* but this is what x86 does and we need to do the same, so that
* things like cpuidle get called in the same way. The only difference
* is that we always respect 'hlt_counter' to prevent low power idle.
* The idle thread.
* We always respect 'hlt_counter' to prevent low power idle.
*/
void cpu_idle(void)
{
@ -122,10 +117,10 @@ void cpu_idle(void)
local_irq_disable();
if (!need_resched()) {
stop_critical_timings();
pm_idle();
default_idle();
start_critical_timings();
/*
* pm_idle functions should always return
* default_idle functions should always return
* with IRQs enabled.
*/
WARN_ON(irqs_disabled());

View file

@ -39,12 +39,6 @@ int nr_l1stack_tasks;
void *l1_stack_base;
unsigned long l1_stack_len;
/*
* Powermanagement idle function, if any..
*/
void (*pm_idle)(void) = NULL;
EXPORT_SYMBOL(pm_idle);
void (*pm_power_off)(void) = NULL;
EXPORT_SYMBOL(pm_power_off);
@ -81,7 +75,6 @@ void cpu_idle(void)
{
/* endless idle loop with no priority at all */
while (1) {
void (*idle)(void) = pm_idle;
#ifdef CONFIG_HOTPLUG_CPU
if (cpu_is_offline(smp_processor_id()))

View file

@ -54,11 +54,6 @@ void enable_hlt(void)
EXPORT_SYMBOL(enable_hlt);
/*
* The following aren't currently used.
*/
void (*pm_idle)(void);
extern void default_idle(void);
void (*pm_power_off)(void);
@ -77,16 +72,12 @@ void cpu_idle (void)
while (1) {
rcu_idle_enter();
while (!need_resched()) {
void (*idle)(void);
/*
* Mark this as an RCU critical section so that
* synchronize_kernel() in the unload path waits
* for our completion.
*/
idle = pm_idle;
if (!idle)
idle = default_idle;
idle();
default_idle();
}
rcu_idle_exit();
schedule_preempt_disabled();

View file

@ -57,8 +57,6 @@ void (*ia64_mark_idle)(int);
unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
EXPORT_SYMBOL(boot_option_idle_override);
void (*pm_idle) (void);
EXPORT_SYMBOL(pm_idle);
void (*pm_power_off) (void);
EXPORT_SYMBOL(pm_power_off);
@ -301,7 +299,6 @@ cpu_idle (void)
if (mark_idle)
(*mark_idle)(1);
idle = pm_idle;
if (!idle)
idle = default_idle;
(*idle)();

View file

@ -1051,7 +1051,6 @@ cpu_init (void)
max_num_phys_stacked = num_phys_stacked;
}
platform_cpu_init();
pm_idle = default_idle;
}
void __init

View file

@ -44,35 +44,9 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
return tsk->thread.lr;
}
/*
* Powermanagement idle function, if any..
*/
static void (*pm_idle)(void) = NULL;
void (*pm_power_off)(void) = NULL;
EXPORT_SYMBOL(pm_power_off);
/*
* We use this is we don't have any better
* idle routine..
*/
static void default_idle(void)
{
/* M32R_FIXME: Please use "cpu_sleep" mode. */
cpu_relax();
}
/*
* On SMP it's slightly faster (but much more power-consuming!)
* to poll the ->work.need_resched flag instead of waiting for the
* cross-CPU IPI to arrive. Use this option with caution.
*/
static void poll_idle (void)
{
/* M32R_FIXME */
cpu_relax();
}
/*
* The idle thread. There's no useful work to be
* done, so just try to conserve power and have a
@ -84,14 +58,8 @@ void cpu_idle (void)
/* endless idle loop with no priority at all */
while (1) {
rcu_idle_enter();
while (!need_resched()) {
void (*idle)(void) = pm_idle;
if (!idle)
idle = default_idle;
idle();
}
while (!need_resched())
cpu_relax();
rcu_idle_exit();
schedule_preempt_disabled();
}
@ -120,21 +88,6 @@ void machine_power_off(void)
/* M32R_FIXME */
}
static int __init idle_setup (char *str)
{
if (!strncmp(str, "poll", 4)) {
printk("using poll in idle threads.\n");
pm_idle = poll_idle;
} else if (!strncmp(str, "sleep", 4)) {
printk("using sleep in idle threads.\n");
pm_idle = default_idle;
}
return 1;
}
__setup("idle=", idle_setup);
void show_regs(struct pt_regs * regs)
{
printk("\n");

View file

@ -41,7 +41,6 @@ void show_regs(struct pt_regs *regs)
regs->msr, regs->ear, regs->esr, regs->fsr);
}
void (*pm_idle)(void);
void (*pm_power_off)(void) = NULL;
EXPORT_SYMBOL(pm_power_off);
@ -98,8 +97,6 @@ void cpu_idle(void)
/* endless idle loop with no priority at all */
while (1) {
void (*idle)(void) = pm_idle;
if (!idle)
idle = default_idle;

View file

@ -36,12 +36,6 @@
#include <asm/gdb-stub.h>
#include "internal.h"
/*
* power management idle function, if any..
*/
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
/*
* return saved PC of a blocked thread.
*/
@ -113,7 +107,6 @@ void cpu_idle(void)
void (*idle)(void);
smp_rmb();
idle = pm_idle;
if (!idle) {
#if defined(CONFIG_SMP) && !defined(CONFIG_HOTPLUG_CPU)
idle = poll_idle;

View file

@ -39,11 +39,6 @@
void (*powersave) (void) = NULL;
static inline void pm_idle(void)
{
barrier();
}
void cpu_idle(void)
{
set_thread_flag(TIF_POLLING_NRFLAG);

View file

@ -22,7 +22,7 @@
#include <asm/smp.h>
#include <asm/bl_bit.h>
void (*pm_idle)(void);
static void (*sh_idle)(void);
static int hlt_counter;
@ -103,9 +103,9 @@ void cpu_idle(void)
/* Don't trace irqs off for idle */
stop_critical_timings();
if (cpuidle_idle_call())
pm_idle();
sh_idle();
/*
* Sanity check to ensure that pm_idle() returns
* Sanity check to ensure that sh_idle() returns
* with IRQs enabled
*/
WARN_ON(irqs_disabled());
@ -123,13 +123,13 @@ void __init select_idle_routine(void)
/*
* If a platform has set its own idle routine, leave it alone.
*/
if (pm_idle)
if (sh_idle)
return;
if (hlt_works())
pm_idle = default_idle;
sh_idle = default_idle;
else
pm_idle = poll_idle;
sh_idle = poll_idle;
}
void stop_this_cpu(void *unused)

View file

@ -118,6 +118,7 @@ extern unsigned long get_wchan(struct task_struct *);
extern struct task_struct *last_task_used_math;
#define cpu_relax() barrier()
extern void (*sparc_idle)(void);
#endif

View file

@ -20,6 +20,7 @@
#include <asm/uaccess.h>
#include <asm/auxio.h>
#include <asm/apc.h>
#include <asm/processor.h>
/* Debugging
*
@ -158,7 +159,7 @@ static int apc_probe(struct platform_device *op)
/* Assign power management IDLE handler */
if (!apc_no_idle)
pm_idle = apc_swift_idle;
sparc_idle = apc_swift_idle;
printk(KERN_INFO "%s: power management initialized%s\n",
APC_DEVNAME, apc_no_idle ? " (CPU idle disabled)" : "");

View file

@ -9,6 +9,7 @@
#include <asm/leon_amba.h>
#include <asm/cpu_type.h>
#include <asm/leon.h>
#include <asm/processor.h>
/* List of Systems that need fixup instructions around power-down instruction */
unsigned int pmc_leon_fixup_ids[] = {
@ -69,9 +70,9 @@ static int __init leon_pmc_install(void)
if (sparc_cpu_model == sparc_leon) {
/* Assign power management IDLE handler */
if (pmc_leon_need_fixup())
pm_idle = pmc_leon_idle_fixup;
sparc_idle = pmc_leon_idle_fixup;
else
pm_idle = pmc_leon_idle;
sparc_idle = pmc_leon_idle;
printk(KERN_INFO "leon: power management initialized\n");
}

View file

@ -17,6 +17,7 @@
#include <asm/oplib.h>
#include <asm/uaccess.h>
#include <asm/auxio.h>
#include <asm/processor.h>
/* Debug
*
@ -63,7 +64,7 @@ static int pmc_probe(struct platform_device *op)
#ifndef PMC_NO_IDLE
/* Assign power management IDLE handler */
pm_idle = pmc_swift_idle;
sparc_idle = pmc_swift_idle;
#endif
printk(KERN_INFO "%s: power management initialized\n", PMC_DEVNAME);

View file

@ -43,8 +43,7 @@
* Power management idle function
* Set in pm platform drivers (apc.c and pmc.c)
*/
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
void (*sparc_idle)(void);
/*
* Power-off handler instantiation for pm.h compliance
@ -75,8 +74,8 @@ void cpu_idle(void)
/* endless idle loop with no priority at all */
for (;;) {
while (!need_resched()) {
if (pm_idle)
(*pm_idle)();
if (sparc_idle)
(*sparc_idle)();
else
cpu_relax();
}

View file

@ -45,11 +45,6 @@ static const char * const processor_modes[] = {
"UK18", "UK19", "UK1A", "EXTN", "UK1C", "UK1D", "UK1E", "SUSR"
};
/*
* The idle thread, has rather strange semantics for calling pm_idle,
* but this is what x86 does and we need to do the same, so that
* things like cpuidle get called in the same way.
*/
void cpu_idle(void)
{
/* endless idle loop with no priority at all */

View file

@ -1912,6 +1912,7 @@ config APM_DO_ENABLE
this feature.
config APM_CPU_IDLE
depends on CPU_IDLE
bool "Make CPU Idle calls when idle"
---help---
Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop.

View file

@ -4,7 +4,8 @@
#define MWAIT_SUBSTATE_MASK 0xf
#define MWAIT_CSTATE_MASK 0xf
#define MWAIT_SUBSTATE_SIZE 4
#define MWAIT_MAX_NUM_CSTATES 8
#define MWAIT_HINT2CSTATE(hint) (((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)
#define MWAIT_HINT2SUBSTATE(hint) ((hint) & MWAIT_CSTATE_MASK)
#define CPUID_MWAIT_LEAF 5
#define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1

View file

@ -103,6 +103,8 @@
#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10)
#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11)
#define MSR_IA32_POWER_CTL 0x000001fc
#define MSR_IA32_MC0_CTL 0x00000400
#define MSR_IA32_MC0_STATUS 0x00000401
#define MSR_IA32_MC0_ADDR 0x00000402
@ -272,6 +274,7 @@
#define MSR_IA32_PLATFORM_ID 0x00000017
#define MSR_IA32_EBL_CR_POWERON 0x0000002a
#define MSR_EBC_FREQUENCY_ID 0x0000002c
#define MSR_SMI_COUNT 0x00000034
#define MSR_IA32_FEATURE_CONTROL 0x0000003a
#define MSR_IA32_TSC_ADJUST 0x0000003b

View file

@ -232,6 +232,7 @@
#include <linux/acpi.h>
#include <linux/syscore_ops.h>
#include <linux/i8253.h>
#include <linux/cpuidle.h>
#include <asm/uaccess.h>
#include <asm/desc.h>
@ -360,13 +361,35 @@ struct apm_user {
* idle percentage above which bios idle calls are done
*/
#ifdef CONFIG_APM_CPU_IDLE
#warning deprecated CONFIG_APM_CPU_IDLE will be deleted in 2012
#define DEFAULT_IDLE_THRESHOLD 95
#else
#define DEFAULT_IDLE_THRESHOLD 100
#endif
#define DEFAULT_IDLE_PERIOD (100 / 3)
static int apm_cpu_idle(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index);
static struct cpuidle_driver apm_idle_driver = {
.name = "apm_idle",
.owner = THIS_MODULE,
.en_core_tk_irqen = 1,
.states = {
{ /* entry 0 is for polling */ },
{ /* entry 1 is for APM idle */
.name = "APM",
.desc = "APM idle",
.flags = CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 250, /* WAG */
.target_residency = 500, /* WAG */
.enter = &apm_cpu_idle
},
},
.state_count = 2,
};
static struct cpuidle_device apm_cpuidle_device;
/*
* Local variables
*/
@ -377,7 +400,6 @@ static struct {
static int clock_slowed;
static int idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD;
static int idle_period __read_mostly = DEFAULT_IDLE_PERIOD;
static int set_pm_idle;
static int suspends_pending;
static int standbys_pending;
static int ignore_sys_suspend;
@ -884,8 +906,6 @@ static void apm_do_busy(void)
#define IDLE_CALC_LIMIT (HZ * 100)
#define IDLE_LEAKY_MAX 16
static void (*original_pm_idle)(void) __read_mostly;
/**
* apm_cpu_idle - cpu idling for APM capable Linux
*
@ -894,7 +914,8 @@ static void (*original_pm_idle)(void) __read_mostly;
* Furthermore it calls the system default idle routine.
*/
static void apm_cpu_idle(void)
static int apm_cpu_idle(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index)
{
static int use_apm_idle; /* = 0 */
static unsigned int last_jiffies; /* = 0 */
@ -904,7 +925,6 @@ static void apm_cpu_idle(void)
unsigned int jiffies_since_last_check = jiffies - last_jiffies;
unsigned int bucket;
WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012");
recalc:
if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
use_apm_idle = 0;
@ -950,10 +970,7 @@ static void apm_cpu_idle(void)
break;
}
}
if (original_pm_idle)
original_pm_idle();
else
default_idle();
default_idle();
local_irq_disable();
jiffies_since_last_check = jiffies - last_jiffies;
if (jiffies_since_last_check > idle_period)
@ -963,7 +980,7 @@ static void apm_cpu_idle(void)
if (apm_idle_done)
apm_do_busy();
local_irq_enable();
return index;
}
/**
@ -2381,9 +2398,9 @@ static int __init apm_init(void)
if (HZ != 100)
idle_period = (idle_period * HZ) / 100;
if (idle_threshold < 100) {
original_pm_idle = pm_idle;
pm_idle = apm_cpu_idle;
set_pm_idle = 1;
if (!cpuidle_register_driver(&apm_idle_driver))
if (cpuidle_register_device(&apm_cpuidle_device))
cpuidle_unregister_driver(&apm_idle_driver);
}
return 0;
@ -2393,15 +2410,9 @@ static void __exit apm_exit(void)
{
int error;
if (set_pm_idle) {
pm_idle = original_pm_idle;
/*
* We are about to unload the current idle thread pm callback
* (pm_idle), Wait for all processors to update cached/local
* copies of pm_idle before proceeding.
*/
kick_all_cpus_sync();
}
cpuidle_unregister_device(&apm_cpuidle_device);
cpuidle_unregister_driver(&apm_idle_driver);
if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0)
&& (apm_info.connection_version > 0x0100)) {
error = apm_engage_power_management(APM_DEVICE_ALL, 0);

View file

@ -268,13 +268,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
EXPORT_SYMBOL(boot_option_idle_override);
/*
* Powermanagement idle function, if any..
*/
void (*pm_idle)(void);
#ifdef CONFIG_APM_MODULE
EXPORT_SYMBOL(pm_idle);
#endif
static void (*x86_idle)(void);
#ifndef CONFIG_SMP
static inline void play_dead(void)
@ -351,7 +345,7 @@ void cpu_idle(void)
rcu_idle_enter();
if (cpuidle_idle_call())
pm_idle();
x86_idle();
rcu_idle_exit();
start_critical_timings();
@ -398,9 +392,9 @@ EXPORT_SYMBOL(default_idle);
bool set_pm_idle_to_default(void)
{
bool ret = !!pm_idle;
bool ret = !!x86_idle;
pm_idle = default_idle;
x86_idle = default_idle;
return ret;
}
@ -567,11 +561,10 @@ static void amd_e400_idle(void)
void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
if (pm_idle == poll_idle && smp_num_siblings > 1) {
if (x86_idle == poll_idle && smp_num_siblings > 1)
pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
}
#endif
if (pm_idle)
if (x86_idle)
return;
if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
@ -579,19 +572,19 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
* One CPU supports mwait => All CPUs supports mwait
*/
pr_info("using mwait in idle threads\n");
pm_idle = mwait_idle;
x86_idle = mwait_idle;
} else if (cpu_has_amd_erratum(amd_erratum_400)) {
/* E400: APIC timer interrupt does not wake up CPU from C1e */
pr_info("using AMD E400 aware idle routine\n");
pm_idle = amd_e400_idle;
x86_idle = amd_e400_idle;
} else
pm_idle = default_idle;
x86_idle = default_idle;
}
void __init init_amd_e400_c1e_mask(void)
{
/* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */
if (pm_idle == amd_e400_idle)
if (x86_idle == amd_e400_idle)
zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL);
}
@ -602,7 +595,7 @@ static int __init idle_setup(char *str)
if (!strcmp(str, "poll")) {
pr_info("using polling idle threads\n");
pm_idle = poll_idle;
x86_idle = poll_idle;
boot_option_idle_override = IDLE_POLL;
} else if (!strcmp(str, "mwait")) {
boot_option_idle_override = IDLE_FORCE_MWAIT;
@ -615,7 +608,7 @@ static int __init idle_setup(char *str)
* To continue to load the CPU idle driver, don't touch
* the boot_option_idle_override.
*/
pm_idle = default_idle;
x86_idle = default_idle;
boot_option_idle_override = IDLE_HALT;
} else if (!strcmp(str, "nomwait")) {
/*

View file

@ -74,7 +74,7 @@ static struct cpuidle_driver intel_idle_driver = {
.en_core_tk_irqen = 1,
};
/* intel_idle.max_cstate=0 disables driver */
static int max_cstate = MWAIT_MAX_NUM_CSTATES - 1;
static int max_cstate = CPUIDLE_STATE_MAX - 1;
static unsigned int mwait_substates;
@ -90,6 +90,7 @@ struct idle_cpu {
* Indicate which enable bits to clear here.
*/
unsigned long auto_demotion_disable_flags;
bool disable_promotion_to_c1e;
};
static const struct idle_cpu *icpu;
@ -123,127 +124,190 @@ static struct cpuidle_state *cpuidle_state_table;
* which is also the index into the MWAIT hint array.
* Thus C0 is a dummy.
*/
static struct cpuidle_state nehalem_cstates[MWAIT_MAX_NUM_CSTATES] = {
{ /* MWAIT C0 */ },
{ /* MWAIT C1 */
static struct cpuidle_state nehalem_cstates[CPUIDLE_STATE_MAX] = {
{
.name = "C1-NHM",
.desc = "MWAIT 0x00",
.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 3,
.target_residency = 6,
.enter = &intel_idle },
{ /* MWAIT C2 */
{
.name = "C1E-NHM",
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 10,
.target_residency = 20,
.enter = &intel_idle },
{
.name = "C3-NHM",
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 20,
.target_residency = 80,
.enter = &intel_idle },
{ /* MWAIT C3 */
{
.name = "C6-NHM",
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 200,
.target_residency = 800,
.enter = &intel_idle },
{
.enter = NULL }
};
static struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = {
{ /* MWAIT C0 */ },
{ /* MWAIT C1 */
static struct cpuidle_state snb_cstates[CPUIDLE_STATE_MAX] = {
{
.name = "C1-SNB",
.desc = "MWAIT 0x00",
.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 1,
.target_residency = 1,
.exit_latency = 2,
.target_residency = 2,
.enter = &intel_idle },
{ /* MWAIT C2 */
{
.name = "C1E-SNB",
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 10,
.target_residency = 20,
.enter = &intel_idle },
{
.name = "C3-SNB",
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 80,
.target_residency = 211,
.enter = &intel_idle },
{ /* MWAIT C3 */
{
.name = "C6-SNB",
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 104,
.target_residency = 345,
.enter = &intel_idle },
{ /* MWAIT C4 */
{
.name = "C7-SNB",
.desc = "MWAIT 0x30",
.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 109,
.target_residency = 345,
.enter = &intel_idle },
{
.enter = NULL }
};
static struct cpuidle_state ivb_cstates[MWAIT_MAX_NUM_CSTATES] = {
{ /* MWAIT C0 */ },
{ /* MWAIT C1 */
static struct cpuidle_state ivb_cstates[CPUIDLE_STATE_MAX] = {
{
.name = "C1-IVB",
.desc = "MWAIT 0x00",
.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 1,
.target_residency = 1,
.enter = &intel_idle },
{ /* MWAIT C2 */
{
.name = "C1E-IVB",
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 10,
.target_residency = 20,
.enter = &intel_idle },
{
.name = "C3-IVB",
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 59,
.target_residency = 156,
.enter = &intel_idle },
{ /* MWAIT C3 */
{
.name = "C6-IVB",
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 80,
.target_residency = 300,
.enter = &intel_idle },
{ /* MWAIT C4 */
{
.name = "C7-IVB",
.desc = "MWAIT 0x30",
.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 87,
.target_residency = 300,
.enter = &intel_idle },
{
.enter = NULL }
};
static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = {
{ /* MWAIT C0 */ },
{ /* MWAIT C1 */
.name = "C1-ATM",
static struct cpuidle_state hsw_cstates[CPUIDLE_STATE_MAX] = {
{
.name = "C1-HSW",
.desc = "MWAIT 0x00",
.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 1,
.target_residency = 4,
.exit_latency = 2,
.target_residency = 2,
.enter = &intel_idle },
{ /* MWAIT C2 */
{
.name = "C1E-HSW",
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 10,
.target_residency = 20,
.enter = &intel_idle },
{
.name = "C3-HSW",
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 33,
.target_residency = 100,
.enter = &intel_idle },
{
.name = "C6-HSW",
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 133,
.target_residency = 400,
.enter = &intel_idle },
{
.name = "C7s-HSW",
.desc = "MWAIT 0x32",
.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 166,
.target_residency = 500,
.enter = &intel_idle },
{
.enter = NULL }
};
static struct cpuidle_state atom_cstates[CPUIDLE_STATE_MAX] = {
{
.name = "C1E-ATM",
.desc = "MWAIT 0x00",
.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 10,
.target_residency = 20,
.enter = &intel_idle },
{
.name = "C2-ATM",
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 20,
.target_residency = 80,
.enter = &intel_idle },
{ /* MWAIT C3 */ },
{ /* MWAIT C4 */
{
.name = "C4-ATM",
.desc = "MWAIT 0x30",
.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 100,
.target_residency = 400,
.enter = &intel_idle },
{ /* MWAIT C5 */ },
{ /* MWAIT C6 */
{
.name = "C6-ATM",
.desc = "MWAIT 0x52",
.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 140,
.target_residency = 560,
.enter = &intel_idle },
{
.enter = NULL }
};
/**
@ -342,10 +406,19 @@ static void auto_demotion_disable(void *dummy)
msr_bits &= ~(icpu->auto_demotion_disable_flags);
wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
}
static void c1e_promotion_disable(void *dummy)
{
unsigned long long msr_bits;
rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
msr_bits &= ~0x2;
wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
}
static const struct idle_cpu idle_cpu_nehalem = {
.state_table = nehalem_cstates,
.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
.disable_promotion_to_c1e = true,
};
static const struct idle_cpu idle_cpu_atom = {
@ -359,10 +432,17 @@ static const struct idle_cpu idle_cpu_lincroft = {
static const struct idle_cpu idle_cpu_snb = {
.state_table = snb_cstates,
.disable_promotion_to_c1e = true,
};
static const struct idle_cpu idle_cpu_ivb = {
.state_table = ivb_cstates,
.disable_promotion_to_c1e = true,
};
static const struct idle_cpu idle_cpu_hsw = {
.state_table = hsw_cstates,
.disable_promotion_to_c1e = true,
};
#define ICPU(model, cpu) \
@ -382,6 +462,9 @@ static const struct x86_cpu_id intel_idle_ids[] = {
ICPU(0x2d, idle_cpu_snb),
ICPU(0x3a, idle_cpu_ivb),
ICPU(0x3e, idle_cpu_ivb),
ICPU(0x3c, idle_cpu_hsw),
ICPU(0x3f, idle_cpu_hsw),
ICPU(0x45, idle_cpu_hsw),
{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids);
@ -464,32 +547,31 @@ static int intel_idle_cpuidle_driver_init(void)
drv->state_count = 1;
for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) {
int num_substates;
for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
int num_substates, mwait_hint, mwait_cstate, mwait_substate;
if (cstate > max_cstate) {
if (cpuidle_state_table[cstate].enter == NULL)
break;
if (cstate + 1 > max_cstate) {
printk(PREFIX "max_cstate %d reached\n",
max_cstate);
break;
}
/* does the state exist in CPUID.MWAIT? */
num_substates = (mwait_substates >> ((cstate) * 4))
& MWAIT_SUBSTATE_MASK;
if (num_substates == 0)
continue;
/* is the state not enabled? */
if (cpuidle_state_table[cstate].enter == NULL) {
/* does the driver not know about the state? */
if (*cpuidle_state_table[cstate].name == '\0')
pr_debug(PREFIX "unaware of model 0x%x"
" MWAIT %d please"
" contact lenb@kernel.org\n",
boot_cpu_data.x86_model, cstate);
continue;
}
mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint);
mwait_substate = MWAIT_HINT2SUBSTATE(mwait_hint);
if ((cstate > 2) &&
/* does the state exist in CPUID.MWAIT? */
num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4))
& MWAIT_SUBSTATE_MASK;
/* if sub-state in table is not enumerated by CPUID */
if ((mwait_substate + 1) > num_substates)
continue;
if (((mwait_cstate + 1) > 2) &&
!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
mark_tsc_unstable("TSC halts in idle"
" states deeper than C2");
@ -503,6 +585,9 @@ static int intel_idle_cpuidle_driver_init(void)
if (icpu->auto_demotion_disable_flags)
on_each_cpu(auto_demotion_disable, NULL, 1);
if (icpu->disable_promotion_to_c1e) /* each-cpu is redundant */
on_each_cpu(c1e_promotion_disable, NULL, 1);
return 0;
}
@ -521,21 +606,27 @@ static int intel_idle_cpu_init(int cpu)
dev->state_count = 1;
for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) {
int num_substates;
for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
int num_substates, mwait_hint, mwait_cstate, mwait_substate;
if (cstate > max_cstate) {
if (cpuidle_state_table[cstate].enter == NULL)
continue;
if (cstate + 1 > max_cstate) {
printk(PREFIX "max_cstate %d reached\n", max_cstate);
break;
}
mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint);
mwait_substate = MWAIT_HINT2SUBSTATE(mwait_hint);
/* does the state exist in CPUID.MWAIT? */
num_substates = (mwait_substates >> ((cstate) * 4))
& MWAIT_SUBSTATE_MASK;
if (num_substates == 0)
continue;
/* is the state not enabled? */
if (cpuidle_state_table[cstate].enter == NULL)
num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4))
& MWAIT_SUBSTATE_MASK;
/* if sub-state in table is not enumerated by CPUID */
if ((mwait_substate + 1) > num_substates)
continue;
dev->state_count += 1;

View file

@ -31,7 +31,6 @@
/*
* Callbacks for platform drivers to implement.
*/
extern void (*pm_idle)(void);
extern void (*pm_power_off)(void);
extern void (*pm_power_off_prepare)(void);

View file

@ -31,8 +31,6 @@ The \fB-S\fP option limits output to a 1-line System Summary for each interval.
.PP
The \fB-v\fP option increases verbosity.
.PP
The \fB-s\fP option prints the SMI counter, equivalent to "-c 0x34"
.PP
The \fB-c MSR#\fP option includes the delta of the specified 32-bit MSR counter.
.PP
The \fB-C MSR#\fP option includes the delta of the specified 64-bit MSR counter.
@ -186,26 +184,24 @@ This is a weighted average, where the weight is %c0. ie. it is the total number
un-halted cycles elapsed per time divided by the number of CPUs.
.SH SMI COUNTING EXAMPLE
On Intel Nehalem and newer processors, MSR 0x34 is a System Management Mode Interrupt (SMI) counter.
Using the -m option, you can display how many SMIs have fired since reset, or if there
are SMIs during the measurement interval, you can display the delta using the -d option.
This counter is shown by default under the "SMI" column.
.nf
[root@x980 ~]# turbostat -m 0x34
cor CPU %c0 GHz TSC MSR 0x034 %c1 %c3 %c6 %pc3 %pc6
1.41 1.82 3.38 0x00000000 8.92 37.82 51.85 17.37 0.55
0 0 3.73 2.03 3.38 0x00000055 1.72 48.25 46.31 17.38 0.55
0 6 0.14 1.63 3.38 0x00000056 5.30
1 2 2.51 1.80 3.38 0x00000056 15.65 29.33 52.52
1 8 0.10 1.65 3.38 0x00000056 18.05
2 4 1.16 1.68 3.38 0x00000056 5.87 24.47 68.50
2 10 0.10 1.63 3.38 0x00000056 6.93
8 1 3.84 1.91 3.38 0x00000056 1.36 50.65 44.16
8 7 0.08 1.64 3.38 0x00000056 5.12
9 3 1.82 1.73 3.38 0x00000056 7.59 24.21 66.38
9 9 0.09 1.68 3.38 0x00000056 9.32
10 5 1.66 1.65 3.38 0x00000056 15.10 50.00 33.23
10 11 1.72 1.65 3.38 0x00000056 15.05
[root@x980 ~]# turbostat
cor CPU %c0 GHz TSC SMI %c1 %c3 %c6 CTMP %pc3 %pc6
0.11 1.91 3.38 0 1.84 0.26 97.79 29 0.82 83.87
0 0 0.40 1.63 3.38 0 10.27 0.12 89.20 20 0.82 83.88
0 6 0.06 1.63 3.38 0 10.61
1 2 0.37 2.63 3.38 0 0.02 0.10 99.51 22
1 8 0.01 1.62 3.38 0 0.39
2 4 0.07 1.62 3.38 0 0.04 0.07 99.82 23
2 10 0.02 1.62 3.38 0 0.09
8 1 0.23 1.64 3.38 0 0.10 1.07 98.60 24
8 7 0.02 1.64 3.38 0 0.31
9 3 0.03 1.62 3.38 0 0.03 0.05 99.89 29
9 9 0.02 1.62 3.38 0 0.05
10 5 0.07 1.62 3.38 0 0.08 0.12 99.73 27
10 11 0.03 1.62 3.38 0 0.13
^C
[root@x980 ~]#
.fi
.SH NOTES

View file

@ -58,6 +58,7 @@ unsigned int extra_msr_offset32;
unsigned int extra_msr_offset64;
unsigned int extra_delta_offset32;
unsigned int extra_delta_offset64;
int do_smi;
double bclk;
unsigned int show_pkg;
unsigned int show_core;
@ -99,6 +100,7 @@ struct thread_data {
unsigned long long extra_delta64;
unsigned long long extra_msr32;
unsigned long long extra_delta32;
unsigned int smi_count;
unsigned int cpu_id;
unsigned int flags;
#define CPU_IS_FIRST_THREAD_IN_CORE 0x2
@ -248,6 +250,8 @@ void print_header(void)
if (has_aperf)
outp += sprintf(outp, " GHz");
outp += sprintf(outp, " TSC");
if (do_smi)
outp += sprintf(outp, " SMI");
if (extra_delta_offset32)
outp += sprintf(outp, " count 0x%03X", extra_delta_offset32);
if (extra_delta_offset64)
@ -314,6 +318,8 @@ int dump_counters(struct thread_data *t, struct core_data *c,
extra_msr_offset32, t->extra_msr32);
fprintf(stderr, "msr0x%x: %016llX\n",
extra_msr_offset64, t->extra_msr64);
if (do_smi)
fprintf(stderr, "SMI: %08X\n", t->smi_count);
}
if (c) {
@ -352,6 +358,7 @@ int dump_counters(struct thread_data *t, struct core_data *c,
* RAM_W: %5.2
* GHz: "GHz" 3 columns %3.2
* TSC: "TSC" 3 columns %3.2
* SMI: "SMI" 4 columns %4d
* percentage " %pc3" %6.2
* Perf Status percentage: %5.2
* "CTMP" 4 columns %4d
@ -431,6 +438,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
/* TSC */
outp += sprintf(outp, "%5.2f", 1.0 * t->tsc/units/interval_float);
/* SMI */
if (do_smi)
outp += sprintf(outp, "%4d", t->smi_count);
/* delta */
if (extra_delta_offset32)
outp += sprintf(outp, " %11llu", t->extra_delta32);
@ -645,6 +656,9 @@ delta_thread(struct thread_data *new, struct thread_data *old,
*/
old->extra_msr32 = new->extra_msr32;
old->extra_msr64 = new->extra_msr64;
if (do_smi)
old->smi_count = new->smi_count - old->smi_count;
}
int delta_cpu(struct thread_data *t, struct core_data *c,
@ -672,6 +686,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
t->mperf = 0;
t->c1 = 0;
t->smi_count = 0;
t->extra_delta32 = 0;
t->extra_delta64 = 0;
@ -802,6 +817,11 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
return -4;
}
if (do_smi) {
if (get_msr(cpu, MSR_SMI_COUNT, &msr))
return -5;
t->smi_count = msr & 0xFFFFFFFF;
}
if (extra_delta_offset32) {
if (get_msr(cpu, extra_delta_offset32, &msr))
return -5;
@ -908,8 +928,7 @@ void print_verbose_header(void)
get_msr(0, MSR_NHM_PLATFORM_INFO, &msr);
if (verbose)
fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr);
fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr);
ratio = (msr >> 40) & 0xFF;
fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n",
@ -919,13 +938,16 @@ void print_verbose_header(void)
fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n",
ratio, bclk, ratio * bclk);
get_msr(0, MSR_IA32_POWER_CTL, &msr);
fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E: %sabled)\n",
msr, msr & 0x2 ? "EN" : "DIS");
if (!do_ivt_turbo_ratio_limit)
goto print_nhm_turbo_ratio_limits;
get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr);
if (verbose)
fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
ratio = (msr >> 56) & 0xFF;
if (ratio)
@ -1016,8 +1038,7 @@ void print_verbose_header(void)
get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
if (verbose)
fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
ratio = (msr >> 56) & 0xFF;
if (ratio)
@ -1397,6 +1418,9 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
case 0x2D: /* SNB Xeon */
case 0x3A: /* IVB */
case 0x3E: /* IVB Xeon */
case 0x3C: /* HSW */
case 0x3F: /* HSW */
case 0x45: /* HSW */
return 1;
case 0x2E: /* Nehalem-EX Xeon - Beckton */
case 0x2F: /* Westmere-EX Xeon - Eagleton */
@ -1488,6 +1512,9 @@ void rapl_probe(unsigned int family, unsigned int model)
switch (model) {
case 0x2A:
case 0x3A:
case 0x3C: /* HSW */
case 0x3F: /* HSW */
case 0x45: /* HSW */
do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX;
break;
case 0x2D:
@ -1724,6 +1751,9 @@ int is_snb(unsigned int family, unsigned int model)
case 0x2D:
case 0x3A: /* IVB */
case 0x3E: /* IVB Xeon */
case 0x3C: /* HSW */
case 0x3F: /* HSW */
case 0x45: /* HSW */
return 1;
}
return 0;
@ -1883,6 +1913,7 @@ void check_cpuid()
do_nehalem_platform_info = genuine_intel && has_invariant_tsc;
do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */
do_smi = do_nhm_cstates;
do_snb_cstates = is_snb(family, model);
bclk = discover_bclk(family, model);
@ -2219,9 +2250,6 @@ void cmdline(int argc, char **argv)
case 'c':
sscanf(optarg, "%x", &extra_delta_offset32);
break;
case 's':
extra_delta_offset32 = 0x34; /* SMI counter */
break;
case 'C':
sscanf(optarg, "%x", &extra_delta_offset64);
break;
@ -2248,7 +2276,7 @@ int main(int argc, char **argv)
cmdline(argc, argv);
if (verbose)
fprintf(stderr, "turbostat v3.0 November 23, 2012"
fprintf(stderr, "turbostat v3.2 February 11, 2013"
" - Len Brown <lenb@kernel.org>\n");
turbostat_init();