powernv: Add OPAL tracepoints

Knowing how long we spend in firmware calls is an important part of
minimising OS jitter.

This patch adds tracepoints to each OPAL call. If tracepoints are
enabled we branch out to a common routine that calls an entry and exit
tracepoint.

This allows us to write tools that monitor the frequency and duration
of OPAL calls, eg:

name                  count  total(ms)  min(ms)  max(ms)  avg(ms)  period(ms)
OPAL_HANDLE_INTERRUPT     5      0.199    0.037    0.042    0.040   12547.545
OPAL_POLL_EVENTS        204      2.590    0.012    0.036    0.013    2264.899
OPAL_PCI_MSI_EOI       2830      3.066    0.001    0.005    0.001      81.166

We use jump labels if configured, which means we only add a single
nop instruction to every OPAL call when the tracepoints are disabled.

Signed-off-by: Anton Blanchard <anton@samba.org>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
This commit is contained in:
Anton Blanchard 2014-07-03 17:20:50 +10:00 committed by Benjamin Herrenschmidt
parent aaad422482
commit c49f63530b
4 changed files with 236 additions and 9 deletions

View file

@ -99,6 +99,51 @@ TRACE_EVENT_FN(hcall_exit,
);
#endif
#ifdef CONFIG_PPC_POWERNV
extern void opal_tracepoint_regfunc(void);
extern void opal_tracepoint_unregfunc(void);
TRACE_EVENT_FN(opal_entry,
TP_PROTO(unsigned long opcode, unsigned long *args),
TP_ARGS(opcode, args),
TP_STRUCT__entry(
__field(unsigned long, opcode)
),
TP_fast_assign(
__entry->opcode = opcode;
),
TP_printk("opcode=%lu", __entry->opcode),
opal_tracepoint_regfunc, opal_tracepoint_unregfunc
);
TRACE_EVENT_FN(opal_exit,
TP_PROTO(unsigned long opcode, unsigned long retval),
TP_ARGS(opcode, retval),
TP_STRUCT__entry(
__field(unsigned long, opcode)
__field(unsigned long, retval)
),
TP_fast_assign(
__entry->opcode = opcode;
__entry->retval = retval;
),
TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval),
opal_tracepoint_regfunc, opal_tracepoint_unregfunc
);
#endif
#endif /* _TRACE_POWERPC_H */
#undef TRACE_INCLUDE_PATH

View file

@ -8,3 +8,4 @@ obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o
obj-$(CONFIG_EEH) += eeh-ioda.o eeh-powernv.o
obj-$(CONFIG_PPC_SCOM) += opal-xscom.o
obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o
obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o

View file

@ -0,0 +1,84 @@
#include <linux/percpu.h>
#include <linux/jump_label.h>
#include <asm/trace.h>
#ifdef CONFIG_JUMP_LABEL
struct static_key opal_tracepoint_key = STATIC_KEY_INIT;
void opal_tracepoint_regfunc(void)
{
static_key_slow_inc(&opal_tracepoint_key);
}
void opal_tracepoint_unregfunc(void)
{
static_key_slow_dec(&opal_tracepoint_key);
}
#else
/*
* We optimise OPAL calls by placing opal_tracepoint_refcount
* directly in the TOC so we can check if the opal tracepoints are
* enabled via a single load.
*/
/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
extern long opal_tracepoint_refcount;
void opal_tracepoint_regfunc(void)
{
opal_tracepoint_refcount++;
}
void opal_tracepoint_unregfunc(void)
{
opal_tracepoint_refcount--;
}
#endif
/*
* Since the tracing code might execute OPAL calls we need to guard against
* recursion.
*/
static DEFINE_PER_CPU(unsigned int, opal_trace_depth);
void __trace_opal_entry(unsigned long opcode, unsigned long *args)
{
unsigned long flags;
unsigned int *depth;
local_irq_save(flags);
depth = &__get_cpu_var(opal_trace_depth);
if (*depth)
goto out;
(*depth)++;
preempt_disable();
trace_opal_entry(opcode, args);
(*depth)--;
out:
local_irq_restore(flags);
}
void __trace_opal_exit(long opcode, unsigned long retval)
{
unsigned long flags;
unsigned int *depth;
local_irq_save(flags);
depth = &__get_cpu_var(opal_trace_depth);
if (*depth)
goto out;
(*depth)++;
trace_opal_exit(opcode, retval);
preempt_enable();
(*depth)--;
out:
local_irq_restore(flags);
}

View file

@ -13,30 +13,69 @@
#include <asm/hvcall.h>
#include <asm/asm-offsets.h>
#include <asm/opal.h>
#include <asm/jump_label.h>
.section ".text"
#ifdef CONFIG_TRACEPOINTS
#ifdef CONFIG_JUMP_LABEL
#define OPAL_BRANCH(LABEL) \
ARCH_STATIC_BRANCH(LABEL, opal_tracepoint_key)
#else
.section ".toc","aw"
.globl opal_tracepoint_refcount
opal_tracepoint_refcount:
.llong 0
.section ".text"
/*
* We branch around this in early init by using an unconditional cpu
* feature.
*/
#define OPAL_BRANCH(LABEL) \
BEGIN_FTR_SECTION; \
b 1f; \
END_FTR_SECTION(0, 1); \
ld r12,opal_tracepoint_refcount@toc(r2); \
std r12,32(r1); \
cmpdi r12,0; \
bne- LABEL; \
1:
#endif
#else
#define OPAL_BRANCH(LABEL)
#endif
/* TODO:
*
* - Trace irqs in/off (needs saving/restoring all args, argh...)
* - Get r11 feed up by Dave so I can have better register usage
*/
#define OPAL_CALL(name, token) \
_GLOBAL(name); \
mflr r0; \
mfcr r12; \
std r0,16(r1); \
li r0,token; \
OPAL_BRANCH(opal_tracepoint_entry) \
mfcr r12; \
stw r12,8(r1); \
std r1,PACAR1(r13); \
li r0,0; \
li r11,0; \
mfmsr r12; \
ori r0,r0,MSR_EE; \
ori r11,r11,MSR_EE; \
std r12,PACASAVEDMSR(r13); \
andc r12,r12,r0; \
andc r12,r12,r11; \
mtmsrd r12,1; \
LOAD_REG_ADDR(r0,opal_return); \
mtlr r0; \
li r0,MSR_DR|MSR_IR|MSR_LE;\
andc r12,r12,r0; \
li r0,token; \
LOAD_REG_ADDR(r11,opal_return); \
mtlr r11; \
li r11,MSR_DR|MSR_IR|MSR_LE;\
andc r12,r12,r11; \
mtspr SPRN_HSRR1,r12; \
LOAD_REG_ADDR(r11,opal); \
ld r12,8(r11); \
@ -61,6 +100,64 @@ opal_return:
mtcr r4;
rfid
#ifdef CONFIG_TRACEPOINTS
opal_tracepoint_entry:
stdu r1,-STACKFRAMESIZE(r1)
std r0,STK_REG(R23)(r1)
std r3,STK_REG(R24)(r1)
std r4,STK_REG(R25)(r1)
std r5,STK_REG(R26)(r1)
std r6,STK_REG(R27)(r1)
std r7,STK_REG(R28)(r1)
std r8,STK_REG(R29)(r1)
std r9,STK_REG(R30)(r1)
std r10,STK_REG(R31)(r1)
mr r3,r0
addi r4,r1,STK_REG(R24)
bl __trace_opal_entry
ld r0,STK_REG(R23)(r1)
ld r3,STK_REG(R24)(r1)
ld r4,STK_REG(R25)(r1)
ld r5,STK_REG(R26)(r1)
ld r6,STK_REG(R27)(r1)
ld r7,STK_REG(R28)(r1)
ld r8,STK_REG(R29)(r1)
ld r9,STK_REG(R30)(r1)
ld r10,STK_REG(R31)(r1)
LOAD_REG_ADDR(r11,opal_tracepoint_return)
mfcr r12
std r11,16(r1)
stw r12,8(r1)
std r1,PACAR1(r13)
li r11,0
mfmsr r12
ori r11,r11,MSR_EE
std r12,PACASAVEDMSR(r13)
andc r12,r12,r11
mtmsrd r12,1
LOAD_REG_ADDR(r11,opal_return)
mtlr r11
li r11,MSR_DR|MSR_IR|MSR_LE
andc r12,r12,r11
mtspr SPRN_HSRR1,r12
LOAD_REG_ADDR(r11,opal)
ld r12,8(r11)
ld r2,0(r11)
mtspr SPRN_HSRR0,r12
hrfid
opal_tracepoint_return:
std r3,STK_REG(R31)(r1)
mr r4,r3
ld r0,STK_REG(R23)(r1)
bl __trace_opal_exit
ld r3,STK_REG(R31)(r1)
addi r1,r1,STACKFRAMESIZE
ld r0,16(r1)
mtlr r0
blr
#endif
OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL);
OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE);
OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ);