cmd/pprof: instruction-level granularity in callgrind output

When generating callgrind format output, produce cost lines at
instruction granularity. This allows visualizers supporting the
callgrind format to display instruction-level profiling information.

We also need to provide the object file (ob=) in order for tools to find
the object file to disassemble when displaying assembly.

We opportunistically group cost lines corressponding to the same
function together, reducing the number of superfluous description lines.
Subposition compression (relative position numbering) is also used to
reduce the output size.

Change-Id: Id8e960b81dc7a47ec1dfbae877521f76972431c4
Reviewed-on: https://go-review.googlesource.com/23781
Run-TryBot: Michael Pratt <mpratt@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Raul Silvera <rsilvera@google.com>
Reviewed-by: Russ Cox <rsc@golang.org>
This commit is contained in:
Michael Pratt 2016-07-20 22:01:33 -07:00 committed by Russ Cox
parent f64c670181
commit 0aadaf2ad5
2 changed files with 54 additions and 14 deletions

View file

@ -712,8 +712,9 @@ func processFlags(p *profile.Profile, ui plugin.UI, f *flags) error {
flagPeek := f.isFormat("peek")
flagWebList := f.isFormat("weblist")
flagList := f.isFormat("list")
flagCallgrind := f.isFormat("callgrind")
if flagDis || flagWebList {
if flagDis || flagWebList || flagCallgrind {
// Collect all samples at address granularity for assembly
// listing.
f.flagNodeCount = newInt(0)
@ -904,16 +905,13 @@ func aggregate(prof *profile.Profile, f *flags) error {
switch {
case f.isFormat("proto"), f.isFormat("raw"):
// No aggregation for raw profiles.
case f.isFormat("callgrind"):
// Aggregate to file/line for callgrind.
fallthrough
case *f.flagLines:
return prof.Aggregate(true, true, true, true, false)
case *f.flagFiles:
return prof.Aggregate(true, false, true, false, false)
case *f.flagFunctions:
return prof.Aggregate(true, true, false, false, false)
case f.isFormat("weblist"), f.isFormat("disasm"):
case f.isFormat("weblist"), f.isFormat("disasm"), f.isFormat("callgrind"):
return prof.Aggregate(false, true, true, true, true)
}
return nil

View file

@ -355,27 +355,43 @@ func printCallgrind(w io.Writer, rpt *Report) error {
g.preprocess(rpt)
fmt.Fprintln(w, "positions: instr line")
fmt.Fprintln(w, "events:", o.SampleType+"("+o.OutputUnit+")")
objfiles := make(map[string]int)
files := make(map[string]int)
names := make(map[string]int)
// prevInfo points to the previous nodeInfo.
// It is used to group cost lines together as much as possible.
var prevInfo *nodeInfo
for _, n := range g.ns {
fmt.Fprintln(w, "fl="+callgrindName(files, n.info.file))
fmt.Fprintln(w, "fn="+callgrindName(names, n.info.name))
if prevInfo == nil || n.info.objfile != prevInfo.objfile || n.info.file != prevInfo.file || n.info.name != prevInfo.name {
fmt.Fprintln(w)
fmt.Fprintln(w, "ob="+callgrindName(objfiles, n.info.objfile))
fmt.Fprintln(w, "fl="+callgrindName(files, n.info.file))
fmt.Fprintln(w, "fn="+callgrindName(names, n.info.name))
}
addr := callgrindAddress(prevInfo, n.info.address)
sv, _ := ScaleValue(n.flat, o.SampleUnit, o.OutputUnit)
fmt.Fprintf(w, "%d %d\n", n.info.lineno, int(sv))
fmt.Fprintf(w, "%s %d %d\n", addr, n.info.lineno, int(sv))
// Print outgoing edges.
for _, out := range sortedEdges(n.out) {
c, _ := ScaleValue(out.weight, o.SampleUnit, o.OutputUnit)
count := fmt.Sprintf("%d", int(c))
callee := out.dest
fmt.Fprintln(w, "cfl="+callgrindName(files, callee.info.file))
fmt.Fprintln(w, "cfn="+callgrindName(names, callee.info.name))
fmt.Fprintln(w, "calls="+count, callee.info.lineno)
fmt.Fprintln(w, n.info.lineno, count)
fmt.Fprintf(w, "calls=%d %s %d\n", int(c), callgrindAddress(prevInfo, callee.info.address), callee.info.lineno)
// TODO: This address may be in the middle of a call
// instruction. It would be best to find the beginning
// of the instruction, but the tools seem to handle
// this OK.
fmt.Fprintf(w, "* * %d\n", int(c))
}
fmt.Fprintln(w)
prevInfo = &n.info
}
return nil
@ -397,6 +413,32 @@ func callgrindName(names map[string]int, name string) string {
return fmt.Sprintf("(%d) %s", id, name)
}
// callgrindAddress implements the callgrind subposition compression scheme if
// possible. If prevInfo != nil, it contains the previous address. The current
// address can be given relative to the previous address, with an explicit +/-
// to indicate it is relative, or * for the same address.
func callgrindAddress(prevInfo *nodeInfo, curr uint64) string {
abs := fmt.Sprintf("%#x", curr)
if prevInfo == nil {
return abs
}
prev := prevInfo.address
if prev == curr {
return "*"
}
diff := int64(curr - prev)
relative := fmt.Sprintf("%+d", diff)
// Only bother to use the relative address if it is actually shorter.
if len(relative) < len(abs) {
return relative
}
return abs
}
// printTree prints a tree-based report in text form.
func printTree(w io.Writer, rpt *Report) error {
const separator = "----------------------------------------------------------+-------------"
@ -1021,7 +1063,7 @@ func newLocInfo(l *profile.Location) []nodeInfo {
var objfile string
if m := l.Mapping; m != nil {
objfile = filepath.Base(m.File)
objfile = m.File
}
if len(l.Line) == 0 {
@ -1622,7 +1664,7 @@ func (info *nodeInfo) prettyName() string {
}
if name = strings.TrimSpace(name); name == "" && info.objfile != "" {
name = "[" + info.objfile + "]"
name = "[" + filepath.Base(info.objfile) + "]"
}
return name
}