diff --git a/src/cmd/compile/internal/ssa/TODO b/src/cmd/compile/internal/ssa/TODO index 4e39d1e9c3..a457e67101 100644 --- a/src/cmd/compile/internal/ssa/TODO +++ b/src/cmd/compile/internal/ssa/TODO @@ -24,7 +24,7 @@ Optimizations (better compiled code) - Figure out how to make PARAMOUT variables ssa-able. They need to get spilled automatically at end-of-function somehow. - If strings are being passed around without being interpreted (ptr - and len feilds being accessed) pass them in xmm registers? + and len fields being accessed) pass them in xmm registers? Same for interfaces? - OpArrayIndex should take its index in AuxInt, not a full value. - remove FLAGS from REP instruction clobbers @@ -32,7 +32,6 @@ Optimizations (better compiled code) Note that this is challenging for ops that generate flags because flagalloc wants to move those instructions around for flag regeneration. -- In forms like if ... { call } else { no call }, mark the call branch as unlikely. - Non-constant rotate detection. - Do 0 <= x && x < n with one unsigned compare - nil-check removal in indexed load/store case: diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go index 2780e5bcfc..f68819c3c2 100644 --- a/src/cmd/compile/internal/ssa/compile.go +++ b/src/cmd/compile/internal/ssa/compile.go @@ -178,7 +178,8 @@ var passes = [...]pass{ {name: "late phielim", fn: phielim}, {name: "late copyelim", fn: copyelim}, {name: "late deadcode", fn: deadcode}, - {name: "critical", fn: critical, required: true}, // remove critical edges + {name: "critical", fn: critical, required: true}, // remove critical edges + {name: "likelyadjust", fn: likelyadjust}, {name: "layout", fn: layout, required: true}, // schedule blocks {name: "schedule", fn: schedule, required: true}, // schedule values {name: "flagalloc", fn: flagalloc, required: true}, // allocate flags register diff --git a/src/cmd/compile/internal/ssa/likelyadjust.go b/src/cmd/compile/internal/ssa/likelyadjust.go new file mode 100755 index 0000000000..6ce8705272 --- /dev/null +++ b/src/cmd/compile/internal/ssa/likelyadjust.go @@ -0,0 +1,300 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ssa + +import ( + "fmt" +) + +type loop struct { + header *Block // The header node of this (reducible) loop + outer *loop // loop containing this loop + // Next two fields not currently used, but cheap to maintain, + // and aid in computation of inner-ness and list of blocks. + nBlocks int32 // Number of blocks in this loop but not within inner loops + isInner bool // True if never discovered to contain a loop +} + +// outerinner records that outer contains inner +func (sdom sparseTree) outerinner(outer, inner *loop) { + oldouter := inner.outer + if oldouter == nil || sdom.isAncestorEq(oldouter.header, outer.header) { + inner.outer = outer + outer.isInner = false + } +} + +type loopnest struct { + f *Func + b2l []*loop + po []*Block + sdom sparseTree + loops []*loop +} + +func min8(a, b int8) int8 { + if a < b { + return a + } + return b +} + +func max8(a, b int8) int8 { + if a > b { + return a + } + return b +} + +const ( + blDEFAULT = 0 + blMin = blDEFAULT + blCALL = 1 + blRET = 2 + blEXIT = 3 +) + +var bllikelies [4]string = [4]string{"default", "call", "ret", "exit"} + +func describePredictionAgrees(b *Block, prediction BranchPrediction) string { + s := "" + if prediction == b.Likely { + s = " (agrees with previous)" + } else if b.Likely != BranchUnknown { + s = " (disagrees with previous, ignored)" + } + return s +} + +func describeBranchPrediction(f *Func, b *Block, likely, not int8, prediction BranchPrediction) { + f.Config.Warnl(int(b.Line), "Branch prediction rule %s < %s%s", + bllikelies[likely-blMin], bllikelies[not-blMin], describePredictionAgrees(b, prediction)) +} + +func likelyadjust(f *Func) { + // The values assigned to certain and local only matter + // in their rank order. 0 is default, more positive + // is less likely. It's possible to assign a negative + // unlikeliness (though not currently the case). + certain := make([]int8, f.NumBlocks()) // In the long run, all outcomes are at least this bad. Mainly for Exit + local := make([]int8, f.NumBlocks()) // for our immediate predecessors. + + nest := loopnestfor(f) + po := nest.po + b2l := nest.b2l + + for _, b := range po { + switch b.Kind { + case BlockExit: + // Very unlikely. + local[b.ID] = blEXIT + certain[b.ID] = blEXIT + + // Ret, it depends. + case BlockRet, BlockRetJmp: + local[b.ID] = blRET + certain[b.ID] = blRET + + // Calls. TODO not all calls are equal, names give useful clues. + // Any name-based heuristics are only relative to other calls, + // and less influential than inferences from loop structure. + case BlockCall: + local[b.ID] = blCALL + certain[b.ID] = max8(blCALL, certain[b.Succs[0].ID]) + + default: + if len(b.Succs) == 1 { + certain[b.ID] = certain[b.Succs[0].ID] + } else if len(b.Succs) == 2 { + // If successor is an unvisited backedge, it's in loop and we don't care. + // Its default unlikely is also zero which is consistent with favoring loop edges. + // Notice that this can act like a "reset" on unlikeliness at loops; the + // default "everything returns" unlikeliness is erased by min with the + // backedge likeliness; however a loop with calls on every path will be + // tagged with call cost. Net effect is that loop entry is favored. + b0 := b.Succs[0].ID + b1 := b.Succs[1].ID + certain[b.ID] = min8(certain[b0], certain[b1]) + + l := b2l[b.ID] + l0 := b2l[b0] + l1 := b2l[b1] + + prediction := b.Likely + // Weak loop heuristic -- both source and at least one dest are in loops, + // and there is a difference in the destinations. + // TODO what is best arrangement for nested loops? + if l != nil && l0 != l1 { + noprediction := false + switch { + // prefer not to exit loops + case l1 == nil: + prediction = BranchLikely + case l0 == nil: + prediction = BranchUnlikely + + // prefer to stay in loop, not exit to outer. + case l == l0: + prediction = BranchLikely + case l == l1: + prediction = BranchUnlikely + default: + noprediction = true + } + if f.pass.debug > 0 && !noprediction { + f.Config.Warnl(int(b.Line), "Branch prediction rule stay in loop%s", + describePredictionAgrees(b, prediction)) + } + + } else { + // Lacking loop structure, fall back on heuristics. + if certain[b1] > certain[b0] { + prediction = BranchLikely + if f.pass.debug > 0 { + describeBranchPrediction(f, b, certain[b0], certain[b1], prediction) + } + } else if certain[b0] > certain[b1] { + prediction = BranchUnlikely + if f.pass.debug > 0 { + describeBranchPrediction(f, b, certain[b1], certain[b0], prediction) + } + } else if local[b1] > local[b0] { + prediction = BranchLikely + if f.pass.debug > 0 { + describeBranchPrediction(f, b, local[b0], local[b1], prediction) + } + } else if local[b0] > local[b1] { + prediction = BranchUnlikely + if f.pass.debug > 0 { + describeBranchPrediction(f, b, local[b1], local[b0], prediction) + } + } + } + if b.Likely != prediction { + if b.Likely == BranchUnknown { + b.Likely = prediction + } + } + } + } + if f.pass.debug > 2 { + f.Config.Warnl(int(b.Line), "BP: Block %s, local=%s, certain=%s", b, bllikelies[local[b.ID]-blMin], bllikelies[certain[b.ID]-blMin]) + } + + } +} + +func (l *loop) String() string { + return fmt.Sprintf("hdr:%s", l.header) +} + +func (l *loop) LongString() string { + i := "" + o := "" + if l.isInner { + i = ", INNER" + } + if l.outer != nil { + o = ", o=" + l.outer.header.String() + } + return fmt.Sprintf("hdr:%s%s%s", l.header, i, o) +} + +// nearestOuterLoop returns the outer loop of loop most nearly +// containing block b; the header must dominate b. loop itself +// is assumed to not be that loop. For acceptable performance, +// we're relying on loop nests to not be terribly deep. +func (l *loop) nearestOuterLoop(sdom sparseTree, b *Block) *loop { + var o *loop + for o = l.outer; o != nil && !sdom.isAncestorEq(o.header, b); o = o.outer { + } + return o +} + +func loopnestfor(f *Func) *loopnest { + po := postorder(f) + dom := dominators(f) + sdom := newSparseTree(f, dom) + b2l := make([]*loop, f.NumBlocks()) + loops := make([]*loop, 0) + + // Reducible-loop-nest-finding. + for _, b := range po { + if f.pass.debug > 3 { + fmt.Printf("loop finding (0) at %s\n", b) + } + + var innermost *loop // innermost header reachable from this block + + // IF any successor s of b is in a loop headed by h + // AND h dominates b + // THEN b is in the loop headed by h. + // + // Choose the first/innermost such h. + // + // IF s itself dominates b, the s is a loop header; + // and there may be more than one such s. + // Since there's at most 2 successors, the inner/outer ordering + // between them can be established with simple comparisons. + for _, bb := range b.Succs { + l := b2l[bb.ID] + + if sdom.isAncestorEq(bb, b) { // Found a loop header + if l == nil { + l = &loop{header: bb, isInner: true} + loops = append(loops, l) + b2l[bb.ID] = l + } + } else { // Perhaps a loop header is inherited. + // is there any loop containing our successor whose + // header dominates b? + if l != nil && !sdom.isAncestorEq(l.header, b) { + l = l.nearestOuterLoop(sdom, b) + } + } + + if l == nil || innermost == l { + continue + } + + if innermost == nil { + innermost = l + continue + } + + if sdom.isAncestor(innermost.header, l.header) { + sdom.outerinner(innermost, l) + innermost = l + } else if sdom.isAncestor(l.header, innermost.header) { + sdom.outerinner(l, innermost) + } + } + + if innermost != nil { + b2l[b.ID] = innermost + innermost.nBlocks++ + } + } + if f.pass.debug > 1 && len(loops) > 0 { + fmt.Printf("Loops in %s:\n", f.Name) + for _, l := range loops { + fmt.Printf("%s, b=", l.LongString()) + for _, b := range f.Blocks { + if b2l[b.ID] == l { + fmt.Printf(" %s", b) + } + } + fmt.Print("\n") + } + fmt.Printf("Nonloop blocks in %s:", f.Name) + for _, b := range f.Blocks { + if b2l[b.ID] == nil { + fmt.Printf(" %s", b) + } + } + fmt.Print("\n") + } + return &loopnest{f, b2l, po, sdom, loops} +} diff --git a/test/opt_branchlikely.go b/test/opt_branchlikely.go new file mode 100644 index 0000000000..99e914654f --- /dev/null +++ b/test/opt_branchlikely.go @@ -0,0 +1,85 @@ +// +build amd64 +// errorcheck -0 -d=ssa/likelyadjust/debug=1 + +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Test that branches have some prediction properties. +package foo + +func f(x, y, z int) int { + a := 0 + for i := 0; i < x; i++ { // ERROR "Branch prediction rule stay in loop" + for j := 0; j < y; j++ { // ERROR "Branch prediction rule stay in loop" + a += j + } + for k := 0; k < z; k++ { // ERROR "Branch prediction rule stay in loop" + a -= x + y + z + } + } + return a +} + +func g(x, y, z int) int { + a := 0 + if y == 0 { // ERROR "Branch prediction rule default < call" + y = g(y, z, x) + } else { + y++ + } + if y == x { // ERROR "Branch prediction rule default < call" + y = g(y, z, x) + } else { + } + if y == 2 { // ERROR "Branch prediction rule default < call" + z++ + } else { + y = g(z, x, y) + } + if y+z == 3 { // ERROR "Branch prediction rule call < exit" + println("ha ha") + } else { + panic("help help help") + } + if x != 0 { // ERROR "Branch prediction rule default < ret" + for i := 0; i < x; i++ { // ERROR "Branch prediction rule stay in loop" + if x == 4 { // ERROR "Branch prediction rule stay in loop" + return a + } + for j := 0; j < y; j++ { // ERROR "Branch prediction rule stay in loop" + for k := 0; k < z; k++ { // ERROR "Branch prediction rule stay in loop" + a -= j * i + } + a += j + } + } + } + return a +} + +func h(x, y, z int) int { + a := 0 + for i := 0; i < x; i++ { // ERROR "Branch prediction rule stay in loop" + for j := 0; j < y; j++ { // ERROR "Branch prediction rule stay in loop" + a += j + if i == j { // ERROR "Branch prediction rule stay in loop" + break + } + a *= j + } + for k := 0; k < z; k++ { // ERROR "Branch prediction rule stay in loop" + a -= k + if i == k { + continue + } + a *= k + } + } + if a > 0 { // ERROR "Branch prediction rule default < call" + a = g(x, y, z) + } else { + a = -a + } + return a +}