From a367f44c18aa700abdbe5a4806e570a2b403bd19 Mon Sep 17 00:00:00 2001
From: Austin Clements
Date: Mon, 26 Feb 2018 20:48:53 -0500
Subject: [PATCH] cmd/compile: enable stack maps everywhere except unsafe points
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This modifies issafepoint in liveness analysis to report almost every
operation as a safe point. There are four things we don't mark as
safe-points:

1. Runtime code (other than at calls).

2. go:nosplit functions (other than at calls).

3. Instructions between the load of the write barrier-enabled flag
   and the write.

4. Instructions leading up to a uintptr -> unsafe.Pointer conversion.

We'll optimize this in later CLs:

name        old time/op  new time/op  delta
Template     185ms ± 2%   190ms ± 2%  +2.95%  (p=0.000 n=10+10)
Unicode     96.3ms ± 3%  96.4ms ± 1%    ~     (p=0.905 n=10+9)
GoTypes      658ms ± 0%   669ms ± 1%  +1.72%  (p=0.000 n=10+9)
Compiler     3.14s ± 1%   3.18s ± 1%  +1.56%  (p=0.000 n=9+10)
SSA          7.41s ± 2%   7.59s ± 1%  +2.48%  (p=0.000 n=9+10)
Flate        126ms ± 1%   128ms ± 1%  +2.08%  (p=0.000 n=10+10)
GoParser     153ms ± 1%   157ms ± 2%  +2.38%  (p=0.000 n=10+10)
Reflect      437ms ± 1%   442ms ± 1%  +0.98%  (p=0.001 n=10+10)
Tar          178ms ± 1%   179ms ± 1%  +0.67%  (p=0.035 n=10+9)
XML          223ms ± 1%   229ms ± 1%  +2.58%  (p=0.000 n=10+10)
[Geo mean]   394ms        401ms       +1.75%

No effect on binary size because we're not yet emitting these extra
safe points.

For #24543.

Change-Id: I16a1eebb9183cad7cef9d53c0fd21a973cad6859
Reviewed-on: https://go-review.googlesource.com/109348
Run-TryBot: Austin Clements
TryBot-Result: Gobot Gobot
Reviewed-by: David Chase
---
 src/cmd/compile/internal/gc/plive.go         | 181 ++++++++++++++++++-
 src/cmd/compile/internal/gc/ssa.go           |   7 +-
 src/cmd/compile/internal/ssa/func.go         |   6 +
 src/cmd/compile/internal/ssa/writebarrier.go |  18 ++
 test/live.go                                 |   6 +-
 5 files changed, 207 insertions(+), 11 deletions(-)

diff --git a/src/cmd/compile/internal/gc/plive.go b/src/cmd/compile/internal/gc/plive.go
index 5eca80718c..456a2f7652 100644
--- a/src/cmd/compile/internal/gc/plive.go
+++ b/src/cmd/compile/internal/gc/plive.go
@@ -116,6 +116,9 @@ type Liveness struct {
 
 	be []BlockEffects
 
+	// unsafePoints bit i is set if Value ID i is not a safe point.
+	unsafePoints bvec
+
 	// An array with a bit vector for each safe point tracking live variables.
 	// Indexed sequentially by safe points in Block and Value order.
 	livevars []bvec
@@ -367,6 +370,8 @@ func newliveness(fn *Node, f *ssa.Func, vars []*Node, idx map[*Node]int32, stkpt
 		be.avarinitany = bulk.next()
 		be.avarinitall = bulk.next()
 	}
+
+	lv.markUnsafePoints()
 	return lv
 }
 
@@ -470,10 +475,167 @@ func (lv *Liveness) pointerMap(liveout bvec, vars []*Node, args, locals bvec) {
 	}
 }
 
+// markUnsafePoints finds unsafe points and computes lv.unsafePoints.
+func (lv *Liveness) markUnsafePoints() {
+	if compiling_runtime || lv.f.NoSplit {
+		// No complex analysis necessary. Do this on the fly
+		// in issafepoint.
+		return
+	}
+
+	lv.unsafePoints = bvalloc(int32(lv.f.NumValues()))
+
+	// Mark write barrier unsafe points.
+	for _, wbBlock := range lv.f.WBLoads {
+		// Check that we have the expected diamond shape.
+		if len(wbBlock.Succs) != 2 {
+			lv.f.Fatalf("expected branch at write barrier block %v", wbBlock)
+		}
+		s0, s1 := wbBlock.Succs[0].Block(), wbBlock.Succs[1].Block()
+		if s0.Kind != ssa.BlockPlain || s1.Kind != ssa.BlockPlain {
+			lv.f.Fatalf("expected successors of write barrier block %v to be plain", wbBlock)
+		}
+		if s0.Succs[0].Block() != s1.Succs[0].Block() {
+			lv.f.Fatalf("expected successors of write barrier block %v to converge", wbBlock)
+		}
+
+		// Flow backwards from the control value to find the
+		// flag load. We don't know what lowered ops we're
+		// looking for, but all current arches produce a
+		// single op that does the memory load from the flag
+		// address, so we look for that.
+		var load *ssa.Value
+		v := wbBlock.Control
+		for {
+			if sym, ok := v.Aux.(*obj.LSym); ok && sym == writeBarrier {
+				load = v
+				break
+			}
+			switch v.Op {
+			case ssa.Op386TESTL:
+				// 386 lowers Neq32 to (TESTL cond cond),
+				if v.Args[0] == v.Args[1] {
+					v = v.Args[0]
+					continue
+				}
+			case ssa.OpPPC64MOVWZload, ssa.Op386MOVLload:
+				// Args[0] is the address of the write
+				// barrier control. Ignore Args[1],
+				// which is the mem operand.
+				v = v.Args[0]
+				continue
+			}
+			// Common case: just flow backwards.
+			if len(v.Args) != 1 {
+				v.Fatalf("write barrier control value has more than one argument: %s", v.LongString())
+			}
+			v = v.Args[0]
+		}
+
+		// Mark everything after the load unsafe.
+		found := false
+		for _, v := range wbBlock.Values {
+			found = found || v == load
+			if found {
+				lv.unsafePoints.Set(int32(v.ID))
+			}
+		}
+
+		// Mark the two successor blocks unsafe. These come
+		// back together immediately after the direct write in
+		// one successor and the last write barrier call in
+		// the other, so there's no need to be more precise.
+		for _, succ := range wbBlock.Succs {
+			for _, v := range succ.Block().Values {
+				lv.unsafePoints.Set(int32(v.ID))
+			}
+		}
+	}
+
+	// Find uintptr -> unsafe.Pointer conversions and flood
+	// unsafeness back to a call (which is always a safe point).
+	//
+	// Looking for the uintptr -> unsafe.Pointer conversion has a
+	// few advantages over looking for unsafe.Pointer -> uintptr
+	// conversions:
+	//
+	// 1. We avoid needlessly blocking safe-points for
+	// unsafe.Pointer -> uintptr conversions that never go back to
+	// a Pointer.
+	//
+	// 2. We don't have to detect calls to reflect.Value.Pointer,
+	// reflect.Value.UnsafeAddr, and reflect.Value.InterfaceData,
+	// which are implicit unsafe.Pointer -> uintptr conversions.
+	// We can't even reliably detect this if there's an indirect
+	// call to one of these methods.
+	//
+	// TODO: For trivial unsafe.Pointer arithmetic, it would be
+	// nice to only flood as far as the unsafe.Pointer -> uintptr
+	// conversion, but it's hard to know which argument of an Add
+	// or Sub to follow.
+	var flooded bvec
+	var flood func(b *ssa.Block, vi int)
+	flood = func(b *ssa.Block, vi int) {
+		if flooded.n == 0 {
+			flooded = bvalloc(int32(lv.f.NumBlocks()))
+		}
+		if flooded.Get(int32(b.ID)) {
+			return
+		}
+		for i := vi - 1; i >= 0; i-- {
+			v := b.Values[i]
+			if v.Op.IsCall() {
+				// Uintptrs must not contain live
+				// pointers across calls, so stop
+				// flooding.
+				return
+			}
+			lv.unsafePoints.Set(int32(v.ID))
+		}
+		if vi == len(b.Values) {
+			// We marked all values in this block, so no
+			// need to flood this block again.
+			flooded.Set(int32(b.ID))
+		}
+		for _, pred := range b.Preds {
+			flood(pred.Block(), len(pred.Block().Values))
+		}
+	}
+	for _, b := range lv.f.Blocks {
+		for i, v := range b.Values {
+			if !(v.Op == ssa.OpConvert && v.Type.IsPtrShaped()) {
+				continue
+			}
+			// Flood the unsafe-ness of this backwards
+			// until we hit a call.
+			flood(b, i+1)
+		}
+	}
+}
+
 // Returns true for instructions that are safe points that must be annotated
 // with liveness information.
-func issafepoint(v *ssa.Value) bool {
-	return v.Op.IsCall()
+func (lv *Liveness) issafepoint(v *ssa.Value) bool {
+	// The runtime was written with the assumption that
+	// safe-points only appear at call sites (because that's how
+	// it used to be). We could and should improve that, but for
+	// now keep the old safe-point rules in the runtime.
+	//
+	// go:nosplit functions are similar. Since safe points used to
+	// be coupled with stack checks, go:nosplit often actually
+	// means "no safe points in this function".
+	if compiling_runtime || lv.f.NoSplit {
+		return v.Op.IsCall()
+	}
+	switch v.Op {
+	case ssa.OpInitMem, ssa.OpArg, ssa.OpSP, ssa.OpSB,
+		ssa.OpSelect0, ssa.OpSelect1, ssa.OpGetG,
+		ssa.OpVarDef, ssa.OpVarLive, ssa.OpKeepAlive,
+		ssa.OpPhi:
+		// These don't produce code (see genssa).
+		return false
+	}
+	return !lv.unsafePoints.Get(int32(v.ID))
 }
 
 // Initializes the sets for solving the live variables. Visits all the
@@ -680,7 +842,7 @@ func (lv *Liveness) epilogue() {
 				all.Set(pos)
 			}
 
-			if !issafepoint(v) {
+			if !lv.issafepoint(v) {
 				continue
 			}
 
@@ -728,7 +890,7 @@ func (lv *Liveness) epilogue() {
 		for i := len(b.Values) - 1; i >= 0; i-- {
 			v := b.Values[i]
 
-			if issafepoint(v) {
+			if lv.issafepoint(v) {
 				// Found an interesting instruction, record the
 				// corresponding liveness information.
 
@@ -829,7 +991,7 @@ func (lv *Liveness) clobber() {
 
 		// Copy values into schedule, adding clobbering around safepoints.
 		for _, v := range oldSched {
-			if !issafepoint(v) {
+			if !lv.issafepoint(v) {
 				b.Values = append(b.Values, v)
 				continue
 			}
@@ -1037,7 +1199,7 @@ Outer:
 	lv.livenessMap = LivenessMap{make(map[*ssa.Value]LivenessIndex)}
 	for _, b := range lv.f.Blocks {
 		for _, v := range b.Values {
-			if issafepoint(v) {
+			if lv.issafepoint(v) {
 				lv.showlive(v, lv.stackMaps[remap[pos]])
 				lv.livenessMap.m[v] = LivenessIndex{remap[pos]}
 				pos++
@@ -1050,6 +1212,11 @@ func (lv *Liveness) showlive(v *ssa.Value, live bvec) {
 	if debuglive == 0 || lv.fn.funcname() == "init" || strings.HasPrefix(lv.fn.funcname(), ".") {
 		return
 	}
+	if !(v == nil || v.Op.IsCall()) {
+		// Historically we only printed this information at
+		// calls. Keep doing so.
+		return
+	}
 	if live.IsEmpty() {
 		return
 	}
@@ -1194,7 +1361,7 @@ func (lv *Liveness) printDebug() {
 				fmt.Printf("\n")
 			}
 
-			if !issafepoint(v) {
+			if !lv.issafepoint(v) {
 				continue
 			}
 
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 038886c3ff..09d12cba1e 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -5272,7 +5272,12 @@ func (s *SSAGenState) Call(v *ssa.Value) *obj.Prog {
 func (s *SSAGenState) PrepareCall(v *ssa.Value) {
 	idx := s.livenessMap.Get(v)
 	if !idx.Valid() {
-		Fatalf("missing stack map index for %v", v.LongString())
+		// typedmemclr and typedmemmove are write barriers and
+		// deeply non-preemptible. They are unsafe points and
+		// hence should not have liveness maps.
+		if sym, _ := v.Aux.(*obj.LSym); !(sym == typedmemclr || sym == typedmemmove) {
+			Fatalf("missing stack map index for %v", v.LongString())
+		}
 	}
 	p := s.Prog(obj.APCDATA)
 	Addrconst(&p.From, objabi.PCDATA_StackMapIndex)
diff --git a/src/cmd/compile/internal/ssa/func.go b/src/cmd/compile/internal/ssa/func.go
index 900be71c42..85d41d124b 100644
--- a/src/cmd/compile/internal/ssa/func.go
+++ b/src/cmd/compile/internal/ssa/func.go
@@ -53,6 +53,12 @@ type Func struct {
 	// of keys to make iteration order deterministic.
 	Names []LocalSlot
 
+	// WBLoads is a list of Blocks that branch on the write
+	// barrier flag. Safe-points are disabled from the OpLoad that
+	// reads the write-barrier flag until the control flow rejoins
+	// below the two successors of this block.
+	WBLoads []*Block
+
 	freeValues *Value // free Values linked by argstorage[0]. All other fields except ID are 0/nil.
 	freeBlocks *Block // free Blocks linked by succstorage[0].b. All other fields except ID are 0/nil.
diff --git a/src/cmd/compile/internal/ssa/writebarrier.go b/src/cmd/compile/internal/ssa/writebarrier.go
index c3f3cf95ed..92b8b006b7 100644
--- a/src/cmd/compile/internal/ssa/writebarrier.go
+++ b/src/cmd/compile/internal/ssa/writebarrier.go
@@ -111,6 +111,7 @@ func writebarrier(f *Func) {
 		// order values in store order
 		b.Values = storeOrder(b.Values, sset, storeNumber)
 
+		firstSplit := true
 	again:
 		// find the start and end of the last contiguous WB store sequence.
 		// a branch will be inserted there. values after it will be moved
@@ -268,6 +269,23 @@ func writebarrier(f *Func) {
 			w.Block = bEnd
 		}
 
+		// Preemption is unsafe between loading the write
+		// barrier-enabled flag and performing the write
+		// because that would allow a GC phase transition,
+		// which would invalidate the flag. Remember the
+		// conditional block so liveness analysis can disable
+		// safe-points. This is somewhat subtle because we're
+		// splitting b bottom-up.
+		if firstSplit {
+			// Add b itself.
+			b.Func.WBLoads = append(b.Func.WBLoads, b)
+			firstSplit = false
+		} else {
+			// We've already split b, so we just pushed a
+			// write barrier test into bEnd.
+			b.Func.WBLoads = append(b.Func.WBLoads, bEnd)
+		}
+
 		// if we have more stores in this block, do this block again
 		if nWBops > 0 {
 			goto again
diff --git a/test/live.go b/test/live.go
index 8de3cf7e86..18611f5113 100644
--- a/test/live.go
+++ b/test/live.go
@@ -553,7 +553,7 @@ func f34() {
 }
 
 func f35() {
-	if m33[byteptr()] == 0 && m33[byteptr()] == 0 { // ERROR "live at call to mapaccess1: .autotmp_[0-9]+$"
+	if m33[byteptr()] == 0 && m33[byteptr()] == 0 { // ERROR "live at call to mapaccess1: .autotmp_[0-9]+$" "f35: .autotmp_[0-9]+ \(type interface \{\}\) is ambiguously live$"
 		printnl()
 		return
 	}
@@ -561,7 +561,7 @@ func f36() {
-	if m33[byteptr()] == 0 || m33[byteptr()] == 0 { // ERROR "live at call to mapaccess1: .autotmp_[0-9]+$"
+	if m33[byteptr()] == 0 || m33[byteptr()] == 0 { // ERROR "live at call to mapaccess1: .autotmp_[0-9]+$" "f36: .autotmp_[0-9]+ \(type interface \{\}\) is ambiguously live$"
 		printnl()
 		return
 	}
@@ -569,7 +569,7 @@ func f37() {
-	if (m33[byteptr()] == 0 || m33[byteptr()] == 0) && m33[byteptr()] == 0 { // ERROR "live at call to mapaccess1: .autotmp_[0-9]+$"
+	if (m33[byteptr()] == 0 || m33[byteptr()] == 0) && m33[byteptr()] == 0 { // ERROR "live at call to mapaccess1: .autotmp_[0-9]+$" "f37: .autotmp_[0-9]+ \(type interface \{\}\) is ambiguously live$"
 		printnl()
 		return
 	}
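
[Editor's note, not part of the CL] Unsafe-point category 4 from the commit message is easiest to see in ordinary user code. The following is a minimal, hypothetical sketch (the pair type and the secondField helper are invented for illustration). Even though the pointer -> uintptr -> pointer round trip is written as a single expression, the compiler emits several instructions in which the only reference to the object is a bare uintptr, which the garbage collector neither traces nor updates. That is exactly the window markUnsafePoints protects: it floods unsafeness backwards from the uintptr -> unsafe.Pointer conversion (OpConvert) until it reaches a call.

package main

import (
	"fmt"
	"unsafe"
)

type pair struct{ a, b int64 }

// secondField returns a pointer to p.b using the sanctioned
// pointer -> uintptr -> pointer pattern, all in one expression.
// If the goroutine could be preempted between the conversion to
// uintptr and the conversion back, a stack move at that point
// would leave the uintptr holding a stale address, so no safe
// point may be placed on those instructions.
func secondField(p *pair) *int64 {
	return (*int64)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Offsetof(p.b)))
}

func main() {
	v := pair{a: 1, b: 2}
	fmt.Println(*secondField(&v)) // prints 2
}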
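[Editor's note, not part of the CL] Category 3 is what Func.WBLoads now records for the liveness pass. Any pointer store to the heap, such as the ones in the hypothetical push function below, is compiled into the diamond that markUnsafePoints expects: a block that loads the write barrier-enabled flag and branches to two plain blocks (a direct store on one side, a call into the runtime's write barrier on the other) that rejoin immediately afterwards. Everything from the flag load to the join point is marked unsafe, because a GC phase transition in that window would invalidate the flag that was already tested. This is only a source-level sketch of code that triggers the diamond, not the compiler's literal output.

package main

// node is a small type with a pointer field; values stored through
// head escape to the heap.
type node struct{ next *node }

var head *node

// push performs two pointer stores. Each one is compiled with the
// write barrier diamond described above, and the liveness pass keeps
// safe points out of the window between the flag load and the point
// where the two branches rejoin.
func push(n *node) {
	n.next = head // pointer store: write barrier diamond
	head = n      // pointer store: write barrier diamond
}

func main() {
	push(&node{})
	push(&node{})
}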