cmd/compile: better write barrier removal when initializing new objects

When initializing a new object, we are often writing
1) to a location that doesn't yet hold a pointer to a heap object, and
2) a pointer value that doesn't itself point to a heap object.

When both those conditions are true, we can avoid the write barrier.
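
For example (an illustrative sketch, not code from this CL; the names
are invented), both conditions hold for the pointer store in newNode:

    type node struct {
        next *node
        val  int
    }

    var sentinel node // a global, so &sentinel is not a heap pointer

    func newNode() *node {
        n := new(node)     // runtime.newobject returns zeroed memory (condition 1)
        n.next = &sentinel // stores a non-heap pointer (condition 2): no barrier
        return n
    }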

This CL detects case 1 by looking for writes to known-zeroed
locations. The results of runtime.newobject are zeroed, so we do
simple tracking of which parts of such an object have been written,
letting us determine which parts remain zero at each write.
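
A worked example (hypothetical, assuming 8-byte pointers): after
p := new([3]*int), bytes [0,24) of *p are known zero, and each store
into the zeroed range shrinks the region recorded for the resulting
memory state:

    var g int

    func f(q *int) *[3]*int {
        p := new([3]*int) // known-zero region: [0,24)
        p[0] = &g         // global ptr into zeroed slot: no barrier; now [8,24)
        p[1] = nil        // nil ptr into zeroed slot: no barrier; now [16,24)
        p[2] = q          // q may point into the heap: the barrier stays
        return p
    }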

This CL detects case 2 by looking for addresses of globals (including
the types and itabs which are used in interfaces) and for nil pointers.

Makes cmd/go 0.3% smaller. Individual cases, like the slice
literal in #29573, can shrink much more.

TODO: the same mechanism could also be used to remove stores of
actual zero values.
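
E.g. (an illustrative sketch, not from this CL):

    p := new([2]int)
    p[0] = 0 // destination is known zero, so this store could be dropped

The zero-region tracking already identifies such stores; dropping them
is left for a later change.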

Update #29573

Change-Id: Ie74a3533775ea88da0495ba02458391e5db26cb9
Reviewed-on: https://go-review.googlesource.com/c/go/+/156363
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Author: Keith Randall, 2019-01-04 17:34:33 -08:00 (committed by Keith Randall)
Parent: 08751259b7
Commit: ca36af215f
3 changed files with 145 additions and 13 deletions

src/cmd/compile/internal/gc/sinit.go

@@ -729,6 +729,7 @@ func fixedlit(ctxt initContext, kind initKind, n *Node, var_ *Node, init *Nodes)
 			if r.Sym.IsBlank() {
 				return nblank, r.Left
 			}
+			setlineno(r)
 			return nodSym(ODOT, var_, r.Sym), r.Left
 		}
 	default:
@@ -756,7 +757,7 @@ func fixedlit(ctxt initContext, kind initKind, n *Node, var_ *Node, init *Nodes)
 		}
 
 		// build list of assignments: var[index] = expr
-		setlineno(value)
+		setlineno(a)
 		a = nod(OAS, a, value)
 		a = typecheck(a, ctxStmt)
 		switch kind {

src/cmd/compile/internal/ssa/writebarrier.go

@@ -11,9 +11,18 @@ import (
 	"strings"
 )
 
+// A ZeroRegion records a range of an object which is known to be zero.
+// A ZeroRegion only applies to a single memory state.
+type ZeroRegion struct {
+	base *Value
+	min  int64
+	max  int64
+}
+
 // needwb reports whether we need write barrier for store op v.
 // v must be Store/Move/Zero.
-func needwb(v *Value) bool {
+// zeroes provides known zero information (keyed by ID of memory-type values).
+func needwb(v *Value, zeroes map[ID]ZeroRegion) bool {
 	t, ok := v.Aux.(*types.Type)
 	if !ok {
 		v.Fatalf("store aux is not a type: %s", v.LongString())
@@ -24,14 +33,24 @@ func needwb(v *Value) bool {
 	if IsStackAddr(v.Args[0]) {
 		return false // write on stack doesn't need write barrier
 	}
-	if v.Op == OpStore && IsGlobalAddr(v.Args[1]) && IsNewObject(v.Args[0], v.MemoryArg()) {
-		// Storing pointers to non-heap locations into a fresh object doesn't need a write barrier.
-		return false
-	}
 	if v.Op == OpMove && IsReadOnlyGlobalAddr(v.Args[1]) && IsNewObject(v.Args[0], v.MemoryArg()) {
 		// Copying data from readonly memory into a fresh object doesn't need a write barrier.
 		return false
 	}
+	if v.Op == OpStore && IsGlobalAddr(v.Args[1]) {
+		// Storing pointers to non-heap locations into zeroed memory doesn't need a write barrier.
+		ptr := v.Args[0]
+		var off int64
+		size := v.Aux.(*types.Type).Size()
+		for ptr.Op == OpOffPtr {
+			off += ptr.AuxInt
+			ptr = ptr.Args[0]
+		}
+		z := zeroes[v.MemoryArg().ID]
+		if ptr == z.base && off >= z.min && off+size <= z.max {
+			return false
+		}
+	}
 	return true
 }
 
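The OffPtr loop above reduces a pointer expression to a base value plus a
constant byte offset before consulting the zero map. As a standalone sketch
(a hypothetical helper, not part of the CL):

    // baseAndOffset peels OffPtr wrappers, accumulating their constant
    // offsets: for (OffPtr [8] (OffPtr [16] p)) it returns p and 24.
    func baseAndOffset(ptr *Value) (*Value, int64) {
        var off int64
        for ptr.Op == OpOffPtr {
            off += ptr.AuxInt
            ptr = ptr.Args[0]
        }
        return ptr, off
    }
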
@@ -58,6 +77,7 @@ func writebarrier(f *Func) {
 	var sset *sparseSet
 	var storeNumber []int32
 
+	zeroes := f.computeZeroMap()
 	for _, b := range f.Blocks { // range loop is safe since the blocks we added contain no stores to expand
 		// first, identify all the stores that need to insert a write barrier.
 		// mark them with WB ops temporarily. record presence of WB ops.
@@ -65,7 +85,7 @@ func writebarrier(f *Func) {
 		for _, v := range b.Values {
 			switch v.Op {
 			case OpStore, OpMove, OpZero:
-				if needwb(v) {
+				if needwb(v, zeroes) {
 					switch v.Op {
 					case OpStore:
 						v.Op = OpStoreWB
@@ -301,6 +321,87 @@ func writebarrier(f *Func) {
 	}
 }
 
+// computeZeroMap returns a map from an ID of a memory value to
+// a set of locations that are known to be zeroed at that memory value.
+func (f *Func) computeZeroMap() map[ID]ZeroRegion {
+	// Keep track of which parts of memory are known to be zero.
+	// This helps with removing write barriers for various initialization patterns.
+	// This analysis is conservative. We only keep track, for each memory state, of
+	// a single constant range of a single object which is known to be zero.
+	zeroes := map[ID]ZeroRegion{}
+	// Find new objects.
+	for _, b := range f.Blocks {
+		for _, v := range b.Values {
+			if v.Op != OpLoad {
+				continue
+			}
+			mem := v.MemoryArg()
+			if IsNewObject(v, mem) {
+				zeroes[mem.ID] = ZeroRegion{v, 0, v.Type.Elem().Size()}
+			}
+		}
+	}
+	// Find stores to those new objects.
+	for {
+		changed := false
+		for _, b := range f.Blocks {
+			// Note: iterating forwards helps convergence, as values are
+			// typically (but not always!) in store order.
+			for _, v := range b.Values {
+				if v.Op != OpStore {
+					continue
+				}
+				z, ok := zeroes[v.MemoryArg().ID]
+				if !ok {
+					continue
+				}
+				ptr := v.Args[0]
+				var off int64
+				size := v.Aux.(*types.Type).Size()
+				for ptr.Op == OpOffPtr {
+					off += ptr.AuxInt
+					ptr = ptr.Args[0]
+				}
+				if ptr != z.base {
+					// Different base object - we don't know anything.
+					// We could even be writing to the base object we know
+					// about, but through an aliased but offset pointer.
+					// So we have to throw all the zero information we have away.
+					continue
+				}
+				if off < z.min || off+size > z.max {
+					// Writing, at least partially, outside the known zeroes.
+					// We could salvage some zero information, but probably
+					// not worth it.
+					continue
+				}
+				// We now know we're storing to a zeroed area.
+				// We need to make a smaller zero range for the result of this store.
+				if off == z.min {
+					z.min += size
+				} else if off+size == z.max {
+					z.max -= size
+				} else {
+					// The store splits the known zero range in two.
+					// Keep track of the upper one, as we tend to initialize
+					// things in increasing memory order.
+					// TODO: keep track of larger one instead?
+					z.min = off + size
+				}
+				// Save updated zero range.
+				if zeroes[v.ID] != z {
+					zeroes[v.ID] = z
+					changed = true
+				}
+			}
+		}
+		if !changed {
+			break
+		}
+	}
+	return zeroes
+}
+
 // wbcall emits write barrier runtime call in b, returns memory.
 // if valIsVolatile, it moves val into temp space before making the call.
 func wbcall(pos src.XPos, b *Block, fn, typ *obj.LSym, ptr, val, mem, sp, sb *Value, valIsVolatile bool) *Value {
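
A note on the fixed-point loop in computeZeroMap above: within a block, SSA
values are not guaranteed to appear in memory-chain order, so a store can be
visited before the zero region of its memory argument has been recorded. A
hypothetical ordering (value numbers invented):

    v9 = Store {t} ptr val v7 // listed in b.Values before its memory arg v7
    v7 = Store {t} ptr val v5 // v5's region (from newobject) seeds v7's

The first sweep records a region for v7 but skips v9; the second sweep picks
v9 up, and the loop stops once a sweep changes nothing.
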
@@ -373,9 +474,15 @@ func IsStackAddr(v *Value) bool {
 	return false
 }
 
-// IsGlobalAddr reports whether v is known to be an address of a global.
+// IsGlobalAddr reports whether v is known to be an address of a global (or nil).
 func IsGlobalAddr(v *Value) bool {
-	return v.Op == OpAddr && v.Args[0].Op == OpSB
+	if v.Op == OpAddr && v.Args[0].Op == OpSB {
+		return true // address of a global
+	}
+	if v.Op == OpConst64 || v.Op == OpConst32 {
+		return true // nil, the only possible pointer constant
+	}
+	return false
 }
 
 // IsReadOnlyGlobalAddr reports whether v is known to be an address of a read-only global.
@@ -388,10 +495,6 @@ func IsReadOnlyGlobalAddr(v *Value) bool {
 }
 
 // IsNewObject reports whether v is a pointer to a freshly allocated & zeroed object at memory state mem.
-// TODO: Be more precise. We really want "IsNilPointer" for the particular field in question.
-// Right now, we can only detect a new object before any writes have been done to it.
-// We could ignore non-pointer writes, writes to offsets which
-// are known not to overlap the write in question, etc.
 func IsNewObject(v *Value, mem *Value) bool {
 	if v.Op != OpLoad {
 		return false

test/writebarrier.go

@@ -261,3 +261,31 @@ func f24() **int {
 func f25() []string {
 	return []string{"abc", "def", "ghi"} // no write barrier here
 }
+
+type T26 struct {
+	a, b, c int
+	d, e, f *int
+}
+
+var g26 int
+
+func f26(p *int) *T26 { // see issue 29573
+	return &T26{
+		a: 5,
+		b: 6,
+		c: 7,
+		d: &g26, // no write barrier: global ptr
+		e: nil,  // no write barrier: nil ptr
+		f: p,    // ERROR "write barrier"
+	}
+}
+
+func f27(p *int) []interface{} {
+	return []interface{}{
+		nil,         // no write barrier: zeroed memory, nil ptr
+		(*T26)(nil), // no write barrier: zeroed memory, type ptr & nil ptr
+		&g26,        // no write barrier: zeroed memory, type ptr & global ptr
+		7,           // no write barrier: zeroed memory, type ptr & global ptr
+		p,           // ERROR "write barrier"
+	}
+}
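
For context: f26 and f27 extend the compiler's errorcheck test for write
barriers (test/writebarrier.go). A // ERROR "write barrier" comment asserts
that the compiler, run with write-barrier diagnostics enabled, reports a
barrier on that line; every line without the annotation must produce no such
diagnostic.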