cmd/compile: fix OpArg decomposer for registers in expandCalls

Includes test taken from
https://github.com/golang/go/issues/44816#issuecomment-791618179
and improved debugging output.

Updates #44816

Change-Id: I94aeb9c5255f175fe80727be29d218bad54bf7ea
Reviewed-on: https://go-review.googlesource.com/c/go/+/299389
Trust: David Chase <drchase@google.com>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
This commit is contained in:
David Chase 2021-03-05 19:56:13 -05:00
parent 48895d021b
commit 5eb9912084
5 changed files with 166 additions and 43 deletions

View file

@ -78,7 +78,8 @@ func (rc *registerCursor) String() string {
regs = regs + x.LongString()
}
}
return fmt.Sprintf("RCSR{storeDest=%v, regsLen=%d, nextSlice=%d, regValues=[%s], config=%v", dest, rc.regsLen, rc.nextSlice, regs, rc.config)
// not printing the config because that has not been useful
return fmt.Sprintf("RCSR{storeDest=%v, regsLen=%d, nextSlice=%d, regValues=[%s]}", dest, rc.regsLen, rc.nextSlice, regs)
}
// next effectively post-increments the register cursor; the receiver is advanced,
@ -189,6 +190,7 @@ type expandState struct {
commonSelectors map[selKey]*Value // used to de-dupe selectors
commonArgs map[selKey]*Value // used to de-dupe OpArg/OpArgIntReg/OpArgFloatReg
memForCall map[ID]*Value // For a call, need to know the unique selector that gets the mem.
indentLevel int // Indentation for debugging recursion
}
// intPairTypes returns the pair of 32-bit int types needed to encode a 64-bit integer type on a target
@ -267,6 +269,19 @@ func ParamAssignmentForArgName(f *Func, name *ir.Name) *abi.ABIParamAssignment {
panic(fmt.Errorf("Did not match param %v in prInfo %+v", name, abiInfo.InParams()))
}
// indent increments (or decrements) the indentation.
func (x *expandState) indent(n int) {
x.indentLevel += n
}
// Printf does an indented fmt.Printf on te format and args.
func (x *expandState) Printf(format string, a ...interface{}) (n int, err error) {
if x.indentLevel > 0 {
fmt.Printf("%[1]*s", x.indentLevel, "")
}
return fmt.Printf(format, a...)
}
// Calls that need lowering have some number of inputs, including a memory input,
// and produce a tuple of (value1, value2, ..., mem) where valueK may or may not be SSA-able.
@ -286,7 +301,9 @@ func ParamAssignmentForArgName(f *Func, name *ir.Name) *abi.ABIParamAssignment {
// TODO when registers really arrive, must also decompose anything split across two registers or registers and memory.
func (x *expandState) rewriteSelect(leaf *Value, selector *Value, offset int64, regOffset Abi1RO) []LocalSlot {
if x.debug {
fmt.Printf("rewriteSelect(%s, %s, %d)\n", leaf.LongString(), selector.LongString(), offset)
x.indent(3)
defer x.indent(-3)
x.Printf("rewriteSelect(%s, %s, %d)\n", leaf.LongString(), selector.LongString(), offset)
}
var locs []LocalSlot
leafType := leaf.Type
@ -308,7 +325,7 @@ func (x *expandState) rewriteSelect(leaf *Value, selector *Value, offset int64,
x.f.Fatalf("Unexpected OpArg type, selector=%s, leaf=%s\n", selector.LongString(), leaf.LongString())
}
if x.debug {
fmt.Printf("\tOpArg, break\n")
x.Printf("---OpArg, break\n")
}
break
}
@ -427,7 +444,7 @@ func (x *expandState) rewriteSelect(leaf *Value, selector *Value, offset int64,
w := call.Block.NewValue2(leaf.Pos, OpLoad, leafType, off, call)
leaf.copyOf(w)
if x.debug {
fmt.Printf("\tnew %s\n", w.LongString())
x.Printf("---new %s\n", w.LongString())
}
}
}
@ -539,9 +556,86 @@ func (x *expandState) rewriteDereference(b *Block, base, a, mem *Value, offset,
return mem
}
// decomposeArgOrLoad is a helper for storeArgOrLoad.
// It decomposes a Load or an Arg into smaller parts, parameterized by the decomposeOne and decomposeTwo functions
// passed to it, and returns the new mem.
// decomposeArg is a helper for storeArgOrLoad.
// It decomposes a Load or an Arg into smaller parts and returns the new mem.
// If the type does not match one of the expected aggregate types, it returns nil instead.
// Parameters:
// pos -- the location of any generated code.
// b -- the block into which any generated code should normally be placed
// source -- the value, possibly an aggregate, to be stored.
// mem -- the mem flowing into this decomposition (loads depend on it, stores updated it)
// t -- the type of the value to be stored
// storeOffset -- if the value is stored in memory, it is stored at base (see storeRc) + storeOffset
// loadRegOffset -- regarding source as a value in registers, the register offset in ABI1. Meaningful only if source is OpArg.
// storeRc -- storeRC; if the value is stored in registers, this specifies the registers.
// StoreRc also identifies whether the target is registers or memory, and has the base for the store operation.
func (x *expandState) decomposeArg(pos src.XPos, b *Block, source, mem *Value, t *types.Type, storeOffset int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value {
pa := x.prAssignForArg(source)
if len(pa.Registers) > 0 {
// Handle the in-registers case directly
rts, offs := pa.RegisterTypesAndOffsets()
last := loadRegOffset + x.regWidth(t)
if offs[loadRegOffset] != 0 {
panic(fmt.Errorf("offset %d of requested register %d should be zero", offs[loadRegOffset], loadRegOffset))
}
for i := loadRegOffset; i < last; i++ {
rt := rts[i]
off := offs[i]
w := x.commonArgs[selKey{source, off, rt.Width, rt}]
if w == nil {
w = x.newArgToMemOrRegs(source, w, off, i, rt, pos)
}
mem = x.storeArgOrLoad(pos, b, w, mem, rt, storeOffset+off, i, storeRc.next(rt))
}
return mem
}
u := source.Type
switch u.Kind() {
case types.TARRAY:
elem := u.Elem()
elemRO := x.regWidth(elem)
for i := int64(0); i < u.NumElem(); i++ {
elemOff := i * elem.Size()
mem = storeOneArg(x, pos, b, source, mem, elem, elemOff, storeOffset+elemOff, loadRegOffset, storeRc.next(elem))
loadRegOffset += elemRO
pos = pos.WithNotStmt()
}
return mem
case types.TSTRUCT:
for i := 0; i < u.NumFields(); i++ {
fld := u.Field(i)
mem = storeOneArg(x, pos, b, source, mem, fld.Type, fld.Offset, storeOffset+fld.Offset, loadRegOffset, storeRc.next(fld.Type))
loadRegOffset += x.regWidth(fld.Type)
pos = pos.WithNotStmt()
}
return mem
case types.TINT64, types.TUINT64:
if t.Width == x.regSize {
break
}
tHi, tLo := x.intPairTypes(t.Kind())
mem = storeOneArg(x, pos, b, source, mem, tHi, x.hiOffset, storeOffset+x.hiOffset, loadRegOffset+x.hiRo, storeRc.plus(x.hiRo))
pos = pos.WithNotStmt()
return storeOneArg(x, pos, b, source, mem, tLo, x.lowOffset, storeOffset+x.lowOffset, loadRegOffset+x.loRo, storeRc.plus(x.loRo))
case types.TINTER:
return storeTwoArg(x, pos, b, source, mem, x.typs.Uintptr, x.typs.BytePtr, 0, storeOffset, loadRegOffset, storeRc)
case types.TSTRING:
return storeTwoArg(x, pos, b, source, mem, x.typs.BytePtr, x.typs.Int, 0, storeOffset, loadRegOffset, storeRc)
case types.TCOMPLEX64:
return storeTwoArg(x, pos, b, source, mem, x.typs.Float32, x.typs.Float32, 0, storeOffset, loadRegOffset, storeRc)
case types.TCOMPLEX128:
return storeTwoArg(x, pos, b, source, mem, x.typs.Float64, x.typs.Float64, 0, storeOffset, loadRegOffset, storeRc)
case types.TSLICE:
mem = storeOneArg(x, pos, b, source, mem, x.typs.BytePtr, 0, storeOffset, loadRegOffset, storeRc.next(x.typs.BytePtr))
return storeTwoArg(x, pos, b, source, mem, x.typs.Int, x.typs.Int, x.ptrSize, storeOffset+x.ptrSize, loadRegOffset+RO_slice_len, storeRc)
}
return nil
}
// decomposeLoad is a helper for storeArgOrLoad.
// It decomposes a Load into smaller parts and returns the new mem.
// If the type does not match one of the expected aggregate types, it returns nil instead.
// Parameters:
// pos -- the location of any generated code.
@ -555,11 +649,7 @@ func (x *expandState) rewriteDereference(b *Block, base, a, mem *Value, offset,
// StoreRc also identifies whether the target is registers or memory, and has the base for the store operation.
//
// TODO -- this needs cleanup; it just works for SSA-able aggregates, and won't fully generalize to register-args aggregates.
func (x *expandState) decomposeArgOrLoad(pos src.XPos, b *Block, source, mem *Value, t *types.Type, offset int64, loadRegOffset Abi1RO, storeRc registerCursor,
// For decompose One and Two, the additional offArg provides the offset from the beginning of "source", if it is in memory.
// offStore is combined to base to obtain a store destionation, like "offset" of decomposeArgOrLoad
decomposeOne func(x *expandState, pos src.XPos, b *Block, source, mem *Value, t1 *types.Type, offArg, offStore int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value,
decomposeTwo func(x *expandState, pos src.XPos, b *Block, source, mem *Value, t1, t2 *types.Type, offArg, offStore int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value) *Value {
func (x *expandState) decomposeLoad(pos src.XPos, b *Block, source, mem *Value, t *types.Type, offset int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value {
u := source.Type
switch u.Kind() {
case types.TARRAY:
@ -567,7 +657,7 @@ func (x *expandState) decomposeArgOrLoad(pos src.XPos, b *Block, source, mem *Va
elemRO := x.regWidth(elem)
for i := int64(0); i < u.NumElem(); i++ {
elemOff := i * elem.Size()
mem = decomposeOne(x, pos, b, source, mem, elem, elemOff, offset+elemOff, loadRegOffset, storeRc.next(elem))
mem = storeOneLoad(x, pos, b, source, mem, elem, elemOff, offset+elemOff, loadRegOffset, storeRc.next(elem))
loadRegOffset += elemRO
pos = pos.WithNotStmt()
}
@ -575,7 +665,7 @@ func (x *expandState) decomposeArgOrLoad(pos src.XPos, b *Block, source, mem *Va
case types.TSTRUCT:
for i := 0; i < u.NumFields(); i++ {
fld := u.Field(i)
mem = decomposeOne(x, pos, b, source, mem, fld.Type, fld.Offset, offset+fld.Offset, loadRegOffset, storeRc.next(fld.Type))
mem = storeOneLoad(x, pos, b, source, mem, fld.Type, fld.Offset, offset+fld.Offset, loadRegOffset, storeRc.next(fld.Type))
loadRegOffset += x.regWidth(fld.Type)
pos = pos.WithNotStmt()
}
@ -585,20 +675,20 @@ func (x *expandState) decomposeArgOrLoad(pos src.XPos, b *Block, source, mem *Va
break
}
tHi, tLo := x.intPairTypes(t.Kind())
mem = decomposeOne(x, pos, b, source, mem, tHi, x.hiOffset, offset+x.hiOffset, loadRegOffset+x.hiRo, storeRc.plus(x.hiRo))
mem = storeOneLoad(x, pos, b, source, mem, tHi, x.hiOffset, offset+x.hiOffset, loadRegOffset+x.hiRo, storeRc.plus(x.hiRo))
pos = pos.WithNotStmt()
return decomposeOne(x, pos, b, source, mem, tLo, x.lowOffset, offset+x.lowOffset, loadRegOffset+x.loRo, storeRc.plus(x.loRo))
return storeOneLoad(x, pos, b, source, mem, tLo, x.lowOffset, offset+x.lowOffset, loadRegOffset+x.loRo, storeRc.plus(x.loRo))
case types.TINTER:
return decomposeTwo(x, pos, b, source, mem, x.typs.Uintptr, x.typs.BytePtr, 0, offset, loadRegOffset, storeRc)
return storeTwoLoad(x, pos, b, source, mem, x.typs.Uintptr, x.typs.BytePtr, 0, offset, loadRegOffset, storeRc)
case types.TSTRING:
return decomposeTwo(x, pos, b, source, mem, x.typs.BytePtr, x.typs.Int, 0, offset, loadRegOffset, storeRc)
return storeTwoLoad(x, pos, b, source, mem, x.typs.BytePtr, x.typs.Int, 0, offset, loadRegOffset, storeRc)
case types.TCOMPLEX64:
return decomposeTwo(x, pos, b, source, mem, x.typs.Float32, x.typs.Float32, 0, offset, loadRegOffset, storeRc)
return storeTwoLoad(x, pos, b, source, mem, x.typs.Float32, x.typs.Float32, 0, offset, loadRegOffset, storeRc)
case types.TCOMPLEX128:
return decomposeTwo(x, pos, b, source, mem, x.typs.Float64, x.typs.Float64, 0, offset, loadRegOffset, storeRc)
return storeTwoLoad(x, pos, b, source, mem, x.typs.Float64, x.typs.Float64, 0, offset, loadRegOffset, storeRc)
case types.TSLICE:
mem = decomposeOne(x, pos, b, source, mem, x.typs.BytePtr, 0, offset, loadRegOffset, storeRc.next(x.typs.BytePtr))
return decomposeTwo(x, pos, b, source, mem, x.typs.Int, x.typs.Int, x.ptrSize, offset+x.ptrSize, loadRegOffset+RO_slice_len, storeRc)
mem = storeOneLoad(x, pos, b, source, mem, x.typs.BytePtr, 0, offset, loadRegOffset, storeRc.next(x.typs.BytePtr))
return storeTwoLoad(x, pos, b, source, mem, x.typs.Int, x.typs.Int, x.ptrSize, offset+x.ptrSize, loadRegOffset+RO_slice_len, storeRc)
}
return nil
}
@ -642,7 +732,9 @@ func storeTwoLoad(x *expandState, pos src.XPos, b *Block, source, mem *Value, t1
// If it does not reach a Load or an Arg, nothing happens; this allows a little freedom in phase ordering.
func (x *expandState) storeArgOrLoad(pos src.XPos, b *Block, source, mem *Value, t *types.Type, offset int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value {
if x.debug {
fmt.Printf("\tstoreArgOrLoad(%s; %s; %s; %d; %s)\n", source.LongString(), mem.String(), t.String(), offset, storeRc.String())
x.indent(3)
defer x.indent(-3)
x.Printf("storeArgOrLoad(%s; %s; %s; %d; %s)\n", source.LongString(), mem.String(), t.String(), offset, storeRc.String())
}
// Start with Opcodes that can be disassembled
@ -651,13 +743,13 @@ func (x *expandState) storeArgOrLoad(pos src.XPos, b *Block, source, mem *Value,
return x.storeArgOrLoad(pos, b, source.Args[0], mem, t, offset, loadRegOffset, storeRc)
case OpLoad, OpDereference:
ret := x.decomposeArgOrLoad(pos, b, source, mem, t, offset, loadRegOffset, storeRc, storeOneLoad, storeTwoLoad)
ret := x.decomposeLoad(pos, b, source, mem, t, offset, loadRegOffset, storeRc)
if ret != nil {
return ret
}
case OpArg:
ret := x.decomposeArgOrLoad(pos, b, source, mem, t, offset, loadRegOffset, storeRc, storeOneArg, storeTwoArg)
ret := x.decomposeArg(pos, b, source, mem, t, offset, loadRegOffset, storeRc)
if ret != nil {
return ret
}
@ -823,7 +915,7 @@ func (x *expandState) storeArgOrLoad(pos src.XPos, b *Block, source, mem *Value,
s = b.NewValue3A(pos, OpStore, types.TypeMem, t, dst, source, mem)
}
if x.debug {
fmt.Printf("\t\tstoreArg returns %s, storeRc=%s\n", s.LongString(), storeRc.String())
x.Printf("-->storeArg returns %s, storeRc=%s\n", s.LongString(), storeRc.String())
}
return s
}
@ -860,7 +952,7 @@ func (x *expandState) rewriteArgs(v *Value, firstArg int) (*Value, []*Value) {
aOffset = aux.OffsetOfArg(auxI)
}
if x.debug {
fmt.Printf("storeArg %s, %v, %d\n", a.LongString(), aType, aOffset)
x.Printf("storeArg %s, %v, %d\n", a.LongString(), aType, aOffset)
}
rc.init(aRegs, aux.abiInfo, result, x.sp)
mem = x.storeArgOrLoad(pos, v.Block, a, mem, aType, aOffset, 0, rc)
@ -910,7 +1002,7 @@ func expandCalls(f *Func) {
}
if x.debug {
fmt.Printf("\nexpandsCalls(%s)\n", f.Name)
x.Printf("\nexpandsCalls(%s)\n", f.Name)
}
// TODO if too slow, whole program iteration can be replaced w/ slices of appropriate values, accumulated in first loop here.
@ -1055,7 +1147,7 @@ func expandCalls(f *Func) {
case OpStructSelect, OpArraySelect, OpSelectN, OpArg:
val2Preds[w] += 1
if x.debug {
fmt.Printf("v2p[%s] = %d\n", w.LongString(), val2Preds[w])
x.Printf("v2p[%s] = %d\n", w.LongString(), val2Preds[w])
}
}
fallthrough
@ -1064,7 +1156,7 @@ func expandCalls(f *Func) {
if _, ok := val2Preds[v]; !ok {
val2Preds[v] = 0
if x.debug {
fmt.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v])
x.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v])
}
}
@ -1075,7 +1167,7 @@ func expandCalls(f *Func) {
if _, ok := val2Preds[v]; !ok {
val2Preds[v] = 0
if x.debug {
fmt.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v])
x.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v])
}
}
@ -1203,7 +1295,7 @@ func expandCalls(f *Func) {
for i, v := range allOrdered {
if x.debug {
b := v.Block
fmt.Printf("allOrdered[%d] = b%d, %s, uses=%d\n", i, b.ID, v.LongString(), v.Uses)
x.Printf("allOrdered[%d] = b%d, %s, uses=%d\n", i, b.ID, v.LongString(), v.Uses)
}
if v.Uses == 0 {
v.reset(OpInvalid)
@ -1305,7 +1397,7 @@ func (x *expandState) rewriteArgToMemOrRegs(v *Value) *Value {
// newArgToMemOrRegs either rewrites toReplace into an OpArg referencing memory or into an OpArgXXXReg to a register,
// or rewrites it into a copy of the appropriate OpArgXXX. The actual OpArgXXX is determined by combining baseArg (an OpArg)
// with offset, regOffset, and t to determine which portion of it reference (either all or a part, in memory or in registers).
// with offset, regOffset, and t to determine which portion of it to reference (either all or a part, in memory or in registers).
func (x *expandState) newArgToMemOrRegs(baseArg, toReplace *Value, offset int64, regOffset Abi1RO, t *types.Type, pos src.XPos) *Value {
key := selKey{baseArg, offset, t.Width, t}
w := x.commonArgs[key]
@ -1336,7 +1428,7 @@ func (x *expandState) newArgToMemOrRegs(baseArg, toReplace *Value, offset int64,
w := baseArg.Block.NewValue0IA(pos, OpArg, t, auxInt, aux)
x.commonArgs[key] = w
if x.debug {
fmt.Printf("\tnew %s\n", w.LongString())
x.Printf("---new %s\n", w.LongString())
}
if toReplace != nil {
toReplace.copyOf(w)
@ -1364,7 +1456,7 @@ func (x *expandState) newArgToMemOrRegs(baseArg, toReplace *Value, offset int64,
} else {
w := baseArg.Block.NewValue0IA(pos, op, t, auxInt, aux)
if x.debug {
fmt.Printf("\tnew %s\n", w.LongString())
x.Printf("---new %s\n", w.LongString())
}
x.commonArgs[key] = w
if toReplace != nil {

View file

@ -8,7 +8,7 @@
// license that can be found in the LICENSE file.
// wasm is excluded because the compiler chatter about register abi pragma ends up
// on stdout, and causes the expected output to not match.
// on stdout, and causes the expected output to not match.
package main
@ -37,13 +37,12 @@ func H(spp stringPairPair) string {
//go:registerparams
//go:noinline
func G(a,b,c,d string) stringPairPair {
return stringPairPair{stringPair{a,b},stringPair{c,d}}
func G(a, b, c, d string) stringPairPair {
return stringPairPair{stringPair{a, b}, stringPair{c, d}}
}
func main() {
spp := G("this","is","a","test")
spp := G("this", "is", "a", "test")
s := H(spp)
gotVsWant(s, "this is a test")
}

View file

@ -0,0 +1,30 @@
// run
//go:build !wasm
// +build !wasm
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import "fmt"
//go:registerparams
//go:noinline
func passStruct6(a Struct6) Struct6 {
return a
}
type Struct6 struct {
Struct1
}
type Struct1 struct {
A, B, C uint
}
func main() {
fmt.Println(passStruct6(Struct6{Struct1{1, 2, 3}}))
}

View file

@ -0,0 +1 @@
{{1 2 3}}

View file

@ -1,5 +1,6 @@
// run
//go:build !wasm
// +build !wasm
// Copyright 2021 The Go Authors. All rights reserved.
@ -16,7 +17,7 @@ var sink *string
type toobig struct {
// 6 words will not SSA but will fit in registers
a,b,c string
a, b, c string
}
//go:registerparams
@ -27,8 +28,8 @@ func H(x toobig) string {
//go:registerparams
//go:noinline
func I(a,b,c string) toobig {
return toobig{a,b,c}
func I(a, b, c string) toobig {
return toobig{a, b, c}
}
func main() {