diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go b/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go
index 82d21b48f0..bc47e1b441 100644
--- a/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go
@@ -169,7 +169,7 @@ func init() {
 		{name: "REMW", argLength: 2, reg: gp21, asm: "REMW", typ: "Int32"},
 		{name: "REMUW", argLength: 2, reg: gp21, asm: "REMUW", typ: "UInt32"},
 
-		{name: "MOVaddr", argLength: 1, reg: gp11sb, asm: "MOV", aux: "SymOff", rematerializeable: true, symEffect: "RdWr"}, // arg0 + auxint + offset encoded in aux
+		{name: "MOVaddr", argLength: 1, reg: gp11sb, asm: "MOV", aux: "SymOff", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxint + offset encoded in aux
 		// auxint+aux == add auxint and the offset of the symbol in aux (if any) to the effective address
 
 		{name: "MOVDconst", reg: gp01, asm: "MOV", typ: "UInt64", aux: "Int64", rematerializeable: true}, // auxint
diff --git a/src/cmd/compile/internal/ssa/_gen/S390XOps.go b/src/cmd/compile/internal/ssa/_gen/S390XOps.go
index f2184ad11f..636893a3c5 100644
--- a/src/cmd/compile/internal/ssa/_gen/S390XOps.go
+++ b/src/cmd/compile/internal/ssa/_gen/S390XOps.go
@@ -418,8 +418,8 @@ func init() {
 		{name: "LEDBR", argLength: 1, reg: fp11, asm: "LEDBR"}, // convert float64 to float32
 		{name: "LDEBR", argLength: 1, reg: fp11, asm: "LDEBR"}, // convert float32 to float64
 
-		{name: "MOVDaddr", argLength: 1, reg: addr, aux: "SymOff", rematerializeable: true, symEffect: "Read"}, // arg0 + auxint + offset encoded in aux
-		{name: "MOVDaddridx", argLength: 2, reg: addridx, aux: "SymOff", symEffect: "Read"},                    // arg0 + arg1 + auxint + aux
+		{name: "MOVDaddr", argLength: 1, reg: addr, aux: "SymOff", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxint + offset encoded in aux
+		{name: "MOVDaddridx", argLength: 2, reg: addridx, aux: "SymOff", symEffect: "Addr"},                    // arg0 + arg1 + auxint + aux
 		// auxint+aux == add auxint and the offset of the symbol in aux (if any) to the effective address
 
 		{name: "MOVBZload", argLength: 2, reg: gpload, asm: "MOVBZ", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"}, // load byte from arg0+auxint+aux. arg1=mem. Zero extend.
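For context on the two symEffect corrections above: symEffect records how an op touches the symbol in its aux field, and optimization passes that reason about symbol reads and writes consult it. Address-materializing ops such as RISCV64 MOVaddr and S390X MOVDaddr neither read nor write the symbol's memory; they only take its address, so Addr is the accurate classification, and the old RdWr/Read values overstated the effect. A rough source-level intuition, as a minimal sketch (the package and function names are illustrative, not taken from this CL):

```go
package symeffect

var global int

// Each function body corresponds to a different symEffect on "global".
func load() int   { return global }  // reads its memory: symEffect Read
func store(x int) { global = x }     // writes its memory: symEffect Write
func addr() *int  { return &global } // only takes its address: symEffect Addr
```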
diff --git a/src/cmd/compile/internal/ssa/debug_lines_test.go b/src/cmd/compile/internal/ssa/debug_lines_test.go
index ff651f6862..a9d33b6b0a 100644
--- a/src/cmd/compile/internal/ssa/debug_lines_test.go
+++ b/src/cmd/compile/internal/ssa/debug_lines_test.go
@@ -115,7 +115,7 @@ func TestInlineLines(t *testing.T) {
 		t.Skip("only runs for amd64 unless -arch explicitly supplied")
 	}
 
-	want := [][]int{{3}, {4, 10}, {4, 10, 16}, {4, 10}, {4, 11, 16}, {4, 11}, {4}, {5, 10}, {5, 10, 16}, {5, 10}, {5, 11, 16}, {5, 11}, {5}}
+	want := [][]int{{3}, {3}, {4, 10}, {4, 10, 16}, {4, 10}, {4, 11, 16}, {4, 11}, {4}, {5, 10}, {5, 10, 16}, {5, 10}, {5, 11, 16}, {5, 11}, {5}}
 	testInlineStack(t, "inline-dump.go", "f", want)
 }
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 597dc9c72e..9db2aec462 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -31047,7 +31047,7 @@ var opcodeTable = [...]opInfo{
 		auxType:           auxSymOff,
 		argLen:            1,
 		rematerializeable: true,
-		symEffect:         SymRdWr,
+		symEffect:         SymAddr,
 		asm:               riscv.AMOV,
 		reg: regInfo{
 			inputs: []inputInfo{
@@ -34771,7 +34771,7 @@ var opcodeTable = [...]opInfo{
 		auxType:           auxSymOff,
 		argLen:            1,
 		rematerializeable: true,
-		symEffect:         SymRead,
+		symEffect:         SymAddr,
 		reg: regInfo{
 			inputs: []inputInfo{
 				{0, 4295000064}, // SP SB
@@ -34785,7 +34785,7 @@ var opcodeTable = [...]opInfo{
 		name:      "MOVDaddridx",
 		auxType:   auxSymOff,
 		argLen:    2,
-		symEffect: SymRead,
+		symEffect: SymAddr,
 		reg: regInfo{
 			inputs: []inputInfo{
 				{0, 4295000064}, // SP SB
diff --git a/src/cmd/compile/internal/ssa/schedule.go b/src/cmd/compile/internal/ssa/schedule.go
index d88c33f304..a69e406df2 100644
--- a/src/cmd/compile/internal/ssa/schedule.go
+++ b/src/cmd/compile/internal/ssa/schedule.go
@@ -7,17 +7,16 @@ package ssa
 import (
 	"cmd/compile/internal/base"
 	"cmd/compile/internal/types"
+	"cmd/internal/src"
 	"container/heap"
 	"sort"
 )
 
 const (
-	ScorePhi = iota // towards top of block
-	ScoreArg
+	ScorePhi       = iota // towards top of block
+	ScoreArg              // must occur at the top of the entry block
+	ScoreReadTuple        // must occur immediately after tuple-generating insn (or call)
 	ScoreNilCheck
-	ScoreReadTuple
-	ScoreVarDef
-	ScoreCarryChainTail
 	ScoreMemory
 	ScoreReadFlags
 	ScoreDefault
@@ -52,29 +51,36 @@ func (h ValHeap) Less(i, j int) bool {
 	sx := h.score[x.ID]
 	sy := h.score[y.ID]
 	if c := sx - sy; c != 0 {
-		return c > 0 // higher score comes later.
+		return c < 0 // lower scores come earlier.
 	}
+	// Note: only scores are required for correct scheduling.
+	// Everything else is just heuristics.
+
 	if x.Pos != y.Pos { // Favor in-order line stepping
-		return x.Pos.After(y.Pos)
+		if x.Block == x.Block.Func.Entry && x.Pos.IsStmt() != y.Pos.IsStmt() {
+			// In the entry block, put statement-marked instructions earlier.
+			return x.Pos.IsStmt() == src.PosIsStmt && y.Pos.IsStmt() != src.PosIsStmt
+		}
+		return x.Pos.Before(y.Pos)
 	}
 	if x.Op != OpPhi {
 		if c := len(x.Args) - len(y.Args); c != 0 {
-			return c < 0 // smaller args comes later
+			return c > 0 // smaller args come later
 		}
 	}
 	if c := x.Uses - y.Uses; c != 0 {
-		return c < 0 // smaller uses come later
+		return c > 0 // smaller uses come later
 	}
 	// These comparisons are fairly arbitrary.
 	// The goal here is stability in the face
 	// of unrelated changes elsewhere in the compiler.
 	if c := x.AuxInt - y.AuxInt; c != 0 {
-		return c > 0
+		return c < 0
 	}
 	if cmp := x.Type.Compare(y.Type); cmp != types.CMPeq {
-		return cmp == types.CMPgt
+		return cmp == types.CMPlt
 	}
-	return x.ID > y.ID
+	return x.ID < y.ID
 }
 
 func (op Op) isLoweredGetClosurePtr() bool {
@@ -93,11 +99,6 @@ func (op Op) isLoweredGetClosurePtr() bool {
 // reasonable valid schedule using a priority queue. TODO(khr):
 // schedule smarter.
 func schedule(f *Func) {
-	// For each value, the number of times it is used in the block
-	// by values that have not been scheduled yet.
-	uses := f.Cache.allocInt32Slice(f.NumValues())
-	defer f.Cache.freeInt32Slice(uses)
-
 	// reusable priority queue
 	priq := new(ValHeap)
 
@@ -105,16 +106,9 @@ func schedule(f *Func) {
 	score := f.Cache.allocInt8Slice(f.NumValues())
 	defer f.Cache.freeInt8Slice(score)
 
-	// scheduling order. We queue values in this list in reverse order.
-	// A constant bound allows this to be stack-allocated. 64 is
-	// enough to cover almost every schedule call.
-	order := make([]*Value, 0, 64)
-
 	// maps mem values to the next live memory value
 	nextMem := f.Cache.allocValueSlice(f.NumValues())
 	defer f.Cache.freeValueSlice(nextMem)
-	// additional pretend arguments for each Value. Used to enforce load/store ordering.
-	additionalArgs := make([][]*Value, f.NumValues())
 
 	for _, b := range f.Blocks {
 		// Compute score. Larger numbers are scheduled closer to the end of the block.
@@ -129,68 +123,40 @@ func schedule(f *Func) {
 					f.Fatalf("LoweredGetClosurePtr appeared outside of entry block, b=%s", b.String())
 				}
 				score[v.ID] = ScorePhi
-			case v.Op == OpAMD64LoweredNilCheck || v.Op == OpPPC64LoweredNilCheck ||
-				v.Op == OpARMLoweredNilCheck || v.Op == OpARM64LoweredNilCheck ||
-				v.Op == Op386LoweredNilCheck || v.Op == OpMIPS64LoweredNilCheck ||
-				v.Op == OpS390XLoweredNilCheck || v.Op == OpMIPSLoweredNilCheck ||
-				v.Op == OpRISCV64LoweredNilCheck || v.Op == OpWasmLoweredNilCheck ||
-				v.Op == OpLOONG64LoweredNilCheck:
+			case opcodeTable[v.Op].nilCheck:
 				// Nil checks must come before loads from the same address.
 				score[v.ID] = ScoreNilCheck
 			case v.Op == OpPhi:
 				// We want all the phis first.
 				score[v.ID] = ScorePhi
-			case v.Op == OpVarDef:
-				// We want all the vardefs next.
-				score[v.ID] = ScoreVarDef
 			case v.Op == OpArgIntReg || v.Op == OpArgFloatReg:
 				// In-register args must be scheduled as early as possible to ensure that the
 				// context register is not stomped. They should only appear in the entry block.
 				if b != f.Entry {
 					f.Fatalf("%s appeared outside of entry block, b=%s", v.Op, b.String())
 				}
-				score[v.ID] = ScorePhi
+				score[v.ID] = ScoreArg
 			case v.Op == OpArg:
 				// We want all the args as early as possible, for better debugging.
 				score[v.ID] = ScoreArg
 			case v.Type.IsMemory():
 				// Schedule stores as early as possible. This tends to
-				// reduce register pressure. It also helps make sure
-				// VARDEF ops are scheduled before the corresponding LEA.
+				// reduce register pressure.
 				score[v.ID] = ScoreMemory
 			case v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN:
-				if (v.Op == OpSelect1 || v.Op == OpSelect0) && (v.Args[0].isCarry() || v.Type.IsFlags()) {
-					// When the Select pseudo op is being used for a carry or flag from
-					// a tuple then score it as ScoreFlags so it happens later. This
-					// prevents the bit from being clobbered before it is used.
-					score[v.ID] = ScoreFlags
-				} else {
-					score[v.ID] = ScoreReadTuple
-				}
-			case v.isCarry():
-				if w := v.getCarryInput(); w != nil && w.Block == b {
-					// The producing op is not the final user of the carry bit. Its
-					// current score is one of unscored, Flags, or CarryChainTail.
-					// These occur if the producer has not been scored, another user
-					// of the producers carry flag was scored (there are >1 users of
-					// the carry out flag), or it was visited earlier and already
-					// scored CarryChainTail (and prove w is not a tail).
-					score[w.ID] = ScoreFlags
-				}
-				// Verify v has not been scored. If v has not been visited, v may be
-				// the final (tail) operation in a carry chain. If v is not, v will be
-				// rescored above when v's carry-using op is scored. When scoring is done,
-				// only tail operations will retain the CarryChainTail score.
-				if score[v.ID] != ScoreFlags {
-					// Score the tail of carry chain operations to a lower (earlier in the
-					// block) priority. This creates a priority inversion which allows only
-					// one chain to be scheduled, if possible.
-					score[v.ID] = ScoreCarryChainTail
-				}
+				// Tuple selectors need to appear immediately after the instruction
+				// that generates the tuple.
+				score[v.ID] = ScoreReadTuple
+			case v.hasFlagInput():
+				// Schedule flag-reading ops earlier, to minimize the lifetime
+				// of flag values.
+				score[v.ID] = ScoreReadFlags
 			case v.isFlagOp():
 				// Schedule flag register generation as late as possible.
 				// This makes sure that we only have one live flags
 				// value at a time.
+				// Note that this case is after the case above, so values
+				// which both read and generate flags are given ScoreReadFlags.
 				score[v.ID] = ScoreFlags
 			default:
 				score[v.ID] = ScoreDefault
@@ -202,23 +168,35 @@ func schedule(f *Func) {
 				}
 			}
 		}
+		for _, c := range b.ControlValues() {
+			// Force the control values to be scheduled at the end,
+			// unless they have other special priority.
+			if c.Block != b || score[c.ID] < ScoreReadTuple {
+				continue
+			}
+			if score[c.ID] == ScoreReadTuple {
+				score[c.Args[0].ID] = ScoreControl
+				continue
+			}
+			score[c.ID] = ScoreControl
+		}
 	}
+	priq.score = score
+
+	// An edge represents a scheduling constraint that x must appear before y in the schedule.
+	type edge struct {
+		x, y *Value
+	}
+	edges := make([]edge, 0, 64)
+
+	// inEdges is the number of scheduling edges incoming from values that haven't been scheduled yet.
+	// i.e. inEdges[y.ID] = |e in edges where e.y == y and e.x is not in the schedule yet|.
+	inEdges := f.Cache.allocInt32Slice(f.NumValues())
+	defer f.Cache.freeInt32Slice(inEdges)
 
 	for _, b := range f.Blocks {
-		// Find store chain for block.
-		// Store chains for different blocks overwrite each other, so
-		// the calculated store chain is good only for this block.
-		for _, v := range b.Values {
-			if v.Op != OpPhi && v.Type.IsMemory() {
-				for _, w := range v.Args {
-					if w.Type.IsMemory() {
-						nextMem[w.ID] = v
-					}
-				}
-			}
-		}
-
-		// Compute uses.
+		edges = edges[:0]
+		// Standard edges: from the argument of a value to that value.
 		for _, v := range b.Values {
 			if v.Op == OpPhi {
 				// If a value is used by a phi, it does not induce
@@ -226,144 +204,81 @@ func schedule(f *Func) {
 				// a scheduling edge because that use is from the
 				// previous iteration.
 				continue
 			}
-			for _, w := range v.Args {
-				if w.Block == b {
-					uses[w.ID]++
-				}
-				// Any load must come before the following store.
-				if !v.Type.IsMemory() && w.Type.IsMemory() {
-					// v is a load.
-					s := nextMem[w.ID]
-					if s == nil || s.Block != b {
-						continue
-					}
-					additionalArgs[s.ID] = append(additionalArgs[s.ID], v)
-					uses[v.ID]++
+			for _, a := range v.Args {
+				if a.Block == b {
+					edges = append(edges, edge{a, v})
 				}
 			}
 		}
 
-		for _, c := range b.ControlValues() {
-			// Force the control values to be scheduled at the end,
-			// unless they are phi values (which must be first).
-			// OpArg also goes first -- if it is stack it register allocates
-			// to a LoadReg, if it is register it is from the beginning anyway.
-			if score[c.ID] == ScorePhi || score[c.ID] == ScoreArg {
+		// Find store chain for block.
+		// Store chains for different blocks overwrite each other, so
+		// the calculated store chain is good only for this block.
+		for _, v := range b.Values {
+			if v.Op != OpPhi && v.Op != OpInitMem && v.Type.IsMemory() {
+				nextMem[v.MemoryArg().ID] = v
+			}
+		}
+
+		// Add edges to enforce that any load must come before the following store.
+		for _, v := range b.Values {
+			if v.Op == OpPhi || v.Type.IsMemory() {
 				continue
 			}
-			score[c.ID] = ScoreControl
-
-			// Schedule values dependent on the control values at the end.
-			// This reduces the number of register spills. We don't find
-			// all values that depend on the controls, just values with a
-			// direct dependency. This is cheaper and in testing there
-			// was no difference in the number of spills.
-			for _, v := range b.Values {
-				if v.Op != OpPhi {
-					for _, a := range v.Args {
-						if a == c {
-							score[v.ID] = ScoreControl
-						}
-					}
-				}
+			w := v.MemoryArg()
+			if w == nil {
+				continue
+			}
+			if s := nextMem[w.ID]; s != nil && s.Block == b {
+				edges = append(edges, edge{v, s})
 			}
 		}
 
-		// To put things into a priority queue
-		// The values that should come last are least.
-		priq.score = score
-		priq.a = priq.a[:0]
+		// Sort all the edges by source Value ID.
+		sort.Slice(edges, func(i, j int) bool {
+			return edges[i].x.ID < edges[j].x.ID
+		})
+		// Compute inEdges for values in this block.
+		for _, e := range edges {
+			inEdges[e.y.ID]++
+		}
 
 		// Initialize priority queue with schedulable values.
+		priq.a = priq.a[:0]
 		for _, v := range b.Values {
-			if uses[v.ID] == 0 {
+			if inEdges[v.ID] == 0 {
 				heap.Push(priq, v)
 			}
 		}
 
-		// Schedule highest priority value, update use counts, repeat.
-		order = order[:0]
-		tuples := make(map[ID][]*Value)
+		// Produce the schedule. Pick the highest priority schedulable value,
+		// add it to the schedule, add any of its uses that are now schedulable
+		// to the queue, and repeat.
+		nv := len(b.Values)
+		b.Values = b.Values[:0]
 		for priq.Len() > 0 {
-			// Find highest priority schedulable value.
-			// Note that schedule is assembled backwards.
-
+			// Schedule the next schedulable value in priority order.
 			v := heap.Pop(priq).(*Value)
+			b.Values = append(b.Values, v)
 
-			if f.pass.debug > 1 && score[v.ID] == ScoreCarryChainTail && v.isCarry() {
-				// Add some debugging noise if the chain of carrying ops will not
-				// likely be scheduled without potential carry flag clobbers.
-				if !isCarryChainReady(v, uses) {
-					f.Warnl(v.Pos, "carry chain ending with %v not ready", v)
-				}
-			}
-
-			// Add it to the schedule.
-			// Do not emit tuple-reading ops until we're ready to emit the tuple-generating op.
-			//TODO: maybe remove ReadTuple score above, if it does not help on performance
-			switch {
-			case v.Op == OpSelect0:
-				if tuples[v.Args[0].ID] == nil {
-					tuples[v.Args[0].ID] = make([]*Value, 2)
-				}
-				tuples[v.Args[0].ID][0] = v
-			case v.Op == OpSelect1:
-				if tuples[v.Args[0].ID] == nil {
-					tuples[v.Args[0].ID] = make([]*Value, 2)
-				}
-				tuples[v.Args[0].ID][1] = v
-			case v.Op == OpSelectN:
-				if tuples[v.Args[0].ID] == nil {
-					tuples[v.Args[0].ID] = make([]*Value, v.Args[0].Type.NumFields())
-				}
-				tuples[v.Args[0].ID][v.AuxInt] = v
-			case v.Type.IsResults() && tuples[v.ID] != nil:
-				tup := tuples[v.ID]
-				for i := len(tup) - 1; i >= 0; i-- {
-					if tup[i] != nil {
-						order = append(order, tup[i])
-					}
-				}
-				delete(tuples, v.ID)
-				order = append(order, v)
-			case v.Type.IsTuple() && tuples[v.ID] != nil:
-				if tuples[v.ID][1] != nil {
-					order = append(order, tuples[v.ID][1])
-				}
-				if tuples[v.ID][0] != nil {
-					order = append(order, tuples[v.ID][0])
-				}
-				delete(tuples, v.ID)
-				fallthrough
-			default:
-				order = append(order, v)
-			}
-
-			// Update use counts of arguments.
-			for _, w := range v.Args {
-				if w.Block != b {
-					continue
-				}
-				uses[w.ID]--
-				if uses[w.ID] == 0 {
-					// All uses scheduled, w is now schedulable.
-					heap.Push(priq, w)
-				}
-			}
-			for _, w := range additionalArgs[v.ID] {
-				uses[w.ID]--
-				if uses[w.ID] == 0 {
-					// All uses scheduled, w is now schedulable.
-					heap.Push(priq, w)
+			// Find all the scheduling edges out from this value.
+			i := sort.Search(len(edges), func(i int) bool {
+				return edges[i].x.ID >= v.ID
+			})
+			j := sort.Search(len(edges), func(i int) bool {
+				return edges[i].x.ID > v.ID
+			})
+			// Decrement inEdges for each target of edges from v.
+			for _, e := range edges[i:j] {
+				inEdges[e.y.ID]--
+				if inEdges[e.y.ID] == 0 {
+					heap.Push(priq, e.y)
 				}
 			}
 		}
-		if len(order) != len(b.Values) {
+		if len(b.Values) != nv {
 			f.Fatalf("schedule does not include all values in block %s", b)
 		}
-		for i := 0; i < len(b.Values); i++ {
-			b.Values[i] = order[len(b.Values)-1-i]
-		}
 	}
 
 	// Remove SPanchored now that we've scheduled.
@@ -584,74 +499,32 @@ func storeOrder(values []*Value, sset *sparseSet, storeNumber []int32) []*Value
 
 // isFlagOp reports if v is an OP with the flag type.
 func (v *Value) isFlagOp() bool {
-	return v.Type.IsFlags() || v.Type.IsTuple() && v.Type.FieldType(1).IsFlags()
-}
-
-// isCarryChainReady reports whether all dependent carry ops can be scheduled after this.
-func isCarryChainReady(v *Value, uses []int32) bool {
-	// A chain can be scheduled in it's entirety if
-	// the use count of each dependent op is 1. If none,
-	// schedule the first.
-	j := 1 // The first op uses[k.ID] == 0. Dependent ops are always >= 1.
-	for k := v; k != nil; k = k.getCarryInput() {
-		j += int(uses[k.ID]) - 1
+	if v.Type.IsFlags() || v.Type.IsTuple() && v.Type.FieldType(1).IsFlags() {
+		return true
 	}
-	return j == 0
+	// PPC64 carry generators put their carry in a non-flag-typed register
+	// in their output.
+	switch v.Op {
+	case OpPPC64SUBC, OpPPC64ADDC, OpPPC64SUBCconst, OpPPC64ADDCconst:
+		return true
+	}
+	return false
 }
 
-// isCarryInput reports whether v accepts a carry value as input.
-func (v *Value) isCarryInput() bool {
-	return v.getCarryInput() != nil
-}
-
-// isCarryOutput reports whether v generates a carry as output.
-func (v *Value) isCarryOutput() bool {
-	// special cases for PPC64 which put their carry values in XER instead of flags
-	switch v.Block.Func.Config.arch {
-	case "ppc64", "ppc64le":
-		switch v.Op {
-		case OpPPC64SUBC, OpPPC64ADDC, OpPPC64SUBCconst, OpPPC64ADDCconst:
+// hasFlagInput reports whether v has a flag value as any of its inputs.
+func (v *Value) hasFlagInput() bool {
+	for _, a := range v.Args {
+		if a.isFlagOp() {
 			return true
 		}
-		return false
 	}
-	return v.isFlagOp() && v.Op != OpSelect1
-}
-
-// isCarryCreator reports whether op is an operation which produces a carry bit value,
-// but does not consume it.
-func (v *Value) isCarryCreator() bool {
-	return v.isCarryOutput() && !v.isCarryInput()
-}
-
-// isCarry reports whether op consumes or creates a carry a bit value.
-func (v *Value) isCarry() bool {
-	return v.isCarryOutput() || v.isCarryInput()
-}
-
-// getCarryInput returns the producing *Value of the carry bit of this op, or nil if none.
-func (v *Value) getCarryInput() *Value {
-	// special cases for PPC64 which put their carry values in XER instead of flags
-	switch v.Block.Func.Config.arch {
-	case "ppc64", "ppc64le":
-		switch v.Op {
-		case OpPPC64SUBE, OpPPC64ADDE, OpPPC64SUBZEzero, OpPPC64ADDZEzero:
-			// PPC64 carry dependencies are conveyed through their final argument.
-			// Likewise, there is always an OpSelect1 between them.
-			return v.Args[len(v.Args)-1].Args[0]
-		}
-		return nil
+	// PPC64 carry dependencies are conveyed through their final argument,
+	// so we treat those operations as taking flags as well.
+	switch v.Op {
+	case OpPPC64SUBE, OpPPC64ADDE, OpPPC64SUBZEzero, OpPPC64ADDZEzero:
+		return true
 	}
-	for _, a := range v.Args {
-		if !a.isFlagOp() {
-			continue
-		}
-		if a.Op == OpSelect1 {
-			a = a.Args[0]
-		}
-		return a
-	}
-	return nil
+	return false
 }
 
 type bySourcePos []*Value
diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go
index 9bad115def..4b6b28fad1 100644
--- a/src/cmd/compile/internal/ssagen/ssa.go
+++ b/src/cmd/compile/internal/ssagen/ssa.go
@@ -6928,7 +6928,7 @@ func genssa(f *ssa.Func, pp *objw.Progs) {
 	// debuggers may attribute it to previous function in program.
 	firstPos := src.NoXPos
 	for _, v := range f.Entry.Values {
-		if v.Pos.IsStmt() == src.PosIsStmt {
+		if v.Pos.IsStmt() == src.PosIsStmt && v.Op != ssa.OpArg && v.Op != ssa.OpArgIntReg && v.Op != ssa.OpArgFloatReg && v.Op != ssa.OpLoadReg && v.Op != ssa.OpStoreReg {
 			firstPos = v.Pos
 			v.Pos = firstPos.WithDefaultStmt()
 			break
@@ -7009,6 +7009,7 @@ func genssa(f *ssa.Func, pp *objw.Progs) {
 				inlMarkList = append(inlMarkList, p)
 				pos := v.Pos.AtColumn1()
 				inlMarksByPos[pos] = append(inlMarksByPos[pos], p)
+				firstPos = src.NoXPos
 
 			default:
 				// Special case for first line in function; move it to the start (which cannot be a register-valued instruction)
diff --git a/test/nilptr3.go b/test/nilptr3.go
index 3345cfa5ab..0e818ebf66 100644
--- a/test/nilptr3.go
+++ b/test/nilptr3.go
@@ -241,3 +241,10 @@ func f9() []int {
 	y := x[:] // ERROR "removed nil check"
 	return y
 }
+
+// See issue 42673.
+func f10(p **int) int {
+	return * // ERROR "removed nil check"
+		/* */
+		*p // ERROR "removed nil check"
+}
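For readers of the schedule.go rewrite above: the new pass is a classic list scheduler, a Kahn-style topological sort driven by a priority queue. The old pass assembled the block order backwards from per-value use counts; the new one schedules forward over explicit before/after edges. Only the Score* classes are needed for a correct schedule; as the new comment in ValHeap.Less says, everything after the score comparison is a tie-breaking heuristic. Below is a minimal, self-contained sketch of that technique; node, edge, and schedule are simplified stand-ins for *ssa.Value, the scheduler's edge struct, and the real pass, and none of this code comes from the CL itself.

```go
package main

import (
	"container/heap"
	"fmt"
	"sort"
)

type node struct {
	id    int
	score int // lower scores schedule earlier, as in the new ValHeap.Less
}

// edge says x must be scheduled before y.
type edge struct{ x, y *node }

// nodeHeap pops the lowest-scored schedulable node first, breaking ties by id.
type nodeHeap []*node

func (h nodeHeap) Len() int      { return len(h) }
func (h nodeHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
func (h nodeHeap) Less(i, j int) bool {
	if h[i].score != h[j].score {
		return h[i].score < h[j].score
	}
	return h[i].id < h[j].id
}
func (h *nodeHeap) Push(x any) { *h = append(*h, x.(*node)) }
func (h *nodeHeap) Pop() any {
	old := *h
	n := old[len(old)-1]
	*h = old[:len(old)-1]
	return n
}

func schedule(nodes []*node, edges []edge) []*node {
	// Sort edges by source id so all edges out of one node form a contiguous
	// run, findable with two binary searches, mirroring edges[i:j] above.
	sort.Slice(edges, func(i, j int) bool { return edges[i].x.id < edges[j].x.id })

	// Count incoming edges from not-yet-scheduled nodes.
	inEdges := make(map[int]int)
	for _, e := range edges {
		inEdges[e.y.id]++
	}

	// Seed the queue with nodes whose constraints are already satisfied.
	h := &nodeHeap{}
	for _, n := range nodes {
		if inEdges[n.id] == 0 {
			heap.Push(h, n)
		}
	}

	var order []*node
	for h.Len() > 0 {
		v := heap.Pop(h).(*node)
		order = append(order, v)
		// Locate the run of edges whose source is v.
		i := sort.Search(len(edges), func(i int) bool { return edges[i].x.id >= v.id })
		j := sort.Search(len(edges), func(i int) bool { return edges[i].x.id > v.id })
		for _, e := range edges[i:j] {
			inEdges[e.y.id]--
			if inEdges[e.y.id] == 0 {
				heap.Push(h, e.y) // all of e.y's predecessors are scheduled
			}
		}
	}
	return order
}

func main() {
	a := &node{id: 1, score: 2}
	b := &node{id: 2, score: 0} // think of a phi: lowest score, comes first
	c := &node{id: 3, score: 2}
	// Constraints: b before a, a before c (like argument edges in a block).
	for _, n := range schedule([]*node{a, b, c}, []edge{{b, a}, {a, c}}) {
		fmt.Println(n.id) // prints 2, then 1, then 3
	}
}
```

The sorted-edge trick is why the pass sorts edges by source Value ID: each pop then costs two binary searches plus the length of that value's run, rather than a scan over every edge in the block.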