From e0ceba8139c81d612ab6226e057fb3b01555b7f9 Mon Sep 17 00:00:00 2001
From: eric fang
Date: Fri, 5 May 2023 01:07:25 +0000
Subject: [PATCH] cmd/compile: enhance tighten pass for memory values

[This is a roll-forward of CL 458755, which was reverted due to
make.bash being broken on GOAMD64=v3. But it turned out that the
problem was caused by wrong bswap/load rewrite rules, and it was fixed
in CL 492616.]

This CL enhances the tighten pass. Previously, if a value had a memory
arg, the tighten pass would not move it. However, if the memory state
is consistent between the value's definition block and its use block,
the value can be moved safely. This CL handles that case.
This is useful for the following situation:
b1:
  x = load(...mem)
  if(...) goto b2 else b3
b2:
  use(x)
b3:
  some_op_not_use_x

For the micro-benchmark mentioned in #56620, the performance
improvement is about 15%.
There's no noticeable performance change in the go1 benchmarks.

Fixes #56620

Change-Id: I36ea68bed384986cd3ae81cb9e6efe84bb213adc
Reviewed-on: https://go-review.googlesource.com/c/go/+/492895
Reviewed-by: Keith Randall
TryBot-Result: Gopher Robot
Reviewed-by: Heschi Kreinick
Reviewed-by: Keith Randall
Run-TryBot: Eric Fang
---
 src/cmd/compile/internal/ssa/regalloc.go |   7 +-
 src/cmd/compile/internal/ssa/tighten.go  | 105 +++++++++++++++++++++--
 test/tighten.go                          |  22 +++
 3 files changed, 127 insertions(+), 7 deletions(-)
 create mode 100644 test/tighten.go

diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go
index c7cdea261d..c4d6e48cad 100644
--- a/src/cmd/compile/internal/ssa/regalloc.go
+++ b/src/cmd/compile/internal/ssa/regalloc.go
@@ -602,6 +602,11 @@ func isLeaf(f *Func) bool {
 	return true
 }
 
+// needRegister reports whether v needs a register.
+func (v *Value) needRegister() bool {
+	return !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple()
+}
+
 func (s *regAllocState) init(f *Func) {
 	s.f = f
 	s.f.RegAlloc = s.f.Cache.locs[:0]
@@ -702,7 +707,7 @@ func (s *regAllocState) init(f *Func) {
 	s.copies = make(map[*Value]bool)
 	for _, b := range s.visitOrder {
 		for _, v := range b.Values {
-			if !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple() {
+			if v.needRegister() {
 				s.values[v.ID].needReg = true
 				s.values[v.ID].rematerializeable = v.rematerializeable()
 				s.orig[v.ID] = v
diff --git a/src/cmd/compile/internal/ssa/tighten.go b/src/cmd/compile/internal/ssa/tighten.go
index 048532a4ca..85b6a84cc3 100644
--- a/src/cmd/compile/internal/ssa/tighten.go
+++ b/src/cmd/compile/internal/ssa/tighten.go
@@ -21,6 +21,14 @@ func tighten(f *Func) {
 	canMove := f.Cache.allocBoolSlice(f.NumValues())
 	defer f.Cache.freeBoolSlice(canMove)
+
+	// Compute the memory states of each block.
+	startMem := f.Cache.allocValueSlice(f.NumBlocks())
+	defer f.Cache.freeValueSlice(startMem)
+	endMem := f.Cache.allocValueSlice(f.NumBlocks())
+	defer f.Cache.freeValueSlice(endMem)
+	memState(f, startMem, endMem)
+
 	for _, b := range f.Blocks {
 		for _, v := range b.Values {
 			if v.Op.isLoweredGetClosurePtr() {
@@ -35,15 +43,12 @@ func tighten(f *Func) {
 				// SelectN is typically, ultimately, a register.
 				continue
 			}
-			if v.MemoryArg() != nil {
-				// We can't move values which have a memory arg - it might
-				// make two memory values live across a block boundary.
-				continue
-			}
 			// Count arguments which will need a register.
 			narg := 0
 			for _, a := range v.Args {
-				if !a.rematerializeable() {
+				// SP and SB are special registers and have no effect on
+				// the allocation of general-purpose registers.
+ if a.needRegister() && a.Op != OpSB && a.Op != OpSP { narg++ } } @@ -138,6 +143,16 @@ func tighten(f *Func) { // v is not moveable, or is already in correct place. continue } + if mem := v.MemoryArg(); mem != nil { + if startMem[t.ID] != mem { + // We can't move a value with a memory arg unless the target block + // has that memory arg as its starting memory. + continue + } + } + if f.pass.debug > 0 { + b.Func.Warnl(v.Pos, "%v is moved", v.Op) + } // Move v to the block which dominates its uses. t.Values = append(t.Values, v) v.Block = t @@ -174,3 +189,81 @@ func phiTighten(f *Func) { } } } + +// memState computes the memory state at the beginning and end of each block of +// the function. The memory state is represented by a value of mem type. +// The returned result is stored in startMem and endMem, and endMem is nil for +// blocks with no successors (Exit,Ret,RetJmp blocks). This algorithm is not +// suitable for infinite loop blocks that do not contain any mem operations. +// For example: +// b1: +// +// (some values) +// +// plain -> b2 +// b2: <- b1 b2 +// Plain -> b2 +// +// Algorithm introduction: +// 1. The start memory state of a block is InitMem, a Phi node of type mem or +// an incoming memory value. +// 2. The start memory state of a block is consistent with the end memory state +// of its parent nodes. If the start memory state of a block is a Phi value, +// then the end memory state of its parent nodes is consistent with the +// corresponding argument value of the Phi node. +// 3. The algorithm first obtains the memory state of some blocks in the tree +// in the first step. Then floods the known memory state to other nodes in +// the second step. +func memState(f *Func, startMem, endMem []*Value) { + // This slice contains the set of blocks that have had their startMem set but this + // startMem value has not yet been propagated to the endMem of its predecessors + changed := make([]*Block, 0) + // First step, init the memory state of some blocks. + for _, b := range f.Blocks { + for _, v := range b.Values { + var mem *Value + if v.Op == OpPhi { + if v.Type.IsMemory() { + mem = v + } + } else if v.Op == OpInitMem { + mem = v // This is actually not needed. + } else if a := v.MemoryArg(); a != nil && a.Block != b { + // The only incoming memory value doesn't belong to this block. + mem = a + } + if mem != nil { + if old := startMem[b.ID]; old != nil { + if old == mem { + continue + } + f.Fatalf("func %s, startMem[%v] has different values, old %v, new %v", f.Name, b, old, mem) + } + startMem[b.ID] = mem + changed = append(changed, b) + } + } + } + + // Second step, floods the known memory state of some blocks to others. + for len(changed) != 0 { + top := changed[0] + changed = changed[1:] + mem := startMem[top.ID] + for i, p := range top.Preds { + pb := p.b + if endMem[pb.ID] != nil { + continue + } + if mem.Op == OpPhi && mem.Block == top { + endMem[pb.ID] = mem.Args[i] + } else { + endMem[pb.ID] = mem + } + if startMem[pb.ID] == nil { + startMem[pb.ID] = endMem[pb.ID] + changed = append(changed, pb) + } + } + } +} diff --git a/test/tighten.go b/test/tighten.go new file mode 100644 index 0000000000..92ed2492b2 --- /dev/null +++ b/test/tighten.go @@ -0,0 +1,22 @@ +// errorcheck -0 -d=ssa/tighten/debug=1 + +//go:build arm64 + +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+
+package main
+
+var (
+	e  any
+	ts uint16
+)
+
+func moveValuesWithMemoryArg(len int) {
+	for n := 0; n < len; n++ {
+		// Load of e.data is lowered to a MOVDload op, which has a memory
+		// argument. It's moved near where it's used.
+		_ = e != ts // ERROR "MOVDload is moved$" "MOVDaddr is moved$"
+	}
+}
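
For a concrete picture of the situation described in the commit message, here is a
minimal, hypothetical Go sketch (it is not the benchmark from issue #56620; the
function and parameter names are invented). The load *p has a memory argument and
its result is used only inside the if branch, so with this change the tighten pass
may sink the load into that branch rather than executing it on every iteration:

package p

// sum adds the pointed-to values whose index is marked in keep.
// The load *p is defined in the loop body (b1) but its only use is in
// the branch taken when keep[i] is true (b2), matching the b1/b2/b3
// situation sketched in the commit message. Whether the load is
// actually sunk depends on the memory state at the start of that
// branch matching the load's memory argument, which is what the new
// memState computation lets the tighten pass check.
func sum(xs []*int64, keep []bool) int64 {
	var s int64
	for i, p := range xs {
		v := *p // load with a memory arg; only used when keep[i] is true
		if keep[i] {
			s += v
		}
	}
	return s
}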