mirror of
https://github.com/golang/go
synced 2024-10-14 20:05:36 +00:00
cmd/compile: make memcombine pass a bit more robust to reassociation of exprs
Be more liberal about expanding the OR tree. Handle any tree shape instead of a fully left or right associative tree. Also remove tail feature, it isn't ever needed. Change-Id: If16bebef94b952a604d6069e9be3d9129994cb6f Reviewed-on: https://go-review.googlesource.com/c/go/+/494056 TryBot-Result: Gopher Robot <gobot@golang.org> Run-TryBot: Keith Randall <khr@golang.org> Reviewed-by: Ryan Berger <ryanbberger@gmail.com> Reviewed-by: Keith Randall <khr@google.com> Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
parent
6fc5e7d4b5
commit
6042a062dc
|
@ -141,27 +141,26 @@ func combineLoads(root *Value, n int64) bool {
|
|||
|
||||
// Find n values that are ORed together with the above op.
|
||||
a := make([]*Value, 0, 8)
|
||||
v := root
|
||||
for int64(len(a)) < n {
|
||||
if v.Args[0].Op == orOp {
|
||||
a = append(a, v.Args[1])
|
||||
v = v.Args[0]
|
||||
} else if v.Args[1].Op == orOp {
|
||||
a = append(a, v.Args[0])
|
||||
v = v.Args[1]
|
||||
} else if int64(len(a)) == n-2 {
|
||||
a = append(a, v.Args[0])
|
||||
a = append(a, v.Args[1])
|
||||
v = nil
|
||||
} else {
|
||||
a = append(a, root)
|
||||
for i := 0; i < len(a) && int64(len(a)) < n; i++ {
|
||||
v := a[i]
|
||||
if v.Uses != 1 && v != root {
|
||||
// Something in this subtree is used somewhere else.
|
||||
return false
|
||||
}
|
||||
if v.Op == orOp {
|
||||
a[i] = v.Args[0]
|
||||
a = append(a, v.Args[1])
|
||||
i--
|
||||
}
|
||||
}
|
||||
if int64(len(a)) != n {
|
||||
return false
|
||||
}
|
||||
tail := v // Value to OR in beyond the ones we're working with (or nil if none).
|
||||
|
||||
// Check that the first entry to see what ops we're looking for.
|
||||
// All the entries should be of the form shift(extend(load)), maybe with no shift.
|
||||
v = a[0]
|
||||
v := a[0]
|
||||
if v.Op == shiftOp {
|
||||
v = v.Args[0]
|
||||
}
|
||||
|
@ -317,15 +316,9 @@ func combineLoads(root *Value, n int64) bool {
|
|||
v = leftShift(loadBlock, pos, v, shift0-(n-1)*8)
|
||||
}
|
||||
|
||||
// Install. If there's a tail, make the root (OR v tail).
|
||||
// If not, do (Copy v).
|
||||
if tail != nil {
|
||||
root.SetArg(0, v)
|
||||
root.SetArg(1, tail)
|
||||
} else {
|
||||
root.reset(OpCopy)
|
||||
root.AddArg(v)
|
||||
}
|
||||
// Install with (Copy v).
|
||||
root.reset(OpCopy)
|
||||
root.AddArg(v)
|
||||
|
||||
// Clobber the loads, just to prevent additional work being done on
|
||||
// subtrees (which are now unreachable).
|
||||
|
|
|
@ -338,6 +338,32 @@ func load_be_byte8_uint64_idx8(s []byte, idx int) uint64 {
|
|||
return uint64(s[idx<<3])<<56 | uint64(s[(idx<<3)+1])<<48 | uint64(s[(idx<<3)+2])<<40 | uint64(s[(idx<<3)+3])<<32 | uint64(s[(idx<<3)+4])<<24 | uint64(s[(idx<<3)+5])<<16 | uint64(s[(idx<<3)+6])<<8 | uint64(s[(idx<<3)+7])
|
||||
}
|
||||
|
||||
// Some tougher cases for the memcombine pass.
|
||||
|
||||
func reassoc_load_uint32(b []byte) uint32 {
|
||||
// amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
|
||||
return (uint32(b[0]) | uint32(b[1])<<8) | (uint32(b[2])<<16 | uint32(b[3])<<24)
|
||||
}
|
||||
|
||||
func extrashift_load_uint32(b []byte) uint32 {
|
||||
// amd64:`MOVL\s\([A-Z]+\)`,`SHLL\s[$]2`,-`MOV[BW]`,-`OR`
|
||||
return uint32(b[0])<<2 | uint32(b[1])<<10 | uint32(b[2])<<18 | uint32(b[3])<<26
|
||||
|
||||
}
|
||||
|
||||
func outoforder_load_uint32(b []byte) uint32 {
|
||||
// amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
|
||||
return uint32(b[0]) | uint32(b[2])<<16 | uint32(b[1])<<8 | uint32(b[3])<<24
|
||||
}
|
||||
|
||||
func extraOr_load_uint32(b []byte, x, y uint32) uint32 {
|
||||
// amd64:`ORL\s\([A-Z]+\)`,-`MOV[BW]`
|
||||
return x | binary.LittleEndian.Uint32(b) | y
|
||||
// TODO: Note that
|
||||
// x | uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 | y
|
||||
// doesn't work because it associates in a way that memcombine can't detect it.
|
||||
}
|
||||
|
||||
// Check load combining across function calls.
|
||||
|
||||
func fcall_byte(a [2]byte) [2]byte {
|
||||
|
|
Loading…
Reference in a new issue