cmd/compile: convert memmove call into Move when arguments are disjoint

Move ops can be faster than memmove calls because the number of bytes
to be moved is fixed and they don't incur the overhead of a call.
This change allows memmove to be converted into a Move op when the
arguments are disjoint.

The optimization is only enabled on s390x at the moment, however
other architectures may also benefit from it in the future. The
memmove inlining rule triggers an extra 12 times when compiling the
standard library. It will most likely make more of a difference as the
disjoint function is improved over time (to recognize fresh heap
allocations for example).

Change-Id: I9af570dcfff28257b8e59e0ff584a46d8e248310
Reviewed-on: https://go-review.googlesource.com/110064
Run-TryBot: Michael Munday <mike.munday@ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ilya Tocar <ilya.tocar@intel.com>
This commit is contained in:
Michael Munday 2018-04-29 15:12:50 +01:00
parent 66cb80c266
commit 6d00e8c478
4 changed files with 48 additions and 18 deletions

View file

@ -1390,13 +1390,13 @@
(IsNonNil (Const(32|64) [c])) -> (ConstBool [b2i(c != 0)])
(IsNonNil (Addr _)) -> (ConstBool [1])
// Inline small runtime.memmove calls with constant length.
(StaticCall {sym} s1:(Store _ (Const64 [sz]) s2:(Store _ src s3:(Store {t} _ dst mem))))
&& isSameSym(sym,"runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmoveSize(sz,config)
-> (Move {t.(*types.Type).Elem()} [sz] dst src mem)
(StaticCall {sym} s1:(Store _ (Const32 [sz]) s2:(Store _ src s3:(Store {t} _ dst mem))))
&& isSameSym(sym,"runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmoveSize(sz,config)
-> (Move {t.(*types.Type).Elem()} [sz] dst src mem)
// Inline small or disjoint runtime.memmove calls with constant length.
(StaticCall {sym} s1:(Store _ (Const(64|32) [sz]) s2:(Store _ src s3:(Store {t} _ dst mem))))
&& isSameSym(sym,"runtime.memmove")
&& s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1
&& isInlinableMemmove(dst,src,sz,config)
&& clobber(s1) && clobber(s2) && clobber(s3)
-> (Move {t.(*types.Type).Elem()} [sz] dst src mem)
// De-virtualize interface calls into static calls.
// Note that (ITab (IMake)) doesn't get

View file

@ -873,15 +873,22 @@ func zeroUpper32Bits(x *Value, depth int) bool {
return false
}
// inlineablememmovesize reports whether the given arch performs OpMove of the given size
// faster than memmove and in a safe way when src and dst overlap.
// This is used as a check for replacing memmove with OpMove.
func isInlinableMemmoveSize(sz int64, c *Config) bool {
// isInlinableMemmove reports whether the given arch performs a Move of the given size
// faster than memmove. It will only return true if replacing the memmove with a Move is
// safe, either because Move is small or because the arguments are disjoint.
// This is used as a check for replacing memmove with Move ops.
func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
// It is always safe to convert memmove into Move when its arguments are disjoint.
// Move ops may or may not be faster for large sizes depending on how the platform
// lowers them, so we only perform this optimization on platforms that we know to
// have fast Move ops.
switch c.arch {
case "amd64", "amd64p32":
return sz <= 16
case "386", "ppc64", "s390x", "ppc64le", "arm64":
case "386", "ppc64", "ppc64le", "arm64":
return sz <= 8
case "s390x":
return sz <= 8 || disjoint(dst, sz, src, sz)
case "arm", "mips", "mips64", "mipsle", "mips64le":
return sz <= 4
}

View file

@ -27265,7 +27265,7 @@ func rewriteValuegeneric_OpStaticCall_0(v *Value) bool {
config := b.Func.Config
_ = config
// match: (StaticCall {sym} s1:(Store _ (Const64 [sz]) s2:(Store _ src s3:(Store {t} _ dst mem))))
// cond: isSameSym(sym,"runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmoveSize(sz,config)
// cond: isSameSym(sym,"runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst,src,sz,config) && clobber(s1) && clobber(s2) && clobber(s3)
// result: (Move {t.(*types.Type).Elem()} [sz] dst src mem)
for {
sym := v.Aux
@ -27293,7 +27293,7 @@ func rewriteValuegeneric_OpStaticCall_0(v *Value) bool {
_ = s3.Args[2]
dst := s3.Args[1]
mem := s3.Args[2]
if !(isSameSym(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmoveSize(sz, config)) {
if !(isSameSym(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(s1) && clobber(s2) && clobber(s3)) {
break
}
v.reset(OpMove)
@ -27305,7 +27305,7 @@ func rewriteValuegeneric_OpStaticCall_0(v *Value) bool {
return true
}
// match: (StaticCall {sym} s1:(Store _ (Const32 [sz]) s2:(Store _ src s3:(Store {t} _ dst mem))))
// cond: isSameSym(sym,"runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmoveSize(sz,config)
// cond: isSameSym(sym,"runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst,src,sz,config) && clobber(s1) && clobber(s2) && clobber(s3)
// result: (Move {t.(*types.Type).Elem()} [sz] dst src mem)
for {
sym := v.Aux
@ -27333,7 +27333,7 @@ func rewriteValuegeneric_OpStaticCall_0(v *Value) bool {
_ = s3.Args[2]
dst := s3.Args[1]
mem := s3.Args[2]
if !(isSameSym(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmoveSize(sz, config)) {
if !(isSameSym(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(s1) && clobber(s2) && clobber(s3)) {
break
}
v.reset(OpMove)

View file

@ -6,6 +6,8 @@
package codegen
import "runtime"
// Check small copies are replaced with moves.
func movesmall4() {
@ -31,10 +33,31 @@ func movesmall16() {
copy(x[1:], x[:])
}
// Check that no branches are generated when the pointers are [not] equal.
var x [256]byte
// Check that large disjoint copies are replaced with moves.
func moveDisjointStack() {
var s [256]byte
// s390x:-".*memmove"
copy(s[:], x[:])
runtime.KeepAlive(&s)
}
func moveDisjointArg(b *[256]byte) {
var s [256]byte
// s390x:-".*memmove"
copy(s[:], b[:])
runtime.KeepAlive(&s)
}
func moveDisjointNoOverlap(a *[256]byte) {
// s390x:-".*memmove"
copy(a[:], a[128:])
}
// Check that no branches are generated when the pointers are [not] equal.
func ptrEqual() {
// amd64:-"JEQ",-"JNE"
// ppc64le:-"BEQ",-"BNE"