diff --git a/src/cmd/compile/internal/ssa/gen/generic.rules b/src/cmd/compile/internal/ssa/gen/generic.rules
index 6225f1ebc8..76212c0866 100644
--- a/src/cmd/compile/internal/ssa/gen/generic.rules
+++ b/src/cmd/compile/internal/ssa/gen/generic.rules
@@ -1390,13 +1390,13 @@
 (IsNonNil (Const(32|64) [c])) -> (ConstBool [b2i(c != 0)])
 (IsNonNil (Addr _)) -> (ConstBool [1])
 
-// Inline small runtime.memmove calls with constant length.
-(StaticCall {sym} s1:(Store _ (Const64 [sz]) s2:(Store _ src s3:(Store {t} _ dst mem))))
-	&& isSameSym(sym,"runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmoveSize(sz,config)
-	-> (Move {t.(*types.Type).Elem()} [sz] dst src mem)
-(StaticCall {sym} s1:(Store _ (Const32 [sz]) s2:(Store _ src s3:(Store {t} _ dst mem))))
-	&& isSameSym(sym,"runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmoveSize(sz,config)
-	-> (Move {t.(*types.Type).Elem()} [sz] dst src mem)
+// Inline small or disjoint runtime.memmove calls with constant length.
+(StaticCall {sym} s1:(Store _ (Const(64|32) [sz]) s2:(Store _ src s3:(Store {t} _ dst mem))))
+	&& isSameSym(sym,"runtime.memmove")
+	&& s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1
+	&& isInlinableMemmove(dst,src,sz,config)
+	&& clobber(s1) && clobber(s2) && clobber(s3)
+	-> (Move {t.(*types.Type).Elem()} [sz] dst src mem)
 
 // De-virtualize interface calls into static calls.
 // Note that (ITab (IMake)) doesn't get
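
Commentary, not part of the patch: the rule above matches the SSA form that walk produces for a copy with a constant length, namely a call to runtime.memmove preceded by three stores that set up its dst, src, and size arguments (s3, s2, and s1 respectively). The added clobber(s1)/clobber(s2)/clobber(s3) conditions mark those argument stores as dead once the call is replaced by a Move, so they can be removed. A minimal Go-level sketch of the kind of code the rule targets; the function name is illustrative:

package p

// copy16 copies a constant 16 bytes. After walk lowers the copy to a
// runtime.memmove call, the StaticCall rule above can rewrite the call
// into a single Move op on architectures where isInlinableMemmove
// reports true for sz == 16 (e.g. amd64).
func copy16(dst, src *[16]byte) {
	copy(dst[:], src[:])
}
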
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index a7bfe4ccb8..f4781607fd 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -873,15 +873,22 @@ func zeroUpper32Bits(x *Value, depth int) bool {
 	return false
 }
 
-// inlineablememmovesize reports whether the given arch performs OpMove of the given size
-// faster than memmove and in a safe way when src and dst overlap.
-// This is used as a check for replacing memmove with OpMove.
-func isInlinableMemmoveSize(sz int64, c *Config) bool {
+// isInlinableMemmove reports whether the given arch performs a Move of the given size
+// faster than memmove. It will only return true if replacing the memmove with a Move is
+// safe, either because Move is small or because the arguments are disjoint.
+// This is used as a check for replacing memmove with Move ops.
+func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
+	// It is always safe to convert memmove into Move when its arguments are disjoint.
+	// Move ops may or may not be faster for large sizes depending on how the platform
+	// lowers them, so we only perform this optimization on platforms that we know to
+	// have fast Move ops.
 	switch c.arch {
 	case "amd64", "amd64p32":
 		return sz <= 16
-	case "386", "ppc64", "s390x", "ppc64le", "arm64":
+	case "386", "ppc64", "ppc64le", "arm64":
 		return sz <= 8
+	case "s390x":
+		return sz <= 8 || disjoint(dst, sz, src, sz)
 	case "arm", "mips", "mips64", "mipsle", "mips64le":
 		return sz <= 4
 	}
diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go
index 1f89b2156e..357be76937 100644
--- a/src/cmd/compile/internal/ssa/rewritegeneric.go
+++ b/src/cmd/compile/internal/ssa/rewritegeneric.go
@@ -27265,7 +27265,7 @@ func rewriteValuegeneric_OpStaticCall_0(v *Value) bool {
 	config := b.Func.Config
 	_ = config
 	// match: (StaticCall {sym} s1:(Store _ (Const64 [sz]) s2:(Store _ src s3:(Store {t} _ dst mem))))
-	// cond: isSameSym(sym,"runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmoveSize(sz,config)
+	// cond: isSameSym(sym,"runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst,src,sz,config) && clobber(s1) && clobber(s2) && clobber(s3)
 	// result: (Move {t.(*types.Type).Elem()} [sz] dst src mem)
 	for {
 		sym := v.Aux
@@ -27293,7 +27293,7 @@ func rewriteValuegeneric_OpStaticCall_0(v *Value) bool {
 		_ = s3.Args[2]
 		dst := s3.Args[1]
 		mem := s3.Args[2]
-		if !(isSameSym(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmoveSize(sz, config)) {
+		if !(isSameSym(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(s1) && clobber(s2) && clobber(s3)) {
 			break
 		}
 		v.reset(OpMove)
@@ -27305,7 +27305,7 @@ func rewriteValuegeneric_OpStaticCall_0(v *Value) bool {
 		return true
 	}
 	// match: (StaticCall {sym} s1:(Store _ (Const32 [sz]) s2:(Store _ src s3:(Store {t} _ dst mem))))
-	// cond: isSameSym(sym,"runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmoveSize(sz,config)
+	// cond: isSameSym(sym,"runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst,src,sz,config) && clobber(s1) && clobber(s2) && clobber(s3)
 	// result: (Move {t.(*types.Type).Elem()} [sz] dst src mem)
 	for {
 		sym := v.Aux
@@ -27333,7 +27333,7 @@ func rewriteValuegeneric_OpStaticCall_0(v *Value) bool {
 		_ = s3.Args[2]
 		dst := s3.Args[1]
 		mem := s3.Args[2]
-		if !(isSameSym(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmoveSize(sz, config)) {
+		if !(isSameSym(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(s1) && clobber(s2) && clobber(s3)) {
 			break
 		}
 		v.reset(OpMove)
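
Commentary, not part of the patch: the new s390x case reuses the existing disjoint helper in rewrite.go, which reports whether two memory regions provably do not overlap and answers false whenever it cannot prove this, so the rewrite stays conservative. The two near-identical match blocks in the generated rewritegeneric.go exist because rulegen expands the Const(64|32) alternation in generic.rules into separate cases. Below is a hedged sketch of the interval reasoning behind such a disjointness proof, assuming both regions are constant offsets from the same base pointer; the names are illustrative and this is not the compiler's implementation:

package p

// regionsOverlap reports whether [off1, off1+n1) and [off2, off2+n2)
// overlap: they do iff each region starts before the other one ends.
func regionsOverlap(off1, n1, off2, n2 int64) bool {
	return off1 < off2+n2 && off2 < off1+n1
}

// disjointConstOffsets is the complement. The real helper must in
// addition reason about the base pointers themselves, e.g. two
// distinct stack objects can never overlap regardless of offsets.
func disjointConstOffsets(off1, n1, off2, n2 int64) bool {
	return !regionsOverlap(off1, n1, off2, n2)
}
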
diff --git a/test/codegen/copy.go b/test/codegen/copy.go
index 70a4f86ae8..5c3837bc7c 100644
--- a/test/codegen/copy.go
+++ b/test/codegen/copy.go
@@ -6,6 +6,8 @@
 
 package codegen
 
+import "runtime"
+
 // Check small copies are replaced with moves.
 
 func movesmall4() {
@@ -31,10 +33,31 @@ func movesmall16() {
 	copy(x[1:], x[:])
 }
 
-// Check that no branches are generated when the pointers are [not] equal.
-
 var x [256]byte
 
+// Check that large disjoint copies are replaced with moves.
+
+func moveDisjointStack() {
+	var s [256]byte
+	// s390x:-".*memmove"
+	copy(s[:], x[:])
+	runtime.KeepAlive(&s)
+}
+
+func moveDisjointArg(b *[256]byte) {
+	var s [256]byte
+	// s390x:-".*memmove"
+	copy(s[:], b[:])
+	runtime.KeepAlive(&s)
+}
+
+func moveDisjointNoOverlap(a *[256]byte) {
+	// s390x:-".*memmove"
+	copy(a[:], a[128:])
+}
+
+// Check that no branches are generated when the pointers are [not] equal.
+
 func ptrEqual() {
 	// amd64:-"JEQ",-"JNE"
 	// ppc64le:-"BEQ",-"BNE"
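
Commentary, not part of the patch: these are asmcheck tests, so a directive like s390x:-".*memmove" asserts that the s390x assembly generated for the enclosing function does not match the regexp, i.e. the memmove call was successfully replaced by inline moves. The runtime.KeepAlive(&s) calls presumably keep the otherwise-unused local buffers, and therefore the copies into them, from being eliminated before the check runs. As a hedged counter-example, the rewrite cannot fire when the length is not a compile-time constant, because the rule requires a Const32 or Const64 size argument:

package p

// copyN is illustrative: n is unknown at compile time, so the
// StaticCall pattern does not match and the runtime.memmove call
// remains in the generated code.
func copyN(dst, src []byte, n int) int {
	return copy(dst[:n], src)
}
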