mirror of
https://github.com/golang/go
synced 2024-11-02 11:50:30 +00:00
cmd/compile: convert memmove call into Move when arguments are disjoint
Move ops can be faster than memmove calls because the number of bytes to be moved is fixed and they don't incur the overhead of a call. This change allows memmove to be converted into a Move op when the arguments are disjoint. The optimization is only enabled on s390x at the moment, however other architectures may also benefit from it in the future. The memmove inlining rule triggers an extra 12 times when compiling the standard library. It will most likely make more of a difference as the disjoint function is improved over time (to recognize fresh heap allocations for example). Change-Id: I9af570dcfff28257b8e59e0ff584a46d8e248310 Reviewed-on: https://go-review.googlesource.com/110064 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ilya Tocar <ilya.tocar@intel.com>
This commit is contained in:
parent
66cb80c266
commit
6d00e8c478
4 changed files with 48 additions and 18 deletions
|
@ -1390,12 +1390,12 @@
|
|||
(IsNonNil (Const(32|64) [c])) -> (ConstBool [b2i(c != 0)])
|
||||
(IsNonNil (Addr _)) -> (ConstBool [1])
|
||||
|
||||
// Inline small runtime.memmove calls with constant length.
|
||||
(StaticCall {sym} s1:(Store _ (Const64 [sz]) s2:(Store _ src s3:(Store {t} _ dst mem))))
|
||||
&& isSameSym(sym,"runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmoveSize(sz,config)
|
||||
-> (Move {t.(*types.Type).Elem()} [sz] dst src mem)
|
||||
(StaticCall {sym} s1:(Store _ (Const32 [sz]) s2:(Store _ src s3:(Store {t} _ dst mem))))
|
||||
&& isSameSym(sym,"runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmoveSize(sz,config)
|
||||
// Inline small or disjoint runtime.memmove calls with constant length.
|
||||
(StaticCall {sym} s1:(Store _ (Const(64|32) [sz]) s2:(Store _ src s3:(Store {t} _ dst mem))))
|
||||
&& isSameSym(sym,"runtime.memmove")
|
||||
&& s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1
|
||||
&& isInlinableMemmove(dst,src,sz,config)
|
||||
&& clobber(s1) && clobber(s2) && clobber(s3)
|
||||
-> (Move {t.(*types.Type).Elem()} [sz] dst src mem)
|
||||
|
||||
// De-virtualize interface calls into static calls.
|
||||
|
|
|
@ -873,15 +873,22 @@ func zeroUpper32Bits(x *Value, depth int) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// inlineablememmovesize reports whether the given arch performs OpMove of the given size
|
||||
// faster than memmove and in a safe way when src and dst overlap.
|
||||
// This is used as a check for replacing memmove with OpMove.
|
||||
func isInlinableMemmoveSize(sz int64, c *Config) bool {
|
||||
// isInlinableMemmove reports whether the given arch performs a Move of the given size
|
||||
// faster than memmove. It will only return true if replacing the memmove with a Move is
|
||||
// safe, either because Move is small or because the arguments are disjoint.
|
||||
// This is used as a check for replacing memmove with Move ops.
|
||||
func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
|
||||
// It is always safe to convert memmove into Move when its arguments are disjoint.
|
||||
// Move ops may or may not be faster for large sizes depending on how the platform
|
||||
// lowers them, so we only perform this optimization on platforms that we know to
|
||||
// have fast Move ops.
|
||||
switch c.arch {
|
||||
case "amd64", "amd64p32":
|
||||
return sz <= 16
|
||||
case "386", "ppc64", "s390x", "ppc64le", "arm64":
|
||||
case "386", "ppc64", "ppc64le", "arm64":
|
||||
return sz <= 8
|
||||
case "s390x":
|
||||
return sz <= 8 || disjoint(dst, sz, src, sz)
|
||||
case "arm", "mips", "mips64", "mipsle", "mips64le":
|
||||
return sz <= 4
|
||||
}
|
||||
|
|
|
@ -27265,7 +27265,7 @@ func rewriteValuegeneric_OpStaticCall_0(v *Value) bool {
|
|||
config := b.Func.Config
|
||||
_ = config
|
||||
// match: (StaticCall {sym} s1:(Store _ (Const64 [sz]) s2:(Store _ src s3:(Store {t} _ dst mem))))
|
||||
// cond: isSameSym(sym,"runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmoveSize(sz,config)
|
||||
// cond: isSameSym(sym,"runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst,src,sz,config) && clobber(s1) && clobber(s2) && clobber(s3)
|
||||
// result: (Move {t.(*types.Type).Elem()} [sz] dst src mem)
|
||||
for {
|
||||
sym := v.Aux
|
||||
|
@ -27293,7 +27293,7 @@ func rewriteValuegeneric_OpStaticCall_0(v *Value) bool {
|
|||
_ = s3.Args[2]
|
||||
dst := s3.Args[1]
|
||||
mem := s3.Args[2]
|
||||
if !(isSameSym(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmoveSize(sz, config)) {
|
||||
if !(isSameSym(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(s1) && clobber(s2) && clobber(s3)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpMove)
|
||||
|
@ -27305,7 +27305,7 @@ func rewriteValuegeneric_OpStaticCall_0(v *Value) bool {
|
|||
return true
|
||||
}
|
||||
// match: (StaticCall {sym} s1:(Store _ (Const32 [sz]) s2:(Store _ src s3:(Store {t} _ dst mem))))
|
||||
// cond: isSameSym(sym,"runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmoveSize(sz,config)
|
||||
// cond: isSameSym(sym,"runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst,src,sz,config) && clobber(s1) && clobber(s2) && clobber(s3)
|
||||
// result: (Move {t.(*types.Type).Elem()} [sz] dst src mem)
|
||||
for {
|
||||
sym := v.Aux
|
||||
|
@ -27333,7 +27333,7 @@ func rewriteValuegeneric_OpStaticCall_0(v *Value) bool {
|
|||
_ = s3.Args[2]
|
||||
dst := s3.Args[1]
|
||||
mem := s3.Args[2]
|
||||
if !(isSameSym(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmoveSize(sz, config)) {
|
||||
if !(isSameSym(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(s1) && clobber(s2) && clobber(s3)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpMove)
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
|
||||
package codegen
|
||||
|
||||
import "runtime"
|
||||
|
||||
// Check small copies are replaced with moves.
|
||||
|
||||
func movesmall4() {
|
||||
|
@ -31,10 +33,31 @@ func movesmall16() {
|
|||
copy(x[1:], x[:])
|
||||
}
|
||||
|
||||
// Check that no branches are generated when the pointers are [not] equal.
|
||||
|
||||
var x [256]byte
|
||||
|
||||
// Check that large disjoint copies are replaced with moves.
|
||||
|
||||
func moveDisjointStack() {
|
||||
var s [256]byte
|
||||
// s390x:-".*memmove"
|
||||
copy(s[:], x[:])
|
||||
runtime.KeepAlive(&s)
|
||||
}
|
||||
|
||||
func moveDisjointArg(b *[256]byte) {
|
||||
var s [256]byte
|
||||
// s390x:-".*memmove"
|
||||
copy(s[:], b[:])
|
||||
runtime.KeepAlive(&s)
|
||||
}
|
||||
|
||||
func moveDisjointNoOverlap(a *[256]byte) {
|
||||
// s390x:-".*memmove"
|
||||
copy(a[:], a[128:])
|
||||
}
|
||||
|
||||
// Check that no branches are generated when the pointers are [not] equal.
|
||||
|
||||
func ptrEqual() {
|
||||
// amd64:-"JEQ",-"JNE"
|
||||
// ppc64le:-"BEQ",-"BNE"
|
||||
|
|
Loading…
Reference in a new issue