mirror of
https://github.com/golang/go
synced 2024-11-05 18:36:08 +00:00
cmd/compile: optimize math.Copysign on arm64
Add rewrite rules to optimize math.Copysign() when the second argument is negative floating point constant. For example, math.Copysign(c, -2): The previous compile output is "AND $9223372036854775807, R0, R0; ORR $-9223372036854775808, R0, R0". The optimized compile output is "ORR $-9223372036854775808, R0, R0" Math package benchmark results. name old time/op new time/op delta Copysign-8 2.61ns ± 2% 2.49ns ± 0% -4.55% (p=0.000 n=10+10) Cos-8 43.0ns ± 0% 41.5ns ± 0% -3.49% (p=0.000 n=10+10) Cosh-8 98.6ns ± 0% 98.1ns ± 0% -0.51% (p=0.000 n=10+10) ExpGo-8 107ns ± 0% 105ns ± 0% -1.87% (p=0.000 n=10+10) Exp2Go-8 100ns ± 0% 100ns ± 0% +0.39% (p=0.000 n=10+8) Max-8 6.56ns ± 2% 6.45ns ± 1% -1.63% (p=0.002 n=10+10) Min-8 6.66ns ± 3% 6.47ns ± 2% -2.82% (p=0.006 n=10+10) Mod-8 107ns ± 1% 104ns ± 1% -2.72% (p=0.000 n=10+10) Frexp-8 11.5ns ± 1% 11.0ns ± 0% -4.56% (p=0.000 n=8+10) HypotGo-8 19.4ns ± 0% 19.4ns ± 0% +0.36% (p=0.019 n=10+10) Ilogb-8 8.63ns ± 0% 8.51ns ± 0% -1.36% (p=0.000 n=10+10) Jn-8 584ns ± 0% 585ns ± 0% +0.17% (p=0.000 n=7+8) Ldexp-8 13.8ns ± 0% 13.5ns ± 0% -2.17% (p=0.002 n=8+10) Logb-8 10.2ns ± 0% 9.9ns ± 0% -2.65% (p=0.000 n=10+7) Nextafter64-8 7.54ns ± 0% 7.51ns ± 0% -0.37% (p=0.000 n=10+10) Remainder-8 73.5ns ± 1% 70.4ns ± 1% -4.27% (p=0.000 n=10+10) SqrtGoLatency-8 79.6ns ± 0% 76.2ns ± 0% -4.30% (p=0.000 n=9+10) Yn-8 582ns ± 0% 579ns ± 0% -0.52% (p=0.000 n=10+10) Change-Id: I0c9cd1ea87435e7b8bab94b4e79e6e29785f25b1 Reviewed-on: https://go-review.googlesource.com/132915 Reviewed-by: Cherry Zhang <cherryyz@google.com> Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
parent
7ab4b5586d
commit
2e5c32518c
3 changed files with 51 additions and 0 deletions
|
@ -101,6 +101,8 @@
|
|||
|
||||
// Load args directly into the register class where it will be used.
|
||||
(FMOVDgpfp <t> (Arg [off] {sym})) -> @b.Func.Entry (Arg <t> [off] {sym})
|
||||
(FMOVDfpgp <t> (Arg [off] {sym})) -> @b.Func.Entry (Arg <t> [off] {sym})
|
||||
|
||||
// Similarly for stores, if we see a store after FPR <-> GPR move, then redirect store to use the other register set.
|
||||
(MOVDstore ptr (FMOVDfpgp val) mem) -> (FMOVDstore ptr val mem)
|
||||
(FMOVDstore ptr (FMOVDgpfp val) mem) -> (MOVDstore ptr val mem)
|
||||
|
@ -1626,6 +1628,9 @@
|
|||
(SRLconst [c] (SLLconst [c] x)) && 0 < c && c < 64 -> (ANDconst [1<<uint(64-c)-1] x) // mask out high bits
|
||||
(SLLconst [c] (SRLconst [c] x)) && 0 < c && c < 64 -> (ANDconst [^(1<<uint(c)-1)] x) // mask out low bits
|
||||
|
||||
// Special case setting bit as 1. An example is math.Copysign(c,-1)
|
||||
(ORconst [c1] (ANDconst [c2] x)) && c2|c1 == ^0 -> (ORconst [c1] x)
|
||||
|
||||
// bitfield ops
|
||||
|
||||
// sbfiz
|
||||
|
|
|
@ -87,6 +87,8 @@ func rewriteValueARM64(v *Value) bool {
|
|||
return rewriteValueARM64_OpARM64FADDD_0(v)
|
||||
case OpARM64FADDS:
|
||||
return rewriteValueARM64_OpARM64FADDS_0(v)
|
||||
case OpARM64FMOVDfpgp:
|
||||
return rewriteValueARM64_OpARM64FMOVDfpgp_0(v)
|
||||
case OpARM64FMOVDgpfp:
|
||||
return rewriteValueARM64_OpARM64FMOVDgpfp_0(v)
|
||||
case OpARM64FMOVDload:
|
||||
|
@ -3960,6 +3962,30 @@ func rewriteValueARM64_OpARM64FADDS_0(v *Value) bool {
|
|||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueARM64_OpARM64FMOVDfpgp_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
// match: (FMOVDfpgp <t> (Arg [off] {sym}))
|
||||
// cond:
|
||||
// result: @b.Func.Entry (Arg <t> [off] {sym})
|
||||
for {
|
||||
t := v.Type
|
||||
v_0 := v.Args[0]
|
||||
if v_0.Op != OpArg {
|
||||
break
|
||||
}
|
||||
off := v_0.AuxInt
|
||||
sym := v_0.Aux
|
||||
b = b.Func.Entry
|
||||
v0 := b.NewValue0(v.Pos, OpArg, t)
|
||||
v.reset(OpCopy)
|
||||
v.AddArg(v0)
|
||||
v0.AuxInt = off
|
||||
v0.Aux = sym
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueARM64_OpARM64FMOVDgpfp_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
|
@ -21834,6 +21860,25 @@ func rewriteValueARM64_OpARM64ORconst_0(v *Value) bool {
|
|||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (ORconst [c1] (ANDconst [c2] x))
|
||||
// cond: c2|c1 == ^0
|
||||
// result: (ORconst [c1] x)
|
||||
for {
|
||||
c1 := v.AuxInt
|
||||
v_0 := v.Args[0]
|
||||
if v_0.Op != OpARM64ANDconst {
|
||||
break
|
||||
}
|
||||
c2 := v_0.AuxInt
|
||||
x := v_0.Args[0]
|
||||
if !(c2|c1 == ^0) {
|
||||
break
|
||||
}
|
||||
v.reset(OpARM64ORconst)
|
||||
v.AuxInt = c1
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueARM64_OpARM64ORshiftLL_0(v *Value) bool {
|
||||
|
|
|
@ -74,6 +74,7 @@ func copysign(a, b, c float64) {
|
|||
// amd64:"BTSQ\t[$]63"
|
||||
// s390x:"LNDFR\t",-"MOVD\t" (no integer load/store)
|
||||
// ppc64le:"FCPSGN"
|
||||
// arm64:"ORR\t[$]-9223372036854775808"
|
||||
sink64[1] = math.Copysign(c, -1)
|
||||
|
||||
// Like math.Copysign(c, -1), but with integer operations. Useful
|
||||
|
|
Loading…
Reference in a new issue