cmd/compile: optimize math.Copysign on arm64

Add rewrite rules to optimize math.Copysign() when the second
argument is negative floating point constant.

For example, math.Copysign(c, -2): The previous compile output is
"AND $9223372036854775807, R0, R0; ORR $-9223372036854775808, R0, R0".
The optimized compile output is "ORR $-9223372036854775808, R0, R0"

Math package benchmark results.
name                   old time/op  new time/op  delta
Copysign-8             2.61ns ± 2%  2.49ns ± 0%  -4.55%  (p=0.000 n=10+10)
Cos-8                  43.0ns ± 0%  41.5ns ± 0%  -3.49%  (p=0.000 n=10+10)
Cosh-8                 98.6ns ± 0%  98.1ns ± 0%  -0.51%  (p=0.000 n=10+10)
ExpGo-8                 107ns ± 0%   105ns ± 0%  -1.87%  (p=0.000 n=10+10)
Exp2Go-8                100ns ± 0%   100ns ± 0%  +0.39%  (p=0.000 n=10+8)
Max-8                  6.56ns ± 2%  6.45ns ± 1%  -1.63%  (p=0.002 n=10+10)
Min-8                  6.66ns ± 3%  6.47ns ± 2%  -2.82%  (p=0.006 n=10+10)
Mod-8                   107ns ± 1%   104ns ± 1%  -2.72%  (p=0.000 n=10+10)
Frexp-8                11.5ns ± 1%  11.0ns ± 0%  -4.56%  (p=0.000 n=8+10)
HypotGo-8              19.4ns ± 0%  19.4ns ± 0%  +0.36%  (p=0.019 n=10+10)
Ilogb-8                8.63ns ± 0%  8.51ns ± 0%  -1.36%  (p=0.000 n=10+10)
Jn-8                    584ns ± 0%   585ns ± 0%  +0.17%  (p=0.000 n=7+8)
Ldexp-8                13.8ns ± 0%  13.5ns ± 0%  -2.17%  (p=0.002 n=8+10)
Logb-8                 10.2ns ± 0%   9.9ns ± 0%  -2.65%  (p=0.000 n=10+7)
Nextafter64-8          7.54ns ± 0%  7.51ns ± 0%  -0.37%  (p=0.000 n=10+10)
Remainder-8            73.5ns ± 1%  70.4ns ± 1%  -4.27%  (p=0.000 n=10+10)
SqrtGoLatency-8        79.6ns ± 0%  76.2ns ± 0%  -4.30%  (p=0.000 n=9+10)
Yn-8                    582ns ± 0%   579ns ± 0%  -0.52%  (p=0.000 n=10+10)

Change-Id: I0c9cd1ea87435e7b8bab94b4e79e6e29785f25b1
Reviewed-on: https://go-review.googlesource.com/132915
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
fanzha02 2018-08-21 04:57:03 +00:00 committed by Cherry Zhang
parent 7ab4b5586d
commit 2e5c32518c
3 changed files with 51 additions and 0 deletions

View file

@ -101,6 +101,8 @@
// Load args directly into the register class where it will be used.
(FMOVDgpfp <t> (Arg [off] {sym})) -> @b.Func.Entry (Arg <t> [off] {sym})
(FMOVDfpgp <t> (Arg [off] {sym})) -> @b.Func.Entry (Arg <t> [off] {sym})
// Similarly for stores, if we see a store after FPR <-> GPR move, then redirect store to use the other register set.
(MOVDstore ptr (FMOVDfpgp val) mem) -> (FMOVDstore ptr val mem)
(FMOVDstore ptr (FMOVDgpfp val) mem) -> (MOVDstore ptr val mem)
@ -1626,6 +1628,9 @@
(SRLconst [c] (SLLconst [c] x)) && 0 < c && c < 64 -> (ANDconst [1<<uint(64-c)-1] x) // mask out high bits
(SLLconst [c] (SRLconst [c] x)) && 0 < c && c < 64 -> (ANDconst [^(1<<uint(c)-1)] x) // mask out low bits
// Special case setting bit as 1. An example is math.Copysign(c,-1)
(ORconst [c1] (ANDconst [c2] x)) && c2|c1 == ^0 -> (ORconst [c1] x)
// bitfield ops
// sbfiz

View file

@ -87,6 +87,8 @@ func rewriteValueARM64(v *Value) bool {
return rewriteValueARM64_OpARM64FADDD_0(v)
case OpARM64FADDS:
return rewriteValueARM64_OpARM64FADDS_0(v)
case OpARM64FMOVDfpgp:
return rewriteValueARM64_OpARM64FMOVDfpgp_0(v)
case OpARM64FMOVDgpfp:
return rewriteValueARM64_OpARM64FMOVDgpfp_0(v)
case OpARM64FMOVDload:
@ -3960,6 +3962,30 @@ func rewriteValueARM64_OpARM64FADDS_0(v *Value) bool {
}
return false
}
func rewriteValueARM64_OpARM64FMOVDfpgp_0(v *Value) bool {
b := v.Block
_ = b
// match: (FMOVDfpgp <t> (Arg [off] {sym}))
// cond:
// result: @b.Func.Entry (Arg <t> [off] {sym})
for {
t := v.Type
v_0 := v.Args[0]
if v_0.Op != OpArg {
break
}
off := v_0.AuxInt
sym := v_0.Aux
b = b.Func.Entry
v0 := b.NewValue0(v.Pos, OpArg, t)
v.reset(OpCopy)
v.AddArg(v0)
v0.AuxInt = off
v0.Aux = sym
return true
}
return false
}
func rewriteValueARM64_OpARM64FMOVDgpfp_0(v *Value) bool {
b := v.Block
_ = b
@ -21834,6 +21860,25 @@ func rewriteValueARM64_OpARM64ORconst_0(v *Value) bool {
v.AddArg(x)
return true
}
// match: (ORconst [c1] (ANDconst [c2] x))
// cond: c2|c1 == ^0
// result: (ORconst [c1] x)
for {
c1 := v.AuxInt
v_0 := v.Args[0]
if v_0.Op != OpARM64ANDconst {
break
}
c2 := v_0.AuxInt
x := v_0.Args[0]
if !(c2|c1 == ^0) {
break
}
v.reset(OpARM64ORconst)
v.AuxInt = c1
v.AddArg(x)
return true
}
return false
}
func rewriteValueARM64_OpARM64ORshiftLL_0(v *Value) bool {

View file

@ -74,6 +74,7 @@ func copysign(a, b, c float64) {
// amd64:"BTSQ\t[$]63"
// s390x:"LNDFR\t",-"MOVD\t" (no integer load/store)
// ppc64le:"FCPSGN"
// arm64:"ORR\t[$]-9223372036854775808"
sink64[1] = math.Copysign(c, -1)
// Like math.Copysign(c, -1), but with integer operations. Useful