cmd/compile: optimize arm64 comparison of x and 0.0 with "FCMP $(0.0), Fn"

Code:
func comp(x float64) bool {return x < 0}

Previous version:
  FMOVD	"".x(FP), F0
  FMOVD	ZR, F1
  FCMPD	F1, F0
  CSET	MI, R0
  MOVB	R0, "".~r1+8(FP)
  RET	(R30)

Optimized version:
  FMOVD	"".x(FP), F0
  FCMPD	$(0.0), F0
  CSET	MI, R0
  MOVB	R0, "".~r1+8(FP)
  RET	(R30)

Math package benchmark results:
name                   old time/op          new time/op          delta
Acos-8                   77.500000ns +- 0%    77.400000ns +- 0%   -0.13%  (p=0.000 n=9+10)
Acosh-8                  98.600000ns +- 0%    98.100000ns +- 0%   -0.51%  (p=0.000 n=10+9)
Asin-8                   67.600000ns +- 0%    66.600000ns +- 0%   -1.48%  (p=0.000 n=9+10)
Asinh-8                 108.000000ns +- 0%   109.000000ns +- 0%   +0.93%  (p=0.000 n=10+10)
Atan-8                   36.788889ns +- 0%    36.000000ns +- 0%   -2.14%  (p=0.000 n=9+10)
Atanh-8                 104.000000ns +- 0%   105.000000ns +- 0%   +0.96%  (p=0.000 n=10+10)
Atan2-8                  67.100000ns +- 0%    66.600000ns +- 0%   -0.75%  (p=0.000 n=10+10)
Cbrt-8                   89.100000ns +- 0%    82.000000ns +- 0%   -7.97%  (p=0.000 n=10+10)
Erf-8                    43.500000ns +- 0%    43.000000ns +- 0%   -1.15%  (p=0.000 n=10+10)
Erfc-8                   49.000000ns +- 0%    48.220000ns +- 0%   -1.59%  (p=0.000 n=9+10)
Erfinv-8                 59.100000ns +- 0%    58.600000ns +- 0%   -0.85%  (p=0.000 n=10+10)
Erfcinv-8                59.100000ns +- 0%    58.600000ns +- 0%   -0.85%  (p=0.000 n=10+10)
Expm1-8                  56.600000ns +- 0%    56.040000ns +- 0%   -0.99%  (p=0.000 n=8+10)
Exp2Go-8                 97.600000ns +- 0%    99.400000ns +- 0%   +1.84%  (p=0.000 n=10+10)
Dim-8                     2.500000ns +- 0%     2.250000ns +- 0%  -10.00%  (p=0.000 n=10+10)
Mod-8                   108.000000ns +- 0%   106.000000ns +- 0%   -1.85%  (p=0.000 n=8+8)
Frexp-8                  12.000000ns +- 0%    12.500000ns +- 0%   +4.17%  (p=0.000 n=10+10)
Gamma-8                  67.100000ns +- 0%    67.600000ns +- 0%   +0.75%  (p=0.000 n=10+10)
Hypot-8                  17.100000ns +- 0%    17.000000ns +- 0%   -0.58%  (p=0.002 n=8+10)
Ilogb-8                   9.010000ns +- 0%     8.510000ns +- 0%   -5.55%  (p=0.000 n=10+9)
J1-8                    288.000000ns +- 0%   287.000000ns +- 0%   -0.35%  (p=0.000 n=10+10)
Jn-8                    605.000000ns +- 0%   604.000000ns +- 0%   -0.17%  (p=0.001 n=8+9)
Logb-8                   10.600000ns +- 0%    10.500000ns +- 0%   -0.94%  (p=0.000 n=9+10)
Log2-8                   16.500000ns +- 0%    17.000000ns +- 0%   +3.03%  (p=0.000 n=10+10)
PowFrac-8               232.000000ns +- 0%   233.000000ns +- 0%   +0.43%  (p=0.000 n=10+10)
Remainder-8              70.600000ns +- 0%    69.600000ns +- 0%   -1.42%  (p=0.000 n=10+10)
SqrtGoLatency-8          77.600000ns +- 0%    76.600000ns +- 0%   -1.29%  (p=0.000 n=10+10)
Tanh-8                   97.600000ns +- 0%    94.100000ns +- 0%   -3.59%  (p=0.000 n=10+10)
Y1-8                    289.000000ns +- 0%   288.000000ns +- 0%   -0.35%  (p=0.000 n=10+10)
Yn-8                    603.000000ns +- 0%   589.000000ns +- 0%   -2.32%  (p=0.000 n=10+10)

Change-Id: I6920734f8662b329aa58f5b8e4eeae73b409984d
Reviewed-on: https://go-review.googlesource.com/c/go/+/164719
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
fanzha02 2019-02-15 11:21:46 +00:00 committed by Cherry Zhang
parent 6efd51c6b7
commit 27cce773d3
5 changed files with 199 additions and 0 deletions

View file

@ -301,6 +301,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.From.Val = math.Float64frombits(uint64(v.AuxInt))
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpARM64FCMPS0,
ssa.OpARM64FCMPD0:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_FCONST
p.From.Val = math.Float64frombits(0)
p.Reg = v.Args[0].Reg()
case ssa.OpARM64CMP,
ssa.OpARM64CMPW,
ssa.OpARM64CMN,

View file

@ -348,6 +348,12 @@
(Geq32U x y) -> (GreaterEqualU (CMPW x y))
(Geq64U x y) -> (GreaterEqualU (CMP x y))
// Optimize comparision between a floating-point value and 0.0 with "FCMP $(0.0), Fn"
(FCMPS x (FMOVSconst [0])) -> (FCMPS0 x)
(FCMPS (FMOVSconst [0]) x) -> (InvertFlags (FCMPS0 x))
(FCMPD x (FMOVDconst [0])) -> (FCMPD0 x)
(FCMPD (FMOVDconst [0]) x) -> (InvertFlags (FCMPD0 x))
// CSEL needs a flag-generating argument. Synthesize a CMPW if necessary.
(CondSelect x y bool) && flagArg(bool) != nil -> (CSEL {bool.Op} x y flagArg(bool))
(CondSelect x y bool) && flagArg(bool) == nil -> (CSEL {OpARM64NotEqual} x y (CMPWconst [0] bool))
@ -1615,6 +1621,10 @@
(LessEqualU (InvertFlags x)) -> (GreaterEqualU x)
(GreaterEqual (InvertFlags x)) -> (LessEqual x)
(GreaterEqualU (InvertFlags x)) -> (LessEqualU x)
(LessThanF (InvertFlags x)) -> (GreaterThanF x)
(LessEqualF (InvertFlags x)) -> (GreaterEqualF x)
(GreaterThanF (InvertFlags x)) -> (LessThanF x)
(GreaterEqualF (InvertFlags x)) -> (LessEqualF x)
// Boolean-generating instructions always
// zero upper bit of the register; no need to zero-extend

View file

@ -158,6 +158,7 @@ func init() {
fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
fp31 = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}}
fp2flags = regInfo{inputs: []regMask{fp, fp}}
fp1flags = regInfo{inputs: []regMask{fp}}
fpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}}
fp2load = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{fp}}
fpstore = regInfo{inputs: []regMask{gpspsbg, fp}}
@ -271,6 +272,8 @@ func init() {
{name: "TSTWconst", argLength: 1, reg: gp1flags, asm: "TSTW", aux: "Int32", typ: "Flags"}, // arg0 & auxInt compare to 0, 32 bit
{name: "FCMPS", argLength: 2, reg: fp2flags, asm: "FCMPS", typ: "Flags"}, // arg0 compare to arg1, float32
{name: "FCMPD", argLength: 2, reg: fp2flags, asm: "FCMPD", typ: "Flags"}, // arg0 compare to arg1, float64
{name: "FCMPS0", argLength: 1, reg: fp1flags, asm: "FCMPS", typ: "Flags"}, // arg0 compare to 0, float32
{name: "FCMPD0", argLength: 1, reg: fp1flags, asm: "FCMPD", typ: "Flags"}, // arg0 compare to 0, float64
// shifted ops
{name: "MVNshiftLL", argLength: 1, reg: gp11, asm: "MVN", aux: "Int64"}, // ^(arg0<<auxInt)

View file

@ -1218,6 +1218,8 @@ const (
OpARM64TSTWconst
OpARM64FCMPS
OpARM64FCMPD
OpARM64FCMPS0
OpARM64FCMPD0
OpARM64MVNshiftLL
OpARM64MVNshiftRL
OpARM64MVNshiftRA
@ -16146,6 +16148,26 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "FCMPS0",
argLen: 1,
asm: arm64.AFCMPS,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "FCMPD0",
argLen: 1,
asm: arm64.AFCMPD,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "MVNshiftLL",
auxType: auxInt64,

View file

@ -95,6 +95,10 @@ func rewriteValueARM64(v *Value) bool {
return rewriteValueARM64_OpARM64FADDD_0(v)
case OpARM64FADDS:
return rewriteValueARM64_OpARM64FADDS_0(v)
case OpARM64FCMPD:
return rewriteValueARM64_OpARM64FCMPD_0(v)
case OpARM64FCMPS:
return rewriteValueARM64_OpARM64FCMPS_0(v)
case OpARM64FMOVDfpgp:
return rewriteValueARM64_OpARM64FMOVDfpgp_0(v)
case OpARM64FMOVDgpfp:
@ -133,18 +137,26 @@ func rewriteValueARM64(v *Value) bool {
return rewriteValueARM64_OpARM64FSUBS_0(v)
case OpARM64GreaterEqual:
return rewriteValueARM64_OpARM64GreaterEqual_0(v)
case OpARM64GreaterEqualF:
return rewriteValueARM64_OpARM64GreaterEqualF_0(v)
case OpARM64GreaterEqualU:
return rewriteValueARM64_OpARM64GreaterEqualU_0(v)
case OpARM64GreaterThan:
return rewriteValueARM64_OpARM64GreaterThan_0(v)
case OpARM64GreaterThanF:
return rewriteValueARM64_OpARM64GreaterThanF_0(v)
case OpARM64GreaterThanU:
return rewriteValueARM64_OpARM64GreaterThanU_0(v)
case OpARM64LessEqual:
return rewriteValueARM64_OpARM64LessEqual_0(v)
case OpARM64LessEqualF:
return rewriteValueARM64_OpARM64LessEqualF_0(v)
case OpARM64LessEqualU:
return rewriteValueARM64_OpARM64LessEqualU_0(v)
case OpARM64LessThan:
return rewriteValueARM64_OpARM64LessThan_0(v)
case OpARM64LessThanF:
return rewriteValueARM64_OpARM64LessThanF_0(v)
case OpARM64LessThanU:
return rewriteValueARM64_OpARM64LessThanU_0(v)
case OpARM64MADD:
@ -5224,6 +5236,88 @@ func rewriteValueARM64_OpARM64FADDS_0(v *Value) bool {
}
return false
}
func rewriteValueARM64_OpARM64FCMPD_0(v *Value) bool {
b := v.Block
_ = b
// match: (FCMPD x (FMOVDconst [0]))
// cond:
// result: (FCMPD0 x)
for {
_ = v.Args[1]
x := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpARM64FMOVDconst {
break
}
if v_1.AuxInt != 0 {
break
}
v.reset(OpARM64FCMPD0)
v.AddArg(x)
return true
}
// match: (FCMPD (FMOVDconst [0]) x)
// cond:
// result: (InvertFlags (FCMPD0 x))
for {
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpARM64FMOVDconst {
break
}
if v_0.AuxInt != 0 {
break
}
x := v.Args[1]
v.reset(OpARM64InvertFlags)
v0 := b.NewValue0(v.Pos, OpARM64FCMPD0, types.TypeFlags)
v0.AddArg(x)
v.AddArg(v0)
return true
}
return false
}
func rewriteValueARM64_OpARM64FCMPS_0(v *Value) bool {
b := v.Block
_ = b
// match: (FCMPS x (FMOVSconst [0]))
// cond:
// result: (FCMPS0 x)
for {
_ = v.Args[1]
x := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpARM64FMOVSconst {
break
}
if v_1.AuxInt != 0 {
break
}
v.reset(OpARM64FCMPS0)
v.AddArg(x)
return true
}
// match: (FCMPS (FMOVSconst [0]) x)
// cond:
// result: (InvertFlags (FCMPS0 x))
for {
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpARM64FMOVSconst {
break
}
if v_0.AuxInt != 0 {
break
}
x := v.Args[1]
v.reset(OpARM64InvertFlags)
v0 := b.NewValue0(v.Pos, OpARM64FCMPS0, types.TypeFlags)
v0.AddArg(x)
v.AddArg(v0)
return true
}
return false
}
func rewriteValueARM64_OpARM64FMOVDfpgp_0(v *Value) bool {
b := v.Block
_ = b
@ -6310,6 +6404,22 @@ func rewriteValueARM64_OpARM64GreaterEqual_0(v *Value) bool {
}
return false
}
func rewriteValueARM64_OpARM64GreaterEqualF_0(v *Value) bool {
// match: (GreaterEqualF (InvertFlags x))
// cond:
// result: (LessEqualF x)
for {
v_0 := v.Args[0]
if v_0.Op != OpARM64InvertFlags {
break
}
x := v_0.Args[0]
v.reset(OpARM64LessEqualF)
v.AddArg(x)
return true
}
return false
}
func rewriteValueARM64_OpARM64GreaterEqualU_0(v *Value) bool {
// match: (GreaterEqualU (FlagEQ))
// cond:
@ -6462,6 +6572,22 @@ func rewriteValueARM64_OpARM64GreaterThan_0(v *Value) bool {
}
return false
}
func rewriteValueARM64_OpARM64GreaterThanF_0(v *Value) bool {
// match: (GreaterThanF (InvertFlags x))
// cond:
// result: (LessThanF x)
for {
v_0 := v.Args[0]
if v_0.Op != OpARM64InvertFlags {
break
}
x := v_0.Args[0]
v.reset(OpARM64LessThanF)
v.AddArg(x)
return true
}
return false
}
func rewriteValueARM64_OpARM64GreaterThanU_0(v *Value) bool {
// match: (GreaterThanU (FlagEQ))
// cond:
@ -6614,6 +6740,22 @@ func rewriteValueARM64_OpARM64LessEqual_0(v *Value) bool {
}
return false
}
func rewriteValueARM64_OpARM64LessEqualF_0(v *Value) bool {
// match: (LessEqualF (InvertFlags x))
// cond:
// result: (GreaterEqualF x)
for {
v_0 := v.Args[0]
if v_0.Op != OpARM64InvertFlags {
break
}
x := v_0.Args[0]
v.reset(OpARM64GreaterEqualF)
v.AddArg(x)
return true
}
return false
}
func rewriteValueARM64_OpARM64LessEqualU_0(v *Value) bool {
// match: (LessEqualU (FlagEQ))
// cond:
@ -6766,6 +6908,22 @@ func rewriteValueARM64_OpARM64LessThan_0(v *Value) bool {
}
return false
}
func rewriteValueARM64_OpARM64LessThanF_0(v *Value) bool {
// match: (LessThanF (InvertFlags x))
// cond:
// result: (GreaterThanF x)
for {
v_0 := v.Args[0]
if v_0.Op != OpARM64InvertFlags {
break
}
x := v_0.Args[0]
v.reset(OpARM64GreaterThanF)
v.AddArg(x)
return true
}
return false
}
func rewriteValueARM64_OpARM64LessThanU_0(v *Value) bool {
// match: (LessThanU (FlagEQ))
// cond: