cmd/compile: intrinsify Add64 on mips64

This CL intrinsify Add64 on mips64.

pkg: math/bits
                  _   sec/op    _   sec/op     vs base               _
Add64-4             2.783n _ 0%   1.950n _ 0%  -29.93% (p=0.000 n=8)
Add64multiple-4     5.713n _ 0%   3.063n _ 0%  -46.38% (p=0.000 n=8)

pkg: crypto/elliptic
                                     _    sec/op    _   sec/op     vs base               _
ScalarBaseMult/P256-4                   353.7_ _ 0%   282.7_ _ 0%  -20.09% (p=0.000 n=8)
ScalarBaseMult/P224-4                   330.5_ _ 0%   250.0_ _ 0%  -24.37% (p=0.000 n=8)
ScalarBaseMult/P384-4                  1228.8_ _ 0%   791.5_ _ 0%  -35.59% (p=0.000 n=8)
ScalarBaseMult/P521-4                  15.412m _ 0%   2.438m _ 0%  -84.18% (p=0.000 n=8)
ScalarMult/P256-4                      1189.4_ _ 0%   904.2_ _ 0%  -23.98% (p=0.000 n=8)
ScalarMult/P224-4                      1138.8_ _ 0%   813.8_ _ 0%  -28.54% (p=0.000 n=8)
ScalarMult/P384-4                       4.419m _ 0%   2.692m _ 0%  -39.08% (p=0.000 n=8)
ScalarMult/P521-4                      59.768m _ 0%   8.773m _ 0%  -85.32% (p=0.000 n=8)
MarshalUnmarshal/P256/Uncompressed-4    8.697_ _ 1%   7.923_ _ 1%   -8.91% (p=0.000 n=8)
MarshalUnmarshal/P256/Compressed-4     104.75_ _ 0%   66.29_ _ 0%  -36.72% (p=0.000 n=8)
MarshalUnmarshal/P224/Uncompressed-4    8.728_ _ 1%   7.823_ _ 1%  -10.37% (p=0.000 n=8)
MarshalUnmarshal/P224/Compressed-4     1035.7_ _ 0%   676.5_ _ 2%  -34.69% (p=0.000 n=8)
MarshalUnmarshal/P384/Uncompressed-4    15.32_ _ 1%   11.81_ _ 1%  -22.90% (p=0.000 n=8)
MarshalUnmarshal/P384/Compressed-4      399.8_ _ 0%   217.4_ _ 0%  -45.62% (p=0.000 n=8)
MarshalUnmarshal/P521/Uncompressed-4    96.79_ _ 0%   20.32_ _ 0%  -79.01% (p=0.000 n=8)
MarshalUnmarshal/P521/Compressed-4     6640.4_ _ 0%   790.8_ _ 0%  -88.09% (p=0.000 n=8)

Change-Id: I8a0960b9665720c1d3e57dce36386e74db37fefa
Reviewed-on: https://go-review.googlesource.com/c/go/+/498496
Reviewed-by: Heschi Kreinick <heschi@google.com>
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Joel Sing <joel@sing.id.au>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
Junxian Zhu 2023-05-26 13:26:51 +08:00 committed by Joel Sing
parent 457721cd52
commit 5f8a2fdf09
4 changed files with 76 additions and 1 deletions

View file

@ -38,6 +38,10 @@
(Mod8 x y) => (Select0 (DIVV (SignExt8to64 x) (SignExt8to64 y)))
(Mod8u x y) => (Select0 (DIVVU (ZeroExt8to64 x) (ZeroExt8to64 y)))
(Select0 <t> (Add64carry x y c)) => (ADDV (ADDV <t> x y) c)
(Select1 <t> (Add64carry x y c)) =>
(OR (SGTU <t> x s:(ADDV <t> x y)) (SGTU <t> s (ADDV <t> s c)))
// math package intrinsics
(Abs ...) => (ABSD ...)
@ -798,6 +802,10 @@
(GEZ (MOVVconst [c]) yes no) && c >= 0 => (First yes no)
(GEZ (MOVVconst [c]) yes no) && c < 0 => (First no yes)
// SGT/SGTU with known outcomes.
(SGT x x) => (MOVVconst [0])
(SGTU x x) => (MOVVconst [0])
// fold readonly sym load
(MOVBload [off] {sym} (SB) _) && symIsRO(sym) => (MOVVconst [int64(read8(sym, int64(off)))])
(MOVHload [off] {sym} (SB) _) && symIsRO(sym) => (MOVVconst [int64(read16(sym, int64(off), config.ctxt.Arch.ByteOrder))])

View file

@ -4797,6 +4797,17 @@ func rewriteValueMIPS64_OpMIPS64SGT(v *Value) bool {
v.AddArg(x)
return true
}
// match: (SGT x x)
// result: (MOVVconst [0])
for {
x := v_0
if x != v_1 {
break
}
v.reset(OpMIPS64MOVVconst)
v.AuxInt = int64ToAuxInt(0)
return true
}
return false
}
func rewriteValueMIPS64_OpMIPS64SGTU(v *Value) bool {
@ -4819,6 +4830,17 @@ func rewriteValueMIPS64_OpMIPS64SGTU(v *Value) bool {
v.AddArg(x)
return true
}
// match: (SGTU x x)
// result: (MOVVconst [0])
for {
x := v_0
if x != v_1 {
break
}
v.reset(OpMIPS64MOVVconst)
v.AuxInt = int64ToAuxInt(0)
return true
}
return false
}
func rewriteValueMIPS64_OpMIPS64SGTUconst(v *Value) bool {
@ -7315,6 +7337,22 @@ func rewriteValueMIPS64_OpSelect0(v *Value) bool {
v.AddArg(v0)
return true
}
// match: (Select0 <t> (Add64carry x y c))
// result: (ADDV (ADDV <t> x y) c)
for {
t := v.Type
if v_0.Op != OpAdd64carry {
break
}
c := v_0.Args[2]
x := v_0.Args[0]
y := v_0.Args[1]
v.reset(OpMIPS64ADDV)
v0 := b.NewValue0(v.Pos, OpMIPS64ADDV, t)
v0.AddArg2(x, y)
v.AddArg2(v0, c)
return true
}
// match: (Select0 (DIVVU _ (MOVVconst [1])))
// result: (MOVVconst [0])
for {
@ -7427,6 +7465,28 @@ func rewriteValueMIPS64_OpSelect1(v *Value) bool {
v.AddArg2(v0, v2)
return true
}
// match: (Select1 <t> (Add64carry x y c))
// result: (OR (SGTU <t> x s:(ADDV <t> x y)) (SGTU <t> s (ADDV <t> s c)))
for {
t := v.Type
if v_0.Op != OpAdd64carry {
break
}
c := v_0.Args[2]
x := v_0.Args[0]
y := v_0.Args[1]
v.reset(OpMIPS64OR)
v0 := b.NewValue0(v.Pos, OpMIPS64SGTU, t)
s := b.NewValue0(v.Pos, OpMIPS64ADDV, t)
s.AddArg2(x, y)
v0.AddArg2(x, s)
v2 := b.NewValue0(v.Pos, OpMIPS64SGTU, t)
v3 := b.NewValue0(v.Pos, OpMIPS64ADDV, t)
v3.AddArg2(s, c)
v2.AddArg2(s, v3)
v.AddArg2(v0, v2)
return true
}
// match: (Select1 (MULVU x (MOVVconst [-1])))
// result: (NEGV x)
for {

View file

@ -4843,7 +4843,7 @@ func InitTables() {
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue3(ssa.OpAdd64carry, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2])
},
sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64, sys.Loong64)
sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64, sys.Loong64, sys.MIPS64)
alias("math/bits", "Add", "math/bits", "Add64", p8...)
addF("math/bits", "Sub64",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {

View file

@ -434,6 +434,7 @@ func AddC(x, ci uint) (r, co uint) {
// loong64: "ADDV", "SGTU"
// ppc64x: "ADDC", "ADDE", "ADDZE"
// s390x:"ADDE","ADDC\t[$]-1,"
// mips64:"ADDV","SGTU"
// riscv64: "ADD","SLTU"
return bits.Add(x, 7, ci)
}
@ -444,6 +445,7 @@ func AddZ(x, y uint) (r, co uint) {
// loong64: "ADDV", "SGTU"
// ppc64x: "ADDC", -"ADDE", "ADDZE"
// s390x:"ADDC",-"ADDC\t[$]-1,"
// mips64:"ADDV","SGTU"
// riscv64: "ADD","SLTU"
return bits.Add(x, y, 0)
}
@ -454,6 +456,7 @@ func AddR(x, y, ci uint) uint {
// loong64: "ADDV", -"SGTU"
// ppc64x: "ADDC", "ADDE", -"ADDZE"
// s390x:"ADDE","ADDC\t[$]-1,"
// mips64:"ADDV",-"SGTU"
// riscv64: "ADD",-"SLTU"
r, _ := bits.Add(x, y, ci)
return r
@ -475,6 +478,7 @@ func Add64(x, y, ci uint64) (r, co uint64) {
// loong64: "ADDV", "SGTU"
// ppc64x: "ADDC", "ADDE", "ADDZE"
// s390x:"ADDE","ADDC\t[$]-1,"
// mips64:"ADDV","SGTU"
// riscv64: "ADD","SLTU"
return bits.Add64(x, y, ci)
}
@ -485,6 +489,7 @@ func Add64C(x, ci uint64) (r, co uint64) {
// loong64: "ADDV", "SGTU"
// ppc64x: "ADDC", "ADDE", "ADDZE"
// s390x:"ADDE","ADDC\t[$]-1,"
// mips64:"ADDV","SGTU"
// riscv64: "ADD","SLTU"
return bits.Add64(x, 7, ci)
}
@ -495,6 +500,7 @@ func Add64Z(x, y uint64) (r, co uint64) {
// loong64: "ADDV", "SGTU"
// ppc64x: "ADDC", -"ADDE", "ADDZE"
// s390x:"ADDC",-"ADDC\t[$]-1,"
// mips64:"ADDV","SGTU"
// riscv64: "ADD","SLTU"
return bits.Add64(x, y, 0)
}
@ -505,6 +511,7 @@ func Add64R(x, y, ci uint64) uint64 {
// loong64: "ADDV", -"SGTU"
// ppc64x: "ADDC", "ADDE", -"ADDZE"
// s390x:"ADDE","ADDC\t[$]-1,"
// mips64:"ADDV",-"SGTU"
// riscv64: "ADD",-"SLTU"
r, _ := bits.Add64(x, y, ci)
return r