mirror of
https://github.com/golang/go
synced 2024-11-02 11:50:30 +00:00
cmd/compile: intrinsify Mul64 on riscv64
According to RISCV instruction set manual v2.2 Sec 6.1 MULHU followed by MUL will be fused into one multiply by microarchitecture Benchstat on Hifive unmatched: name old time/op new time/op delta Hash8Bytes 245ns ± 3% 186ns ± 4% -23.99% (p=0.000 n=10+10) Hash320Bytes 1.94µs ± 1% 1.31µs ± 1% -32.38% (p=0.000 n=9+10) Hash1K 5.84µs ± 0% 3.84µs ± 0% -34.20% (p=0.000 n=10+9) Hash8K 45.3µs ± 0% 29.4µs ± 0% -35.04% (p=0.000 n=10+10) name old speed new speed delta Hash8Bytes 32.7MB/s ± 3% 43.0MB/s ± 4% +31.61% (p=0.000 n=10+10) Hash320Bytes 165MB/s ± 1% 244MB/s ± 1% +47.88% (p=0.000 n=9+10) Hash1K 175MB/s ± 0% 266MB/s ± 0% +51.98% (p=0.000 n=10+9) Hash8K 181MB/s ± 0% 279MB/s ± 0% +53.94% (p=0.000 n=10+10) Change-Id: I3561495d02a4a0ad8578e9b9819bf0a4eaca5d12 Reviewed-on: https://go-review.googlesource.com/c/go/+/329970 Reviewed-by: Joel Sing <joel@sing.id.au> Run-TryBot: Joel Sing <joel@sing.id.au> TryBot-Result: Go Bot <gobot@golang.org> Trust: Meng Zhuo <mzh@golangcn.org>
This commit is contained in:
parent
7b7d7d7818
commit
efd206eb40
8 changed files with 43 additions and 3 deletions
|
@ -282,6 +282,21 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
|||
p.Reg = r1
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = r
|
||||
case ssa.OpRISCV64LoweredMuluhilo:
|
||||
r0 := v.Args[0].Reg()
|
||||
r1 := v.Args[1].Reg()
|
||||
p := s.Prog(riscv.AMULHU)
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = r1
|
||||
p.Reg = r0
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = v.Reg0()
|
||||
p1 := s.Prog(riscv.AMUL)
|
||||
p1.From.Type = obj.TYPE_REG
|
||||
p1.From.Reg = r1
|
||||
p1.Reg = r0
|
||||
p1.To.Type = obj.TYPE_REG
|
||||
p1.To.Reg = v.Reg1()
|
||||
case ssa.OpRISCV64FSQRTS, ssa.OpRISCV64FNEGS, ssa.OpRISCV64FSQRTD, ssa.OpRISCV64FNEGD,
|
||||
ssa.OpRISCV64FMVSX, ssa.OpRISCV64FMVDX,
|
||||
ssa.OpRISCV64FCVTSW, ssa.OpRISCV64FCVTSL, ssa.OpRISCV64FCVTWS, ssa.OpRISCV64FCVTLS,
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
(Sub64F ...) => (FSUBD ...)
|
||||
|
||||
(Mul64 ...) => (MUL ...)
|
||||
(Mul64uhilo ...) => (LoweredMuluhilo ...)
|
||||
(Mul32 ...) => (MULW ...)
|
||||
(Mul16 x y) => (MULW (SignExt16to32 x) (SignExt16to32 y))
|
||||
(Mul8 x y) => (MULW (SignExt8to32 x) (SignExt8to32 y))
|
||||
|
|
|
@ -123,6 +123,7 @@ func init() {
|
|||
gp01 = regInfo{outputs: []regMask{gpMask}}
|
||||
gp11 = regInfo{inputs: []regMask{gpMask}, outputs: []regMask{gpMask}}
|
||||
gp21 = regInfo{inputs: []regMask{gpMask, gpMask}, outputs: []regMask{gpMask}}
|
||||
gp22 = regInfo{inputs: []regMask{gpMask, gpMask}, outputs: []regMask{gpMask, gpMask}}
|
||||
gpload = regInfo{inputs: []regMask{gpspsbMask, 0}, outputs: []regMask{gpMask}}
|
||||
gp11sb = regInfo{inputs: []regMask{gpspsbMask}, outputs: []regMask{gpMask}}
|
||||
gpxchg = regInfo{inputs: []regMask{gpspsbgMask, gpgMask}, outputs: []regMask{gpMask}}
|
||||
|
@ -157,6 +158,8 @@ func init() {
|
|||
{name: "MULW", argLength: 2, reg: gp21, asm: "MULW", commutative: true, typ: "Int32"},
|
||||
{name: "MULH", argLength: 2, reg: gp21, asm: "MULH", commutative: true, typ: "Int64"},
|
||||
{name: "MULHU", argLength: 2, reg: gp21, asm: "MULHU", commutative: true, typ: "UInt64"},
|
||||
{name: "LoweredMuluhilo", argLength: 2, reg: gp22, resultNotInArgs: true}, // arg0 * arg1, return (hi, lo)
|
||||
|
||||
{name: "DIV", argLength: 2, reg: gp21, asm: "DIV", typ: "Int64"}, // arg0 / arg1
|
||||
{name: "DIVU", argLength: 2, reg: gp21, asm: "DIVU", typ: "UInt64"},
|
||||
{name: "DIVW", argLength: 2, reg: gp21, asm: "DIVW", typ: "Int32"},
|
||||
|
|
|
@ -2069,6 +2069,7 @@ const (
|
|||
OpRISCV64MULW
|
||||
OpRISCV64MULH
|
||||
OpRISCV64MULHU
|
||||
OpRISCV64LoweredMuluhilo
|
||||
OpRISCV64DIV
|
||||
OpRISCV64DIVU
|
||||
OpRISCV64DIVW
|
||||
|
@ -27603,6 +27604,21 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "LoweredMuluhilo",
|
||||
argLen: 2,
|
||||
resultNotInArgs: true,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
|
||||
{1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
|
||||
{1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "DIV",
|
||||
argLen: 2,
|
||||
|
|
|
@ -356,6 +356,9 @@ func rewriteValueRISCV64(v *Value) bool {
|
|||
case OpMul64F:
|
||||
v.Op = OpRISCV64FMULD
|
||||
return true
|
||||
case OpMul64uhilo:
|
||||
v.Op = OpRISCV64LoweredMuluhilo
|
||||
return true
|
||||
case OpMul8:
|
||||
return rewriteValueRISCV64_OpMul8(v)
|
||||
case OpNeg16:
|
||||
|
|
|
@ -4505,9 +4505,9 @@ func InitTables() {
|
|||
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
return s.newValue2(ssa.OpMul64uhilo, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1])
|
||||
},
|
||||
sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.MIPS64)
|
||||
alias("math/bits", "Mul", "math/bits", "Mul64", sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64, sys.ArchPPC64LE, sys.ArchS390X, sys.ArchMIPS64, sys.ArchMIPS64LE)
|
||||
alias("runtime/internal/math", "Mul64", "math/bits", "Mul64", sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64, sys.ArchPPC64LE, sys.ArchS390X, sys.ArchMIPS64, sys.ArchMIPS64LE)
|
||||
sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.MIPS64, sys.RISCV64)
|
||||
alias("math/bits", "Mul", "math/bits", "Mul64", sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64, sys.ArchPPC64LE, sys.ArchS390X, sys.ArchMIPS64, sys.ArchMIPS64LE, sys.ArchRISCV64)
|
||||
alias("runtime/internal/math", "Mul64", "math/bits", "Mul64", sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64, sys.ArchPPC64LE, sys.ArchS390X, sys.ArchMIPS64, sys.ArchMIPS64LE, sys.ArchRISCV64)
|
||||
addF("math/bits", "Add64",
|
||||
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
return s.newValue3(ssa.OpAdd64carry, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2])
|
||||
|
|
|
@ -710,6 +710,7 @@ func Mul64(x, y uint64) (hi, lo uint64) {
|
|||
// ppc64le:"MULHDU","MULLD"
|
||||
// s390x:"MLGR"
|
||||
// mips64: "MULVU"
|
||||
// riscv64:"MULHU","MUL"
|
||||
return bits.Mul64(x, y)
|
||||
}
|
||||
|
||||
|
|
|
@ -1756,6 +1756,7 @@ var (
|
|||
"ppc64le": {"GOPPC64", "power8", "power9"},
|
||||
"s390x": {},
|
||||
"wasm": {},
|
||||
"riscv64": {},
|
||||
}
|
||||
)
|
||||
|
||||
|
|
Loading…
Reference in a new issue