cmd/compile: ARM64 optimize []float64 and []float32 access

Optimize load and store to []float64 and []float32.
Previously it used LSL instead of shifted register indexed load/store.

Before:

    LSL   $3, R0, R0
    FMOVD F0, (R1)(R0)

After:

    FMOVD F0, (R1)(R0<<3)

Fixes #42798

Change-Id: I0c0912140c3dce5aa6abc27097c0eb93833cc589
Reviewed-on: https://go-review.googlesource.com/c/go/+/273706
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Trust: Giovanni Bajo <rasky@develer.com>
This commit is contained in:
Egon Elbre 2020-11-27 17:10:33 +02:00 committed by Cherry Zhang
parent 80ddc17ae1
commit 3ee32439b5
7 changed files with 396 additions and 14 deletions

View file

@ -100,9 +100,11 @@ func genIndexedOperand(v *ssa.Value) obj.Addr {
// Reg: base register, Index: (shifted) index register
mop := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
switch v.Op {
case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8, ssa.OpARM64MOVDstorezeroidx8:
case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8, ssa.OpARM64MOVDstorezeroidx8,
ssa.OpARM64FMOVDloadidx8, ssa.OpARM64FMOVDstoreidx8:
mop.Index = arm64.REG_LSL | 3<<5 | v.Args[1].Reg()&31
case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4, ssa.OpARM64MOVWstorezeroidx4:
case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4, ssa.OpARM64MOVWstorezeroidx4,
ssa.OpARM64FMOVSloadidx4, ssa.OpARM64FMOVSstoreidx4:
mop.Index = arm64.REG_LSL | 2<<5 | v.Args[1].Reg()&31
case ssa.OpARM64MOVHloadidx2, ssa.OpARM64MOVHUloadidx2, ssa.OpARM64MOVHstoreidx2, ssa.OpARM64MOVHstorezeroidx2:
mop.Index = arm64.REG_LSL | 1<<5 | v.Args[1].Reg()&31
@ -435,7 +437,9 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssa.OpARM64MOVHUloadidx2,
ssa.OpARM64MOVWloadidx4,
ssa.OpARM64MOVWUloadidx4,
ssa.OpARM64MOVDloadidx8:
ssa.OpARM64MOVDloadidx8,
ssa.OpARM64FMOVDloadidx8,
ssa.OpARM64FMOVSloadidx4:
p := s.Prog(v.Op.Asm())
p.From = genIndexedOperand(v)
p.To.Type = obj.TYPE_REG
@ -472,7 +476,9 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssa.OpARM64FMOVDstoreidx,
ssa.OpARM64MOVHstoreidx2,
ssa.OpARM64MOVWstoreidx4,
ssa.OpARM64MOVDstoreidx8:
ssa.OpARM64FMOVSstoreidx4,
ssa.OpARM64MOVDstoreidx8,
ssa.OpARM64FMOVDstoreidx8:
p := s.Prog(v.Op.Asm())
p.To = genIndexedOperand(v)
p.From.Type = obj.TYPE_REG

View file

@ -792,6 +792,15 @@
(MOVHUloadidx2 ptr (MOVDconst [c]) mem) && is32Bit(c<<1) => (MOVHUload [int32(c)<<1] ptr mem)
(MOVHloadidx2 ptr (MOVDconst [c]) mem) && is32Bit(c<<1) => (MOVHload [int32(c)<<1] ptr mem)
(FMOVDload [off] {sym} (ADDshiftLL [3] ptr idx) mem) && off == 0 && sym == nil => (FMOVDloadidx8 ptr idx mem)
(FMOVSload [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil => (FMOVSloadidx4 ptr idx mem)
(FMOVDloadidx ptr (SLLconst [3] idx) mem) => (FMOVDloadidx8 ptr idx mem)
(FMOVSloadidx ptr (SLLconst [2] idx) mem) => (FMOVSloadidx4 ptr idx mem)
(FMOVDloadidx (SLLconst [3] idx) ptr mem) => (FMOVDloadidx8 ptr idx mem)
(FMOVSloadidx (SLLconst [2] idx) ptr mem) => (FMOVSloadidx4 ptr idx mem)
(FMOVDloadidx8 ptr (MOVDconst [c]) mem) && is32Bit(c<<3) => (FMOVDload ptr [int32(c)<<3] mem)
(FMOVSloadidx4 ptr (MOVDconst [c]) mem) && is32Bit(c<<2) => (FMOVSload ptr [int32(c)<<2] mem)
(MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
(MOVBstore [off1+int32(off2)] {sym} ptr val mem)
@ -865,6 +874,15 @@
(MOVWstoreidx4 ptr (MOVDconst [c]) val mem) && is32Bit(c<<2) => (MOVWstore [int32(c)<<2] ptr val mem)
(MOVHstoreidx2 ptr (MOVDconst [c]) val mem) && is32Bit(c<<1) => (MOVHstore [int32(c)<<1] ptr val mem)
(FMOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem) && off == 0 && sym == nil => (FMOVDstoreidx8 ptr idx val mem)
(FMOVSstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem) && off == 0 && sym == nil => (FMOVSstoreidx4 ptr idx val mem)
(FMOVDstoreidx ptr (SLLconst [3] idx) val mem) => (FMOVDstoreidx8 ptr idx val mem)
(FMOVSstoreidx ptr (SLLconst [2] idx) val mem) => (FMOVSstoreidx4 ptr idx val mem)
(FMOVDstoreidx (SLLconst [3] idx) ptr val mem) => (FMOVDstoreidx8 ptr idx val mem)
(FMOVSstoreidx (SLLconst [2] idx) ptr val mem) => (FMOVSstoreidx4 ptr idx val mem)
(FMOVDstoreidx8 ptr (MOVDconst [c]) val mem) && is32Bit(c<<3) => (FMOVDstore [int32(c)<<3] ptr val mem)
(FMOVSstoreidx4 ptr (MOVDconst [c]) val mem) && is32Bit(c<<2) => (FMOVSstore [int32(c)<<2] ptr val mem)
(MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>

View file

@ -379,11 +379,13 @@ func init() {
{name: "FMOVDloadidx", argLength: 3, reg: fp2load, asm: "FMOVD", typ: "Float64"}, // load 64-bit float from arg0 + arg1, arg2=mem.
// shifted register indexed load
{name: "MOVHloadidx2", argLength: 3, reg: gp2load, asm: "MOVH", typ: "Int16"}, // load 16-bit half-word from arg0 + arg1*2, sign-extended to 64-bit, arg2=mem.
{name: "MOVHUloadidx2", argLength: 3, reg: gp2load, asm: "MOVHU", typ: "UInt16"}, // load 16-bit half-word from arg0 + arg1*2, zero-extended to 64-bit, arg2=mem.
{name: "MOVWloadidx4", argLength: 3, reg: gp2load, asm: "MOVW", typ: "Int32"}, // load 32-bit word from arg0 + arg1*4, sign-extended to 64-bit, arg2=mem.
{name: "MOVWUloadidx4", argLength: 3, reg: gp2load, asm: "MOVWU", typ: "UInt32"}, // load 32-bit word from arg0 + arg1*4, zero-extended to 64-bit, arg2=mem.
{name: "MOVDloadidx8", argLength: 3, reg: gp2load, asm: "MOVD", typ: "UInt64"}, // load 64-bit double-word from arg0 + arg1*8, arg2 = mem.
{name: "MOVHloadidx2", argLength: 3, reg: gp2load, asm: "MOVH", typ: "Int16"}, // load 16-bit half-word from arg0 + arg1*2, sign-extended to 64-bit, arg2=mem.
{name: "MOVHUloadidx2", argLength: 3, reg: gp2load, asm: "MOVHU", typ: "UInt16"}, // load 16-bit half-word from arg0 + arg1*2, zero-extended to 64-bit, arg2=mem.
{name: "MOVWloadidx4", argLength: 3, reg: gp2load, asm: "MOVW", typ: "Int32"}, // load 32-bit word from arg0 + arg1*4, sign-extended to 64-bit, arg2=mem.
{name: "MOVWUloadidx4", argLength: 3, reg: gp2load, asm: "MOVWU", typ: "UInt32"}, // load 32-bit word from arg0 + arg1*4, zero-extended to 64-bit, arg2=mem.
{name: "MOVDloadidx8", argLength: 3, reg: gp2load, asm: "MOVD", typ: "UInt64"}, // load 64-bit double-word from arg0 + arg1*8, arg2 = mem.
{name: "FMOVSloadidx4", argLength: 3, reg: fp2load, asm: "FMOVS", typ: "Float32"}, // load 32-bit float from arg0 + arg1*4, arg2 = mem.
{name: "FMOVDloadidx8", argLength: 3, reg: fp2load, asm: "FMOVD", typ: "Float64"}, // load 64-bit float from arg0 + arg1*8, arg2 = mem.
{name: "MOVBstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of arg1 to arg0 + auxInt + aux. arg2=mem.
{name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
@ -402,9 +404,11 @@ func init() {
{name: "FMOVDstoreidx", argLength: 4, reg: fpstore2, asm: "FMOVD", typ: "Mem"}, // store 64-bit float of arg2 to arg0 + arg1, arg3=mem.
// shifted register indexed store
{name: "MOVHstoreidx2", argLength: 4, reg: gpstore2, asm: "MOVH", typ: "Mem"}, // store 2 bytes of arg2 to arg0 + arg1*2, arg3 = mem.
{name: "MOVWstoreidx4", argLength: 4, reg: gpstore2, asm: "MOVW", typ: "Mem"}, // store 4 bytes of arg2 to arg0 + arg1*4, arg3 = mem.
{name: "MOVDstoreidx8", argLength: 4, reg: gpstore2, asm: "MOVD", typ: "Mem"}, // store 8 bytes of arg2 to arg0 + arg1*8, arg3 = mem.
{name: "MOVHstoreidx2", argLength: 4, reg: gpstore2, asm: "MOVH", typ: "Mem"}, // store 2 bytes of arg2 to arg0 + arg1*2, arg3 = mem.
{name: "MOVWstoreidx4", argLength: 4, reg: gpstore2, asm: "MOVW", typ: "Mem"}, // store 4 bytes of arg2 to arg0 + arg1*4, arg3 = mem.
{name: "MOVDstoreidx8", argLength: 4, reg: gpstore2, asm: "MOVD", typ: "Mem"}, // store 8 bytes of arg2 to arg0 + arg1*8, arg3 = mem.
{name: "FMOVSstoreidx4", argLength: 4, reg: fpstore2, asm: "FMOVS", typ: "Mem"}, // store 32-bit float of arg2 to arg0 + arg1*4, arg3=mem.
{name: "FMOVDstoreidx8", argLength: 4, reg: fpstore2, asm: "FMOVD", typ: "Mem"}, // store 64-bit float of arg2 to arg0 + arg1*8, arg3=mem.
{name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of zero to arg0 + auxInt + aux. arg1=mem.
{name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of zero to arg0 + auxInt + aux. arg1=mem.

View file

@ -1481,6 +1481,8 @@ const (
OpARM64MOVWloadidx4
OpARM64MOVWUloadidx4
OpARM64MOVDloadidx8
OpARM64FMOVSloadidx4
OpARM64FMOVDloadidx8
OpARM64MOVBstore
OpARM64MOVHstore
OpARM64MOVWstore
@ -1497,6 +1499,8 @@ const (
OpARM64MOVHstoreidx2
OpARM64MOVWstoreidx4
OpARM64MOVDstoreidx8
OpARM64FMOVSstoreidx4
OpARM64FMOVDstoreidx8
OpARM64MOVBstorezero
OpARM64MOVHstorezero
OpARM64MOVWstorezero
@ -19787,6 +19791,34 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "FMOVSloadidx4",
argLen: 3,
asm: arm64.AFMOVS,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "FMOVDloadidx8",
argLen: 3,
asm: arm64.AFMOVD,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "MOVBstore",
auxType: auxSymOff,
@ -19994,6 +20026,30 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "FMOVSstoreidx4",
argLen: 4,
asm: arm64.AFMOVS,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
{2, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "FMOVDstoreidx8",
argLen: 4,
asm: arm64.AFMOVD,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
{2, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "MOVBstorezero",
auxType: auxSymOff,

View file

@ -99,18 +99,26 @@ func rewriteValueARM64(v *Value) bool {
return rewriteValueARM64_OpARM64FMOVDload(v)
case OpARM64FMOVDloadidx:
return rewriteValueARM64_OpARM64FMOVDloadidx(v)
case OpARM64FMOVDloadidx8:
return rewriteValueARM64_OpARM64FMOVDloadidx8(v)
case OpARM64FMOVDstore:
return rewriteValueARM64_OpARM64FMOVDstore(v)
case OpARM64FMOVDstoreidx:
return rewriteValueARM64_OpARM64FMOVDstoreidx(v)
case OpARM64FMOVDstoreidx8:
return rewriteValueARM64_OpARM64FMOVDstoreidx8(v)
case OpARM64FMOVSload:
return rewriteValueARM64_OpARM64FMOVSload(v)
case OpARM64FMOVSloadidx:
return rewriteValueARM64_OpARM64FMOVSloadidx(v)
case OpARM64FMOVSloadidx4:
return rewriteValueARM64_OpARM64FMOVSloadidx4(v)
case OpARM64FMOVSstore:
return rewriteValueARM64_OpARM64FMOVSstore(v)
case OpARM64FMOVSstoreidx:
return rewriteValueARM64_OpARM64FMOVSstoreidx(v)
case OpARM64FMOVSstoreidx4:
return rewriteValueARM64_OpARM64FMOVSstoreidx4(v)
case OpARM64FMULD:
return rewriteValueARM64_OpARM64FMULD(v)
case OpARM64FMULS:
@ -3900,6 +3908,25 @@ func rewriteValueARM64_OpARM64FMOVDload(v *Value) bool {
v.AddArg3(ptr, idx, mem)
return true
}
// match: (FMOVDload [off] {sym} (ADDshiftLL [3] ptr idx) mem)
// cond: off == 0 && sym == nil
// result: (FMOVDloadidx8 ptr idx mem)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 3 {
break
}
idx := v_0.Args[1]
ptr := v_0.Args[0]
mem := v_1
if !(off == 0 && sym == nil) {
break
}
v.reset(OpARM64FMOVDloadidx8)
v.AddArg3(ptr, idx, mem)
return true
}
// match: (FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
// cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
// result: (FMOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
@ -3964,6 +3991,56 @@ func rewriteValueARM64_OpARM64FMOVDloadidx(v *Value) bool {
v.AddArg2(ptr, mem)
return true
}
// match: (FMOVDloadidx ptr (SLLconst [3] idx) mem)
// result: (FMOVDloadidx8 ptr idx mem)
for {
ptr := v_0
if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 3 {
break
}
idx := v_1.Args[0]
mem := v_2
v.reset(OpARM64FMOVDloadidx8)
v.AddArg3(ptr, idx, mem)
return true
}
// match: (FMOVDloadidx (SLLconst [3] idx) ptr mem)
// result: (FMOVDloadidx8 ptr idx mem)
for {
if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 3 {
break
}
idx := v_0.Args[0]
ptr := v_1
mem := v_2
v.reset(OpARM64FMOVDloadidx8)
v.AddArg3(ptr, idx, mem)
return true
}
return false
}
func rewriteValueARM64_OpARM64FMOVDloadidx8(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (FMOVDloadidx8 ptr (MOVDconst [c]) mem)
// cond: is32Bit(c<<3)
// result: (FMOVDload ptr [int32(c)<<3] mem)
for {
ptr := v_0
if v_1.Op != OpARM64MOVDconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
mem := v_2
if !(is32Bit(c << 3)) {
break
}
v.reset(OpARM64FMOVDload)
v.AuxInt = int32ToAuxInt(int32(c) << 3)
v.AddArg2(ptr, mem)
return true
}
return false
}
func rewriteValueARM64_OpARM64FMOVDstore(v *Value) bool {
@ -4031,6 +4108,26 @@ func rewriteValueARM64_OpARM64FMOVDstore(v *Value) bool {
v.AddArg4(ptr, idx, val, mem)
return true
}
// match: (FMOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem)
// cond: off == 0 && sym == nil
// result: (FMOVDstoreidx8 ptr idx val mem)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 3 {
break
}
idx := v_0.Args[1]
ptr := v_0.Args[0]
val := v_1
mem := v_2
if !(off == 0 && sym == nil) {
break
}
v.reset(OpARM64FMOVDstoreidx8)
v.AddArg4(ptr, idx, val, mem)
return true
}
// match: (FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
// cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
// result: (FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
@ -4099,6 +4196,60 @@ func rewriteValueARM64_OpARM64FMOVDstoreidx(v *Value) bool {
v.AddArg3(idx, val, mem)
return true
}
// match: (FMOVDstoreidx ptr (SLLconst [3] idx) val mem)
// result: (FMOVDstoreidx8 ptr idx val mem)
for {
ptr := v_0
if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 3 {
break
}
idx := v_1.Args[0]
val := v_2
mem := v_3
v.reset(OpARM64FMOVDstoreidx8)
v.AddArg4(ptr, idx, val, mem)
return true
}
// match: (FMOVDstoreidx (SLLconst [3] idx) ptr val mem)
// result: (FMOVDstoreidx8 ptr idx val mem)
for {
if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 3 {
break
}
idx := v_0.Args[0]
ptr := v_1
val := v_2
mem := v_3
v.reset(OpARM64FMOVDstoreidx8)
v.AddArg4(ptr, idx, val, mem)
return true
}
return false
}
func rewriteValueARM64_OpARM64FMOVDstoreidx8(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (FMOVDstoreidx8 ptr (MOVDconst [c]) val mem)
// cond: is32Bit(c<<3)
// result: (FMOVDstore [int32(c)<<3] ptr val mem)
for {
ptr := v_0
if v_1.Op != OpARM64MOVDconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
val := v_2
mem := v_3
if !(is32Bit(c << 3)) {
break
}
v.reset(OpARM64FMOVDstore)
v.AuxInt = int32ToAuxInt(int32(c) << 3)
v.AddArg3(ptr, val, mem)
return true
}
return false
}
func rewriteValueARM64_OpARM64FMOVSload(v *Value) bool {
@ -4163,6 +4314,25 @@ func rewriteValueARM64_OpARM64FMOVSload(v *Value) bool {
v.AddArg3(ptr, idx, mem)
return true
}
// match: (FMOVSload [off] {sym} (ADDshiftLL [2] ptr idx) mem)
// cond: off == 0 && sym == nil
// result: (FMOVSloadidx4 ptr idx mem)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 2 {
break
}
idx := v_0.Args[1]
ptr := v_0.Args[0]
mem := v_1
if !(off == 0 && sym == nil) {
break
}
v.reset(OpARM64FMOVSloadidx4)
v.AddArg3(ptr, idx, mem)
return true
}
// match: (FMOVSload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
// cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
// result: (FMOVSload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
@ -4227,6 +4397,56 @@ func rewriteValueARM64_OpARM64FMOVSloadidx(v *Value) bool {
v.AddArg2(ptr, mem)
return true
}
// match: (FMOVSloadidx ptr (SLLconst [2] idx) mem)
// result: (FMOVSloadidx4 ptr idx mem)
for {
ptr := v_0
if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 2 {
break
}
idx := v_1.Args[0]
mem := v_2
v.reset(OpARM64FMOVSloadidx4)
v.AddArg3(ptr, idx, mem)
return true
}
// match: (FMOVSloadidx (SLLconst [2] idx) ptr mem)
// result: (FMOVSloadidx4 ptr idx mem)
for {
if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 2 {
break
}
idx := v_0.Args[0]
ptr := v_1
mem := v_2
v.reset(OpARM64FMOVSloadidx4)
v.AddArg3(ptr, idx, mem)
return true
}
return false
}
func rewriteValueARM64_OpARM64FMOVSloadidx4(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (FMOVSloadidx4 ptr (MOVDconst [c]) mem)
// cond: is32Bit(c<<2)
// result: (FMOVSload ptr [int32(c)<<2] mem)
for {
ptr := v_0
if v_1.Op != OpARM64MOVDconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
mem := v_2
if !(is32Bit(c << 2)) {
break
}
v.reset(OpARM64FMOVSload)
v.AuxInt = int32ToAuxInt(int32(c) << 2)
v.AddArg2(ptr, mem)
return true
}
return false
}
func rewriteValueARM64_OpARM64FMOVSstore(v *Value) bool {
@ -4294,6 +4514,26 @@ func rewriteValueARM64_OpARM64FMOVSstore(v *Value) bool {
v.AddArg4(ptr, idx, val, mem)
return true
}
// match: (FMOVSstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem)
// cond: off == 0 && sym == nil
// result: (FMOVSstoreidx4 ptr idx val mem)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 2 {
break
}
idx := v_0.Args[1]
ptr := v_0.Args[0]
val := v_1
mem := v_2
if !(off == 0 && sym == nil) {
break
}
v.reset(OpARM64FMOVSstoreidx4)
v.AddArg4(ptr, idx, val, mem)
return true
}
// match: (FMOVSstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
// cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
// result: (FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
@ -4362,6 +4602,60 @@ func rewriteValueARM64_OpARM64FMOVSstoreidx(v *Value) bool {
v.AddArg3(idx, val, mem)
return true
}
// match: (FMOVSstoreidx ptr (SLLconst [2] idx) val mem)
// result: (FMOVSstoreidx4 ptr idx val mem)
for {
ptr := v_0
if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 2 {
break
}
idx := v_1.Args[0]
val := v_2
mem := v_3
v.reset(OpARM64FMOVSstoreidx4)
v.AddArg4(ptr, idx, val, mem)
return true
}
// match: (FMOVSstoreidx (SLLconst [2] idx) ptr val mem)
// result: (FMOVSstoreidx4 ptr idx val mem)
for {
if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 2 {
break
}
idx := v_0.Args[0]
ptr := v_1
val := v_2
mem := v_3
v.reset(OpARM64FMOVSstoreidx4)
v.AddArg4(ptr, idx, val, mem)
return true
}
return false
}
func rewriteValueARM64_OpARM64FMOVSstoreidx4(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (FMOVSstoreidx4 ptr (MOVDconst [c]) val mem)
// cond: is32Bit(c<<2)
// result: (FMOVSstore [int32(c)<<2] ptr val mem)
for {
ptr := v_0
if v_1.Op != OpARM64MOVDconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
val := v_2
mem := v_3
if !(is32Bit(c << 2)) {
break
}
v.reset(OpARM64FMOVSstore)
v.AuxInt = int32ToAuxInt(int32(c) << 2)
v.AddArg3(ptr, val, mem)
return true
}
return false
}
func rewriteValueARM64_OpARM64FMULD(v *Value) bool {

View file

@ -53,12 +53,12 @@ func DivPow2(f1, f2, f3 float64) (float64, float64, float64) {
}
func indexLoad(b0 []float32, b1 float32, idx int) float32 {
// arm64:`FMOVS\s\(R[0-9]+\)\(R[0-9]+\),\sF[0-9]+`
// arm64:`FMOVS\s\(R[0-9]+\)\(R[0-9]+<<2\),\sF[0-9]+`
return b0[idx] * b1
}
func indexStore(b0 []float64, b1 float64, idx int) {
// arm64:`FMOVD\sF[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`
// arm64:`FMOVD\sF[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<3\)`
b0[idx] = b1
}

View file

@ -177,9 +177,11 @@ func idxFloat32(x, y []float32, i int) {
var t float32
// amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
// 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
// arm64: `FMOVS\t\(R[0-9]*\)\(R[0-9]*<<2\), F[0-9]+`
t = x[i+1]
// amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
// 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
// arm64: `FMOVS\tF[0-9]+, \(R[0-9]*\)\(R[0-9]*<<2\)`
y[i+1] = t
// amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
// 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
@ -193,9 +195,11 @@ func idxFloat64(x, y []float64, i int) {
var t float64
// amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
// 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
// arm64: `FMOVD\t\(R[0-9]*\)\(R[0-9]*<<3\), F[0-9]+`
t = x[i+1]
// amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
// 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
// arm64: `FMOVD\tF[0-9]+, \(R[0-9]*\)\(R[0-9]*<<3\)`
y[i+1] = t
// amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
// 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`