mirror of
https://github.com/golang/go
synced 2024-11-05 18:36:08 +00:00
cmd/compile: optimize 386 code with MULLload/DIVSSload/DIVSDload
IMULL/DIVSS/DIVSD all can take the source operand from memory directly. And this CL implement that optimization. 1. The total size of pkg/linux_386 decreases about 84KB (excluding cmd/compile). 2. The go1 benchmark shows little regression in total (excluding noise). name old time/op new time/op delta BinaryTree17-4 3.29s ± 2% 3.27s ± 4% ~ (p=0.192 n=30+30) Fannkuch11-4 3.49s ± 2% 3.54s ± 1% +1.48% (p=0.000 n=30+30) FmtFprintfEmpty-4 45.9ns ± 3% 46.3ns ± 4% +0.89% (p=0.037 n=30+30) FmtFprintfString-4 78.8ns ± 3% 78.7ns ± 4% ~ (p=0.209 n=30+27) FmtFprintfInt-4 91.0ns ± 2% 90.3ns ± 2% -0.82% (p=0.031 n=30+27) FmtFprintfIntInt-4 142ns ± 4% 143ns ± 4% ~ (p=0.136 n=30+30) FmtFprintfPrefixedInt-4 181ns ± 3% 183ns ± 4% +1.40% (p=0.005 n=30+30) FmtFprintfFloat-4 404ns ± 4% 408ns ± 3% ~ (p=0.397 n=30+30) FmtManyArgs-4 601ns ± 3% 609ns ± 5% ~ (p=0.059 n=30+30) GobDecode-4 7.21ms ± 5% 7.24ms ± 5% ~ (p=0.612 n=30+30) GobEncode-4 6.91ms ± 6% 6.91ms ± 6% ~ (p=0.797 n=30+30) Gzip-4 398ms ± 6% 399ms ± 4% ~ (p=0.173 n=30+30) Gunzip-4 41.7ms ± 3% 41.8ms ± 3% ~ (p=0.423 n=30+30) HTTPClientServer-4 62.3µs ± 2% 62.7µs ± 3% ~ (p=0.085 n=29+30) JSONEncode-4 21.0ms ± 4% 20.7ms ± 5% -1.39% (p=0.014 n=30+30) JSONDecode-4 66.3ms ± 3% 67.4ms ± 1% +1.71% (p=0.003 n=30+24) Mandelbrot200-4 5.15ms ± 3% 5.16ms ± 3% ~ (p=0.697 n=30+30) GoParse-4 3.24ms ± 3% 3.27ms ± 4% +0.91% (p=0.032 n=30+30) RegexpMatchEasy0_32-4 101ns ± 5% 99ns ± 4% -1.82% (p=0.008 n=29+30) RegexpMatchEasy0_1K-4 848ns ± 4% 841ns ± 2% -0.77% (p=0.043 n=30+30) RegexpMatchEasy1_32-4 106ns ± 6% 106ns ± 3% ~ (p=0.939 n=29+30) RegexpMatchEasy1_1K-4 1.02µs ± 3% 1.03µs ± 4% ~ (p=0.297 n=28+30) RegexpMatchMedium_32-4 129ns ± 4% 127ns ± 4% ~ (p=0.073 n=30+30) RegexpMatchMedium_1K-4 43.9µs ± 3% 43.8µs ± 3% ~ (p=0.186 n=30+30) RegexpMatchHard_32-4 2.24µs ± 4% 2.22µs ± 4% ~ (p=0.332 n=30+29) RegexpMatchHard_1K-4 68.0µs ± 4% 67.5µs ± 3% ~ (p=0.290 n=30+30) Revcomp-4 1.85s ± 3% 1.85s ± 3% ~ (p=0.358 n=30+30) Template-4 69.6ms ± 3% 70.0ms ± 4% ~ (p=0.273 n=30+30) TimeParse-4 445ns ± 3% 441ns ± 3% ~ (p=0.494 n=30+30) TimeFormat-4 412ns ± 3% 412ns ± 6% ~ (p=0.841 n=30+30) [Geo mean] 66.7µs 66.8µs +0.13% name old speed new speed delta GobDecode-4 107MB/s ± 5% 106MB/s ± 5% ~ (p=0.615 n=30+30) GobEncode-4 111MB/s ± 6% 111MB/s ± 6% ~ (p=0.790 n=30+30) Gzip-4 48.8MB/s ± 6% 48.7MB/s ± 4% ~ (p=0.167 n=30+30) Gunzip-4 465MB/s ± 3% 465MB/s ± 3% ~ (p=0.420 n=30+30) JSONEncode-4 92.4MB/s ± 4% 93.7MB/s ± 5% +1.42% (p=0.015 n=30+30) JSONDecode-4 29.3MB/s ± 3% 28.8MB/s ± 1% -1.72% (p=0.003 n=30+24) GoParse-4 17.9MB/s ± 3% 17.7MB/s ± 4% -0.89% (p=0.037 n=30+30) RegexpMatchEasy0_32-4 317MB/s ± 8% 324MB/s ± 4% +2.14% (p=0.006 n=30+30) RegexpMatchEasy0_1K-4 1.21GB/s ± 4% 1.22GB/s ± 2% +0.77% (p=0.036 n=30+30) RegexpMatchEasy1_32-4 298MB/s ± 7% 299MB/s ± 4% ~ (p=0.511 n=30+30) RegexpMatchEasy1_1K-4 1.00GB/s ± 3% 1.00GB/s ± 4% ~ (p=0.304 n=28+30) RegexpMatchMedium_32-4 7.75MB/s ± 4% 7.82MB/s ± 4% ~ (p=0.089 n=30+30) RegexpMatchMedium_1K-4 23.3MB/s ± 3% 23.4MB/s ± 3% ~ (p=0.181 n=30+30) RegexpMatchHard_32-4 14.3MB/s ± 4% 14.4MB/s ± 4% ~ (p=0.320 n=30+29) RegexpMatchHard_1K-4 15.1MB/s ± 4% 15.2MB/s ± 3% ~ (p=0.273 n=30+30) Revcomp-4 137MB/s ± 3% 137MB/s ± 3% ~ (p=0.352 n=30+30) Template-4 27.9MB/s ± 3% 27.7MB/s ± 4% ~ (p=0.277 n=30+30) [Geo mean] 79.9MB/s 80.1MB/s +0.15% Change-Id: I97333cd8ddabb3c7c88ca5aa9e14a005b74d306d Reviewed-on: https://go-review.googlesource.com/120695 Run-TryBot: Ben Shi <powerman1st@163.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
705f3c74e6
commit
90f2fa0037
6 changed files with 405 additions and 22 deletions
|
@ -636,12 +636,12 @@
|
|||
(MOVSSstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVSSstore [off1+off2] {sym} ptr val mem)
|
||||
(MOVSDstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVSDstore [off1+off2] {sym} ptr val mem)
|
||||
|
||||
((ADD|SUB|AND|OR|XOR)Lload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
|
||||
((ADD|SUB|AND|OR|XOR)Lload [off1+off2] {sym} val base mem)
|
||||
((ADD|SUB|MUL)SSload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
|
||||
((ADD|SUB|MUL)SSload [off1+off2] {sym} val base mem)
|
||||
((ADD|SUB|MUL)SDload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
|
||||
((ADD|SUB|MUL)SDload [off1+off2] {sym} val base mem)
|
||||
((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
|
||||
((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {sym} val base mem)
|
||||
((ADD|SUB|MUL|DIV)SSload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
|
||||
((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem)
|
||||
((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
|
||||
((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem)
|
||||
((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym} (ADDLconst [off2] base) val mem) && is32Bit(off1+off2) ->
|
||||
((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {sym} base val mem)
|
||||
|
||||
|
@ -757,15 +757,15 @@
|
|||
(MOVSDstore [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
(MOVSDstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
|
||||
|
||||
((ADD|SUB|AND|OR|XOR)Lload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
|
||||
((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
|
||||
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
|
||||
((ADD|SUB|AND|OR|XOR)Lload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
|
||||
((ADD|SUB|MUL)SSload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
|
||||
((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
|
||||
((ADD|SUB|MUL|DIV)SSload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
|
||||
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
|
||||
((ADD|SUB|MUL)SSload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
|
||||
((ADD|SUB|MUL)SDload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
|
||||
((ADD|SUB|MUL|DIV)SSload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
|
||||
((ADD|SUB|MUL|DIV)SDload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
|
||||
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
|
||||
((ADD|SUB|MUL)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
|
||||
((ADD|SUB|MUL|DIV)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
|
||||
((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
|
||||
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
|
||||
((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
|
||||
|
@ -846,9 +846,9 @@
|
|||
(MOVSDstoreidx8 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOVSDstoreidx8 [int64(int32(c+8*d))] {sym} ptr idx val mem)
|
||||
|
||||
// Merge load/store to op
|
||||
((ADD|AND|OR|XOR|SUB)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|AND|OR|XOR|SUB)Lload x [off] {sym} ptr mem)
|
||||
((ADD|SUB|MUL)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && !config.use387 && clobber(l) -> ((ADD|SUB|MUL)SDload x [off] {sym} ptr mem)
|
||||
((ADD|SUB|MUL)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && !config.use387 && clobber(l) -> ((ADD|SUB|MUL)SSload x [off] {sym} ptr mem)
|
||||
((ADD|AND|OR|XOR|SUB|MUL)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|AND|OR|XOR|SUB|MUL)Lload x [off] {sym} ptr mem)
|
||||
((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && !config.use387 && clobber(l) -> ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
|
||||
((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && !config.use387 && clobber(l) -> ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)
|
||||
(MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
|
||||
(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) ->
|
||||
((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
|
||||
|
|
|
@ -183,6 +183,8 @@ func init() {
|
|||
{name: "SUBSDload", argLength: 3, reg: fp21load, asm: "SUBSD", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp64 arg0 - tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
|
||||
{name: "MULSSload", argLength: 3, reg: fp21load, asm: "MULSS", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp32 arg0 * tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
|
||||
{name: "MULSDload", argLength: 3, reg: fp21load, asm: "MULSD", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp64 arg0 * tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
|
||||
{name: "DIVSSload", argLength: 3, reg: fp21load, asm: "DIVSS", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp32 arg0 / tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
|
||||
{name: "DIVSDload", argLength: 3, reg: fp21load, asm: "DIVSD", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp64 arg0 / tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
|
||||
|
||||
// binary ops
|
||||
{name: "ADDL", argLength: 2, reg: gp21sp, asm: "ADDL", commutative: true, clobberFlags: true}, // arg0 + arg1
|
||||
|
@ -279,11 +281,12 @@ func init() {
|
|||
{name: "ROLWconst", argLength: 1, reg: gp11, asm: "ROLW", aux: "Int16", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-15
|
||||
{name: "ROLBconst", argLength: 1, reg: gp11, asm: "ROLB", aux: "Int8", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-7
|
||||
|
||||
{name: "ADDLload", argLength: 3, reg: gp21load, asm: "ADDL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
|
||||
{name: "SUBLload", argLength: 3, reg: gp21load, asm: "SUBL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 - tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
|
||||
{name: "ANDLload", argLength: 3, reg: gp21load, asm: "ANDL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 & tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
|
||||
{name: "ORLload", argLength: 3, reg: gp21load, asm: "ORL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 | tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
|
||||
{name: "XORLload", argLength: 3, reg: gp21load, asm: "XORL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
|
||||
{name: "ADDLload", argLength: 3, reg: gp21load, asm: "ADDL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
|
||||
{name: "SUBLload", argLength: 3, reg: gp21load, asm: "SUBL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 - tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
|
||||
{name: "MULLload", argLength: 3, reg: gp21load, asm: "IMULL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 * tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
|
||||
{name: "ANDLload", argLength: 3, reg: gp21load, asm: "ANDL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 & tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
|
||||
{name: "ORLload", argLength: 3, reg: gp21load, asm: "ORL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 | tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
|
||||
{name: "XORLload", argLength: 3, reg: gp21load, asm: "XORL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
|
||||
|
||||
// unary ops
|
||||
{name: "NEGL", argLength: 1, reg: gp11, asm: "NEGL", resultInArg0: true, clobberFlags: true}, // -arg0
|
||||
|
|
|
@ -262,6 +262,8 @@ const (
|
|||
Op386SUBSDload
|
||||
Op386MULSSload
|
||||
Op386MULSDload
|
||||
Op386DIVSSload
|
||||
Op386DIVSDload
|
||||
Op386ADDL
|
||||
Op386ADDLconst
|
||||
Op386ADDLcarry
|
||||
|
@ -333,6 +335,7 @@ const (
|
|||
Op386ROLBconst
|
||||
Op386ADDLload
|
||||
Op386SUBLload
|
||||
Op386MULLload
|
||||
Op386ANDLload
|
||||
Op386ORLload
|
||||
Op386XORLload
|
||||
|
@ -2752,6 +2755,42 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "DIVSSload",
|
||||
auxType: auxSymOff,
|
||||
argLen: 3,
|
||||
resultInArg0: true,
|
||||
faultOnNilArg1: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.ADIVSS,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7
|
||||
{1, 65791}, // AX CX DX BX SP BP SI DI SB
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "DIVSDload",
|
||||
auxType: auxSymOff,
|
||||
argLen: 3,
|
||||
resultInArg0: true,
|
||||
faultOnNilArg1: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.ADIVSD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7
|
||||
{1, 65791}, // AX CX DX BX SP BP SI DI SB
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "ADDL",
|
||||
argLen: 2,
|
||||
|
@ -3821,6 +3860,25 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "MULLload",
|
||||
auxType: auxSymOff,
|
||||
argLen: 3,
|
||||
resultInArg0: true,
|
||||
clobberFlags: true,
|
||||
faultOnNilArg1: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.AIMULL,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 239}, // AX CX DX BX BP SI DI
|
||||
{1, 65791}, // AX CX DX BX SP BP SI DI SB
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 239}, // AX CX DX BX BP SI DI
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "ANDLload",
|
||||
auxType: auxSymOff,
|
||||
|
|
|
@ -61,6 +61,14 @@ func rewriteValue386(v *Value) bool {
|
|||
return rewriteValue386_Op386CMPWconst_0(v)
|
||||
case Op386CMPWload:
|
||||
return rewriteValue386_Op386CMPWload_0(v)
|
||||
case Op386DIVSD:
|
||||
return rewriteValue386_Op386DIVSD_0(v)
|
||||
case Op386DIVSDload:
|
||||
return rewriteValue386_Op386DIVSDload_0(v)
|
||||
case Op386DIVSS:
|
||||
return rewriteValue386_Op386DIVSS_0(v)
|
||||
case Op386DIVSSload:
|
||||
return rewriteValue386_Op386DIVSSload_0(v)
|
||||
case Op386LEAL:
|
||||
return rewriteValue386_Op386LEAL_0(v)
|
||||
case Op386LEAL1:
|
||||
|
@ -163,6 +171,8 @@ func rewriteValue386(v *Value) bool {
|
|||
return rewriteValue386_Op386MULL_0(v)
|
||||
case Op386MULLconst:
|
||||
return rewriteValue386_Op386MULLconst_0(v) || rewriteValue386_Op386MULLconst_10(v) || rewriteValue386_Op386MULLconst_20(v) || rewriteValue386_Op386MULLconst_30(v)
|
||||
case Op386MULLload:
|
||||
return rewriteValue386_Op386MULLload_0(v)
|
||||
case Op386MULSD:
|
||||
return rewriteValue386_Op386MULSD_0(v)
|
||||
case Op386MULSDload:
|
||||
|
@ -3098,6 +3108,192 @@ func rewriteValue386_Op386CMPWload_0(v *Value) bool {
|
|||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValue386_Op386DIVSD_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
config := b.Func.Config
|
||||
_ = config
|
||||
// match: (DIVSD x l:(MOVSDload [off] {sym} ptr mem))
|
||||
// cond: canMergeLoad(v, l, x) && !config.use387 && clobber(l)
|
||||
// result: (DIVSDload x [off] {sym} ptr mem)
|
||||
for {
|
||||
_ = v.Args[1]
|
||||
x := v.Args[0]
|
||||
l := v.Args[1]
|
||||
if l.Op != Op386MOVSDload {
|
||||
break
|
||||
}
|
||||
off := l.AuxInt
|
||||
sym := l.Aux
|
||||
_ = l.Args[1]
|
||||
ptr := l.Args[0]
|
||||
mem := l.Args[1]
|
||||
if !(canMergeLoad(v, l, x) && !config.use387 && clobber(l)) {
|
||||
break
|
||||
}
|
||||
v.reset(Op386DIVSDload)
|
||||
v.AuxInt = off
|
||||
v.Aux = sym
|
||||
v.AddArg(x)
|
||||
v.AddArg(ptr)
|
||||
v.AddArg(mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValue386_Op386DIVSDload_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
config := b.Func.Config
|
||||
_ = config
|
||||
// match: (DIVSDload [off1] {sym} val (ADDLconst [off2] base) mem)
|
||||
// cond: is32Bit(off1+off2)
|
||||
// result: (DIVSDload [off1+off2] {sym} val base mem)
|
||||
for {
|
||||
off1 := v.AuxInt
|
||||
sym := v.Aux
|
||||
_ = v.Args[2]
|
||||
val := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != Op386ADDLconst {
|
||||
break
|
||||
}
|
||||
off2 := v_1.AuxInt
|
||||
base := v_1.Args[0]
|
||||
mem := v.Args[2]
|
||||
if !(is32Bit(off1 + off2)) {
|
||||
break
|
||||
}
|
||||
v.reset(Op386DIVSDload)
|
||||
v.AuxInt = off1 + off2
|
||||
v.Aux = sym
|
||||
v.AddArg(val)
|
||||
v.AddArg(base)
|
||||
v.AddArg(mem)
|
||||
return true
|
||||
}
|
||||
// match: (DIVSDload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
|
||||
// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
|
||||
// result: (DIVSDload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
|
||||
for {
|
||||
off1 := v.AuxInt
|
||||
sym1 := v.Aux
|
||||
_ = v.Args[2]
|
||||
val := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != Op386LEAL {
|
||||
break
|
||||
}
|
||||
off2 := v_1.AuxInt
|
||||
sym2 := v_1.Aux
|
||||
base := v_1.Args[0]
|
||||
mem := v.Args[2]
|
||||
if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
|
||||
break
|
||||
}
|
||||
v.reset(Op386DIVSDload)
|
||||
v.AuxInt = off1 + off2
|
||||
v.Aux = mergeSym(sym1, sym2)
|
||||
v.AddArg(val)
|
||||
v.AddArg(base)
|
||||
v.AddArg(mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValue386_Op386DIVSS_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
config := b.Func.Config
|
||||
_ = config
|
||||
// match: (DIVSS x l:(MOVSSload [off] {sym} ptr mem))
|
||||
// cond: canMergeLoad(v, l, x) && !config.use387 && clobber(l)
|
||||
// result: (DIVSSload x [off] {sym} ptr mem)
|
||||
for {
|
||||
_ = v.Args[1]
|
||||
x := v.Args[0]
|
||||
l := v.Args[1]
|
||||
if l.Op != Op386MOVSSload {
|
||||
break
|
||||
}
|
||||
off := l.AuxInt
|
||||
sym := l.Aux
|
||||
_ = l.Args[1]
|
||||
ptr := l.Args[0]
|
||||
mem := l.Args[1]
|
||||
if !(canMergeLoad(v, l, x) && !config.use387 && clobber(l)) {
|
||||
break
|
||||
}
|
||||
v.reset(Op386DIVSSload)
|
||||
v.AuxInt = off
|
||||
v.Aux = sym
|
||||
v.AddArg(x)
|
||||
v.AddArg(ptr)
|
||||
v.AddArg(mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValue386_Op386DIVSSload_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
config := b.Func.Config
|
||||
_ = config
|
||||
// match: (DIVSSload [off1] {sym} val (ADDLconst [off2] base) mem)
|
||||
// cond: is32Bit(off1+off2)
|
||||
// result: (DIVSSload [off1+off2] {sym} val base mem)
|
||||
for {
|
||||
off1 := v.AuxInt
|
||||
sym := v.Aux
|
||||
_ = v.Args[2]
|
||||
val := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != Op386ADDLconst {
|
||||
break
|
||||
}
|
||||
off2 := v_1.AuxInt
|
||||
base := v_1.Args[0]
|
||||
mem := v.Args[2]
|
||||
if !(is32Bit(off1 + off2)) {
|
||||
break
|
||||
}
|
||||
v.reset(Op386DIVSSload)
|
||||
v.AuxInt = off1 + off2
|
||||
v.Aux = sym
|
||||
v.AddArg(val)
|
||||
v.AddArg(base)
|
||||
v.AddArg(mem)
|
||||
return true
|
||||
}
|
||||
// match: (DIVSSload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
|
||||
// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
|
||||
// result: (DIVSSload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
|
||||
for {
|
||||
off1 := v.AuxInt
|
||||
sym1 := v.Aux
|
||||
_ = v.Args[2]
|
||||
val := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != Op386LEAL {
|
||||
break
|
||||
}
|
||||
off2 := v_1.AuxInt
|
||||
sym2 := v_1.Aux
|
||||
base := v_1.Args[0]
|
||||
mem := v.Args[2]
|
||||
if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
|
||||
break
|
||||
}
|
||||
v.reset(Op386DIVSSload)
|
||||
v.AuxInt = off1 + off2
|
||||
v.Aux = mergeSym(sym1, sym2)
|
||||
v.AddArg(val)
|
||||
v.AddArg(base)
|
||||
v.AddArg(mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValue386_Op386LEAL_0(v *Value) bool {
|
||||
// match: (LEAL [c] {s} (ADDLconst [d] x))
|
||||
// cond: is32Bit(c+d)
|
||||
|
@ -9825,6 +10021,58 @@ func rewriteValue386_Op386MULL_0(v *Value) bool {
|
|||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (MULL x l:(MOVLload [off] {sym} ptr mem))
|
||||
// cond: canMergeLoad(v, l, x) && clobber(l)
|
||||
// result: (MULLload x [off] {sym} ptr mem)
|
||||
for {
|
||||
_ = v.Args[1]
|
||||
x := v.Args[0]
|
||||
l := v.Args[1]
|
||||
if l.Op != Op386MOVLload {
|
||||
break
|
||||
}
|
||||
off := l.AuxInt
|
||||
sym := l.Aux
|
||||
_ = l.Args[1]
|
||||
ptr := l.Args[0]
|
||||
mem := l.Args[1]
|
||||
if !(canMergeLoad(v, l, x) && clobber(l)) {
|
||||
break
|
||||
}
|
||||
v.reset(Op386MULLload)
|
||||
v.AuxInt = off
|
||||
v.Aux = sym
|
||||
v.AddArg(x)
|
||||
v.AddArg(ptr)
|
||||
v.AddArg(mem)
|
||||
return true
|
||||
}
|
||||
// match: (MULL l:(MOVLload [off] {sym} ptr mem) x)
|
||||
// cond: canMergeLoad(v, l, x) && clobber(l)
|
||||
// result: (MULLload x [off] {sym} ptr mem)
|
||||
for {
|
||||
_ = v.Args[1]
|
||||
l := v.Args[0]
|
||||
if l.Op != Op386MOVLload {
|
||||
break
|
||||
}
|
||||
off := l.AuxInt
|
||||
sym := l.Aux
|
||||
_ = l.Args[1]
|
||||
ptr := l.Args[0]
|
||||
mem := l.Args[1]
|
||||
x := v.Args[1]
|
||||
if !(canMergeLoad(v, l, x) && clobber(l)) {
|
||||
break
|
||||
}
|
||||
v.reset(Op386MULLload)
|
||||
v.AuxInt = off
|
||||
v.Aux = sym
|
||||
v.AddArg(x)
|
||||
v.AddArg(ptr)
|
||||
v.AddArg(mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValue386_Op386MULLconst_0(v *Value) bool {
|
||||
|
@ -10332,6 +10580,66 @@ func rewriteValue386_Op386MULLconst_30(v *Value) bool {
|
|||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValue386_Op386MULLload_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
config := b.Func.Config
|
||||
_ = config
|
||||
// match: (MULLload [off1] {sym} val (ADDLconst [off2] base) mem)
|
||||
// cond: is32Bit(off1+off2)
|
||||
// result: (MULLload [off1+off2] {sym} val base mem)
|
||||
for {
|
||||
off1 := v.AuxInt
|
||||
sym := v.Aux
|
||||
_ = v.Args[2]
|
||||
val := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != Op386ADDLconst {
|
||||
break
|
||||
}
|
||||
off2 := v_1.AuxInt
|
||||
base := v_1.Args[0]
|
||||
mem := v.Args[2]
|
||||
if !(is32Bit(off1 + off2)) {
|
||||
break
|
||||
}
|
||||
v.reset(Op386MULLload)
|
||||
v.AuxInt = off1 + off2
|
||||
v.Aux = sym
|
||||
v.AddArg(val)
|
||||
v.AddArg(base)
|
||||
v.AddArg(mem)
|
||||
return true
|
||||
}
|
||||
// match: (MULLload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
|
||||
// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
|
||||
// result: (MULLload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
|
||||
for {
|
||||
off1 := v.AuxInt
|
||||
sym1 := v.Aux
|
||||
_ = v.Args[2]
|
||||
val := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != Op386LEAL {
|
||||
break
|
||||
}
|
||||
off2 := v_1.AuxInt
|
||||
sym2 := v_1.Aux
|
||||
base := v_1.Args[0]
|
||||
mem := v.Args[2]
|
||||
if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
|
||||
break
|
||||
}
|
||||
v.reset(Op386MULLload)
|
||||
v.AuxInt = off1 + off2
|
||||
v.Aux = mergeSym(sym1, sym2)
|
||||
v.AddArg(val)
|
||||
v.AddArg(base)
|
||||
v.AddArg(mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValue386_Op386MULSD_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
|
|
|
@ -525,8 +525,10 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
|||
gc.AddAux(&p.From, v)
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = v.Reg()
|
||||
case ssa.Op386ADDLload, ssa.Op386SUBLload, ssa.Op386ANDLload, ssa.Op386ORLload, ssa.Op386XORLload,
|
||||
ssa.Op386ADDSDload, ssa.Op386ADDSSload, ssa.Op386SUBSDload, ssa.Op386SUBSSload, ssa.Op386MULSDload, ssa.Op386MULSSload:
|
||||
case ssa.Op386ADDLload, ssa.Op386SUBLload, ssa.Op386MULLload,
|
||||
ssa.Op386ANDLload, ssa.Op386ORLload, ssa.Op386XORLload,
|
||||
ssa.Op386ADDSDload, ssa.Op386ADDSSload, ssa.Op386SUBSDload, ssa.Op386SUBSSload,
|
||||
ssa.Op386MULSDload, ssa.Op386MULSSload, ssa.Op386DIVSSload, ssa.Op386DIVSDload:
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_MEM
|
||||
p.From.Reg = v.Args[1].Reg()
|
||||
|
|
|
@ -49,6 +49,13 @@ func Mul_96(n int) int {
|
|||
return n * 96
|
||||
}
|
||||
|
||||
func MulMemSrc(a []uint32, b []float32) {
|
||||
// 386:`IMULL\s4\([A-Z]+\),\s[A-Z]+`
|
||||
a[0] *= a[1]
|
||||
// 386/sse2:`MULSS\s4\([A-Z]+\),\sX[0-9]+`
|
||||
b[0] *= b[1]
|
||||
}
|
||||
|
||||
// Multiplications merging tests
|
||||
|
||||
func MergeMuls1(n int) int {
|
||||
|
@ -85,6 +92,11 @@ func MergeMuls5(a, n int) int {
|
|||
// Division //
|
||||
// -------------- //
|
||||
|
||||
func DivMemSrc(a []float64) {
|
||||
// 386/sse2:`DIVSD\s8\([A-Z]+\),\sX[0-9]+`
|
||||
a[0] /= a[1]
|
||||
}
|
||||
|
||||
func Pow2Divs(n1 uint, n2 int) (uint, int) {
|
||||
// 386:"SHRL\t[$]5",-"DIVL"
|
||||
// amd64:"SHRQ\t[$]5",-"DIVQ"
|
||||
|
|
Loading…
Reference in a new issue