mirror of
https://github.com/golang/go
synced 2024-10-14 11:53:56 +00:00
cmd/compile: convert 386 port to use addressing modes pass
Update #36468

Change-Id: Idfdb845d097994689be450d6e8a57fa9adb57166
Reviewed-on: https://go-review.googlesource.com/c/go/+/222782
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
This commit is contained in:
parent
c3b9042132
commit
d84cbec890
|
@ -11,8 +11,8 @@ func addressingModes(f *Func) {
|
|||
default:
|
||||
// Most architectures can't do this.
|
||||
return
|
||||
case "amd64":
|
||||
// TODO: 386, s390x?
|
||||
case "amd64", "386":
|
||||
// TODO: s390x?
|
||||
}
|
||||
|
||||
var tmp []*Value
|
||||
|
@ -21,7 +21,17 @@ func addressingModes(f *Func) {
|
|||
if !combineFirst[v.Op] {
|
||||
continue
|
||||
}
|
||||
p := v.Args[0]
|
||||
// All matched operations have the pointer in arg[0].
|
||||
// All results have the pointer in arg[0] and the index in arg[1].
|
||||
// *Except* for operations which update a register,
|
||||
// which are marked with resultInArg0. Those have
|
||||
// the pointer in arg[1], and the corresponding result op
|
||||
// has the pointer in arg[1] and the index in arg[2].
|
||||
ptrIndex := 0
|
||||
if opcodeTable[v.Op].resultInArg0 {
|
||||
ptrIndex = 1
|
||||
}
|
||||
p := v.Args[ptrIndex]
|
||||
c, ok := combine[[2]Op{v.Op, p.Op}]
|
||||
if !ok {
|
||||
continue
|
||||
|
@ -71,10 +81,11 @@ func addressingModes(f *Func) {
|
|||
f.Fatalf("unknown aux combining for %s and %s\n", v.Op, p.Op)
|
||||
}
|
||||
// Combine the operations.
|
||||
tmp = append(tmp[:0], v.Args[1:]...)
|
||||
tmp = append(tmp[:0], v.Args[:ptrIndex]...)
|
||||
tmp = append(tmp, p.Args...)
|
||||
tmp = append(tmp, v.Args[ptrIndex+1:]...)
|
||||
v.resetArgs()
|
||||
v.Op = c
|
||||
v.AddArgs(p.Args...)
|
||||
v.AddArgs(tmp...)
|
||||
}
|
||||
}
|
||||
|
@ -97,6 +108,7 @@ func init() {
|
|||
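// x.Args[0].Args + x.Args[1:]
|
||||
// (for ops marked resultInArg0 the pointer is x.Args[1] instead, so the
// combined args are x.Args[:1] + x.Args[1].Args + x.Args[2:]).
// Additionally, the Aux/AuxInt from the pointer arg is merged into x.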
|
||||
var combine = map[[2]Op]Op{
|
||||
// amd64
|
||||
[2]Op{OpAMD64MOVBload, OpAMD64ADDQ}: OpAMD64MOVBloadidx1,
|
||||
[2]Op{OpAMD64MOVWload, OpAMD64ADDQ}: OpAMD64MOVWloadidx1,
|
||||
[2]Op{OpAMD64MOVLload, OpAMD64ADDQ}: OpAMD64MOVLloadidx1,
|
||||
|
@ -150,5 +162,64 @@ var combine = map[[2]Op]Op{
|
|||
[2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ1}: OpAMD64MOVQstoreconstidx1,
|
||||
[2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ8}: OpAMD64MOVQstoreconstidx8,
|
||||
|
||||
// TODO: 386
|
||||
// 386
|
||||
[2]Op{Op386MOVBload, Op386ADDL}: Op386MOVBloadidx1,
|
||||
[2]Op{Op386MOVWload, Op386ADDL}: Op386MOVWloadidx1,
|
||||
[2]Op{Op386MOVLload, Op386ADDL}: Op386MOVLloadidx1,
|
||||
[2]Op{Op386MOVSSload, Op386ADDL}: Op386MOVSSloadidx1,
|
||||
[2]Op{Op386MOVSDload, Op386ADDL}: Op386MOVSDloadidx1,
|
||||
|
||||
[2]Op{Op386MOVBstore, Op386ADDL}: Op386MOVBstoreidx1,
|
||||
[2]Op{Op386MOVWstore, Op386ADDL}: Op386MOVWstoreidx1,
|
||||
[2]Op{Op386MOVLstore, Op386ADDL}: Op386MOVLstoreidx1,
|
||||
[2]Op{Op386MOVSSstore, Op386ADDL}: Op386MOVSSstoreidx1,
|
||||
[2]Op{Op386MOVSDstore, Op386ADDL}: Op386MOVSDstoreidx1,
|
||||
|
||||
[2]Op{Op386MOVBstoreconst, Op386ADDL}: Op386MOVBstoreconstidx1,
|
||||
[2]Op{Op386MOVWstoreconst, Op386ADDL}: Op386MOVWstoreconstidx1,
|
||||
[2]Op{Op386MOVLstoreconst, Op386ADDL}: Op386MOVLstoreconstidx1,
|
||||
|
||||
[2]Op{Op386MOVBload, Op386LEAL1}: Op386MOVBloadidx1,
|
||||
[2]Op{Op386MOVWload, Op386LEAL1}: Op386MOVWloadidx1,
|
||||
[2]Op{Op386MOVWload, Op386LEAL2}: Op386MOVWloadidx2,
|
||||
[2]Op{Op386MOVLload, Op386LEAL1}: Op386MOVLloadidx1,
|
||||
[2]Op{Op386MOVLload, Op386LEAL4}: Op386MOVLloadidx4,
|
||||
[2]Op{Op386MOVSSload, Op386LEAL1}: Op386MOVSSloadidx1,
|
||||
[2]Op{Op386MOVSSload, Op386LEAL4}: Op386MOVSSloadidx4,
|
||||
[2]Op{Op386MOVSDload, Op386LEAL1}: Op386MOVSDloadidx1,
|
||||
[2]Op{Op386MOVSDload, Op386LEAL8}: Op386MOVSDloadidx8,
|
||||
|
||||
[2]Op{Op386MOVBstore, Op386LEAL1}: Op386MOVBstoreidx1,
|
||||
[2]Op{Op386MOVWstore, Op386LEAL1}: Op386MOVWstoreidx1,
|
||||
[2]Op{Op386MOVWstore, Op386LEAL2}: Op386MOVWstoreidx2,
|
||||
[2]Op{Op386MOVLstore, Op386LEAL1}: Op386MOVLstoreidx1,
|
||||
[2]Op{Op386MOVLstore, Op386LEAL4}: Op386MOVLstoreidx4,
|
||||
[2]Op{Op386MOVSSstore, Op386LEAL1}: Op386MOVSSstoreidx1,
|
||||
[2]Op{Op386MOVSSstore, Op386LEAL4}: Op386MOVSSstoreidx4,
|
||||
[2]Op{Op386MOVSDstore, Op386LEAL1}: Op386MOVSDstoreidx1,
|
||||
[2]Op{Op386MOVSDstore, Op386LEAL8}: Op386MOVSDstoreidx8,
|
||||
|
||||
[2]Op{Op386MOVBstoreconst, Op386LEAL1}: Op386MOVBstoreconstidx1,
|
||||
[2]Op{Op386MOVWstoreconst, Op386LEAL1}: Op386MOVWstoreconstidx1,
|
||||
[2]Op{Op386MOVWstoreconst, Op386LEAL2}: Op386MOVWstoreconstidx2,
|
||||
[2]Op{Op386MOVLstoreconst, Op386LEAL1}: Op386MOVLstoreconstidx1,
|
||||
[2]Op{Op386MOVLstoreconst, Op386LEAL4}: Op386MOVLstoreconstidx4,
|
||||
|
||||
[2]Op{Op386ADDLload, Op386LEAL4}: Op386ADDLloadidx4,
|
||||
[2]Op{Op386SUBLload, Op386LEAL4}: Op386SUBLloadidx4,
|
||||
[2]Op{Op386MULLload, Op386LEAL4}: Op386MULLloadidx4,
|
||||
[2]Op{Op386ANDLload, Op386LEAL4}: Op386ANDLloadidx4,
|
||||
[2]Op{Op386ORLload, Op386LEAL4}: Op386ORLloadidx4,
|
||||
[2]Op{Op386XORLload, Op386LEAL4}: Op386XORLloadidx4,
|
||||
|
||||
[2]Op{Op386ADDLmodify, Op386LEAL4}: Op386ADDLmodifyidx4,
|
||||
[2]Op{Op386SUBLmodify, Op386LEAL4}: Op386SUBLmodifyidx4,
|
||||
[2]Op{Op386ANDLmodify, Op386LEAL4}: Op386ANDLmodifyidx4,
|
||||
[2]Op{Op386ORLmodify, Op386LEAL4}: Op386ORLmodifyidx4,
|
||||
[2]Op{Op386XORLmodify, Op386LEAL4}: Op386XORLmodifyidx4,
|
||||
|
||||
[2]Op{Op386ADDLconstmodify, Op386LEAL4}: Op386ADDLconstmodifyidx4,
|
||||
[2]Op{Op386ANDLconstmodify, Op386LEAL4}: Op386ANDLconstmodifyidx4,
|
||||
[2]Op{Op386ORLconstmodify, Op386LEAL4}: Op386ORLconstmodifyidx4,
|
||||
[2]Op{Op386XORLconstmodify, Op386LEAL4}: Op386XORLconstmodifyidx4,
|
||||
}
|
||||
|
|
|
@ -588,10 +588,6 @@
|
|||
(MOVWLSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWLSXload <v.Type> [off] {sym} ptr mem)
|
||||
(MOVWLZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
|
||||
|
||||
(MOVBLZX x:(MOVBloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx1 <v.Type> [off] {sym} ptr idx mem)
|
||||
(MOVWLZX x:(MOVWloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem)
|
||||
(MOVWLZX x:(MOVWloadidx2 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem)
|
||||
|
||||
// replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
|
||||
(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBLZX x)
|
||||
(MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWLZX x)
|
||||
|
@ -611,34 +607,22 @@
|
|||
|
||||
// fold constants into memory operations
|
||||
// Note that this is not always a good idea because if not all the uses of
|
||||
// the ADDQconst get eliminated, we still have to compute the ADDQconst and we now
|
||||
// have potentially two live values (ptr and (ADDQconst [off] ptr)) instead of one.
|
||||
// the ADDLconst get eliminated, we still have to compute the ADDLconst and we now
|
||||
// have potentially two live values (ptr and (ADDLconst [off] ptr)) instead of one.
|
||||
// Nevertheless, let's do it!
|
||||
(MOV(L|W|B|SS|SD)load [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOV(L|W|B|SS|SD)load [off1+off2] {sym} ptr mem)
|
||||
(MOV(L|W|B|SS|SD)store [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOV(L|W|B|SS|SD)store [off1+off2] {sym} ptr val mem)
|
||||
|
||||
((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
|
||||
((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {sym} val base mem)
|
||||
((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem) && is32Bit(off1+off2) ->
|
||||
((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {sym} val base idx mem)
|
||||
((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem) && is32Bit(off1+off2*4) ->
|
||||
((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2*4] {sym} val base idx mem)
|
||||
((ADD|SUB|MUL|DIV)SSload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
|
||||
((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem)
|
||||
((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
|
||||
((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem)
|
||||
((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym} (ADDLconst [off2] base) val mem) && is32Bit(off1+off2) ->
|
||||
((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {sym} base val mem)
|
||||
((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem) && is32Bit(off1+off2) ->
|
||||
((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2] {sym} base idx val mem)
|
||||
((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem) && is32Bit(off1+off2*4) ->
|
||||
((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2*4] {sym} base idx val mem)
|
||||
((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDLconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
|
||||
((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
|
||||
((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym} (ADDLconst [off2] base) idx mem) && ValAndOff(valoff1).canAdd(off2) ->
|
||||
((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {sym} base idx mem)
|
||||
((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym} base (ADDLconst [off2] idx) mem) && ValAndOff(valoff1).canAdd(off2*4) ->
|
||||
((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2*4)] {sym} base idx mem)
|
||||
|
||||
// Fold constants into stores.
|
||||
(MOVLstore [off] {sym} ptr (MOVLconst [c]) mem) && validOff(off) ->
|
||||
|
@ -652,7 +636,7 @@
|
|||
(MOV(L|W|B)storeconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
|
||||
(MOV(L|W|B)storeconst [ValAndOff(sc).add(off)] {s} ptr mem)
|
||||
|
||||
// We need to fold LEAQ into the MOVx ops so that the live variable analysis knows
|
||||
// We need to fold LEAL into the MOVx ops so that the live variable analysis knows
|
||||
// what variables are being read/written by the ops.
|
||||
// Note: we turn off this merging for operations on globals when building
|
||||
// position-independent code (when Flag_shared is set).
|
||||
|
@ -672,31 +656,9 @@
|
|||
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
|
||||
(MOV(L|W|B)storeconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
|
||||
|
||||
// generating indexed loads and stores
|
||||
(MOV(B|W|L|SS|SD)load [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
(MOV(B|W|L|SS|SD)loadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
|
||||
(MOVWload [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
(MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
|
||||
(MOV(L|SS)load [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
(MOV(L|SS)loadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
|
||||
(MOVSDload [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
(MOVSDloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
|
||||
|
||||
(MOV(B|W|L|SS|SD)store [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
(MOV(B|W|L|SS|SD)storeidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
|
||||
(MOVWstore [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
(MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
|
||||
(MOV(L|SS)store [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
(MOV(L|SS)storeidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
|
||||
(MOVSDstore [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
(MOVSDstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
|
||||
|
||||
((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
|
||||
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
|
||||
((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
|
||||
((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem)
|
||||
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
|
||||
((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem)
|
||||
((ADD|SUB|MUL|DIV)SSload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
|
||||
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
|
||||
((ADD|SUB|MUL|DIV)SSload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
|
||||
|
@ -706,97 +668,20 @@
|
|||
((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
|
||||
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
|
||||
((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
|
||||
((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem)
|
||||
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
|
||||
((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem)
|
||||
((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAL [off2] {sym2} base) mem)
|
||||
&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
|
||||
((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
|
||||
((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym1} (LEAL [off2] {sym2} base) idx mem)
|
||||
&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
|
||||
((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base idx mem)
|
||||
|
||||
(MOV(B|W|L|SS|SD)load [off] {sym} (ADDL ptr idx) mem) && ptr.Op != OpSB -> (MOV(B|W|L|SS|SD)loadidx1 [off] {sym} ptr idx mem)
|
||||
(MOV(B|W|L|SS|SD)store [off] {sym} (ADDL ptr idx) val mem) && ptr.Op != OpSB -> (MOV(B|W|L|SS|SD)storeidx1 [off] {sym} ptr idx val mem)
|
||||
|
||||
(MOV(B|W|L)storeconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
|
||||
(MOV(B|W|L)storeconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
|
||||
(MOVWstoreconst [x] {sym1} (LEAL2 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
|
||||
(MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
|
||||
(MOVLstoreconst [x] {sym1} (LEAL4 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
|
||||
(MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
|
||||
|
||||
(MOV(B|W|L)storeconst [x] {sym} (ADDL ptr idx) mem) -> (MOV(B|W|L)storeconstidx1 [x] {sym} ptr idx mem)
|
||||
|
||||
// combine SHLL into indexed loads and stores
|
||||
(MOVWloadidx1 [c] {sym} ptr (SHLLconst [1] idx) mem) -> (MOVWloadidx2 [c] {sym} ptr idx mem)
|
||||
(MOVLloadidx1 [c] {sym} ptr (SHLLconst [2] idx) mem) -> (MOVLloadidx4 [c] {sym} ptr idx mem)
|
||||
(MOVWstoreidx1 [c] {sym} ptr (SHLLconst [1] idx) val mem) -> (MOVWstoreidx2 [c] {sym} ptr idx val mem)
|
||||
(MOVLstoreidx1 [c] {sym} ptr (SHLLconst [2] idx) val mem) -> (MOVLstoreidx4 [c] {sym} ptr idx val mem)
|
||||
(MOVWstoreconstidx1 [c] {sym} ptr (SHLLconst [1] idx) mem) -> (MOVWstoreconstidx2 [c] {sym} ptr idx mem)
|
||||
(MOVLstoreconstidx1 [c] {sym} ptr (SHLLconst [2] idx) mem) -> (MOVLstoreconstidx4 [c] {sym} ptr idx mem)
|
||||
|
||||
// combine ADDL into indexed loads and stores
|
||||
(MOV(B|W|L|SS|SD)loadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOV(B|W|L|SS|SD)loadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
|
||||
(MOVWloadidx2 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOVWloadidx2 [int64(int32(c+d))] {sym} ptr idx mem)
|
||||
(MOV(L|SS)loadidx4 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOV(L|SS)loadidx4 [int64(int32(c+d))] {sym} ptr idx mem)
|
||||
(MOVSDloadidx8 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOVSDloadidx8 [int64(int32(c+d))] {sym} ptr idx mem)
|
||||
|
||||
(MOV(B|W|L|SS|SD)storeidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOV(B|W|L|SS|SD)storeidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
|
||||
(MOVWstoreidx2 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOVWstoreidx2 [int64(int32(c+d))] {sym} ptr idx val mem)
|
||||
(MOV(L|SS)storeidx4 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOV(L|SS)storeidx4 [int64(int32(c+d))] {sym} ptr idx val mem)
|
||||
(MOVSDstoreidx8 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOVSDstoreidx8 [int64(int32(c+d))] {sym} ptr idx val mem)
|
||||
|
||||
(MOV(B|W|L|SS|SD)loadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOV(B|W|L|SS|SD)loadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
|
||||
(MOVWloadidx2 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOVWloadidx2 [int64(int32(c+2*d))] {sym} ptr idx mem)
|
||||
(MOV(L|SS)loadidx4 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOV(L|SS)loadidx4 [int64(int32(c+4*d))] {sym} ptr idx mem)
|
||||
(MOVSDloadidx8 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOVSDloadidx8 [int64(int32(c+8*d))] {sym} ptr idx mem)
|
||||
|
||||
(MOV(B|W|L|SS|SD)storeidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOV(B|W|L|SS|SD)storeidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
|
||||
(MOVWstoreidx2 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOVWstoreidx2 [int64(int32(c+2*d))] {sym} ptr idx val mem)
|
||||
(MOV(L|SS)storeidx4 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOV(L|SS)storeidx4 [int64(int32(c+4*d))] {sym} ptr idx val mem)
|
||||
(MOVSDstoreidx8 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOVSDstoreidx8 [int64(int32(c+8*d))] {sym} ptr idx val mem)
|
||||
|
||||
// Merge load/store to op
|
||||
((ADD|AND|OR|XOR|SUB|MUL)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) -> ((ADD|AND|OR|XOR|SUB|MUL)Lload x [off] {sym} ptr mem)
|
||||
((ADD|AND|OR|XOR|SUB|MUL)L x l:(MOVLloadidx4 [off] {sym} ptr idx mem)) && canMergeLoadClobber(v, l, x) && clobber(l) ->
|
||||
((ADD|AND|OR|XOR|SUB|MUL)Lloadidx4 x [off] {sym} ptr idx mem)
|
||||
((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem)
|
||||
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem)
|
||||
((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) -> ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
|
||||
((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) -> ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)
|
||||
(MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
|
||||
(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) ->
|
||||
((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
|
||||
(MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|AND|OR|XOR)Lloadidx4 x [off] {sym} ptr idx mem) mem) && y.Uses==1 && clobber(y) ->
|
||||
((ADD|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx x mem)
|
||||
(MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|SUB|AND|OR|XOR)L l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) ->
|
||||
((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx x mem)
|
||||
(MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLload [off] {sym} ptr mem)) mem)
|
||||
&& y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off) ->
|
||||
((ADD|AND|OR|XOR)Lconstmodify [makeValAndOff(c,off)] {sym} ptr mem)
|
||||
(MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLloadidx4 [off] {sym} ptr idx mem)) mem)
|
||||
&& y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off) ->
|
||||
((ADD|AND|OR|XOR)Lconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
|
||||
((ADD|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) && validValAndOff(c,off) ->
|
||||
((ADD|AND|OR|XOR)Lconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
|
||||
(SUBLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) && validValAndOff(-c,off) ->
|
||||
(ADDLconstmodifyidx4 [makeValAndOff(-c,off)] {sym} ptr idx mem)
|
||||
|
||||
(MOV(B|W|L)storeconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem) ->
|
||||
(MOV(B|W|L)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
|
||||
(MOVWstoreconstidx2 [x] {sym} (ADDLconst [c] ptr) idx mem) ->
|
||||
(MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem)
|
||||
(MOVLstoreconstidx4 [x] {sym} (ADDLconst [c] ptr) idx mem) ->
|
||||
(MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem)
|
||||
|
||||
(MOV(B|W|L)storeconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem) ->
|
||||
(MOV(B|W|L)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
|
||||
(MOVWstoreconstidx2 [x] {sym} ptr (ADDLconst [c] idx) mem) ->
|
||||
(MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem)
|
||||
(MOVLstoreconstidx4 [x] {sym} ptr (ADDLconst [c] idx) mem) ->
|
||||
(MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem)
|
||||
|
||||
// fold LEALs together
|
||||
(LEAL [off1] {sym1} (LEAL [off2] {sym2} x)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
|
@ -826,6 +711,16 @@
|
|||
(LEAL [off1] {sym1} (LEAL8 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
(LEAL8 [off1+off2] {mergeSym(sym1,sym2)} x y)
|
||||
|
||||
// LEAL[1248] into LEAL[1248]. Only some such merges are possible.
|
||||
(LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} y y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
(LEAL2 [off1+off2] {mergeSym(sym1, sym2)} x y)
|
||||
(LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
(LEAL2 [off1+off2] {mergeSym(sym1, sym2)} y x)
|
||||
(LEAL2 [off1] {sym1} x (LEAL1 [off2] {sym2} y y)) && is32Bit(off1+2*off2) && sym2 == nil ->
|
||||
(LEAL4 [off1+2*off2] {sym1} x y)
|
||||
(LEAL4 [off1] {sym1} x (LEAL1 [off2] {sym2} y y)) && is32Bit(off1+4*off2) && sym2 == nil ->
|
||||
(LEAL8 [off1+4*off2] {sym1} x y)
|
||||
|
||||
// Absorb InvertFlags into branches.
|
||||
(LT (InvertFlags cmp) yes no) -> (GT cmp yes no)
|
||||
(GT (InvertFlags cmp) yes no) -> (LT cmp yes no)
|
||||
|
@ -1039,23 +934,27 @@
|
|||
// TEST %reg,%reg is shorter than CMP
|
||||
(CMP(L|W|B)const x [0]) -> (TEST(L|W|B) x x)
|
||||
|
||||
// Convert LEAL1 back to ADDL if we can
|
||||
(LEAL1 [0] x y) && v.Aux == nil -> (ADDL x y)
|
||||
|
||||
// Combining byte loads into larger (unaligned) loads.
|
||||
// There are many ways these combinations could occur. This is
|
||||
// designed to match the way encoding/binary.LittleEndian does it.
|
||||
(ORL x0:(MOVBload [i0] {s} p mem)
|
||||
s0:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
|
||||
(ORL x0:(MOVBload [i0] {s} p0 mem)
|
||||
s0:(SHLLconst [8] x1:(MOVBload [i1] {s} p1 mem)))
|
||||
&& i1 == i0+1
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& s0.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& mergePoint(b,x0,x1) != nil
|
||||
&& clobber(x0, x1, s0)
|
||||
-> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
|
||||
-> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem)
|
||||
|
||||
(ORL o0:(ORL
|
||||
x0:(MOVWload [i0] {s} p mem)
|
||||
s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem)))
|
||||
s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p mem)))
|
||||
x0:(MOVWload [i0] {s} p0 mem)
|
||||
s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p1 mem)))
|
||||
s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p2 mem)))
|
||||
&& i2 == i0+2
|
||||
&& i3 == i0+3
|
||||
&& x0.Uses == 1
|
||||
|
@ -1064,126 +963,84 @@
|
|||
&& s0.Uses == 1
|
||||
&& s1.Uses == 1
|
||||
&& o0.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& same(p1, p2, 1)
|
||||
&& mergePoint(b,x0,x1,x2) != nil
|
||||
&& clobber(x0, x1, x2, s0, s1, o0)
|
||||
-> @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem)
|
||||
|
||||
(ORL x0:(MOVBloadidx1 [i0] {s} p idx mem)
|
||||
s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
|
||||
&& i1==i0+1
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& s0.Uses == 1
|
||||
&& mergePoint(b,x0,x1) != nil
|
||||
&& clobber(x0, x1, s0)
|
||||
-> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
|
||||
|
||||
(ORL o0:(ORL
|
||||
x0:(MOVWloadidx1 [i0] {s} p idx mem)
|
||||
s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)))
|
||||
s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
|
||||
&& i2 == i0+2
|
||||
&& i3 == i0+3
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& x2.Uses == 1
|
||||
&& s0.Uses == 1
|
||||
&& s1.Uses == 1
|
||||
&& o0.Uses == 1
|
||||
&& mergePoint(b,x0,x1,x2) != nil
|
||||
&& clobber(x0, x1, x2, s0, s1, o0)
|
||||
-> @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
|
||||
-> @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p0 mem)
|
||||
|
||||
// Combine constant stores into larger (unaligned) stores.
|
||||
(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
|
||||
(MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem))
|
||||
&& x.Uses == 1
|
||||
&& ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
|
||||
&& same(p0, p1, 1)
|
||||
&& clobber(x)
|
||||
-> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
|
||||
(MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem))
|
||||
-> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem)
|
||||
(MOVBstoreconst [a] {s} p1 x:(MOVBstoreconst [c] {s} p0 mem))
|
||||
&& x.Uses == 1
|
||||
&& ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
|
||||
&& same(p0, p1, 1)
|
||||
&& clobber(x)
|
||||
-> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
|
||||
(MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
|
||||
-> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem)
|
||||
(MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem))
|
||||
&& x.Uses == 1
|
||||
&& ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
|
||||
&& same(p0, p1, 1)
|
||||
&& clobber(x)
|
||||
-> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
|
||||
(MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem))
|
||||
-> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem)
|
||||
(MOVWstoreconst [a] {s} p1 x:(MOVWstoreconst [c] {s} p0 mem))
|
||||
&& x.Uses == 1
|
||||
&& ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
|
||||
&& same(p0, p1, 1)
|
||||
&& clobber(x)
|
||||
-> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
|
||||
|
||||
(MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem))
|
||||
&& x.Uses == 1
|
||||
&& ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
|
||||
&& clobber(x)
|
||||
-> (MOVWstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p i mem)
|
||||
(MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem))
|
||||
&& x.Uses == 1
|
||||
&& ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
|
||||
&& clobber(x)
|
||||
-> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem)
|
||||
|
||||
(MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem))
|
||||
&& x.Uses == 1
|
||||
&& ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
|
||||
&& clobber(x)
|
||||
-> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLLconst <i.Type> [1] i) mem)
|
||||
-> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem)
|
||||
|
||||
// Combine stores into larger (unaligned) stores.
|
||||
(MOVBstore [i] {s} p (SHR(W|L)const [8] w) x:(MOVBstore [i-1] {s} p w mem))
|
||||
(MOVBstore [i] {s} p1 (SHR(W|L)const [8] w) x:(MOVBstore [i-1] {s} p0 w mem))
|
||||
&& x.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& clobber(x)
|
||||
-> (MOVWstore [i-1] {s} p w mem)
|
||||
(MOVBstore [i] {s} p w x:(MOVBstore {s} [i+1] p (SHR(W|L)const [8] w) mem))
|
||||
-> (MOVWstore [i-1] {s} p0 w mem)
|
||||
(MOVBstore [i] {s} p1 w x:(MOVBstore {s} [i+1] p0 (SHR(W|L)const [8] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& clobber(x)
|
||||
-> (MOVWstore [i] {s} p w mem)
|
||||
(MOVBstore [i] {s} p (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRLconst [j-8] w) mem))
|
||||
-> (MOVWstore [i] {s} p0 w mem)
|
||||
(MOVBstore [i] {s} p1 (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p0 w0:(SHRLconst [j-8] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& clobber(x)
|
||||
-> (MOVWstore [i-1] {s} p w0 mem)
|
||||
(MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
|
||||
-> (MOVWstore [i-1] {s} p0 w0 mem)
|
||||
(MOVWstore [i] {s} p1 (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p0 w mem))
|
||||
&& x.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& clobber(x)
|
||||
-> (MOVLstore [i-2] {s} p w mem)
|
||||
(MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem))
|
||||
-> (MOVLstore [i-2] {s} p0 w mem)
|
||||
(MOVWstore [i] {s} p1 (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p0 w0:(SHRLconst [j-16] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& clobber(x)
|
||||
-> (MOVLstore [i-2] {s} p w0 mem)
|
||||
-> (MOVLstore [i-2] {s} p0 w0 mem)
|
||||
|
||||
(MOVBstoreidx1 [i] {s} p idx (SHR(L|W)const [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
|
||||
&& x.Uses == 1
|
||||
&& clobber(x)
|
||||
-> (MOVWstoreidx1 [i-1] {s} p idx w mem)
|
||||
(MOVBstoreidx1 [i] {s} p idx w x:(MOVBstoreidx1 [i+1] {s} p idx (SHR(L|W)const [8] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& clobber(x)
|
||||
-> (MOVWstoreidx1 [i] {s} p idx w mem)
|
||||
(MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& clobber(x)
|
||||
-> (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
|
||||
(MOVWstoreidx1 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
|
||||
&& x.Uses == 1
|
||||
&& clobber(x)
|
||||
-> (MOVLstoreidx1 [i-2] {s} p idx w mem)
|
||||
(MOVWstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& clobber(x)
|
||||
-> (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
|
||||
// Move constant offsets from LEALx up into the load or store. This lets the above
|
||||
// combining rules discover indexed load- and store-combining instances.
|
||||
(MOV(B|W|L)load [i0] {s0} l:(LEAL1 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
|
||||
-> (MOV(B|W|L)load [i0+i1] {s0} (LEAL1 <l.Type> [0] {s1} x y) mem)
|
||||
(MOV(B|W|L)load [i0] {s0} l:(LEAL2 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
|
||||
-> (MOV(B|W|L)load [i0+i1] {s0} (LEAL2 <l.Type> [0] {s1} x y) mem)
|
||||
(MOV(B|W|L)load [i0] {s0} l:(LEAL4 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
|
||||
-> (MOV(B|W|L)load [i0+i1] {s0} (LEAL4 <l.Type> [0] {s1} x y) mem)
|
||||
(MOV(B|W|L)load [i0] {s0} l:(LEAL8 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
|
||||
-> (MOV(B|W|L)load [i0+i1] {s0} (LEAL8 <l.Type> [0] {s1} x y) mem)
|
||||
|
||||
(MOVWstoreidx2 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
|
||||
&& x.Uses == 1
|
||||
&& clobber(x)
|
||||
-> (MOVLstoreidx1 [i-2] {s} p (SHLLconst <idx.Type> [1] idx) w mem)
|
||||
(MOVWstoreidx2 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& clobber(x)
|
||||
-> (MOVLstoreidx1 [i-2] {s} p (SHLLconst <idx.Type> [1] idx) w0 mem)
|
||||
(MOV(B|W|L)store [i0] {s0} l:(LEAL1 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
|
||||
-> (MOV(B|W|L)store [i0+i1] {s0} (LEAL1 <l.Type> [0] {s1} x y) val mem)
|
||||
(MOV(B|W|L)store [i0] {s0} l:(LEAL2 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
|
||||
-> (MOV(B|W|L)store [i0+i1] {s0} (LEAL2 <l.Type> [0] {s1} x y) val mem)
|
||||
(MOV(B|W|L)store [i0] {s0} l:(LEAL4 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
|
||||
-> (MOV(B|W|L)store [i0+i1] {s0} (LEAL4 <l.Type> [0] {s1} x y) val mem)
|
||||
(MOV(B|W|L)store [i0] {s0} l:(LEAL8 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
|
||||
-> (MOV(B|W|L)store [i0+i1] {s0} (LEAL8 <l.Type> [0] {s1} x y) val mem)
|
||||
|
||||
// For PIC, break floating-point constant loading into two instructions so we have
|
||||
// a register to use for holding the address of the constant pool entry.
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -99,46 +99,61 @@ func compMem3(x, y *int) (int, bool) {
|
|||
// idxInt8 performs three indexed int8 memory operations: it loads x[i+1]
// into t, stores t to y[i+1], and stores the constant 77 to x[i+1].
//
// The `// amd64:` and `// 386:` comments placed directly above each statement
// hold a backquoted regular expression naming a MOVB-family instruction whose
// memory operand has the form 1(reg)(reg*1). Presumably the codegen test
// harness matches these patterns against the assembly generated for the
// following statement; confirm against the harness documentation.
func idxInt8(x, y []int8, i int) {
|
||||
var t int8
|
||||
// amd64: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
|
||||
// 386: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
|
||||
t = x[i+1]
|
||||
// amd64: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
|
||||
// 386: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
|
||||
y[i+1] = t
|
||||
// amd64: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
|
||||
// 386: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
|
||||
x[i+1] = 77
|
||||
}
|
||||
|
||||
// idxInt16 performs indexed int16 loads and stores at two index shapes:
// i+1 and 16*i+1. For each shape it loads from x into t, stores t to y,
// and finally stores the constant 77 to x.
//
// The `// amd64:` and `// 386:` comments above each statement hold a
// backquoted regular expression for a MOVW-family instruction with a memory
// operand of the form 2(reg)(reg*S). The i+1 accesses require S = 2, while
// the 16*i+1 accesses accept S = 1 or S = 2 (`[12]`). Presumably the codegen
// test harness matches these against the generated assembly; confirm against
// the harness documentation.
func idxInt16(x, y []int16, i int) {
|
||||
var t int16
|
||||
// amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
|
||||
// 386: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
|
||||
t = x[i+1]
|
||||
// amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
|
||||
// 386: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
|
||||
y[i+1] = t
|
||||
// amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
|
||||
// 386: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
|
||||
t = x[16*i+1]
|
||||
// amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
|
||||
// 386: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
|
||||
y[16*i+1] = t
|
||||
// amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
|
||||
// 386: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
|
||||
x[i+1] = 77
|
||||
// amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
|
||||
// 386: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
|
||||
x[16*i+1] = 77
|
||||
}
|
||||
|
||||
// idxInt32 performs indexed int32 loads and stores at three index shapes:
// i+1, 2*i+1 and 16*i+1. For each shape it loads from x into t and stores t
// to y; it then stores the constant 77 to x[i+1] and x[16*i+1].
//
// The `// amd64:` and `// 386:` comments above each statement hold a
// backquoted regular expression for a MOVL instruction with a memory operand
// of the form 4(reg)(reg*S): S = 4 for i+1, S = 8 for 2*i+1, and S = 1 or 4
// (`[14]`) for 16*i+1. The 2*i+1 accesses carry only an amd64 pattern, with
// no 386 counterpart. Presumably the codegen test harness matches these
// against the generated assembly; confirm against the harness documentation.
func idxInt32(x, y []int32, i int) {
|
||||
var t int32
|
||||
// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
|
||||
// 386: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
|
||||
t = x[i+1]
|
||||
// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
|
||||
// 386: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
|
||||
y[i+1] = t
|
||||
// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
|
||||
t = x[2*i+1]
|
||||
// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
|
||||
y[2*i+1] = t
|
||||
// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
|
||||
// 386: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
|
||||
t = x[16*i+1]
|
||||
// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
|
||||
// 386: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
|
||||
y[16*i+1] = t
|
||||
// amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
|
||||
// 386: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
|
||||
x[i+1] = 77
|
||||
// amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
|
||||
// 386: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
|
||||
x[16*i+1] = 77
|
||||
}
|
||||
|
||||
|
@ -160,24 +175,71 @@ func idxInt64(x, y []int64, i int) {
|
|||
|
||||
// idxFloat32 performs indexed float32 loads and stores at two index shapes,
// i+1 and 16*i+1: for each shape it loads from x into t and stores t to y.
//
// The `// amd64:` and `// 386/sse2:` comments above each statement hold a
// backquoted regular expression for a MOVSS instruction that uses an X
// register and a memory operand of the form 4(reg)(reg*S), with S = 4 for
// i+1 and S = 1 or 4 (`[14]`) for 16*i+1. Presumably the codegen test harness
// matches these against the generated assembly; confirm against the harness
// documentation.
//
// NOTE(review): every amd64 pattern appears twice in a row with identical
// text; this looks like the before/after lines of a whitespace-only change
// in a diff rendering rather than an intentional duplicate. Confirm.
func idxFloat32(x, y []float32, i int) {
|
||||
var t float32
|
||||
// amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
|
||||
// amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
|
||||
// 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
|
||||
t = x[i+1]
|
||||
// amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
|
||||
// amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
|
||||
// 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
|
||||
y[i+1] = t
|
||||
// amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
|
||||
// amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
|
||||
// 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
|
||||
t = x[16*i+1]
|
||||
// amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
|
||||
// amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
|
||||
// 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
|
||||
y[16*i+1] = t
|
||||
}
|
||||
|
||||
// idxFloat64 performs indexed float64 loads and stores at two index shapes,
// i+1 and 16*i+1: for each shape it loads from x into t and stores t to y.
//
// The `// amd64:` and `// 386/sse2:` comments above each statement hold a
// backquoted regular expression for a MOVSD instruction that uses an X
// register and a memory operand of the form 8(reg)(reg*S), with S = 8 for
// i+1 and S = 1 or 8 (`[18]`) for 16*i+1. Presumably the codegen test harness
// matches these against the generated assembly; confirm against the harness
// documentation.
//
// NOTE(review): every amd64 pattern appears twice in a row with identical
// text; this looks like the before/after lines of a whitespace-only change
// in a diff rendering rather than an intentional duplicate. Confirm.
func idxFloat64(x, y []float64, i int) {
|
||||
var t float64
|
||||
// amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
|
||||
// amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
|
||||
// 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
|
||||
t = x[i+1]
|
||||
// amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
|
||||
// amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
|
||||
// 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
|
||||
y[i+1] = t
|
||||
// amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
|
||||
// amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
|
||||
// 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
|
||||
t = x[16*i+1]
|
||||
// amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
|
||||
// amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
|
||||
// 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
|
||||
y[16*i+1] = t
|
||||
}
|
||||
|
||||
// idxLoadPlusOp starts from x[0] and folds six further elements of x into
// the running value s, one per arithmetic/logical operator: += x[i+1],
// -= x[i+2], *= x[i+3], &= x[i+4], |= x[i+5], ^= x[i+6]. It returns s.
//
// Each `// 386:` comment holds a backquoted regular expression for the
// matching ALU instruction (ADDL, SUBL, IMULL, ANDL, ORL, XORL) reading a
// memory operand of the form D(reg)(reg*4) into a register, where D is
// 4, 8, ..., 24 for successive statements. Presumably the codegen test
// harness matches these against the generated 386 assembly, checking that
// the indexed load is folded into the operation; confirm against the harness
// documentation.
func idxLoadPlusOp(x []int32, i int) int32 {
|
||||
s := x[0]
|
||||
// 386: `ADDL\t4\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
|
||||
s += x[i+1]
|
||||
// 386: `SUBL\t8\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
|
||||
s -= x[i+2]
|
||||
// 386: `IMULL\t12\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
|
||||
s *= x[i+3]
|
||||
// 386: `ANDL\t16\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
|
||||
s &= x[i+4]
|
||||
// 386: `ORL\t20\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
|
||||
s |= x[i+5]
|
||||
// 386: `XORL\t24\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
|
||||
s ^= x[i+6]
|
||||
return s
|
||||
}
|
||||
|
||||
// idxStorePlusOp applies in-place read-modify-write updates to elements of
// x. The first group combines x[i+1]..x[i+5] with v using +=, -=, &=, |=
// and ^=; the second group combines x[i+6]..x[i+9] with the constant 77
// using +=, &=, |= and ^=.
//
// Each `// 386:` comment holds a backquoted regular expression for the
// matching ALU instruction (ADDL, SUBL, ANDL, ORL, XORL) whose destination is
// a memory operand of the form D(reg)(reg*4), with D = 4, 8, ..., 36 for
// successive statements; the source is a register in the first group and the
// immediate `$77` in the second. Presumably the codegen test harness matches
// these against the generated 386 assembly; confirm against the harness
// documentation.
func idxStorePlusOp(x []int32, i int, v int32) {
|
||||
// 386: `ADDL\t[A-Z]+, 4\([A-Z]+\)\([A-Z]+\*4\)`
|
||||
x[i+1] += v
|
||||
// 386: `SUBL\t[A-Z]+, 8\([A-Z]+\)\([A-Z]+\*4\)`
|
||||
x[i+2] -= v
|
||||
// 386: `ANDL\t[A-Z]+, 12\([A-Z]+\)\([A-Z]+\*4\)`
|
||||
x[i+3] &= v
|
||||
// 386: `ORL\t[A-Z]+, 16\([A-Z]+\)\([A-Z]+\*4\)`
|
||||
x[i+4] |= v
|
||||
// 386: `XORL\t[A-Z]+, 20\([A-Z]+\)\([A-Z]+\*4\)`
|
||||
x[i+5] ^= v
|
||||
|
||||
|
||||
// 386: `ADDL\t[$]77, 24\([A-Z]+\)\([A-Z]+\*4\)`
|
||||
x[i+6] += 77
|
||||
// 386: `ANDL\t[$]77, 28\([A-Z]+\)\([A-Z]+\*4\)`
|
||||
x[i+7] &= 77
|
||||
// 386: `ORL\t[$]77, 32\([A-Z]+\)\([A-Z]+\*4\)`
|
||||
x[i+8] |= 77
|
||||
// 386: `XORL\t[$]77, 36\([A-Z]+\)\([A-Z]+\*4\)`
|
||||
x[i+9] ^= 77
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue