cmd/compile: convert 386 port to use addressing modes pass

Update #36468

Change-Id: Idfdb845d097994689be450d6e8a57fa9adb57166
Reviewed-on: https://go-review.googlesource.com/c/go/+/222782
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
Author: Keith Randall
Date: 2020-03-10 14:08:40 -07:00
parent c3b9042132
commit d84cbec890
4 changed files with 1058 additions and 4882 deletions
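For context: the addressing-modes pass folds address arithmetic (ADDL, LEAL and friends) into the memory operand of a load or store, so the CPU's indexed addressing does the work for free. Below is a minimal Go illustration of the source-level pattern the pass targets; the corresponding asm checks are in the codegen test diff at the end of this commit:

package main

import "fmt"

// With the pass enabled for 386, b[i] can compile to a single
// MOVBLZX (base)(index*1) instead of an add followed by a plain load.
func load(b []byte, i int) byte {
	return b[i]
}

func main() {
	fmt.Println(load([]byte{10, 20, 30}, 1)) // 20
}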

src/cmd/compile/internal/ssa/addressingmodes.go

@@ -11,8 +11,8 @@ func addressingModes(f *Func) {
default:
// Most architectures can't do this.
return
case "amd64":
// TODO: 386, s390x?
case "amd64", "386":
// TODO: s390x?
}
var tmp []*Value
@@ -21,7 +21,17 @@ func addressingModes(f *Func) {
if !combineFirst[v.Op] {
continue
}
p := v.Args[0]
// All matched operations have the pointer in arg[0].
// All results have the pointer in arg[0] and the index in arg[1].
// *Except* for operations which update a register,
// which are marked with resultInArg0. Those have
// the pointer in arg[1], and the corresponding result op
// has the pointer in arg[1] and the index in arg[2].
ptrIndex := 0
if opcodeTable[v.Op].resultInArg0 {
ptrIndex = 1
}
p := v.Args[ptrIndex]
c, ok := combine[[2]Op{v.Op, p.Op}]
if !ok {
continue
@@ -71,10 +81,11 @@ func addressingModes(f *Func) {
f.Fatalf("unknown aux combining for %s and %s\n", v.Op, p.Op)
}
// Combine the operations.
tmp = append(tmp[:0], v.Args[1:]...)
tmp = append(tmp[:0], v.Args[:ptrIndex]...)
tmp = append(tmp, p.Args...)
tmp = append(tmp, v.Args[ptrIndex+1:]...)
v.resetArgs()
v.Op = c
v.AddArgs(p.Args...)
v.AddArgs(tmp...)
}
}
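A toy model of the argument shuffle above (strings stand in for *Value args; the ops named in the comments are from the combine table below). Ordinary loads and stores use ptrIndex 0; resultInArg0 ops keep their first argument in place and splice the pointer's args in at position 1:

package main

import "fmt"

// combineArgs mirrors the tmp/append logic: everything before the pointer,
// then the pointer op's own args (base, index), then everything after it.
func combineArgs(vArgs, pArgs []string, ptrIndex int) []string {
	tmp := append([]string{}, vArgs[:ptrIndex]...)
	tmp = append(tmp, pArgs...)
	return append(tmp, vArgs[ptrIndex+1:]...)
}

func main() {
	// (MOVLload ptr mem) with ptr = (LEAL4 base idx):
	fmt.Println(combineArgs([]string{"ptr", "mem"}, []string{"base", "idx"}, 0))
	// -> [base idx mem], the arg order MOVLloadidx4 expects.

	// (ADDLload x ptr mem), a resultInArg0 op:
	fmt.Println(combineArgs([]string{"x", "ptr", "mem"}, []string{"base", "idx"}, 1))
	// -> [x base idx mem], the arg order ADDLloadidx4 expects.
}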
@@ -97,6 +108,7 @@ func init() {
// x.Args[0].Args + x.Args[1:]
// Additionally, the Aux/AuxInt from x.Args[0] is merged into x.
var combine = map[[2]Op]Op{
// amd64
[2]Op{OpAMD64MOVBload, OpAMD64ADDQ}: OpAMD64MOVBloadidx1,
[2]Op{OpAMD64MOVWload, OpAMD64ADDQ}: OpAMD64MOVWloadidx1,
[2]Op{OpAMD64MOVLload, OpAMD64ADDQ}: OpAMD64MOVLloadidx1,
@@ -150,5 +162,64 @@ var combine = map[[2]Op]Op{
[2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ1}: OpAMD64MOVQstoreconstidx1,
[2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ8}: OpAMD64MOVQstoreconstidx8,
// TODO: 386
// 386
[2]Op{Op386MOVBload, Op386ADDL}: Op386MOVBloadidx1,
[2]Op{Op386MOVWload, Op386ADDL}: Op386MOVWloadidx1,
[2]Op{Op386MOVLload, Op386ADDL}: Op386MOVLloadidx1,
[2]Op{Op386MOVSSload, Op386ADDL}: Op386MOVSSloadidx1,
[2]Op{Op386MOVSDload, Op386ADDL}: Op386MOVSDloadidx1,
[2]Op{Op386MOVBstore, Op386ADDL}: Op386MOVBstoreidx1,
[2]Op{Op386MOVWstore, Op386ADDL}: Op386MOVWstoreidx1,
[2]Op{Op386MOVLstore, Op386ADDL}: Op386MOVLstoreidx1,
[2]Op{Op386MOVSSstore, Op386ADDL}: Op386MOVSSstoreidx1,
[2]Op{Op386MOVSDstore, Op386ADDL}: Op386MOVSDstoreidx1,
[2]Op{Op386MOVBstoreconst, Op386ADDL}: Op386MOVBstoreconstidx1,
[2]Op{Op386MOVWstoreconst, Op386ADDL}: Op386MOVWstoreconstidx1,
[2]Op{Op386MOVLstoreconst, Op386ADDL}: Op386MOVLstoreconstidx1,
[2]Op{Op386MOVBload, Op386LEAL1}: Op386MOVBloadidx1,
[2]Op{Op386MOVWload, Op386LEAL1}: Op386MOVWloadidx1,
[2]Op{Op386MOVWload, Op386LEAL2}: Op386MOVWloadidx2,
[2]Op{Op386MOVLload, Op386LEAL1}: Op386MOVLloadidx1,
[2]Op{Op386MOVLload, Op386LEAL4}: Op386MOVLloadidx4,
[2]Op{Op386MOVSSload, Op386LEAL1}: Op386MOVSSloadidx1,
[2]Op{Op386MOVSSload, Op386LEAL4}: Op386MOVSSloadidx4,
[2]Op{Op386MOVSDload, Op386LEAL1}: Op386MOVSDloadidx1,
[2]Op{Op386MOVSDload, Op386LEAL8}: Op386MOVSDloadidx8,
[2]Op{Op386MOVBstore, Op386LEAL1}: Op386MOVBstoreidx1,
[2]Op{Op386MOVWstore, Op386LEAL1}: Op386MOVWstoreidx1,
[2]Op{Op386MOVWstore, Op386LEAL2}: Op386MOVWstoreidx2,
[2]Op{Op386MOVLstore, Op386LEAL1}: Op386MOVLstoreidx1,
[2]Op{Op386MOVLstore, Op386LEAL4}: Op386MOVLstoreidx4,
[2]Op{Op386MOVSSstore, Op386LEAL1}: Op386MOVSSstoreidx1,
[2]Op{Op386MOVSSstore, Op386LEAL4}: Op386MOVSSstoreidx4,
[2]Op{Op386MOVSDstore, Op386LEAL1}: Op386MOVSDstoreidx1,
[2]Op{Op386MOVSDstore, Op386LEAL8}: Op386MOVSDstoreidx8,
[2]Op{Op386MOVBstoreconst, Op386LEAL1}: Op386MOVBstoreconstidx1,
[2]Op{Op386MOVWstoreconst, Op386LEAL1}: Op386MOVWstoreconstidx1,
[2]Op{Op386MOVWstoreconst, Op386LEAL2}: Op386MOVWstoreconstidx2,
[2]Op{Op386MOVLstoreconst, Op386LEAL1}: Op386MOVLstoreconstidx1,
[2]Op{Op386MOVLstoreconst, Op386LEAL4}: Op386MOVLstoreconstidx4,
[2]Op{Op386ADDLload, Op386LEAL4}: Op386ADDLloadidx4,
[2]Op{Op386SUBLload, Op386LEAL4}: Op386SUBLloadidx4,
[2]Op{Op386MULLload, Op386LEAL4}: Op386MULLloadidx4,
[2]Op{Op386ANDLload, Op386LEAL4}: Op386ANDLloadidx4,
[2]Op{Op386ORLload, Op386LEAL4}: Op386ORLloadidx4,
[2]Op{Op386XORLload, Op386LEAL4}: Op386XORLloadidx4,
[2]Op{Op386ADDLmodify, Op386LEAL4}: Op386ADDLmodifyidx4,
[2]Op{Op386SUBLmodify, Op386LEAL4}: Op386SUBLmodifyidx4,
[2]Op{Op386ANDLmodify, Op386LEAL4}: Op386ANDLmodifyidx4,
[2]Op{Op386ORLmodify, Op386LEAL4}: Op386ORLmodifyidx4,
[2]Op{Op386XORLmodify, Op386LEAL4}: Op386XORLmodifyidx4,
[2]Op{Op386ADDLconstmodify, Op386LEAL4}: Op386ADDLconstmodifyidx4,
[2]Op{Op386ANDLconstmodify, Op386LEAL4}: Op386ANDLconstmodifyidx4,
[2]Op{Op386ORLconstmodify, Op386LEAL4}: Op386ORLconstmodifyidx4,
[2]Op{Op386XORLconstmodify, Op386LEAL4}: Op386XORLconstmodifyidx4,
}
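To make the table's role concrete, here is a self-contained sketch of the lookup-and-fuse step (strings stand in for the compiler's Op values; the real pass also merges Aux/AuxInt as described in the comment above the table):

package main

import "fmt"

var combine = map[[2]string]string{
	{"MOVLload", "LEAL4"}: "MOVLloadidx4",
	{"MOVLload", "ADDL"}:  "MOVLloadidx1",
}

func main() {
	// v = (MOVLload [8] ptr mem) with ptr = (LEAL4 [4] base idx):
	// the (v.Op, p.Op) pair keys the table, and the AuxInts 8 and 4
	// merge into a single offset of 12.
	if fused, ok := combine[[2]string{"MOVLload", "LEAL4"}]; ok {
		fmt.Printf("(%s [%d] base idx mem)\n", fused, 8+4)
	}
}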

src/cmd/compile/internal/ssa/gen/386.rules

@@ -588,10 +588,6 @@
(MOVWLSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWLSXload <v.Type> [off] {sym} ptr mem)
(MOVWLZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
(MOVBLZX x:(MOVBloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx1 <v.Type> [off] {sym} ptr idx mem)
(MOVWLZX x:(MOVWloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem)
(MOVWLZX x:(MOVWloadidx2 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem)
// replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBLZX x)
(MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWLZX x)
@@ -611,34 +607,22 @@
// fold constants into memory operations
// Note that this is not always a good idea because if not all the uses of
// the ADDQconst get eliminated, we still have to compute the ADDQconst and we now
// have potentially two live values (ptr and (ADDQconst [off] ptr)) instead of one.
// the ADDLconst get eliminated, we still have to compute the ADDLconst and we now
// have potentially two live values (ptr and (ADDLconst [off] ptr)) instead of one.
// Nevertheless, let's do it!
(MOV(L|W|B|SS|SD)load [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOV(L|W|B|SS|SD)load [off1+off2] {sym} ptr mem)
(MOV(L|W|B|SS|SD)store [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOV(L|W|B|SS|SD)store [off1+off2] {sym} ptr val mem)
((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {sym} val base mem)
((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem) && is32Bit(off1+off2) ->
((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {sym} val base idx mem)
((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem) && is32Bit(off1+off2*4) ->
((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2*4] {sym} val base idx mem)
((ADD|SUB|MUL|DIV)SSload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem)
((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem)
((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym} (ADDLconst [off2] base) val mem) && is32Bit(off1+off2) ->
((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {sym} base val mem)
((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem) && is32Bit(off1+off2) ->
((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2] {sym} base idx val mem)
((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem) && is32Bit(off1+off2*4) ->
((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2*4] {sym} base idx val mem)
((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDLconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym} (ADDLconst [off2] base) idx mem) && ValAndOff(valoff1).canAdd(off2) ->
((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {sym} base idx mem)
((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym} base (ADDLconst [off2] idx) mem) && ValAndOff(valoff1).canAdd(off2*4) ->
((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2*4)] {sym} base idx mem)
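Concretely, the first rule in this group rewrites (MOVLload [4] (ADDLconst [8] ptr) mem) into (MOVLload [12] ptr mem), guarded by an overflow check. A sketch of that guard (the real is32Bit helper lives in the SSA rewrite support code):

package main

import "fmt"

// is32Bit reports whether n fits in a signed 32-bit integer, the condition
// the rules check before merging two offsets.
func is32Bit(n int64) bool { return n == int64(int32(n)) }

func main() {
	off1, off2 := int64(4), int64(8)
	if is32Bit(off1 + off2) {
		fmt.Println("fold: MOVLload [", off1+off2, "] ptr mem")
	}
}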
// Fold constants into stores.
(MOVLstore [off] {sym} ptr (MOVLconst [c]) mem) && validOff(off) ->
@@ -652,7 +636,7 @@
(MOV(L|W|B)storeconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
(MOV(L|W|B)storeconst [ValAndOff(sc).add(off)] {s} ptr mem)
// We need to fold LEAQ into the MOVx ops so that the live variable analysis knows
// We need to fold LEAL into the MOVx ops so that the live variable analysis knows
// what variables are being read/written by the ops.
// Note: we turn off this merging for operations on globals when building
// position-independent code (when Flag_shared is set).
@@ -672,31 +656,9 @@
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
(MOV(L|W|B)storeconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
// generating indexed loads and stores
(MOV(B|W|L|SS|SD)load [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOV(B|W|L|SS|SD)loadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVWload [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
(MOV(L|SS)load [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOV(L|SS)loadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVSDload [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOVSDloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
(MOV(B|W|L|SS|SD)store [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOV(B|W|L|SS|SD)storeidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOVWstore [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOV(L|SS)store [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOV(L|SS)storeidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOVSDstore [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOVSDstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem)
((ADD|SUB|MUL|DIV)SSload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
((ADD|SUB|MUL|DIV)SSload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
@@ -706,97 +668,20 @@
((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem)
((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAL [off2] {sym2} base) mem)
&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym1} (LEAL [off2] {sym2} base) idx mem)
&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base idx mem)
(MOV(B|W|L|SS|SD)load [off] {sym} (ADDL ptr idx) mem) && ptr.Op != OpSB -> (MOV(B|W|L|SS|SD)loadidx1 [off] {sym} ptr idx mem)
(MOV(B|W|L|SS|SD)store [off] {sym} (ADDL ptr idx) val mem) && ptr.Op != OpSB -> (MOV(B|W|L|SS|SD)storeidx1 [off] {sym} ptr idx val mem)
(MOV(B|W|L)storeconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
(MOV(B|W|L)storeconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVWstoreconst [x] {sym1} (LEAL2 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
(MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVLstoreconst [x] {sym1} (LEAL4 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
(MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
(MOV(B|W|L)storeconst [x] {sym} (ADDL ptr idx) mem) -> (MOV(B|W|L)storeconstidx1 [x] {sym} ptr idx mem)
// combine SHLL into indexed loads and stores
(MOVWloadidx1 [c] {sym} ptr (SHLLconst [1] idx) mem) -> (MOVWloadidx2 [c] {sym} ptr idx mem)
(MOVLloadidx1 [c] {sym} ptr (SHLLconst [2] idx) mem) -> (MOVLloadidx4 [c] {sym} ptr idx mem)
(MOVWstoreidx1 [c] {sym} ptr (SHLLconst [1] idx) val mem) -> (MOVWstoreidx2 [c] {sym} ptr idx val mem)
(MOVLstoreidx1 [c] {sym} ptr (SHLLconst [2] idx) val mem) -> (MOVLstoreidx4 [c] {sym} ptr idx val mem)
(MOVWstoreconstidx1 [c] {sym} ptr (SHLLconst [1] idx) mem) -> (MOVWstoreconstidx2 [c] {sym} ptr idx mem)
(MOVLstoreconstidx1 [c] {sym} ptr (SHLLconst [2] idx) mem) -> (MOVLstoreconstidx4 [c] {sym} ptr idx mem)
// combine ADDL into indexed loads and stores
(MOV(B|W|L|SS|SD)loadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOV(B|W|L|SS|SD)loadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
(MOVWloadidx2 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOVWloadidx2 [int64(int32(c+d))] {sym} ptr idx mem)
(MOV(L|SS)loadidx4 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOV(L|SS)loadidx4 [int64(int32(c+d))] {sym} ptr idx mem)
(MOVSDloadidx8 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOVSDloadidx8 [int64(int32(c+d))] {sym} ptr idx mem)
(MOV(B|W|L|SS|SD)storeidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOV(B|W|L|SS|SD)storeidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
(MOVWstoreidx2 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOVWstoreidx2 [int64(int32(c+d))] {sym} ptr idx val mem)
(MOV(L|SS)storeidx4 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOV(L|SS)storeidx4 [int64(int32(c+d))] {sym} ptr idx val mem)
(MOVSDstoreidx8 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOVSDstoreidx8 [int64(int32(c+d))] {sym} ptr idx val mem)
(MOV(B|W|L|SS|SD)loadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOV(B|W|L|SS|SD)loadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
(MOVWloadidx2 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOVWloadidx2 [int64(int32(c+2*d))] {sym} ptr idx mem)
(MOV(L|SS)loadidx4 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOV(L|SS)loadidx4 [int64(int32(c+4*d))] {sym} ptr idx mem)
(MOVSDloadidx8 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOVSDloadidx8 [int64(int32(c+8*d))] {sym} ptr idx mem)
(MOV(B|W|L|SS|SD)storeidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOV(B|W|L|SS|SD)storeidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
(MOVWstoreidx2 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOVWstoreidx2 [int64(int32(c+2*d))] {sym} ptr idx val mem)
(MOV(L|SS)storeidx4 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOV(L|SS)storeidx4 [int64(int32(c+4*d))] {sym} ptr idx val mem)
(MOVSDstoreidx8 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOVSDstoreidx8 [int64(int32(c+8*d))] {sym} ptr idx val mem)
// Merge load/store to op
((ADD|AND|OR|XOR|SUB|MUL)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) -> ((ADD|AND|OR|XOR|SUB|MUL)Lload x [off] {sym} ptr mem)
((ADD|AND|OR|XOR|SUB|MUL)L x l:(MOVLloadidx4 [off] {sym} ptr idx mem)) && canMergeLoadClobber(v, l, x) && clobber(l) ->
((ADD|AND|OR|XOR|SUB|MUL)Lloadidx4 x [off] {sym} ptr idx mem)
((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem)
((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) -> ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) -> ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)
(MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) ->
((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
(MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|AND|OR|XOR)Lloadidx4 x [off] {sym} ptr idx mem) mem) && y.Uses==1 && clobber(y) ->
((ADD|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx x mem)
(MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|SUB|AND|OR|XOR)L l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) ->
((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx x mem)
(MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLload [off] {sym} ptr mem)) mem)
&& y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off) ->
((ADD|AND|OR|XOR)Lconstmodify [makeValAndOff(c,off)] {sym} ptr mem)
(MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLloadidx4 [off] {sym} ptr idx mem)) mem)
&& y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off) ->
((ADD|AND|OR|XOR)Lconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
((ADD|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) && validValAndOff(c,off) ->
((ADD|AND|OR|XOR)Lconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
(SUBLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) && validValAndOff(-c,off) ->
(ADDLconstmodifyidx4 [makeValAndOff(-c,off)] {sym} ptr idx mem)
(MOV(B|W|L)storeconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem) ->
(MOV(B|W|L)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
(MOVWstoreconstidx2 [x] {sym} (ADDLconst [c] ptr) idx mem) ->
(MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem)
(MOVLstoreconstidx4 [x] {sym} (ADDLconst [c] ptr) idx mem) ->
(MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem)
(MOV(B|W|L)storeconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem) ->
(MOV(B|W|L)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
(MOVWstoreconstidx2 [x] {sym} ptr (ADDLconst [c] idx) mem) ->
(MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem)
(MOVLstoreconstidx4 [x] {sym} ptr (ADDLconst [c] idx) mem) ->
(MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem)
// fold LEALs together
(LEAL [off1] {sym1} (LEAL [off2] {sym2} x)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
@@ -826,6 +711,16 @@
(LEAL [off1] {sym1} (LEAL8 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(LEAL8 [off1+off2] {mergeSym(sym1,sym2)} x y)
// LEAL[1248] into LEAL[1248]. Only some such merges are possible.
(LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} y y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(LEAL2 [off1+off2] {mergeSym(sym1, sym2)} x y)
(LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(LEAL2 [off1+off2] {mergeSym(sym1, sym2)} y x)
(LEAL2 [off1] {sym1} x (LEAL1 [off2] {sym2} y y)) && is32Bit(off1+2*off2) && sym2 == nil ->
(LEAL4 [off1+2*off2] {sym1} x y)
(LEAL4 [off1] {sym1} x (LEAL1 [off2] {sym2} y y)) && is32Bit(off1+4*off2) && sym2 == nil ->
(LEAL8 [off1+4*off2] {sym1} x y)
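A quick arithmetic check of why the LEAL2/LEAL1 merge above is sound (LEALk [off] x y computes x + k*y + off, and the inner LEAL1 uses the same register twice). It also shows why sym2 must be nil: the inner offset gets doubled, and a symbol reference cannot be scaled:

package main

import "fmt"

func main() {
	x, y := int32(1000), int32(30)
	off1, off2 := int32(8), int32(4)
	// LEAL2 [off1] x (LEAL1 [off2] y y) = x + 2*((y+y)+off2) + off1
	lhs := x + 2*((y+y)+off2) + off1
	// LEAL4 [off1+2*off2] x y = x + 4*y + (off1 + 2*off2)
	rhs := x + 4*y + (off1 + 2*off2)
	fmt.Println(lhs == rhs, lhs) // true 1136
}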
// Absorb InvertFlags into branches.
(LT (InvertFlags cmp) yes no) -> (GT cmp yes no)
(GT (InvertFlags cmp) yes no) -> (LT cmp yes no)
@@ -1039,23 +934,27 @@
// TEST %reg,%reg is shorter than CMP
(CMP(L|W|B)const x [0]) -> (TEST(L|W|B) x x)
// Convert LEAL1 back to ADDL if we can
(LEAL1 [0] x y) && v.Aux == nil -> (ADDL x y)
// Combining byte loads into larger (unaligned) loads.
// There are many ways these combinations could occur. This is
// designed to match the way encoding/binary.LittleEndian does it.
(ORL x0:(MOVBload [i0] {s} p mem)
s0:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
(ORL x0:(MOVBload [i0] {s} p0 mem)
s0:(SHLLconst [8] x1:(MOVBload [i1] {s} p1 mem)))
&& i1 == i0+1
&& x0.Uses == 1
&& x1.Uses == 1
&& s0.Uses == 1
&& same(p0, p1, 1)
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, s0)
-> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
-> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem)
(ORL o0:(ORL
x0:(MOVWload [i0] {s} p mem)
s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem)))
s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p mem)))
x0:(MOVWload [i0] {s} p0 mem)
s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p1 mem)))
s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p2 mem)))
&& i2 == i0+2
&& i3 == i0+3
&& x0.Uses == 1
@@ -1064,126 +963,84 @@
&& s0.Uses == 1
&& s1.Uses == 1
&& o0.Uses == 1
&& same(p0, p1, 1)
&& same(p1, p2, 1)
&& mergePoint(b,x0,x1,x2) != nil
&& clobber(x0, x1, x2, s0, s1, o0)
-> @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem)
(ORL x0:(MOVBloadidx1 [i0] {s} p idx mem)
s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
&& i1==i0+1
&& x0.Uses == 1
&& x1.Uses == 1
&& s0.Uses == 1
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, s0)
-> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
(ORL o0:(ORL
x0:(MOVWloadidx1 [i0] {s} p idx mem)
s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)))
s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
&& i2 == i0+2
&& i3 == i0+3
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& s0.Uses == 1
&& s1.Uses == 1
&& o0.Uses == 1
&& mergePoint(b,x0,x1,x2) != nil
&& clobber(x0, x1, x2, s0, s1, o0)
-> @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
-> @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p0 mem)
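The shape matched here is exactly how encoding/binary.LittleEndian assembles integers. For instance, the two-byte rule fires on code like this, collapsing two MOVBloads, a SHLLconst [8], and an ORL into a single MOVWload:

package main

import "fmt"

// load16 reads a 16-bit little-endian value byte by byte, the way
// encoding/binary.LittleEndian.Uint16 does.
func load16(b []byte) uint16 {
	return uint16(b[0]) | uint16(b[1])<<8
}

func main() {
	fmt.Printf("%#x\n", load16([]byte{0x34, 0x12})) // 0x1234
}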
// Combine constant stores into larger (unaligned) stores.
(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
(MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem))
&& x.Uses == 1
&& ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
&& same(p0, p1, 1)
&& clobber(x)
-> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
(MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem))
-> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem)
(MOVBstoreconst [a] {s} p1 x:(MOVBstoreconst [c] {s} p0 mem))
&& x.Uses == 1
&& ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
&& same(p0, p1, 1)
&& clobber(x)
-> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
(MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
-> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem)
(MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem))
&& x.Uses == 1
&& ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
&& same(p0, p1, 1)
&& clobber(x)
-> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
(MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem))
-> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem)
(MOVWstoreconst [a] {s} p1 x:(MOVWstoreconst [c] {s} p0 mem))
&& x.Uses == 1
&& ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
&& same(p0, p1, 1)
&& clobber(x)
-> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
(MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem))
&& x.Uses == 1
&& ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
&& clobber(x)
-> (MOVWstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p i mem)
(MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem))
&& x.Uses == 1
&& ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
&& clobber(x)
-> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem)
(MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem))
&& x.Uses == 1
&& ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
&& clobber(x)
-> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLLconst <i.Type> [1] i) mem)
-> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem)
// Combine stores into larger (unaligned) stores.
(MOVBstore [i] {s} p (SHR(W|L)const [8] w) x:(MOVBstore [i-1] {s} p w mem))
(MOVBstore [i] {s} p1 (SHR(W|L)const [8] w) x:(MOVBstore [i-1] {s} p0 w mem))
&& x.Uses == 1
&& same(p0, p1, 1)
&& clobber(x)
-> (MOVWstore [i-1] {s} p w mem)
(MOVBstore [i] {s} p w x:(MOVBstore {s} [i+1] p (SHR(W|L)const [8] w) mem))
-> (MOVWstore [i-1] {s} p0 w mem)
(MOVBstore [i] {s} p1 w x:(MOVBstore {s} [i+1] p0 (SHR(W|L)const [8] w) mem))
&& x.Uses == 1
&& same(p0, p1, 1)
&& clobber(x)
-> (MOVWstore [i] {s} p w mem)
(MOVBstore [i] {s} p (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRLconst [j-8] w) mem))
-> (MOVWstore [i] {s} p0 w mem)
(MOVBstore [i] {s} p1 (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p0 w0:(SHRLconst [j-8] w) mem))
&& x.Uses == 1
&& same(p0, p1, 1)
&& clobber(x)
-> (MOVWstore [i-1] {s} p w0 mem)
(MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
-> (MOVWstore [i-1] {s} p0 w0 mem)
(MOVWstore [i] {s} p1 (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p0 w mem))
&& x.Uses == 1
&& same(p0, p1, 1)
&& clobber(x)
-> (MOVLstore [i-2] {s} p w mem)
(MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem))
-> (MOVLstore [i-2] {s} p0 w mem)
(MOVWstore [i] {s} p1 (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p0 w0:(SHRLconst [j-16] w) mem))
&& x.Uses == 1
&& same(p0, p1, 1)
&& clobber(x)
-> (MOVLstore [i-2] {s} p w0 mem)
-> (MOVLstore [i-2] {s} p0 w0 mem)
(MOVBstoreidx1 [i] {s} p idx (SHR(L|W)const [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVWstoreidx1 [i-1] {s} p idx w mem)
(MOVBstoreidx1 [i] {s} p idx w x:(MOVBstoreidx1 [i+1] {s} p idx (SHR(L|W)const [8] w) mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVWstoreidx1 [i] {s} p idx w mem)
(MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
(MOVWstoreidx1 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVLstoreidx1 [i-2] {s} p idx w mem)
(MOVWstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
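The store side mirrors the load side: a byte-wise little-endian store such as the one below matches the MOVBstore/SHR(W|L)const rules above and becomes a single MOVWstore:

package main

import "fmt"

// store16 writes v little-endian one byte at a time, the way
// encoding/binary.LittleEndian.PutUint16 does.
func store16(b []byte, v uint16) {
	b[0] = byte(v)
	b[1] = byte(v >> 8)
}

func main() {
	b := make([]byte, 2)
	store16(b, 0x1234)
	fmt.Printf("% x\n", b) // 34 12
}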
// Move constant offsets from LEALx up into load. This lets the above combining
// rules discover indexed load-combining instances.
(MOV(B|W|L)load [i0] {s0} l:(LEAL1 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
-> (MOV(B|W|L)load [i0+i1] {s0} (LEAL1 <l.Type> [0] {s1} x y) mem)
(MOV(B|W|L)load [i0] {s0} l:(LEAL2 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
-> (MOV(B|W|L)load [i0+i1] {s0} (LEAL2 <l.Type> [0] {s1} x y) mem)
(MOV(B|W|L)load [i0] {s0} l:(LEAL4 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
-> (MOV(B|W|L)load [i0+i1] {s0} (LEAL4 <l.Type> [0] {s1} x y) mem)
(MOV(B|W|L)load [i0] {s0} l:(LEAL8 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
-> (MOV(B|W|L)load [i0+i1] {s0} (LEAL8 <l.Type> [0] {s1} x y) mem)
(MOVWstoreidx2 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVLstoreidx1 [i-2] {s} p (SHLLconst <idx.Type> [1] idx) w mem)
(MOVWstoreidx2 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVLstoreidx1 [i-2] {s} p (SHLLconst <idx.Type> [1] idx) w0 mem)
(MOV(B|W|L)store [i0] {s0} l:(LEAL1 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
-> (MOV(B|W|L)store [i0+i1] {s0} (LEAL1 <l.Type> [0] {s1} x y) val mem)
(MOV(B|W|L)store [i0] {s0} l:(LEAL2 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
-> (MOV(B|W|L)store [i0+i1] {s0} (LEAL2 <l.Type> [0] {s1} x y) val mem)
(MOV(B|W|L)store [i0] {s0} l:(LEAL4 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
-> (MOV(B|W|L)store [i0+i1] {s0} (LEAL4 <l.Type> [0] {s1} x y) val mem)
(MOV(B|W|L)store [i0] {s0} l:(LEAL8 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
-> (MOV(B|W|L)store [i0+i1] {s0} (LEAL8 <l.Type> [0] {s1} x y) val mem)
// For PIC, break floating-point constant loading into two instructions so we have
// a register to use for holding the address of the constant pool entry.

src/cmd/compile/internal/ssa/rewrite386.go (generated from 386.rules): file diff suppressed because it is too large

test/codegen/memops.go

@@ -99,46 +99,61 @@ func compMem3(x, y *int) (int, bool) {
func idxInt8(x, y []int8, i int) {
var t int8
// amd64: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
// 386: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
t = x[i+1]
// amd64: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
// 386: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
y[i+1] = t
// amd64: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
// 386: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
x[i+1] = 77
}
func idxInt16(x, y []int16, i int) {
var t int16
// amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
// 386: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
t = x[i+1]
// amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
// 386: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
y[i+1] = t
// amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
// 386: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
t = x[16*i+1]
// amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
// 386: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
y[16*i+1] = t
// amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
// 386: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
x[i+1] = 77
// amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
// 386: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
x[16*i+1] = 77
}
func idxInt32(x, y []int32, i int) {
var t int32
// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
// 386: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
t = x[i+1]
// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
// 386: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
y[i+1] = t
// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
t = x[2*i+1]
// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
y[2*i+1] = t
// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
// 386: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
t = x[16*i+1]
// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
// 386: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
y[16*i+1] = t
// amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
// 386: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
x[i+1] = 77
// amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
// 386: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
x[16*i+1] = 77
}
@@ -160,24 +175,71 @@ func idxInt64(x, y []int64, i int) {
func idxFloat32(x, y []float32, i int) {
var t float32
// amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
// 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
t = x[i+1]
// amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
// 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
y[i+1] = t
// amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
// 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
t = x[16*i+1]
// amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
// 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
y[16*i+1] = t
}
func idxFloat64(x, y []float64, i int) {
var t float64
// amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
// 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
t = x[i+1]
// amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
// 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
y[i+1] = t
// amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
// 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
t = x[16*i+1]
// amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
// 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
y[16*i+1] = t
}
func idxLoadPlusOp(x []int32, i int) int32 {
s := x[0]
// 386: `ADDL\t4\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
s += x[i+1]
// 386: `SUBL\t8\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
s -= x[i+2]
// 386: `IMULL\t12\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
s *= x[i+3]
// 386: `ANDL\t16\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
s &= x[i+4]
// 386: `ORL\t20\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
s |= x[i+5]
// 386: `XORL\t24\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
s ^= x[i+6]
return s
}
func idxStorePlusOp(x []int32, i int, v int32) {
// 386: `ADDL\t[A-Z]+, 4\([A-Z]+\)\([A-Z]+\*4\)`
x[i+1] += v
// 386: `SUBL\t[A-Z]+, 8\([A-Z]+\)\([A-Z]+\*4\)`
x[i+2] -= v
// 386: `ANDL\t[A-Z]+, 12\([A-Z]+\)\([A-Z]+\*4\)`
x[i+3] &= v
// 386: `ORL\t[A-Z]+, 16\([A-Z]+\)\([A-Z]+\*4\)`
x[i+4] |= v
// 386: `XORL\t[A-Z]+, 20\([A-Z]+\)\([A-Z]+\*4\)`
x[i+5] ^= v
// 386: `ADDL\t[$]77, 24\([A-Z]+\)\([A-Z]+\*4\)`
x[i+6] += 77
// 386: `ANDL\t[$]77, 28\([A-Z]+\)\([A-Z]+\*4\)`
x[i+7] &= 77
// 386: `ORL\t[$]77, 32\([A-Z]+\)\([A-Z]+\*4\)`
x[i+8] |= 77
// 386: `XORL\t[$]77, 36\([A-Z]+\)\([A-Z]+\*4\)`
x[i+9] ^= 77
}