cmd/compile: convert 386 port to use addressing modes pass (take 2)

Retrying CL 222782, with a fix that will hopefully stop the random crashing.

The issue with the previous CL is that it does pointer arithmetic
in a way that may briefly generate an out-of-bounds pointer. If an
interrupt happens to occur in that state, the referenced object may
be collected incorrectly.

Suppose there was code that did s[x+c].  The previous CL had a rule
to the effect of ptr + (x + c) -> c + (ptr + x).  But ptr+x is not
guaranteed to point to the same object as ptr. In contrast,
ptr+(x+c) is guaranteed to point to the same object as ptr, because
we would have already checked that x+c is in bounds.

For example, strconv.trim used to have this code:
  MOVZX -0x1(BX)(DX*1), BP
  CMPL $0x30, AL
After CL 222782, it had this code:
  LEAL 0(BX)(DX*1), BP
  CMPB $0x30, -0x1(BP)

An interrupt between those last two instructions could see BP pointing
outside the backing store of the slice involved.
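In Go terms, the problematic pattern is roughly the following (an
illustrative sketch, not the actual strconv source):

  func lastDigit(s []byte, i int) byte {
          // s[i-1] is bounds-checked, so ptr+(i-1) must point into s.
          // The rewritten form computes ptr+i first; when
          // i == len(s) == cap(s), that intermediate value points one
          // byte past the end of s's backing array.
          return s[i-1]
  }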

It's really hard to actually demonstrate a bug. First, you need to
have an interrupt occur at exactly the right time. Then, there must
be no other pointers to the object in question. Since the interrupted
frame will be scanned conservatively, there can't even be a dead
pointer in another register or on the stack. (In the example above,
a bug can't happen because BX still holds the original pointer.)
Then, the object in question needs to be collected (or at least
scanned?) before the interrupted code continues.

This CL needs to handle load combining somewhat differently than CL 222782
because of the new restriction on arithmetic. That's the only real
difference (other than removing the bad rules) from that old CL.

This bug is also present in the amd64 rewrite rules, and we haven't
seen any crashing as a result. I will fix up that code similarly to
this one in a separate CL.

Update #37881

Change-Id: I5f0d584d9bef4696bfe89a61ef0a27c8d507329f
Reviewed-on: https://go-review.googlesource.com/c/go/+/225798
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Keith Randall 2020-03-24 13:39:44 -07:00
parent 4a8b9bd264
commit af7eafd150
9 changed files with 685 additions and 4740 deletions

src/cmd/compile/internal/ssa/addressingmodes.go

@@ -11,8 +11,8 @@ func addressingModes(f *Func) {
 	default:
 		// Most architectures can't do this.
 		return
-	case "amd64":
-		// TODO: 386, s390x?
+	case "amd64", "386":
+		// TODO: s390x?
 	}
 	var tmp []*Value
@@ -21,7 +21,17 @@ func addressingModes(f *Func) {
 		if !combineFirst[v.Op] {
 			continue
 		}
-		p := v.Args[0]
+		// All matched operations have the pointer in arg[0].
+		// All results have the pointer in arg[0] and the index in arg[1].
+		// *Except* for operations which update a register,
+		// which are marked with resultInArg0. Those have
+		// the pointer in arg[1], and the corresponding result op
+		// has the pointer in arg[1] and the index in arg[2].
+		ptrIndex := 0
+		if opcodeTable[v.Op].resultInArg0 {
+			ptrIndex = 1
+		}
+		p := v.Args[ptrIndex]
 		c, ok := combine[[2]Op{v.Op, p.Op}]
 		if !ok {
 			continue
@@ -71,10 +81,11 @@ func addressingModes(f *Func) {
 			f.Fatalf("unknown aux combining for %s and %s\n", v.Op, p.Op)
 		}
 		// Combine the operations.
-		tmp = append(tmp[:0], v.Args[1:]...)
+		tmp = append(tmp[:0], v.Args[:ptrIndex]...)
+		tmp = append(tmp, p.Args...)
+		tmp = append(tmp, v.Args[ptrIndex+1:]...)
 		v.resetArgs()
 		v.Op = c
-		v.AddArgs(p.Args...)
 		v.AddArgs(tmp...)
 	}
 }
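To make the argument splicing concrete, here is a hedged walkthrough for a
resultInArg0 op, using a hypothetical ADDLload fed by a LEAL4 (the values
are illustrative, not taken from the patch):

  // v = (ADDLload [off] {sym} val p mem)   so ptrIndex = 1
  // p = (LEAL4 [off2] {sym2} base idx)
  //
  // tmp = append(tmp[:0], v.Args[:1]...)   // [val]
  // tmp = append(tmp, p.Args...)           // [val, base, idx]
  // tmp = append(tmp, v.Args[2:]...)       // [val, base, idx, mem]
  //
  // v is then reset to the combined op:
  // (ADDLloadidx4 [merged off] {merged sym} val base idx mem)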
@@ -97,6 +108,7 @@ func init() {
 // x.Args[0].Args + x.Args[1:]
 // Additionally, the Aux/AuxInt from x.Args[0] is merged into x.
 var combine = map[[2]Op]Op{
+	// amd64
 	[2]Op{OpAMD64MOVBload, OpAMD64ADDQ}: OpAMD64MOVBloadidx1,
 	[2]Op{OpAMD64MOVWload, OpAMD64ADDQ}: OpAMD64MOVWloadidx1,
 	[2]Op{OpAMD64MOVLload, OpAMD64ADDQ}: OpAMD64MOVLloadidx1,
@@ -150,5 +162,64 @@ var combine = map[[2]Op]Op{
 	[2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ1}: OpAMD64MOVQstoreconstidx1,
 	[2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ8}: OpAMD64MOVQstoreconstidx8,
-	// TODO: 386
+	// 386
+	[2]Op{Op386MOVBload, Op386ADDL}:  Op386MOVBloadidx1,
+	[2]Op{Op386MOVWload, Op386ADDL}:  Op386MOVWloadidx1,
+	[2]Op{Op386MOVLload, Op386ADDL}:  Op386MOVLloadidx1,
+	[2]Op{Op386MOVSSload, Op386ADDL}: Op386MOVSSloadidx1,
+	[2]Op{Op386MOVSDload, Op386ADDL}: Op386MOVSDloadidx1,
+	[2]Op{Op386MOVBstore, Op386ADDL}:  Op386MOVBstoreidx1,
+	[2]Op{Op386MOVWstore, Op386ADDL}:  Op386MOVWstoreidx1,
+	[2]Op{Op386MOVLstore, Op386ADDL}:  Op386MOVLstoreidx1,
+	[2]Op{Op386MOVSSstore, Op386ADDL}: Op386MOVSSstoreidx1,
+	[2]Op{Op386MOVSDstore, Op386ADDL}: Op386MOVSDstoreidx1,
+	[2]Op{Op386MOVBstoreconst, Op386ADDL}: Op386MOVBstoreconstidx1,
+	[2]Op{Op386MOVWstoreconst, Op386ADDL}: Op386MOVWstoreconstidx1,
+	[2]Op{Op386MOVLstoreconst, Op386ADDL}: Op386MOVLstoreconstidx1,
+	[2]Op{Op386MOVBload, Op386LEAL1}:  Op386MOVBloadidx1,
+	[2]Op{Op386MOVWload, Op386LEAL1}:  Op386MOVWloadidx1,
+	[2]Op{Op386MOVWload, Op386LEAL2}:  Op386MOVWloadidx2,
+	[2]Op{Op386MOVLload, Op386LEAL1}:  Op386MOVLloadidx1,
+	[2]Op{Op386MOVLload, Op386LEAL4}:  Op386MOVLloadidx4,
+	[2]Op{Op386MOVSSload, Op386LEAL1}: Op386MOVSSloadidx1,
+	[2]Op{Op386MOVSSload, Op386LEAL4}: Op386MOVSSloadidx4,
+	[2]Op{Op386MOVSDload, Op386LEAL1}: Op386MOVSDloadidx1,
+	[2]Op{Op386MOVSDload, Op386LEAL8}: Op386MOVSDloadidx8,
+	[2]Op{Op386MOVBstore, Op386LEAL1}:  Op386MOVBstoreidx1,
+	[2]Op{Op386MOVWstore, Op386LEAL1}:  Op386MOVWstoreidx1,
+	[2]Op{Op386MOVWstore, Op386LEAL2}:  Op386MOVWstoreidx2,
+	[2]Op{Op386MOVLstore, Op386LEAL1}:  Op386MOVLstoreidx1,
+	[2]Op{Op386MOVLstore, Op386LEAL4}:  Op386MOVLstoreidx4,
+	[2]Op{Op386MOVSSstore, Op386LEAL1}: Op386MOVSSstoreidx1,
+	[2]Op{Op386MOVSSstore, Op386LEAL4}: Op386MOVSSstoreidx4,
+	[2]Op{Op386MOVSDstore, Op386LEAL1}: Op386MOVSDstoreidx1,
+	[2]Op{Op386MOVSDstore, Op386LEAL8}: Op386MOVSDstoreidx8,
+	[2]Op{Op386MOVBstoreconst, Op386LEAL1}: Op386MOVBstoreconstidx1,
+	[2]Op{Op386MOVWstoreconst, Op386LEAL1}: Op386MOVWstoreconstidx1,
+	[2]Op{Op386MOVWstoreconst, Op386LEAL2}: Op386MOVWstoreconstidx2,
+	[2]Op{Op386MOVLstoreconst, Op386LEAL1}: Op386MOVLstoreconstidx1,
+	[2]Op{Op386MOVLstoreconst, Op386LEAL4}: Op386MOVLstoreconstidx4,
+	[2]Op{Op386ADDLload, Op386LEAL4}: Op386ADDLloadidx4,
+	[2]Op{Op386SUBLload, Op386LEAL4}: Op386SUBLloadidx4,
+	[2]Op{Op386MULLload, Op386LEAL4}: Op386MULLloadidx4,
+	[2]Op{Op386ANDLload, Op386LEAL4}: Op386ANDLloadidx4,
+	[2]Op{Op386ORLload, Op386LEAL4}:  Op386ORLloadidx4,
+	[2]Op{Op386XORLload, Op386LEAL4}: Op386XORLloadidx4,
+	[2]Op{Op386ADDLmodify, Op386LEAL4}: Op386ADDLmodifyidx4,
+	[2]Op{Op386SUBLmodify, Op386LEAL4}: Op386SUBLmodifyidx4,
+	[2]Op{Op386ANDLmodify, Op386LEAL4}: Op386ANDLmodifyidx4,
+	[2]Op{Op386ORLmodify, Op386LEAL4}:  Op386ORLmodifyidx4,
+	[2]Op{Op386XORLmodify, Op386LEAL4}: Op386XORLmodifyidx4,
+	[2]Op{Op386ADDLconstmodify, Op386LEAL4}: Op386ADDLconstmodifyidx4,
+	[2]Op{Op386ANDLconstmodify, Op386LEAL4}: Op386ANDLconstmodifyidx4,
+	[2]Op{Op386ORLconstmodify, Op386LEAL4}:  Op386ORLconstmodifyidx4,
+	[2]Op{Op386XORLconstmodify, Op386LEAL4}: Op386XORLconstmodifyidx4,
 }
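One way to read an entry in this table, as a hedged example: the pair
(Op386MOVWload, Op386LEAL2) maps to Op386MOVWloadidx2, so a word load whose
address operand computes ptr + 2*idx + off collapses into a single indexed
load, roughly

  (MOVWload [off] {sym} (LEAL2 [0] ptr idx) mem)
      => (MOVWloadidx2 [off] {sym} ptr idx mem)

which matches the 386 addressing mode off(ptr)(idx*2) in one instruction
instead of two.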

src/cmd/compile/internal/ssa/gen/386.rules

@@ -588,10 +588,6 @@
 (MOVWLSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWLSXload <v.Type> [off] {sym} ptr mem)
 (MOVWLZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
-(MOVBLZX x:(MOVBloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx1 <v.Type> [off] {sym} ptr idx mem)
-(MOVWLZX x:(MOVWloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem)
-(MOVWLZX x:(MOVWloadidx2 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem)

 // replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
 (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBLZX x)
 (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWLZX x)
@@ -611,34 +607,22 @@
 // fold constants into memory operations
 // Note that this is not always a good idea because if not all the uses of
-// the ADDQconst get eliminated, we still have to compute the ADDQconst and we now
-// have potentially two live values (ptr and (ADDQconst [off] ptr)) instead of one.
+// the ADDLconst get eliminated, we still have to compute the ADDLconst and we now
+// have potentially two live values (ptr and (ADDLconst [off] ptr)) instead of one.
 // Nevertheless, let's do it!
 (MOV(L|W|B|SS|SD)load [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOV(L|W|B|SS|SD)load [off1+off2] {sym} ptr mem)
 (MOV(L|W|B|SS|SD)store [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOV(L|W|B|SS|SD)store [off1+off2] {sym} ptr val mem)

 ((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
 	((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {sym} val base mem)
-((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem) && is32Bit(off1+off2) ->
-	((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {sym} val base idx mem)
-((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem) && is32Bit(off1+off2*4) ->
-	((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2*4] {sym} val base idx mem)
 ((ADD|SUB|MUL|DIV)SSload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
 	((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem)
 ((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
 	((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem)
 ((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym} (ADDLconst [off2] base) val mem) && is32Bit(off1+off2) ->
 	((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {sym} base val mem)
-((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem) && is32Bit(off1+off2) ->
-	((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2] {sym} base idx val mem)
-((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem) && is32Bit(off1+off2*4) ->
-	((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2*4] {sym} base idx val mem)
 ((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDLconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
 	((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
-((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym} (ADDLconst [off2] base) idx mem) && ValAndOff(valoff1).canAdd(off2) ->
-	((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {sym} base idx mem)
-((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym} base (ADDLconst [off2] idx) mem) && ValAndOff(valoff1).canAdd(off2*4) ->
-	((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2*4)] {sym} base idx mem)

 // Fold constants into stores.
 (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem) && validOff(off) ->
@@ -652,7 +636,7 @@
 (MOV(L|W|B)storeconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
 	(MOV(L|W|B)storeconst [ValAndOff(sc).add(off)] {s} ptr mem)

-// We need to fold LEAQ into the MOVx ops so that the live variable analysis knows
+// We need to fold LEAL into the MOVx ops so that the live variable analysis knows
 // what variables are being read/written by the ops.
 // Note: we turn off this merging for operations on globals when building
 // position-independent code (when Flag_shared is set).
@@ -672,31 +656,9 @@
 	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOV(L|W|B)storeconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)

-// generating indexed loads and stores
-(MOV(B|W|L|SS|SD)load [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOV(B|W|L|SS|SD)loadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVWload [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOV(L|SS)load [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOV(L|SS)loadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVSDload [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVSDloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOV(B|W|L|SS|SD)store [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOV(B|W|L|SS|SD)storeidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-(MOVWstore [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-(MOV(L|SS)store [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOV(L|SS)storeidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-(MOVSDstore [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVSDstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)

 ((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
 	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
-((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem)
-	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
-	((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem)
 ((ADD|SUB|MUL|DIV)SSload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
 	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	((ADD|SUB|MUL|DIV)SSload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
@@ -706,97 +668,20 @@
 ((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
 	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem)
-	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
-	((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem)
 ((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAL [off2] {sym2} base) mem)
 	&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
-((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym1} (LEAL [off2] {sym2} base) idx mem)
-	&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
-	((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base idx mem)
-
-(MOV(B|W|L|SS|SD)load [off] {sym} (ADDL ptr idx) mem) && ptr.Op != OpSB -> (MOV(B|W|L|SS|SD)loadidx1 [off] {sym} ptr idx mem)
-(MOV(B|W|L|SS|SD)store [off] {sym} (ADDL ptr idx) val mem) && ptr.Op != OpSB -> (MOV(B|W|L|SS|SD)storeidx1 [off] {sym} ptr idx val mem)
-
-(MOV(B|W|L)storeconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
-	(MOV(B|W|L)storeconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVWstoreconst [x] {sym1} (LEAL2 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
-	(MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVLstoreconst [x] {sym1} (LEAL4 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
-	(MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOV(B|W|L)storeconst [x] {sym} (ADDL ptr idx) mem) -> (MOV(B|W|L)storeconstidx1 [x] {sym} ptr idx mem)
-
-// combine SHLL into indexed loads and stores
-(MOVWloadidx1 [c] {sym} ptr (SHLLconst [1] idx) mem) -> (MOVWloadidx2 [c] {sym} ptr idx mem)
-(MOVLloadidx1 [c] {sym} ptr (SHLLconst [2] idx) mem) -> (MOVLloadidx4 [c] {sym} ptr idx mem)
-(MOVWstoreidx1 [c] {sym} ptr (SHLLconst [1] idx) val mem) -> (MOVWstoreidx2 [c] {sym} ptr idx val mem)
-(MOVLstoreidx1 [c] {sym} ptr (SHLLconst [2] idx) val mem) -> (MOVLstoreidx4 [c] {sym} ptr idx val mem)
-(MOVWstoreconstidx1 [c] {sym} ptr (SHLLconst [1] idx) mem) -> (MOVWstoreconstidx2 [c] {sym} ptr idx mem)
-(MOVLstoreconstidx1 [c] {sym} ptr (SHLLconst [2] idx) mem) -> (MOVLstoreconstidx4 [c] {sym} ptr idx mem)
-
-// combine ADDL into indexed loads and stores
-(MOV(B|W|L|SS|SD)loadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOV(B|W|L|SS|SD)loadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
-(MOVWloadidx2 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOVWloadidx2 [int64(int32(c+d))] {sym} ptr idx mem)
-(MOV(L|SS)loadidx4 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOV(L|SS)loadidx4 [int64(int32(c+d))] {sym} ptr idx mem)
-(MOVSDloadidx8 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOVSDloadidx8 [int64(int32(c+d))] {sym} ptr idx mem)
-(MOV(B|W|L|SS|SD)storeidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOV(B|W|L|SS|SD)storeidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
-(MOVWstoreidx2 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOVWstoreidx2 [int64(int32(c+d))] {sym} ptr idx val mem)
-(MOV(L|SS)storeidx4 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOV(L|SS)storeidx4 [int64(int32(c+d))] {sym} ptr idx val mem)
-(MOVSDstoreidx8 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOVSDstoreidx8 [int64(int32(c+d))] {sym} ptr idx val mem)
-(MOV(B|W|L|SS|SD)loadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOV(B|W|L|SS|SD)loadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
-(MOVWloadidx2 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOVWloadidx2 [int64(int32(c+2*d))] {sym} ptr idx mem)
-(MOV(L|SS)loadidx4 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOV(L|SS)loadidx4 [int64(int32(c+4*d))] {sym} ptr idx mem)
-(MOVSDloadidx8 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOVSDloadidx8 [int64(int32(c+8*d))] {sym} ptr idx mem)
-(MOV(B|W|L|SS|SD)storeidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOV(B|W|L|SS|SD)storeidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
-(MOVWstoreidx2 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOVWstoreidx2 [int64(int32(c+2*d))] {sym} ptr idx val mem)
-(MOV(L|SS)storeidx4 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOV(L|SS)storeidx4 [int64(int32(c+4*d))] {sym} ptr idx val mem)
-(MOVSDstoreidx8 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOVSDstoreidx8 [int64(int32(c+8*d))] {sym} ptr idx val mem)

 // Merge load/store to op
 ((ADD|AND|OR|XOR|SUB|MUL)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) -> ((ADD|AND|OR|XOR|SUB|MUL)Lload x [off] {sym} ptr mem)
-((ADD|AND|OR|XOR|SUB|MUL)L x l:(MOVLloadidx4 [off] {sym} ptr idx mem)) && canMergeLoadClobber(v, l, x) && clobber(l) ->
-	((ADD|AND|OR|XOR|SUB|MUL)Lloadidx4 x [off] {sym} ptr idx mem)
-((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem)
-	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem)
 ((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) -> ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
 ((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) -> ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)
 (MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
 (MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) ->
 	((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
-(MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|AND|OR|XOR)Lloadidx4 x [off] {sym} ptr idx mem) mem) && y.Uses==1 && clobber(y) ->
-	((ADD|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx x mem)
-(MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|SUB|AND|OR|XOR)L l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) ->
-	((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx x mem)
 (MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLload [off] {sym} ptr mem)) mem)
 	&& y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off) ->
 	((ADD|AND|OR|XOR)Lconstmodify [makeValAndOff(c,off)] {sym} ptr mem)
-(MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLloadidx4 [off] {sym} ptr idx mem)) mem)
-	&& y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off) ->
-	((ADD|AND|OR|XOR)Lconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
-((ADD|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) && validValAndOff(c,off) ->
-	((ADD|AND|OR|XOR)Lconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
-(SUBLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) && validValAndOff(-c,off) ->
-	(ADDLconstmodifyidx4 [makeValAndOff(-c,off)] {sym} ptr idx mem)
-
-(MOV(B|W|L)storeconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem) ->
-	(MOV(B|W|L)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-(MOVWstoreconstidx2 [x] {sym} (ADDLconst [c] ptr) idx mem) ->
-	(MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-(MOVLstoreconstidx4 [x] {sym} (ADDLconst [c] ptr) idx mem) ->
-	(MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-(MOV(B|W|L)storeconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem) ->
-	(MOV(B|W|L)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-(MOVWstoreconstidx2 [x] {sym} ptr (ADDLconst [c] idx) mem) ->
-	(MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem)
-(MOVLstoreconstidx4 [x] {sym} ptr (ADDLconst [c] idx) mem) ->
-	(MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem)

 // fold LEALs together
 (LEAL [off1] {sym1} (LEAL [off2] {sym2} x)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
@@ -826,6 +711,16 @@
 (LEAL [off1] {sym1} (LEAL8 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
 	(LEAL8 [off1+off2] {mergeSym(sym1,sym2)} x y)

+// LEAL[1248] into LEAL[1248]. Only some such merges are possible.
+(LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} y y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+	(LEAL2 [off1+off2] {mergeSym(sym1, sym2)} x y)
+(LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+	(LEAL2 [off1+off2] {mergeSym(sym1, sym2)} y x)
+(LEAL2 [off1] {sym} x (LEAL1 [off2] {nil} y y)) && is32Bit(off1+2*off2) ->
+	(LEAL4 [off1+2*off2] {sym} x y)
+(LEAL4 [off1] {sym} x (LEAL1 [off2] {nil} y y)) && is32Bit(off1+4*off2) ->
+	(LEAL8 [off1+4*off2] {sym} x y)

 // Absorb InvertFlags into branches.
 (LT (InvertFlags cmp) yes no) -> (GT cmp yes no)
 (GT (InvertFlags cmp) yes no) -> (LT cmp yes no)
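A quick arithmetic check of the LEAL2/LEAL1 merge above (worked out here for
clarity; not part of the patch):

  (LEAL1 [off2] y y)            = y + y + off2 = 2*y + off2
  (LEAL2 [off1] x (LEAL1 ...))  = x + 2*(2*y + off2) + off1
                                = x + 4*y + (off1 + 2*off2)
                                = (LEAL4 [off1+2*off2] x y)

The {nil} restriction on the inner LEAL1 matters because its offset gets
doubled, and a symbolic offset cannot be scaled that way.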
@@ -1039,6 +934,9 @@
 // TEST %reg,%reg is shorter than CMP
 (CMP(L|W|B)const x [0]) -> (TEST(L|W|B) x x)

+// Convert LEAL1 back to ADDL if we can
+(LEAL1 [0] {nil} x y) -> (ADDL x y)

 // Combining byte loads into larger (unaligned) loads.
 // There are many ways these combinations could occur. This is
 // designed to match the way encoding/binary.LittleEndian does it.
@@ -1052,6 +950,16 @@
 	&& clobber(x0, x1, s0)
 	-> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
+(ORL x0:(MOVBload [i] {s} p0 mem)
+	s0:(SHLLconst [8] x1:(MOVBload [i] {s} p1 mem)))
+	&& x0.Uses == 1
+	&& x1.Uses == 1
+	&& s0.Uses == 1
+	&& sequentialAddresses(p0, p1, 1)
+	&& mergePoint(b,x0,x1) != nil
+	&& clobber(x0, x1, s0)
+	-> @mergePoint(b,x0,x1) (MOVWload [i] {s} p0 mem)
 (ORL o0:(ORL
 	x0:(MOVWload [i0] {s} p mem)
 	s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem)))
@@ -1068,31 +976,21 @@
 	&& clobber(x0, x1, x2, s0, s1, o0)
 	-> @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem)
-(ORL x0:(MOVBloadidx1 [i0] {s} p idx mem)
-	s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
-	&& i1==i0+1
-	&& x0.Uses == 1
-	&& x1.Uses == 1
-	&& s0.Uses == 1
-	&& mergePoint(b,x0,x1) != nil
-	&& clobber(x0, x1, s0)
-	-> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
 (ORL o0:(ORL
-	x0:(MOVWloadidx1 [i0] {s} p idx mem)
-	s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)))
-	s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
-	&& i2 == i0+2
-	&& i3 == i0+3
+	x0:(MOVWload [i] {s} p0 mem)
+	s0:(SHLLconst [16] x1:(MOVBload [i] {s} p1 mem)))
+	s1:(SHLLconst [24] x2:(MOVBload [i] {s} p2 mem)))
 	&& x0.Uses == 1
 	&& x1.Uses == 1
 	&& x2.Uses == 1
 	&& s0.Uses == 1
 	&& s1.Uses == 1
 	&& o0.Uses == 1
+	&& sequentialAddresses(p0, p1, 2)
+	&& sequentialAddresses(p1, p2, 1)
 	&& mergePoint(b,x0,x1,x2) != nil
 	&& clobber(x0, x1, x2, s0, s1, o0)
-	-> @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+	-> @mergePoint(b,x0,x1,x2) (MOVLload [i] {s} p0 mem)

 // Combine constant stores into larger (unaligned) stores.
 (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
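Spelled out, the rewritten four-byte combine above uses sequentialAddresses
in place of the old i2 == i0+2 / i3 == i0+3 offset checks: with p1 == p0+2
and p2 == p0+3 proved, the OR of

  x0        = 16-bit load at p0
  x1 << 16  =  8-bit load at p1
  x2 << 24  =  8-bit load at p2

is exactly the little-endian 32-bit value at p0, so it becomes
(MOVLload [i] {s} p0 mem) without ever materializing a partial pointer sum.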
@@ -1105,6 +1003,20 @@
 	&& ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
 	&& clobber(x)
 	-> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
+(MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem))
+	&& x.Uses == 1
+	&& ValAndOff(a).Off() == ValAndOff(c).Off()
+	&& sequentialAddresses(p0, p1, 1)
+	&& clobber(x)
+	-> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem)
+(MOVBstoreconst [a] {s} p0 x:(MOVBstoreconst [c] {s} p1 mem))
+	&& x.Uses == 1
+	&& ValAndOff(a).Off() == ValAndOff(c).Off()
+	&& sequentialAddresses(p0, p1, 1)
+	&& clobber(x)
+	-> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem)
 (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
 	&& x.Uses == 1
 	&& ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
@@ -1116,22 +1028,18 @@
 	&& clobber(x)
 	-> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
-(MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem))
-	&& x.Uses == 1
-	&& ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
-	&& clobber(x)
-	-> (MOVWstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p i mem)
-(MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem))
-	&& x.Uses == 1
-	&& ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
-	&& clobber(x)
-	-> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem)
-(MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem))
-	&& x.Uses == 1
-	&& ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
-	&& clobber(x)
-	-> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLLconst <i.Type> [1] i) mem)
+(MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem))
+	&& x.Uses == 1
+	&& ValAndOff(a).Off() == ValAndOff(c).Off()
+	&& sequentialAddresses(p0, p1, 2)
+	&& clobber(x)
+	-> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem)
+(MOVWstoreconst [a] {s} p0 x:(MOVWstoreconst [c] {s} p1 mem))
+	&& x.Uses == 1
+	&& ValAndOff(a).Off() == ValAndOff(c).Off()
+	&& sequentialAddresses(p0, p1, 2)
+	&& clobber(x)
+	-> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem)

 // Combine stores into larger (unaligned) stores.
 (MOVBstore [i] {s} p (SHR(W|L)const [8] w) x:(MOVBstore [i-1] {s} p w mem))
@@ -1146,6 +1054,23 @@
 	&& x.Uses == 1
 	&& clobber(x)
 	-> (MOVWstore [i-1] {s} p w0 mem)
+(MOVBstore [i] {s} p1 (SHR(W|L)const [8] w) x:(MOVBstore [i] {s} p0 w mem))
+	&& x.Uses == 1
+	&& sequentialAddresses(p0, p1, 1)
+	&& clobber(x)
+	-> (MOVWstore [i] {s} p0 w mem)
+(MOVBstore [i] {s} p0 w x:(MOVBstore {s} [i] p1 (SHR(W|L)const [8] w) mem))
+	&& x.Uses == 1
+	&& sequentialAddresses(p0, p1, 1)
+	&& clobber(x)
+	-> (MOVWstore [i] {s} p0 w mem)
+(MOVBstore [i] {s} p1 (SHRLconst [j] w) x:(MOVBstore [i] {s} p0 w0:(SHRLconst [j-8] w) mem))
+	&& x.Uses == 1
+	&& sequentialAddresses(p0, p1, 1)
+	&& clobber(x)
+	-> (MOVWstore [i] {s} p0 w0 mem)
 (MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
 	&& x.Uses == 1
 	&& clobber(x)
@@ -1155,35 +1080,16 @@
 	&& clobber(x)
 	-> (MOVLstore [i-2] {s} p w0 mem)
-(MOVBstoreidx1 [i] {s} p idx (SHR(L|W)const [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
-	&& x.Uses == 1
-	&& clobber(x)
-	-> (MOVWstoreidx1 [i-1] {s} p idx w mem)
-(MOVBstoreidx1 [i] {s} p idx w x:(MOVBstoreidx1 [i+1] {s} p idx (SHR(L|W)const [8] w) mem))
-	&& x.Uses == 1
-	&& clobber(x)
-	-> (MOVWstoreidx1 [i] {s} p idx w mem)
-(MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem))
-	&& x.Uses == 1
-	&& clobber(x)
-	-> (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
-(MOVWstoreidx1 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
-	&& x.Uses == 1
-	&& clobber(x)
-	-> (MOVLstoreidx1 [i-2] {s} p idx w mem)
-(MOVWstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
-	&& x.Uses == 1
-	&& clobber(x)
-	-> (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
-(MOVWstoreidx2 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
-	&& x.Uses == 1
-	&& clobber(x)
-	-> (MOVLstoreidx1 [i-2] {s} p (SHLLconst <idx.Type> [1] idx) w mem)
-(MOVWstoreidx2 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
-	&& x.Uses == 1
-	&& clobber(x)
-	-> (MOVLstoreidx1 [i-2] {s} p (SHLLconst <idx.Type> [1] idx) w0 mem)
+(MOVWstore [i] {s} p1 (SHRLconst [16] w) x:(MOVWstore [i] {s} p0 w mem))
+	&& x.Uses == 1
+	&& sequentialAddresses(p0, p1, 2)
+	&& clobber(x)
+	-> (MOVLstore [i] {s} p0 w mem)
+(MOVWstore [i] {s} p1 (SHRLconst [j] w) x:(MOVWstore [i] {s} p0 w0:(SHRLconst [j-16] w) mem))
+	&& x.Uses == 1
+	&& sequentialAddresses(p0, p1, 2)
+	&& clobber(x)
+	-> (MOVLstore [i] {s} p0 w0 mem)

 // For PIC, break floating-point constant loading into two instructions so we have
 // a register to use for holding the address of the constant pool entry.

src/cmd/compile/internal/ssa/gen/AMD64.rules

@@ -1604,6 +1604,7 @@
 // Move constants offsets from LEAQx up into load. This lets the above combining
 // rules discover indexed load-combining instances.
+//TODO:remove! These rules are bad.
 (MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
 	-> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) mem)
 (MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)

src/cmd/compile/internal/ssa/gen/generic.rules

@@ -917,7 +917,7 @@
 (If (ConstBool [c]) yes no) && c == 0 -> (First no yes)

 // Get rid of Convert ops for pointer arithmetic on unsafe.Pointer.
-(Convert (Add(64|32) (Convert ptr mem) off) mem) -> (Add(64|32) ptr off)
+(Convert (Add(64|32) (Convert ptr mem) off) mem) -> (AddPtr ptr off)
 (Convert (Convert ptr mem) mem) -> ptr
// strength reduction of divide by a constant. // strength reduction of divide by a constant.
@@ -1780,6 +1780,10 @@
 // is constant, which pushes constants to the outside
 // of the expression. At that point, any constant-folding
 // opportunities should be obvious.
+// Note: don't include AddPtr here! In order to maintain the
+// invariant that pointers must stay within the pointed-to object,
+// we can't pull part of a pointer computation above the AddPtr.
+// See issue 37881.
 // x + (C + z) -> C + (x + z)
 (Add64 (Add64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) -> (Add64 i (Add64 <t> z x))
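A hedged illustration of the invariant (not from the rules file): the
reassociation above may turn (Add64 x (Add64 c z)) into
(Add64 c (Add64 x z)), which is always safe for integers. The analogous
(AddPtr p (Add64 c z)) must not be split into (AddPtr (AddPtr p z) c):
only the full sum p+(c+z) is known to stay inside p's object, because the
bounds check covered c+z, not z alone.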

src/cmd/compile/internal/ssa/gen/rulegen.go

@@ -715,6 +715,11 @@ func (w *bodyBase) add(node Statement) {
 // declared reports if the body contains a Declare with the given name.
 func (w *bodyBase) declared(name string) bool {
+	if name == "nil" {
+		// Treat "nil" as having already been declared.
+		// This lets us use nil to match an aux field.
+		return true
+	}
 	for _, s := range w.list {
 		if decl, ok := s.(*Declare); ok && decl.name == name {
 			return true

src/cmd/compile/internal/ssa/rewrite.go

@@ -1248,9 +1248,25 @@ func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
 	return byteorder.Uint64(buf)
 }

+// sequentialAddresses reports true if it can prove that x + n == y
+func sequentialAddresses(x, y *Value, n int64) bool {
+	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
+		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
+			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
+		return true
+	}
+	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
+		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
+			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
+		return true
+	}
+	return false
+}
+
 // same reports whether x and y are the same value.
 // It checks to a maximum depth of d, so it may report
 // a false negative.
+// TODO: remove when amd64 port is switched to using sequentialAddresses
 func same(x, y *Value, depth int) bool {
 	if x == y {
 		return true
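A worked reading of the two cases sequentialAddresses handles (the values
here are hypothetical):

  // case 1: x = (ADDL  ptr idx)       computes ptr + idx
  //         y = (LEAL1 [n] ptr idx)   computes ptr + idx + n
  // so y == x + n, with either argument order, provided y carries no symbol.
  //
  // case 2: x = (LEAL1 [c] {sym}   ptr idx)
  //         y = (LEAL1 [c+n] {sym} ptr idx)
  // same base, index, and symbol, with offsets c and c+n, so y == x + n.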

src/cmd/compile/internal/ssa/rewrite386.go
(generated file; diff suppressed because it is too large)

src/cmd/compile/internal/ssa/rewritegeneric.go

@@ -3983,7 +3983,7 @@ func rewriteValuegeneric_OpConvert(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (Convert (Add64 (Convert ptr mem) off) mem)
-	// result: (Add64 ptr off)
+	// result: (AddPtr ptr off)
 	for {
 		if v_0.Op != OpAdd64 {
 			break
@@ -4001,14 +4001,14 @@ func rewriteValuegeneric_OpConvert(v *Value) bool {
 			if mem != v_1 {
 				continue
 			}
-			v.reset(OpAdd64)
+			v.reset(OpAddPtr)
 			v.AddArg2(ptr, off)
 			return true
 		}
 		break
 	}
 	// match: (Convert (Add32 (Convert ptr mem) off) mem)
-	// result: (Add32 ptr off)
+	// result: (AddPtr ptr off)
 	for {
 		if v_0.Op != OpAdd32 {
 			break
@@ -4026,7 +4026,7 @@ func rewriteValuegeneric_OpConvert(v *Value) bool {
 			if mem != v_1 {
 				continue
 			}
-			v.reset(OpAdd32)
+			v.reset(OpAddPtr)
 			v.AddArg2(ptr, off)
 			return true
 		}

test/codegen/memops.go

@@ -99,46 +99,61 @@ func compMem3(x, y *int) (int, bool) {
 func idxInt8(x, y []int8, i int) {
 	var t int8
 	// amd64: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
+	// 386: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
 	t = x[i+1]
 	// amd64: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+	// 386: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
 	y[i+1] = t
 	// amd64: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+	// 386: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
 	x[i+1] = 77
 }

 func idxInt16(x, y []int16, i int) {
 	var t int16
 	// amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
+	// 386: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
 	t = x[i+1]
 	// amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
+	// 386: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
 	y[i+1] = t
 	// amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
+	// 386: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
 	t = x[16*i+1]
 	// amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
+	// 386: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
 	y[16*i+1] = t
 	// amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
+	// 386: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
 	x[i+1] = 77
 	// amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
+	// 386: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
 	x[16*i+1] = 77
 }

 func idxInt32(x, y []int32, i int) {
 	var t int32
 	// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+	// 386: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
 	t = x[i+1]
 	// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	// 386: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
 	y[i+1] = t
 	// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
 	t = x[2*i+1]
 	// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
 	y[2*i+1] = t
 	// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
+	// 386: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
 	t = x[16*i+1]
 	// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+	// 386: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
 	y[16*i+1] = t
 	// amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	// 386: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
 	x[i+1] = 77
 	// amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+	// 386: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
 	x[16*i+1] = 77
 }
@@ -160,24 +175,71 @@ func idxInt64(x, y []int64, i int) {
 func idxFloat32(x, y []float32, i int) {
 	var t float32
 	// amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+	// 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
 	t = x[i+1]
 	// amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	// 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
 	y[i+1] = t
 	// amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
+	// 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
 	t = x[16*i+1]
 	// amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+	// 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
 	y[16*i+1] = t
 }

 func idxFloat64(x, y []float64, i int) {
 	var t float64
 	// amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+	// 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
 	t = x[i+1]
 	// amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	// 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
 	y[i+1] = t
 	// amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
+	// 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
 	t = x[16*i+1]
 	// amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
+	// 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
 	y[16*i+1] = t
 }
+
+func idxLoadPlusOp(x []int32, i int) int32 {
+	s := x[0]
+	// 386: `ADDL\t4\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	s += x[i+1]
+	// 386: `SUBL\t8\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	s -= x[i+2]
+	// 386: `IMULL\t12\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	s *= x[i+3]
+	// 386: `ANDL\t16\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	s &= x[i+4]
+	// 386: `ORL\t20\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	s |= x[i+5]
+	// 386: `XORL\t24\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	s ^= x[i+6]
+	return s
+}
+
+func idxStorePlusOp(x []int32, i int, v int32) {
+	// 386: `ADDL\t[A-Z]+, 4\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+1] += v
+	// 386: `SUBL\t[A-Z]+, 8\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+2] -= v
+	// 386: `ANDL\t[A-Z]+, 12\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+3] &= v
+	// 386: `ORL\t[A-Z]+, 16\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+4] |= v
+	// 386: `XORL\t[A-Z]+, 20\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+5] ^= v
+	// 386: `ADDL\t[$]77, 24\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+6] += 77
+	// 386: `ANDL\t[$]77, 28\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+7] &= 77
+	// 386: `ORL\t[$]77, 32\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+8] |= 77
+	// 386: `XORL\t[$]77, 36\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+9] ^= 77
+}
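For context: these functions use the codegen test harness, where each
`// 386:` comment holds a regexp that must match an instruction the compiler
emits for the source line that follows. A hypothetical entry of the same
shape (not in the actual test file):

	func idxByteLoad(x []byte, i int) byte {
		// 386: `MOVBLZX\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
		return x[i+1]
	}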