cmd/compile: insert complicated x86 addressing modes as a separate pass

Use a separate compiler pass to introduce complicated x86 addressing
modes.  Loads in the normal architecture rules (for x86 and all other
platforms) can have constant offsets (AuxInt values) and symbols (Aux
values), but no more.

The complex addressing modes (x+y, x+2*y, etc.) are introduced in a
separate pass that combines loads with LEAQx ops.

Organizing the rewrites this way reduces the number of rules required:
otherwise, many different rule orderings have to be spelled out to
ensure these complex addressing modes are always found when they are
possible.
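
As a sketch (illustrative SSA, not the literal rule text): after
lowering, a load whose address is computed by an LEAQ4, such as

    v1 = LEAQ4 [c] {sym} ptr idx   // ptr + 4*idx + c + sym
    v2 = MOVLload [d] v1 mem

is rewritten by the new pass into the indexed form

    v2 = MOVLloadidx4 [c+d] {sym} ptr idx mem

provided c+d still fits in 32 bits and at most one of the two ops
carries a symbol.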

Update #36468

Change-Id: I5b4bf7b03a1e731d6dfeb9ef19b376175f3b4b44
Reviewed-on: https://go-review.googlesource.com/c/go/+/217097
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
Keith Randall 2020-01-30 10:17:01 -08:00
parent d49fecc474
commit 98cb76799c
6 changed files with 1580 additions and 6523 deletions


@@ -0,0 +1,154 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// addressingModes combines address calculations into memory operations
// that can perform complicated addressing modes.
func addressingModes(f *Func) {
switch f.Config.arch {
default:
// Most architectures can't do this.
return
case "amd64":
// TODO: 386, s390x?
}
var tmp []*Value
for _, b := range f.Blocks {
for _, v := range b.Values {
if !combineFirst[v.Op] {
continue
}
p := v.Args[0]
c, ok := combine[[2]Op{v.Op, p.Op}]
if !ok {
continue
}
// See if we can combine the Aux/AuxInt values.
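// The cases below keep v's Aux/AuxInt and fold in p's: offsets add and
// must still fit in 32 bits, and at most one of v and p may carry a
// symbol.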
switch [2]auxType{opcodeTable[v.Op].auxType, opcodeTable[p.Op].auxType} {
case [2]auxType{auxSymOff, auxInt32}:
// TODO: introduce auxSymOff32
if !is32Bit(v.AuxInt + p.AuxInt) {
continue
}
v.AuxInt += p.AuxInt
case [2]auxType{auxSymOff, auxSymOff}:
if v.Aux != nil && p.Aux != nil {
continue
}
if !is32Bit(v.AuxInt + p.AuxInt) {
continue
}
if p.Aux != nil {
v.Aux = p.Aux
}
v.AuxInt += p.AuxInt
case [2]auxType{auxSymValAndOff, auxInt32}:
vo := ValAndOff(v.AuxInt)
if !vo.canAdd(p.AuxInt) {
continue
}
v.AuxInt = vo.add(p.AuxInt)
case [2]auxType{auxSymValAndOff, auxSymOff}:
vo := ValAndOff(v.AuxInt)
if v.Aux != nil && p.Aux != nil {
continue
}
if !vo.canAdd(p.AuxInt) {
continue
}
if p.Aux != nil {
v.Aux = p.Aux
}
v.AuxInt = vo.add(p.AuxInt)
case [2]auxType{auxSymOff, auxNone}:
// nothing to do
case [2]auxType{auxSymValAndOff, auxNone}:
// nothing to do
default:
f.Fatalf("unknown aux combining for %s and %s\n", v.Op, p.Op)
}
// Combine the operations.
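// tmp stashes v's non-address arguments (e.g. the stored value and the
// memory arg) while v is reset; the combined op then takes p's address
// arguments first, followed by those stashed originals.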
tmp = append(tmp[:0], v.Args[1:]...)
v.resetArgs()
v.Op = c
v.AddArgs(p.Args...)
v.AddArgs(tmp...)
}
}
}
// combineFirst contains ops which appear in combine as the
// first part of the key.
var combineFirst = map[Op]bool{}
func init() {
for k := range combine {
combineFirst[k[0]] = true
}
}
// For each entry k, v in this map, if we have a value x with:
// x.Op == k[0]
// x.Args[0].Op == k[1]
// then we can set x.Op to v and set x.Args like this:
// x.Args[0].Args + x.Args[1:]
// Additionally, the Aux/AuxInt from x.Args[0] is merged into x.
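// For example, the entry
//   [2]Op{OpAMD64MOVLload, OpAMD64LEAQ4}: OpAMD64MOVLloadidx4,
// rewrites a MOVLload whose address argument is an LEAQ4 into a
// MOVLloadidx4 that takes the LEAQ4's base and index arguments directly.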
var combine = map[[2]Op]Op{
[2]Op{OpAMD64MOVBload, OpAMD64ADDQ}: OpAMD64MOVBloadidx1,
[2]Op{OpAMD64MOVWload, OpAMD64ADDQ}: OpAMD64MOVWloadidx1,
[2]Op{OpAMD64MOVLload, OpAMD64ADDQ}: OpAMD64MOVLloadidx1,
[2]Op{OpAMD64MOVQload, OpAMD64ADDQ}: OpAMD64MOVQloadidx1,
[2]Op{OpAMD64MOVSSload, OpAMD64ADDQ}: OpAMD64MOVSSloadidx1,
[2]Op{OpAMD64MOVSDload, OpAMD64ADDQ}: OpAMD64MOVSDloadidx1,
[2]Op{OpAMD64MOVBstore, OpAMD64ADDQ}: OpAMD64MOVBstoreidx1,
[2]Op{OpAMD64MOVWstore, OpAMD64ADDQ}: OpAMD64MOVWstoreidx1,
[2]Op{OpAMD64MOVLstore, OpAMD64ADDQ}: OpAMD64MOVLstoreidx1,
[2]Op{OpAMD64MOVQstore, OpAMD64ADDQ}: OpAMD64MOVQstoreidx1,
[2]Op{OpAMD64MOVSSstore, OpAMD64ADDQ}: OpAMD64MOVSSstoreidx1,
[2]Op{OpAMD64MOVSDstore, OpAMD64ADDQ}: OpAMD64MOVSDstoreidx1,
[2]Op{OpAMD64MOVBstoreconst, OpAMD64ADDQ}: OpAMD64MOVBstoreconstidx1,
[2]Op{OpAMD64MOVWstoreconst, OpAMD64ADDQ}: OpAMD64MOVWstoreconstidx1,
[2]Op{OpAMD64MOVLstoreconst, OpAMD64ADDQ}: OpAMD64MOVLstoreconstidx1,
[2]Op{OpAMD64MOVQstoreconst, OpAMD64ADDQ}: OpAMD64MOVQstoreconstidx1,
[2]Op{OpAMD64MOVBload, OpAMD64LEAQ1}: OpAMD64MOVBloadidx1,
[2]Op{OpAMD64MOVWload, OpAMD64LEAQ1}: OpAMD64MOVWloadidx1,
[2]Op{OpAMD64MOVWload, OpAMD64LEAQ2}: OpAMD64MOVWloadidx2,
[2]Op{OpAMD64MOVLload, OpAMD64LEAQ1}: OpAMD64MOVLloadidx1,
[2]Op{OpAMD64MOVLload, OpAMD64LEAQ4}: OpAMD64MOVLloadidx4,
[2]Op{OpAMD64MOVLload, OpAMD64LEAQ8}: OpAMD64MOVLloadidx8,
[2]Op{OpAMD64MOVQload, OpAMD64LEAQ1}: OpAMD64MOVQloadidx1,
[2]Op{OpAMD64MOVQload, OpAMD64LEAQ8}: OpAMD64MOVQloadidx8,
[2]Op{OpAMD64MOVSSload, OpAMD64LEAQ1}: OpAMD64MOVSSloadidx1,
[2]Op{OpAMD64MOVSSload, OpAMD64LEAQ4}: OpAMD64MOVSSloadidx4,
[2]Op{OpAMD64MOVSDload, OpAMD64LEAQ1}: OpAMD64MOVSDloadidx1,
[2]Op{OpAMD64MOVSDload, OpAMD64LEAQ8}: OpAMD64MOVSDloadidx8,
[2]Op{OpAMD64MOVBstore, OpAMD64LEAQ1}: OpAMD64MOVBstoreidx1,
[2]Op{OpAMD64MOVWstore, OpAMD64LEAQ1}: OpAMD64MOVWstoreidx1,
[2]Op{OpAMD64MOVWstore, OpAMD64LEAQ2}: OpAMD64MOVWstoreidx2,
[2]Op{OpAMD64MOVLstore, OpAMD64LEAQ1}: OpAMD64MOVLstoreidx1,
[2]Op{OpAMD64MOVLstore, OpAMD64LEAQ4}: OpAMD64MOVLstoreidx4,
[2]Op{OpAMD64MOVLstore, OpAMD64LEAQ8}: OpAMD64MOVLstoreidx8,
[2]Op{OpAMD64MOVQstore, OpAMD64LEAQ1}: OpAMD64MOVQstoreidx1,
[2]Op{OpAMD64MOVQstore, OpAMD64LEAQ8}: OpAMD64MOVQstoreidx8,
[2]Op{OpAMD64MOVSSstore, OpAMD64LEAQ1}: OpAMD64MOVSSstoreidx1,
[2]Op{OpAMD64MOVSSstore, OpAMD64LEAQ4}: OpAMD64MOVSSstoreidx4,
[2]Op{OpAMD64MOVSDstore, OpAMD64LEAQ1}: OpAMD64MOVSDstoreidx1,
[2]Op{OpAMD64MOVSDstore, OpAMD64LEAQ8}: OpAMD64MOVSDstoreidx8,
[2]Op{OpAMD64MOVBstoreconst, OpAMD64LEAQ1}: OpAMD64MOVBstoreconstidx1,
[2]Op{OpAMD64MOVWstoreconst, OpAMD64LEAQ1}: OpAMD64MOVWstoreconstidx1,
[2]Op{OpAMD64MOVWstoreconst, OpAMD64LEAQ2}: OpAMD64MOVWstoreconstidx2,
[2]Op{OpAMD64MOVLstoreconst, OpAMD64LEAQ1}: OpAMD64MOVLstoreconstidx1,
[2]Op{OpAMD64MOVLstoreconst, OpAMD64LEAQ4}: OpAMD64MOVLstoreconstidx4,
[2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ1}: OpAMD64MOVQstoreconstidx1,
[2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ8}: OpAMD64MOVQstoreconstidx8,
// TODO: 386
}


@@ -442,6 +442,7 @@ var passes = [...]pass{
{name: "insert resched checks", fn: insertLoopReschedChecks,
disabled: objabi.Preemptibleloops_enabled == 0}, // insert resched checks in loops.
{name: "lower", fn: lower, required: true},
{name: "addressing modes", fn: addressingModes, required: false},
{name: "lowered deadcode for cse", fn: deadcode}, // deadcode immediately before CSE avoids CSE making dead values live again
{name: "lowered cse", fn: cse},
{name: "elim unread autos", fn: elimUnreadAutos},


@@ -1043,12 +1043,6 @@
(MOVWQZX x) && zeroUpper48Bits(x,3) -> x
(MOVBQZX x) && zeroUpper56Bits(x,3) -> x
(MOVBQZX x:(MOVBloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx1 <v.Type> [off] {sym} ptr idx mem)
(MOVWQZX x:(MOVWloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem)
(MOVWQZX x:(MOVWloadidx2 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem)
(MOVLQZX x:(MOVLloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLloadidx1 <v.Type> [off] {sym} ptr idx mem)
(MOVLQZX x:(MOVLloadidx4 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLloadidx4 <v.Type> [off] {sym} ptr idx mem)
// replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBQZX x)
(MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWQZX x)
@@ -1166,86 +1160,6 @@
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
// generating indexed loads and stores
(MOV(B|W|L|Q|SS|SD)load [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOV(B|W|L|Q|SS|SD)loadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVWload [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
(MOV(L|SS)load [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOV(L|SS)loadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
(MOV(L|Q|SD)load [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOV(L|Q|SD)loadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
(MOV(B|W|L|Q|SS|SD)store [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOV(B|W|L|Q|SS|SD)storeidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOVWstore [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOV(L|SS)store [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOV(L|SS)storeidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOV(L|Q|SD)store [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOV(L|Q|SD)storeidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOV(B|W|L|Q|SS|SD)load [off] {sym} (ADDQ ptr idx) mem) && ptr.Op != OpSB ->
(MOV(B|W|L|Q|SS|SD)loadidx1 [off] {sym} ptr idx mem)
(MOV(B|W|L|Q|SS|SD)store [off] {sym} (ADDQ ptr idx) val mem) && ptr.Op != OpSB ->
(MOV(B|W|L|Q|SS|SD)storeidx1 [off] {sym} ptr idx val mem)
(MOV(B|W|L|Q)storeconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
(MOV(B|W|L|Q)storeconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVWstoreconst [x] {sym1} (LEAQ2 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
(MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVLstoreconst [x] {sym1} (LEAQ4 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
(MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVQstoreconst [x] {sym1} (LEAQ8 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
(MOVQstoreconstidx8 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
(MOV(B|W|L|Q)storeconst [x] {sym} (ADDQ ptr idx) mem) -> (MOV(B|W|L|Q)storeconstidx1 [x] {sym} ptr idx mem)
// combine SHLQ into indexed loads and stores
(MOVWloadidx1 [c] {sym} ptr (SHLQconst [1] idx) mem) -> (MOVWloadidx2 [c] {sym} ptr idx mem)
(MOV(L|SS)loadidx1 [c] {sym} ptr (SHLQconst [2] idx) mem) -> (MOV(L|SS)loadidx4 [c] {sym} ptr idx mem)
(MOV(L|Q|SD)loadidx1 [c] {sym} ptr (SHLQconst [3] idx) mem) -> (MOV(L|Q|SD)loadidx8 [c] {sym} ptr idx mem)
(MOVWstoreidx1 [c] {sym} ptr (SHLQconst [1] idx) val mem) -> (MOVWstoreidx2 [c] {sym} ptr idx val mem)
(MOV(L|SS)storeidx1 [c] {sym} ptr (SHLQconst [2] idx) val mem) -> (MOV(L|SS)storeidx4 [c] {sym} ptr idx val mem)
(MOV(L|Q|SD)storeidx1 [c] {sym} ptr (SHLQconst [3] idx) val mem) -> (MOV(L|Q|SD)storeidx8 [c] {sym} ptr idx val mem)
(MOVWstoreconstidx1 [c] {sym} ptr (SHLQconst [1] idx) mem) -> (MOVWstoreconstidx2 [c] {sym} ptr idx mem)
(MOVLstoreconstidx1 [c] {sym} ptr (SHLQconst [2] idx) mem) -> (MOVLstoreconstidx4 [c] {sym} ptr idx mem)
(MOVQstoreconstidx1 [c] {sym} ptr (SHLQconst [3] idx) mem) -> (MOVQstoreconstidx8 [c] {sym} ptr idx mem)
// combine ADDQ into pointer of indexed loads and stores
(MOV(B|W|L|Q|SS|SD)loadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)loadidx1 [c+d] {sym} ptr idx mem)
(MOVWloadidx2 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOVWloadidx2 [c+d] {sym} ptr idx mem)
(MOV(L|SS)loadidx4 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOV(L|SS)loadidx4 [c+d] {sym} ptr idx mem)
(MOV(L|Q|SD)loadidx8 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOV(L|Q|SD)loadidx8 [c+d] {sym} ptr idx mem)
(MOV(B|W|L|Q|SS|SD)storeidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)storeidx1 [c+d] {sym} ptr idx val mem)
(MOVWstoreidx2 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOVWstoreidx2 [c+d] {sym} ptr idx val mem)
(MOV(L|SS)storeidx4 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOV(L|SS)storeidx4 [c+d] {sym} ptr idx val mem)
(MOV(L|Q|SD)storeidx8 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOV(L|Q|SD)storeidx8 [c+d] {sym} ptr idx val mem)
// combine ADDQ into index of indexed loads and stores
(MOV(B|W|L|Q|SS|SD)loadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)loadidx1 [c+d] {sym} ptr idx mem)
(MOVWloadidx2 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+2*d) -> (MOVWloadidx2 [c+2*d] {sym} ptr idx mem)
(MOV(L|SS)loadidx4 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+4*d) -> (MOV(L|SS)loadidx4 [c+4*d] {sym} ptr idx mem)
(MOV(L|Q|SD)loadidx8 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+8*d) -> (MOV(L|Q|SD)loadidx8 [c+8*d] {sym} ptr idx mem)
(MOV(B|W|L|Q|SS|SD)storeidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)storeidx1 [c+d] {sym} ptr idx val mem)
(MOVWstoreidx2 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+2*d) -> (MOVWstoreidx2 [c+2*d] {sym} ptr idx val mem)
(MOV(L|SS)storeidx4 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+4*d) -> (MOV(L|SS)storeidx4 [c+4*d] {sym} ptr idx val mem)
(MOV(L|Q|SD)storeidx8 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+8*d) -> (MOV(L|Q|SD)storeidx8 [c+8*d] {sym} ptr idx val mem)
(MOV(B|W|L|Q)storeconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOV(B|W|L|Q)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
(MOVWstoreconstidx2 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem)
(MOVLstoreconstidx4 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem)
(MOVQstoreconstidx8 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOVQstoreconstidx8 [ValAndOff(x).add(c)] {sym} ptr idx mem)
(MOV(B|W|L|Q)storeconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(c) -> (MOV(B|W|L|Q)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
(MOVWstoreconstidx2 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(2*c) -> (MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem)
(MOVLstoreconstidx4 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(4*c) -> (MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem)
(MOVQstoreconstidx8 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(8*c) -> (MOVQstoreconstidx8 [ValAndOff(x).add(8*c)] {sym} ptr idx mem)
// fold LEAQs together
(LEAQ [off1] {sym1} (LEAQ [off2] {sym2} x)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(LEAQ [off1+off2] {mergeSym(sym1,sym2)} x)
@@ -1274,6 +1188,17 @@
(LEAQ [off1] {sym1} (LEAQ8 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)
// LEAQ[1248] into LEAQ[1248]. Only some such merges are possible.
(LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(LEAQ2 [off1+off2] {mergeSym(sym1, sym2)} x y)
(LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(LEAQ2 [off1+off2] {mergeSym(sym1, sym2)} y x)
(LEAQ2 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(off1+2*off2) && sym2 == nil ->
(LEAQ4 [off1+2*off2] {sym1} x y)
(LEAQ4 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(off1+4*off2) && sym2 == nil ->
(LEAQ8 [off1+4*off2] {sym1} x y)
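// For example, (LEAQ1 [off2] {sym2} y y) computes y + y + off2, so
// (LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) is
// x + 2*y + off1 + off2, which is exactly (LEAQ2 [off1+off2] x y).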
// TODO: more?
// Absorb InvertFlags into branches.
(LT (InvertFlags cmp) yes no) -> (GT cmp yes no)
(GT (InvertFlags cmp) yes no) -> (LT cmp yes no)
@@ -1552,60 +1477,65 @@
// Little-endian loads
(ORL x0:(MOVBload [i0] {s} p mem)
sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
(ORL x0:(MOVBload [i0] {s} p0 mem)
sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p1 mem)))
&& i1 == i0+1
&& x0.Uses == 1
&& x1.Uses == 1
&& sh.Uses == 1
&& same(p0, p1, 1)
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
-> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
-> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem)
(ORQ x0:(MOVBload [i0] {s} p mem)
sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p mem)))
(ORQ x0:(MOVBload [i0] {s} p0 mem)
sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p1 mem)))
&& i1 == i0+1
&& x0.Uses == 1
&& x1.Uses == 1
&& sh.Uses == 1
&& same(p0, p1, 1)
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
-> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
-> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem)
(ORL x0:(MOVWload [i0] {s} p mem)
sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p mem)))
(ORL x0:(MOVWload [i0] {s} p0 mem)
sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p1 mem)))
&& i1 == i0+2
&& x0.Uses == 1
&& x1.Uses == 1
&& sh.Uses == 1
&& same(p0, p1, 1)
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
-> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
-> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p0 mem)
(ORQ x0:(MOVWload [i0] {s} p mem)
sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p mem)))
(ORQ x0:(MOVWload [i0] {s} p0 mem)
sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p1 mem)))
&& i1 == i0+2
&& x0.Uses == 1
&& x1.Uses == 1
&& sh.Uses == 1
&& same(p0, p1, 1)
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
-> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
-> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p0 mem)
(ORQ x0:(MOVLload [i0] {s} p mem)
sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem)))
(ORQ x0:(MOVLload [i0] {s} p0 mem)
sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p1 mem)))
&& i1 == i0+4
&& x0.Uses == 1
&& x1.Uses == 1
&& sh.Uses == 1
&& same(p0, p1, 1)
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
-> @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem)
-> @mergePoint(b,x0,x1) (MOVQload [i0] {s} p0 mem)
(ORL
s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem))
s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p0 mem))
or:(ORL
s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem))
s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p1 mem))
y))
&& i1 == i0+1
&& j1 == j0+8
@@ -1615,14 +1545,15 @@
&& s0.Uses == 1
&& s1.Uses == 1
&& or.Uses == 1
&& same(p0, p1, 1)
&& mergePoint(b,x0,x1,y) != nil
&& clobber(x0, x1, s0, s1, or)
-> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
-> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p0 mem)) y)
(ORQ
s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem))
s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p0 mem))
or:(ORQ
s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem))
s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p1 mem))
y))
&& i1 == i0+1
&& j1 == j0+8
@@ -1632,14 +1563,15 @@
&& s0.Uses == 1
&& s1.Uses == 1
&& or.Uses == 1
&& same(p0, p1, 1)
&& mergePoint(b,x0,x1,y) != nil
&& clobber(x0, x1, s0, s1, or)
-> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
-> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p0 mem)) y)
(ORQ
s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem))
s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p0 mem))
or:(ORQ
s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem))
s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p1 mem))
y))
&& i1 == i0+2
&& j1 == j0+16
@@ -1649,180 +1581,105 @@
&& s0.Uses == 1
&& s1.Uses == 1
&& or.Uses == 1
&& same(p0, p1, 1)
&& mergePoint(b,x0,x1,y) != nil
&& clobber(x0, x1, s0, s1, or)
-> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
-> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p0 mem)) y)
// Little-endian indexed loads
(ORL x0:(MOVBloadidx1 [i0] {s} p idx mem)
sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
&& i1 == i0+1
&& x0.Uses == 1
&& x1.Uses == 1
&& sh.Uses == 1
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
-> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
// Move constant offsets from LEAQx up into load. This lets the above combining
// rules discover indexed load-combining instances.
(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
-> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) mem)
(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
-> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) mem)
(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
-> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) mem)
(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
-> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) mem)
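// For example, (MOVLload [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem) becomes
// (MOVLload [i0+i1] {s0} (LEAQ4 [0] {s1} x y) mem). With the constant
// offset gone, the LEAQ4s feeding adjacent loads become structurally
// identical, so the combining rules above can match them via
// same(p0, p1, 1).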
(ORQ x0:(MOVBloadidx1 [i0] {s} p idx mem)
sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
&& i1 == i0+1
&& x0.Uses == 1
&& x1.Uses == 1
&& sh.Uses == 1
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
-> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
(ORL x0:(MOVWloadidx1 [i0] {s} p idx mem)
sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
&& i1 == i0+2
&& x0.Uses == 1
&& x1.Uses == 1
&& sh.Uses == 1
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
-> @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
(ORQ x0:(MOVWloadidx1 [i0] {s} p idx mem)
sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
&& i1 == i0+2
&& x0.Uses == 1
&& x1.Uses == 1
&& sh.Uses == 1
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
-> @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
(ORQ x0:(MOVLloadidx1 [i0] {s} p idx mem)
sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem)))
&& i1 == i0+4
&& x0.Uses == 1
&& x1.Uses == 1
&& sh.Uses == 1
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
-> @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
(ORL
s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
or:(ORL
s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
y))
&& i1 == i0+1
&& j1 == j0+8
&& j0 % 16 == 0
&& x0.Uses == 1
&& x1.Uses == 1
&& s0.Uses == 1
&& s1.Uses == 1
&& or.Uses == 1
&& mergePoint(b,x0,x1,y) != nil
&& clobber(x0, x1, s0, s1, or)
-> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
(ORQ
s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
or:(ORQ
s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
y))
&& i1 == i0+1
&& j1 == j0+8
&& j0 % 16 == 0
&& x0.Uses == 1
&& x1.Uses == 1
&& s0.Uses == 1
&& s1.Uses == 1
&& or.Uses == 1
&& mergePoint(b,x0,x1,y) != nil
&& clobber(x0, x1, s0, s1, or)
-> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
(ORQ
s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem))
or:(ORQ
s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem))
y))
&& i1 == i0+2
&& j1 == j0+16
&& j0 % 32 == 0
&& x0.Uses == 1
&& x1.Uses == 1
&& s0.Uses == 1
&& s1.Uses == 1
&& or.Uses == 1
&& mergePoint(b,x0,x1,y) != nil
&& clobber(x0, x1, s0, s1, or)
-> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
-> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) val mem)
(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
-> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) val mem)
(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
-> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) val mem)
(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
-> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) val mem)
// Big-endian loads
(ORL
x1:(MOVBload [i1] {s} p mem)
sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p mem)))
x1:(MOVBload [i1] {s} p0 mem)
sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p1 mem)))
&& i1 == i0+1
&& x0.Uses == 1
&& x1.Uses == 1
&& sh.Uses == 1
&& same(p0, p1, 1)
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
-> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
-> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p0 mem))
(ORQ
x1:(MOVBload [i1] {s} p mem)
sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p mem)))
x1:(MOVBload [i1] {s} p0 mem)
sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p1 mem)))
&& i1 == i0+1
&& x0.Uses == 1
&& x1.Uses == 1
&& sh.Uses == 1
&& same(p0, p1, 1)
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
-> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
-> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p0 mem))
(ORL
r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))
sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p0 mem))
sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p1 mem))))
&& i1 == i0+2
&& x0.Uses == 1
&& x1.Uses == 1
&& r0.Uses == 1
&& r1.Uses == 1
&& sh.Uses == 1
&& same(p0, p1, 1)
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, r0, r1, sh)
-> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
-> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p0 mem))
(ORQ
r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))
sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p0 mem))
sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p1 mem))))
&& i1 == i0+2
&& x0.Uses == 1
&& x1.Uses == 1
&& r0.Uses == 1
&& r1.Uses == 1
&& sh.Uses == 1
&& same(p0, p1, 1)
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, r0, r1, sh)
-> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
-> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p0 mem))
(ORQ
r1:(BSWAPL x1:(MOVLload [i1] {s} p mem))
sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p mem))))
r1:(BSWAPL x1:(MOVLload [i1] {s} p0 mem))
sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p1 mem))))
&& i1 == i0+4
&& x0.Uses == 1
&& x1.Uses == 1
&& r0.Uses == 1
&& r1.Uses == 1
&& sh.Uses == 1
&& same(p0, p1, 1)
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, r0, r1, sh)
-> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem))
-> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p0 mem))
(ORL
s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem))
s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p0 mem))
or:(ORL
s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem))
s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p1 mem))
y))
&& i1 == i0+1
&& j1 == j0-8
@@ -1832,14 +1689,15 @@
&& s0.Uses == 1
&& s1.Uses == 1
&& or.Uses == 1
&& same(p0, p1, 1)
&& mergePoint(b,x0,x1,y) != nil
&& clobber(x0, x1, s0, s1, or)
-> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
-> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p0 mem))) y)
(ORQ
s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem))
s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p0 mem))
or:(ORQ
s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem))
s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p1 mem))
y))
&& i1 == i0+1
&& j1 == j0-8
@@ -1849,14 +1707,15 @@
&& s0.Uses == 1
&& s1.Uses == 1
&& or.Uses == 1
&& same(p0, p1, 1)
&& mergePoint(b,x0,x1,y) != nil
&& clobber(x0, x1, s0, s1, or)
-> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
-> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p0 mem))) y)
(ORQ
s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem)))
s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p0 mem)))
or:(ORQ
s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)))
s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p1 mem)))
y))
&& i1 == i0+2
&& j1 == j0-16
@@ -1868,168 +1727,41 @@
&& s0.Uses == 1
&& s1.Uses == 1
&& or.Uses == 1
&& same(p0, p1, 1)
&& mergePoint(b,x0,x1,y) != nil
&& clobber(x0, x1, r0, r1, s0, s1, or)
-> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i0] {s} p mem))) y)
// Big-endian indexed loads
(ORL
x1:(MOVBloadidx1 [i1] {s} p idx mem)
sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
&& i1 == i0+1
&& x0.Uses == 1
&& x1.Uses == 1
&& sh.Uses == 1
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
-> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
(ORQ
x1:(MOVBloadidx1 [i1] {s} p idx mem)
sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
&& i1 == i0+1
&& x0.Uses == 1
&& x1.Uses == 1
&& sh.Uses == 1
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
-> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
(ORL
r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))
sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
&& i1 == i0+2
&& x0.Uses == 1
&& x1.Uses == 1
&& r0.Uses == 1
&& r1.Uses == 1
&& sh.Uses == 1
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, r0, r1, sh)
-> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
(ORQ
r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))
sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
&& i1 == i0+2
&& x0.Uses == 1
&& x1.Uses == 1
&& r0.Uses == 1
&& r1.Uses == 1
&& sh.Uses == 1
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, r0, r1, sh)
-> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
(ORQ
r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem))
sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem))))
&& i1 == i0+4
&& x0.Uses == 1
&& x1.Uses == 1
&& r0.Uses == 1
&& r1.Uses == 1
&& sh.Uses == 1
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, r0, r1, sh)
-> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
(ORL
s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
or:(ORL
s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
y))
&& i1 == i0+1
&& j1 == j0-8
&& j1 % 16 == 0
&& x0.Uses == 1
&& x1.Uses == 1
&& s0.Uses == 1
&& s1.Uses == 1
&& or.Uses == 1
&& mergePoint(b,x0,x1,y) != nil
&& clobber(x0, x1, s0, s1, or)
-> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
(ORQ
s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
or:(ORQ
s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
y))
&& i1 == i0+1
&& j1 == j0-8
&& j1 % 16 == 0
&& x0.Uses == 1
&& x1.Uses == 1
&& s0.Uses == 1
&& s1.Uses == 1
&& or.Uses == 1
&& mergePoint(b,x0,x1,y) != nil
&& clobber(x0, x1, s0, s1, or)
-> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
(ORQ
s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem)))
or:(ORQ
s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
y))
&& i1 == i0+2
&& j1 == j0-16
&& j1 % 32 == 0
&& x0.Uses == 1
&& x1.Uses == 1
&& r0.Uses == 1
&& r1.Uses == 1
&& s0.Uses == 1
&& s1.Uses == 1
&& or.Uses == 1
&& mergePoint(b,x0,x1,y) != nil
&& clobber(x0, x1, r0, r1, s0, s1, or)
-> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
-> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i0] {s} p0 mem))) y)
// Combine 2 byte stores + shift into rolw 8 + word store
(MOVBstore [i] {s} p w
x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem))
(MOVBstore [i] {s} p1 w
x0:(MOVBstore [i-1] {s} p0 (SHRWconst [8] w) mem))
&& x0.Uses == 1
&& same(p0, p1, 1)
&& clobber(x0)
-> (MOVWstore [i-1] {s} p (ROLWconst <w.Type> [8] w) mem)
(MOVBstoreidx1 [i] {s} p idx w
x0:(MOVBstoreidx1 [i-1] {s} p idx (SHRWconst [8] w) mem))
&& x0.Uses == 1
&& clobber(x0)
-> (MOVWstoreidx1 [i-1] {s} p idx (ROLWconst <w.Type> [8] w) mem)
-> (MOVWstore [i-1] {s} p0 (ROLWconst <w.Type> [8] w) mem)
// Combine stores + shifts into bswap and larger (unaligned) stores
(MOVBstore [i] {s} p w
x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w)
x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w)
x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem))))
(MOVBstore [i] {s} p3 w
x2:(MOVBstore [i-1] {s} p2 (SHRLconst [8] w)
x1:(MOVBstore [i-2] {s} p1 (SHRLconst [16] w)
x0:(MOVBstore [i-3] {s} p0 (SHRLconst [24] w) mem))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& same(p0, p1, 1)
&& same(p1, p2, 1)
&& same(p2, p3, 1)
&& clobber(x0, x1, x2)
-> (MOVLstore [i-3] {s} p (BSWAPL <w.Type> w) mem)
-> (MOVLstore [i-3] {s} p0 (BSWAPL <w.Type> w) mem)
(MOVBstoreidx1 [i] {s} p idx w
x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w)
x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w)
x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& clobber(x0, x1, x2)
-> (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
(MOVBstore [i] {s} p w
x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w)
x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w)
x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w)
x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w)
x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w)
x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w)
x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem))))))))
(MOVBstore [i] {s} p7 w
x6:(MOVBstore [i-1] {s} p6 (SHRQconst [8] w)
x5:(MOVBstore [i-2] {s} p5 (SHRQconst [16] w)
x4:(MOVBstore [i-3] {s} p4 (SHRQconst [24] w)
x3:(MOVBstore [i-4] {s} p3 (SHRQconst [32] w)
x2:(MOVBstore [i-5] {s} p2 (SHRQconst [40] w)
x1:(MOVBstore [i-6] {s} p1 (SHRQconst [48] w)
x0:(MOVBstore [i-7] {s} p0 (SHRQconst [56] w) mem))))))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
@@ -2037,165 +1769,99 @@
&& x4.Uses == 1
&& x5.Uses == 1
&& x6.Uses == 1
&& same(p0, p1, 1)
&& same(p1, p2, 1)
&& same(p2, p3, 1)
&& same(p3, p4, 1)
&& same(p4, p5, 1)
&& same(p5, p6, 1)
&& same(p6, p7, 1)
&& clobber(x0, x1, x2, x3, x4, x5, x6)
-> (MOVQstore [i-7] {s} p (BSWAPQ <w.Type> w) mem)
(MOVBstoreidx1 [i] {s} p idx w
x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w)
x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w)
x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w)
x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w)
x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w)
x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w)
x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& x3.Uses == 1
&& x4.Uses == 1
&& x5.Uses == 1
&& x6.Uses == 1
&& clobber(x0, x1, x2, x3, x4, x5, x6)
-> (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
-> (MOVQstore [i-7] {s} p0 (BSWAPQ <w.Type> w) mem)
// Combine constant stores into larger (unaligned) stores.
(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
(MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem))
&& x.Uses == 1
&& same(p0, p1, 1)
&& ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
&& clobber(x)
-> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
(MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem))
-> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem)
(MOVBstoreconst [a] {s} p1 x:(MOVBstoreconst [c] {s} p0 mem))
&& x.Uses == 1
&& same(p0, p1, 1)
&& ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
&& clobber(x)
-> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
(MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
-> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem)
(MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem))
&& x.Uses == 1
&& same(p0, p1, 1)
&& ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
&& clobber(x)
-> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
(MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem))
-> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem)
(MOVWstoreconst [a] {s} p1 x:(MOVWstoreconst [c] {s} p0 mem))
&& x.Uses == 1
&& same(p0, p1, 1)
&& ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
&& clobber(x)
-> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
(MOVLstoreconst [c] {s} p x:(MOVLstoreconst [a] {s} p mem))
-> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem)
(MOVLstoreconst [c] {s} p1 x:(MOVLstoreconst [a] {s} p0 mem))
&& x.Uses == 1
&& same(p0, p1, 1)
&& ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
&& clobber(x)
-> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
(MOVLstoreconst [a] {s} p x:(MOVLstoreconst [c] {s} p mem))
-> (MOVQstore [ValAndOff(a).Off()] {s} p0 (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
(MOVLstoreconst [a] {s} p1 x:(MOVLstoreconst [c] {s} p0 mem))
&& x.Uses == 1
&& same(p0, p1, 1)
&& ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
&& clobber(x)
-> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
(MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem))
-> (MOVQstore [ValAndOff(a).Off()] {s} p0 (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
(MOVQstoreconst [c] {s} p1 x:(MOVQstoreconst [c2] {s} p0 mem))
&& config.useSSE
&& x.Uses == 1
&& same(p0, p1, 1)
&& ValAndOff(c2).Off() + 8 == ValAndOff(c).Off()
&& ValAndOff(c).Val() == 0
&& ValAndOff(c2).Val() == 0
&& clobber(x)
-> (MOVOstore [ValAndOff(c2).Off()] {s} p (MOVOconst [0]) mem)
(MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem))
&& x.Uses == 1
&& ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
&& clobber(x)
-> (MOVWstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p i mem)
(MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem))
&& x.Uses == 1
&& ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
&& clobber(x)
-> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem)
(MOVLstoreconstidx1 [c] {s} p i x:(MOVLstoreconstidx1 [a] {s} p i mem))
&& x.Uses == 1
&& ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
&& clobber(x)
-> (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p i (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
(MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem))
&& x.Uses == 1
&& ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
&& clobber(x)
-> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLQconst <i.Type> [1] i) mem)
(MOVLstoreconstidx4 [c] {s} p i x:(MOVLstoreconstidx4 [a] {s} p i mem))
&& x.Uses == 1
&& ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
&& clobber(x)
-> (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p (SHLQconst <i.Type> [2] i) (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
-> (MOVOstore [ValAndOff(c2).Off()] {s} p0 (MOVOconst [0]) mem)
// Combine stores into larger (unaligned) stores.
(MOVBstore [i] {s} p (SHR(W|L|Q)const [8] w) x:(MOVBstore [i-1] {s} p w mem))
(MOVBstore [i] {s} p1 (SHR(W|L|Q)const [8] w) x:(MOVBstore [i-1] {s} p0 w mem))
&& x.Uses == 1
&& same(p0, p1, 1)
&& clobber(x)
-> (MOVWstore [i-1] {s} p w mem)
(MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHR(W|L|Q)const [8] w) mem))
-> (MOVWstore [i-1] {s} p0 w mem)
(MOVBstore [i] {s} p1 w x:(MOVBstore [i+1] {s} p0 (SHR(W|L|Q)const [8] w) mem))
&& x.Uses == 1
&& same(p0, p1, 1)
&& clobber(x)
-> (MOVWstore [i] {s} p w mem)
(MOVBstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVBstore [i-1] {s} p w0:(SHR(L|Q)const [j-8] w) mem))
-> (MOVWstore [i] {s} p0 w mem)
(MOVBstore [i] {s} p1 (SHR(L|Q)const [j] w) x:(MOVBstore [i-1] {s} p0 w0:(SHR(L|Q)const [j-8] w) mem))
&& x.Uses == 1
&& same(p0, p1, 1)
&& clobber(x)
-> (MOVWstore [i-1] {s} p w0 mem)
(MOVWstore [i] {s} p (SHR(L|Q)const [16] w) x:(MOVWstore [i-2] {s} p w mem))
-> (MOVWstore [i-1] {s} p0 w0 mem)
(MOVWstore [i] {s} p1 (SHR(L|Q)const [16] w) x:(MOVWstore [i-2] {s} p0 w mem))
&& x.Uses == 1
&& same(p0, p1, 1)
&& clobber(x)
-> (MOVLstore [i-2] {s} p w mem)
(MOVWstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVWstore [i-2] {s} p w0:(SHR(L|Q)const [j-16] w) mem))
-> (MOVLstore [i-2] {s} p0 w mem)
(MOVWstore [i] {s} p1 (SHR(L|Q)const [j] w) x:(MOVWstore [i-2] {s} p0 w0:(SHR(L|Q)const [j-16] w) mem))
&& x.Uses == 1
&& same(p0, p1, 1)
&& clobber(x)
-> (MOVLstore [i-2] {s} p w0 mem)
(MOVLstore [i] {s} p (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p w mem))
-> (MOVLstore [i-2] {s} p0 w0 mem)
(MOVLstore [i] {s} p1 (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p0 w mem))
&& x.Uses == 1
&& same(p0, p1, 1)
&& clobber(x)
-> (MOVQstore [i-4] {s} p w mem)
(MOVLstore [i] {s} p (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem))
-> (MOVQstore [i-4] {s} p0 w mem)
(MOVLstore [i] {s} p1 (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p0 w0:(SHRQconst [j-32] w) mem))
&& x.Uses == 1
&& same(p0, p1, 1)
&& clobber(x)
-> (MOVQstore [i-4] {s} p w0 mem)
(MOVBstoreidx1 [i] {s} p idx (SHR(W|L|Q)const [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVWstoreidx1 [i-1] {s} p idx w mem)
(MOVBstoreidx1 [i] {s} p idx (SHR(L|Q)const [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHR(L|Q)const [j-8] w) mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
(MOVWstoreidx1 [i] {s} p idx (SHR(L|Q)const [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVLstoreidx1 [i-2] {s} p idx w mem)
(MOVWstoreidx1 [i] {s} p idx (SHR(L|Q)const [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHR(L|Q)const [j-16] w) mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
(MOVLstoreidx1 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx1 [i-4] {s} p idx w mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVQstoreidx1 [i-4] {s} p idx w mem)
(MOVLstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx1 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVQstoreidx1 [i-4] {s} p idx w0 mem)
(MOVWstoreidx2 [i] {s} p idx (SHR(L|Q)const [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w mem)
(MOVWstoreidx2 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w0 mem)
(MOVLstoreidx4 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx4 [i-4] {s} p idx w mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w mem)
(MOVLstoreidx4 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx4 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w0 mem)
-> (MOVQstore [i-4] {s} p0 w0 mem)
(MOVBstore [i] {s} p
x1:(MOVBload [j] {s2} p2 mem)
@@ -2320,41 +1986,6 @@
(BSFQ (ORQconst <t> [1<<8] (MOVBQZX x))) -> (BSFQ (ORQconst <t> [1<<8] x))
(BSFQ (ORQconst <t> [1<<16] (MOVWQZX x))) -> (BSFQ (ORQconst <t> [1<<16] x))
// Simplify indexed loads/stores
(MOVBstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVBstore [i+c] {s} p w mem)
(MOVWstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVWstore [i+c] {s} p w mem)
(MOVLstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVLstore [i+c] {s} p w mem)
(MOVQstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVQstore [i+c] {s} p w mem)
(MOVWstoreidx2 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+2*c) -> (MOVWstore [i+2*c] {s} p w mem)
(MOVLstoreidx4 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+4*c) -> (MOVLstore [i+4*c] {s} p w mem)
(MOVLstoreidx8 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+8*c) -> (MOVLstore [i+8*c] {s} p w mem)
(MOVQstoreidx8 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+8*c) -> (MOVQstore [i+8*c] {s} p w mem)
(MOVSSstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVSSstore [i+c] {s} p w mem)
(MOVSSstoreidx4 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+4*c) -> (MOVSSstore [i+4*c] {s} p w mem)
(MOVSDstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVSDstore [i+c] {s} p w mem)
(MOVSDstoreidx8 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+8*c) -> (MOVSDstore [i+8*c] {s} p w mem)
(MOVBloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVBload [i+c] {s} p mem)
(MOVWloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVWload [i+c] {s} p mem)
(MOVLloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVLload [i+c] {s} p mem)
(MOVQloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVQload [i+c] {s} p mem)
(MOVWloadidx2 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+2*c) -> (MOVWload [i+2*c] {s} p mem)
(MOVLloadidx4 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+4*c) -> (MOVLload [i+4*c] {s} p mem)
(MOVLloadidx8 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+8*c) -> (MOVLload [i+8*c] {s} p mem)
(MOVQloadidx8 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+8*c) -> (MOVQload [i+8*c] {s} p mem)
(MOVSSloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVSSload [i+c] {s} p mem)
(MOVSSloadidx4 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+4*c) -> (MOVSSload [i+4*c] {s} p mem)
(MOVSDloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVSDload [i+c] {s} p mem)
(MOVSDloadidx8 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+8*c) -> (MOVSDload [i+8*c] {s} p mem)
// Combine consts into storeidx.
// Note that when c == 0, it takes more bytes to encode
// the immediate $0 than to zero a register and use it.
// We do the rewrite anyway, to minimize register pressure.
(MOVBstoreidx1 [off] {s} ptr idx (MOVLconst [c]) mem) && validValAndOff(int64(int8(c)), off) -> (MOVBstoreconstidx1 [makeValAndOff(int64(int8(c)), off)] {s} ptr idx mem)
(MOVWstoreidx(1|2) [off] {s} ptr idx (MOVLconst [c]) mem) && validValAndOff(int64(int16(c)), off) -> (MOVWstoreconstidx(1|2) [makeValAndOff(int64(int16(c)), off)] {s} ptr idx mem)
(MOVLstoreidx(1|4) [off] {s} ptr idx (MOVQconst [c]) mem) && validValAndOff(int64(int32(c)), off) -> (MOVLstoreconstidx(1|4) [makeValAndOff(int64(int32(c)), off)] {s} ptr idx mem)
(MOVQstoreidx(1|8) [off] {s} ptr idx (MOVQconst [c]) mem) && validValAndOff(c, off) -> (MOVQstoreconstidx(1|8) [makeValAndOff(c, off)] {s} ptr idx mem)
// Redundant sign/zero extensions
// Note: see issue 21963. We have to make sure we use the right type on
// the resulting extension (the outer type, not the inner type).


@@ -1247,3 +1247,43 @@ func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
copy(buf, src)
return byteorder.Uint64(buf)
}
// same reports whether x and y are the same value.
// It checks to a maximum recursion depth of depth,
// so it may report a false negative.
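// It is used by rewrite rule conditions such as same(p0, p1, 1) to match
// pointers that are distinct Values but compute the same address.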
func same(x, y *Value, depth int) bool {
if x == y {
return true
}
if depth <= 0 {
return false
}
if x.Op != y.Op || x.Aux != y.Aux || x.AuxInt != y.AuxInt {
return false
}
if len(x.Args) != len(y.Args) {
return false
}
if opcodeTable[x.Op].commutative {
// Check exchanged ordering first.
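// Only the first two arguments of a commutative op commute, so indexes
// 0 and 1 are swapped and any remaining arguments are compared in place.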
for i, a := range x.Args {
j := i
if j < 2 {
j ^= 1
}
b := y.Args[j]
if !same(a, b, depth-1) {
goto checkNormalOrder
}
}
return true
checkNormalOrder:
}
for i, a := range x.Args {
b := y.Args[i]
if !same(a, b, depth-1) {
return false
}
}
return true
}

File diff suppressed because it is too large.


@@ -93,3 +93,91 @@ func compMem3(x, y *int) (int, bool) {
// 386:`CMPL\t\(`
return r, r < *y
}
// The following functions test that indexed load/store operations get generated.
func idxInt8(x, y []int8, i int) {
var t int8
// amd64: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
t = x[i+1]
// amd64: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
y[i+1] = t
// amd64: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
x[i+1] = 77
}
func idxInt16(x, y []int16, i int) {
var t int16
// amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
t = x[i+1]
// amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
y[i+1] = t
// amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
t = x[16*i+1]
// amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
y[16*i+1] = t
// amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
x[i+1] = 77
// amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
x[16*i+1] = 77
}
func idxInt32(x, y []int32, i int) {
var t int32
// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
t = x[i+1]
// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
y[i+1] = t
// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
t = x[2*i+1]
// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
y[2*i+1] = t
// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
t = x[16*i+1]
// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
y[16*i+1] = t
// amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
x[i+1] = 77
// amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
x[16*i+1] = 77
}
func idxInt64(x, y []int64, i int) {
var t int64
// amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
t = x[i+1]
// amd64: `MOVQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
y[i+1] = t
// amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), [A-Z]+[0-9]*`
t = x[16*i+1]
// amd64: `MOVQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
y[16*i+1] = t
// amd64: `MOVQ\t[$]77, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
x[i+1] = 77
// amd64: `MOVQ\t[$]77, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
x[16*i+1] = 77
}
func idxFloat32(x, y []float32, i int) {
var t float32
// amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
t = x[i+1]
// amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
y[i+1] = t
// amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
t = x[16*i+1]
// amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
y[16*i+1] = t
}
func idxFloat64(x, y []float64, i int) {
var t float64
// amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
t = x[i+1]
// amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
y[i+1] = t
// amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
t = x[16*i+1]
// amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
y[16*i+1] = t
}