mirror of
https://github.com/golang/go
synced 2024-10-04 15:09:59 +00:00
cmd/compile: insert complicated x86 addressing modes as a separate pass
Use a separate compiler pass to introduce complicated x86 addressing modes. Loads in the normal architecture rules (for x86 and all other platforms) can have constant offsets (AuxInt values) and symbols (Aux values), but no more. The complex addressing modes (x+y, x+2*y, etc.) are introduced in a separate pass that combines loads with LEAQx ops. Organizing rewrites this way reduces the number of rewrites required, as there are lots of different rule orderings that would otherwise have to be specified to ensure these complex addressing modes are always found if they are possible. Update #36468 Change-Id: I5b4bf7b03a1e731d6dfeb9ef19b376175f3b4b44 Reviewed-on: https://go-review.googlesource.com/c/go/+/217097 Run-TryBot: Keith Randall <khr@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
This commit is contained in:
parent
d49fecc474
commit
98cb76799c
154
src/cmd/compile/internal/ssa/addressingmodes.go
Normal file
154
src/cmd/compile/internal/ssa/addressingmodes.go
Normal file
|
@ -0,0 +1,154 @@
|
|||
// Copyright 2020 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package ssa
|
||||
|
||||
// addressingModes combines address calculations into memory operations
|
||||
// that can perform complicated addressing modes.
|
||||
func addressingModes(f *Func) {
|
||||
switch f.Config.arch {
|
||||
default:
|
||||
// Most architectures can't do this.
|
||||
return
|
||||
case "amd64":
|
||||
// TODO: 386, s390x?
|
||||
}
|
||||
|
||||
var tmp []*Value
|
||||
for _, b := range f.Blocks {
|
||||
for _, v := range b.Values {
|
||||
if !combineFirst[v.Op] {
|
||||
continue
|
||||
}
|
||||
p := v.Args[0]
|
||||
c, ok := combine[[2]Op{v.Op, p.Op}]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
// See if we can combine the Aux/AuxInt values.
|
||||
switch [2]auxType{opcodeTable[v.Op].auxType, opcodeTable[p.Op].auxType} {
|
||||
case [2]auxType{auxSymOff, auxInt32}:
|
||||
// TODO: introduce auxSymOff32
|
||||
if !is32Bit(v.AuxInt + p.AuxInt) {
|
||||
continue
|
||||
}
|
||||
v.AuxInt += p.AuxInt
|
||||
case [2]auxType{auxSymOff, auxSymOff}:
|
||||
if v.Aux != nil && p.Aux != nil {
|
||||
continue
|
||||
}
|
||||
if !is32Bit(v.AuxInt + p.AuxInt) {
|
||||
continue
|
||||
}
|
||||
if p.Aux != nil {
|
||||
v.Aux = p.Aux
|
||||
}
|
||||
v.AuxInt += p.AuxInt
|
||||
case [2]auxType{auxSymValAndOff, auxInt32}:
|
||||
vo := ValAndOff(v.AuxInt)
|
||||
if !vo.canAdd(p.AuxInt) {
|
||||
continue
|
||||
}
|
||||
v.AuxInt = vo.add(p.AuxInt)
|
||||
case [2]auxType{auxSymValAndOff, auxSymOff}:
|
||||
vo := ValAndOff(v.AuxInt)
|
||||
if v.Aux != nil && p.Aux != nil {
|
||||
continue
|
||||
}
|
||||
if !vo.canAdd(p.AuxInt) {
|
||||
continue
|
||||
}
|
||||
if p.Aux != nil {
|
||||
v.Aux = p.Aux
|
||||
}
|
||||
v.AuxInt = vo.add(p.AuxInt)
|
||||
case [2]auxType{auxSymOff, auxNone}:
|
||||
// nothing to do
|
||||
case [2]auxType{auxSymValAndOff, auxNone}:
|
||||
// nothing to do
|
||||
default:
|
||||
f.Fatalf("unknown aux combining for %s and %s\n", v.Op, p.Op)
|
||||
}
|
||||
// Combine the operations.
|
||||
tmp = append(tmp[:0], v.Args[1:]...)
|
||||
v.resetArgs()
|
||||
v.Op = c
|
||||
v.AddArgs(p.Args...)
|
||||
v.AddArgs(tmp...)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// combineFirst contains ops which appear in combine as the
|
||||
// first part of the key.
|
||||
var combineFirst = map[Op]bool{}
|
||||
|
||||
func init() {
|
||||
for k := range combine {
|
||||
combineFirst[k[0]] = true
|
||||
}
|
||||
}
|
||||
|
||||
// For each entry k, v in this map, if we have a value x with:
|
||||
// x.Op == k[0]
|
||||
// x.Args[0].Op == k[1]
|
||||
// then we can set x.Op to v and set x.Args like this:
|
||||
// x.Args[0].Args + x.Args[1:]
|
||||
// Additionally, the Aux/AuxInt from x.Args[0] is merged into x.
|
||||
var combine = map[[2]Op]Op{
|
||||
[2]Op{OpAMD64MOVBload, OpAMD64ADDQ}: OpAMD64MOVBloadidx1,
|
||||
[2]Op{OpAMD64MOVWload, OpAMD64ADDQ}: OpAMD64MOVWloadidx1,
|
||||
[2]Op{OpAMD64MOVLload, OpAMD64ADDQ}: OpAMD64MOVLloadidx1,
|
||||
[2]Op{OpAMD64MOVQload, OpAMD64ADDQ}: OpAMD64MOVQloadidx1,
|
||||
[2]Op{OpAMD64MOVSSload, OpAMD64ADDQ}: OpAMD64MOVSSloadidx1,
|
||||
[2]Op{OpAMD64MOVSDload, OpAMD64ADDQ}: OpAMD64MOVSDloadidx1,
|
||||
|
||||
[2]Op{OpAMD64MOVBstore, OpAMD64ADDQ}: OpAMD64MOVBstoreidx1,
|
||||
[2]Op{OpAMD64MOVWstore, OpAMD64ADDQ}: OpAMD64MOVWstoreidx1,
|
||||
[2]Op{OpAMD64MOVLstore, OpAMD64ADDQ}: OpAMD64MOVLstoreidx1,
|
||||
[2]Op{OpAMD64MOVQstore, OpAMD64ADDQ}: OpAMD64MOVQstoreidx1,
|
||||
[2]Op{OpAMD64MOVSSstore, OpAMD64ADDQ}: OpAMD64MOVSSstoreidx1,
|
||||
[2]Op{OpAMD64MOVSDstore, OpAMD64ADDQ}: OpAMD64MOVSDstoreidx1,
|
||||
|
||||
[2]Op{OpAMD64MOVBstoreconst, OpAMD64ADDQ}: OpAMD64MOVBstoreconstidx1,
|
||||
[2]Op{OpAMD64MOVWstoreconst, OpAMD64ADDQ}: OpAMD64MOVWstoreconstidx1,
|
||||
[2]Op{OpAMD64MOVLstoreconst, OpAMD64ADDQ}: OpAMD64MOVLstoreconstidx1,
|
||||
[2]Op{OpAMD64MOVQstoreconst, OpAMD64ADDQ}: OpAMD64MOVQstoreconstidx1,
|
||||
|
||||
[2]Op{OpAMD64MOVBload, OpAMD64LEAQ1}: OpAMD64MOVBloadidx1,
|
||||
[2]Op{OpAMD64MOVWload, OpAMD64LEAQ1}: OpAMD64MOVWloadidx1,
|
||||
[2]Op{OpAMD64MOVWload, OpAMD64LEAQ2}: OpAMD64MOVWloadidx2,
|
||||
[2]Op{OpAMD64MOVLload, OpAMD64LEAQ1}: OpAMD64MOVLloadidx1,
|
||||
[2]Op{OpAMD64MOVLload, OpAMD64LEAQ4}: OpAMD64MOVLloadidx4,
|
||||
[2]Op{OpAMD64MOVLload, OpAMD64LEAQ8}: OpAMD64MOVLloadidx8,
|
||||
[2]Op{OpAMD64MOVQload, OpAMD64LEAQ1}: OpAMD64MOVQloadidx1,
|
||||
[2]Op{OpAMD64MOVQload, OpAMD64LEAQ8}: OpAMD64MOVQloadidx8,
|
||||
[2]Op{OpAMD64MOVSSload, OpAMD64LEAQ1}: OpAMD64MOVSSloadidx1,
|
||||
[2]Op{OpAMD64MOVSSload, OpAMD64LEAQ4}: OpAMD64MOVSSloadidx4,
|
||||
[2]Op{OpAMD64MOVSDload, OpAMD64LEAQ1}: OpAMD64MOVSDloadidx1,
|
||||
[2]Op{OpAMD64MOVSDload, OpAMD64LEAQ8}: OpAMD64MOVSDloadidx8,
|
||||
|
||||
[2]Op{OpAMD64MOVBstore, OpAMD64LEAQ1}: OpAMD64MOVBstoreidx1,
|
||||
[2]Op{OpAMD64MOVWstore, OpAMD64LEAQ1}: OpAMD64MOVWstoreidx1,
|
||||
[2]Op{OpAMD64MOVWstore, OpAMD64LEAQ2}: OpAMD64MOVWstoreidx2,
|
||||
[2]Op{OpAMD64MOVLstore, OpAMD64LEAQ1}: OpAMD64MOVLstoreidx1,
|
||||
[2]Op{OpAMD64MOVLstore, OpAMD64LEAQ4}: OpAMD64MOVLstoreidx4,
|
||||
[2]Op{OpAMD64MOVLstore, OpAMD64LEAQ8}: OpAMD64MOVLstoreidx8,
|
||||
[2]Op{OpAMD64MOVQstore, OpAMD64LEAQ1}: OpAMD64MOVQstoreidx1,
|
||||
[2]Op{OpAMD64MOVQstore, OpAMD64LEAQ8}: OpAMD64MOVQstoreidx8,
|
||||
[2]Op{OpAMD64MOVSSstore, OpAMD64LEAQ1}: OpAMD64MOVSSstoreidx1,
|
||||
[2]Op{OpAMD64MOVSSstore, OpAMD64LEAQ4}: OpAMD64MOVSSstoreidx4,
|
||||
[2]Op{OpAMD64MOVSDstore, OpAMD64LEAQ1}: OpAMD64MOVSDstoreidx1,
|
||||
[2]Op{OpAMD64MOVSDstore, OpAMD64LEAQ8}: OpAMD64MOVSDstoreidx8,
|
||||
|
||||
[2]Op{OpAMD64MOVBstoreconst, OpAMD64LEAQ1}: OpAMD64MOVBstoreconstidx1,
|
||||
[2]Op{OpAMD64MOVWstoreconst, OpAMD64LEAQ1}: OpAMD64MOVWstoreconstidx1,
|
||||
[2]Op{OpAMD64MOVWstoreconst, OpAMD64LEAQ2}: OpAMD64MOVWstoreconstidx2,
|
||||
[2]Op{OpAMD64MOVLstoreconst, OpAMD64LEAQ1}: OpAMD64MOVLstoreconstidx1,
|
||||
[2]Op{OpAMD64MOVLstoreconst, OpAMD64LEAQ4}: OpAMD64MOVLstoreconstidx4,
|
||||
[2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ1}: OpAMD64MOVQstoreconstidx1,
|
||||
[2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ8}: OpAMD64MOVQstoreconstidx8,
|
||||
|
||||
// TODO: 386
|
||||
}
|
|
@ -442,6 +442,7 @@ var passes = [...]pass{
|
|||
{name: "insert resched checks", fn: insertLoopReschedChecks,
|
||||
disabled: objabi.Preemptibleloops_enabled == 0}, // insert resched checks in loops.
|
||||
{name: "lower", fn: lower, required: true},
|
||||
{name: "addressing modes", fn: addressingModes, required: false},
|
||||
{name: "lowered deadcode for cse", fn: deadcode}, // deadcode immediately before CSE avoids CSE making dead values live again
|
||||
{name: "lowered cse", fn: cse},
|
||||
{name: "elim unread autos", fn: elimUnreadAutos},
|
||||
|
|
|
@ -1043,12 +1043,6 @@
|
|||
(MOVWQZX x) && zeroUpper48Bits(x,3) -> x
|
||||
(MOVBQZX x) && zeroUpper56Bits(x,3) -> x
|
||||
|
||||
(MOVBQZX x:(MOVBloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx1 <v.Type> [off] {sym} ptr idx mem)
|
||||
(MOVWQZX x:(MOVWloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem)
|
||||
(MOVWQZX x:(MOVWloadidx2 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem)
|
||||
(MOVLQZX x:(MOVLloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLloadidx1 <v.Type> [off] {sym} ptr idx mem)
|
||||
(MOVLQZX x:(MOVLloadidx4 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLloadidx4 <v.Type> [off] {sym} ptr idx mem)
|
||||
|
||||
// replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
|
||||
(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBQZX x)
|
||||
(MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWQZX x)
|
||||
|
@ -1166,86 +1160,6 @@
|
|||
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
|
||||
|
||||
// generating indexed loads and stores
|
||||
(MOV(B|W|L|Q|SS|SD)load [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
(MOV(B|W|L|Q|SS|SD)loadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
|
||||
(MOVWload [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
(MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
|
||||
(MOV(L|SS)load [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
(MOV(L|SS)loadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
|
||||
(MOV(L|Q|SD)load [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
(MOV(L|Q|SD)loadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
|
||||
|
||||
(MOV(B|W|L|Q|SS|SD)store [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
(MOV(B|W|L|Q|SS|SD)storeidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
|
||||
(MOVWstore [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
(MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
|
||||
(MOV(L|SS)store [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
(MOV(L|SS)storeidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
|
||||
(MOV(L|Q|SD)store [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
(MOV(L|Q|SD)storeidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
|
||||
|
||||
(MOV(B|W|L|Q|SS|SD)load [off] {sym} (ADDQ ptr idx) mem) && ptr.Op != OpSB ->
|
||||
(MOV(B|W|L|Q|SS|SD)loadidx1 [off] {sym} ptr idx mem)
|
||||
(MOV(B|W|L|Q|SS|SD)store [off] {sym} (ADDQ ptr idx) val mem) && ptr.Op != OpSB ->
|
||||
(MOV(B|W|L|Q|SS|SD)storeidx1 [off] {sym} ptr idx val mem)
|
||||
|
||||
(MOV(B|W|L|Q)storeconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
|
||||
(MOV(B|W|L|Q)storeconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
|
||||
(MOVWstoreconst [x] {sym1} (LEAQ2 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
|
||||
(MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
|
||||
(MOVLstoreconst [x] {sym1} (LEAQ4 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
|
||||
(MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
|
||||
(MOVQstoreconst [x] {sym1} (LEAQ8 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
|
||||
(MOVQstoreconstidx8 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
|
||||
|
||||
(MOV(B|W|L|Q)storeconst [x] {sym} (ADDQ ptr idx) mem) -> (MOV(B|W|L|Q)storeconstidx1 [x] {sym} ptr idx mem)
|
||||
|
||||
// combine SHLQ into indexed loads and stores
|
||||
(MOVWloadidx1 [c] {sym} ptr (SHLQconst [1] idx) mem) -> (MOVWloadidx2 [c] {sym} ptr idx mem)
|
||||
(MOV(L|SS)loadidx1 [c] {sym} ptr (SHLQconst [2] idx) mem) -> (MOV(L|SS)loadidx4 [c] {sym} ptr idx mem)
|
||||
(MOV(L|Q|SD)loadidx1 [c] {sym} ptr (SHLQconst [3] idx) mem) -> (MOV(L|Q|SD)loadidx8 [c] {sym} ptr idx mem)
|
||||
|
||||
(MOVWstoreidx1 [c] {sym} ptr (SHLQconst [1] idx) val mem) -> (MOVWstoreidx2 [c] {sym} ptr idx val mem)
|
||||
(MOV(L|SS)storeidx1 [c] {sym} ptr (SHLQconst [2] idx) val mem) -> (MOV(L|SS)storeidx4 [c] {sym} ptr idx val mem)
|
||||
(MOV(L|Q|SD)storeidx1 [c] {sym} ptr (SHLQconst [3] idx) val mem) -> (MOV(L|Q|SD)storeidx8 [c] {sym} ptr idx val mem)
|
||||
(MOVWstoreconstidx1 [c] {sym} ptr (SHLQconst [1] idx) mem) -> (MOVWstoreconstidx2 [c] {sym} ptr idx mem)
|
||||
(MOVLstoreconstidx1 [c] {sym} ptr (SHLQconst [2] idx) mem) -> (MOVLstoreconstidx4 [c] {sym} ptr idx mem)
|
||||
(MOVQstoreconstidx1 [c] {sym} ptr (SHLQconst [3] idx) mem) -> (MOVQstoreconstidx8 [c] {sym} ptr idx mem)
|
||||
|
||||
// combine ADDQ into pointer of indexed loads and stores
|
||||
(MOV(B|W|L|Q|SS|SD)loadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)loadidx1 [c+d] {sym} ptr idx mem)
|
||||
(MOVWloadidx2 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOVWloadidx2 [c+d] {sym} ptr idx mem)
|
||||
(MOV(L|SS)loadidx4 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOV(L|SS)loadidx4 [c+d] {sym} ptr idx mem)
|
||||
(MOV(L|Q|SD)loadidx8 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOV(L|Q|SD)loadidx8 [c+d] {sym} ptr idx mem)
|
||||
|
||||
(MOV(B|W|L|Q|SS|SD)storeidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)storeidx1 [c+d] {sym} ptr idx val mem)
|
||||
(MOVWstoreidx2 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOVWstoreidx2 [c+d] {sym} ptr idx val mem)
|
||||
(MOV(L|SS)storeidx4 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOV(L|SS)storeidx4 [c+d] {sym} ptr idx val mem)
|
||||
(MOV(L|Q|SD)storeidx8 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOV(L|Q|SD)storeidx8 [c+d] {sym} ptr idx val mem)
|
||||
|
||||
|
||||
// combine ADDQ into index of indexed loads and stores
|
||||
(MOV(B|W|L|Q|SS|SD)loadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)loadidx1 [c+d] {sym} ptr idx mem)
|
||||
(MOVWloadidx2 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+2*d) -> (MOVWloadidx2 [c+2*d] {sym} ptr idx mem)
|
||||
(MOV(L|SS)loadidx4 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+4*d) -> (MOV(L|SS)loadidx4 [c+4*d] {sym} ptr idx mem)
|
||||
(MOV(L|Q|SD)loadidx8 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+8*d) -> (MOV(L|Q|SD)loadidx8 [c+8*d] {sym} ptr idx mem)
|
||||
|
||||
(MOV(B|W|L|Q|SS|SD)storeidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)storeidx1 [c+d] {sym} ptr idx val mem)
|
||||
(MOVWstoreidx2 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+2*d) -> (MOVWstoreidx2 [c+2*d] {sym} ptr idx val mem)
|
||||
(MOV(L|SS)storeidx4 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+4*d) -> (MOV(L|SS)storeidx4 [c+4*d] {sym} ptr idx val mem)
|
||||
(MOV(L|Q|SD)storeidx8 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+8*d) -> (MOV(L|Q|SD)storeidx8 [c+8*d] {sym} ptr idx val mem)
|
||||
|
||||
(MOV(B|W|L|Q)storeconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOV(B|W|L|Q)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
|
||||
(MOVWstoreconstidx2 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem)
|
||||
(MOVLstoreconstidx4 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem)
|
||||
(MOVQstoreconstidx8 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOVQstoreconstidx8 [ValAndOff(x).add(c)] {sym} ptr idx mem)
|
||||
|
||||
(MOV(B|W|L|Q)storeconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(c) -> (MOV(B|W|L|Q)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
|
||||
(MOVWstoreconstidx2 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(2*c) -> (MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem)
|
||||
(MOVLstoreconstidx4 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(4*c) -> (MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem)
|
||||
(MOVQstoreconstidx8 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(8*c) -> (MOVQstoreconstidx8 [ValAndOff(x).add(8*c)] {sym} ptr idx mem)
|
||||
|
||||
// fold LEAQs together
|
||||
(LEAQ [off1] {sym1} (LEAQ [off2] {sym2} x)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
(LEAQ [off1+off2] {mergeSym(sym1,sym2)} x)
|
||||
|
@ -1274,6 +1188,17 @@
|
|||
(LEAQ [off1] {sym1} (LEAQ8 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
(LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)
|
||||
|
||||
// LEAQ[1248] into LEAQ[1248]. Only some such merges are possible.
|
||||
(LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
(LEAQ2 [off1+off2] {mergeSym(sym1, sym2)} x y)
|
||||
(LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||
(LEAQ2 [off1+off2] {mergeSym(sym1, sym2)} y x)
|
||||
(LEAQ2 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(off1+2*off2) && sym2 == nil ->
|
||||
(LEAQ4 [off1+2*off2] {sym1} x y)
|
||||
(LEAQ4 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(off1+4*off2) && sym2 == nil ->
|
||||
(LEAQ8 [off1+4*off2] {sym1} x y)
|
||||
// TODO: more?
|
||||
|
||||
// Absorb InvertFlags into branches.
|
||||
(LT (InvertFlags cmp) yes no) -> (GT cmp yes no)
|
||||
(GT (InvertFlags cmp) yes no) -> (LT cmp yes no)
|
||||
|
@ -1552,60 +1477,65 @@
|
|||
|
||||
// Little-endian loads
|
||||
|
||||
(ORL x0:(MOVBload [i0] {s} p mem)
|
||||
sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
|
||||
(ORL x0:(MOVBload [i0] {s} p0 mem)
|
||||
sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p1 mem)))
|
||||
&& i1 == i0+1
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& sh.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& mergePoint(b,x0,x1) != nil
|
||||
&& clobber(x0, x1, sh)
|
||||
-> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
|
||||
-> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem)
|
||||
|
||||
(ORQ x0:(MOVBload [i0] {s} p mem)
|
||||
sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p mem)))
|
||||
(ORQ x0:(MOVBload [i0] {s} p0 mem)
|
||||
sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p1 mem)))
|
||||
&& i1 == i0+1
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& sh.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& mergePoint(b,x0,x1) != nil
|
||||
&& clobber(x0, x1, sh)
|
||||
-> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
|
||||
-> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem)
|
||||
|
||||
(ORL x0:(MOVWload [i0] {s} p mem)
|
||||
sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p mem)))
|
||||
(ORL x0:(MOVWload [i0] {s} p0 mem)
|
||||
sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p1 mem)))
|
||||
&& i1 == i0+2
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& sh.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& mergePoint(b,x0,x1) != nil
|
||||
&& clobber(x0, x1, sh)
|
||||
-> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
|
||||
-> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p0 mem)
|
||||
|
||||
(ORQ x0:(MOVWload [i0] {s} p mem)
|
||||
sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p mem)))
|
||||
(ORQ x0:(MOVWload [i0] {s} p0 mem)
|
||||
sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p1 mem)))
|
||||
&& i1 == i0+2
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& sh.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& mergePoint(b,x0,x1) != nil
|
||||
&& clobber(x0, x1, sh)
|
||||
-> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
|
||||
-> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p0 mem)
|
||||
|
||||
(ORQ x0:(MOVLload [i0] {s} p mem)
|
||||
sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem)))
|
||||
(ORQ x0:(MOVLload [i0] {s} p0 mem)
|
||||
sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p1 mem)))
|
||||
&& i1 == i0+4
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& sh.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& mergePoint(b,x0,x1) != nil
|
||||
&& clobber(x0, x1, sh)
|
||||
-> @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem)
|
||||
-> @mergePoint(b,x0,x1) (MOVQload [i0] {s} p0 mem)
|
||||
|
||||
(ORL
|
||||
s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem))
|
||||
s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p0 mem))
|
||||
or:(ORL
|
||||
s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem))
|
||||
s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p1 mem))
|
||||
y))
|
||||
&& i1 == i0+1
|
||||
&& j1 == j0+8
|
||||
|
@ -1615,14 +1545,15 @@
|
|||
&& s0.Uses == 1
|
||||
&& s1.Uses == 1
|
||||
&& or.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& mergePoint(b,x0,x1,y) != nil
|
||||
&& clobber(x0, x1, s0, s1, or)
|
||||
-> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
|
||||
-> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p0 mem)) y)
|
||||
|
||||
(ORQ
|
||||
s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem))
|
||||
s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p0 mem))
|
||||
or:(ORQ
|
||||
s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem))
|
||||
s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p1 mem))
|
||||
y))
|
||||
&& i1 == i0+1
|
||||
&& j1 == j0+8
|
||||
|
@ -1632,14 +1563,15 @@
|
|||
&& s0.Uses == 1
|
||||
&& s1.Uses == 1
|
||||
&& or.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& mergePoint(b,x0,x1,y) != nil
|
||||
&& clobber(x0, x1, s0, s1, or)
|
||||
-> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
|
||||
-> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p0 mem)) y)
|
||||
|
||||
(ORQ
|
||||
s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem))
|
||||
s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p0 mem))
|
||||
or:(ORQ
|
||||
s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem))
|
||||
s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p1 mem))
|
||||
y))
|
||||
&& i1 == i0+2
|
||||
&& j1 == j0+16
|
||||
|
@ -1649,180 +1581,105 @@
|
|||
&& s0.Uses == 1
|
||||
&& s1.Uses == 1
|
||||
&& or.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& mergePoint(b,x0,x1,y) != nil
|
||||
&& clobber(x0, x1, s0, s1, or)
|
||||
-> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
|
||||
-> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p0 mem)) y)
|
||||
|
||||
// Little-endian indexed loads
|
||||
|
||||
(ORL x0:(MOVBloadidx1 [i0] {s} p idx mem)
|
||||
sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
|
||||
&& i1 == i0+1
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& sh.Uses == 1
|
||||
&& mergePoint(b,x0,x1) != nil
|
||||
&& clobber(x0, x1, sh)
|
||||
-> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
|
||||
// Move constant offsets from LEAQx up into load. This lets the above combining
|
||||
// rules discover indexed load-combining instances.
|
||||
(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
|
||||
-> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) mem)
|
||||
(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
|
||||
-> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) mem)
|
||||
(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
|
||||
-> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) mem)
|
||||
(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
|
||||
-> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) mem)
|
||||
|
||||
(ORQ x0:(MOVBloadidx1 [i0] {s} p idx mem)
|
||||
sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
|
||||
&& i1 == i0+1
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& sh.Uses == 1
|
||||
&& mergePoint(b,x0,x1) != nil
|
||||
&& clobber(x0, x1, sh)
|
||||
-> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
|
||||
|
||||
(ORL x0:(MOVWloadidx1 [i0] {s} p idx mem)
|
||||
sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
|
||||
&& i1 == i0+2
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& sh.Uses == 1
|
||||
&& mergePoint(b,x0,x1) != nil
|
||||
&& clobber(x0, x1, sh)
|
||||
-> @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
|
||||
|
||||
(ORQ x0:(MOVWloadidx1 [i0] {s} p idx mem)
|
||||
sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
|
||||
&& i1 == i0+2
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& sh.Uses == 1
|
||||
&& mergePoint(b,x0,x1) != nil
|
||||
&& clobber(x0, x1, sh)
|
||||
-> @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
|
||||
|
||||
(ORQ x0:(MOVLloadidx1 [i0] {s} p idx mem)
|
||||
sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem)))
|
||||
&& i1 == i0+4
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& sh.Uses == 1
|
||||
&& mergePoint(b,x0,x1) != nil
|
||||
&& clobber(x0, x1, sh)
|
||||
-> @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
|
||||
|
||||
(ORL
|
||||
s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
|
||||
or:(ORL
|
||||
s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
|
||||
y))
|
||||
&& i1 == i0+1
|
||||
&& j1 == j0+8
|
||||
&& j0 % 16 == 0
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& s0.Uses == 1
|
||||
&& s1.Uses == 1
|
||||
&& or.Uses == 1
|
||||
&& mergePoint(b,x0,x1,y) != nil
|
||||
&& clobber(x0, x1, s0, s1, or)
|
||||
-> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
|
||||
|
||||
(ORQ
|
||||
s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
|
||||
or:(ORQ
|
||||
s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
|
||||
y))
|
||||
&& i1 == i0+1
|
||||
&& j1 == j0+8
|
||||
&& j0 % 16 == 0
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& s0.Uses == 1
|
||||
&& s1.Uses == 1
|
||||
&& or.Uses == 1
|
||||
&& mergePoint(b,x0,x1,y) != nil
|
||||
&& clobber(x0, x1, s0, s1, or)
|
||||
-> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
|
||||
|
||||
(ORQ
|
||||
s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem))
|
||||
or:(ORQ
|
||||
s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem))
|
||||
y))
|
||||
&& i1 == i0+2
|
||||
&& j1 == j0+16
|
||||
&& j0 % 32 == 0
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& s0.Uses == 1
|
||||
&& s1.Uses == 1
|
||||
&& or.Uses == 1
|
||||
&& mergePoint(b,x0,x1,y) != nil
|
||||
&& clobber(x0, x1, s0, s1, or)
|
||||
-> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
|
||||
(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
|
||||
-> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) val mem)
|
||||
(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
|
||||
-> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) val mem)
|
||||
(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
|
||||
-> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) val mem)
|
||||
(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
|
||||
-> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) val mem)
|
||||
|
||||
// Big-endian loads
|
||||
|
||||
(ORL
|
||||
x1:(MOVBload [i1] {s} p mem)
|
||||
sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p mem)))
|
||||
x1:(MOVBload [i1] {s} p0 mem)
|
||||
sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p1 mem)))
|
||||
&& i1 == i0+1
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& sh.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& mergePoint(b,x0,x1) != nil
|
||||
&& clobber(x0, x1, sh)
|
||||
-> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
|
||||
-> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p0 mem))
|
||||
|
||||
(ORQ
|
||||
x1:(MOVBload [i1] {s} p mem)
|
||||
sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p mem)))
|
||||
x1:(MOVBload [i1] {s} p0 mem)
|
||||
sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p1 mem)))
|
||||
&& i1 == i0+1
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& sh.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& mergePoint(b,x0,x1) != nil
|
||||
&& clobber(x0, x1, sh)
|
||||
-> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
|
||||
-> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p0 mem))
|
||||
|
||||
(ORL
|
||||
r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))
|
||||
sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
|
||||
r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p0 mem))
|
||||
sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p1 mem))))
|
||||
&& i1 == i0+2
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& r0.Uses == 1
|
||||
&& r1.Uses == 1
|
||||
&& sh.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& mergePoint(b,x0,x1) != nil
|
||||
&& clobber(x0, x1, r0, r1, sh)
|
||||
-> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
|
||||
-> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p0 mem))
|
||||
|
||||
(ORQ
|
||||
r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))
|
||||
sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
|
||||
r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p0 mem))
|
||||
sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p1 mem))))
|
||||
&& i1 == i0+2
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& r0.Uses == 1
|
||||
&& r1.Uses == 1
|
||||
&& sh.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& mergePoint(b,x0,x1) != nil
|
||||
&& clobber(x0, x1, r0, r1, sh)
|
||||
-> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
|
||||
-> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p0 mem))
|
||||
|
||||
(ORQ
|
||||
r1:(BSWAPL x1:(MOVLload [i1] {s} p mem))
|
||||
sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p mem))))
|
||||
r1:(BSWAPL x1:(MOVLload [i1] {s} p0 mem))
|
||||
sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p1 mem))))
|
||||
&& i1 == i0+4
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& r0.Uses == 1
|
||||
&& r1.Uses == 1
|
||||
&& sh.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& mergePoint(b,x0,x1) != nil
|
||||
&& clobber(x0, x1, r0, r1, sh)
|
||||
-> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem))
|
||||
-> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p0 mem))
|
||||
|
||||
(ORL
|
||||
s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem))
|
||||
s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p0 mem))
|
||||
or:(ORL
|
||||
s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem))
|
||||
s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p1 mem))
|
||||
y))
|
||||
&& i1 == i0+1
|
||||
&& j1 == j0-8
|
||||
|
@ -1832,14 +1689,15 @@
|
|||
&& s0.Uses == 1
|
||||
&& s1.Uses == 1
|
||||
&& or.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& mergePoint(b,x0,x1,y) != nil
|
||||
&& clobber(x0, x1, s0, s1, or)
|
||||
-> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
|
||||
-> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p0 mem))) y)
|
||||
|
||||
(ORQ
|
||||
s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem))
|
||||
s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p0 mem))
|
||||
or:(ORQ
|
||||
s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem))
|
||||
s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p1 mem))
|
||||
y))
|
||||
&& i1 == i0+1
|
||||
&& j1 == j0-8
|
||||
|
@ -1849,14 +1707,15 @@
|
|||
&& s0.Uses == 1
|
||||
&& s1.Uses == 1
|
||||
&& or.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& mergePoint(b,x0,x1,y) != nil
|
||||
&& clobber(x0, x1, s0, s1, or)
|
||||
-> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
|
||||
-> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p0 mem))) y)
|
||||
|
||||
(ORQ
|
||||
s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem)))
|
||||
s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p0 mem)))
|
||||
or:(ORQ
|
||||
s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)))
|
||||
s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p1 mem)))
|
||||
y))
|
||||
&& i1 == i0+2
|
||||
&& j1 == j0-16
|
||||
|
@ -1868,168 +1727,41 @@
|
|||
&& s0.Uses == 1
|
||||
&& s1.Uses == 1
|
||||
&& or.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& mergePoint(b,x0,x1,y) != nil
|
||||
&& clobber(x0, x1, r0, r1, s0, s1, or)
|
||||
-> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i0] {s} p mem))) y)
|
||||
|
||||
// Big-endian indexed loads
|
||||
|
||||
(ORL
|
||||
x1:(MOVBloadidx1 [i1] {s} p idx mem)
|
||||
sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
|
||||
&& i1 == i0+1
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& sh.Uses == 1
|
||||
&& mergePoint(b,x0,x1) != nil
|
||||
&& clobber(x0, x1, sh)
|
||||
-> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
|
||||
|
||||
(ORQ
|
||||
x1:(MOVBloadidx1 [i1] {s} p idx mem)
|
||||
sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
|
||||
&& i1 == i0+1
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& sh.Uses == 1
|
||||
&& mergePoint(b,x0,x1) != nil
|
||||
&& clobber(x0, x1, sh)
|
||||
-> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
|
||||
|
||||
(ORL
|
||||
r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))
|
||||
sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
|
||||
&& i1 == i0+2
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& r0.Uses == 1
|
||||
&& r1.Uses == 1
|
||||
&& sh.Uses == 1
|
||||
&& mergePoint(b,x0,x1) != nil
|
||||
&& clobber(x0, x1, r0, r1, sh)
|
||||
-> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
|
||||
|
||||
(ORQ
|
||||
r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))
|
||||
sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
|
||||
&& i1 == i0+2
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& r0.Uses == 1
|
||||
&& r1.Uses == 1
|
||||
&& sh.Uses == 1
|
||||
&& mergePoint(b,x0,x1) != nil
|
||||
&& clobber(x0, x1, r0, r1, sh)
|
||||
-> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
|
||||
|
||||
(ORQ
|
||||
r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem))
|
||||
sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem))))
|
||||
&& i1 == i0+4
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& r0.Uses == 1
|
||||
&& r1.Uses == 1
|
||||
&& sh.Uses == 1
|
||||
&& mergePoint(b,x0,x1) != nil
|
||||
&& clobber(x0, x1, r0, r1, sh)
|
||||
-> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
|
||||
|
||||
(ORL
|
||||
s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
|
||||
or:(ORL
|
||||
s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
|
||||
y))
|
||||
&& i1 == i0+1
|
||||
&& j1 == j0-8
|
||||
&& j1 % 16 == 0
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& s0.Uses == 1
|
||||
&& s1.Uses == 1
|
||||
&& or.Uses == 1
|
||||
&& mergePoint(b,x0,x1,y) != nil
|
||||
&& clobber(x0, x1, s0, s1, or)
|
||||
-> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
|
||||
|
||||
(ORQ
|
||||
s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
|
||||
or:(ORQ
|
||||
s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
|
||||
y))
|
||||
&& i1 == i0+1
|
||||
&& j1 == j0-8
|
||||
&& j1 % 16 == 0
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& s0.Uses == 1
|
||||
&& s1.Uses == 1
|
||||
&& or.Uses == 1
|
||||
&& mergePoint(b,x0,x1,y) != nil
|
||||
&& clobber(x0, x1, s0, s1, or)
|
||||
-> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
|
||||
|
||||
(ORQ
|
||||
s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem)))
|
||||
or:(ORQ
|
||||
s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
|
||||
y))
|
||||
&& i1 == i0+2
|
||||
&& j1 == j0-16
|
||||
&& j1 % 32 == 0
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& r0.Uses == 1
|
||||
&& r1.Uses == 1
|
||||
&& s0.Uses == 1
|
||||
&& s1.Uses == 1
|
||||
&& or.Uses == 1
|
||||
&& mergePoint(b,x0,x1,y) != nil
|
||||
&& clobber(x0, x1, r0, r1, s0, s1, or)
|
||||
-> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
|
||||
-> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i0] {s} p0 mem))) y)
|
||||
|
||||
// Combine 2 byte stores + shift into rolw 8 + word store
|
||||
(MOVBstore [i] {s} p w
|
||||
x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem))
|
||||
(MOVBstore [i] {s} p1 w
|
||||
x0:(MOVBstore [i-1] {s} p0 (SHRWconst [8] w) mem))
|
||||
&& x0.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& clobber(x0)
|
||||
-> (MOVWstore [i-1] {s} p (ROLWconst <w.Type> [8] w) mem)
|
||||
|
||||
(MOVBstoreidx1 [i] {s} p idx w
|
||||
x0:(MOVBstoreidx1 [i-1] {s} p idx (SHRWconst [8] w) mem))
|
||||
&& x0.Uses == 1
|
||||
&& clobber(x0)
|
||||
-> (MOVWstoreidx1 [i-1] {s} p idx (ROLWconst <w.Type> [8] w) mem)
|
||||
-> (MOVWstore [i-1] {s} p0 (ROLWconst <w.Type> [8] w) mem)
|
||||
|
||||
// Combine stores + shifts into bswap and larger (unaligned) stores
|
||||
(MOVBstore [i] {s} p w
|
||||
x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w)
|
||||
x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w)
|
||||
x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem))))
|
||||
(MOVBstore [i] {s} p3 w
|
||||
x2:(MOVBstore [i-1] {s} p2 (SHRLconst [8] w)
|
||||
x1:(MOVBstore [i-2] {s} p1 (SHRLconst [16] w)
|
||||
x0:(MOVBstore [i-3] {s} p0 (SHRLconst [24] w) mem))))
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& x2.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& same(p1, p2, 1)
|
||||
&& same(p2, p3, 1)
|
||||
&& clobber(x0, x1, x2)
|
||||
-> (MOVLstore [i-3] {s} p (BSWAPL <w.Type> w) mem)
|
||||
-> (MOVLstore [i-3] {s} p0 (BSWAPL <w.Type> w) mem)
|
||||
|
||||
(MOVBstoreidx1 [i] {s} p idx w
|
||||
x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w)
|
||||
x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w)
|
||||
x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem))))
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& x2.Uses == 1
|
||||
&& clobber(x0, x1, x2)
|
||||
-> (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
|
||||
|
||||
(MOVBstore [i] {s} p w
|
||||
x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w)
|
||||
x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w)
|
||||
x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w)
|
||||
x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w)
|
||||
x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w)
|
||||
x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w)
|
||||
x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem))))))))
|
||||
(MOVBstore [i] {s} p7 w
|
||||
x6:(MOVBstore [i-1] {s} p6 (SHRQconst [8] w)
|
||||
x5:(MOVBstore [i-2] {s} p5 (SHRQconst [16] w)
|
||||
x4:(MOVBstore [i-3] {s} p4 (SHRQconst [24] w)
|
||||
x3:(MOVBstore [i-4] {s} p3 (SHRQconst [32] w)
|
||||
x2:(MOVBstore [i-5] {s} p2 (SHRQconst [40] w)
|
||||
x1:(MOVBstore [i-6] {s} p1 (SHRQconst [48] w)
|
||||
x0:(MOVBstore [i-7] {s} p0 (SHRQconst [56] w) mem))))))))
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& x2.Uses == 1
|
||||
|
@ -2037,165 +1769,99 @@
|
|||
&& x4.Uses == 1
|
||||
&& x5.Uses == 1
|
||||
&& x6.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& same(p1, p2, 1)
|
||||
&& same(p2, p3, 1)
|
||||
&& same(p3, p4, 1)
|
||||
&& same(p4, p5, 1)
|
||||
&& same(p5, p6, 1)
|
||||
&& same(p6, p7, 1)
|
||||
&& clobber(x0, x1, x2, x3, x4, x5, x6)
|
||||
-> (MOVQstore [i-7] {s} p (BSWAPQ <w.Type> w) mem)
|
||||
|
||||
(MOVBstoreidx1 [i] {s} p idx w
|
||||
x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w)
|
||||
x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w)
|
||||
x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w)
|
||||
x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w)
|
||||
x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w)
|
||||
x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w)
|
||||
x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& x2.Uses == 1
|
||||
&& x3.Uses == 1
|
||||
&& x4.Uses == 1
|
||||
&& x5.Uses == 1
|
||||
&& x6.Uses == 1
|
||||
&& clobber(x0, x1, x2, x3, x4, x5, x6)
|
||||
-> (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
|
||||
-> (MOVQstore [i-7] {s} p0 (BSWAPQ <w.Type> w) mem)
|
||||
|
||||
// Combine constant stores into larger (unaligned) stores.
|
||||
(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
|
||||
(MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem))
|
||||
&& x.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
|
||||
&& clobber(x)
|
||||
-> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
|
||||
(MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem))
|
||||
-> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem)
|
||||
(MOVBstoreconst [a] {s} p1 x:(MOVBstoreconst [c] {s} p0 mem))
|
||||
&& x.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
|
||||
&& clobber(x)
|
||||
-> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
|
||||
(MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
|
||||
-> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem)
|
||||
(MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem))
|
||||
&& x.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
|
||||
&& clobber(x)
|
||||
-> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
|
||||
(MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem))
|
||||
-> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem)
|
||||
(MOVWstoreconst [a] {s} p1 x:(MOVWstoreconst [c] {s} p0 mem))
|
||||
&& x.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
|
||||
&& clobber(x)
|
||||
-> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
|
||||
(MOVLstoreconst [c] {s} p x:(MOVLstoreconst [a] {s} p mem))
|
||||
-> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem)
|
||||
(MOVLstoreconst [c] {s} p1 x:(MOVLstoreconst [a] {s} p0 mem))
|
||||
&& x.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
|
||||
&& clobber(x)
|
||||
-> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
|
||||
(MOVLstoreconst [a] {s} p x:(MOVLstoreconst [c] {s} p mem))
|
||||
-> (MOVQstore [ValAndOff(a).Off()] {s} p0 (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
|
||||
(MOVLstoreconst [a] {s} p1 x:(MOVLstoreconst [c] {s} p0 mem))
|
||||
&& x.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
|
||||
&& clobber(x)
|
||||
-> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
|
||||
(MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem))
|
||||
-> (MOVQstore [ValAndOff(a).Off()] {s} p0 (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
|
||||
(MOVQstoreconst [c] {s} p1 x:(MOVQstoreconst [c2] {s} p0 mem))
|
||||
&& config.useSSE
|
||||
&& x.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& ValAndOff(c2).Off() + 8 == ValAndOff(c).Off()
|
||||
&& ValAndOff(c).Val() == 0
|
||||
&& ValAndOff(c2).Val() == 0
|
||||
&& clobber(x)
|
||||
-> (MOVOstore [ValAndOff(c2).Off()] {s} p (MOVOconst [0]) mem)
|
||||
|
||||
(MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem))
|
||||
&& x.Uses == 1
|
||||
&& ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
|
||||
&& clobber(x)
|
||||
-> (MOVWstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p i mem)
|
||||
(MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem))
|
||||
&& x.Uses == 1
|
||||
&& ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
|
||||
&& clobber(x)
|
||||
-> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem)
|
||||
(MOVLstoreconstidx1 [c] {s} p i x:(MOVLstoreconstidx1 [a] {s} p i mem))
|
||||
&& x.Uses == 1
|
||||
&& ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
|
||||
&& clobber(x)
|
||||
-> (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p i (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
|
||||
|
||||
(MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem))
|
||||
&& x.Uses == 1
|
||||
&& ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
|
||||
&& clobber(x)
|
||||
-> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLQconst <i.Type> [1] i) mem)
|
||||
(MOVLstoreconstidx4 [c] {s} p i x:(MOVLstoreconstidx4 [a] {s} p i mem))
|
||||
&& x.Uses == 1
|
||||
&& ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
|
||||
&& clobber(x)
|
||||
-> (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p (SHLQconst <i.Type> [2] i) (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
|
||||
-> (MOVOstore [ValAndOff(c2).Off()] {s} p0 (MOVOconst [0]) mem)
|
||||
|
||||
// Combine stores into larger (unaligned) stores.
|
||||
(MOVBstore [i] {s} p (SHR(W|L|Q)const [8] w) x:(MOVBstore [i-1] {s} p w mem))
|
||||
(MOVBstore [i] {s} p1 (SHR(W|L|Q)const [8] w) x:(MOVBstore [i-1] {s} p0 w mem))
|
||||
&& x.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& clobber(x)
|
||||
-> (MOVWstore [i-1] {s} p w mem)
|
||||
(MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHR(W|L|Q)const [8] w) mem))
|
||||
-> (MOVWstore [i-1] {s} p0 w mem)
|
||||
(MOVBstore [i] {s} p1 w x:(MOVBstore [i+1] {s} p0 (SHR(W|L|Q)const [8] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& clobber(x)
|
||||
-> (MOVWstore [i] {s} p w mem)
|
||||
(MOVBstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVBstore [i-1] {s} p w0:(SHR(L|Q)const [j-8] w) mem))
|
||||
-> (MOVWstore [i] {s} p0 w mem)
|
||||
(MOVBstore [i] {s} p1 (SHR(L|Q)const [j] w) x:(MOVBstore [i-1] {s} p0 w0:(SHR(L|Q)const [j-8] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& clobber(x)
|
||||
-> (MOVWstore [i-1] {s} p w0 mem)
|
||||
(MOVWstore [i] {s} p (SHR(L|Q)const [16] w) x:(MOVWstore [i-2] {s} p w mem))
|
||||
-> (MOVWstore [i-1] {s} p0 w0 mem)
|
||||
(MOVWstore [i] {s} p1 (SHR(L|Q)const [16] w) x:(MOVWstore [i-2] {s} p0 w mem))
|
||||
&& x.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& clobber(x)
|
||||
-> (MOVLstore [i-2] {s} p w mem)
|
||||
(MOVWstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVWstore [i-2] {s} p w0:(SHR(L|Q)const [j-16] w) mem))
|
||||
-> (MOVLstore [i-2] {s} p0 w mem)
|
||||
(MOVWstore [i] {s} p1 (SHR(L|Q)const [j] w) x:(MOVWstore [i-2] {s} p0 w0:(SHR(L|Q)const [j-16] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& clobber(x)
|
||||
-> (MOVLstore [i-2] {s} p w0 mem)
|
||||
(MOVLstore [i] {s} p (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p w mem))
|
||||
-> (MOVLstore [i-2] {s} p0 w0 mem)
|
||||
(MOVLstore [i] {s} p1 (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p0 w mem))
|
||||
&& x.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& clobber(x)
|
||||
-> (MOVQstore [i-4] {s} p w mem)
|
||||
(MOVLstore [i] {s} p (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem))
|
||||
-> (MOVQstore [i-4] {s} p0 w mem)
|
||||
(MOVLstore [i] {s} p1 (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p0 w0:(SHRQconst [j-32] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& same(p0, p1, 1)
|
||||
&& clobber(x)
|
||||
-> (MOVQstore [i-4] {s} p w0 mem)
|
||||
|
||||
(MOVBstoreidx1 [i] {s} p idx (SHR(W|L|Q)const [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
|
||||
&& x.Uses == 1
|
||||
&& clobber(x)
|
||||
-> (MOVWstoreidx1 [i-1] {s} p idx w mem)
|
||||
(MOVBstoreidx1 [i] {s} p idx (SHR(L|Q)const [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHR(L|Q)const [j-8] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& clobber(x)
|
||||
-> (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
|
||||
(MOVWstoreidx1 [i] {s} p idx (SHR(L|Q)const [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
|
||||
&& x.Uses == 1
|
||||
&& clobber(x)
|
||||
-> (MOVLstoreidx1 [i-2] {s} p idx w mem)
|
||||
(MOVWstoreidx1 [i] {s} p idx (SHR(L|Q)const [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHR(L|Q)const [j-16] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& clobber(x)
|
||||
-> (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
|
||||
(MOVLstoreidx1 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx1 [i-4] {s} p idx w mem))
|
||||
&& x.Uses == 1
|
||||
&& clobber(x)
|
||||
-> (MOVQstoreidx1 [i-4] {s} p idx w mem)
|
||||
(MOVLstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx1 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& clobber(x)
|
||||
-> (MOVQstoreidx1 [i-4] {s} p idx w0 mem)
|
||||
|
||||
(MOVWstoreidx2 [i] {s} p idx (SHR(L|Q)const [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
|
||||
&& x.Uses == 1
|
||||
&& clobber(x)
|
||||
-> (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w mem)
|
||||
(MOVWstoreidx2 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& clobber(x)
|
||||
-> (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w0 mem)
|
||||
(MOVLstoreidx4 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx4 [i-4] {s} p idx w mem))
|
||||
&& x.Uses == 1
|
||||
&& clobber(x)
|
||||
-> (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w mem)
|
||||
(MOVLstoreidx4 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx4 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& clobber(x)
|
||||
-> (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w0 mem)
|
||||
-> (MOVQstore [i-4] {s} p0 w0 mem)
|
||||
|
||||
(MOVBstore [i] {s} p
|
||||
x1:(MOVBload [j] {s2} p2 mem)
|
||||
|
@ -2320,41 +1986,6 @@
|
|||
(BSFQ (ORQconst <t> [1<<8] (MOVBQZX x))) -> (BSFQ (ORQconst <t> [1<<8] x))
|
||||
(BSFQ (ORQconst <t> [1<<16] (MOVWQZX x))) -> (BSFQ (ORQconst <t> [1<<16] x))
|
||||
|
||||
// Simplify indexed loads/stores
|
||||
(MOVBstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVBstore [i+c] {s} p w mem)
|
||||
(MOVWstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVWstore [i+c] {s} p w mem)
|
||||
(MOVLstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVLstore [i+c] {s} p w mem)
|
||||
(MOVQstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVQstore [i+c] {s} p w mem)
|
||||
(MOVWstoreidx2 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+2*c) -> (MOVWstore [i+2*c] {s} p w mem)
|
||||
(MOVLstoreidx4 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+4*c) -> (MOVLstore [i+4*c] {s} p w mem)
|
||||
(MOVLstoreidx8 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+8*c) -> (MOVLstore [i+8*c] {s} p w mem)
|
||||
(MOVQstoreidx8 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+8*c) -> (MOVQstore [i+8*c] {s} p w mem)
|
||||
(MOVSSstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVSSstore [i+c] {s} p w mem)
|
||||
(MOVSSstoreidx4 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+4*c) -> (MOVSSstore [i+4*c] {s} p w mem)
|
||||
(MOVSDstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVSDstore [i+c] {s} p w mem)
|
||||
(MOVSDstoreidx8 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+8*c) -> (MOVSDstore [i+8*c] {s} p w mem)
|
||||
(MOVBloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVBload [i+c] {s} p mem)
|
||||
(MOVWloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVWload [i+c] {s} p mem)
|
||||
(MOVLloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVLload [i+c] {s} p mem)
|
||||
(MOVQloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVQload [i+c] {s} p mem)
|
||||
(MOVWloadidx2 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+2*c) -> (MOVWload [i+2*c] {s} p mem)
|
||||
(MOVLloadidx4 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+4*c) -> (MOVLload [i+4*c] {s} p mem)
|
||||
(MOVLloadidx8 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+8*c) -> (MOVLload [i+8*c] {s} p mem)
|
||||
(MOVQloadidx8 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+8*c) -> (MOVQload [i+8*c] {s} p mem)
|
||||
(MOVSSloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVSSload [i+c] {s} p mem)
|
||||
(MOVSSloadidx4 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+4*c) -> (MOVSSload [i+4*c] {s} p mem)
|
||||
(MOVSDloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVSDload [i+c] {s} p mem)
|
||||
(MOVSDloadidx8 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+8*c) -> (MOVSDload [i+8*c] {s} p mem)
|
||||
|
||||
// Combine consts into storeidx.
|
||||
// Note that when c == 0, it takes more bytes to encode
|
||||
// the immediate $0 than to zero a register and use it.
|
||||
// We do the rewrite anyway, to minimize register pressure.
|
||||
(MOVBstoreidx1 [off] {s} ptr idx (MOVLconst [c]) mem) && validValAndOff(int64(int8(c)), off) -> (MOVBstoreconstidx1 [makeValAndOff(int64(int8(c)), off)] {s} ptr idx mem)
|
||||
(MOVWstoreidx(1|2) [off] {s} ptr idx (MOVLconst [c]) mem) && validValAndOff(int64(int16(c)), off) -> (MOVWstoreconstidx(1|2) [makeValAndOff(int64(int16(c)), off)] {s} ptr idx mem)
|
||||
(MOVLstoreidx(1|4) [off] {s} ptr idx (MOVQconst [c]) mem) && validValAndOff(int64(int32(c)), off) -> (MOVLstoreconstidx(1|4) [makeValAndOff(int64(int32(c)), off)] {s} ptr idx mem)
|
||||
(MOVQstoreidx(1|8) [off] {s} ptr idx (MOVQconst [c]) mem) && validValAndOff(c, off) -> (MOVQstoreconstidx(1|8) [makeValAndOff(c, off)] {s} ptr idx mem)
|
||||
|
||||
// Redundant sign/zero extensions
|
||||
// Note: see issue 21963. We have to make sure we use the right type on
|
||||
// the resulting extension (the outer type, not the inner type).
|
||||
|
|
|
@ -1247,3 +1247,43 @@ func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
|
|||
copy(buf, src)
|
||||
return byteorder.Uint64(buf)
|
||||
}
|
||||
|
||||
// same reports whether x and y are the same value.
|
||||
// It checks to a maximum depth of d, so it may report
|
||||
// a false negative.
|
||||
func same(x, y *Value, depth int) bool {
|
||||
if x == y {
|
||||
return true
|
||||
}
|
||||
if depth <= 0 {
|
||||
return false
|
||||
}
|
||||
if x.Op != y.Op || x.Aux != y.Aux || x.AuxInt != y.AuxInt {
|
||||
return false
|
||||
}
|
||||
if len(x.Args) != len(y.Args) {
|
||||
return false
|
||||
}
|
||||
if opcodeTable[x.Op].commutative {
|
||||
// Check exchanged ordering first.
|
||||
for i, a := range x.Args {
|
||||
j := i
|
||||
if j < 2 {
|
||||
j ^= 1
|
||||
}
|
||||
b := y.Args[j]
|
||||
if !same(a, b, depth-1) {
|
||||
goto checkNormalOrder
|
||||
}
|
||||
}
|
||||
return true
|
||||
checkNormalOrder:
|
||||
}
|
||||
for i, a := range x.Args {
|
||||
b := y.Args[i]
|
||||
if !same(a, b, depth-1) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -93,3 +93,91 @@ func compMem3(x, y *int) (int, bool) {
|
|||
// 386:`CMPL\t\(`
|
||||
return r, r < *y
|
||||
}
|
||||
|
||||
// The following functions test that indexed load/store operations get generated.
|
||||
|
||||
// idxInt8 copies x[i+1] into y[i+1] and then stores the constant 77 into x[i+1].
// The backquoted amd64 patterns describe the instruction expected for the
// statement directly below each of them (presumably read by the codegen test
// harness), so every pattern must stay immediately above its statement.
func idxInt8(x, y []int8, i int) {
	var t int8
	// amd64: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
	t = x[i+1]
	// amd64: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
	y[i+1] = t
	// amd64: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
	x[i+1] = 77
}
|
||||
|
||||
// idxInt16 copies x[i+1] and x[16*i+1] into the same positions of y and then
// stores the constant 77 into both of those positions of x.
// The backquoted amd64 patterns describe the instruction expected for the
// statement directly below each of them (presumably read by the codegen test
// harness), so every pattern must stay immediately above its statement.
func idxInt16(x, y []int16, i int) {
	var t int16
	// amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
	t = x[i+1]
	// amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
	y[i+1] = t
	// amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
	t = x[16*i+1]
	// amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
	y[16*i+1] = t
	// amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
	x[i+1] = 77
	// amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
	x[16*i+1] = 77
}
|
||||
|
||||
// idxInt32 copies x[i+1], x[2*i+1] and x[16*i+1] into the same positions of y
// and then stores the constant 77 into x[i+1] and x[16*i+1].
// The backquoted amd64 patterns describe the instruction expected for the
// statement directly below each of them (presumably read by the codegen test
// harness), so every pattern must stay immediately above its statement.
func idxInt32(x, y []int32, i int) {
	var t int32
	// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
	t = x[i+1]
	// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
	y[i+1] = t
	// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
	t = x[2*i+1]
	// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
	y[2*i+1] = t
	// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
	t = x[16*i+1]
	// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
	y[16*i+1] = t
	// amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
	x[i+1] = 77
	// amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
	x[16*i+1] = 77
}
|
||||
|
||||
// idxInt64 copies x[i+1] and x[16*i+1] into the same positions of y and then
// stores the constant 77 into both of those positions of x.
// The backquoted amd64 patterns describe the instruction expected for the
// statement directly below each of them (presumably read by the codegen test
// harness), so every pattern must stay immediately above its statement.
func idxInt64(x, y []int64, i int) {
	var t int64
	// amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
	t = x[i+1]
	// amd64: `MOVQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
	y[i+1] = t
	// amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), [A-Z]+[0-9]*`
	t = x[16*i+1]
	// amd64: `MOVQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
	y[16*i+1] = t
	// amd64: `MOVQ\t[$]77, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
	x[i+1] = 77
	// amd64: `MOVQ\t[$]77, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
	x[16*i+1] = 77
}
|
||||
|
||||
// idxFloat32 copies x[i+1] and x[16*i+1] into the same positions of y.
// It does not modify x.
// The backquoted amd64 patterns describe the instruction expected for the
// statement directly below each of them (presumably read by the codegen test
// harness), so every pattern must stay immediately above its statement.
func idxFloat32(x, y []float32, i int) {
	var t float32
	// amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
	t = x[i+1]
	// amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
	y[i+1] = t
	// amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
	t = x[16*i+1]
	// amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
	y[16*i+1] = t
}
|
||||
|
||||
// idxFloat64 copies x[i+1] and x[16*i+1] into the same positions of y.
// It does not modify x.
// The backquoted amd64 patterns describe the instruction expected for the
// statement directly below each of them (presumably read by the codegen test
// harness), so every pattern must stay immediately above its statement.
func idxFloat64(x, y []float64, i int) {
	var t float64
	// amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
	t = x[i+1]
	// amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
	y[i+1] = t
	// amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
	t = x[16*i+1]
	// amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
	y[16*i+1] = t
}
|
||||
|
|
Loading…
Reference in a new issue