cmd/compile: improve load/store merging on s390x

This commit makes the process of load/store merging more incremental
for both big and little endian operations. It also adds support for
32-bit shifts (needed to merge 16- and 32-bit loads/stores).

In addition, the merging of little endian stores is now supported.
Little endian stores are now up to 30 times faster.

Change-Id: Iefdd81eda4a65b335f23c3ff222146540083ad9c
Reviewed-on: https://go-review.googlesource.com/29956
Run-TryBot: Michael Munday <munday@ca.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
Michael Munday 2016-09-27 20:30:01 -04:00
parent 41fa42b447
commit 962dc4b44d
5 changed files with 2150 additions and 524 deletions

View file

@ -368,6 +368,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpS390XMOVBstore, ssa.OpS390XMOVHstore, ssa.OpS390XMOVWstore, ssa.OpS390XMOVDstore,
ssa.OpS390XMOVHBRstore, ssa.OpS390XMOVWBRstore, ssa.OpS390XMOVDBRstore,
ssa.OpS390XFMOVSstore, ssa.OpS390XFMOVDstore:
p := gc.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
@ -376,6 +377,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Reg = v.Args[0].Reg()
gc.AddAux(&p.To, v)
case ssa.OpS390XMOVBstoreidx, ssa.OpS390XMOVHstoreidx, ssa.OpS390XMOVWstoreidx, ssa.OpS390XMOVDstoreidx,
ssa.OpS390XMOVHBRstoreidx, ssa.OpS390XMOVWBRstoreidx, ssa.OpS390XMOVDBRstoreidx,
ssa.OpS390XFMOVSstoreidx, ssa.OpS390XFMOVDstoreidx:
r := v.Args[0].Reg()
i := v.Args[1].Reg()

View file

@ -543,6 +543,14 @@
(MOVWZreg x:(MOVHZload _ _)) -> x
(MOVWZreg x:(MOVWZload _ _)) -> x
// don't extend if argument is already extended
(MOVBreg x:(Arg <t>)) && is8BitInt(t) && isSigned(t) -> x
(MOVBZreg x:(Arg <t>)) && is8BitInt(t) && !isSigned(t) -> x
(MOVHreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t)) && isSigned(t) -> x
(MOVHZreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t)) && !isSigned(t) -> x
(MOVWreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && isSigned(t) -> x
(MOVWZreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && !isSigned(t) -> x
// fold double extensions
(MOVBreg x:(MOVBreg _)) -> x
(MOVBZreg x:(MOVBZreg _)) -> x
@ -943,8 +951,8 @@
-> (MOVDstore [ValAndOff(a).Off()] {s} p (MOVDconst [ValAndOff(c).Val()&0xffffffff | ValAndOff(a).Val()<<32]) mem)
// Combine stores into larger (unaligned) stores.
// It doesn't work to global data (based on SB),
// because STGRL doesn't support unaligned address
// It doesn't work on global data (based on SB) because stores with relative addressing
// require that the memory operand be aligned.
(MOVBstore [i] {s} p w x:(MOVBstore [i-1] {s} p (SRDconst [8] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
@ -955,6 +963,16 @@
&& x.Uses == 1
&& clobber(x)
-> (MOVHstore [i-1] {s} p w0 mem)
(MOVBstore [i] {s} p w x:(MOVBstore [i-1] {s} p (SRWconst [8] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVHstore [i-1] {s} p w mem)
(MOVBstore [i] {s} p w0:(SRWconst [j] w) x:(MOVBstore [i-1] {s} p (SRWconst [j+8] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVHstore [i-1] {s} p w0 mem)
(MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRDconst [16] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
@ -965,6 +983,16 @@
&& x.Uses == 1
&& clobber(x)
-> (MOVWstore [i-2] {s} p w0 mem)
(MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRWconst [16] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVWstore [i-2] {s} p w mem)
(MOVHstore [i] {s} p w0:(SRWconst [j] w) x:(MOVHstore [i-2] {s} p (SRWconst [j+16] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVWstore [i-2] {s} p w0 mem)
(MOVWstore [i] {s} p (SRDconst [32] w) x:(MOVWstore [i-4] {s} p w mem))
&& p.Op != OpSB
&& x.Uses == 1
@ -986,6 +1014,16 @@
&& x.Uses == 1
&& clobber(x)
-> (MOVHstoreidx [i-1] {s} p idx w0 mem)
(MOVBstoreidx [i] {s} p idx w x:(MOVBstoreidx [i-1] {s} p idx (SRWconst [8] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVHstoreidx [i-1] {s} p idx w mem)
(MOVBstoreidx [i] {s} p idx w0:(SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx (SRWconst [j+8] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVHstoreidx [i-1] {s} p idx w0 mem)
(MOVHstoreidx [i] {s} p idx w x:(MOVHstoreidx [i-2] {s} p idx (SRDconst [16] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
@ -996,6 +1034,16 @@
&& x.Uses == 1
&& clobber(x)
-> (MOVWstoreidx [i-2] {s} p idx w0 mem)
(MOVHstoreidx [i] {s} p idx w x:(MOVHstoreidx [i-2] {s} p idx (SRWconst [16] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVWstoreidx [i-2] {s} p idx w mem)
(MOVHstoreidx [i] {s} p idx w0:(SRWconst [j] w) x:(MOVHstoreidx [i-2] {s} p idx (SRWconst [j+16] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVWstoreidx [i-2] {s} p idx w0 mem)
(MOVWstoreidx [i] {s} p idx w x:(MOVWstoreidx [i-4] {s} p idx (SRDconst [32] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
@ -1007,12 +1055,117 @@
&& clobber(x)
-> (MOVDstoreidx [i-4] {s} p idx w0 mem)
// Combine stores into larger (unaligned) stores with the bytes reversed (little endian).
// Store-with-bytes-reversed instructions do not support relative memory addresses,
// so these stores can't operate on global data (SB).
(MOVBstore [i] {s} p (SRDconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVHBRstore [i-1] {s} p w mem)
(MOVBstore [i] {s} p (SRDconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SRDconst [j-8] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVHBRstore [i-1] {s} p w0 mem)
(MOVBstore [i] {s} p (SRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVHBRstore [i-1] {s} p w mem)
(MOVBstore [i] {s} p (SRWconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SRWconst [j-8] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVHBRstore [i-1] {s} p w0 mem)
(MOVHBRstore [i] {s} p (SRDconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVWBRstore [i-2] {s} p w mem)
(MOVHBRstore [i] {s} p (SRDconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRDconst [j-16] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVWBRstore [i-2] {s} p w0 mem)
(MOVHBRstore [i] {s} p (SRWconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVWBRstore [i-2] {s} p w mem)
(MOVHBRstore [i] {s} p (SRWconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRWconst [j-16] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVWBRstore [i-2] {s} p w0 mem)
(MOVWBRstore [i] {s} p (SRDconst [32] w) x:(MOVWBRstore [i-4] {s} p w mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVDBRstore [i-4] {s} p w mem)
(MOVWBRstore [i] {s} p (SRDconst [j] w) x:(MOVWBRstore [i-4] {s} p w0:(SRDconst [j-32] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVDBRstore [i-4] {s} p w0 mem)
(MOVBstoreidx [i] {s} p idx (SRDconst [8] w) x:(MOVBstoreidx [i-1] {s} p idx w mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVHBRstoreidx [i-1] {s} p idx w mem)
(MOVBstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx w0:(SRDconst [j-8] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
(MOVBstoreidx [i] {s} p idx (SRWconst [8] w) x:(MOVBstoreidx [i-1] {s} p idx w mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVHBRstoreidx [i-1] {s} p idx w mem)
(MOVBstoreidx [i] {s} p idx (SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx w0:(SRWconst [j-8] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
(MOVHBRstoreidx [i] {s} p idx (SRDconst [16] w) x:(MOVHBRstoreidx [i-2] {s} p idx w mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVWBRstoreidx [i-2] {s} p idx w mem)
(MOVHBRstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVHBRstoreidx [i-2] {s} p idx w0:(SRDconst [j-16] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
(MOVHBRstoreidx [i] {s} p idx (SRWconst [16] w) x:(MOVHBRstoreidx [i-2] {s} p idx w mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVWBRstoreidx [i-2] {s} p idx w mem)
(MOVHBRstoreidx [i] {s} p idx (SRWconst [j] w) x:(MOVHBRstoreidx [i-2] {s} p idx w0:(SRWconst [j-16] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
(MOVWBRstoreidx [i] {s} p idx (SRDconst [32] w) x:(MOVWBRstoreidx [i-4] {s} p idx w mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVDBRstoreidx [i-4] {s} p idx w mem)
(MOVWBRstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVWBRstoreidx [i-4] {s} p idx w0:(SRDconst [j-32] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
-> (MOVDBRstoreidx [i-4] {s} p idx w0 mem)
// Combining byte loads into larger (unaligned) loads.
// Little endian loads.
// b[0] | b[1]<<8 -> load 16-bit, reverse bytes
(ORW x0:(MOVBZload [i] {s} p mem)
(ORW x0:(MOVBZload [i] {s} p mem)
s0:(SLWconst [8] x1:(MOVBZload [i+1] {s} p mem)))
&& p.Op != OpSB
&& x0.Uses == 1
@ -1025,36 +1178,30 @@
-> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i] {s} p mem))
// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 -> load 32-bit, reverse bytes
(ORW o0:(ORW o1:(ORW
x0:(MOVBZload [i] {s} p mem)
s0:(SLWconst [8] x1:(MOVBZload [i+1] {s} p mem)))
s1:(SLWconst [16] x2:(MOVBZload [i+2] {s} p mem)))
s2:(SLWconst [24] x3:(MOVBZload [i+3] {s} p mem)))
(ORW o0:(ORW z0:(MOVHZreg x0:(MOVHBRload [i] {s} p mem))
s0:(SLWconst [16] x1:(MOVBZload [i+2] {s} p mem)))
s1:(SLWconst [24] x2:(MOVBZload [i+3] {s} p mem)))
&& p.Op != OpSB
&& z0.Uses == 1
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& x3.Uses == 1
&& s0.Uses == 1
&& s1.Uses == 1
&& s2.Uses == 1
&& o0.Uses == 1
&& o1.Uses == 1
&& mergePoint(b,x0,x1,x2,x3) != nil
&& mergePoint(b,x0,x1,x2) != nil
&& clobber(z0)
&& clobber(x0)
&& clobber(x1)
&& clobber(x2)
&& clobber(x3)
&& clobber(s0)
&& clobber(s1)
&& clobber(s2)
&& clobber(o0)
&& clobber(o1)
-> @mergePoint(b,x0,x1,x2,x3) (MOVWZreg (MOVWBRload [i] {s} p mem))
-> @mergePoint(b,x0,x1,x2) (MOVWBRload [i] {s} p mem)
// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 | b[4]<<32 | b[5]<<40 | b[6]<<48 | b[7]<<56 -> load 64-bit, reverse bytes
(OR o0:(OR o1:(OR o2:(OR o3:(OR o4:(OR o5:(OR
x0:(MOVBZload [i] {s} p mem)
x0:(MOVBZload [i] {s} p mem)
s0:(SLDconst [8] x1:(MOVBZload [i+1] {s} p mem)))
s1:(SLDconst [16] x2:(MOVBZload [i+2] {s} p mem)))
s2:(SLDconst [24] x3:(MOVBZload [i+3] {s} p mem)))
@ -1109,7 +1256,7 @@
-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDBRload [i] {s} p mem)
// b[0] | b[1]<<8 -> load 16-bit, reverse bytes
(ORW x0:(MOVBZloadidx [i] {s} p idx mem)
(ORW x0:(MOVBZloadidx [i] {s} p idx mem)
s0:(SLWconst [8] x1:(MOVBZloadidx [i+1] {s} p idx mem)))
&& p.Op != OpSB
&& x0.Uses == 1
@ -1122,36 +1269,30 @@
-> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx <v.Type> [i] {s} p idx mem))
// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 -> load 32-bit, reverse bytes
(ORW o0:(ORW o1:(ORW
x0:(MOVBZloadidx [i] {s} p idx mem)
s0:(SLWconst [8] x1:(MOVBZloadidx [i+1] {s} p idx mem)))
s1:(SLWconst [16] x2:(MOVBZloadidx [i+2] {s} p idx mem)))
s2:(SLWconst [24] x3:(MOVBZloadidx [i+3] {s} p idx mem)))
(ORW o0:(ORW z0:(MOVHZreg x0:(MOVHBRloadidx [i] {s} p idx mem))
s0:(SLWconst [16] x1:(MOVBZloadidx [i+2] {s} p idx mem)))
s1:(SLWconst [24] x2:(MOVBZloadidx [i+3] {s} p idx mem)))
&& p.Op != OpSB
&& z0.Uses == 1
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& x3.Uses == 1
&& s0.Uses == 1
&& s1.Uses == 1
&& s2.Uses == 1
&& o0.Uses == 1
&& o1.Uses == 1
&& mergePoint(b,x0,x1,x2,x3) != nil
&& mergePoint(b,x0,x1,x2) != nil
&& clobber(z0)
&& clobber(x0)
&& clobber(x1)
&& clobber(x2)
&& clobber(x3)
&& clobber(s0)
&& clobber(s1)
&& clobber(s2)
&& clobber(o0)
&& clobber(o1)
-> @mergePoint(b,x0,x1,x2,x3) (MOVWZreg (MOVWBRloadidx <v.Type> [i] {s} p idx mem))
-> @mergePoint(b,x0,x1,x2) (MOVWZreg (MOVWBRloadidx <v.Type> [i] {s} p idx mem))
// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 | b[4]<<32 | b[5]<<40 | b[6]<<48 | b[7]<<56 -> load 64-bit, reverse bytes
(OR o0:(OR o1:(OR o2:(OR o3:(OR o4:(OR o5:(OR
x0:(MOVBZloadidx [i] {s} p idx mem)
x0:(MOVBZloadidx [i] {s} p idx mem)
s0:(SLDconst [8] x1:(MOVBZloadidx [i+1] {s} p idx mem)))
s1:(SLDconst [16] x2:(MOVBZloadidx [i+2] {s} p idx mem)))
s2:(SLDconst [24] x3:(MOVBZloadidx [i+3] {s} p idx mem)))
@ -1221,36 +1362,28 @@
-> @mergePoint(b,x0,x1) (MOVHZload [i-1] {s} p mem)
// b[3] | b[2]<<8 | b[1]<<16 | b[0]<<24 -> load 32-bit
(ORW o0:(ORW o1:(ORW
x0:(MOVBZload [i] {s} p mem)
s0:(SLWconst [8] x1:(MOVBZload [i-1] {s} p mem)))
s1:(SLWconst [16] x2:(MOVBZload [i-2] {s} p mem)))
s2:(SLWconst [24] x3:(MOVBZload [i-3] {s} p mem)))
(ORW o0:(ORW x0:(MOVHZload [i] {s} p mem)
s0:(SLWconst [16] x1:(MOVBZload [i-1] {s} p mem)))
s1:(SLWconst [24] x2:(MOVBZload [i-2] {s} p mem)))
&& p.Op != OpSB
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& x3.Uses == 1
&& s0.Uses == 1
&& s1.Uses == 1
&& s2.Uses == 1
&& o0.Uses == 1
&& o1.Uses == 1
&& mergePoint(b,x0,x1,x2,x3) != nil
&& mergePoint(b,x0,x1,x2) != nil
&& clobber(x0)
&& clobber(x1)
&& clobber(x2)
&& clobber(x3)
&& clobber(s0)
&& clobber(s1)
&& clobber(s2)
&& clobber(o0)
&& clobber(o1)
-> @mergePoint(b,x0,x1,x2,x3) (MOVWZload [i-3] {s} p mem)
-> @mergePoint(b,x0,x1,x2) (MOVWZload [i-2] {s} p mem)
// b[7] | b[6]<<8 | b[5]<<16 | b[4]<<24 | b[3]<<32 | b[2]<<40 | b[1]<<48 | b[0]<<56 -> load 64-bit
(OR o0:(OR o1:(OR o2:(OR o3:(OR o4:(OR o5:(OR
x0:(MOVBZload [i] {s} p mem)
x0:(MOVBZload [i] {s} p mem)
s0:(SLDconst [8] x1:(MOVBZload [i-1] {s} p mem)))
s1:(SLDconst [16] x2:(MOVBZload [i-2] {s} p mem)))
s2:(SLDconst [24] x3:(MOVBZload [i-3] {s} p mem)))
@ -1305,7 +1438,7 @@
-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDload [i-7] {s} p mem)
// b[1] | b[0]<<8 -> load 16-bit
(ORW x0:(MOVBZloadidx [i] {s} p idx mem)
(ORW x0:(MOVBZloadidx [i] {s} p idx mem)
s0:(SLWconst [8] x1:(MOVBZloadidx [i-1] {s} p idx mem)))
&& p.Op != OpSB
&& x0.Uses == 1
@ -1318,36 +1451,28 @@
-> @mergePoint(b,x0,x1) (MOVHZloadidx <v.Type> [i-1] {s} p idx mem)
// b[3] | b[2]<<8 | b[1]<<16 | b[0]<<24 -> load 32-bit
(ORW o0:(ORW o1:(ORW
x0:(MOVBZloadidx [i] {s} p idx mem)
s0:(SLWconst [8] x1:(MOVBZloadidx [i-1] {s} p idx mem)))
s1:(SLWconst [16] x2:(MOVBZloadidx [i-2] {s} p idx mem)))
s2:(SLWconst [24] x3:(MOVBZloadidx [i-3] {s} p idx mem)))
(ORW o0:(ORW x0:(MOVHZloadidx [i] {s} p idx mem)
s0:(SLWconst [16] x1:(MOVBZloadidx [i-1] {s} p idx mem)))
s1:(SLWconst [24] x2:(MOVBZloadidx [i-2] {s} p idx mem)))
&& p.Op != OpSB
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& x3.Uses == 1
&& s0.Uses == 1
&& s1.Uses == 1
&& s2.Uses == 1
&& o0.Uses == 1
&& o1.Uses == 1
&& mergePoint(b,x0,x1,x2,x3) != nil
&& mergePoint(b,x0,x1,x2) != nil
&& clobber(x0)
&& clobber(x1)
&& clobber(x2)
&& clobber(x3)
&& clobber(s0)
&& clobber(s1)
&& clobber(s2)
&& clobber(o0)
&& clobber(o1)
-> @mergePoint(b,x0,x1,x2,x3) (MOVWZloadidx <v.Type> [i-3] {s} p idx mem)
-> @mergePoint(b,x0,x1,x2) (MOVWZloadidx <v.Type> [i-2] {s} p idx mem)
// b[7] | b[6]<<8 | b[5]<<16 | b[4]<<24 | b[3]<<32 | b[2]<<40 | b[1]<<48 | b[0]<<56 -> load 64-bit
(OR o0:(OR o1:(OR o2:(OR o3:(OR o4:(OR o5:(OR
x0:(MOVBZloadidx [i] {s} p idx mem)
x0:(MOVBZloadidx [i] {s} p idx mem)
s0:(SLDconst [8] x1:(MOVBZloadidx [i-1] {s} p idx mem)))
s1:(SLDconst [16] x2:(MOVBZloadidx [i-2] {s} p idx mem)))
s2:(SLDconst [24] x3:(MOVBZloadidx [i-3] {s} p idx mem)))
@ -1402,61 +1527,56 @@
-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <v.Type> [i-7] {s} p idx mem)
// Combine stores into store multiples.
(MOVWstore [i] {s} p w3
x2:(MOVWstore [i-4] {s} p w2
x1:(MOVWstore [i-8] {s} p w1
x0:(MOVWstore [i-12] {s} p w0 mem))))
&& p.Op != OpSB
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& is20Bit(i-12)
&& clobber(x0)
&& clobber(x1)
&& clobber(x2)
-> (STM4 [i-12] {s} p w0 w1 w2 w3 mem)
(MOVWstore [i] {s} p w2
x1:(MOVWstore [i-4] {s} p w1
x0:(MOVWstore [i-8] {s} p w0 mem)))
&& p.Op != OpSB
&& x0.Uses == 1
&& x1.Uses == 1
&& is20Bit(i-8)
&& clobber(x0)
&& clobber(x1)
-> (STM3 [i-8] {s} p w0 w1 w2 mem)
// 32-bit
(MOVWstore [i] {s} p w1 x:(MOVWstore [i-4] {s} p w0 mem))
&& p.Op != OpSB
&& x.Uses == 1
&& is20Bit(i-4)
&& clobber(x)
-> (STM2 [i-4] {s} p w0 w1 mem)
(MOVDstore [i] {s} p w3
x2:(MOVDstore [i-8] {s} p w2
x1:(MOVDstore [i-16] {s} p w1
x0:(MOVDstore [i-24] {s} p w0 mem))))
(MOVWstore [i] {s} p w2 x:(STM2 [i-8] {s} p w0 w1 mem))
&& p.Op != OpSB
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& is20Bit(i-24)
&& clobber(x0)
&& clobber(x1)
&& clobber(x2)
-> (STMG4 [i-24] {s} p w0 w1 w2 w3 mem)
(MOVDstore [i] {s} p w2
x1:(MOVDstore [i-8] {s} p w1
x0:(MOVDstore [i-16] {s} p w0 mem)))
&& x.Uses == 1
&& is20Bit(i-8)
&& clobber(x)
-> (STM3 [i-8] {s} p w0 w1 w2 mem)
(MOVWstore [i] {s} p w3 x:(STM3 [i-12] {s} p w0 w1 w2 mem))
&& p.Op != OpSB
&& x0.Uses == 1
&& x1.Uses == 1
&& is20Bit(i-16)
&& clobber(x0)
&& clobber(x1)
-> (STMG3 [i-16] {s} p w0 w1 w2 mem)
&& x.Uses == 1
&& is20Bit(i-12)
&& clobber(x)
-> (STM4 [i-12] {s} p w0 w1 w2 w3 mem)
(STM2 [i] {s} p w2 w3 x:(STM2 [i-8] {s} p w0 w1 mem))
&& p.Op != OpSB
&& x.Uses == 1
&& is20Bit(i-8)
&& clobber(x)
-> (STM4 [i-8] {s} p w0 w1 w2 w3 mem)
// 64-bit
(MOVDstore [i] {s} p w1 x:(MOVDstore [i-8] {s} p w0 mem))
&& p.Op != OpSB
&& x.Uses == 1
&& is20Bit(i-8)
&& clobber(x)
-> (STMG2 [i-8] {s} p w0 w1 mem)
(MOVDstore [i] {s} p w2 x:(STMG2 [i-16] {s} p w0 w1 mem))
&& p.Op != OpSB
&& x.Uses == 1
&& is20Bit(i-16)
&& clobber(x)
-> (STMG3 [i-16] {s} p w0 w1 w2 mem)
(MOVDstore [i] {s} p w3 x:(STMG3 [i-24] {s} p w0 w1 w2 mem))
&& p.Op != OpSB
&& x.Uses == 1
&& is20Bit(i-24)
&& clobber(x)
-> (STMG4 [i-24] {s} p w0 w1 w2 w3 mem)
(STMG2 [i] {s} p w2 w3 x:(STMG2 [i-16] {s} p w0 w1 mem))
&& p.Op != OpSB
&& x.Uses == 1
&& is20Bit(i-16)
&& clobber(x)
-> (STMG4 [i-16] {s} p w0 w1 w2 w3 mem)
// Convert 32-bit store multiples into 64-bit stores.
(STM2 [i] {s} p (SRDconst [32] x) x mem) -> (MOVDstore [i] {s} p x mem)

View file

@ -133,6 +133,7 @@ func init() {
gpstore = regInfo{inputs: []regMask{ptrspsb, gpsp, 0}}
gpstoreconst = regInfo{inputs: []regMask{ptrspsb, 0}}
gpstoreidx = regInfo{inputs: []regMask{ptrsp, ptrsp, gpsp, 0}}
gpstorebr = regInfo{inputs: []regMask{ptrsp, gpsp, 0}}
gpmvc = regInfo{inputs: []regMask{ptrsp, ptrsp, 0}}
@ -322,26 +323,32 @@ func init() {
{name: "MOVWBRload", argLength: 2, reg: gpload, asm: "MOVWBR", aux: "SymOff", typ: "UInt32", clobberFlags: true, faultOnNilArg0: true}, // load 4 bytes from arg0+auxint+aux. arg1=mem. Reverse bytes.
{name: "MOVDBRload", argLength: 2, reg: gpload, asm: "MOVDBR", aux: "SymOff", typ: "UInt64", clobberFlags: true, faultOnNilArg0: true}, // load 8 bytes from arg0+auxint+aux. arg1=mem. Reverse bytes.
{name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true}, // store byte in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVHstore", argLength: 3, reg: gpstore, asm: "MOVH", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true}, // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true}, // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVDstore", argLength: 3, reg: gpstore, asm: "MOVD", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true}, // store 8 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true}, // store byte in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVHstore", argLength: 3, reg: gpstore, asm: "MOVH", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true}, // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true}, // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVDstore", argLength: 3, reg: gpstore, asm: "MOVD", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true}, // store 8 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVHBRstore", argLength: 3, reg: gpstorebr, asm: "MOVHBR", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true}, // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem. Reverse bytes.
{name: "MOVWBRstore", argLength: 3, reg: gpstorebr, asm: "MOVWBR", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true}, // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem. Reverse bytes.
{name: "MOVDBRstore", argLength: 3, reg: gpstorebr, asm: "MOVDBR", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true}, // store 8 bytes in arg1 to arg0+auxint+aux. arg2=mem. Reverse bytes.
{name: "MVC", argLength: 3, reg: gpmvc, asm: "MVC", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, faultOnNilArg1: true}, // arg0=destptr, arg1=srcptr, arg2=mem, auxint=size,off
// indexed loads/stores
// TODO(mundaym): add sign-extended indexed loads
{name: "MOVBZloadidx", argLength: 3, reg: gploadidx, asm: "MOVBZ", aux: "SymOff", clobberFlags: true}, // load a byte from arg0+arg1+auxint+aux. arg2=mem
{name: "MOVHZloadidx", argLength: 3, reg: gploadidx, asm: "MOVHZ", aux: "SymOff", clobberFlags: true}, // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem
{name: "MOVWZloadidx", argLength: 3, reg: gploadidx, asm: "MOVWZ", aux: "SymOff", clobberFlags: true}, // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem
{name: "MOVDloadidx", argLength: 3, reg: gploadidx, asm: "MOVD", aux: "SymOff", clobberFlags: true}, // load 8 bytes from arg0+arg1+auxint+aux. arg2=mem
{name: "MOVHBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVHBR", aux: "SymOff", clobberFlags: true}, // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
{name: "MOVWBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVWBR", aux: "SymOff", clobberFlags: true}, // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
{name: "MOVDBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVDBR", aux: "SymOff", clobberFlags: true}, // load 8 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
{name: "MOVBstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVB", aux: "SymOff", clobberFlags: true}, // store byte in arg2 to arg0+arg1+auxint+aux. arg3=mem
{name: "MOVHstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVH", aux: "SymOff", clobberFlags: true}, // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
{name: "MOVWstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVW", aux: "SymOff", clobberFlags: true}, // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
{name: "MOVDstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVD", aux: "SymOff", clobberFlags: true}, // store 8 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
{name: "MOVBZloadidx", argLength: 3, reg: gploadidx, asm: "MOVBZ", aux: "SymOff", clobberFlags: true}, // load a byte from arg0+arg1+auxint+aux. arg2=mem
{name: "MOVHZloadidx", argLength: 3, reg: gploadidx, asm: "MOVHZ", aux: "SymOff", clobberFlags: true}, // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem
{name: "MOVWZloadidx", argLength: 3, reg: gploadidx, asm: "MOVWZ", aux: "SymOff", clobberFlags: true}, // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem
{name: "MOVDloadidx", argLength: 3, reg: gploadidx, asm: "MOVD", aux: "SymOff", clobberFlags: true}, // load 8 bytes from arg0+arg1+auxint+aux. arg2=mem
{name: "MOVHBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVHBR", aux: "SymOff", clobberFlags: true}, // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
{name: "MOVWBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVWBR", aux: "SymOff", clobberFlags: true}, // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
{name: "MOVDBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVDBR", aux: "SymOff", clobberFlags: true}, // load 8 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
{name: "MOVBstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVB", aux: "SymOff", clobberFlags: true}, // store byte in arg2 to arg0+arg1+auxint+aux. arg3=mem
{name: "MOVHstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVH", aux: "SymOff", clobberFlags: true}, // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
{name: "MOVWstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVW", aux: "SymOff", clobberFlags: true}, // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
{name: "MOVDstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVD", aux: "SymOff", clobberFlags: true}, // store 8 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
{name: "MOVHBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVHBR", aux: "SymOff", clobberFlags: true}, // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes.
{name: "MOVWBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVWBR", aux: "SymOff", clobberFlags: true}, // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes.
{name: "MOVDBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVDBR", aux: "SymOff", clobberFlags: true}, // store 8 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes.
// For storeconst ops, the AuxInt field encodes both
// the value to store and an address offset of the store.

View file

@ -1359,6 +1359,9 @@ const (
OpS390XMOVHstore
OpS390XMOVWstore
OpS390XMOVDstore
OpS390XMOVHBRstore
OpS390XMOVWBRstore
OpS390XMOVDBRstore
OpS390XMVC
OpS390XMOVBZloadidx
OpS390XMOVHZloadidx
@ -1371,6 +1374,9 @@ const (
OpS390XMOVHstoreidx
OpS390XMOVWstoreidx
OpS390XMOVDstoreidx
OpS390XMOVHBRstoreidx
OpS390XMOVWBRstoreidx
OpS390XMOVDBRstoreidx
OpS390XMOVBstoreconst
OpS390XMOVHstoreconst
OpS390XMOVWstoreconst
@ -17188,6 +17194,48 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "MOVHBRstore",
auxType: auxSymOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
asm: s390x.AMOVHBR,
reg: regInfo{
inputs: []inputInfo{
{0, 37886}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 SP
{1, 37887}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 SP
},
},
},
{
name: "MOVWBRstore",
auxType: auxSymOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
asm: s390x.AMOVWBR,
reg: regInfo{
inputs: []inputInfo{
{0, 37886}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 SP
{1, 37887}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 SP
},
},
},
{
name: "MOVDBRstore",
auxType: auxSymOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
asm: s390x.AMOVDBR,
reg: regInfo{
inputs: []inputInfo{
{0, 37886}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 SP
{1, 37887}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 SP
},
},
},
{
name: "MVC",
auxType: auxSymValAndOff,
@ -17371,6 +17419,48 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "MOVHBRstoreidx",
auxType: auxSymOff,
argLen: 4,
clobberFlags: true,
asm: s390x.AMOVHBR,
reg: regInfo{
inputs: []inputInfo{
{0, 37886}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 SP
{1, 37886}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 SP
{2, 37887}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 SP
},
},
},
{
name: "MOVWBRstoreidx",
auxType: auxSymOff,
argLen: 4,
clobberFlags: true,
asm: s390x.AMOVWBR,
reg: regInfo{
inputs: []inputInfo{
{0, 37886}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 SP
{1, 37886}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 SP
{2, 37887}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 SP
},
},
},
{
name: "MOVDBRstoreidx",
auxType: auxSymOff,
argLen: 4,
clobberFlags: true,
asm: s390x.AMOVDBR,
reg: regInfo{
inputs: []inputInfo{
{0, 37886}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 SP
{1, 37886}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 SP
{2, 37887}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 SP
},
},
},
{
name: "MOVBstoreconst",
auxType: auxSymValAndOff,

File diff suppressed because it is too large Load diff