cmd/{asm,compile}: replace TESTB op with CMPWconst on s390x

TESTB was implemented as AND $0xff, Rx, REGTMP. Unfortunately s390x has
no 3-operand AND-with-immediate instruction, so the assembler had to
emulate it with two instructions.
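For context, the old and new lowerings test the same condition. Below is a
minimal Go restatement of the semantics (function names are illustrative),
based on the TESTB op comment removed further down, "(arg0 & 0xFF) compare
to 0", and the new CMPWconst/MOVBZreg rule:

package main

import "fmt"

// testbOld mirrors the removed lowering: TESTB x set the condition
// code from (x & 0xFF) compared with 0, which cost two machine
// instructions because there is no 3-operand AND-with-immediate.
func testbOld(x uint64) bool { return x&0xFF != 0 }

// testbNew mirrors the replacement: zero-extend the low byte and
// compare the 32-bit result with 0; the zero extension is usually
// folded away by the rewrite rules later in this diff.
func testbNew(x uint64) bool { return uint32(uint8(x)) != 0 }

func main() {
	for _, x := range []uint64{0, 1, 0x100, 0x1ff} {
		fmt.Println(x, testbOld(x), testbNew(x)) // always agree
	}
}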

This CL uses CMPW instead of AND, and also optimizes compares against
small constants to use the shorter chi/cghi instructions where possible.
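The chi optimization matters for size because RI-format compares (chi,
cghi) are 4 bytes while the RIL forms (cfi, cgfi) are 6. Here is a small
sketch of the selection predicate, mirroring the asmout change at the
bottom of this diff; helper names are illustrative:

package main

import "fmt"

// fitsInt16 reports whether v survives a round trip through int16,
// i.e. it can be encoded in the 16-bit signed immediate field of an
// RI-format compare (chi/cghi).
func fitsInt16(v int64) bool { return int64(int16(v)) == v }

// cmpImmSize returns the encoded size in bytes of a compare-immediate:
// 4 for the RI form (chi/cghi), 6 for the RIL form (cfi/cgfi).
func cmpImmSize(v int64) int {
	if fitsInt16(v) {
		return 4
	}
	return 6
}

func main() {
	fmt.Println(cmpImmSize(-32768), cmpImmSize(-32769)) // 4 6
}

This matches the updated testdata below: CMPW R9, $-32768 assembles to the
4-byte a79e8000, while CMPW R9, $-32769 needs the 6-byte c29dffff7fff.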

Overall this CL reduces the size of the .text section of the
bin/go binary by ~2%.

Change-Id: Ic335c29fc1129378fcbb1265bfb10f5b744a0f3f
Reviewed-on: https://go-review.googlesource.com/30690
Run-TryBot: Michael Munday <munday@ca.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Michael Munday 2016-10-07 14:29:55 -04:00
parent f4e37c8ec5
commit 45b26a93f3
7 changed files with 286 additions and 50 deletions

View file

@@ -164,10 +164,14 @@ TEXT main·foo(SB),7,$16-0 // TEXT main.foo(SB), 7, $16-0
MVC $256, 8192(R1), 8192(R2) // MVC 8192(R1), $256, 8192(R2) // b90400a2c2a800002000b90400b1c2b800002000d2ffa000b000
CMP R1, R2 // b9200012
CMP R3, $32767 // a73f7fff
CMP R3, $32768 // c23c00008000
CMP R3, $-2147483648 // c23c80000000
CMPU R4, R5 // b9210045
CMPU R6, $4294967295 // c26effffffff
CMPW R7, R8 // 1978
CMPW R9, $-32768 // a79e8000
CMPW R9, $-32769 // c29dffff7fff
CMPW R9, $-2147483648 // c29d80000000
CMPWU R1, R2 // 1512
CMPWU R3, $4294967295 // c23fffffffff
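The 4-byte encodings above use the RI format: opcode byte 0xa7, then R1
and a 4-bit opcode extension (0xe for chi, 0xf for cghi) packed into the
second byte, followed by a 16-bit signed immediate. A hedged sketch of
that layout; encodeRI is illustrative and is not the assembler's real
zRI helper:

package main

import "fmt"

// encodeRI builds an RI-a instruction image: opcode byte, R1 plus the
// opcode extension in the next byte, then the immediate, big-endian.
func encodeRI(r1, ext uint8, imm int16) []byte {
	return []byte{0xa7, r1<<4 | ext, byte(uint16(imm) >> 8), byte(uint16(imm))}
}

func main() {
	fmt.Printf("%x\n", encodeRI(9, 0xe, -32768)) // a79e8000, CMPW R9, $-32768
	fmt.Printf("%x\n", encodeRI(3, 0xf, 32767))  // a73f7fff, CMP R3, $32767
}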

View file

@@ -311,13 +311,6 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Reg = v.Reg()
case ssa.OpS390XCMP, ssa.OpS390XCMPW, ssa.OpS390XCMPU, ssa.OpS390XCMPWU:
opregreg(v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
case ssa.OpS390XTESTB:
p := gc.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST
p.From.Offset = 0xFF
p.Reg = v.Args[0].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = s390x.REGTMP
case ssa.OpS390XFCMPS, ssa.OpS390XFCMP:
opregreg(v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
case ssa.OpS390XCMPconst, ssa.OpS390XCMPWconst, ssa.OpS390XCMPUconst, ssa.OpS390XCMPWUconst:

View file

@@ -409,7 +409,7 @@
(If (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (GTF cmp yes no)
(If (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (GEF cmp yes no)
(If cond yes no) -> (NE (TESTB cond) yes no)
(If cond yes no) -> (NE (CMPWconst [0] (MOVBZreg cond)) yes no)
// ***************************
// Above: lowering rules
@@ -417,15 +417,26 @@
// ***************************
// TODO: Should the optimizations be a separate pass?
// Fold boolean tests into blocks
(NE (TESTB (MOVDLT (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (LT cmp yes no)
(NE (TESTB (MOVDLE (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (LE cmp yes no)
(NE (TESTB (MOVDGT (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (GT cmp yes no)
(NE (TESTB (MOVDGE (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (GE cmp yes no)
(NE (TESTB (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (EQ cmp yes no)
(NE (TESTB (MOVDNE (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (NE cmp yes no)
(NE (TESTB (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (GTF cmp yes no)
(NE (TESTB (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (GEF cmp yes no)
// Fold sign extensions into conditional moves of constants.
// Designed to remove the MOVBZreg inserted by the If lowering.
(MOVBZreg x:(MOVDLT (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> x
(MOVBZreg x:(MOVDLE (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> x
(MOVBZreg x:(MOVDGT (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> x
(MOVBZreg x:(MOVDGE (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> x
(MOVBZreg x:(MOVDEQ (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> x
(MOVBZreg x:(MOVDNE (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> x
(MOVBZreg x:(MOVDGTnoinv (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> x
(MOVBZreg x:(MOVDGEnoinv (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> x
// Fold boolean tests into blocks.
(NE (CMPWconst [0] (MOVDLT (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (LT cmp yes no)
(NE (CMPWconst [0] (MOVDLE (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (LE cmp yes no)
(NE (CMPWconst [0] (MOVDGT (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (GT cmp yes no)
(NE (CMPWconst [0] (MOVDGE (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (GE cmp yes no)
(NE (CMPWconst [0] (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (EQ cmp yes no)
(NE (CMPWconst [0] (MOVDNE (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (NE cmp yes no)
(NE (CMPWconst [0] (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (GTF cmp yes no)
(NE (CMPWconst [0] (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (GEF cmp yes no)
// Fold constants into instructions.
(ADD x (MOVDconst [c])) && is32Bit(c) -> (ADDconst [c] x)
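The side condition on the MOVBZreg rules above just says that both
constants already fit in an unsigned byte, so zero-extending the selected
value is a no-op. A minimal Go restatement of that check; the function
name is illustrative:

package main

import "fmt"

// fitsUint8 restates the rule condition int64(uint8(c)) == c: the
// constant is unchanged by truncation to a byte followed by zero
// extension. When both constants of a conditional move pass this
// check, the MOVBZreg wrapped around it by the If lowering is a
// no-op, so the rules above delete it.
func fitsUint8(c int64) bool { return int64(uint8(c)) == c }

func main() {
	fmt.Println(fitsUint8(0), fitsUint8(255), fitsUint8(256)) // true true false
}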

View file

@@ -237,8 +237,6 @@ func init() {
{name: "FCMPS", argLength: 2, reg: fp2flags, asm: "CEBR", typ: "Flags"}, // arg0 compare to arg1, f32
{name: "FCMP", argLength: 2, reg: fp2flags, asm: "FCMPU", typ: "Flags"}, // arg0 compare to arg1, f64
{name: "TESTB", argLength: 1, reg: gp1flags, asm: "AND", typ: "Flags"}, // (arg0 & 0xFF) compare to 0
{name: "SLD", argLength: 2, reg: sh21, asm: "SLD"}, // arg0 << arg1, shift amount is mod 64
{name: "SLW", argLength: 2, reg: sh21, asm: "SLW"}, // arg0 << arg1, shift amount is mod 32
{name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "Int64"}, // arg0 << auxint, shift amount 0-63

View file

@@ -1302,7 +1302,6 @@ const (
OpS390XCMPWUconst
OpS390XFCMPS
OpS390XFCMP
OpS390XTESTB
OpS390XSLD
OpS390XSLW
OpS390XSLDconst
@@ -16304,16 +16303,6 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "TESTB",
argLen: 1,
asm: s390x.AAND,
reg: regInfo{
inputs: []inputInfo{
{0, 37887}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 SP
},
},
},
{
name: "SLD",
argLen: 2,

View file

@@ -7321,6 +7321,214 @@ func rewriteValueS390X_OpS390XMOVBZloadidx(v *Value, config *Config) bool {
func rewriteValueS390X_OpS390XMOVBZreg(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (MOVBZreg x:(MOVDLT (MOVDconst [c]) (MOVDconst [d]) _))
// cond: int64(uint8(c)) == c && int64(uint8(d)) == d
// result: x
for {
x := v.Args[0]
if x.Op != OpS390XMOVDLT {
break
}
x_0 := x.Args[0]
if x_0.Op != OpS390XMOVDconst {
break
}
c := x_0.AuxInt
x_1 := x.Args[1]
if x_1.Op != OpS390XMOVDconst {
break
}
d := x_1.AuxInt
if !(int64(uint8(c)) == c && int64(uint8(d)) == d) {
break
}
v.reset(OpCopy)
v.Type = x.Type
v.AddArg(x)
return true
}
// match: (MOVBZreg x:(MOVDLE (MOVDconst [c]) (MOVDconst [d]) _))
// cond: int64(uint8(c)) == c && int64(uint8(d)) == d
// result: x
for {
x := v.Args[0]
if x.Op != OpS390XMOVDLE {
break
}
x_0 := x.Args[0]
if x_0.Op != OpS390XMOVDconst {
break
}
c := x_0.AuxInt
x_1 := x.Args[1]
if x_1.Op != OpS390XMOVDconst {
break
}
d := x_1.AuxInt
if !(int64(uint8(c)) == c && int64(uint8(d)) == d) {
break
}
v.reset(OpCopy)
v.Type = x.Type
v.AddArg(x)
return true
}
// match: (MOVBZreg x:(MOVDGT (MOVDconst [c]) (MOVDconst [d]) _))
// cond: int64(uint8(c)) == c && int64(uint8(d)) == d
// result: x
for {
x := v.Args[0]
if x.Op != OpS390XMOVDGT {
break
}
x_0 := x.Args[0]
if x_0.Op != OpS390XMOVDconst {
break
}
c := x_0.AuxInt
x_1 := x.Args[1]
if x_1.Op != OpS390XMOVDconst {
break
}
d := x_1.AuxInt
if !(int64(uint8(c)) == c && int64(uint8(d)) == d) {
break
}
v.reset(OpCopy)
v.Type = x.Type
v.AddArg(x)
return true
}
// match: (MOVBZreg x:(MOVDGE (MOVDconst [c]) (MOVDconst [d]) _))
// cond: int64(uint8(c)) == c && int64(uint8(d)) == d
// result: x
for {
x := v.Args[0]
if x.Op != OpS390XMOVDGE {
break
}
x_0 := x.Args[0]
if x_0.Op != OpS390XMOVDconst {
break
}
c := x_0.AuxInt
x_1 := x.Args[1]
if x_1.Op != OpS390XMOVDconst {
break
}
d := x_1.AuxInt
if !(int64(uint8(c)) == c && int64(uint8(d)) == d) {
break
}
v.reset(OpCopy)
v.Type = x.Type
v.AddArg(x)
return true
}
// match: (MOVBZreg x:(MOVDEQ (MOVDconst [c]) (MOVDconst [d]) _))
// cond: int64(uint8(c)) == c && int64(uint8(d)) == d
// result: x
for {
x := v.Args[0]
if x.Op != OpS390XMOVDEQ {
break
}
x_0 := x.Args[0]
if x_0.Op != OpS390XMOVDconst {
break
}
c := x_0.AuxInt
x_1 := x.Args[1]
if x_1.Op != OpS390XMOVDconst {
break
}
d := x_1.AuxInt
if !(int64(uint8(c)) == c && int64(uint8(d)) == d) {
break
}
v.reset(OpCopy)
v.Type = x.Type
v.AddArg(x)
return true
}
// match: (MOVBZreg x:(MOVDNE (MOVDconst [c]) (MOVDconst [d]) _))
// cond: int64(uint8(c)) == c && int64(uint8(d)) == d
// result: x
for {
x := v.Args[0]
if x.Op != OpS390XMOVDNE {
break
}
x_0 := x.Args[0]
if x_0.Op != OpS390XMOVDconst {
break
}
c := x_0.AuxInt
x_1 := x.Args[1]
if x_1.Op != OpS390XMOVDconst {
break
}
d := x_1.AuxInt
if !(int64(uint8(c)) == c && int64(uint8(d)) == d) {
break
}
v.reset(OpCopy)
v.Type = x.Type
v.AddArg(x)
return true
}
// match: (MOVBZreg x:(MOVDGTnoinv (MOVDconst [c]) (MOVDconst [d]) _))
// cond: int64(uint8(c)) == c && int64(uint8(d)) == d
// result: x
for {
x := v.Args[0]
if x.Op != OpS390XMOVDGTnoinv {
break
}
x_0 := x.Args[0]
if x_0.Op != OpS390XMOVDconst {
break
}
c := x_0.AuxInt
x_1 := x.Args[1]
if x_1.Op != OpS390XMOVDconst {
break
}
d := x_1.AuxInt
if !(int64(uint8(c)) == c && int64(uint8(d)) == d) {
break
}
v.reset(OpCopy)
v.Type = x.Type
v.AddArg(x)
return true
}
// match: (MOVBZreg x:(MOVDGEnoinv (MOVDconst [c]) (MOVDconst [d]) _))
// cond: int64(uint8(c)) == c && int64(uint8(d)) == d
// result: x
for {
x := v.Args[0]
if x.Op != OpS390XMOVDGEnoinv {
break
}
x_0 := x.Args[0]
if x_0.Op != OpS390XMOVDconst {
break
}
c := x_0.AuxInt
x_1 := x.Args[1]
if x_1.Op != OpS390XMOVDconst {
break
}
d := x_1.AuxInt
if !(int64(uint8(c)) == c && int64(uint8(d)) == d) {
break
}
v.reset(OpCopy)
v.Type = x.Type
v.AddArg(x)
return true
}
// match: (MOVBZreg x:(MOVBZload _ _))
// cond:
// result: x
@@ -16895,7 +17103,7 @@ func rewriteBlockS390X(b *Block, config *Config) bool {
}
// match: (If cond yes no)
// cond:
// result: (NE (TESTB cond) yes no)
// result: (NE (CMPWconst [0] (MOVBZreg cond)) yes no)
for {
v := b.Control
_ = v
@@ -16903,8 +17111,11 @@ func rewriteBlockS390X(b *Block, config *Config) bool {
yes := b.Succs[0]
no := b.Succs[1]
b.Kind = BlockS390XNE
v0 := b.NewValue0(v.Line, OpS390XTESTB, TypeFlags)
v0.AddArg(cond)
v0 := b.NewValue0(v.Line, OpS390XCMPWconst, TypeFlags)
v0.AuxInt = 0
v1 := b.NewValue0(v.Line, OpS390XMOVBZreg, config.fe.TypeUInt64())
v1.AddArg(cond)
v0.AddArg(v1)
b.SetControl(v0)
_ = yes
_ = no
@@ -17046,12 +17257,15 @@ func rewriteBlockS390X(b *Block, config *Config) bool {
return true
}
case BlockS390XNE:
// match: (NE (TESTB (MOVDLT (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no)
// match: (NE (CMPWconst [0] (MOVDLT (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no)
// cond:
// result: (LT cmp yes no)
for {
v := b.Control
if v.Op != OpS390XTESTB {
if v.Op != OpS390XCMPWconst {
break
}
if v.AuxInt != 0 {
break
}
v_0 := v.Args[0]
@@ -17081,12 +17295,15 @@ func rewriteBlockS390X(b *Block, config *Config) bool {
_ = no
return true
}
// match: (NE (TESTB (MOVDLE (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no)
// match: (NE (CMPWconst [0] (MOVDLE (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no)
// cond:
// result: (LE cmp yes no)
for {
v := b.Control
if v.Op != OpS390XTESTB {
if v.Op != OpS390XCMPWconst {
break
}
if v.AuxInt != 0 {
break
}
v_0 := v.Args[0]
@@ -17116,12 +17333,15 @@ func rewriteBlockS390X(b *Block, config *Config) bool {
_ = no
return true
}
// match: (NE (TESTB (MOVDGT (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no)
// match: (NE (CMPWconst [0] (MOVDGT (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no)
// cond:
// result: (GT cmp yes no)
for {
v := b.Control
if v.Op != OpS390XTESTB {
if v.Op != OpS390XCMPWconst {
break
}
if v.AuxInt != 0 {
break
}
v_0 := v.Args[0]
@@ -17151,12 +17371,15 @@ func rewriteBlockS390X(b *Block, config *Config) bool {
_ = no
return true
}
// match: (NE (TESTB (MOVDGE (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no)
// match: (NE (CMPWconst [0] (MOVDGE (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no)
// cond:
// result: (GE cmp yes no)
for {
v := b.Control
if v.Op != OpS390XTESTB {
if v.Op != OpS390XCMPWconst {
break
}
if v.AuxInt != 0 {
break
}
v_0 := v.Args[0]
@@ -17186,12 +17409,15 @@ func rewriteBlockS390X(b *Block, config *Config) bool {
_ = no
return true
}
// match: (NE (TESTB (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no)
// match: (NE (CMPWconst [0] (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no)
// cond:
// result: (EQ cmp yes no)
for {
v := b.Control
if v.Op != OpS390XTESTB {
if v.Op != OpS390XCMPWconst {
break
}
if v.AuxInt != 0 {
break
}
v_0 := v.Args[0]
@@ -17221,12 +17447,15 @@ func rewriteBlockS390X(b *Block, config *Config) bool {
_ = no
return true
}
// match: (NE (TESTB (MOVDNE (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no)
// match: (NE (CMPWconst [0] (MOVDNE (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no)
// cond:
// result: (NE cmp yes no)
for {
v := b.Control
if v.Op != OpS390XTESTB {
if v.Op != OpS390XCMPWconst {
break
}
if v.AuxInt != 0 {
break
}
v_0 := v.Args[0]
@@ -17256,12 +17485,15 @@ func rewriteBlockS390X(b *Block, config *Config) bool {
_ = no
return true
}
// match: (NE (TESTB (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no)
// match: (NE (CMPWconst [0] (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no)
// cond:
// result: (GTF cmp yes no)
for {
v := b.Control
if v.Op != OpS390XTESTB {
if v.Op != OpS390XCMPWconst {
break
}
if v.AuxInt != 0 {
break
}
v_0 := v.Args[0]
@@ -17291,12 +17523,15 @@ func rewriteBlockS390X(b *Block, config *Config) bool {
_ = no
return true
}
// match: (NE (TESTB (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no)
// match: (NE (CMPWconst [0] (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no)
// cond:
// result: (GEF cmp yes no)
for {
v := b.Control
if v.Op != OpS390XTESTB {
if v.Op != OpS390XCMPWconst {
break
}
if v.AuxInt != 0 {
break
}
v_0 := v.Args[0]

View file

@@ -3288,7 +3288,13 @@ func asmout(ctxt *obj.Link, asm *[]byte) {
ctxt.Diag("%v overflows a uint32", v)
}
}
zRIL(_a, zopril(ctxt, p.As), uint32(p.From.Reg), uint32(regoff(ctxt, &p.To)), asm)
if p.As == ACMP && int64(int16(v)) == v {
zRI(op_CGHI, uint32(p.From.Reg), uint32(v), asm)
} else if p.As == ACMPW && int64(int16(v)) == v {
zRI(op_CHI, uint32(p.From.Reg), uint32(v), asm)
} else {
zRIL(_a, zopril(ctxt, p.As), uint32(p.From.Reg), uint32(v), asm)
}
case 72: // mov $constant/$addr mem
v := regoff(ctxt, &p.From)