[dev.ssa] cmd/compile: disable xor clearing when flags must be preserved

The x86 backend automatically rewrites MOV $0, AX to
XOR AX, AX.  That rewrite isn't ok when the flags register
is live across the MOV.  Keep track of which moves care
about preserving flags, then disable this rewrite for them.

On x86, Prog.Mark was being used to hold the length of the
instruction.  We already store that in Prog.Isize, so no
need to store it in Prog.Mark also.  This frees up Prog.Mark
to hold a bitmask on x86 just like all the other architectures.

Update #12405

Change-Id: Ibad8a8f41fc6222bec1e4904221887d3cc3ca029
Reviewed-on: https://go-review.googlesource.com/18861
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Russ Cox <rsc@golang.org>
This commit is contained in:
Keith Randall 2016-01-22 13:44:58 -08:00
parent 733bf6ef67
commit 7b773946c0
11 changed files with 66 additions and 37 deletions

View file

@ -3405,6 +3405,7 @@ func genssa(f *ssa.Func, ptxt *obj.Prog, gcargs, gclocals *Sym) {
for i, b := range f.Blocks {
s.bstart[b.ID] = Pc
// Emit values in block
s.markMoves(b)
for _, v := range b.Values {
x := Pc
s.genValue(v)
@ -3864,6 +3865,11 @@ func (s *genState) genValue(v *ssa.Value) {
p.From.Offset = i
p.To.Type = obj.TYPE_REG
p.To.Reg = x
// If flags are live at this instruction, suppress the
// MOV $0,AX -> XOR AX,AX optimization.
if v.Aux != nil {
p.Mark |= x86.PRESERVEFLAGS
}
case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
x := regnum(v)
p := Prog(v.Op.Asm())
@ -4237,6 +4243,29 @@ func (s *genState) genValue(v *ssa.Value) {
}
}
// markMoves marks any MOVXconst ops that need to avoid clobbering flags.
func (s *genState) markMoves(b *ssa.Block) {
flive := b.FlagsLiveAtEnd
if b.Control != nil && b.Control.Type.IsFlags() {
flive = true
}
for i := len(b.Values) - 1; i >= 0; i-- {
v := b.Values[i]
if flive && (v.Op == ssa.OpAMD64MOVWconst || v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
// The "mark" is any non-nil Aux value.
v.Aux = v
}
if v.Type.IsFlags() {
flive = false
}
for _, a := range v.Args {
if a.Type.IsFlags() {
flive = true
}
}
}
}
// movZero generates a register indirect move with a 0 immediate and keeps track of bytes left and next offset
func movZero(as int, width int64, nbytes int64, offset int64, regnum int16) (nleft int64, noff int64) {
p := Prog(as)

View file

@ -50,6 +50,9 @@ type Block struct {
// Ignored if len(Succs) < 2.
// Fatal if not BranchUnknown and len(Succs) > 2.
Likely BranchPrediction
// After flagalloc, records whether flags are live at the end of the block.
FlagsLiveAtEnd bool
}
// kind control successors

View file

@ -120,4 +120,9 @@ func flagalloc(f *Func) {
// standard regs, and it runs next.)
}
}
// Save live flag state for later.
for _, b := range f.Blocks {
b.FlagsLiveAtEnd = end[b.ID] != nil
}
}

View file

@ -93,7 +93,6 @@ func init() {
// Common regInfo
var (
gp01 = regInfo{inputs: []regMask{}, outputs: gponly}
gp01flags = regInfo{inputs: []regMask{}, outputs: gponly, clobbers: flags}
gp11 = regInfo{inputs: []regMask{gpsp}, outputs: gponly, clobbers: flags}
gp11nf = regInfo{inputs: []regMask{gpsp}, outputs: gponly} // nf: no flags clobbered
gp11sb = regInfo{inputs: []regMask{gpspsb}, outputs: gponly}
@ -340,12 +339,10 @@ func init() {
{name: "MOVLQSX", reg: gp11nf, asm: "MOVLQSX"}, // sign extend arg0 from int32 to int64
{name: "MOVLQZX", reg: gp11nf, asm: "MOVLQZX"}, // zero extend arg0 from int32 to int64
// clobbers flags as liblink will rewrite these to XOR reg, reg if the constant is zero
// TODO: revisit when issue 12405 is fixed
{name: "MOVBconst", reg: gp01flags, asm: "MOVB", typ: "UInt8"}, // 8 low bits of auxint
{name: "MOVWconst", reg: gp01flags, asm: "MOVW", typ: "UInt16"}, // 16 low bits of auxint
{name: "MOVLconst", reg: gp01flags, asm: "MOVL", typ: "UInt32"}, // 32 low bits of auxint
{name: "MOVQconst", reg: gp01flags, asm: "MOVQ", typ: "UInt64"}, // auxint
{name: "MOVBconst", reg: gp01, asm: "MOVB", typ: "UInt8"}, // 8 low bits of auxint
{name: "MOVWconst", reg: gp01, asm: "MOVW", typ: "UInt16"}, // 16 low bits of auxint
{name: "MOVLconst", reg: gp01, asm: "MOVL", typ: "UInt32"}, // 32 low bits of auxint
{name: "MOVQconst", reg: gp01, asm: "MOVQ", typ: "UInt64"}, // auxint
{name: "CVTTSD2SL", reg: fpgp, asm: "CVTTSD2SL"}, // convert float64 to int32
{name: "CVTTSD2SQ", reg: fpgp, asm: "CVTTSD2SQ"}, // convert float64 to int64

View file

@ -2694,7 +2694,6 @@ var opcodeTable = [...]opInfo{
name: "MOVBconst",
asm: x86.AMOVB,
reg: regInfo{
clobbers: 8589934592, // .FLAGS
outputs: []regMask{
65519, // .AX .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
},
@ -2704,7 +2703,6 @@ var opcodeTable = [...]opInfo{
name: "MOVWconst",
asm: x86.AMOVW,
reg: regInfo{
clobbers: 8589934592, // .FLAGS
outputs: []regMask{
65519, // .AX .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
},
@ -2714,7 +2712,6 @@ var opcodeTable = [...]opInfo{
name: "MOVLconst",
asm: x86.AMOVL,
reg: regInfo{
clobbers: 8589934592, // .FLAGS
outputs: []regMask{
65519, // .AX .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
},
@ -2724,7 +2721,6 @@ var opcodeTable = [...]opInfo{
name: "MOVQconst",
asm: x86.AMOVQ,
reg: regInfo{
clobbers: 8589934592, // .FLAGS
outputs: []regMask{
65519, // .AX .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
},

View file

@ -1415,15 +1415,6 @@ func (v *Value) rematerializeable() bool {
// We can't rematerialize instructions which
// clobber the flags register.
if regspec.clobbers&flagRegMask != 0 {
if v.Op == OpAMD64MOVQconst && v.AuxInt != 0 ||
v.Op == OpAMD64MOVLconst && int32(v.AuxInt) != 0 ||
v.Op == OpAMD64MOVWconst && int16(v.AuxInt) != 0 ||
v.Op == OpAMD64MOVBconst && int8(v.AuxInt) != 0 {
// These are marked as clobbering flags, but only
// the 0 versions actually do. TODO: fix MOV->XOR rewrites
// to understand when they are allowed to clobber flags?
return true
}
return false
}

View file

@ -214,14 +214,14 @@ type Prog struct {
Spadj int32
As int16
Reg int16
RegTo2 int16 // 2nd register output operand
Mark uint16
RegTo2 int16 // 2nd register output operand
Mark uint16 // bitmask of arch-specific items
Optab uint16
Scond uint8
Back uint8
Ft uint8
Tt uint8
Isize uint8
Isize uint8 // size of the instruction in bytes (x86 only)
Mode int8
Info ProgInfo

View file

@ -203,7 +203,6 @@ func linkpatch(ctxt *Link, sym *LSym) {
}
for p := sym.Text; p != nil; p = p.Link {
p.Mark = 0 /* initialization for follow */
if p.Pcond != nil {
p.Pcond = brloop(ctxt, p.Pcond)
if p.Pcond != nil {

View file

@ -34,6 +34,12 @@ import "cmd/internal/obj"
//go:generate go run ../stringer.go -i $GOFILE -o anames.go -p x86
const (
/* mark flags */
DONE = 1 << iota
PRESERVEFLAGS // not allowed to clobber flags
)
/*
* amd64
*/

View file

@ -1748,7 +1748,7 @@ func span6(ctxt *obj.Link, s *obj.LSym) {
// process forward jumps to p
for q = p.Rel; q != nil; q = q.Forwd {
v = int32(p.Pc - (q.Pc + int64(q.Mark)))
v = int32(p.Pc - (q.Pc + int64(q.Isize)))
if q.Back&2 != 0 { // short
if v > 127 {
loop++
@ -1761,7 +1761,7 @@ func span6(ctxt *obj.Link, s *obj.LSym) {
s.P[q.Pc+1] = byte(v)
}
} else {
bp = s.P[q.Pc+int64(q.Mark)-4:]
bp = s.P[q.Pc+int64(q.Isize)-4:]
bp[0] = byte(v)
bp = bp[1:]
bp[0] = byte(v >> 8)
@ -1784,7 +1784,6 @@ func span6(ctxt *obj.Link, s *obj.LSym) {
obj.Symgrow(ctxt, s, p.Pc+int64(m))
copy(s.P[p.Pc:][:m], ctxt.And[:m])
p.Mark = uint16(m)
c += int32(m)
}
@ -2157,6 +2156,10 @@ func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
v = int64(int32(v))
}
if v == 0 {
if p.Mark&PRESERVEFLAGS != 0 {
// If PRESERVEFLAGS is set, avoid MOV $0, AX turning into XOR AX, AX.
return Yu7
}
return Yi0
}
if v == 1 {

View file

@ -1214,16 +1214,16 @@ loop:
q = p.Pcond
if q != nil && q.As != obj.ATEXT {
/* mark instruction as done and continue layout at target of jump */
p.Mark = 1
p.Mark |= DONE
p = q
if p.Mark == 0 {
if p.Mark&DONE == 0 {
goto loop
}
}
}
if p.Mark != 0 {
if p.Mark&DONE != 0 {
/*
* p goes here, but already used it elsewhere.
* copy up to 4 instructions or else branch to other copy.
@ -1246,7 +1246,7 @@ loop:
if nofollow(a) || pushpop(a) {
break // NOTE(rsc): arm does goto copy
}
if q.Pcond == nil || q.Pcond.Mark != 0 {
if q.Pcond == nil || q.Pcond.Mark&DONE != 0 {
continue
}
if a == obj.ACALL || a == ALOOP {
@ -1260,10 +1260,10 @@ loop:
q = obj.Copyp(ctxt, p)
p = p.Link
q.Mark = 1
q.Mark |= DONE
(*last).Link = q
*last = q
if int(q.As) != a || q.Pcond == nil || q.Pcond.Mark != 0 {
if int(q.As) != a || q.Pcond == nil || q.Pcond.Mark&DONE != 0 {
continue
}
@ -1273,7 +1273,7 @@ loop:
q.Link = p
xfol(ctxt, q.Link, last)
p = q.Link
if p.Mark != 0 {
if p.Mark&DONE != 0 {
return
}
goto loop
@ -1290,7 +1290,7 @@ loop:
}
/* emit p */
p.Mark = 1
p.Mark |= DONE
(*last).Link = p
*last = p
@ -1328,7 +1328,7 @@ loop:
}
} else {
q = p.Link
if q.Mark != 0 {
if q.Mark&DONE != 0 {
if a != ALOOP {
p.As = relinv(int16(a))
p.Link = p.Pcond
@ -1338,7 +1338,7 @@ loop:
}
xfol(ctxt, p.Link, last)
if p.Pcond.Mark != 0 {
if p.Pcond.Mark&DONE != 0 {
return
}
p = p.Pcond