cmd/compile: PPC64, elide unnecessary sign extension
Inputs to store[BHW] and cmpW(U) need not be correct
in more bits than are used by the instruction.

Added a pattern tailored to what appears to be cgo boilerplate.
Added a pattern (also seen in cgo boilerplate and hashing) to replace
{EQ,NE}-CMP-ANDconst with {EQ,NE}-ANDCCconst.
Added a pattern to clean up ANDconst shift distance inputs
(this was seen in hashing).
Simplify repeated and, or, xor.

Fixes #17109.

Change-Id: I68eac83e3e614d69ffe473a08953048c8b066d88
Reviewed-on: https://go-review.googlesource.com/30455
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
commit 2f0b8f88df
parent 672e579444
6 changed files with 1192 additions and 60 deletions
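As a rough illustration (not part of the commit), consider a function shaped like the cgo-style boilerplate the message describes; the name and constants below are hypothetical. The byte test needs no separate compare, since the and-immediate already sets the condition register, and the 32-bit comparison does not require its operand to be correct above bit 31:

	package sketch

	// poll is a hypothetical sketch of the code shapes this change targets:
	// a byte flag test (the AND already sets CC on ppc64, so the explicit
	// compare with zero can be elided) and a 32-bit comparison whose input
	// need not be sign-extended to 64 bits.
	func poll(flags *uint8, rc int32) bool {
		return *flags&0x80 != 0 && rc >= 0
	}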
@@ -352,6 +352,21 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64ANDCCconst:
		p := gc.Prog(v.Op.Asm())
		p.Reg = v.Args[0].Reg()

		if v.Aux != nil {
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = gc.AuxOffset(v)
		} else {
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
		}

		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP // discard result

	case ssa.OpPPC64MOVDaddr:
		p := gc.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_ADDR
@@ -221,6 +221,10 @@
(Rsh8Ux8 x y) -> (SRW (ZeroExt8to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt8to64 y)))))
(Lsh8x8 x y) -> (SLW x (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt8to64 y)))))

// Cleaning up shift ops when input is masked
(MaskIfNotCarry (ADDconstForCarry [c] (ANDconst [d] _))) && c < 0 && d > 0 && c + d < 0 -> (MOVDconst [-1])
(ORN x (MOVDconst [-1])) -> x

// Potentially useful optimizing rewrites.
// (ADDconstForCarry [k] c), k < 0 && (c < 0 || k+c >= 0) -> CarrySet
// (ADDconstForCarry [k] c), k < 0 && (c >= 0 && k+c < 0) -> CarryClear
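The two new rules above let a shift whose count is already masked by a small constant drop the out-of-range guard that the generic lowering inserts. A minimal sketch (not from the commit; the name is hypothetical) of the hashing-style code this helps:

	package hashstep

	// step is a hypothetical hashing-style update. The shift count is masked
	// to 0..15, so the shift can never be out of range; the guard the
	// lowering inserts (MaskIfNotCarry/ORN around the count) reduces to a
	// no-op under the rules above.
	func step(h uint32, n uint) uint32 {
		return h*31 + h<<(n&15)
	}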
@@ -362,6 +366,12 @@
// (NE (CMPWconst [0] (FGreaterThan cc)) yes no) -> (FGT cc yes no)
// (NE (CMPWconst [0] (FGreaterEqual cc)) yes no) -> (FGE cc yes no)

// Elide compares of bit tests // TODO need to make both CC and result of ANDCC available.
(EQ (CMPconst [0] (ANDconst [c] x)) yes no) -> (EQ (ANDCCconst [c] x) yes no)
(NE (CMPconst [0] (ANDconst [c] x)) yes no) -> (NE (ANDCCconst [c] x) yes no)
(EQ (CMPWconst [0] (ANDconst [c] x)) yes no) -> (EQ (ANDCCconst [c] x) yes no)
(NE (CMPWconst [0] (ANDconst [c] x)) yes no) -> (NE (ANDCCconst [c] x) yes no)

// absorb flag constants into branches
(EQ (FlagEQ) yes no) -> (First nil yes no)
(EQ (FlagLT) yes no) -> (First nil no yes)
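For example (a hypothetical sketch, not from the commit), a branch on a bit test now compiles without the separate compare against zero:

	package align

	// padTo8 is a hypothetical example of the branch shape these rules match:
	// NE (CMPconst [0] (ANDconst [7] p)) becomes NE (ANDCCconst [7] p), so the
	// and-immediate's condition-register result feeds the branch directly.
	func padTo8(p uintptr) uintptr {
		if p&7 != 0 {
			p += 8 - p&7
		}
		return p
	}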
@@ -588,20 +598,94 @@
(NilCheck ptr mem) -> (LoweredNilCheck ptr mem)

// Optimizations
// Note that PPC "logical" immediates come in 0:15 and 16:31 unsigned immediate forms,
// so ORconst, XORconst easily expand into a pair.

// Include very-large constants in the const-const case.
(AND (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [c&d])
(OR (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [c|d])
(XOR (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [c^d])

// Discover consts
(AND x (MOVDconst [c])) && isU16Bit(c) -> (ANDconst [c] x)
(XOR x (MOVDconst [c])) && isU32Bit(c) -> (XORconst [c] x)
(OR x (MOVDconst [c])) && isU32Bit(c) -> (ORconst [c] x)
(AND (MOVDconst [c]) x) && isU16Bit(c) -> (ANDconst [c] x)
(XOR (MOVDconst [c]) x) && isU32Bit(c) -> (XORconst [c] x)
(OR (MOVDconst [c]) x) && isU32Bit(c) -> (ORconst [c] x)

// Simplify consts
(ANDconst [c] (ANDconst [d] x)) -> (ANDconst [c&d] x)
(ORconst [c] (ORconst [d] x)) -> (ORconst [c|d] x)
(XORconst [c] (XORconst [d] x)) -> (XORconst [c^d] x)
(ANDconst [-1] x) -> x
(ANDconst [0] _) -> (MOVDconst [0])
(XORconst [0] x) -> x
(ORconst [-1] _) -> (MOVDconst [-1])
(ORconst [0] x) -> x

// zero-extend of small and -> small and
(MOVBZreg y:(ANDconst [c] _)) && uint64(c) <= 0xFF -> y
(MOVHZreg y:(ANDconst [c] _)) && uint64(c) <= 0xFFFF -> y
(MOVWZreg y:(ANDconst [c] _)) && uint64(c) <= 0xFFFFFFFF -> y
(MOVWZreg y:(AND (MOVDconst [c]) _)) && uint64(c) <= 0xFFFFFFFF -> y

// sign extend of small-positive and -> small-positive-and
(MOVBreg y:(ANDconst [c] _)) && uint64(c) <= 0x7F -> y
(MOVHreg y:(ANDconst [c] _)) && uint64(c) <= 0x7FFF -> y
(MOVWreg y:(ANDconst [c] _)) && uint64(c) <= 0xFFFF -> y // 0xFFFF is largest immediate constant, when regarded as 32-bit is > 0
(MOVWreg y:(AND (MOVDconst [c]) _)) && uint64(c) <= 0x7FFFFFFF -> y

// small and of zero-extend -> either zero-extend or small and
// degenerate-and
(ANDconst [c] y:(MOVBZreg _)) && c&0xFF == 0xFF -> y
(ANDconst [c] y:(MOVHZreg _)) && c&0xFFFF == 0xFFFF -> y
(ANDconst [c] y:(MOVWZreg _)) && c&0xFFFFFFFF == 0xFFFFFFFF -> y
// normal case
(ANDconst [c] (MOVBZreg x)) -> (ANDconst [c&0xFF] x)
(ANDconst [c] (MOVHZreg x)) -> (ANDconst [c&0xFFFF] x)
(ANDconst [c] (MOVWZreg x)) -> (ANDconst [c&0xFFFFFFFF] x)

// Various redundant zero/sign extension combinations.
(MOVBZreg y:(MOVBZreg _)) -> y // repeat
(MOVBreg y:(MOVBreg _)) -> y // repeat
(MOVBreg (MOVBZreg x)) -> (MOVBreg x)
(MOVBZreg (MOVBreg x)) -> (MOVBZreg x)

// H - there are more combinations than these

(MOVHZreg y:(MOVHZreg _)) -> y // repeat
(MOVHZreg y:(MOVBZreg _)) -> y // wide of narrow

(MOVHreg y:(MOVHreg _)) -> y // repeat
(MOVHreg y:(MOVBreg _)) -> y // wide of narrow

(MOVHreg y:(MOVHZreg x)) -> (MOVHreg x)
(MOVHZreg y:(MOVHreg x)) -> (MOVHZreg x)

// W - there are more combinations than these

(MOVWZreg y:(MOVWZreg _)) -> y // repeat
(MOVWZreg y:(MOVHZreg _)) -> y // wide of narrow
(MOVWZreg y:(MOVBZreg _)) -> y // wide of narrow

(MOVWreg y:(MOVWreg _)) -> y // repeat
(MOVWreg y:(MOVHreg _)) -> y // wide of narrow
(MOVWreg y:(MOVBreg _)) -> y // wide of narrow

(MOVWreg y:(MOVWZreg x)) -> (MOVWreg x)
(MOVWZreg y:(MOVWreg x)) -> (MOVWZreg x)

// Arithmetic constant ops

(ADD (MOVDconst [c]) x) && is32Bit(c) -> (ADDconst [c] x)
(ADD x (MOVDconst [c])) && is32Bit(c) -> (ADDconst [c] x)
(ADDconst [c] (ADDconst [d] x)) && is32Bit(c+d) -> (ADDconst [c+d] x)
(ADDconst [c] (MOVDaddr [d] {sym} x)) -> (MOVDaddr [c+d] {sym} x)
(ADDconst [0] x) -> x
(ANDconst [-1] x) -> x
(ANDconst [0] _) -> (MOVDconst [0])
(XORconst [0] x) -> x
(SUB x (MOVDconst [c])) && is32Bit(-c) -> (ADDconst [-c] x)
// TODO deal with subtract-from-const

(XOR (MOVDconst [0]) x) -> x
(XOR x (MOVDconst [0])) -> x
(ADD (MOVDconst [0]) x) -> x
(ADD x (MOVDconst [0])) -> x
(ADDconst [c] (MOVDaddr [d] {sym} x)) -> (MOVDaddr [c+d] {sym} x)

// Fold offsets for stores.
(MOVDstore [off1] {sym} (ADDconst [off2] x) val mem) && is16Bit(off1+off2) -> (MOVDstore [off1+off2] {sym} x val mem)
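For instance (a hypothetical sketch, not from the commit), a masked value that already fits the narrow width no longer pays for the conversion's extension:

	package masks

	// lowByte is a hypothetical example: x&0xFF already fits in 8 bits, so
	// the zero-extension implied by the uint32-to-uint64 conversion matches
	// (MOVWZreg y:(ANDconst [c] _)) && uint64(c) <= 0xFFFFFFFF and is dropped.
	func lowByte(x uint32) uint64 {
		return uint64(x & 0xFF)
	}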
@@ -714,10 +798,19 @@
(MOVHZreg (MOVDconst [c])) -> (MOVDconst [int64(uint16(c))])
(MOVHreg (MOVDconst [c])) -> (MOVDconst [int64(int16(c))])

// Lose widening ops fed to stores
(MOVBstore [off] {sym} ptr (MOVBreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVBZreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
(MOVHstore [off] {sym} ptr (MOVHreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
(MOVHstore [off] {sym} ptr (MOVHZreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
(MOVWstore [off] {sym} ptr (MOVWreg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
(MOVWstore [off] {sym} ptr (MOVWZreg x) mem) -> (MOVWstore [off] {sym} ptr x mem)

// Lose W-widening ops fed to compare-W
(CMPW x (MOVWreg y)) -> (CMPW x y)
(CMPW (MOVWreg x) y) -> (CMPW x y)
(CMPWU x (MOVWZreg y)) -> (CMPWU x y)
(CMPWU (MOVWZreg x) y) -> (CMPWU x y)

(CMP x (MOVDconst [c])) && is16Bit(c) -> (CMPconst x [c])
(CMP (MOVDconst [c]) y) && is16Bit(c) -> (InvertFlags (CMPconst y [c]))
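A hypothetical sketch (not from the commit) of the store case: only the low byte of v reaches memory, so a widening op feeding the store is dead:

	package truncstore

	// storeByte is a hypothetical example: the byte store consumes only the
	// low 8 bits, so a MOVBreg/MOVBZreg widening of v that may precede it
	// (for example after an earlier conversion) is dead and is removed.
	func storeByte(dst []byte, v int8) {
		dst[0] = byte(v)
	}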
@@ -728,3 +821,7 @@
(CMPU (MOVDconst [c]) y) && isU16Bit(c) -> (InvertFlags (CMPUconst y [c]))
(CMPWU x (MOVDconst [c])) && isU16Bit(c) -> (CMPWUconst x [c])
(CMPWU (MOVDconst [c]) y) && isU16Bit(c) -> (InvertFlags (CMPWUconst y [c]))

// A particular pattern seen in cgo code:
(AND (MOVDconst [c]) x:(MOVBZload _ _)) -> (ANDconst [c&0xFF] x)
(AND x:(MOVBZload _ _) (MOVDconst [c])) -> (ANDconst [c&0xFF] x)
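A hypothetical sketch (not from the commit) of that cgo-flavored shape: a loaded byte ANDed against a wider constant, where only the low 8 bits of the mask can matter:

	package cgoish

	// classify is a hypothetical example: tab[i] is a byte load (MOVBZload),
	// so ANDing it with a wide constant is equivalent to ANDconst with the
	// constant's low 8 bits, which fits the and-immediate form.
	func classify(tab []byte, i int) bool {
		const mask = 0x40001
		return int64(tab[i])&mask != 0
	}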
@@ -226,6 +226,7 @@ func init() {
	{name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64"}, // arg0|aux
	{name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64"}, // arg0^aux
	{name: "ANDconst", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}, asm: "ANDCC", aux: "Int64", clobberFlags: true}, // arg0&aux // and-immediate sets CC on PPC, always.
	{name: "ANDCCconst", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}}, asm: "ANDCC", aux: "Int64", typ: "Flags"}, // arg0&aux == 0 // and-immediate sets CC on PPC, always.

	{name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB", typ: "Int64"}, // sign extend int8 to int64
	{name: "MOVBZreg", argLength: 1, reg: gp11, asm: "MOVBZ", typ: "Int64"}, // zero extend uint8 to uint64
@@ -256,9 +257,9 @@ func init() {

	{name: "MOVDaddr", argLength: 1, reg: regInfo{inputs: []regMask{sp | sb}, outputs: []regMask{gp}}, aux: "SymOff", asm: "MOVD", rematerializeable: true}, // arg0 + auxInt + aux.(*gc.Sym), arg0=SP/SB

	{name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", rematerializeable: true}, //
	{name: "FMOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVD", rematerializeable: true}, //
	{name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float32", asm: "FMOVS", rematerializeable: true}, //
	{name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", typ: "Int64", rematerializeable: true}, //
	{name: "FMOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVD", rematerializeable: true}, //
	{name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float32", asm: "FMOVS", rematerializeable: true}, //
	{name: "FCMPU", argLength: 2, reg: fp2cr, asm: "FCMPU", typ: "Flags"},

	{name: "CMP", argLength: 2, reg: gp2cr, asm: "CMP", typ: "Flags"}, // arg0 compare to arg1
@@ -1176,6 +1176,7 @@ const (
	OpPPC64ORconst
	OpPPC64XORconst
	OpPPC64ANDconst
	OpPPC64ANDCCconst
	OpPPC64MOVBreg
	OpPPC64MOVBZreg
	OpPPC64MOVHreg
@@ -14652,6 +14653,17 @@ var opcodeTable = [...]opInfo{
			},
		},
	},
	{
		name:    "ANDCCconst",
		auxType: auxInt64,
		argLen:  1,
		asm:     ppc64.AANDCC,
		reg: regInfo{
			inputs: []inputInfo{
				{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
			},
		},
	},
	{
		name:   "MOVBreg",
		argLen: 1,
@@ -229,11 +229,16 @@ func is16Bit(n int64) bool {
	return n == int64(int16(n))
}

// is16Bit reports whether n can be represented as an unsigned 16 bit integer.
// isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
func isU16Bit(n int64) bool {
	return n == int64(uint16(n))
}

// isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
func isU32Bit(n int64) bool {
	return n == int64(uint32(n))
}

// is20Bit reports whether n can be represented as a signed 20 bit integer.
func is20Bit(n int64) bool {
	return -(1<<19) <= n && n < (1<<19)
File diff suppressed because it is too large