mirror of
https://github.com/golang/go
synced 2024-09-15 22:20:06 +00:00
cmd/compile/internal/ssa: improve masking codegen on PPC64
Generate RLDIC[LR] instead of MOVD mask, Rx; AND Rx, Ry, Rz. This helps reduce code size, and reduces the latency caused by the constant load. Similarly, for smaller-than-register values, truncate constants which exceed the range of the value's type to avoid needing to load a constant. Change-Id: I6019684795eb8962d4fd6d9585d08b17c15e7d64 Reviewed-on: https://go-review.googlesource.com/c/go/+/515576 Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com> Reviewed-by: Dmitri Shuralyov <dmitshur@google.com> Run-TryBot: Paul Murphy <murp@ibm.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
parent
2186909d86
commit
5cdb132228
|
@ -17,3 +17,16 @@
|
|||
(SETBCR [0] cmp) && buildcfg.GOPPC64 <= 9 => (ISELZ [4] (MOVDconst [1]) cmp)
|
||||
(SETBC [1] cmp) && buildcfg.GOPPC64 <= 9 => (ISELZ [1] (MOVDconst [1]) cmp)
|
||||
(SETBCR [1] cmp) && buildcfg.GOPPC64 <= 9 => (ISELZ [5] (MOVDconst [1]) cmp)
|
||||
|
||||
// The upper bits of smaller-than-register values are undefined. Take advantage of that.
|
||||
(AND <t> x:(MOVDconst [m]) n) && t.Size() <= 2 => (Select0 (ANDCCconst [int64(int16(m))] n))
|
||||
|
||||
// Convert simple bit masks to an equivalent rldic[lr] if possible.
|
||||
(AND x:(MOVDconst [m]) n) && isPPC64ValidShiftMask(m) => (RLDICL [encodePPC64RotateMask(0,m,64)] n)
|
||||
(AND x:(MOVDconst [m]) n) && m != 0 && isPPC64ValidShiftMask(^m) => (RLDICR [encodePPC64RotateMask(0,m,64)] n)
|
||||
|
||||
// If the RLDICL does not rotate its value, a shifted value can be merged.
|
||||
(RLDICL [em] x:(SRDconst [s] a)) && (em&0xFF0000) == 0 => (RLDICL [mergePPC64RLDICLandSRDconst(em, s)] a)
|
||||
|
||||
// Convert rotated 32 bit masks on 32 bit values into rlwinm. In general, this leaves the upper 32 bits in an undefined state.
|
||||
(AND <t> x:(MOVDconst [m]) n) && t.Size() == 4 && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(0,m,32)] n)
|
||||
|
|
|
@ -1499,6 +1499,25 @@ func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
|
|||
return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
|
||||
}
|
||||
|
||||
// mergePPC64RLDICLandSRDconst merges (RLDICL [encoded] (SRDconst [s] x)) into
// (RLDICL [new_encoded] x) and returns new_encoded.
//
// SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
// RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
// operations can be combined. This function assumes the two opcodes can
// be merged, and returns an encoded rotate+mask value of the combined RLDICL.
//
// encoded uses the layout me | mb<<8 | rotate<<16 | nbits<<24, and s is the
// shift amount of the SRDconst. The function panics if encoded already
// carries a non-zero rotate.
func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
	mb := s
	// A right shift by s is expressed as a left rotate by 64-s. Reduce the
	// result modulo 64 so that a zero shift produces a zero rotate instead of
	// 64, which does not fit in the 6-bit rotate field of the instruction.
	r := (64 - s) & 63
	// A larger mb is a smaller mask. The shift clears the top s bits, so the
	// combined mask must start no earlier than bit s.
	if (encoded>>8)&0xFF < mb {
		encoded = (encoded &^ 0xFF00) | mb<<8
	}
	// The rotate is expected to be 0.
	if (encoded & 0xFF0000) != 0 {
		panic("non-zero rotate")
	}
	return encoded | r<<16
}
|
||||
|
||||
// DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask. The values returned as
|
||||
// mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
|
||||
func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
|
||||
|
|
|
@ -3,11 +3,16 @@
|
|||
package ssa
|
||||
|
||||
import "internal/buildcfg"
|
||||
import "cmd/compile/internal/types"
|
||||
|
||||
func rewriteValuePPC64latelower(v *Value) bool {
|
||||
switch v.Op {
|
||||
case OpPPC64AND:
|
||||
return rewriteValuePPC64latelower_OpPPC64AND(v)
|
||||
case OpPPC64ISEL:
|
||||
return rewriteValuePPC64latelower_OpPPC64ISEL(v)
|
||||
case OpPPC64RLDICL:
|
||||
return rewriteValuePPC64latelower_OpPPC64RLDICL(v)
|
||||
case OpPPC64SETBC:
|
||||
return rewriteValuePPC64latelower_OpPPC64SETBC(v)
|
||||
case OpPPC64SETBCR:
|
||||
|
@ -15,6 +20,101 @@ func rewriteValuePPC64latelower(v *Value) bool {
|
|||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValuePPC64latelower_OpPPC64AND(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
typ := &b.Func.Config.Types
|
||||
// match: (AND <t> x:(MOVDconst [m]) n)
|
||||
// cond: t.Size() <= 2
|
||||
// result: (Select0 (ANDCCconst [int64(int16(m))] n))
|
||||
for {
|
||||
t := v.Type
|
||||
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||
x := v_0
|
||||
if x.Op != OpPPC64MOVDconst {
|
||||
continue
|
||||
}
|
||||
m := auxIntToInt64(x.AuxInt)
|
||||
n := v_1
|
||||
if !(t.Size() <= 2) {
|
||||
continue
|
||||
}
|
||||
v.reset(OpSelect0)
|
||||
v0 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags))
|
||||
v0.AuxInt = int64ToAuxInt(int64(int16(m)))
|
||||
v0.AddArg(n)
|
||||
v.AddArg(v0)
|
||||
return true
|
||||
}
|
||||
break
|
||||
}
|
||||
// match: (AND x:(MOVDconst [m]) n)
|
||||
// cond: isPPC64ValidShiftMask(m)
|
||||
// result: (RLDICL [encodePPC64RotateMask(0,m,64)] n)
|
||||
for {
|
||||
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||
x := v_0
|
||||
if x.Op != OpPPC64MOVDconst {
|
||||
continue
|
||||
}
|
||||
m := auxIntToInt64(x.AuxInt)
|
||||
n := v_1
|
||||
if !(isPPC64ValidShiftMask(m)) {
|
||||
continue
|
||||
}
|
||||
v.reset(OpPPC64RLDICL)
|
||||
v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(0, m, 64))
|
||||
v.AddArg(n)
|
||||
return true
|
||||
}
|
||||
break
|
||||
}
|
||||
// match: (AND x:(MOVDconst [m]) n)
|
||||
// cond: m != 0 && isPPC64ValidShiftMask(^m)
|
||||
// result: (RLDICR [encodePPC64RotateMask(0,m,64)] n)
|
||||
for {
|
||||
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||
x := v_0
|
||||
if x.Op != OpPPC64MOVDconst {
|
||||
continue
|
||||
}
|
||||
m := auxIntToInt64(x.AuxInt)
|
||||
n := v_1
|
||||
if !(m != 0 && isPPC64ValidShiftMask(^m)) {
|
||||
continue
|
||||
}
|
||||
v.reset(OpPPC64RLDICR)
|
||||
v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(0, m, 64))
|
||||
v.AddArg(n)
|
||||
return true
|
||||
}
|
||||
break
|
||||
}
|
||||
// match: (AND <t> x:(MOVDconst [m]) n)
|
||||
// cond: t.Size() == 4 && isPPC64WordRotateMask(m)
|
||||
// result: (RLWINM [encodePPC64RotateMask(0,m,32)] n)
|
||||
for {
|
||||
t := v.Type
|
||||
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||
x := v_0
|
||||
if x.Op != OpPPC64MOVDconst {
|
||||
continue
|
||||
}
|
||||
m := auxIntToInt64(x.AuxInt)
|
||||
n := v_1
|
||||
if !(t.Size() == 4 && isPPC64WordRotateMask(m)) {
|
||||
continue
|
||||
}
|
||||
v.reset(OpPPC64RLWINM)
|
||||
v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(0, m, 32))
|
||||
v.AddArg(n)
|
||||
return true
|
||||
}
|
||||
break
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValuePPC64latelower_OpPPC64ISEL(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
|
@ -49,6 +149,29 @@ func rewriteValuePPC64latelower_OpPPC64ISEL(v *Value) bool {
|
|||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValuePPC64latelower_OpPPC64RLDICL(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
// match: (RLDICL [em] x:(SRDconst [s] a))
|
||||
// cond: (em&0xFF0000)==0
|
||||
// result: (RLDICL [mergePPC64RLDICLandSRDconst(em, s)] a)
|
||||
for {
|
||||
em := auxIntToInt64(v.AuxInt)
|
||||
x := v_0
|
||||
if x.Op != OpPPC64SRDconst {
|
||||
break
|
||||
}
|
||||
s := auxIntToInt64(x.AuxInt)
|
||||
a := x.Args[0]
|
||||
if !((em & 0xFF0000) == 0) {
|
||||
break
|
||||
}
|
||||
v.reset(OpPPC64RLDICL)
|
||||
v.AuxInt = int64ToAuxInt(mergePPC64RLDICLandSRDconst(em, s))
|
||||
v.AddArg(a)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValuePPC64latelower_OpPPC64SETBC(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
|
|
|
@ -394,3 +394,29 @@ func zeroextendAndMask8to64(a int8, b int16) (x, y uint64) {
|
|||
return
|
||||
|
||||
}
|
||||
|
||||
// bitRotateAndMask verifies rotate and mask instructions, and further
// simplified instructions for small types.
//
// The arrays are passed by value, so the function has no effect visible to the
// caller. What matters is the code generated for each statement, which is
// described by the architecture comment placed directly above it. Every opcode
// pattern in those comments must be quoted, otherwise it is not treated as a check.
func bitRotateAndMask(io64 [4]uint64, io32 [4]uint32, io16 [4]uint16, io8 [4]uint8) {
	// ppc64x: "RLDICR\t[$]0, R[0-9]*, [$]47, R"
	io64[0] = io64[0] & 0xFFFFFFFFFFFF0000
	// ppc64x: "RLDICL\t[$]0, R[0-9]*, [$]16, R"
	io64[1] = io64[1] & 0x0000FFFFFFFFFFFF
	// ppc64x: -"SRD", -"AND", "RLDICL\t[$]60, R[0-9]*, [$]16, R"
	io64[2] = (io64[2] >> 4) & 0x0000FFFFFFFFFFFF
	// ppc64x: -"SRD", -"AND", "RLDICL\t[$]36, R[0-9]*, [$]28, R"
	io64[3] = (io64[3] >> 28) & 0x0000FFFFFFFFFFFF

	// ppc64x: "RLWNM\t[$]0, R[0-9]*, [$]4, [$]19, R"
	io32[0] = io32[0] & 0x0FFFF000
	// ppc64x: "RLWNM\t[$]0, R[0-9]*, [$]20, [$]3, R"
	io32[1] = io32[1] & 0xF0000FFF
	// This mask is not one contiguous run of bits, so a constant load plus AND is expected.
	// ppc64x: -"RLWNM", "MOVD", "AND"
	io32[2] = io32[2] & 0xFFFF0002

	var bigc uint32 = 0x12345678
	// ppc64x: "ANDCC\t[$]22136"
	io16[0] = io16[0] & uint16(bigc)

	// ppc64x: "ANDCC\t[$]120"
	io8[0] = io8[0] & uint8(bigc)
}
|
||||
|
|
Loading…
Reference in a new issue