cmd/compile/internal/ssa: improve masking codegen on PPC64

Generate RLDIC[LR] instead of MOVD mask, Rx; AND Rx, Ry, Rz.
This reduces code size and avoids the latency of loading the
mask constant into a register.

Similarly, for smaller-than-register values, truncate constants
which exceed the range of the value's type to avoid needing to
load a constant.
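
For example (an illustration of the pattern, not code from this CL),
a contiguous mask that previously required materializing a 64 bit
constant now lowers to a single rotate-and-mask instruction:

	func clearLow16(x uint64) uint64 {
		// before: MOVD $-65536, Rx; AND Rx, Ry, Rz
		// after:  RLDICR $0, Ry, $47, Rz
		return x & 0xFFFFFFFFFFFF0000
	}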

Change-Id: I6019684795eb8962d4fd6d9585d08b17c15e7d64
Reviewed-on: https://go-review.googlesource.com/c/go/+/515576
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Run-TryBot: Paul Murphy <murp@ibm.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Paul E. Murphy authored on 2023-06-27 17:17:33 -05:00 (committed by Paul Murphy)
parent 2186909d86, commit 5cdb132228
4 changed files with 181 additions and 0 deletions

src/cmd/compile/internal/ssa/_gen/PPC64latelower.rules

@@ -17,3 +17,16 @@
(SETBCR [0] cmp) && buildcfg.GOPPC64 <= 9 => (ISELZ [4] (MOVDconst [1]) cmp)
(SETBC [1] cmp) && buildcfg.GOPPC64 <= 9 => (ISELZ [1] (MOVDconst [1]) cmp)
(SETBCR [1] cmp) && buildcfg.GOPPC64 <= 9 => (ISELZ [5] (MOVDconst [1]) cmp)
// The upper bits of smaller-than-register values are undefined. Take advantage of that.
(AND <t> x:(MOVDconst [m]) n) && t.Size() <= 2 => (Select0 (ANDCCconst [int64(int16(m))] n))
// Convert simple bit masks to an equivalent rldic[lr] if possible.
(AND x:(MOVDconst [m]) n) && isPPC64ValidShiftMask(m) => (RLDICL [encodePPC64RotateMask(0,m,64)] n)
(AND x:(MOVDconst [m]) n) && m != 0 && isPPC64ValidShiftMask(^m) => (RLDICR [encodePPC64RotateMask(0,m,64)] n)
// If the RLDICL does not rotate its value, a shifted value can be merged.
(RLDICL [em] x:(SRDconst [s] a)) && (em&0xFF0000) == 0 => (RLDICL [mergePPC64RLDICLandSRDconst(em, s)] a)
// Convert rotated 32 bit masks on 32 bit values into rlwinm. In general, this leaves the upper 32 bits in an undefined state.
(AND <t> x:(MOVDconst [m]) n) && t.Size() == 4 && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(0,m,32)] n)
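
The mask predicates used above are defined in rewrite.go rather than
in this hunk. As a sketch of the intent (inferred from how the rules
use them, not necessarily the exact definitions): isPPC64ValidShiftMask
accepts a nonzero run of contiguous ones anchored at bit 0, so m maps
onto RLDICL and ^m onto RLDICR, while isPPC64WordRotateMask accepts a
contiguous 32 bit run that may wrap around bit 0:

	// Sketch: a valid shift mask is a nonzero string of contiguous
	// ones including the least significant bit. Adding 1 to such a
	// value clears every set bit, which the test below exploits.
	func isPPC64ValidShiftMask(v int64) bool {
		return v != 0 && (v+1)&v == 0
	}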

src/cmd/compile/internal/ssa/rewrite.go

@@ -1499,6 +1499,25 @@ func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
}
// Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x)
// SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
// RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
// operations can be combined. This function assumes the two opcodes can
// be merged, and returns an encoded rotate+mask value of the combined RLDICL.
func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
mb := s
r := 64 - s
// A larger mb is a smaller mask.
if (encoded>>8)&0xFF < mb {
encoded = (encoded &^ 0xFF00) | mb<<8
}
// The rotate is expected to be 0.
if (encoded & 0xFF0000) != 0 {
panic("non-zero rotate")
}
return encoded | r<<16
}
// DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask. The values returned as
// mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
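
To make the packed encoding and the merge concrete, here is a
standalone trace of (x >> 4) & 0x0000FFFFFFFFFFFF from the codegen
test below. merge re-implements the helper's arithmetic purely for
illustration (the real helper additionally panics on a nonzero
incoming rotate):

	package main

	import "fmt"

	// Field layout, per encodePPC64RotateMask above:
	// me | mb<<8 | rotate<<16 | nbits<<24.
	func merge(encoded, s int64) int64 {
		mb := s     // SRDconst $s is RLDICL rotate=64-s, mb=s
		r := 64 - s
		if (encoded>>8)&0xFF < mb {
			// A larger mb is a smaller mask; keep the stricter one.
			encoded = (encoded &^ 0xFF00) | mb<<8
		}
		return encoded | r<<16
	}

	func main() {
		// AND with 0x0000FFFFFFFFFFFF encodes as mb=16, me=63, rotate=0.
		enc := int64(63) | 16<<8 | 64<<24
		m := merge(enc, 4)
		fmt.Printf("rotate=%d mb=%d\n", (m>>16)&0xFF, (m>>8)&0xFF)
		// rotate=60 mb=16, i.e. RLDICL $60, Rx, $16, matching the
		// test's expectation for (io64[2] >> 4) & 0x0000FFFFFFFFFFFF.
	}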

src/cmd/compile/internal/ssa/rewritePPC64latelower.go

@@ -3,11 +3,16 @@
package ssa
import "internal/buildcfg"
import "cmd/compile/internal/types"
func rewriteValuePPC64latelower(v *Value) bool {
switch v.Op {
case OpPPC64AND:
return rewriteValuePPC64latelower_OpPPC64AND(v)
case OpPPC64ISEL:
return rewriteValuePPC64latelower_OpPPC64ISEL(v)
case OpPPC64RLDICL:
return rewriteValuePPC64latelower_OpPPC64RLDICL(v)
case OpPPC64SETBC:
return rewriteValuePPC64latelower_OpPPC64SETBC(v)
case OpPPC64SETBCR:
@@ -15,6 +20,101 @@ func rewriteValuePPC64latelower(v *Value) bool {
}
return false
}
func rewriteValuePPC64latelower_OpPPC64AND(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (AND <t> x:(MOVDconst [m]) n)
// cond: t.Size() <= 2
// result: (Select0 (ANDCCconst [int64(int16(m))] n))
for {
t := v.Type
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if x.Op != OpPPC64MOVDconst {
continue
}
m := auxIntToInt64(x.AuxInt)
n := v_1
if !(t.Size() <= 2) {
continue
}
v.reset(OpSelect0)
v0 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags))
v0.AuxInt = int64ToAuxInt(int64(int16(m)))
v0.AddArg(n)
v.AddArg(v0)
return true
}
break
}
// match: (AND x:(MOVDconst [m]) n)
// cond: isPPC64ValidShiftMask(m)
// result: (RLDICL [encodePPC64RotateMask(0,m,64)] n)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if x.Op != OpPPC64MOVDconst {
continue
}
m := auxIntToInt64(x.AuxInt)
n := v_1
if !(isPPC64ValidShiftMask(m)) {
continue
}
v.reset(OpPPC64RLDICL)
v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(0, m, 64))
v.AddArg(n)
return true
}
break
}
// match: (AND x:(MOVDconst [m]) n)
// cond: m != 0 && isPPC64ValidShiftMask(^m)
// result: (RLDICR [encodePPC64RotateMask(0,m,64)] n)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if x.Op != OpPPC64MOVDconst {
continue
}
m := auxIntToInt64(x.AuxInt)
n := v_1
if !(m != 0 && isPPC64ValidShiftMask(^m)) {
continue
}
v.reset(OpPPC64RLDICR)
v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(0, m, 64))
v.AddArg(n)
return true
}
break
}
// match: (AND <t> x:(MOVDconst [m]) n)
// cond: t.Size() == 4 && isPPC64WordRotateMask(m)
// result: (RLWINM [encodePPC64RotateMask(0,m,32)] n)
for {
t := v.Type
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if x.Op != OpPPC64MOVDconst {
continue
}
m := auxIntToInt64(x.AuxInt)
n := v_1
if !(t.Size() == 4 && isPPC64WordRotateMask(m)) {
continue
}
v.reset(OpPPC64RLWINM)
v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(0, m, 32))
v.AddArg(n)
return true
}
break
}
return false
}
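
Note the case order above: for 1 and 2 byte values the ANDCCconst
rule fires first, and its int64(int16(m)) reinterpretation is what
the commit message calls truncating constants that exceed the type's
range. A tiny standalone check (not part of the CL) of the arithmetic
behind the ANDCC immediates asserted in the codegen test below:

	package main

	import "fmt"

	func main() {
		var bigc uint32 = 0x12345678
		fmt.Println(int64(int16(uint16(bigc)))) // 22136 (0x5678)
		fmt.Println(int64(uint8(bigc)))         // 120 (0x78)
	}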
func rewriteValuePPC64latelower_OpPPC64ISEL(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
@@ -49,6 +149,29 @@ func rewriteValuePPC64latelower_OpPPC64ISEL(v *Value) bool {
}
return false
}
func rewriteValuePPC64latelower_OpPPC64RLDICL(v *Value) bool {
v_0 := v.Args[0]
// match: (RLDICL [em] x:(SRDconst [s] a))
// cond: (em&0xFF0000)==0
// result: (RLDICL [mergePPC64RLDICLandSRDconst(em, s)] a)
for {
em := auxIntToInt64(v.AuxInt)
x := v_0
if x.Op != OpPPC64SRDconst {
break
}
s := auxIntToInt64(x.AuxInt)
a := x.Args[0]
if !((em & 0xFF0000) == 0) {
break
}
v.reset(OpPPC64RLDICL)
v.AuxInt = int64ToAuxInt(mergePPC64RLDICLandSRDconst(em, s))
v.AddArg(a)
return true
}
return false
}
func rewriteValuePPC64latelower_OpPPC64SETBC(v *Value) bool {
v_0 := v.Args[0]
b := v.Block

test/codegen/shift.go

@@ -394,3 +394,29 @@ func zeroextendAndMask8to64(a int8, b int16) (x, y uint64) {
return
}
// Verify rotate and mask instructions, and further simplified instructions for small types
func bitRotateAndMask(io64 [4]uint64, io32 [4]uint32, io16 [4]uint16, io8 [4]uint8) {
// ppc64x: "RLDICR\t[$]0, R[0-9]*, [$]47, R"
io64[0] = io64[0] & 0xFFFFFFFFFFFF0000
// ppc64x: "RLDICL\t[$]0, R[0-9]*, [$]16, R"
io64[1] = io64[1] & 0x0000FFFFFFFFFFFF
// ppc64x: -"SRD", -"AND", "RLDICL\t[$]60, R[0-9]*, [$]16, R"
io64[2] = (io64[2] >> 4) & 0x0000FFFFFFFFFFFF
// ppc64x: -"SRD", -"AND", "RLDICL\t[$]36, R[0-9]*, [$]28, R"
io64[3] = (io64[3] >> 28) & 0x0000FFFFFFFFFFFF
// ppc64x: "RLWNM\t[$]0, R[0-9]*, [$]4, [$]19, R"
io32[0] = io32[0] & 0x0FFFF000
// ppc64x: "RLWNM\t[$]0, R[0-9]*, [$]20, [$]3, R"
io32[1] = io32[1] & 0xF0000FFF
// ppc64x: -"RLWNM", MOVD, AND
io32[2] = io32[2] & 0xFFFF0002
var bigc uint32 = 0x12345678
// ppc64x: "ANDCC\t[$]22136"
io16[0] = io16[0] & uint16(bigc)
// ppc64x: "ANDCC\t[$]120"
io8[0] = io8[0] & uint8(bigc)
}
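
As a usage note (standard Go tooling, not part of the CL): the same
rewrites can be observed outside the asmcheck harness by putting
functions like the ones above into a hypothetical masks.go and
cross-compiling it, e.g. GOOS=linux GOARCH=ppc64le go build
-gcflags=-S masks.go 2>&1 | grep -E 'RLDIC|RLWNM|ANDCC', which should
show the rotate-and-mask forms in place of MOVD/AND pairs.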