mirror of
https://github.com/golang/go
synced 2024-11-02 13:42:29 +00:00
cmd/compile: use prove pass to detect Ctz of non-zero values
On amd64, Ctz must include special handling of zeros.
But the prove pass has enough information to detect whether the input
is non-zero, allowing a more efficient lowering.

Introduce new CtzNonZero ops to capture and use this information.

Benchmark code:

func BenchmarkVisitBits(b *testing.B) {
	b.Run("8", func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			x := uint8(0xff)
			for x != 0 {
				sink = bits.TrailingZeros8(x)
				x &= x - 1
			}
		}
	})

	// and similarly so for 16, 32, 64
}

name            old time/op  new time/op  delta
VisitBits/8-8   7.27ns ± 4%  5.58ns ± 4%  -23.35%  (p=0.000 n=28+26)
VisitBits/16-8  14.7ns ± 7%  10.5ns ± 4%  -28.43%  (p=0.000 n=30+28)
VisitBits/32-8  27.6ns ± 8%  19.3ns ± 3%  -30.14%  (p=0.000 n=30+26)
VisitBits/64-8  44.0ns ±11%  38.0ns ± 5%  -13.48%  (p=0.000 n=30+30)

Fixes #25077

Change-Id: Ie6e5bd86baf39ee8a4ca7cadcf56d934e047f957
Reviewed-on: https://go-review.googlesource.com/109358
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
adbb6ec903
commit
d9a50a6531
19 changed files with 347 additions and 32 deletions
|
@ -60,6 +60,11 @@
|
|||
(Ctz16 x) -> (Select0 (BSFL (BTSLconst <typ.UInt32> [16] x)))
|
||||
(Ctz8 x) -> (Select0 (BSFL (BTSLconst <typ.UInt32> [ 8] x)))
|
||||
|
||||
(Ctz64NonZero x) -> (Select0 (BSFQ x))
|
||||
(Ctz32NonZero x) -> (Select0 (BSFL x))
|
||||
(Ctz16NonZero x) -> (Select0 (BSFL x))
|
||||
(Ctz8NonZero x) -> (Select0 (BSFL x))
|
||||
|
||||
// BitLen64 of a 64 bit value x requires checking whether x == 0, since BSRQ is undefined when x == 0.
|
||||
// However, for zero-extended values, we can cheat a bit, and calculate
|
||||
// BSR(x<<1 + 1), which is guaranteed to be non-zero, and which conveniently
|
||||
|
|
|
@ -57,6 +57,9 @@
|
|||
|
||||
(Sqrt x) -> (SQRTD x)
|
||||
|
||||
// TODO: optimize this for ARMv5 and ARMv6
|
||||
(Ctz32NonZero x) -> (Ctz32 x)
|
||||
|
||||
// count trailing zero for ARMv5 and ARMv6
|
||||
// 32 - CLZ(x&-x - 1)
|
||||
(Ctz32 <t> x) && objabi.GOARM<=6 -> (RSBconst [32] (CLZ <t> (SUBconst <t> (AND <t> x (RSBconst <t> [0] x)) [1])))
|
||||
|
|
|
@ -89,6 +89,9 @@
|
|||
(Round x) -> (FRINTAD x)
|
||||
(Trunc x) -> (FRINTZD x)
|
||||
|
||||
(Ctz64NonZero x) -> (Ctz64 x)
|
||||
(Ctz32NonZero x) -> (Ctz32 x)
|
||||
|
||||
(Ctz64 <t> x) -> (CLZ (RBIT <t> x))
|
||||
(Ctz32 <t> x) -> (CLZW (RBITW <t> x))
|
||||
|
||||
|
|
|
@ -116,6 +116,9 @@
|
|||
|
||||
(Sqrt x) -> (SQRTD x)
|
||||
|
||||
// TODO: optimize this case?
|
||||
(Ctz32NonZero x) -> (Ctz32 x)
|
||||
|
||||
// count trailing zero
|
||||
// 32 - CLZ(x&-x - 1)
|
||||
(Ctz32 <t> x) -> (SUB (MOVWconst [32]) (CLZ <t> (SUBconst <t> [1] (AND <t> x (NEG <t> x)))))
|
||||
|
|
|
@ -275,6 +275,10 @@
|
|||
(Addr {sym} base) -> (MOVDaddr {sym} base)
|
||||
(OffPtr [off] ptr) -> (ADD (MOVDconst <typ.Int64> [off]) ptr)
|
||||
|
||||
// TODO: optimize these cases?
|
||||
(Ctz32NonZero x) -> (Ctz32 x)
|
||||
(Ctz64NonZero x) -> (Ctz64 x)
|
||||
|
||||
(Ctz64 x) -> (POPCNTD (ANDN <typ.Int64> (ADDconst <typ.Int64> [-1] x) x))
|
||||
(Ctz32 x) -> (POPCNTW (MOVWZreg (ANDN <typ.Int> (ADDconst <typ.Int> [-1] x) x)))
|
||||
|
||||
|
|
|
@ -78,6 +78,10 @@
|
|||
(OffPtr [off] ptr) && is32Bit(off) -> (ADDconst [off] ptr)
|
||||
(OffPtr [off] ptr) -> (ADD (MOVDconst [off]) ptr)
|
||||
|
||||
// TODO: optimize these cases?
|
||||
(Ctz64NonZero x) -> (Ctz64 x)
|
||||
(Ctz32NonZero x) -> (Ctz32 x)
|
||||
|
||||
// Ctz(x) = 64 - findLeftmostOne((x-1)&^x)
|
||||
(Ctz64 <t> x) -> (SUB (MOVDconst [64]) (FLOGR (AND <t> (SUBconst <t> [1] x) (NOT <t> x))))
|
||||
(Ctz32 <t> x) -> (SUB (MOVDconst [64]) (FLOGR (MOVWZreg (ANDW <t> (SUBWconst <t> [1] x) (NOTW <t> x)))))
|
||||
|
|
|
@ -107,6 +107,11 @@
|
|||
(Com32 <typ.UInt32> (Int64Hi x))
|
||||
(Com32 <typ.UInt32> (Int64Lo x)))
|
||||
|
||||
// Sadly, just because we know that x is non-zero,
|
||||
// we don't know whether either component is,
|
||||
// so just treat Ctz64NonZero the same as Ctz64.
|
||||
(Ctz64NonZero x) -> (Ctz64 x)
|
||||
|
||||
(Ctz64 x) ->
|
||||
(Add32 <typ.UInt32>
|
||||
(Ctz32 <typ.UInt32> (Int64Lo x))
|
||||
|
|
|
@ -244,6 +244,10 @@ var genericOps = []opData{
|
|||
{name: "Ctz16", argLength: 1}, // Count trailing (low order) zeroes (returns 0-16)
|
||||
{name: "Ctz32", argLength: 1}, // Count trailing (low order) zeroes (returns 0-32)
|
||||
{name: "Ctz64", argLength: 1}, // Count trailing (low order) zeroes (returns 0-64)
|
||||
{name: "Ctz8NonZero", argLength: 1}, // same as above, but arg[0] known to be non-zero, returns 0-7
|
||||
{name: "Ctz16NonZero", argLength: 1}, // same as above, but arg[0] known to be non-zero, returns 0-15
|
||||
{name: "Ctz32NonZero", argLength: 1}, // same as above, but arg[0] known to be non-zero, returns 0-31
|
||||
{name: "Ctz64NonZero", argLength: 1}, // same as above, but arg[0] known to be non-zero, returns 0-63
|
||||
{name: "BitLen8", argLength: 1}, // Number of bits in arg[0] (returns 0-8)
|
||||
{name: "BitLen16", argLength: 1}, // Number of bits in arg[0] (returns 0-16)
|
||||
{name: "BitLen32", argLength: 1}, // Number of bits in arg[0] (returns 0-32)
|
||||
|
|
|
@ -2028,6 +2028,10 @@ const (
|
|||
OpCtz16
|
||||
OpCtz32
|
||||
OpCtz64
|
||||
OpCtz8NonZero
|
||||
OpCtz16NonZero
|
||||
OpCtz32NonZero
|
||||
OpCtz64NonZero
|
||||
OpBitLen8
|
||||
OpBitLen16
|
||||
OpBitLen32
|
||||
|
@ -25531,6 +25535,26 @@ var opcodeTable = [...]opInfo{
|
|||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Ctz8NonZero",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Ctz16NonZero",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Ctz32NonZero",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Ctz64NonZero",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "BitLen8",
|
||||
argLen: 1,
|
||||
|
|
|
@ -365,7 +365,7 @@ var opMax = map[Op]int64{
|
|||
OpAdd32: math.MaxInt32, OpSub32: math.MaxInt32,
|
||||
}
|
||||
|
||||
// isNonNegative returns true if v is known to be non-negative.
|
||||
// isNonNegative reports whether v is known to be non-negative.
|
||||
func (ft *factsTable) isNonNegative(v *Value) bool {
|
||||
if isNonNegative(v) {
|
||||
return true
|
||||
|
@ -734,14 +734,15 @@ func addRestrictions(parent *Block, ft *factsTable, t domain, v, w *Value, r rel
|
|||
}
|
||||
}
|
||||
|
||||
var ctzNonZeroOp = map[Op]Op{OpCtz8: OpCtz8NonZero, OpCtz16: OpCtz16NonZero, OpCtz32: OpCtz32NonZero, OpCtz64: OpCtz64NonZero}
|
||||
|
||||
// simplifyBlock simplifies some constant values in b and evaluates
|
||||
// branches to non-uniquely dominated successors of b.
|
||||
func simplifyBlock(sdom SparseTree, ft *factsTable, b *Block) {
|
||||
// Replace OpSlicemask operations in b with constants where possible.
|
||||
for _, v := range b.Values {
|
||||
if v.Op != OpSlicemask {
|
||||
continue
|
||||
}
|
||||
switch v.Op {
|
||||
case OpSlicemask:
|
||||
// Replace OpSlicemask operations in b with constants where possible.
|
||||
x, delta := isConstDelta(v.Args[0])
|
||||
if x == nil {
|
||||
continue
|
||||
|
@ -763,6 +764,19 @@ func simplifyBlock(sdom SparseTree, ft *factsTable, b *Block) {
|
|||
}
|
||||
v.AuxInt = -1
|
||||
}
|
||||
case OpCtz8, OpCtz16, OpCtz32, OpCtz64:
|
||||
// On some architectures, notably amd64, we can generate much better
|
||||
// code for CtzNN if we know that the argument is non-zero.
|
||||
// Capture that information here for use in arch-specific optimizations.
|
||||
x := v.Args[0]
|
||||
lim, ok := ft.limits[x.ID]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if lim.umin > 0 || lim.min > 0 || lim.max < 0 {
|
||||
v.Op = ctzNonZeroOp[v.Op]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if b.Kind != BlockIf {
|
||||
|
@ -818,7 +832,7 @@ func removeBranch(b *Block, branch branch) {
|
|||
}
|
||||
}
|
||||
|
||||
// isNonNegative returns true is v is known to be greater or equal to zero.
|
||||
// isNonNegative reports whether v is known to be greater or equal to zero.
|
||||
func isNonNegative(v *Value) bool {
|
||||
switch v.Op {
|
||||
case OpConst64:
|
||||
|
|
|
@ -593,12 +593,20 @@ func rewriteValueAMD64(v *Value) bool {
|
|||
return rewriteValueAMD64_OpConstNil_0(v)
|
||||
case OpCtz16:
|
||||
return rewriteValueAMD64_OpCtz16_0(v)
|
||||
case OpCtz16NonZero:
|
||||
return rewriteValueAMD64_OpCtz16NonZero_0(v)
|
||||
case OpCtz32:
|
||||
return rewriteValueAMD64_OpCtz32_0(v)
|
||||
case OpCtz32NonZero:
|
||||
return rewriteValueAMD64_OpCtz32NonZero_0(v)
|
||||
case OpCtz64:
|
||||
return rewriteValueAMD64_OpCtz64_0(v)
|
||||
case OpCtz64NonZero:
|
||||
return rewriteValueAMD64_OpCtz64NonZero_0(v)
|
||||
case OpCtz8:
|
||||
return rewriteValueAMD64_OpCtz8_0(v)
|
||||
case OpCtz8NonZero:
|
||||
return rewriteValueAMD64_OpCtz8NonZero_0(v)
|
||||
case OpCvt32Fto32:
|
||||
return rewriteValueAMD64_OpCvt32Fto32_0(v)
|
||||
case OpCvt32Fto64:
|
||||
|
@ -53306,6 +53314,23 @@ func rewriteValueAMD64_OpCtz16_0(v *Value) bool {
|
|||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpCtz16NonZero_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
typ := &b.Func.Config.Types
|
||||
_ = typ
|
||||
// match: (Ctz16NonZero x)
|
||||
// cond:
|
||||
// result: (Select0 (BSFL x))
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v.reset(OpSelect0)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64BSFL, types.NewTuple(typ.UInt32, types.TypeFlags))
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpCtz32_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
|
@ -53326,6 +53351,23 @@ func rewriteValueAMD64_OpCtz32_0(v *Value) bool {
|
|||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpCtz32NonZero_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
typ := &b.Func.Config.Types
|
||||
_ = typ
|
||||
// match: (Ctz32NonZero x)
|
||||
// cond:
|
||||
// result: (Select0 (BSFL x))
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v.reset(OpSelect0)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64BSFL, types.NewTuple(typ.UInt32, types.TypeFlags))
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpCtz64_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
|
@ -53354,6 +53396,23 @@ func rewriteValueAMD64_OpCtz64_0(v *Value) bool {
|
|||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpCtz64NonZero_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
typ := &b.Func.Config.Types
|
||||
_ = typ
|
||||
// match: (Ctz64NonZero x)
|
||||
// cond:
|
||||
// result: (Select0 (BSFQ x))
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v.reset(OpSelect0)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64BSFQ, types.NewTuple(typ.UInt64, types.TypeFlags))
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpCtz8_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
|
@ -53374,6 +53433,23 @@ func rewriteValueAMD64_OpCtz8_0(v *Value) bool {
|
|||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpCtz8NonZero_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
typ := &b.Func.Config.Types
|
||||
_ = typ
|
||||
// match: (Ctz8NonZero x)
|
||||
// cond:
|
||||
// result: (Select0 (BSFL x))
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v.reset(OpSelect0)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64BSFL, types.NewTuple(typ.UInt32, types.TypeFlags))
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpCvt32Fto32_0(v *Value) bool {
|
||||
// match: (Cvt32Fto32 x)
|
||||
// cond:
|
||||
|
|
|
@ -483,6 +483,8 @@ func rewriteValueARM(v *Value) bool {
|
|||
return rewriteValueARM_OpConstNil_0(v)
|
||||
case OpCtz32:
|
||||
return rewriteValueARM_OpCtz32_0(v)
|
||||
case OpCtz32NonZero:
|
||||
return rewriteValueARM_OpCtz32NonZero_0(v)
|
||||
case OpCvt32Fto32:
|
||||
return rewriteValueARM_OpCvt32Fto32_0(v)
|
||||
case OpCvt32Fto32U:
|
||||
|
@ -17959,6 +17961,17 @@ func rewriteValueARM_OpCtz32_0(v *Value) bool {
|
|||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueARM_OpCtz32NonZero_0(v *Value) bool {
|
||||
// match: (Ctz32NonZero x)
|
||||
// cond:
|
||||
// result: (Ctz32 x)
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v.reset(OpCtz32)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueARM_OpCvt32Fto32_0(v *Value) bool {
|
||||
// match: (Cvt32Fto32 x)
|
||||
// cond:
|
||||
|
|
|
@ -393,8 +393,12 @@ func rewriteValueARM64(v *Value) bool {
|
|||
return rewriteValueARM64_OpConstNil_0(v)
|
||||
case OpCtz32:
|
||||
return rewriteValueARM64_OpCtz32_0(v)
|
||||
case OpCtz32NonZero:
|
||||
return rewriteValueARM64_OpCtz32NonZero_0(v)
|
||||
case OpCtz64:
|
||||
return rewriteValueARM64_OpCtz64_0(v)
|
||||
case OpCtz64NonZero:
|
||||
return rewriteValueARM64_OpCtz64NonZero_0(v)
|
||||
case OpCvt32Fto32:
|
||||
return rewriteValueARM64_OpCvt32Fto32_0(v)
|
||||
case OpCvt32Fto32U:
|
||||
|
@ -21487,6 +21491,17 @@ func rewriteValueARM64_OpCtz32_0(v *Value) bool {
|
|||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueARM64_OpCtz32NonZero_0(v *Value) bool {
|
||||
// match: (Ctz32NonZero x)
|
||||
// cond:
|
||||
// result: (Ctz32 x)
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v.reset(OpCtz32)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueARM64_OpCtz64_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
|
@ -21503,6 +21518,17 @@ func rewriteValueARM64_OpCtz64_0(v *Value) bool {
|
|||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueARM64_OpCtz64NonZero_0(v *Value) bool {
|
||||
// match: (Ctz64NonZero x)
|
||||
// cond:
|
||||
// result: (Ctz64 x)
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v.reset(OpCtz64)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueARM64_OpCvt32Fto32_0(v *Value) bool {
|
||||
// match: (Cvt32Fto32 x)
|
||||
// cond:
|
||||
|
|
|
@ -85,6 +85,8 @@ func rewriteValueMIPS(v *Value) bool {
|
|||
return rewriteValueMIPS_OpConstNil_0(v)
|
||||
case OpCtz32:
|
||||
return rewriteValueMIPS_OpCtz32_0(v)
|
||||
case OpCtz32NonZero:
|
||||
return rewriteValueMIPS_OpCtz32NonZero_0(v)
|
||||
case OpCvt32Fto32:
|
||||
return rewriteValueMIPS_OpCvt32Fto32_0(v)
|
||||
case OpCvt32Fto64F:
|
||||
|
@ -1190,6 +1192,17 @@ func rewriteValueMIPS_OpCtz32_0(v *Value) bool {
|
|||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueMIPS_OpCtz32NonZero_0(v *Value) bool {
|
||||
// match: (Ctz32NonZero x)
|
||||
// cond:
|
||||
// result: (Ctz32 x)
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v.reset(OpCtz32)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueMIPS_OpCvt32Fto32_0(v *Value) bool {
|
||||
// match: (Cvt32Fto32 x)
|
||||
// cond:
|
||||
|
|
|
@ -107,8 +107,12 @@ func rewriteValuePPC64(v *Value) bool {
|
|||
return rewriteValuePPC64_OpCopysign_0(v)
|
||||
case OpCtz32:
|
||||
return rewriteValuePPC64_OpCtz32_0(v)
|
||||
case OpCtz32NonZero:
|
||||
return rewriteValuePPC64_OpCtz32NonZero_0(v)
|
||||
case OpCtz64:
|
||||
return rewriteValuePPC64_OpCtz64_0(v)
|
||||
case OpCtz64NonZero:
|
||||
return rewriteValuePPC64_OpCtz64NonZero_0(v)
|
||||
case OpCvt32Fto32:
|
||||
return rewriteValuePPC64_OpCvt32Fto32_0(v)
|
||||
case OpCvt32Fto64:
|
||||
|
@ -1312,6 +1316,17 @@ func rewriteValuePPC64_OpCtz32_0(v *Value) bool {
|
|||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValuePPC64_OpCtz32NonZero_0(v *Value) bool {
|
||||
// match: (Ctz32NonZero x)
|
||||
// cond:
|
||||
// result: (Ctz32 x)
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v.reset(OpCtz32)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValuePPC64_OpCtz64_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
|
@ -1333,6 +1348,17 @@ func rewriteValuePPC64_OpCtz64_0(v *Value) bool {
|
|||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValuePPC64_OpCtz64NonZero_0(v *Value) bool {
|
||||
// match: (Ctz64NonZero x)
|
||||
// cond:
|
||||
// result: (Ctz64 x)
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v.reset(OpCtz64)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValuePPC64_OpCvt32Fto32_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
|
|
|
@ -103,8 +103,12 @@ func rewriteValueS390X(v *Value) bool {
|
|||
return rewriteValueS390X_OpConstNil_0(v)
|
||||
case OpCtz32:
|
||||
return rewriteValueS390X_OpCtz32_0(v)
|
||||
case OpCtz32NonZero:
|
||||
return rewriteValueS390X_OpCtz32NonZero_0(v)
|
||||
case OpCtz64:
|
||||
return rewriteValueS390X_OpCtz64_0(v)
|
||||
case OpCtz64NonZero:
|
||||
return rewriteValueS390X_OpCtz64NonZero_0(v)
|
||||
case OpCvt32Fto32:
|
||||
return rewriteValueS390X_OpCvt32Fto32_0(v)
|
||||
case OpCvt32Fto64:
|
||||
|
@ -1420,6 +1424,17 @@ func rewriteValueS390X_OpCtz32_0(v *Value) bool {
|
|||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueS390X_OpCtz32NonZero_0(v *Value) bool {
|
||||
// match: (Ctz32NonZero x)
|
||||
// cond:
|
||||
// result: (Ctz32 x)
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v.reset(OpCtz32)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueS390X_OpCtz64_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
|
@ -1449,6 +1464,17 @@ func rewriteValueS390X_OpCtz64_0(v *Value) bool {
|
|||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueS390X_OpCtz64NonZero_0(v *Value) bool {
|
||||
// match: (Ctz64NonZero x)
|
||||
// cond:
|
||||
// result: (Ctz64 x)
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v.reset(OpCtz64)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueS390X_OpCvt32Fto32_0(v *Value) bool {
|
||||
// match: (Cvt32Fto32 x)
|
||||
// cond:
|
||||
|
|
|
@ -31,6 +31,8 @@ func rewriteValuedec64(v *Value) bool {
|
|||
return rewriteValuedec64_OpConst64_0(v)
|
||||
case OpCtz64:
|
||||
return rewriteValuedec64_OpCtz64_0(v)
|
||||
case OpCtz64NonZero:
|
||||
return rewriteValuedec64_OpCtz64NonZero_0(v)
|
||||
case OpEq64:
|
||||
return rewriteValuedec64_OpEq64_0(v)
|
||||
case OpGeq64:
|
||||
|
@ -454,6 +456,17 @@ func rewriteValuedec64_OpCtz64_0(v *Value) bool {
|
|||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValuedec64_OpCtz64NonZero_0(v *Value) bool {
|
||||
// match: (Ctz64NonZero x)
|
||||
// cond:
|
||||
// result: (Ctz64 x)
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v.reset(OpCtz64)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValuedec64_OpEq64_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
|
|
|
@ -215,3 +215,55 @@ func TrailingZeros8(n uint8) int {
|
|||
// s390x:"FLOGR","OR\t\\$256"
|
||||
return bits.TrailingZeros8(n)
|
||||
}
|
||||
|
||||
// IterateBitsNN checks special handling of TrailingZerosNN when the input is known to be non-zero.
|
||||
|
||||
func IterateBits(n uint) int {
|
||||
i := 0
|
||||
for n != 0 {
|
||||
// amd64:"BSFQ",-"CMOVEQ"
|
||||
i += bits.TrailingZeros(n)
|
||||
n &= n - 1
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
func IterateBits64(n uint64) int {
|
||||
i := 0
|
||||
for n != 0 {
|
||||
// amd64:"BSFQ",-"CMOVEQ"
|
||||
i += bits.TrailingZeros64(n)
|
||||
n &= n - 1
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
func IterateBits32(n uint32) int {
|
||||
i := 0
|
||||
for n != 0 {
|
||||
// amd64:"BSFL",-"BTSQ"
|
||||
i += bits.TrailingZeros32(n)
|
||||
n &= n - 1
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
func IterateBits16(n uint16) int {
|
||||
i := 0
|
||||
for n != 0 {
|
||||
// amd64:"BSFL",-"BTSL"
|
||||
i += bits.TrailingZeros16(n)
|
||||
n &= n - 1
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
func IterateBits8(n uint8) int {
|
||||
i := 0
|
||||
for n != 0 {
|
||||
// amd64:"BSFL",-"BTSL"
|
||||
i += bits.TrailingZeros8(n)
|
||||
n &= n - 1
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
|
|
@ -618,6 +618,7 @@ func (t *test) run() {
|
|||
var buf bytes.Buffer
|
||||
cmd.Stdout, cmd.Stderr = &buf, &buf
|
||||
if err := cmd.Run(); err != nil {
|
||||
fmt.Println(env, "\n", cmd.Stderr)
|
||||
t.err = err
|
||||
return
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue