mirror of
https://github.com/golang/go
synced 2024-10-02 22:25:08 +00:00
runtime: replace all uses of CtzXX with TrailingZerosXX
Replace all uses of Ctz64/32/8 with TrailingZeros64/32/8: the two sets of functions are functionally identical, so keeping both is needless duplication. Also rename the CtzXX functions in the 386 assembly code. Change-Id: I19290204858083750f4be589bb0923393950ae6d Reviewed-on: https://go-review.googlesource.com/c/go/+/438935 Reviewed-by: Keith Randall <khr@golang.org> Reviewed-by: Bryan Mills <bcmills@google.com> Auto-Submit: Keith Randall <khr@golang.org> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Keith Randall <khr@google.com> Run-TryBot: Keith Randall <khr@golang.org>
This commit is contained in:
parent
c45ebef05e
commit
7ae652b7c0
|
@ -4003,16 +4003,6 @@ func InitTables() {
|
||||||
sys.ARM64, sys.PPC64)
|
sys.ARM64, sys.PPC64)
|
||||||
|
|
||||||
/******** runtime/internal/sys ********/
|
/******** runtime/internal/sys ********/
|
||||||
addF("runtime/internal/sys", "Ctz32",
|
|
||||||
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
|
||||||
return s.newValue1(ssa.OpCtz32, types.Types[types.TINT], args[0])
|
|
||||||
},
|
|
||||||
sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
|
|
||||||
addF("runtime/internal/sys", "Ctz64",
|
|
||||||
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
|
||||||
return s.newValue1(ssa.OpCtz64, types.Types[types.TINT], args[0])
|
|
||||||
},
|
|
||||||
sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
|
|
||||||
addF("runtime/internal/sys", "Bswap32",
|
addF("runtime/internal/sys", "Bswap32",
|
||||||
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||||
return s.newValue1(ssa.OpBswap32, types.Types[types.TUINT32], args[0])
|
return s.newValue1(ssa.OpBswap32, types.Types[types.TUINT32], args[0])
|
||||||
|
@ -4750,8 +4740,8 @@ func InitTables() {
|
||||||
sys.AMD64)
|
sys.AMD64)
|
||||||
alias("math/bits", "Div", "math/bits", "Div64", sys.ArchAMD64)
|
alias("math/bits", "Div", "math/bits", "Div64", sys.ArchAMD64)
|
||||||
|
|
||||||
alias("runtime/internal/sys", "Ctz8", "math/bits", "TrailingZeros8", all...)
|
|
||||||
alias("runtime/internal/sys", "TrailingZeros8", "math/bits", "TrailingZeros8", all...)
|
alias("runtime/internal/sys", "TrailingZeros8", "math/bits", "TrailingZeros8", all...)
|
||||||
|
alias("runtime/internal/sys", "TrailingZeros32", "math/bits", "TrailingZeros32", all...)
|
||||||
alias("runtime/internal/sys", "TrailingZeros64", "math/bits", "TrailingZeros64", all...)
|
alias("runtime/internal/sys", "TrailingZeros64", "math/bits", "TrailingZeros64", all...)
|
||||||
alias("runtime/internal/sys", "Len8", "math/bits", "Len8", all...)
|
alias("runtime/internal/sys", "Len8", "math/bits", "Len8", all...)
|
||||||
alias("runtime/internal/sys", "Len64", "math/bits", "Len64", all...)
|
alias("runtime/internal/sys", "Len64", "math/bits", "Len64", all...)
|
||||||
|
|
|
@ -212,19 +212,19 @@ func TestIntendedInlining(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if runtime.GOARCH != "386" && runtime.GOARCH != "loong64" && runtime.GOARCH != "mips64" && runtime.GOARCH != "mips64le" && runtime.GOARCH != "riscv64" {
|
if runtime.GOARCH != "386" && runtime.GOARCH != "loong64" && runtime.GOARCH != "mips64" && runtime.GOARCH != "mips64le" && runtime.GOARCH != "riscv64" {
|
||||||
// nextFreeFast calls sys.Ctz64, which on 386 is implemented in asm and is not inlinable.
|
// nextFreeFast calls sys.TrailingZeros64, which on 386 is implemented in asm and is not inlinable.
|
||||||
// We currently don't have midstack inlining so nextFreeFast is also not inlinable on 386.
|
// We currently don't have midstack inlining so nextFreeFast is also not inlinable on 386.
|
||||||
// On loong64, mips64x and riscv64, Ctz64 is not intrinsified and causes nextFreeFast too expensive
|
// On loong64, mips64x and riscv64, TrailingZeros64 is not intrinsified and causes nextFreeFast
|
||||||
// to inline (Issue 22239).
|
// too expensive to inline (Issue 22239).
|
||||||
want["runtime"] = append(want["runtime"], "nextFreeFast")
|
want["runtime"] = append(want["runtime"], "nextFreeFast")
|
||||||
// Same behavior for heapBits.nextFast.
|
// Same behavior for heapBits.nextFast.
|
||||||
want["runtime"] = append(want["runtime"], "heapBits.nextFast")
|
want["runtime"] = append(want["runtime"], "heapBits.nextFast")
|
||||||
}
|
}
|
||||||
if runtime.GOARCH != "386" {
|
if runtime.GOARCH != "386" {
|
||||||
// As explained above, Ctz64 and Ctz32 are not Go code on 386.
|
// As explained above, TrailingZeros64 and TrailingZeros32 are not Go code on 386.
|
||||||
// The same applies to Bswap32.
|
// The same applies to Bswap32.
|
||||||
want["runtime/internal/sys"] = append(want["runtime/internal/sys"], "Ctz64")
|
want["runtime/internal/sys"] = append(want["runtime/internal/sys"], "TrailingZeros64")
|
||||||
want["runtime/internal/sys"] = append(want["runtime/internal/sys"], "Ctz32")
|
want["runtime/internal/sys"] = append(want["runtime/internal/sys"], "TrailingZeros32")
|
||||||
want["runtime/internal/sys"] = append(want["runtime/internal/sys"], "Bswap32")
|
want["runtime/internal/sys"] = append(want["runtime/internal/sys"], "Bswap32")
|
||||||
}
|
}
|
||||||
if bits.UintSize == 64 {
|
if bits.UintSize == 64 {
|
||||||
|
|
|
@ -5,56 +5,75 @@
|
||||||
//go:build !386
|
//go:build !386
|
||||||
|
|
||||||
// TODO finish intrinsifying 386, deadcode the assembly, remove build tags, merge w/ intrinsics_common
|
// TODO finish intrinsifying 386, deadcode the assembly, remove build tags, merge w/ intrinsics_common
|
||||||
// TODO replace all uses of CtzXX with TrailingZerosXX; they are the same.
|
|
||||||
|
|
||||||
package sys
|
package sys
|
||||||
|
|
||||||
// Using techniques from http://supertech.csail.mit.edu/papers/debruijn.pdf
|
// Copied from math/bits to avoid dependence.
|
||||||
|
|
||||||
const deBruijn64ctz = 0x0218a392cd3d5dbf
|
var deBruijn32tab = [32]byte{
|
||||||
|
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
|
||||||
var deBruijnIdx64ctz = [64]byte{
|
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
|
||||||
0, 1, 2, 7, 3, 13, 8, 19,
|
|
||||||
4, 25, 14, 28, 9, 34, 20, 40,
|
|
||||||
5, 17, 26, 38, 15, 46, 29, 48,
|
|
||||||
10, 31, 35, 54, 21, 50, 41, 57,
|
|
||||||
63, 6, 12, 18, 24, 27, 33, 39,
|
|
||||||
16, 37, 45, 47, 30, 53, 49, 56,
|
|
||||||
62, 11, 23, 32, 36, 44, 52, 55,
|
|
||||||
61, 22, 43, 51, 60, 42, 59, 58,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const deBruijn32ctz = 0x04653adf
|
const deBruijn32 = 0x077CB531
|
||||||
|
|
||||||
var deBruijnIdx32ctz = [32]byte{
|
var deBruijn64tab = [64]byte{
|
||||||
0, 1, 2, 6, 3, 11, 7, 16,
|
0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
|
||||||
4, 14, 12, 21, 8, 23, 17, 26,
|
62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
|
||||||
31, 5, 10, 15, 13, 20, 22, 25,
|
63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
|
||||||
30, 9, 19, 24, 29, 18, 28, 27,
|
54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ctz64 counts trailing (low-order) zeroes,
|
const deBruijn64 = 0x03f79d71b4ca8b09
|
||||||
// and if all are zero, then 64.
|
|
||||||
func Ctz64(x uint64) int {
|
const ntz8tab = "" +
|
||||||
x &= -x // isolate low-order bit
|
"\x08\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
|
||||||
y := x * deBruijn64ctz >> 58 // extract part of deBruijn sequence
|
"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
|
||||||
i := int(deBruijnIdx64ctz[y]) // convert to bit index
|
"\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
|
||||||
z := int((x - 1) >> 57 & 64) // adjustment if zero
|
"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
|
||||||
return i + z
|
"\x06\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
|
||||||
|
"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
|
||||||
|
"\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
|
||||||
|
"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
|
||||||
|
"\x07\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
|
||||||
|
"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
|
||||||
|
"\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
|
||||||
|
"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
|
||||||
|
"\x06\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
|
||||||
|
"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
|
||||||
|
"\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
|
||||||
|
"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00"
|
||||||
|
|
||||||
|
// TrailingZeros32 returns the number of trailing zero bits in x; the result is 32 for x == 0.
|
||||||
|
func TrailingZeros32(x uint32) int {
|
||||||
|
if x == 0 {
|
||||||
|
return 32
|
||||||
|
}
|
||||||
|
// see comment in TrailingZeros64
|
||||||
|
return int(deBruijn32tab[(x&-x)*deBruijn32>>(32-5)])
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ctz32 counts trailing (low-order) zeroes,
|
// TrailingZeros64 returns the number of trailing zero bits in x; the result is 64 for x == 0.
|
||||||
// and if all are zero, then 32.
|
func TrailingZeros64(x uint64) int {
|
||||||
func Ctz32(x uint32) int {
|
if x == 0 {
|
||||||
x &= -x // isolate low-order bit
|
return 64
|
||||||
y := x * deBruijn32ctz >> 27 // extract part of deBruijn sequence
|
}
|
||||||
i := int(deBruijnIdx32ctz[y]) // convert to bit index
|
// If popcount is fast, replace code below with return popcount(^x & (x - 1)).
|
||||||
z := int((x - 1) >> 26 & 32) // adjustment if zero
|
//
|
||||||
return i + z
|
// x & -x leaves only the right-most bit set in the word. Let k be the
|
||||||
|
// index of that bit. Since only a single bit is set, the value is two
|
||||||
|
// to the power of k. Multiplying by a power of two is equivalent to
|
||||||
|
// left shifting, in this case by k bits. The de Bruijn (64 bit) constant
|
||||||
|
// is such that all six-bit consecutive substrings are distinct.
|
||||||
|
// Therefore, if we have a left shifted version of this constant we can
|
||||||
|
// find by how many bits it was shifted by looking at which six bit
|
||||||
|
// substring ended up at the top of the word.
|
||||||
|
// (Knuth, volume 4, section 7.3.1)
|
||||||
|
return int(deBruijn64tab[(x&-x)*deBruijn64>>(64-6)])
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ctz8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
|
// TrailingZeros8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
|
||||||
func Ctz8(x uint8) int {
|
func TrailingZeros8(x uint8) int {
|
||||||
return int(ntz8tab[x])
|
return int(ntz8tab[x])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
|
|
||||||
#include "textflag.h"
|
#include "textflag.h"
|
||||||
|
|
||||||
TEXT runtime∕internal∕sys·Ctz64(SB), NOSPLIT, $0-12
|
TEXT runtime∕internal∕sys·TrailingZeros64(SB), NOSPLIT, $0-12
|
||||||
// Try low 32 bits.
|
// Try low 32 bits.
|
||||||
MOVL x_lo+0(FP), AX
|
MOVL x_lo+0(FP), AX
|
||||||
BSFL AX, AX
|
BSFL AX, AX
|
||||||
|
@ -26,7 +26,7 @@ none:
|
||||||
MOVL $64, ret+8(FP)
|
MOVL $64, ret+8(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
TEXT runtime∕internal∕sys·Ctz32(SB), NOSPLIT, $0-8
|
TEXT runtime∕internal∕sys·TrailingZeros32(SB), NOSPLIT, $0-8
|
||||||
MOVL x+0(FP), AX
|
MOVL x+0(FP), AX
|
||||||
BSFL AX, AX
|
BSFL AX, AX
|
||||||
JNZ 2(PC)
|
JNZ 2(PC)
|
||||||
|
@ -34,7 +34,7 @@ TEXT runtime∕internal∕sys·Ctz32(SB), NOSPLIT, $0-8
|
||||||
MOVL AX, ret+4(FP)
|
MOVL AX, ret+4(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
TEXT runtime∕internal∕sys·Ctz8(SB), NOSPLIT, $0-8
|
TEXT runtime∕internal∕sys·TrailingZeros8(SB), NOSPLIT, $0-8
|
||||||
MOVBLZX x+0(FP), AX
|
MOVBLZX x+0(FP), AX
|
||||||
BSFL AX, AX
|
BSFL AX, AX
|
||||||
JNZ 2(PC)
|
JNZ 2(PC)
|
||||||
|
|
|
@ -6,43 +6,23 @@ package sys
|
||||||
|
|
||||||
// Copied from math/bits to avoid dependence.
|
// Copied from math/bits to avoid dependence.
|
||||||
|
|
||||||
var len8tab = [256]uint8{
|
const len8tab = "" +
|
||||||
0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
|
"\x00\x01\x02\x02\x03\x03\x03\x03\x04\x04\x04\x04\x04\x04\x04\x04" +
|
||||||
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
|
"\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05" +
|
||||||
0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
|
"\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06" +
|
||||||
0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
|
"\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06" +
|
||||||
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
|
"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
|
||||||
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
|
"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
|
||||||
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
|
"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
|
||||||
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
|
"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
|
||||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
|
"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
|
||||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
|
"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
|
||||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
|
"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
|
||||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
|
"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
|
||||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
|
"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
|
||||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
|
"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
|
||||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
|
"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
|
||||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
|
"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08"
|
||||||
}
|
|
||||||
|
|
||||||
var ntz8tab = [256]uint8{
|
|
||||||
0x08, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
|
||||||
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
|
||||||
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
|
||||||
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
|
||||||
0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
|
||||||
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
|
||||||
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
|
||||||
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
|
||||||
0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
|
||||||
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
|
||||||
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
|
||||||
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
|
||||||
0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
|
||||||
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
|
||||||
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
|
||||||
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
|
||||||
}
|
|
||||||
|
|
||||||
// len64 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
|
// len64 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
|
||||||
//
|
//
|
||||||
|
@ -102,45 +82,12 @@ func OnesCount64(x uint64) int {
|
||||||
return int(x) & (1<<7 - 1)
|
return int(x) & (1<<7 - 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
var deBruijn64tab = [64]byte{
|
|
||||||
0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
|
|
||||||
62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
|
|
||||||
63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
|
|
||||||
54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
|
|
||||||
}
|
|
||||||
|
|
||||||
const deBruijn64 = 0x03f79d71b4ca8b09
|
|
||||||
|
|
||||||
// TrailingZeros64 returns the number of trailing zero bits in x; the result is 64 for x == 0.
|
|
||||||
func TrailingZeros64(x uint64) int {
|
|
||||||
if x == 0 {
|
|
||||||
return 64
|
|
||||||
}
|
|
||||||
// If popcount is fast, replace code below with return popcount(^x & (x - 1)).
|
|
||||||
//
|
|
||||||
// x & -x leaves only the right-most bit set in the word. Let k be the
|
|
||||||
// index of that bit. Since only a single bit is set, the value is two
|
|
||||||
// to the power of k. Multiplying by a power of two is equivalent to
|
|
||||||
// left shifting, in this case by k bits. The de Bruijn (64 bit) constant
|
|
||||||
// is such that all six bit, consecutive substrings are distinct.
|
|
||||||
// Therefore, if we have a left shifted version of this constant we can
|
|
||||||
// find by how many bits it was shifted by looking at which six bit
|
|
||||||
// substring ended up at the top of the word.
|
|
||||||
// (Knuth, volume 4, section 7.3.1)
|
|
||||||
return int(deBruijn64tab[(x&-x)*deBruijn64>>(64-6)])
|
|
||||||
}
|
|
||||||
|
|
||||||
// LeadingZeros64 returns the number of leading zero bits in x; the result is 64 for x == 0.
|
// LeadingZeros64 returns the number of leading zero bits in x; the result is 64 for x == 0.
|
||||||
func LeadingZeros64(x uint64) int { return 64 - Len64(x) }
|
func LeadingZeros64(x uint64) int { return 64 - Len64(x) }
|
||||||
|
|
||||||
// LeadingZeros8 returns the number of leading zero bits in x; the result is 8 for x == 0.
|
// LeadingZeros8 returns the number of leading zero bits in x; the result is 8 for x == 0.
|
||||||
func LeadingZeros8(x uint8) int { return 8 - Len8(x) }
|
func LeadingZeros8(x uint8) int { return 8 - Len8(x) }
|
||||||
|
|
||||||
// TrailingZeros8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
|
|
||||||
func TrailingZeros8(x uint8) int {
|
|
||||||
return int(ntz8tab[x])
|
|
||||||
}
|
|
||||||
|
|
||||||
// Len8 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
|
// Len8 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
|
||||||
func Len8(x uint8) int {
|
func Len8(x uint8) int {
|
||||||
return int(len8tab[x])
|
return int(len8tab[x])
|
||||||
|
|
|
@ -6,8 +6,8 @@
|
||||||
|
|
||||||
package sys
|
package sys
|
||||||
|
|
||||||
func Ctz64(x uint64) int
|
func TrailingZeros64(x uint64) int
|
||||||
func Ctz32(x uint32) int
|
func TrailingZeros32(x uint32) int
|
||||||
func Ctz8(x uint8) int
|
func TrailingZeros8(x uint8) int
|
||||||
func Bswap64(x uint64) uint64
|
func Bswap64(x uint64) uint64
|
||||||
func Bswap32(x uint32) uint32
|
func Bswap32(x uint32) uint32
|
||||||
|
|
|
@ -5,19 +5,19 @@ import (
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestCtz64(t *testing.T) {
|
func TestTrailingZeros64(t *testing.T) {
|
||||||
for i := 0; i <= 64; i++ {
|
for i := 0; i <= 64; i++ {
|
||||||
x := uint64(5) << uint(i)
|
x := uint64(5) << uint(i)
|
||||||
if got := sys.Ctz64(x); got != i {
|
if got := sys.TrailingZeros64(x); got != i {
|
||||||
t.Errorf("Ctz64(%d)=%d, want %d", x, got, i)
|
t.Errorf("TrailingZeros64(%d)=%d, want %d", x, got, i)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
func TestCtz32(t *testing.T) {
|
func TestTrailingZeros32(t *testing.T) {
|
||||||
for i := 0; i <= 32; i++ {
|
for i := 0; i <= 32; i++ {
|
||||||
x := uint32(5) << uint(i)
|
x := uint32(5) << uint(i)
|
||||||
if got := sys.Ctz32(x); got != i {
|
if got := sys.TrailingZeros32(x); got != i {
|
||||||
t.Errorf("Ctz32(%d)=%d, want %d", x, got, i)
|
t.Errorf("TrailingZeros32(%d)=%d, want %d", x, got, i)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -816,7 +816,7 @@ var zerobase uintptr
|
||||||
// nextFreeFast returns the next free object if one is quickly available.
|
// nextFreeFast returns the next free object if one is quickly available.
|
||||||
// Otherwise it returns 0.
|
// Otherwise it returns 0.
|
||||||
func nextFreeFast(s *mspan) gclinkptr {
|
func nextFreeFast(s *mspan) gclinkptr {
|
||||||
theBit := sys.Ctz64(s.allocCache) // Is there a free object in the allocCache?
|
theBit := sys.TrailingZeros64(s.allocCache) // Is there a free object in the allocCache?
|
||||||
if theBit < 64 {
|
if theBit < 64 {
|
||||||
result := s.freeindex + uintptr(theBit)
|
result := s.freeindex + uintptr(theBit)
|
||||||
if result < s.nelems {
|
if result < s.nelems {
|
||||||
|
|
|
@ -147,7 +147,7 @@ func (s *mspan) nextFreeIndex() uintptr {
|
||||||
|
|
||||||
aCache := s.allocCache
|
aCache := s.allocCache
|
||||||
|
|
||||||
bitIndex := sys.Ctz64(aCache)
|
bitIndex := sys.TrailingZeros64(aCache)
|
||||||
for bitIndex == 64 {
|
for bitIndex == 64 {
|
||||||
// Move index to start of next cached bits.
|
// Move index to start of next cached bits.
|
||||||
sfreeindex = (sfreeindex + 64) &^ (64 - 1)
|
sfreeindex = (sfreeindex + 64) &^ (64 - 1)
|
||||||
|
@ -159,7 +159,7 @@ func (s *mspan) nextFreeIndex() uintptr {
|
||||||
// Refill s.allocCache with the next 64 alloc bits.
|
// Refill s.allocCache with the next 64 alloc bits.
|
||||||
s.refillAllocCache(whichByte)
|
s.refillAllocCache(whichByte)
|
||||||
aCache = s.allocCache
|
aCache = s.allocCache
|
||||||
bitIndex = sys.Ctz64(aCache)
|
bitIndex = sys.TrailingZeros64(aCache)
|
||||||
// nothing available in cached bits
|
// nothing available in cached bits
|
||||||
// grab the next 8 bytes and try again.
|
// grab the next 8 bytes and try again.
|
||||||
}
|
}
|
||||||
|
@ -452,9 +452,9 @@ func (h heapBits) next() (heapBits, uintptr) {
|
||||||
if h.mask != 0 {
|
if h.mask != 0 {
|
||||||
var i int
|
var i int
|
||||||
if goarch.PtrSize == 8 {
|
if goarch.PtrSize == 8 {
|
||||||
i = sys.Ctz64(uint64(h.mask))
|
i = sys.TrailingZeros64(uint64(h.mask))
|
||||||
} else {
|
} else {
|
||||||
i = sys.Ctz32(uint32(h.mask))
|
i = sys.TrailingZeros32(uint32(h.mask))
|
||||||
}
|
}
|
||||||
h.mask ^= uintptr(1) << (i & (ptrBits - 1))
|
h.mask ^= uintptr(1) << (i & (ptrBits - 1))
|
||||||
return h, h.addr + uintptr(i)*goarch.PtrSize
|
return h, h.addr + uintptr(i)*goarch.PtrSize
|
||||||
|
@ -494,9 +494,9 @@ func (h heapBits) nextFast() (heapBits, uintptr) {
|
||||||
// BSFQ
|
// BSFQ
|
||||||
var i int
|
var i int
|
||||||
if goarch.PtrSize == 8 {
|
if goarch.PtrSize == 8 {
|
||||||
i = sys.Ctz64(uint64(h.mask))
|
i = sys.TrailingZeros64(uint64(h.mask))
|
||||||
} else {
|
} else {
|
||||||
i = sys.Ctz32(uint32(h.mask))
|
i = sys.TrailingZeros32(uint32(h.mask))
|
||||||
}
|
}
|
||||||
// BTCQ
|
// BTCQ
|
||||||
h.mask ^= uintptr(1) << (i & (ptrBits - 1))
|
h.mask ^= uintptr(1) << (i & (ptrBits - 1))
|
||||||
|
|
|
@ -225,9 +225,9 @@ func growslice(oldPtr unsafe.Pointer, newLen, oldCap, num int, et *_type) slice
|
||||||
var shift uintptr
|
var shift uintptr
|
||||||
if goarch.PtrSize == 8 {
|
if goarch.PtrSize == 8 {
|
||||||
// Mask shift for better code generation.
|
// Mask shift for better code generation.
|
||||||
shift = uintptr(sys.Ctz64(uint64(et.size))) & 63
|
shift = uintptr(sys.TrailingZeros64(uint64(et.size))) & 63
|
||||||
} else {
|
} else {
|
||||||
shift = uintptr(sys.Ctz32(uint32(et.size))) & 31
|
shift = uintptr(sys.TrailingZeros32(uint32(et.size))) & 31
|
||||||
}
|
}
|
||||||
lenmem = uintptr(oldLen) << shift
|
lenmem = uintptr(oldLen) << shift
|
||||||
newlenmem = uintptr(newLen) << shift
|
newlenmem = uintptr(newLen) << shift
|
||||||
|
|
|
@ -617,7 +617,7 @@ func adjustpointers(scanp unsafe.Pointer, bv *bitvector, adjinfo *adjustinfo, f
|
||||||
}
|
}
|
||||||
b := *(addb(bv.bytedata, i/8))
|
b := *(addb(bv.bytedata, i/8))
|
||||||
for b != 0 {
|
for b != 0 {
|
||||||
j := uintptr(sys.Ctz8(b))
|
j := uintptr(sys.TrailingZeros8(b))
|
||||||
b &= b - 1
|
b &= b - 1
|
||||||
pp := (*uintptr)(add(scanp, (i+j)*goarch.PtrSize))
|
pp := (*uintptr)(add(scanp, (i+j)*goarch.PtrSize))
|
||||||
retry:
|
retry:
|
||||||
|
|
|
@ -23,26 +23,26 @@ func logf(f string, args ...interface{}) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func test(i int, x uint64) {
|
func test(i int, x uint64) {
|
||||||
t := T.Ctz64(x) // ERROR "intrinsic substitution for Ctz64"
|
t := T.TrailingZeros64(x) // ERROR "intrinsic substitution for TrailingZeros64"
|
||||||
if i != t {
|
if i != t {
|
||||||
logf("Ctz64(0x%x) expected %d but got %d\n", x, i, t)
|
logf("TrailingZeros64(0x%x) expected %d but got %d\n", x, i, t)
|
||||||
}
|
}
|
||||||
x = -x
|
x = -x
|
||||||
t = T.Ctz64(x) // ERROR "intrinsic substitution for Ctz64"
|
t = T.TrailingZeros64(x) // ERROR "intrinsic substitution for TrailingZeros64"
|
||||||
if i != t {
|
if i != t {
|
||||||
logf("Ctz64(0x%x) expected %d but got %d\n", x, i, t)
|
logf("TrailingZeros64(0x%x) expected %d but got %d\n", x, i, t)
|
||||||
}
|
}
|
||||||
|
|
||||||
if i <= 32 {
|
if i <= 32 {
|
||||||
x32 := uint32(x)
|
x32 := uint32(x)
|
||||||
t32 := T.Ctz32(x32) // ERROR "intrinsic substitution for Ctz32"
|
t32 := T.TrailingZeros32(x32) // ERROR "intrinsic substitution for TrailingZeros32"
|
||||||
if i != t32 {
|
if i != t32 {
|
||||||
logf("Ctz32(0x%x) expected %d but got %d\n", x32, i, t32)
|
logf("TrailingZeros32(0x%x) expected %d but got %d\n", x32, i, t32)
|
||||||
}
|
}
|
||||||
x32 = -x32
|
x32 = -x32
|
||||||
t32 = T.Ctz32(x32) // ERROR "intrinsic substitution for Ctz32"
|
t32 = T.TrailingZeros32(x32) // ERROR "intrinsic substitution for TrailingZeros32"
|
||||||
if i != t32 {
|
if i != t32 {
|
||||||
logf("Ctz32(0x%x) expected %d but got %d\n", x32, i, t32)
|
logf("TrailingZeros32(0x%x) expected %d but got %d\n", x32, i, t32)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -76,11 +76,11 @@ func main() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Zero is a special case, be sure it is done right.
|
// Zero is a special case, be sure it is done right.
|
||||||
if T.Ctz32(0) != 32 { // ERROR "intrinsic substitution for Ctz32"
|
if T.TrailingZeros32(0) != 32 { // ERROR "intrinsic substitution for TrailingZeros32"
|
||||||
logf("ctz32(0) != 32")
|
logf("TrailingZeros32(0) != 32")
|
||||||
}
|
}
|
||||||
if T.Ctz64(0) != 64 { // ERROR "intrinsic substitution for Ctz64"
|
if T.TrailingZeros64(0) != 64 { // ERROR "intrinsic substitution for TrailingZeros64"
|
||||||
logf("ctz64(0) != 64")
|
logf("TrailingZeros64(0) != 64")
|
||||||
}
|
}
|
||||||
|
|
||||||
for i := 0; i <= 64; i++ {
|
for i := 0; i <= 64; i++ {
|
||||||
|
|
Loading…
Reference in a new issue