mirror of
https://github.com/golang/go
synced 2024-10-06 08:00:07 +00:00
runtime: shrink time histogram buckets
There are lots of useless buckets with too much precision. Introduce a minimum level of precision with a minimum bucket bit. This cuts down on the size of a time histogram dramatically (~3x). Also, pick a smaller sub bucket count; we don't need 6% precision. Also, rename super-buckets to buckets to more closely line up with HDR histogram literature. Change-Id: I199449650e4b34f2a6dca3cf1d8edb071c6655c0 Reviewed-on: https://go-review.googlesource.com/c/go/+/427615 Run-TryBot: Michael Knyszek <mknyszek@google.com> Auto-Submit: Michael Knyszek <mknyszek@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Michael Pratt <mpratt@google.com>
This commit is contained in:
parent
1fc83690e6
commit
87eda2a782
|
@ -1228,22 +1228,28 @@ func MSpanCountAlloc(ms *MSpan, bits []byte) int {
|
|||
}
|
||||
|
||||
const (
|
||||
TimeHistSubBucketBits = timeHistSubBucketBits
|
||||
TimeHistNumSubBuckets = timeHistNumSubBuckets
|
||||
TimeHistNumSuperBuckets = timeHistNumSuperBuckets
|
||||
TimeHistSubBucketBits = timeHistSubBucketBits
|
||||
TimeHistNumSubBuckets = timeHistNumSubBuckets
|
||||
TimeHistNumBuckets = timeHistNumBuckets
|
||||
TimeHistMinBucketBits = timeHistMinBucketBits
|
||||
TimeHistMaxBucketBits = timeHistMaxBucketBits
|
||||
)
|
||||
|
||||
type TimeHistogram timeHistogram
|
||||
|
||||
// Counts returns the counts for the given bucket, subBucket indices.
|
||||
// Returns true if the bucket was valid, otherwise returns the counts
|
||||
// for the underflow bucket and false.
|
||||
func (th *TimeHistogram) Count(bucket, subBucket uint) (uint64, bool) {
|
||||
// for the overflow bucket if bucket > 0 or the underflow bucket if
|
||||
// bucket < 0, and false.
|
||||
func (th *TimeHistogram) Count(bucket, subBucket int) (uint64, bool) {
|
||||
t := (*timeHistogram)(th)
|
||||
i := bucket*TimeHistNumSubBuckets + subBucket
|
||||
if i >= uint(len(t.counts)) {
|
||||
if bucket < 0 {
|
||||
return t.underflow.Load(), false
|
||||
}
|
||||
i := bucket*TimeHistNumSubBuckets + subBucket
|
||||
if i >= len(t.counts) {
|
||||
return t.overflow.Load(), false
|
||||
}
|
||||
return t.counts[i].Load(), true
|
||||
}
|
||||
|
||||
|
|
|
@ -12,63 +12,77 @@ import (
|
|||
|
||||
const (
|
||||
// For the time histogram type, we use an HDR histogram.
|
||||
// Values are placed in super-buckets based solely on the most
|
||||
// significant set bit. Thus, super-buckets are power-of-2 sized.
|
||||
// Values are placed in buckets based solely on the most
|
||||
// significant set bit. Thus, buckets are power-of-2 sized.
|
||||
// Values are then placed into sub-buckets based on the value of
|
||||
// the next timeHistSubBucketBits most significant bits. Thus,
|
||||
// sub-buckets are linear within a super-bucket.
|
||||
// sub-buckets are linear within a bucket.
|
||||
//
|
||||
// Therefore, the number of sub-buckets (timeHistNumSubBuckets)
|
||||
// defines the error. This error may be computed as
|
||||
// 1/timeHistNumSubBuckets*100%. For example, for 16 sub-buckets
|
||||
// per super-bucket the error is approximately 6%.
|
||||
// per bucket the error is approximately 6%.
|
||||
//
|
||||
// The number of super-buckets (timeHistNumSuperBuckets), on the
|
||||
// other hand, defines the range. To reserve room for sub-buckets,
|
||||
// bit timeHistSubBucketBits is the first bit considered for
|
||||
// super-buckets, so super-bucket indices are adjusted accordingly.
|
||||
// The number of buckets (timeHistNumBuckets), on the
|
||||
// other hand, defines the range. To avoid producing a large number
|
||||
// of buckets that are close together, especially for small numbers
|
||||
// (e.g. 1, 2, 3, 4, 5 ns) that aren't very useful, timeHistNumBuckets
|
||||
// is defined in terms of the least significant bit (timeHistMinBucketBits)
|
||||
// that needs to be set before we start bucketing and the most
|
||||
// significant bit (timeHistMaxBucketBits) that we bucket before we just
|
||||
// dump it into a catch-all bucket.
|
||||
//
|
||||
// As an example, consider 45 super-buckets with 16 sub-buckets.
|
||||
// As an example, consider the configuration:
|
||||
//
|
||||
// 00110
|
||||
// ^----
|
||||
// │ ^
|
||||
// │ └---- Lowest 4 bits -> sub-bucket 6
|
||||
// └------- Bit 4 unset -> super-bucket 0
|
||||
// timeHistMinBucketBits = 9
|
||||
// timeHistMaxBucketBits = 48
|
||||
// timeHistSubBucketBits = 2
|
||||
//
|
||||
// 10110
|
||||
// ^----
|
||||
// │ ^
|
||||
// │ └---- Next 4 bits -> sub-bucket 6
|
||||
// └------- Bit 4 set -> super-bucket 1
|
||||
// 100010
|
||||
// ^----^
|
||||
// │ ^ └-- Lower bits ignored
|
||||
// │ └---- Next 4 bits -> sub-bucket 1
|
||||
// └------- Bit 5 set -> super-bucket 2
|
||||
// Then:
|
||||
//
|
||||
// Following this pattern, super-bucket 44 will have the bit 47 set. We don't
|
||||
// have any buckets for higher values, so the highest sub-bucket will
|
||||
// contain values of 2^48-1 nanoseconds or approx. 3 days. This range is
|
||||
// more than enough to handle durations produced by the runtime.
|
||||
timeHistSubBucketBits = 4
|
||||
timeHistNumSubBuckets = 1 << timeHistSubBucketBits
|
||||
timeHistNumSuperBuckets = 45
|
||||
timeHistTotalBuckets = timeHistNumSuperBuckets*timeHistNumSubBuckets + 1
|
||||
// 011000001
|
||||
// ^--
|
||||
// │ ^
|
||||
// │ └---- Next 2 bits -> sub-bucket 3
|
||||
// └------- Bit 9 unset -> bucket 0
|
||||
//
|
||||
// 110000001
|
||||
// ^--
|
||||
// │ ^
|
||||
// │ └---- Next 2 bits -> sub-bucket 2
|
||||
// └------- Bit 9 set -> bucket 1
|
||||
//
|
||||
// 1000000010
|
||||
// ^-- ^
|
||||
// │ ^ └-- Lower bits ignored
|
||||
// │ └---- Next 2 bits -> sub-bucket 0
|
||||
// └------- Bit 10 set -> bucket 2
|
||||
//
|
||||
// Following this pattern, bucket 38 will have the bit 46 set. We don't
|
||||
// have any buckets for higher values, so we spill the rest into an overflow
|
||||
// bucket containing values of 2^47-1 nanoseconds or approx. 1 day or more.
|
||||
// This range is more than enough to handle durations produced by the runtime.
|
||||
timeHistMinBucketBits = 9
|
||||
timeHistMaxBucketBits = 48 // Note that this is exclusive; 1 higher than the actual range.
|
||||
timeHistSubBucketBits = 2
|
||||
timeHistNumSubBuckets = 1 << timeHistSubBucketBits
|
||||
timeHistNumBuckets = timeHistMaxBucketBits - timeHistMinBucketBits + 1
|
||||
// Two extra buckets, one for underflow, one for overflow.
|
||||
timeHistTotalBuckets = timeHistNumBuckets*timeHistNumSubBuckets + 2
|
||||
)
|
||||
|
||||
// timeHistogram represents a distribution of durations in
|
||||
// nanoseconds.
|
||||
//
|
||||
// The accuracy and range of the histogram is defined by the
|
||||
// timeHistSubBucketBits and timeHistNumSuperBuckets constants.
|
||||
// timeHistSubBucketBits and timeHistNumBuckets constants.
|
||||
//
|
||||
// It is an HDR histogram with exponentially-distributed
|
||||
// buckets and linearly distributed sub-buckets.
|
||||
//
|
||||
// The histogram is safe for concurrent reads and writes.
|
||||
type timeHistogram struct {
|
||||
counts [timeHistNumSuperBuckets * timeHistNumSubBuckets]atomic.Uint64
|
||||
counts [timeHistNumBuckets * timeHistNumSubBuckets]atomic.Uint64
|
||||
|
||||
// underflow counts all the times we got a negative duration
|
||||
// sample. Because of how time works on some platforms, it's
|
||||
|
@ -76,6 +90,10 @@ type timeHistogram struct {
|
|||
// but we record them anyway because it's better to have some
|
||||
// signal that it's happening than just missing samples.
|
||||
underflow atomic.Uint64
|
||||
|
||||
// overflow counts all the times we got a duration that exceeded
|
||||
// the range counts represents.
|
||||
overflow atomic.Uint64
|
||||
}
|
||||
|
||||
// record adds the given duration to the distribution.
|
||||
|
@ -85,36 +103,35 @@ type timeHistogram struct {
|
|||
//
|
||||
//go:nosplit
|
||||
func (h *timeHistogram) record(duration int64) {
|
||||
// If the duration is negative, capture that in underflow.
|
||||
if duration < 0 {
|
||||
h.underflow.Add(1)
|
||||
return
|
||||
}
|
||||
// The index of the exponential bucket is just the index
|
||||
// of the highest set bit adjusted for how many bits we
|
||||
// use for the subbucket. Note that it's timeHistSubBucketsBits-1
|
||||
// because we use the 0th bucket to hold values < timeHistNumSubBuckets.
|
||||
var superBucket, subBucket uint
|
||||
if duration >= timeHistNumSubBuckets {
|
||||
// At this point, we know the duration value will always be
|
||||
// at least timeHistSubBucketsBits long.
|
||||
superBucket = uint(sys.Len64(uint64(duration))) - timeHistSubBucketBits
|
||||
if superBucket*timeHistNumSubBuckets >= uint(len(h.counts)) {
|
||||
// The bucket index we got is larger than what we support, so
|
||||
// include this count in the highest bucket, which extends to
|
||||
// infinity.
|
||||
superBucket = timeHistNumSuperBuckets - 1
|
||||
subBucket = timeHistNumSubBuckets - 1
|
||||
} else {
|
||||
// The linear subbucket index is just the timeHistSubBucketsBits
|
||||
// bits after the top bit. To extract that value, shift down
|
||||
// the duration such that we leave the top bit and the next bits
|
||||
// intact, then extract the index.
|
||||
subBucket = uint((duration >> (superBucket - 1)) % timeHistNumSubBuckets)
|
||||
}
|
||||
// bucketBit is the target bit for the bucket which is usually the
|
||||
// highest 1 bit, but if we're less than the minimum, is the highest
|
||||
// 1 bit of the minimum (which will be zero in the duration).
|
||||
//
|
||||
// bucket is the bucket index, which is the bucketBit minus the
|
||||
// highest bit of the minimum, plus one to leave room for the catch-all
|
||||
// bucket for samples lower than the minimum.
|
||||
var bucketBit, bucket uint
|
||||
if l := sys.Len64(uint64(duration)); l < timeHistMinBucketBits {
|
||||
bucketBit = timeHistMinBucketBits
|
||||
bucket = 0 // bucketBit - timeHistMinBucketBits
|
||||
} else {
|
||||
subBucket = uint(duration)
|
||||
bucketBit = uint(l)
|
||||
bucket = bucketBit - timeHistMinBucketBits + 1
|
||||
}
|
||||
h.counts[superBucket*timeHistNumSubBuckets+subBucket].Add(1)
|
||||
// If the bucket we computed is greater than the number of buckets,
|
||||
// count that in overflow.
|
||||
if bucket >= timeHistNumBuckets {
|
||||
h.overflow.Add(1)
|
||||
return
|
||||
}
|
||||
// The sub-bucket index is just next timeHistSubBucketBits after the bucketBit.
|
||||
subBucket := uint(duration>>(bucketBit-1-timeHistSubBucketBits)) % timeHistNumSubBuckets
|
||||
h.counts[bucket*timeHistNumSubBuckets+subBucket].Add(1)
|
||||
}
|
||||
|
||||
const (
|
||||
|
@ -137,33 +154,37 @@ func float64NegInf() float64 {
|
|||
// not nanoseconds like the timeHistogram represents durations.
|
||||
func timeHistogramMetricsBuckets() []float64 {
|
||||
b := make([]float64, timeHistTotalBuckets+1)
|
||||
// Underflow bucket.
|
||||
b[0] = float64NegInf()
|
||||
// Super-bucket 0 has no bits above timeHistSubBucketBits
|
||||
// set, so just iterate over each bucket and assign the
|
||||
// incrementing bucket.
|
||||
for i := 0; i < timeHistNumSubBuckets; i++ {
|
||||
bucketNanos := uint64(i)
|
||||
b[i+1] = float64(bucketNanos) / 1e9
|
||||
|
||||
for j := 0; j < timeHistNumSubBuckets; j++ {
|
||||
// No bucket bit for the first few buckets. Just sub-bucket bits after the
|
||||
// min bucket bit.
|
||||
bucketNanos := uint64(j) << (timeHistMinBucketBits - 1 - timeHistSubBucketBits)
|
||||
// Convert nanoseconds to seconds via a division.
|
||||
// These values will all be exactly representable by a float64.
|
||||
b[j+1] = float64(bucketNanos) / 1e9
|
||||
}
|
||||
// Generate the rest of the super-buckets. It's easier to reason
|
||||
// about if we cut out the 0'th bucket, so subtract one since
|
||||
// we just handled that bucket.
|
||||
for i := 0; i < timeHistNumSuperBuckets-1; i++ {
|
||||
// Generate the rest of the buckets. It's easier to reason
|
||||
// about if we cut out the 0'th bucket.
|
||||
for i := timeHistMinBucketBits; i < timeHistMaxBucketBits; i++ {
|
||||
for j := 0; j < timeHistNumSubBuckets; j++ {
|
||||
// Set the super-bucket bit.
|
||||
bucketNanos := uint64(1) << (i + timeHistSubBucketBits)
|
||||
// Set the bucket bit.
|
||||
bucketNanos := uint64(1) << (i - 1)
|
||||
// Set the sub-bucket bits.
|
||||
bucketNanos |= uint64(j) << i
|
||||
// The index for this bucket is going to be the (i+1)'th super bucket
|
||||
// (note that we're starting from zero, but handled the first super-bucket
|
||||
bucketNanos |= uint64(j) << (i - 1 - timeHistSubBucketBits)
|
||||
// The index for this bucket is going to be the (i+1)'th bucket
|
||||
// (note that we're starting from zero, but handled the first bucket
|
||||
// earlier, so we need to compensate), and the j'th sub bucket.
|
||||
// Add 1 because we left space for -Inf.
|
||||
bucketIndex := (i+1)*timeHistNumSubBuckets + j + 1
|
||||
bucketIndex := (i-timeHistMinBucketBits+1)*timeHistNumSubBuckets + j + 1
|
||||
// Convert nanoseconds to seconds via a division.
|
||||
// These values will all be exactly representable by a float64.
|
||||
b[bucketIndex] = float64(bucketNanos) / 1e9
|
||||
}
|
||||
}
|
||||
// Overflow bucket.
|
||||
b[len(b)-2] = float64(uint64(1)<<(timeHistMaxBucketBits-1)) / 1e9
|
||||
b[len(b)-1] = float64Inf()
|
||||
return b
|
||||
}
|
||||
|
|
|
@ -20,50 +20,54 @@ func TestTimeHistogram(t *testing.T) {
|
|||
h := &dummyTimeHistogram
|
||||
|
||||
// Record exactly one sample in each bucket.
|
||||
for i := 0; i < TimeHistNumSuperBuckets; i++ {
|
||||
var base int64
|
||||
if i > 0 {
|
||||
base = int64(1) << (i + TimeHistSubBucketBits - 1)
|
||||
}
|
||||
for j := 0; j < TimeHistNumSubBuckets; j++ {
|
||||
v := int64(j)
|
||||
if i > 0 {
|
||||
v <<= i - 1
|
||||
}
|
||||
h.Record(base + v)
|
||||
for j := 0; j < TimeHistNumSubBuckets; j++ {
|
||||
v := int64(j) << (TimeHistMinBucketBits - 1 - TimeHistSubBucketBits)
|
||||
for k := 0; k < j; k++ {
|
||||
// Record a number of times equal to the bucket index.
|
||||
h.Record(v)
|
||||
}
|
||||
}
|
||||
// Hit the underflow bucket.
|
||||
for i := TimeHistMinBucketBits; i < TimeHistMaxBucketBits; i++ {
|
||||
base := int64(1) << (i - 1)
|
||||
for j := 0; j < TimeHistNumSubBuckets; j++ {
|
||||
v := int64(j) << (i - 1 - TimeHistSubBucketBits)
|
||||
for k := 0; k < (i+1-TimeHistMinBucketBits)*TimeHistNumSubBuckets+j; k++ {
|
||||
// Record a number of times equal to the bucket index.
|
||||
h.Record(base + v)
|
||||
}
|
||||
}
|
||||
}
|
||||
// Hit the underflow and overflow buckets.
|
||||
h.Record(int64(-1))
|
||||
h.Record(math.MaxInt64)
|
||||
h.Record(math.MaxInt64)
|
||||
|
||||
// Check to make sure there's exactly one count in each
|
||||
// bucket.
|
||||
for i := uint(0); i < TimeHistNumSuperBuckets; i++ {
|
||||
for j := uint(0); j < TimeHistNumSubBuckets; j++ {
|
||||
for i := 0; i < TimeHistNumBuckets; i++ {
|
||||
for j := 0; j < TimeHistNumSubBuckets; j++ {
|
||||
c, ok := h.Count(i, j)
|
||||
if !ok {
|
||||
t.Errorf("hit underflow bucket unexpectedly: (%d, %d)", i, j)
|
||||
} else if c != 1 {
|
||||
t.Errorf("bucket (%d, %d) has count that is not 1: %d", i, j, c)
|
||||
t.Errorf("unexpected invalid bucket: (%d, %d)", i, j)
|
||||
} else if idx := uint64(i*TimeHistNumSubBuckets + j); c != idx {
|
||||
t.Errorf("bucket (%d, %d) has count that is not %d: %d", i, j, idx, c)
|
||||
}
|
||||
}
|
||||
}
|
||||
c, ok := h.Count(TimeHistNumSuperBuckets, 0)
|
||||
c, ok := h.Count(-1, 0)
|
||||
if ok {
|
||||
t.Errorf("expected to hit underflow bucket: (%d, %d)", TimeHistNumSuperBuckets, 0)
|
||||
t.Errorf("expected to hit underflow bucket: (%d, %d)", -1, 0)
|
||||
}
|
||||
if c != 1 {
|
||||
t.Errorf("underflow bucket has count that is not 1: %d", c)
|
||||
t.Errorf("overflow bucket has count that is not 1: %d", c)
|
||||
}
|
||||
|
||||
// Check overflow behavior.
|
||||
// By hitting a high value, we should just be adding into the highest bucket.
|
||||
h.Record(math.MaxInt64)
|
||||
c, ok = h.Count(TimeHistNumSuperBuckets-1, TimeHistNumSubBuckets-1)
|
||||
if !ok {
|
||||
t.Error("hit underflow bucket in highest bucket unexpectedly")
|
||||
} else if c != 2 {
|
||||
t.Errorf("highest has count that is not 2: %d", c)
|
||||
c, ok = h.Count(TimeHistNumBuckets+1, 0)
|
||||
if ok {
|
||||
t.Errorf("expected to hit overflow bucket: (%d, %d)", TimeHistNumBuckets+1, 0)
|
||||
}
|
||||
if c != 2 {
|
||||
t.Errorf("overflow bucket has count that is not 2: %d", c)
|
||||
}
|
||||
|
||||
dummyTimeHistogram = TimeHistogram{}
|
||||
|
@ -72,34 +76,32 @@ func TestTimeHistogram(t *testing.T) {
|
|||
func TestTimeHistogramMetricsBuckets(t *testing.T) {
|
||||
buckets := TimeHistogramMetricsBuckets()
|
||||
|
||||
nonInfBucketsLen := TimeHistNumSubBuckets * TimeHistNumSuperBuckets
|
||||
expBucketsLen := nonInfBucketsLen + 2 // Count -Inf and +Inf.
|
||||
nonInfBucketsLen := TimeHistNumSubBuckets * TimeHistNumBuckets
|
||||
expBucketsLen := nonInfBucketsLen + 3 // Count -Inf, the edge for the overflow bucket, and +Inf.
|
||||
if len(buckets) != expBucketsLen {
|
||||
t.Fatalf("unexpected length of buckets: got %d, want %d", len(buckets), expBucketsLen)
|
||||
}
|
||||
// Check the first non-Inf 2*TimeHistNumSubBuckets buckets in order, skipping the
|
||||
// first bucket which should be -Inf (checked later).
|
||||
//
|
||||
// Because of the way this scheme works, the bottom TimeHistNumSubBuckets
|
||||
// buckets are fully populated, and then the next TimeHistNumSubBuckets
|
||||
// have the TimeHistSubBucketBits'th bit set, while the bottom are once
|
||||
// again fully populated.
|
||||
for i := 1; i <= 2*TimeHistNumSubBuckets+1; i++ {
|
||||
if got, want := buckets[i], float64(i-1)/1e9; got != want {
|
||||
t.Errorf("expected bucket %d to have value %e, got %e", i, want, got)
|
||||
}
|
||||
}
|
||||
// Check some values.
|
||||
idxToBucket := map[int]float64{
|
||||
0: math.Inf(-1),
|
||||
33: float64(0x10<<1) / 1e9,
|
||||
34: float64(0x11<<1) / 1e9,
|
||||
49: float64(0x10<<2) / 1e9,
|
||||
58: float64(0x19<<2) / 1e9,
|
||||
65: float64(0x10<<3) / 1e9,
|
||||
513: float64(0x10<<31) / 1e9,
|
||||
519: float64(0x16<<31) / 1e9,
|
||||
expBucketsLen - 2: float64(0x1f<<43) / 1e9,
|
||||
1: 0.0,
|
||||
2: float64(0x040) / 1e9,
|
||||
3: float64(0x080) / 1e9,
|
||||
4: float64(0x0c0) / 1e9,
|
||||
5: float64(0x100) / 1e9,
|
||||
6: float64(0x140) / 1e9,
|
||||
7: float64(0x180) / 1e9,
|
||||
8: float64(0x1c0) / 1e9,
|
||||
9: float64(0x200) / 1e9,
|
||||
10: float64(0x280) / 1e9,
|
||||
11: float64(0x300) / 1e9,
|
||||
12: float64(0x380) / 1e9,
|
||||
13: float64(0x400) / 1e9,
|
||||
15: float64(0x600) / 1e9,
|
||||
81: float64(0x8000000) / 1e9,
|
||||
82: float64(0xa000000) / 1e9,
|
||||
108: float64(0x380000000) / 1e9,
|
||||
expBucketsLen - 2: float64(0x1<<47) / 1e9,
|
||||
expBucketsLen - 1: math.Inf(1),
|
||||
}
|
||||
for idx, bucket := range idxToBucket {
|
||||
|
|
|
@ -200,6 +200,7 @@ func initMetrics() {
|
|||
for i := range memstats.gcPauseDist.counts {
|
||||
hist.counts[i+1] = memstats.gcPauseDist.counts[i].Load()
|
||||
}
|
||||
hist.counts[len(hist.counts)-1] = memstats.gcPauseDist.overflow.Load()
|
||||
},
|
||||
},
|
||||
"/gc/stack/starting-size:bytes": {
|
||||
|
@ -330,6 +331,7 @@ func initMetrics() {
|
|||
for i := range sched.timeToRun.counts {
|
||||
hist.counts[i+1] = sched.timeToRun.counts[i].Load()
|
||||
}
|
||||
hist.counts[len(hist.counts)-1] = sched.timeToRun.overflow.Load()
|
||||
},
|
||||
},
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue