mirror of
https://github.com/golang/go
synced 2024-11-02 13:21:55 +00:00
runtime: account for idle mark time in the GC CPU limiter
Currently the GC CPU limiter doesn't account for idle application time at all. This means that the GC could start thrashing, for example if the live heap exceeds the max heap set by the memory limit, but the limiter will fail to kick in when there's a lot of available idle time. User goroutines will still be assisting at a really high rate because of assist pacing rules, but the GC CPU limiter will fail to kick in because the actual fraction of GC CPU time will be low if there's a lot of otherwise idle time (for example, on an overprovisioned system). Luckily, that idle time is usually eaten up entirely by idle mark workers, at least during the GC cycle. And in these cases where we're GCing continuously, that's all of our idle time. So we can take idle mark work time and subtract it from the mutator time accumulated in the GC CPU limiter, and that will give us a more accurate picture of how much CPU is being spent by user goroutines on GC. This will allow the GC CPU limiter to kick in, and reduce the impact of the thrashing. There is a corner case here if the idle mark workers are disabled, for example for the periodic GC, but in the case of the periodic GC, I don't think it's possible for us to be thrashing at all, so it doesn't really matter. Fixes #52890. Change-Id: Ie133a7d1f89b603434b415d51eb8733c2708a858 Reviewed-on: https://go-review.googlesource.com/c/go/+/405898 Reviewed-by: David Chase <drchase@google.com> Run-TryBot: Michael Knyszek <mknyszek@google.com> TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
parent
845a95b1ae
commit
08d6c4c2b9
2 changed files with 43 additions and 2 deletions
|
@ -1324,8 +1324,13 @@ func gcBgMarkWorker() {
|
|||
})
|
||||
|
||||
// Account for time and mark us as stopped.
|
||||
duration := nanotime() - startTime
|
||||
now := nanotime()
|
||||
duration := now - startTime
|
||||
gcController.markWorkerStop(pp.gcMarkWorkerMode, duration)
|
||||
if pp.gcMarkWorkerMode == gcMarkWorkerIdleMode {
|
||||
gcCPULimiter.addIdleMarkTime(duration)
|
||||
gcCPULimiter.update(now)
|
||||
}
|
||||
if pp.gcMarkWorkerMode == gcMarkWorkerFractionalMode {
|
||||
atomic.Xaddint64(&pp.gcFractionalMarkTime, duration)
|
||||
}
|
||||
|
|
|
@ -60,6 +60,9 @@ type gcCPULimiterState struct {
|
|||
// assistTimePool is the accumulated assist time since the last update.
|
||||
assistTimePool atomic.Int64
|
||||
|
||||
// idleMarkTimePool is the accumulated idle mark time since the last update.
|
||||
idleMarkTimePool atomic.Int64
|
||||
|
||||
// lastUpdate is the nanotime timestamp of the last time update was called.
|
||||
//
|
||||
// Updated under lock, but may be read concurrently.
|
||||
|
@ -143,6 +146,12 @@ func (l *gcCPULimiterState) addAssistTime(t int64) {
|
|||
l.assistTimePool.Add(t)
|
||||
}
|
||||
|
||||
// addIdleMarkTime notifies the limiter of additional idle mark worker time. It will be
|
||||
// subtracted from the total CPU time in the next update.
|
||||
func (l *gcCPULimiterState) addIdleMarkTime(t int64) {
|
||||
l.idleMarkTimePool.Add(t)
|
||||
}
|
||||
|
||||
// update updates the bucket given runtime-specific information. now is the
|
||||
// current monotonic time in nanoseconds.
|
||||
//
|
||||
|
@ -177,11 +186,38 @@ func (l *gcCPULimiterState) updateLocked(now int64) {
|
|||
l.assistTimePool.Add(-assistTime)
|
||||
}
|
||||
|
||||
// Accumulate.
|
||||
// Drain the pool of idle mark time.
|
||||
idleMarkTime := l.idleMarkTimePool.Load()
|
||||
if idleMarkTime != 0 {
|
||||
l.idleMarkTimePool.Add(-idleMarkTime)
|
||||
}
|
||||
|
||||
// Compute total GC time.
|
||||
windowGCTime := assistTime
|
||||
if l.gcEnabled {
|
||||
windowGCTime += int64(float64(windowTotalTime) * gcBackgroundUtilization)
|
||||
}
|
||||
|
||||
// Subtract out idle mark time from the total time. Do this after computing
|
||||
// GC time, because the background utilization is dependent on the *real*
|
||||
// total time, not the total time after idle time is subtracted.
|
||||
//
|
||||
// Idle mark workers soak up time that the application spends idle. Any
|
||||
// additional idle time can skew GC CPU utilization, because the GC might
|
||||
// be executing continuously and thrashing, but the CPU utilization with
|
||||
// respect to GOMAXPROCS will be quite low, so the limiter will otherwise
|
||||
// never kick in. By subtracting idle mark time, we're removing time that
|
||||
// we know the application was idle giving a more accurate picture of whether
|
||||
// the GC is thrashing.
|
||||
//
|
||||
// TODO(mknyszek): Figure out if it's necessary to also track non-GC idle time.
|
||||
//
|
||||
// There is a corner case here where if the idle mark workers are disabled, such
|
||||
// as when the periodic GC is executing, then we definitely won't be accounting
|
||||
// for this correctly. However, if the periodic GC is running, the limiter is likely
|
||||
// totally irrelevant because GC CPU utilization is extremely low anyway.
|
||||
windowTotalTime -= idleMarkTime
|
||||
|
||||
l.accumulate(windowTotalTime-windowGCTime, windowGCTime)
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue