diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index 93cae48211..4aabf90fe7 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -525,6 +525,12 @@ func Getg() *G {
 	return getg()
 }
 
+func GIsWaitingOnMutex(gp *G) bool {
+	return readgstatus(gp) == _Gwaiting && gp.waitreason.isMutexWait()
+}
+
+var CasGStatusAlwaysTrack = &casgstatusAlwaysTrack
+
 //go:noinline
 func PanicForTesting(b []byte, i int) byte {
 	return unexportedPanicForTesting(b, i)
@@ -1617,3 +1623,5 @@ func (s *ScavengeIndex) Mark(base, limit uintptr) {
 func (s *ScavengeIndex) Clear(ci ChunkIdx) {
 	s.i.clear(chunkIdx(ci))
 }
+
+const GTrackingPeriod = gTrackingPeriod
diff --git a/src/runtime/metrics.go b/src/runtime/metrics.go
index 8e1c596852..0a255149e2 100644
--- a/src/runtime/metrics.go
+++ b/src/runtime/metrics.go
@@ -411,6 +411,12 @@ func initMetrics() {
 				hist.counts[len(hist.counts)-1] = sched.timeToRun.overflow.Load()
 			},
 		},
+		"/sync/mutex/wait/total:seconds": {
+			compute: func(_ *statAggregate, out *metricValue) {
+				out.kind = metricKindFloat64
+				out.scalar = float64bits(nsToSec(sched.totalMutexWaitTime.Load()))
+			},
+		},
 	}
 	metricsInit = true
 }
diff --git a/src/runtime/metrics/description.go b/src/runtime/metrics/description.go
index abcdddd16c..dcfe01e67c 100644
--- a/src/runtime/metrics/description.go
+++ b/src/runtime/metrics/description.go
@@ -366,6 +366,12 @@ var allDesc = []Description{
 		Description: "Distribution of the time goroutines have spent in the scheduler in a runnable state before actually running.",
 		Kind:        KindFloat64Histogram,
 	},
+	{
+		Name:        "/sync/mutex/wait/total:seconds",
+		Description: "Approximate cumulative time goroutines have spent blocked on a sync.Mutex or sync.RWMutex. This metric is useful for identifying global changes in lock contention. Collect a mutex or block profile using the runtime/pprof package for more detailed contention data.",
+		Kind:        KindFloat64,
+		Cumulative:  true,
+	},
 }
 
 // All returns a slice of containing metric descriptions for all supported metrics.
diff --git a/src/runtime/metrics/doc.go b/src/runtime/metrics/doc.go
index bb7e96b3b5..b593d8d812 100644
--- a/src/runtime/metrics/doc.go
+++ b/src/runtime/metrics/doc.go
@@ -272,5 +272,12 @@ Below is the full list of supported metrics, ordered lexicographically.
 	/sched/latencies:seconds
 		Distribution of the time goroutines have spent in the scheduler
 		in a runnable state before actually running.
+
+	/sync/mutex/wait/total:seconds
+		Approximate cumulative time goroutines have spent blocked on a
+		sync.Mutex or sync.RWMutex. This metric is useful for identifying
+		global changes in lock contention. Collect a mutex or block
+		profile using the runtime/pprof package for more detailed
+		contention data.
 */
 package metrics
diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go
index 645d3a519b..b4d921b82e 100644
--- a/src/runtime/metrics_test.go
+++ b/src/runtime/metrics_test.go
@@ -5,6 +5,7 @@
 package runtime_test
 
 import (
+	"reflect"
 	"runtime"
 	"runtime/metrics"
 	"sort"
@@ -156,7 +157,7 @@ func TestReadMetricsConsistency(t *testing.T) {
 	// Tests whether readMetrics produces consistent, sensible values.
 	// The values are read concurrently with the runtime doing other
 	// things (e.g. allocating) so what we read can't reasonably compared
-	// to runtime values.
+	// to other runtime values (e.g. MemStats).
 
 	// Run a few GC cycles to get some of the stats to be non-zero.
 	runtime.GC()
@@ -486,3 +487,127 @@ func TestReadMetricsCumulative(t *testing.T) {
 func withinEpsilon(v1, v2, e float64) bool {
 	return v2-v2*e <= v1 && v1 <= v2+v2*e
 }
+
+func TestMutexWaitTimeMetric(t *testing.T) {
+	var sample [1]metrics.Sample
+	sample[0].Name = "/sync/mutex/wait/total:seconds"
+
+	locks := []locker2{
+		new(mutex),
+		new(rwmutexWrite),
+		new(rwmutexReadWrite),
+		new(rwmutexWriteRead),
+	}
+	for _, lock := range locks {
+		t.Run(reflect.TypeOf(lock).Elem().Name(), func(t *testing.T) {
+			metrics.Read(sample[:])
+			before := time.Duration(sample[0].Value.Float64() * 1e9)
+
+			minMutexWaitTime := generateMutexWaitTime(lock)
+
+			metrics.Read(sample[:])
+			after := time.Duration(sample[0].Value.Float64() * 1e9)
+
+			if wt := after - before; wt < minMutexWaitTime {
+				t.Errorf("too little mutex wait time: got %s, want %s", wt, minMutexWaitTime)
+			}
+		})
+	}
+}
+
+// locker2 represents an API surface of two concurrent goroutines
+// locking the same resource, but through different APIs. It's intended
+// to abstract over the relationship of two Lock calls or an RLock
+// and a Lock call.
+type locker2 interface {
+	Lock1()
+	Unlock1()
+	Lock2()
+	Unlock2()
+}
+
+type mutex struct {
+	mu sync.Mutex
+}
+
+func (m *mutex) Lock1()   { m.mu.Lock() }
+func (m *mutex) Unlock1() { m.mu.Unlock() }
+func (m *mutex) Lock2()   { m.mu.Lock() }
+func (m *mutex) Unlock2() { m.mu.Unlock() }
+
+type rwmutexWrite struct {
+	mu sync.RWMutex
+}
+
+func (m *rwmutexWrite) Lock1()   { m.mu.Lock() }
+func (m *rwmutexWrite) Unlock1() { m.mu.Unlock() }
+func (m *rwmutexWrite) Lock2()   { m.mu.Lock() }
+func (m *rwmutexWrite) Unlock2() { m.mu.Unlock() }
+
+type rwmutexReadWrite struct {
+	mu sync.RWMutex
+}
+
+func (m *rwmutexReadWrite) Lock1()   { m.mu.RLock() }
+func (m *rwmutexReadWrite) Unlock1() { m.mu.RUnlock() }
+func (m *rwmutexReadWrite) Lock2()   { m.mu.Lock() }
+func (m *rwmutexReadWrite) Unlock2() { m.mu.Unlock() }
+
+type rwmutexWriteRead struct {
+	mu sync.RWMutex
+}
+
+func (m *rwmutexWriteRead) Lock1()   { m.mu.Lock() }
+func (m *rwmutexWriteRead) Unlock1() { m.mu.Unlock() }
+func (m *rwmutexWriteRead) Lock2()   { m.mu.RLock() }
+func (m *rwmutexWriteRead) Unlock2() { m.mu.RUnlock() }
+
+// generateMutexWaitTime causes a couple of goroutines
+// to block a whole bunch of times on a sync.Mutex, returning
+// the minimum amount of time that should be visible in the
+// /sync/mutex/wait/total:seconds metric.
+func generateMutexWaitTime(mu locker2) time.Duration {
+	// Set up the runtime to always track casgstatus transitions for metrics.
+	*runtime.CasGStatusAlwaysTrack = true
+
+	mu.Lock1()
+
+	// Start up a goroutine to wait on the lock.
+	gc := make(chan *runtime.G)
+	done := make(chan bool)
+	go func() {
+		gc <- runtime.Getg()
+
+		for {
+			mu.Lock2()
+			mu.Unlock2()
+			if <-done {
+				return
+			}
+		}
+	}()
+	gp := <-gc
+
+	// Set the block time high enough so that it will always show up, even
+	// on systems with coarse timer granularity.
+	const blockTime = 100 * time.Millisecond
+
+	// Make sure the goroutine spawned above actually blocks on the lock.
+	for {
+		if runtime.GIsWaitingOnMutex(gp) {
+			break
+		}
+		runtime.Gosched()
+	}
+
+	// Let some amount of time pass.
+	time.Sleep(blockTime)
+
+	// Let the other goroutine acquire the lock.
+	mu.Unlock1()
+	done <- true
+
+	// Reset flag.
+	*runtime.CasGStatusAlwaysTrack = false
+	return blockTime
+}
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 0fed91c61e..d7a8049f37 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -954,6 +954,10 @@ func castogscanstatus(gp *g, oldval, newval uint32) bool {
 	panic("not reached")
 }
 
+// casgstatusAlwaysTrack is a debug flag that causes casgstatus to always track
+// various latencies on every transition instead of sampling them.
+var casgstatusAlwaysTrack = false
+
 // If asked to move to or from a Gscanstatus this will throw. Use the castogscanstatus
 // and casfrom_Gscanstatus instead.
 // casgstatus will loop if the g->atomicstatus is in a Gscan status until the routine that
@@ -994,36 +998,65 @@ func casgstatus(gp *g, oldval, newval uint32) {
 		}
 	}
 
-	// Handle tracking for scheduling latencies.
 	if oldval == _Grunning {
-		// Track every 8th time a goroutine transitions out of running.
-		if gp.trackingSeq%gTrackingPeriod == 0 {
+		// Track every gTrackingPeriod time a goroutine transitions out of running.
+		if casgstatusAlwaysTrack || gp.trackingSeq%gTrackingPeriod == 0 {
 			gp.tracking = true
 		}
 		gp.trackingSeq++
 	}
-	if gp.tracking {
-		if oldval == _Grunnable {
-			// We transitioned out of runnable, so measure how much
-			// time we spent in this state and add it to
-			// runnableTime.
-			now := nanotime()
-			gp.runnableTime += now - gp.runnableStamp
-			gp.runnableStamp = 0
+	if !gp.tracking {
+		return
+	}
+
+	// Handle various kinds of tracking.
+	//
+	// Currently:
+	// - Time spent in runnable.
+	// - Time spent blocked on a sync.Mutex or sync.RWMutex.
+	switch oldval {
+	case _Grunnable:
+		// We transitioned out of runnable, so measure how much
+		// time we spent in this state and add it to
+		// runnableTime.
+		now := nanotime()
+		gp.runnableTime += now - gp.trackingStamp
+		gp.trackingStamp = 0
+	case _Gwaiting:
+		if !gp.waitreason.isMutexWait() {
+			// Not blocking on a lock.
+			break
 		}
-		if newval == _Grunnable {
-			// We just transitioned into runnable, so record what
-			// time that happened.
-			now := nanotime()
-			gp.runnableStamp = now
-		} else if newval == _Grunning {
-			// We're transitioning into running, so turn off
-			// tracking and record how much time we spent in
-			// runnable.
-			gp.tracking = false
-			sched.timeToRun.record(gp.runnableTime)
-			gp.runnableTime = 0
+		// Blocking on a lock, measure it. Note that because we're
+		// sampling, we have to multiply by our sampling period to get
+		// a more representative estimate of the absolute value.
+		// gTrackingPeriod also represents an accurate sampling period
+		// because we can only enter this state from _Grunning.
+		now := nanotime()
+		sched.totalMutexWaitTime.Add((now - gp.trackingStamp) * gTrackingPeriod)
+		gp.trackingStamp = 0
+	}
+	switch newval {
+	case _Gwaiting:
+		if !gp.waitreason.isMutexWait() {
+			// Not blocking on a lock.
+			break
 		}
+		// Blocking on a lock. Write down the timestamp.
+		now := nanotime()
+		gp.trackingStamp = now
+	case _Grunnable:
+		// We just transitioned into runnable, so record what
+		// time that happened.
+		now := nanotime()
+		gp.trackingStamp = now
+	case _Grunning:
+		// We're transitioning into running, so turn off
+		// tracking and record how much time we spent in
+		// runnable.
+		gp.tracking = false
+		sched.timeToRun.record(gp.runnableTime)
+		gp.runnableTime = 0
 	}
 }
 
@@ -1031,6 +1064,7 @@ func casgstatus(gp *g, oldval, newval uint32) {
 //
 // Use this over casgstatus when possible to ensure that a waitreason is set.
 func casGToWaiting(gp *g, old uint32, reason waitReason) {
+	// Set the wait reason before calling casgstatus, because casgstatus will use it.
 	gp.waitreason = reason
 	casgstatus(gp, old, _Gwaiting)
 }
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index 1d36126a03..284f9d395d 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -468,7 +468,7 @@ type g struct {
 	sysblocktraced bool     // StartTrace has emitted EvGoInSyscall about this goroutine
 	tracking       bool     // whether we're tracking this G for sched latency statistics
 	trackingSeq    uint8    // used to decide whether to track this G
-	runnableStamp  int64    // timestamp of when the G last became runnable, only used when tracking
+	trackingStamp  int64    // timestamp of when the G last started being tracked
 	runnableTime   int64    // the amount of time spent runnable, cleared when running, only used when tracking
 	sysexitticks   int64    // cputicks when syscall has returned (for tracing)
 	traceseq       uint64   // trace event sequencer
@@ -843,6 +843,10 @@ type schedt struct {
 	//
 	// Reset on each GC cycle.
 	idleTime atomic.Int64
+
+	// totalMutexWaitTime is the sum of time goroutines have spent in _Gwaiting
+	// with a waitreason of the form waitReasonSync{RW,}Mutex{R,}Lock.
+	totalMutexWaitTime atomic.Int64
 }
 
 // Values for the flags field of a sigTabT.
@@ -1109,6 +1113,12 @@ func (w waitReason) String() string {
 	return waitReasonStrings[w]
 }
 
+func (w waitReason) isMutexWait() bool {
+	return w == waitReasonSyncMutexLock ||
+		w == waitReasonSyncRWMutexRLock ||
+		w == waitReasonSyncRWMutexLock
+}
+
 var (
 	allm       *m
 	gomaxprocs int32
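
For reference, reading the new metric from user code is an ordinary runtime/metrics read. The snippet below is an illustrative sketch, not part of the change; it uses only the metric name and the runtime/metrics calls that the patch's own test exercises (metrics.Sample, metrics.Read, Value.Float64):

	package main

	import (
		"fmt"
		"runtime/metrics"
	)

	func main() {
		// Sample the cumulative mutex wait time exposed by this patch.
		samples := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}}
		metrics.Read(samples)
		if samples[0].Value.Kind() == metrics.KindFloat64 {
			fmt.Printf("total mutex wait: %fs\n", samples[0].Value.Float64())
		}
	}

Because casgstatus only samples one in every gTrackingPeriod transitions (and scales each sampled wait by the period, unless casgstatusAlwaysTrack is set), the reported value is an estimate rather than an exact sum, which is why the metric description calls it "approximate".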