runtime: use pidleget for faketime jump

In faketime mode, checkdead is responsible for jumping time forward to
the next timer expiration, and waking an M to handle the newly ready
timer.

Currently it pulls the exact P that owns the next timer off of the pidle
list. In theory this is efficient because that P is immediately eligible
to run the timer without stealing. Unfortunately it is also fraught with
peril because we are skipping all of the bookkeeping in pidleget:

* Skipped updates to timerpMask mean that our timers may not be eligible
  for stealing, as they should be.
* Skipped updates to idlepMask mean that our runq may not be eligible
  for stealing, as they should be.
* Skipped updates to sched.npidle may break tracking of spinning Ms,
  potentially resulting in lost work.
* Finally, as of CL 410122, skipped updates to p.limiterEvent may affect
  the GC limiter, or cause a fatal throw when another event occurs.

The last case has finally undercovered this issue since it quickly
results in a hard crash.

We could add all of these updates into checkdead, but it is much more
maintainable to keep this logic in one place and use pidleget here like
everywhere else in the runtime. This means we probably won't wake the
P owning the timer, meaning that the P will need to steal the timer,
which is less efficient, but faketime is not a performance-sensitive
build mode. Note that the M will automatically make itself a spinning M
to make it eligible to steal since it is the only one running.

Fixes #53294
For #52890

Change-Id: I4acc3d259b9b4d7dc02608581c8b4fd259f272e9
Reviewed-on: https://go-review.googlesource.com/c/go/+/411119
Run-TryBot: Michael Pratt <mpratt@google.com>
Auto-Submit: Michael Pratt <mpratt@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
This commit is contained in:
Michael Pratt 2022-06-08 15:59:37 -04:00 committed by Gopher Robot
parent 1292176bc9
commit 13f6be2833
2 changed files with 20 additions and 17 deletions

View file

@ -5071,14 +5071,15 @@ func checkdead() {
// Maybe jump time forward for playground.
if faketime != 0 {
when, _p_ := timeSleepUntil()
if _p_ != nil {
if when := timeSleepUntil(); when < maxWhen {
faketime = when
for pp := &sched.pidle; *pp != 0; pp = &(*pp).ptr().link {
if (*pp).ptr() == _p_ {
*pp = _p_.link
break
}
// Start an M to steal the timer.
pp, _ := pidleget(faketime)
if pp == nil {
// There should always be a free P since
// nothing is running.
throw("checkdead: no p for timer")
}
mp := mget()
if mp == nil {
@ -5086,7 +5087,12 @@ func checkdead() {
// nothing is running.
throw("checkdead: no m for timer")
}
mp.nextp.set(_p_)
// M must be spinning to steal. We set this to be
// explicit, but since this is the only M it would
// become spinning on its own anyways.
atomic.Xadd(&sched.nmspinning, 1)
mp.spinning = true
mp.nextp.set(pp)
notewakeup(&mp.park)
return
}
@ -5158,7 +5164,7 @@ func sysmon() {
lock(&sched.lock)
if atomic.Load(&sched.gcwaiting) != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs) {
syscallWake := false
next, _ := timeSleepUntil()
next := timeSleepUntil()
if next > now {
atomic.Store(&sched.sysmonwait, 1)
unlock(&sched.lock)
@ -5231,7 +5237,7 @@ func sysmon() {
//
// See issue 42515 and
// https://gnats.netbsd.org/cgi-bin/query-pr-single.pl?number=50094.
if next, _ := timeSleepUntil(); next < now {
if next := timeSleepUntil(); next < now {
startm(nil, false)
}
}

View file

@ -1016,12 +1016,11 @@ func updateTimerModifiedEarliest(pp *p, nextwhen int64) {
}
}
// timeSleepUntil returns the time when the next timer should fire,
// and the P that holds the timer heap that that timer is on.
// timeSleepUntil returns the time when the next timer should fire. Returns
// maxWhen if there are no timers.
// This is only called by sysmon and checkdead.
func timeSleepUntil() (int64, *p) {
func timeSleepUntil() int64 {
next := int64(maxWhen)
var pret *p
// Prevent allp slice changes. This is like retake.
lock(&allpLock)
@ -1035,18 +1034,16 @@ func timeSleepUntil() (int64, *p) {
w := int64(atomic.Load64(&pp.timer0When))
if w != 0 && w < next {
next = w
pret = pp
}
w = int64(atomic.Load64(&pp.timerModifiedEarliest))
if w != 0 && w < next {
next = w
pret = pp
}
}
unlock(&allpLock)
return next, pret
return next
}
// Heap maintenance algorithms.