mirror of
https://github.com/golang/go
synced 2024-10-14 11:53:56 +00:00
sync: allow inlining the Mutex.Unlock fast path
Make use of the newly-enabled limited midstack inlining. Similar changes will be done in followup CLs. name old time/op new time/op delta MutexUncontended 19.3ns ± 1% 18.9ns ± 0% -1.92% (p=0.000 n=20+19) MutexUncontended-4 5.24ns ± 0% 4.75ns ± 1% -9.25% (p=0.000 n=20+20) MutexUncontended-16 2.10ns ± 0% 2.05ns ± 0% -2.38% (p=0.000 n=15+19) Mutex 19.6ns ± 0% 19.3ns ± 1% -1.92% (p=0.000 n=20+17) Mutex-4 54.6ns ± 5% 52.4ns ± 4% -4.09% (p=0.000 n=20+20) Mutex-16 133ns ± 5% 139ns ± 2% +4.23% (p=0.000 n=20+16) MutexSlack 33.4ns ± 2% 18.9ns ± 1% -43.56% (p=0.000 n=19+20) MutexSlack-4 206ns ± 5% 225ns ± 8% +9.12% (p=0.000 n=20+18) MutexSlack-16 89.4ns ± 1% 98.4ns ± 1% +10.10% (p=0.000 n=18+17) MutexWork 60.5ns ± 0% 58.2ns ± 3% -3.75% (p=0.000 n=12+20) MutexWork-4 105ns ± 5% 103ns ± 7% -1.68% (p=0.007 n=20+20) MutexWork-16 157ns ± 1% 163ns ± 2% +3.90% (p=0.000 n=18+18) MutexWorkSlack 70.2ns ± 5% 57.7ns ± 1% -17.81% (p=0.000 n=19+20) MutexWorkSlack-4 277ns ±13% 276ns ±13% ~ (p=0.682 n=20+19) MutexWorkSlack-16 156ns ± 0% 147ns ± 0% -5.62% (p=0.000 n=16+14) MutexNoSpin 966ns ± 0% 968ns ± 0% +0.11% (p=0.029 n=15+20) MutexNoSpin-4 269ns ± 4% 270ns ± 2% ~ (p=0.807 n=20+19) MutexNoSpin-16 122ns ± 0% 120ns ± 4% -1.63% (p=0.000 n=19+19) MutexSpin 3.13µs ± 0% 3.13µs ± 1% +0.16% (p=0.004 n=18+20) MutexSpin-4 826ns ± 1% 832ns ± 2% +0.74% (p=0.000 n=19+16) MutexSpin-16 397ns ± 1% 395ns ± 0% -0.50% (p=0.000 n=19+17) RWMutexUncontended 71.4ns ± 0% 69.5ns ± 0% -2.72% (p=0.000 n=16+20) RWMutexUncontended-4 18.4ns ± 4% 17.5ns ± 0% -4.92% (p=0.000 n=20+18) RWMutexUncontended-16 8.01ns ± 0% 7.92ns ± 0% -1.15% (p=0.000 n=18+18) RWMutexWrite100 24.9ns ± 0% 24.9ns ± 1% ~ (p=0.099 n=19+20) RWMutexWrite100-4 46.5ns ± 3% 46.2ns ± 4% ~ (p=0.253 n=17+19) RWMutexWrite100-16 68.9ns ± 3% 69.9ns ± 5% +1.46% (p=0.012 n=18+20) RWMutexWrite10 27.1ns ± 0% 27.0ns ± 2% ~ (p=0.128 n=17+20) RWMutexWrite10-4 34.8ns ± 1% 34.7ns ± 2% ~ (p=0.180 n=20+18) RWMutexWrite10-16 37.5ns ± 2% 37.2ns ± 4% -0.89% (p=0.023 n=20+20) RWMutexWorkWrite100 164ns ± 0% 164ns ± 0% ~ (p=0.106 n=12+20) RWMutexWorkWrite100-4 186ns ± 3% 193ns ± 3% +3.46% (p=0.000 n=20+20) RWMutexWorkWrite100-16 204ns ± 2% 210ns ± 3% +2.96% (p=0.000 n=18+20) RWMutexWorkWrite10 153ns ± 0% 153ns ± 0% -0.20% (p=0.017 n=20+19) RWMutexWorkWrite10-4 179ns ± 1% 178ns ± 2% ~ (p=0.215 n=19+20) RWMutexWorkWrite10-16 191ns ± 1% 192ns ± 2% ~ (p=0.166 n=15+19) linux/amd64 bin/go 14630572 (previous commit 14605947, +24625/+0.17%) Change-Id: I3f9d1765801fe0b8deb1bc2728b8bba8a7508e23 Reviewed-on: https://go-review.googlesource.com/c/go/+/148958 Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
This commit is contained in:
parent
94cbfc2f7f
commit
4c3f26076b
|
@ -62,8 +62,8 @@ func poll_runtime_Semacquire(addr *uint32) {
|
|||
}
|
||||
|
||||
//go:linkname sync_runtime_Semrelease sync.runtime_Semrelease
|
||||
func sync_runtime_Semrelease(addr *uint32, handoff bool) {
|
||||
semrelease1(addr, handoff)
|
||||
func sync_runtime_Semrelease(addr *uint32, handoff bool, skipframes int) {
|
||||
semrelease1(addr, handoff, skipframes)
|
||||
}
|
||||
|
||||
//go:linkname sync_runtime_SemacquireMutex sync.runtime_SemacquireMutex
|
||||
|
@ -153,10 +153,10 @@ func semacquire1(addr *uint32, lifo bool, profile semaProfileFlags) {
|
|||
}
|
||||
|
||||
func semrelease(addr *uint32) {
|
||||
semrelease1(addr, false)
|
||||
semrelease1(addr, false, 0)
|
||||
}
|
||||
|
||||
func semrelease1(addr *uint32, handoff bool) {
|
||||
func semrelease1(addr *uint32, handoff bool, skipframes int) {
|
||||
root := semroot(addr)
|
||||
atomic.Xadd(addr, 1)
|
||||
|
||||
|
@ -183,7 +183,7 @@ func semrelease1(addr *uint32, handoff bool) {
|
|||
if s != nil { // May be slow, so unlock first
|
||||
acquiretime := s.acquiretime
|
||||
if acquiretime != 0 {
|
||||
mutexevent(t0-acquiretime, 3)
|
||||
mutexevent(t0-acquiretime, 3+skipframes)
|
||||
}
|
||||
if s.ticket != 0 {
|
||||
throw("corrupted semaphore ticket")
|
||||
|
@ -191,7 +191,7 @@ func semrelease1(addr *uint32, handoff bool) {
|
|||
if handoff && cansemacquire(addr) {
|
||||
s.ticket = 1
|
||||
}
|
||||
readyWithTime(s, 5)
|
||||
readyWithTime(s, 5+skipframes)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -180,6 +180,14 @@ func (m *Mutex) Unlock() {
|
|||
|
||||
// Fast path: drop lock bit.
|
||||
new := atomic.AddInt32(&m.state, -mutexLocked)
|
||||
if new != 0 {
|
||||
// Outlined slow path to allow inlining the fast path.
|
||||
// To hide unlockSlow during tracing we skip one extra frame when tracing GoUnblock.
|
||||
m.unlockSlow(new)
|
||||
}
|
||||
}
|
||||
|
||||
func (m *Mutex) unlockSlow(new int32) {
|
||||
if (new+mutexLocked)&mutexLocked == 0 {
|
||||
throw("sync: unlock of unlocked mutex")
|
||||
}
|
||||
|
@ -198,7 +206,7 @@ func (m *Mutex) Unlock() {
|
|||
// Grab the right to wake someone.
|
||||
new = (old - 1<<mutexWaiterShift) | mutexWoken
|
||||
if atomic.CompareAndSwapInt32(&m.state, old, new) {
|
||||
runtime_Semrelease(&m.sema, false)
|
||||
runtime_Semrelease(&m.sema, false, 1)
|
||||
return
|
||||
}
|
||||
old = m.state
|
||||
|
@ -208,6 +216,6 @@ func (m *Mutex) Unlock() {
|
|||
// Note: mutexLocked is not set, the waiter will set it after wakeup.
|
||||
// But mutex is still considered locked if mutexStarving is set,
|
||||
// so new coming goroutines won't acquire it.
|
||||
runtime_Semrelease(&m.sema, true)
|
||||
runtime_Semrelease(&m.sema, true, 1)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,7 +21,7 @@ import (
|
|||
func HammerSemaphore(s *uint32, loops int, cdone chan bool) {
|
||||
for i := 0; i < loops; i++ {
|
||||
Runtime_Semacquire(s)
|
||||
Runtime_Semrelease(s, false)
|
||||
Runtime_Semrelease(s, false, 0)
|
||||
}
|
||||
cdone <- true
|
||||
}
|
||||
|
|
|
@ -22,7 +22,9 @@ func runtime_SemacquireMutex(s *uint32, lifo bool)
|
|||
// It is intended as a simple wakeup primitive for use by the synchronization
|
||||
// library and should not be used directly.
|
||||
// If handoff is true, pass count directly to the first waiter.
|
||||
func runtime_Semrelease(s *uint32, handoff bool)
|
||||
// skipframes is the number of frames to omit during tracing, counting from
|
||||
// runtime_Semrelease's caller.
|
||||
func runtime_Semrelease(s *uint32, handoff bool, skipframes int)
|
||||
|
||||
// Approximation of notifyList in runtime/sema.go. Size and alignment must
|
||||
// agree.
|
||||
|
|
|
@ -18,7 +18,7 @@ func BenchmarkSemaUncontended(b *testing.B) {
|
|||
b.RunParallel(func(pb *testing.PB) {
|
||||
sem := new(PaddedSem)
|
||||
for pb.Next() {
|
||||
Runtime_Semrelease(&sem.sem, false)
|
||||
Runtime_Semrelease(&sem.sem, false, 0)
|
||||
Runtime_Semacquire(&sem.sem)
|
||||
}
|
||||
})
|
||||
|
@ -44,7 +44,7 @@ func benchmarkSema(b *testing.B, block, work bool) {
|
|||
b.RunParallel(func(pb *testing.PB) {
|
||||
foo := 0
|
||||
for pb.Next() {
|
||||
Runtime_Semrelease(&sem, false)
|
||||
Runtime_Semrelease(&sem, false, 0)
|
||||
if work {
|
||||
for i := 0; i < 100; i++ {
|
||||
foo *= 2
|
||||
|
@ -54,7 +54,7 @@ func benchmarkSema(b *testing.B, block, work bool) {
|
|||
Runtime_Semacquire(&sem)
|
||||
}
|
||||
_ = foo
|
||||
Runtime_Semrelease(&sem, false)
|
||||
Runtime_Semrelease(&sem, false, 0)
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
@ -73,7 +73,7 @@ func (rw *RWMutex) RUnlock() {
|
|||
// A writer is pending.
|
||||
if atomic.AddInt32(&rw.readerWait, -1) == 0 {
|
||||
// The last reader unblocks the writer.
|
||||
runtime_Semrelease(&rw.writerSem, false)
|
||||
runtime_Semrelease(&rw.writerSem, false, 0)
|
||||
}
|
||||
}
|
||||
if race.Enabled {
|
||||
|
@ -125,7 +125,7 @@ func (rw *RWMutex) Unlock() {
|
|||
}
|
||||
// Unblock blocked readers, if any.
|
||||
for i := 0; i < int(r); i++ {
|
||||
runtime_Semrelease(&rw.readerSem, false)
|
||||
runtime_Semrelease(&rw.readerSem, false, 0)
|
||||
}
|
||||
// Allow other writers to proceed.
|
||||
rw.w.Unlock()
|
||||
|
|
|
@ -90,7 +90,7 @@ func (wg *WaitGroup) Add(delta int) {
|
|||
// Reset waiters count to 0.
|
||||
*statep = 0
|
||||
for ; w != 0; w-- {
|
||||
runtime_Semrelease(semap, false)
|
||||
runtime_Semrelease(semap, false, 0)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
24
test/inline_sync.go
Normal file
24
test/inline_sync.go
Normal file
|
@ -0,0 +1,24 @@
|
|||
// +build !nacl,!386
|
||||
// errorcheck -0 -m
|
||||
|
||||
// Copyright 2019 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Test, using compiler diagnostic flags, that inlining of functions
|
||||
// imported from the sync package is working.
|
||||
// Compiles but does not run.
|
||||
// FIXME: nacl-386 is excluded as inlining currently does not work there.
|
||||
|
||||
package foo
|
||||
|
||||
import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
var mutex *sync.Mutex
|
||||
|
||||
func small5() { // ERROR "can inline small5"
|
||||
// the Unlock fast path should be inlined
|
||||
mutex.Unlock() // ERROR "inlining call to sync\.\(\*Mutex\)\.Unlock" "&sync\.m\.state escapes to heap"
|
||||
}
|
Loading…
Reference in a new issue