runtime: optimize permission changes with mprotect

On Linux, both mprotect() and mmap() acquire the mmap_lock (in writer mode),
posing scalability challenges.

The mmap_lock (formerly called mmap_sem) is a reader/writer lock that controls
access to a process's address space; before making changes there (mapping in a
new range, for example), the kernel must acquire that lock.

Page-fault handling must also acquire mmap_lock (in reader mode) to ensure that
the address space doesn't change in surprising ways while a fault is being resolved.

A process can have a large address space and many threads running (and incurring
page faults) concurrently, turning mmap_lock into a significant bottleneck.

While both mmap() and mprotect() are protected by the mmap_lock, the mprotect
system call is simpler and completes sooner, so it holds the mmap_lock for a
shorter time.
This commit is contained in:
Lance Yang 2024-01-09 18:20:44 +08:00
parent b750841906
commit 6f27a216b4
12 changed files with 18 additions and 1 deletions

View file

@ -6,6 +6,7 @@ package syscall
const (
SYS_FCNTL = 55
SYS_MPROTECT = 125
SYS_EPOLL_CTL = 255
SYS_EPOLL_PWAIT = 319
SYS_EPOLL_CREATE1 = 329

View file

@ -5,6 +5,7 @@
package syscall
const (
SYS_MPROTECT = 10
SYS_FCNTL = 72
SYS_EPOLL_CTL = 233
SYS_EPOLL_PWAIT = 281

View file

@ -6,6 +6,7 @@ package syscall
const (
SYS_FCNTL = 55
SYS_MPROTECT = 125
SYS_EPOLL_CTL = 251
SYS_EPOLL_PWAIT = 346
SYS_EPOLL_CREATE1 = 357

View file

@ -9,6 +9,7 @@ const (
SYS_EPOLL_CTL = 21
SYS_EPOLL_PWAIT = 22
SYS_FCNTL = 25
SYS_MPROTECT = 226
SYS_EPOLL_PWAIT2 = 441
SYS_EVENTFD2 = 19

View file

@ -9,6 +9,7 @@ const (
SYS_EPOLL_CTL = 21
SYS_EPOLL_PWAIT = 22
SYS_FCNTL = 25
SYS_MPROTECT = 226
SYS_EPOLL_PWAIT2 = 441
SYS_EVENTFD2 = 19

View file

@ -7,6 +7,7 @@
package syscall
const (
SYS_MPROTECT = 5010
SYS_FCNTL = 5070
SYS_EPOLL_CTL = 5208
SYS_EPOLL_PWAIT = 5272

View file

@ -8,6 +8,7 @@ package syscall
const (
SYS_FCNTL = 4055
SYS_MPROTECT = 4125
SYS_EPOLL_CTL = 4249
SYS_EPOLL_PWAIT = 4313
SYS_EPOLL_CREATE1 = 4326

View file

@ -8,6 +8,7 @@ package syscall
const (
SYS_FCNTL = 55
SYS_MPROTECT = 125
SYS_EPOLL_CTL = 237
SYS_EPOLL_PWAIT = 303
SYS_EPOLL_CREATE1 = 315

View file

@ -9,6 +9,7 @@ const (
SYS_EPOLL_CTL = 21
SYS_EPOLL_PWAIT = 22
SYS_FCNTL = 25
SYS_MPROTECT = 226
SYS_EPOLL_PWAIT2 = 441
SYS_EVENTFD2 = 19

View file

@ -6,6 +6,7 @@ package syscall
const (
SYS_FCNTL = 55
SYS_MPROTECT = 125
SYS_EPOLL_CTL = 250
SYS_EPOLL_PWAIT = 312
SYS_EPOLL_CREATE1 = 327

View file

@ -150,7 +150,8 @@ func sysFreeOS(v unsafe.Pointer, n uintptr) {
}
// sysFaultOS operates on the n bytes starting at v, requesting _PROT_NONE
// protection for the range and then issuing madvise with _MADV_DONTNEED.
// All return values of the calls below are ignored.
//
// NOTE(review): this text comes from a diff rendering whose +/- markers were
// stripped. The hunk header (7 lines -> 8 lines) implies one of the three
// calls below is the removed line; presumably the mmap call was replaced by
// the mprotect+madvise pair, as the commit message describes. Confirm against
// the real file before treating all three calls as live code.
func sysFaultOS(v unsafe.Pointer, n uintptr) {
// Map a fresh anonymous, private, fixed-address region over [v, v+n) with
// _PROT_NONE protection.
mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, -1, 0)
// Change the protection of the existing mapping to _PROT_NONE in place.
mprotect(v, n, _PROT_NONE)
// Presumably lets the kernel drop the pages backing the range, since
// changing protection alone would not release them — TODO confirm.
madvise(v, n, _MADV_DONTNEED)
}
func sysReserveOS(v unsafe.Pointer, n uintptr) unsafe.Pointer {

View file

@ -891,3 +891,9 @@ func (c *sigctxt) sigFromUser() bool {
code := int32(c.sigcode())
return code == _SI_USER || code == _SI_TKILL
}
// mprotect invokes the SYS_MPROTECT system call on the n bytes starting at
// addr, requesting the protection bits given in prot.
//
// ret is the first result word of the system call and errno is its error
// result, each converted to int32. The second result word is discarded.
//
//go:nosplit
func mprotect(addr unsafe.Pointer, n uintptr, prot int32) (ret int32, errno int32) {
	r1, _, e := syscall.Syscall6(
		syscall.SYS_MPROTECT,
		uintptr(addr), n, uintptr(prot),
		0, 0, 0, // SYS_MPROTECT is passed three arguments; the remaining slots are zero.
	)
	ret = int32(r1)
	errno = int32(e)
	return ret, errno
}