From 6f27a216b4fb789181d00316561b44358a118b19 Mon Sep 17 00:00:00 2001 From: Lance Yang Date: Tue, 9 Jan 2024 18:20:44 +0800 Subject: [PATCH] runtime: optimize permission changes with mprotect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Linux, both mprotect() and mmap() acquire the mmap_lock (in writer mode), posing scalability challenges. The mmap_lock (formerly called mmap_sem) is a reader/writer lock that controls access to a process's address space; before making changes there (mapping in a new range, for example), the kernel must acquire that lock. Page-fault handling must also acquire mmap_lock (in reader mode) to ensure that the address space doesn't change in surprising ways while a fault is being resolved. A process can have a large address space and many threads running (and incurring page faults) concurrently, turning mmap_lock into a significant bottleneck. While both mmap() and mprotect() are protected by the mmap_lock, the shorter duration of mprotect system call, due to their simpler nature, results in a reduced locking time for the mmap_lock. --- src/internal/runtime/syscall/defs_linux_386.go | 1 + src/internal/runtime/syscall/defs_linux_amd64.go | 1 + src/internal/runtime/syscall/defs_linux_arm.go | 1 + src/internal/runtime/syscall/defs_linux_arm64.go | 1 + src/internal/runtime/syscall/defs_linux_loong64.go | 1 + src/internal/runtime/syscall/defs_linux_mips64x.go | 1 + src/internal/runtime/syscall/defs_linux_mipsx.go | 1 + src/internal/runtime/syscall/defs_linux_ppc64x.go | 1 + src/internal/runtime/syscall/defs_linux_riscv64.go | 1 + src/internal/runtime/syscall/defs_linux_s390x.go | 1 + src/runtime/mem_linux.go | 3 ++- src/runtime/os_linux.go | 6 ++++++ 12 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/internal/runtime/syscall/defs_linux_386.go b/src/internal/runtime/syscall/defs_linux_386.go index 613dc77d59..68e687fb14 100644 --- a/src/internal/runtime/syscall/defs_linux_386.go +++ b/src/internal/runtime/syscall/defs_linux_386.go @@ -6,6 +6,7 @@ package syscall const ( SYS_FCNTL = 55 + SYS_MPROTECT = 125 SYS_EPOLL_CTL = 255 SYS_EPOLL_PWAIT = 319 SYS_EPOLL_CREATE1 = 329 diff --git a/src/internal/runtime/syscall/defs_linux_amd64.go b/src/internal/runtime/syscall/defs_linux_amd64.go index 2ba3128813..ec480f5817 100644 --- a/src/internal/runtime/syscall/defs_linux_amd64.go +++ b/src/internal/runtime/syscall/defs_linux_amd64.go @@ -5,6 +5,7 @@ package syscall const ( + SYS_MPROTECT = 10 SYS_FCNTL = 72 SYS_EPOLL_CTL = 233 SYS_EPOLL_PWAIT = 281 diff --git a/src/internal/runtime/syscall/defs_linux_arm.go b/src/internal/runtime/syscall/defs_linux_arm.go index af3e0510b1..c5d1503012 100644 --- a/src/internal/runtime/syscall/defs_linux_arm.go +++ b/src/internal/runtime/syscall/defs_linux_arm.go @@ -6,6 +6,7 @@ package syscall const ( SYS_FCNTL = 55 + SYS_MPROTECT = 125 SYS_EPOLL_CTL = 251 SYS_EPOLL_PWAIT = 346 SYS_EPOLL_CREATE1 = 357 diff --git a/src/internal/runtime/syscall/defs_linux_arm64.go b/src/internal/runtime/syscall/defs_linux_arm64.go index c924f6211a..f743fe31a5 100644 --- a/src/internal/runtime/syscall/defs_linux_arm64.go +++ b/src/internal/runtime/syscall/defs_linux_arm64.go @@ -9,6 +9,7 @@ const ( SYS_EPOLL_CTL = 21 SYS_EPOLL_PWAIT = 22 SYS_FCNTL = 25 + SYS_MPROTECT = 226 SYS_EPOLL_PWAIT2 = 441 SYS_EVENTFD2 = 19 diff --git a/src/internal/runtime/syscall/defs_linux_loong64.go b/src/internal/runtime/syscall/defs_linux_loong64.go index c1a5649a42..82218d1509 100644 --- a/src/internal/runtime/syscall/defs_linux_loong64.go +++ b/src/internal/runtime/syscall/defs_linux_loong64.go @@ -9,6 +9,7 @@ const ( SYS_EPOLL_CTL = 21 SYS_EPOLL_PWAIT = 22 SYS_FCNTL = 25 + SYS_MPROTECT = 226 SYS_EPOLL_PWAIT2 = 441 SYS_EVENTFD2 = 19 diff --git a/src/internal/runtime/syscall/defs_linux_mips64x.go b/src/internal/runtime/syscall/defs_linux_mips64x.go index 07c0aba539..4e0fd1f5d1 100644 --- a/src/internal/runtime/syscall/defs_linux_mips64x.go +++ b/src/internal/runtime/syscall/defs_linux_mips64x.go @@ -7,6 +7,7 @@ package syscall const ( + SYS_MPROTECT = 5010 SYS_FCNTL = 5070 SYS_EPOLL_CTL = 5208 SYS_EPOLL_PWAIT = 5272 diff --git a/src/internal/runtime/syscall/defs_linux_mipsx.go b/src/internal/runtime/syscall/defs_linux_mipsx.go index a1bb5d720a..b87a355093 100644 --- a/src/internal/runtime/syscall/defs_linux_mipsx.go +++ b/src/internal/runtime/syscall/defs_linux_mipsx.go @@ -8,6 +8,7 @@ package syscall const ( SYS_FCNTL = 4055 + SYS_MPROTECT = 4125 SYS_EPOLL_CTL = 4249 SYS_EPOLL_PWAIT = 4313 SYS_EPOLL_CREATE1 = 4326 diff --git a/src/internal/runtime/syscall/defs_linux_ppc64x.go b/src/internal/runtime/syscall/defs_linux_ppc64x.go index 78558b360f..8235edd795 100644 --- a/src/internal/runtime/syscall/defs_linux_ppc64x.go +++ b/src/internal/runtime/syscall/defs_linux_ppc64x.go @@ -8,6 +8,7 @@ package syscall const ( SYS_FCNTL = 55 + SYS_MPROTECT = 125 SYS_EPOLL_CTL = 237 SYS_EPOLL_PWAIT = 303 SYS_EPOLL_CREATE1 = 315 diff --git a/src/internal/runtime/syscall/defs_linux_riscv64.go b/src/internal/runtime/syscall/defs_linux_riscv64.go index c1a5649a42..82218d1509 100644 --- a/src/internal/runtime/syscall/defs_linux_riscv64.go +++ b/src/internal/runtime/syscall/defs_linux_riscv64.go @@ -9,6 +9,7 @@ const ( SYS_EPOLL_CTL = 21 SYS_EPOLL_PWAIT = 22 SYS_FCNTL = 25 + SYS_MPROTECT = 226 SYS_EPOLL_PWAIT2 = 441 SYS_EVENTFD2 = 19 diff --git a/src/internal/runtime/syscall/defs_linux_s390x.go b/src/internal/runtime/syscall/defs_linux_s390x.go index b539b2d22a..08073c01f0 100644 --- a/src/internal/runtime/syscall/defs_linux_s390x.go +++ b/src/internal/runtime/syscall/defs_linux_s390x.go @@ -6,6 +6,7 @@ package syscall const ( SYS_FCNTL = 55 + SYS_MPROTECT = 125 SYS_EPOLL_CTL = 250 SYS_EPOLL_PWAIT = 312 SYS_EPOLL_CREATE1 = 327 diff --git a/src/runtime/mem_linux.go b/src/runtime/mem_linux.go index d63c38c209..15a406d97a 100644 --- a/src/runtime/mem_linux.go +++ b/src/runtime/mem_linux.go @@ -150,7 +150,8 @@ func sysFreeOS(v unsafe.Pointer, n uintptr) { } func sysFaultOS(v unsafe.Pointer, n uintptr) { - mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, -1, 0) + mprotect(v, n, _PROT_NONE) + madvise(v, n, _MADV_DONTNEED) } func sysReserveOS(v unsafe.Pointer, n uintptr) unsafe.Pointer { diff --git a/src/runtime/os_linux.go b/src/runtime/os_linux.go index ddacaa585c..c4f503c8c9 100644 --- a/src/runtime/os_linux.go +++ b/src/runtime/os_linux.go @@ -891,3 +891,9 @@ func (c *sigctxt) sigFromUser() bool { code := int32(c.sigcode()) return code == _SI_USER || code == _SI_TKILL } + +//go:nosplit +func mprotect(addr unsafe.Pointer, n uintptr, prot int32) (ret int32, errno int32) { + r, _, err := syscall.Syscall6(syscall.SYS_MPROTECT, uintptr(addr), n, uintptr(prot), 0, 0, 0) + return int32(r), int32(err) +}