os: make use of pidfd on linux

Use Process.handle field to store pidfd, and make use of it. Only use
pidfd functionality if all the needed syscalls are available.

1. StartProcess: obtain the pidfd from the kernel, if available,
   using the functionality added by CL 520266. Note we could not modify
   syscall.StartProcess to return pidfd directly because it is a public
   API and its callers do not expect it, so we have to use ensurePidfd
   and getPidfd.

2. (*Process).Kill: use pidfdSendSignal, if the syscall is available
   and pidfd is known. This is slightly more complicated than it should
   be, since the syscall can be blocked by e.g. seccomp security policy,
   therefore the need for a function to check if it's actually working,
   and a soft fallback to kill. Perhaps this precaution is not really
   needed.

3. (*Process).Wait: use pidfdWait, if available, otherwise fall back to
   using waitid/wait4. This is also more complicated than expected due
   to struct siginfo_t idiosyncrasy.

NOTE pidfdSendSignal and pidfdWait are used without a race workaround
(blockUntilWaitable and sigMu, added by CL 23967) because with pidfd,
PID recycle issue doesn't exist (IOW, pidfd, unlike PID, is guaranteed
to refer to one particular process) and thus the race doesn't exist
either.

For #62654.
Updates #13987.

Change-Id: I22ebcc7142b16a3a94c422d2f32504d1a80e8a8f
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
Reviewed-on: https://go-review.googlesource.com/c/go/+/528438
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Michael Pratt <mpratt@google.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Kir Kolyshkin 2023-09-13 01:07:10 -07:00 committed by Michael Pratt
parent 4cd743e27e
commit 750738b5d1
19 changed files with 389 additions and 13 deletions

View file

@ -13,3 +13,11 @@ func PidFDSendSignal(pidfd uintptr, s syscall.Signal) error {
} }
return nil return nil
} }
func PidFDOpen(pid, flags int) (uintptr, error) {
pidfd, _, errno := syscall.Syscall(pidfdOpenTrap, uintptr(pid), uintptr(flags), 0)
if errno != 0 {
return ^uintptr(0), errno
}
return uintptr(pidfd), nil
}

View file

@ -0,0 +1,64 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package unix
import (
"syscall"
)
const is64bit = ^uint(0) >> 63 // 0 for 32-bit hosts, 1 for 64-bit ones.
// SiginfoChild is a struct filled in by Linux waitid syscall.
// In C, siginfo_t contains a union with multiple members;
// this struct corresponds to one used when Signo is SIGCHLD.
//
// NOTE fields are exported to be used by TestSiginfoChildLayout.
type SiginfoChild struct {
Signo int32
siErrnoCode // Two int32 fields, swapped on MIPS.
_ [is64bit]int32 // Extra padding for 64-bit hosts only.
// End of common part. Beginning of signal-specific part.
Pid int32
Uid uint32
Status int32
// Pad to 128 bytes.
_ [128 - (6+is64bit)*4]byte
}
const (
// Possible values for SiginfoChild.Code field.
_CLD_EXITED int32 = 1
_CLD_KILLED = 2
_CLD_DUMPED = 3
_CLD_TRAPPED = 4
_CLD_STOPPED = 5
_CLD_CONTINUED = 6
// These are the same as in syscall/syscall_linux.go.
core = 0x80
stopped = 0x7f
continued = 0xffff
)
// WaitStatus converts SiginfoChild, as filled in by the waitid syscall,
// to syscall.WaitStatus.
func (s *SiginfoChild) WaitStatus() (ws syscall.WaitStatus) {
switch s.Code {
case _CLD_EXITED:
ws = syscall.WaitStatus(s.Status << 8)
case _CLD_DUMPED:
ws = syscall.WaitStatus(s.Status) | core
case _CLD_KILLED:
ws = syscall.WaitStatus(s.Status)
case _CLD_TRAPPED, _CLD_STOPPED:
ws = syscall.WaitStatus(s.Status<<8) | stopped
case _CLD_CONTINUED:
ws = continued
}
return
}

View file

@ -0,0 +1,12 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build linux && (mips || mipsle || mips64 || mips64le)
package unix
type siErrnoCode struct {
Code int32
Errno int32
}

View file

@ -0,0 +1,12 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build linux && !(mips || mipsle || mips64 || mips64le)
package unix
type siErrnoCode struct {
Errno int32
Code int32
}

View file

@ -0,0 +1,59 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package unix_test
import (
"internal/goarch"
"internal/syscall/unix"
"runtime"
"strings"
"testing"
"unsafe"
)
// TestSiginfoChildLayout validates SiginfoChild layout. Modelled after
// static assertions in linux kernel's arch/*/kernel/signal*.c.
func TestSiginfoChildLayout(t *testing.T) {
var si unix.SiginfoChild
const host64bit = goarch.PtrSize == 8
if v := unsafe.Sizeof(si); v != 128 {
t.Fatalf("sizeof: got %d, want 128", v)
}
ofSigno := 0
ofErrno := 4
ofCode := 8
if strings.HasPrefix(runtime.GOARCH, "mips") {
// These two fields are swapped on MIPS platforms.
ofErrno, ofCode = ofCode, ofErrno
}
ofPid := 12
if host64bit {
ofPid = 16
}
ofUid := ofPid + 4
ofStatus := ofPid + 8
offsets := []struct {
name string
got uintptr
want int
}{
{"Signo", unsafe.Offsetof(si.Signo), ofSigno},
{"Errno", unsafe.Offsetof(si.Errno), ofErrno},
{"Code", unsafe.Offsetof(si.Code), ofCode},
{"Pid", unsafe.Offsetof(si.Pid), ofPid},
{"Uid", unsafe.Offsetof(si.Uid), ofUid},
{"Status", unsafe.Offsetof(si.Status), ofStatus},
}
for _, tc := range offsets {
if int(tc.got) != tc.want {
t.Errorf("offsetof %s: got %d, want %d", tc.name, tc.got, tc.want)
}
}
}

View file

@ -8,4 +8,5 @@ const (
getrandomTrap uintptr = 355 getrandomTrap uintptr = 355
copyFileRangeTrap uintptr = 377 copyFileRangeTrap uintptr = 377
pidfdSendSignalTrap uintptr = 424 pidfdSendSignalTrap uintptr = 424
pidfdOpenTrap uintptr = 434
) )

View file

@ -8,4 +8,5 @@ const (
getrandomTrap uintptr = 318 getrandomTrap uintptr = 318
copyFileRangeTrap uintptr = 326 copyFileRangeTrap uintptr = 326
pidfdSendSignalTrap uintptr = 424 pidfdSendSignalTrap uintptr = 424
pidfdOpenTrap uintptr = 434
) )

View file

@ -8,4 +8,5 @@ const (
getrandomTrap uintptr = 384 getrandomTrap uintptr = 384
copyFileRangeTrap uintptr = 391 copyFileRangeTrap uintptr = 391
pidfdSendSignalTrap uintptr = 424 pidfdSendSignalTrap uintptr = 424
pidfdOpenTrap uintptr = 434
) )

View file

@ -14,4 +14,5 @@ const (
getrandomTrap uintptr = 278 getrandomTrap uintptr = 278
copyFileRangeTrap uintptr = 285 copyFileRangeTrap uintptr = 285
pidfdSendSignalTrap uintptr = 424 pidfdSendSignalTrap uintptr = 424
pidfdOpenTrap uintptr = 434
) )

View file

@ -10,4 +10,5 @@ const (
getrandomTrap uintptr = 5313 getrandomTrap uintptr = 5313
copyFileRangeTrap uintptr = 5320 copyFileRangeTrap uintptr = 5320
pidfdSendSignalTrap uintptr = 5424 pidfdSendSignalTrap uintptr = 5424
pidfdOpenTrap uintptr = 5434
) )

View file

@ -10,4 +10,5 @@ const (
getrandomTrap uintptr = 4353 getrandomTrap uintptr = 4353
copyFileRangeTrap uintptr = 4360 copyFileRangeTrap uintptr = 4360
pidfdSendSignalTrap uintptr = 4424 pidfdSendSignalTrap uintptr = 4424
pidfdOpenTrap uintptr = 4434
) )

View file

@ -10,4 +10,5 @@ const (
getrandomTrap uintptr = 359 getrandomTrap uintptr = 359
copyFileRangeTrap uintptr = 379 copyFileRangeTrap uintptr = 379
pidfdSendSignalTrap uintptr = 424 pidfdSendSignalTrap uintptr = 424
pidfdOpenTrap uintptr = 434
) )

View file

@ -8,4 +8,5 @@ const (
getrandomTrap uintptr = 349 getrandomTrap uintptr = 349
copyFileRangeTrap uintptr = 375 copyFileRangeTrap uintptr = 375
pidfdSendSignalTrap uintptr = 424 pidfdSendSignalTrap uintptr = 424
pidfdOpenTrap uintptr = 434
) )

View file

@ -13,6 +13,10 @@ import (
"syscall" "syscall"
) )
// unsetHandle is a value for Process.handle used when the handle is not set.
// Same as syscall.InvalidHandle for Windows.
const unsetHandle = ^uintptr(0)
// The only signal values guaranteed to be present in the os package on all // The only signal values guaranteed to be present in the os package on all
// systems are os.Interrupt (send the process an interrupt) and os.Kill (force // systems are os.Interrupt (send the process an interrupt) and os.Kill (force
// the process to exit). On Windows, sending os.Interrupt to a process with // the process to exit). On Windows, sending os.Interrupt to a process with
@ -38,7 +42,7 @@ func startProcess(name string, argv []string, attr *ProcAttr) (p *Process, err e
sysattr := &syscall.ProcAttr{ sysattr := &syscall.ProcAttr{
Dir: attr.Dir, Dir: attr.Dir,
Env: attr.Env, Env: attr.Env,
Sys: attr.Sys, Sys: ensurePidfd(attr.Sys),
} }
if sysattr.Env == nil { if sysattr.Env == nil {
sysattr.Env, err = execenv.Default(sysattr.Sys) sysattr.Env, err = execenv.Default(sysattr.Sys)
@ -60,6 +64,10 @@ func startProcess(name string, argv []string, attr *ProcAttr) (p *Process, err e
return nil, &PathError{Op: "fork/exec", Path: name, Err: e} return nil, &PathError{Op: "fork/exec", Path: name, Err: e}
} }
if runtime.GOOS == "linux" {
h = getPidfd(sysattr.Sys)
}
return newProcess(pid, h), nil return newProcess(pid, h), nil
} }

View file

@ -17,6 +17,16 @@ func (p *Process) wait() (ps *ProcessState, err error) {
if p.Pid == -1 { if p.Pid == -1 {
return nil, syscall.EINVAL return nil, syscall.EINVAL
} }
// Use pidfd if possible; fallback on ENOSYS or EPERM (the latter can be
// returned if syscall is prohibited by seccomp or a similar mechanism).
//
// When pidfd is used, there is no wait/kill race (described in CL 23967)
// because PID recycle issue doesn't exist (IOW, pidfd, unlike PID, is
// guaranteed to refer to one particular process). Thus, there is no
// need for the workaround (blockUntilWaitable + sigMu) below.
if ps, e := p.pidfdWait(); e != syscall.ENOSYS && e != syscall.EPERM {
return ps, NewSyscallError("waitid", e)
}
// If we can block until Wait4 will succeed immediately, do so. // If we can block until Wait4 will succeed immediately, do so.
ready, err := p.blockUntilWaitable() ready, err := p.blockUntilWaitable()
@ -64,26 +74,31 @@ func (p *Process) signal(sig Signal) error {
if p.Pid == 0 { if p.Pid == 0 {
return errors.New("os: process not initialized") return errors.New("os: process not initialized")
} }
s, ok := sig.(syscall.Signal)
if !ok {
return errors.New("os: unsupported signal type")
}
// Use pidfd if possible; fallback on ENOSYS.
if err := p.pidfdSendSignal(s); err != syscall.ENOSYS {
return err
}
p.sigMu.RLock() p.sigMu.RLock()
defer p.sigMu.RUnlock() defer p.sigMu.RUnlock()
if p.done() { if p.done() {
return ErrProcessDone return ErrProcessDone
} }
s, ok := sig.(syscall.Signal) return convertESRCH(syscall.Kill(p.Pid, s))
if !ok { }
return errors.New("os: unsupported signal type")
func convertESRCH(err error) error {
if err == syscall.ESRCH {
return ErrProcessDone
} }
if e := syscall.Kill(p.Pid, s); e != nil { return err
if e == syscall.ESRCH {
return ErrProcessDone
}
return e
}
return nil
} }
func (p *Process) release() error { func (p *Process) release() error {
// NOOP for unix. p.pidfdRelease()
p.Pid = -1 p.Pid = -1
// no need for a finalizer anymore // no need for a finalizer anymore
runtime.SetFinalizer(p, nil) runtime.SetFinalizer(p, nil)
@ -92,7 +107,7 @@ func (p *Process) release() error {
func findProcess(pid int) (p *Process, err error) { func findProcess(pid int) (p *Process, err error) {
// NOOP for unix. // NOOP for unix.
return newProcess(pid, 0), nil return newProcess(pid, unsetHandle), nil
} }
func (p *ProcessState) userTime() time.Duration { func (p *ProcessState) userTime() time.Duration {

View file

@ -9,4 +9,5 @@ var (
PollSpliceFile = &pollSplice PollSpliceFile = &pollSplice
PollSendFile = &pollSendFile PollSendFile = &pollSendFile
GetPollFDAndNetwork = getPollFDAndNetwork GetPollFDAndNetwork = getPollFDAndNetwork
CheckPidfdOnce = checkPidfdOnce
) )

145
src/os/pidfd_linux.go Normal file
View file

@ -0,0 +1,145 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build linux
// Support for pidfd was added during the course of a few Linux releases:
// v5.1: pidfd_send_signal syscall;
// v5.2: CLONE_PIDFD flag for clone syscall;
// v5.3: pidfd_open syscall, clone3 syscall;
// v5.4: P_PIDFD idtype support for waitid syscall;
// v5.6: pidfd_getfd syscall.
package os
import (
"internal/syscall/unix"
"sync"
"syscall"
"unsafe"
)
func ensurePidfd(sysAttr *syscall.SysProcAttr) *syscall.SysProcAttr {
if !pidfdWorks() {
return sysAttr
}
var pidfd int
if sysAttr == nil {
return &syscall.SysProcAttr{
PidFD: &pidfd,
}
}
if sysAttr.PidFD == nil {
newSys := *sysAttr // copy
newSys.PidFD = &pidfd
return &newSys
}
return sysAttr
}
func getPidfd(sysAttr *syscall.SysProcAttr) uintptr {
if !pidfdWorks() {
return unsetHandle
}
return uintptr(*sysAttr.PidFD)
}
func (p *Process) pidfdRelease() {
// Release pidfd unconditionally.
handle := p.handle.Swap(unsetHandle)
if handle != unsetHandle {
syscall.Close(int(handle))
}
}
// _P_PIDFD is used as idtype argument to waitid syscall.
const _P_PIDFD = 3
func (p *Process) pidfdWait() (*ProcessState, error) {
handle := p.handle.Load()
if handle == unsetHandle || !pidfdWorks() {
return nil, syscall.ENOSYS
}
var (
info unix.SiginfoChild
rusage syscall.Rusage
e syscall.Errno
)
for {
_, _, e = syscall.Syscall6(syscall.SYS_WAITID, _P_PIDFD, handle, uintptr(unsafe.Pointer(&info)), syscall.WEXITED, uintptr(unsafe.Pointer(&rusage)), 0)
if e != syscall.EINTR {
break
}
}
if e != 0 {
if e == syscall.EINVAL {
// This is either invalid option value (which should not happen
// as we only use WEXITED), or missing P_PIDFD support (Linux
// kernel < 5.4), meaning pidfd support is not implemented.
e = syscall.ENOSYS
}
return nil, e
}
p.setDone()
defer p.pidfdRelease()
return &ProcessState{
pid: int(info.Pid),
status: info.WaitStatus(),
rusage: &rusage,
}, nil
}
func (p *Process) pidfdSendSignal(s syscall.Signal) error {
handle := p.handle.Load()
if handle == unsetHandle || !pidfdWorks() {
return syscall.ENOSYS
}
return convertESRCH(unix.PidFDSendSignal(handle, s))
}
func pidfdWorks() bool {
return checkPidfdOnce() == nil
}
var checkPidfdOnce = sync.OnceValue(checkPidfd)
// checkPidfd checks whether all required pidfd-related syscalls work.
// This consists of pidfd_open and pidfd_send_signal syscalls, and waitid
// syscall with idtype of P_PIDFD.
//
// Reasons for non-working pidfd syscalls include an older kernel and an
// execution environment in which the above system calls are restricted by
// seccomp or a similar technology.
func checkPidfd() error {
// Get a pidfd of the current process (opening of "/proc/self" won't
// work for waitid).
fd, err := unix.PidFDOpen(syscall.Getpid(), 0)
if err != nil {
return NewSyscallError("pidfd_open", err)
}
defer syscall.Close(int(fd))
// Check waitid(P_PIDFD) works.
for {
_, _, err = syscall.Syscall6(syscall.SYS_WAITID, _P_PIDFD, fd, 0, syscall.WEXITED, 0, 0)
if err != syscall.EINTR {
break
}
}
// Expect ECHILD from waitid since we're not our own parent.
if err != syscall.ECHILD {
return NewSyscallError("pidfd_wait", err)
}
// Check pidfd_send_signal works (should be able to send 0 to itself).
if err := unix.PidFDSendSignal(fd, 0); err != nil {
return NewSyscallError("pidfd_send_signal", err)
}
return nil
}

View file

@ -0,0 +1,17 @@
package os_test
import (
"os"
"testing"
)
func TestCheckPidfd(t *testing.T) {
if err := os.CheckPidfdOnce(); err != nil {
t.Log("checkPidfd:", err)
} else {
t.Log("pidfd syscalls work")
}
// TODO: make some reasonable assumptions that pidfd must or must not
// work in the current test environment (for example, it must work for
// kernel >= 5.4), and fail if pidfdWorks is not as expected.
}

27
src/os/pidfd_other.go Normal file
View file

@ -0,0 +1,27 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build (unix && !linux) || (js && wasm) || wasip1 || windows
package os
import "syscall"
func ensurePidfd(sysAttr *syscall.SysProcAttr) *syscall.SysProcAttr {
return sysAttr
}
func getPidfd(_ *syscall.SysProcAttr) uintptr {
return unsetHandle
}
func (p *Process) pidfdRelease() {}
func (_ *Process) pidfdWait() (*ProcessState, error) {
return nil, syscall.ENOSYS
}
func (_ *Process) pidfdSendSignal(_ syscall.Signal) error {
return syscall.ENOSYS
}