podman/libpod/oci_conmon_exec_linux.go
Giuseppe Scrivano aafa80918a
do not set the inheritable capabilities
The kernel never sets the inheritable capabilities for a process, they
are only set by userspace.  Emulate the same behavior.

Closes: CVE-2022-27649

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
2022-03-23 09:05:18 +01:00

799 lines
24 KiB
Go

package libpod
import (
"fmt"
"io/ioutil"
"net/http"
"os"
"os/exec"
"path/filepath"
"strings"
"syscall"
"time"
"github.com/containers/common/pkg/capabilities"
"github.com/containers/common/pkg/config"
"github.com/containers/podman/v4/libpod/define"
"github.com/containers/podman/v4/pkg/errorhandling"
"github.com/containers/podman/v4/pkg/lookup"
"github.com/containers/podman/v4/pkg/util"
"github.com/containers/podman/v4/utils"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
// ExecContainer executes a command in a running container.
//
// It starts conmon for the exec session via startExec, attaches to the
// session in a background goroutine, waits for the conmon command to return
// and then reads the PID from conmon's sync pipe.
//
// It returns the PID read from the sync pipe, a channel that delivers the
// result of the attach and is then closed, and an error. On failure the PID
// is -1; when startExec or waiting for conmon fails the returned channel is
// nil.
func (r *ConmonOCIRuntime) ExecContainer(c *Container, sessionID string, options *ExecOptions, streams *define.AttachStreams, newSize *define.TerminalSize) (int, chan error, error) {
	if options == nil {
		return -1, nil, errors.Wrapf(define.ErrInvalidArg, "must provide an ExecOptions struct to ExecContainer")
	}
	if len(options.Cmd) == 0 {
		return -1, nil, errors.Wrapf(define.ErrInvalidArg, "must provide a command to execute")
	}
	if sessionID == "" {
		return -1, nil, errors.Wrapf(define.ErrEmptyID, "must provide a session ID for exec")
	}

	// TODO: Should we default this to false?
	// Or maybe make streams mandatory?
	attachStdin := true
	if streams != nil {
		attachStdin = streams.AttachInput
	}

	// An OCI log file is only requested when not running at debug level and
	// the runtime supports JSON.
	var ociLog string
	if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON {
		ociLog = c.execOCILog(sessionID)
	}

	execCmd, pipes, err := r.startExec(c, sessionID, options, attachStdin, ociLog)
	if err != nil {
		return -1, nil, err
	}

	// Only close sync pipe. Start and attach are consumed in the attach
	// goroutine.
	defer func() {
		if pipes.syncPipe != nil && !pipes.syncClosed {
			errorhandling.CloseQuiet(pipes.syncPipe)
			pipes.syncClosed = true
		}
	}()

	// TODO Only create if !detach
	// Attach to the container before starting it.
	// The channel has room for one value so the goroutine below can always
	// deliver its result and exit, even on the error path where this
	// function returns a nil channel and nobody will ever receive from it.
	attachChan := make(chan error, 1)
	go func() {
		// attachToExec is responsible for closing pipes
		attachChan <- c.attachToExec(streams, options.DetachKeys, sessionID, pipes.startPipe, pipes.attachPipe, newSize)
		close(attachChan)
	}()

	if err := execCmd.Wait(); err != nil {
		return -1, nil, errors.Wrapf(err, "cannot run conmon")
	}

	pid, err := readConmonPipeData(r.name, pipes.syncPipe, ociLog)

	return pid, attachChan, err
}
// ExecContainerHTTP runs a new command inside an existing container and
// forwards its standard streams over a hijacked HTTP connection.
//
// It returns the PID and error reported by the attach goroutine through the
// conmon pipe-data channel, together with a channel that delivers the final
// result of the HTTP attach and is then closed.
func (r *ConmonOCIRuntime) ExecContainerHTTP(ctr *Container, sessionID string, options *ExecOptions, req *http.Request, w http.ResponseWriter,
	streams *HTTPAttachStreams, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool, newSize *define.TerminalSize) (int, chan error, error) {
	// When streams are given, at least one of them has to be requested.
	if streams != nil && !(streams.Stdin || streams.Stdout || streams.Stderr) {
		return -1, nil, errors.Wrapf(define.ErrInvalidArg, "must provide at least one stream to attach to")
	}
	if options == nil {
		return -1, nil, errors.Wrapf(define.ErrInvalidArg, "must provide exec options to ExecContainerHTTP")
	}

	// Fall back to the configured default detach keys unless overridden.
	keySpec := config.DefaultDetachKeys
	if options.DetachKeys != nil {
		keySpec = *options.DetachKeys
	}
	detachKeys, err := processDetachKeys(keySpec)
	if err != nil {
		return -1, nil, err
	}

	// TODO: Should we default this to false?
	// Or maybe make streams mandatory?
	wantStdin := streams == nil || streams.Stdin

	ociLog := ""
	if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON {
		ociLog = ctr.execOCILog(sessionID)
	}

	execCmd, pipes, err := r.startExec(ctr, sessionID, options, wantStdin, ociLog)
	if err != nil {
		return -1, nil, err
	}

	// Only the sync pipe is closed here; the start and attach pipes are
	// consumed by the attach goroutine.
	defer func() {
		if pipes.syncPipe == nil || pipes.syncClosed {
			return
		}
		errorhandling.CloseQuiet(pipes.syncPipe)
		pipes.syncClosed = true
	}()

	attachChan := make(chan error)
	pipeDataChan := make(chan conmonPipeData)
	go func() {
		// attachExecHTTP takes care of closing the start and attach pipes.
		attachErr := attachExecHTTP(ctr, sessionID, req, w, streams, pipes, detachKeys, options.Terminal, cancel, hijackDone, holdConnOpen, execCmd, pipeDataChan, ociLog, newSize, r.name)
		attachChan <- attachErr
		close(attachChan)
	}()

	// NOTE: the channel is needed to communicate conmon's data. In case
	// of an error, the error will be written on the hijacked http
	// connection such that remote clients will receive the error.
	result := <-pipeDataChan

	return result.pid, attachChan, result.err
}
// conmonPipeData contains the data when reading from conmon's pipe.
// It is the message type sent over the channel that attachExecHTTP uses to
// report back to ExecContainerHTTP.
type conmonPipeData struct {
// pid is the value returned by readConmonPipeData for the sync pipe; senders
// in this file use -1 when the exec could not be set up.
pid int
// err is the error that accompanied pid, or nil.
err error
}
// ExecContainerDetached executes a command in a running container, but does
// not attach to it.
//
// stdin is passed to startExec as its attachStdin argument. The function
// waits for conmon to signal readiness on the attach pipe, writes the start
// message, waits for the conmon command to return and then reads the PID from
// the sync pipe. It returns that PID, or -1 together with an error. All exec
// pipes are closed before returning.
func (r *ConmonOCIRuntime) ExecContainerDetached(ctr *Container, sessionID string, options *ExecOptions, stdin bool) (int, error) {
	if options == nil {
		return -1, errors.Wrapf(define.ErrInvalidArg, "must provide exec options to ExecContainerDetached")
	}

	var ociLog string
	if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON {
		ociLog = ctr.execOCILog(sessionID)
	}

	execCmd, pipes, err := r.startExec(ctr, sessionID, options, stdin, ociLog)
	if err != nil {
		return -1, err
	}
	// Nothing else consumes the pipes in the detached case, so close all of
	// them on the way out.
	defer pipes.cleanup()

	// Wait for Conmon to tell us we're ready to attach.
	// We aren't actually *going* to attach, but this means that we're good
	// to proceed.
	if _, err := readConmonPipeData(r.name, pipes.attachPipe, ""); err != nil {
		return -1, err
	}

	// Start the exec session
	if err := writeConmonPipeData(pipes.startPipe); err != nil {
		return -1, err
	}

	// Wait for the conmon command to return successfully before reading
	// the PID.
	if err := execCmd.Wait(); err != nil {
		return -1, errors.Wrapf(err, "cannot run conmon")
	}

	pid, err := readConmonPipeData(r.name, pipes.syncPipe, ociLog)

	return pid, err
}
// ExecAttachResize resizes the TTY of the given exec session.
//
// The request is delivered by writing a line of the form
// "1 <height> <width>" to the control file found in the session's exec
// bundle path.
func (r *ConmonOCIRuntime) ExecAttachResize(ctr *Container, sessionID string, newSize define.TerminalSize) error {
	bundle := ctr.execBundlePath(sessionID)
	ctl, err := openControlFile(ctr, bundle)
	if err != nil {
		return err
	}
	defer ctl.Close()

	_, writeErr := fmt.Fprintf(ctl, "%d %d %d\n", 1, newSize.Height, newSize.Width)
	if writeErr != nil {
		return errors.Wrapf(writeErr, "failed to write to ctl file to resize terminal")
	}

	return nil
}
// ExecStopContainer stops a given exec session in a running container.
//
// If timeout is non-zero, the session's PID is first sent SIGTERM and given
// timeout seconds to exit. If it is still running afterwards, or if timeout
// is zero, the PID is sent SIGKILL and the function waits up to
// killContainerTimeout for it to disappear.
//
// A PID that no longer exists (ESRCH) at any step is treated as an already
// stopped session and nil is returned.
func (r *ConmonOCIRuntime) ExecStopContainer(ctr *Container, sessionID string, timeout uint) error {
	pid, err := ctr.getExecSessionPID(sessionID)
	if err != nil {
		return err
	}

	logrus.Debugf("Going to stop container %s exec session %s", ctr.ID(), sessionID)

	// Is the session dead?
	// Ping the PID with signal 0 to see if it still exists.
	if err := unix.Kill(pid, 0); err != nil {
		if err == unix.ESRCH {
			return nil
		}
		return errors.Wrapf(err, "error pinging container %s exec session %s PID %d with signal 0", ctr.ID(), sessionID, pid)
	}

	if timeout > 0 {
		// Use SIGTERM by default, then SIGKILL after timeout.
		logrus.Debugf("Killing exec session %s (PID %d) of container %s with SIGTERM", sessionID, pid, ctr.ID())
		if err := unix.Kill(pid, unix.SIGTERM); err != nil {
			if err == unix.ESRCH {
				return nil
			}
			return errors.Wrapf(err, "error killing container %s exec session %s PID %d with SIGTERM", ctr.ID(), sessionID, pid)
		}

		// Wait for the PID to stop
		err := waitPidStop(pid, time.Duration(timeout)*time.Second)
		if err == nil {
			// No error, the session is dead
			return nil
		}
		logrus.Infof("Timed out waiting for container %s exec session %s to stop, resorting to SIGKILL: %v", ctr.ID(), sessionID, err)
	}

	// SIGTERM did not work (or was skipped because no timeout was given).
	// On to SIGKILL, which the process cannot catch or ignore.
	logrus.Debugf("Killing exec session %s (PID %d) of container %s with SIGKILL", sessionID, pid, ctr.ID())
	if err := unix.Kill(pid, unix.SIGKILL); err != nil {
		if err == unix.ESRCH {
			return nil
		}
		return errors.Wrapf(err, "error killing container %s exec session %s PID %d with SIGKILL", ctr.ID(), sessionID, pid)
	}

	// Wait for the PID to stop
	if err := waitPidStop(pid, killContainerTimeout); err != nil {
		return errors.Wrapf(err, "timed out waiting for container %s exec session %s PID %d to stop after SIGKILL", ctr.ID(), sessionID, pid)
	}

	return nil
}
// ExecUpdateStatus reports whether the given exec session is still running.
//
// The session's PID is probed with signal 0: success means the process
// exists (true), ESRCH means it is gone (false), and any other failure is
// returned as an error.
func (r *ConmonOCIRuntime) ExecUpdateStatus(ctr *Container, sessionID string) (bool, error) {
	pid, err := ctr.getExecSessionPID(sessionID)
	if err != nil {
		return false, err
	}

	logrus.Debugf("Checking status of container %s exec session %s", ctr.ID(), sessionID)

	// Signal 0 performs the existence check without delivering a signal.
	pingErr := unix.Kill(pid, 0)
	switch {
	case pingErr == nil:
		return true, nil
	case pingErr == unix.ESRCH:
		return false, nil
	}

	return false, errors.Wrapf(pingErr, "error pinging container %s exec session %s PID %d with signal 0", ctr.ID(), sessionID, pid)
}
// ExecAttachSocketPath returns the path to a container's exec session attach
// socket: the file named "attach" inside the session's exec bundle path.
// An empty sessionID is rejected with define.ErrInvalidArg.
func (r *ConmonOCIRuntime) ExecAttachSocketPath(ctr *Container, sessionID string) (string, error) {
	// ctr is only used to derive the exec bundle path; it is not validated here.
	if sessionID != "" {
		bundle := ctr.execBundlePath(sessionID)
		return filepath.Join(bundle, "attach"), nil
	}

	return "", errors.Wrapf(define.ErrInvalidArg, "must provide a valid session ID to get attach socket path")
}
// execPipes bundles the parent-side ends of the pipes used by the exec API.
// Every pipe is paired with a flag recording whether it has already been
// closed, so that it is not closed a second time.
type execPipes struct {
	// syncPipe is the pipe the exec PID is read from.
	syncPipe   *os.File
	syncClosed bool
	// startPipe is the pipe the start message is written to.
	startPipe   *os.File
	startClosed bool
	// attachPipe is the pipe conmon's ready-to-attach message is read from.
	attachPipe   *os.File
	attachClosed bool
}

// cleanup closes every pipe that is set and not yet marked closed, and marks
// it closed afterwards.
func (p *execPipes) cleanup() {
	entries := []struct {
		pipe   *os.File
		closed *bool
	}{
		{p.syncPipe, &p.syncClosed},
		{p.startPipe, &p.startClosed},
		{p.attachPipe, &p.attachClosed},
	}

	for _, e := range entries {
		if e.pipe == nil || *e.closed {
			continue
		}
		errorhandling.CloseQuiet(e.pipe)
		*e.closed = true
	}
}
// Start an exec session's conmon parent from the given options.
//
// startExec creates the sync, start and attach pipes, writes the process spec
// for the exec session, builds the conmon command line and environment, and
// starts conmon. On success it returns the started command together with the
// parent-side pipe ends; the caller is responsible for waiting on the command
// and for closing the pipes. On any error after the sync pipe has been created
// the parent-side pipes are closed before returning.
func (r *ConmonOCIRuntime) startExec(c *Container, sessionID string, options *ExecOptions, attachStdin bool, ociLog string) (_ *exec.Cmd, _ *execPipes, deferredErr error) {
pipes := new(execPipes)
if options == nil {
return nil, nil, errors.Wrapf(define.ErrInvalidArg, "must provide an ExecOptions struct to ExecContainer")
}
if len(options.Cmd) == 0 {
return nil, nil, errors.Wrapf(define.ErrInvalidArg, "must provide a command to execute")
}
if sessionID == "" {
return nil, nil, errors.Wrapf(define.ErrEmptyID, "must provide a session ID for exec")
}
// create sync pipe to receive the pid
parentSyncPipe, childSyncPipe, err := newPipe()
if err != nil {
return nil, nil, errors.Wrapf(err, "error creating socket pair")
}
pipes.syncPipe = parentSyncPipe
// From here on, any error return closes whatever parent-side pipes have
// been stored in pipes so far.
defer func() {
if deferredErr != nil {
pipes.cleanup()
}
}()
// create start pipe to set the cgroup before running
// attachToExec is responsible for closing parentStartPipe
// NOTE(review): the two results of newPipe are taken in the opposite order
// here compared to the sync and attach pipes — presumably because the parent
// writes to this pipe rather than reading from it; confirm against newPipe.
childStartPipe, parentStartPipe, err := newPipe()
if err != nil {
return nil, nil, errors.Wrapf(err, "error creating socket pair")
}
pipes.startPipe = parentStartPipe
// create the attach pipe to allow attach socket to be created before
// $RUNTIME exec starts running. This is to make sure we can capture all output
// from the process through that socket, rather than half reading the log, half attaching to the socket
// attachToExec is responsible for closing parentAttachPipe
parentAttachPipe, childAttachPipe, err := newPipe()
if err != nil {
return nil, nil, errors.Wrapf(err, "error creating socket pair")
}
pipes.attachPipe = parentAttachPipe
// The child ends are closed explicitly right after conmon is started; this
// defer only covers the error returns that happen before that point.
childrenClosed := false
defer func() {
if !childrenClosed {
errorhandling.CloseQuiet(childSyncPipe)
errorhandling.CloseQuiet(childAttachPipe)
errorhandling.CloseQuiet(childStartPipe)
}
}()
runtimeDir, err := util.GetRuntimeDir()
if err != nil {
return nil, nil, err
}
// Flatten the requested environment into KEY=VALUE entries.
finalEnv := make([]string, 0, len(options.Env))
for k, v := range options.Env {
finalEnv = append(finalEnv, fmt.Sprintf("%s=%s", k, v))
}
// Write the process spec that is handed to conmon via --exec-process-spec.
processFile, err := prepareProcessExec(c, options, finalEnv, sessionID)
if err != nil {
return nil, nil, err
}
// Only the path of the spec file is passed on, so this handle can be closed
// when the function returns.
defer processFile.Close()
args := r.sharedConmonArgs(c, sessionID, c.execBundlePath(sessionID), c.execPidPath(sessionID), c.execLogPath(sessionID), c.execExitFileDir(sessionID), ociLog, define.NoLogging, "")
if options.PreserveFDs > 0 {
args = append(args, formatRuntimeOpts("--preserve-fds", fmt.Sprintf("%d", options.PreserveFDs))...)
}
if options.Terminal {
args = append(args, "-t")
}
if attachStdin {
args = append(args, "-i")
}
// Append container ID and command
args = append(args, "-e")
// TODO make this optional when we can detach
args = append(args, "--exec-attach")
args = append(args, "--exec-process-spec", processFile.Name())
// The first element of ExitCommand is the command itself; the remaining
// elements are passed one by one as its arguments.
if len(options.ExitCommand) > 0 {
args = append(args, "--exit-command", options.ExitCommand[0])
for _, arg := range options.ExitCommand[1:] {
args = append(args, []string{"--exit-command-arg", arg}...)
}
if options.ExitCommandDelay > 0 {
args = append(args, []string{"--exit-delay", fmt.Sprintf("%d", options.ExitCommandDelay)}...)
}
}
logrus.WithFields(logrus.Fields{
"args": args,
}).Debugf("running conmon: %s", r.conmonPath)
execCmd := exec.Command(r.conmonPath, args...)
// TODO: This is commented because it doesn't make much sense in HTTP
// attach, and I'm not certain it does for non-HTTP attach as well.
// if streams != nil {
// // Don't add the InputStream to the execCmd. Instead, the data should be passed
// // through CopyDetachable
// if streams.AttachOutput {
// execCmd.Stdout = options.Streams.OutputStream
// }
// if streams.AttachError {
// execCmd.Stderr = options.Streams.ErrorStream
// }
// }
conmonEnv := r.configureConmonEnv(c, runtimeDir)
// Wrap the file descriptors the caller asked to preserve (3 .. 3+PreserveFDs-1)
// so they are passed on to conmon; they are closed again once conmon has
// been started successfully.
var filesToClose []*os.File
if options.PreserveFDs > 0 {
for fd := 3; fd < int(3+options.PreserveFDs); fd++ {
f := os.NewFile(uintptr(fd), fmt.Sprintf("fd-%d", fd))
filesToClose = append(filesToClose, f)
execCmd.ExtraFiles = append(execCmd.ExtraFiles, f)
}
}
// we don't want to step on users fds they asked to preserve
// Since 0-2 are used for stdio, start the fds we pass in at preserveFDs+3
// The +3/+4/+5 offsets below match the order in which the sync, start and
// attach child pipes are appended to ExtraFiles after the preserved fds.
execCmd.Env = r.conmonEnv
execCmd.Env = append(execCmd.Env, fmt.Sprintf("_OCI_SYNCPIPE=%d", options.PreserveFDs+3), fmt.Sprintf("_OCI_STARTPIPE=%d", options.PreserveFDs+4), fmt.Sprintf("_OCI_ATTACHPIPE=%d", options.PreserveFDs+5))
execCmd.Env = append(execCmd.Env, conmonEnv...)
execCmd.ExtraFiles = append(execCmd.ExtraFiles, childSyncPipe, childStartPipe, childAttachPipe)
execCmd.Dir = c.execBundlePath(sessionID)
execCmd.SysProcAttr = &syscall.SysProcAttr{
Setpgid: true,
}
// The start error is checked only after the child pipe ends have been
// closed below, so they are released on both success and failure.
err = startCommandGivenSelinux(execCmd, c)
// We don't need children pipes on the parent side
errorhandling.CloseQuiet(childSyncPipe)
errorhandling.CloseQuiet(childAttachPipe)
errorhandling.CloseQuiet(childStartPipe)
childrenClosed = true
if err != nil {
return nil, nil, errors.Wrapf(err, "cannot start container %s", c.ID())
}
if err := r.moveConmonToCgroupAndSignal(c, execCmd, parentStartPipe); err != nil {
return nil, nil, err
}
// These fds were passed down to the runtime. Close them
// and not interfere
for _, f := range filesToClose {
errorhandling.CloseQuiet(f)
}
return execCmd, pipes, nil
}
// attachExecHTTP attaches to a container's exec session over a hijacked HTTP
// connection.
//
// It waits for conmon to report that the attach socket is ready, optionally
// resizes the terminal, connects to the attach socket, hijacks the HTTP
// connection, sends the start message to conmon and then copies data between
// the attach socket and the HTTP connection until output ends, an error
// occurs, or cancel fires.
//
// Exactly one conmonPipeData value is sent on conmonPipeDataChan: either the
// error that prevented the attach from being set up (with PID -1), or the
// result of waiting for conmon and reading the sync pipe. The start and attach
// pipes are closed when the function returns. The returned error is also
// written to the hijacked connection once holdConnOpen is released.
func attachExecHTTP(c *Container, sessionID string, r *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, pipes *execPipes, detachKeys []byte, isTerminal bool, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool, execCmd *exec.Cmd, conmonPipeDataChan chan<- conmonPipeData, ociLog string, newSize *define.TerminalSize, runtimeName string) (deferredErr error) {
	// NOTE: As you may notice, the attach code is quite complex.
	// Many things happen concurrently and yet are interdependent.
	// If you ever change this function, make sure to write to the
	// conmonPipeDataChan in case of an error.

	if pipes == nil || pipes.startPipe == nil || pipes.attachPipe == nil {
		err := errors.Wrapf(define.ErrInvalidArg, "must provide a start and attach pipe to finish an exec attach")
		conmonPipeDataChan <- conmonPipeData{-1, err}
		return err
	}

	defer func() {
		if !pipes.startClosed {
			errorhandling.CloseQuiet(pipes.startPipe)
			pipes.startClosed = true
		}
		if !pipes.attachClosed {
			errorhandling.CloseQuiet(pipes.attachPipe)
			pipes.attachClosed = true
		}
	}()

	logrus.Debugf("Attaching to container %s exec session %s", c.ID(), sessionID)

	// set up the socket path, such that it is the correct length and location for exec
	sockPath, err := c.execAttachSocketPath(sessionID)
	if err != nil {
		conmonPipeDataChan <- conmonPipeData{-1, err}
		return err
	}

	// 1: read from attachFd that the parent process has set up the console socket
	if _, err := readConmonPipeData(runtimeName, pipes.attachPipe, ""); err != nil {
		conmonPipeDataChan <- conmonPipeData{-1, err}
		return err
	}

	// resize before we start the container process
	// A failed resize is only logged; it does not abort the attach.
	if newSize != nil {
		err = c.ociRuntime.ExecAttachResize(c, sessionID, *newSize)
		if err != nil {
			logrus.Warnf("Resize failed: %v", err)
		}
	}

	// 2: then attach
	conn, err := openUnixSocket(sockPath)
	if err != nil {
		conmonPipeDataChan <- conmonPipeData{-1, err}
		return errors.Wrapf(err, "failed to connect to container's attach socket: %v", sockPath)
	}
	defer func() {
		if err := conn.Close(); err != nil {
			logrus.Errorf("Unable to close socket: %q", err)
		}
	}()

	attachStdout := true
	attachStderr := true
	attachStdin := true
	if streams != nil {
		attachStdout = streams.Stdout
		attachStderr = streams.Stderr
		attachStdin = streams.Stdin
	}

	// 3: perform hijack
	hijacker, ok := w.(http.Hijacker)
	if !ok {
		// Build the error before reporting it: the outer err is nil at this
		// point, and sending it would make the caller see a PID of -1
		// together with a nil error.
		err := errors.Errorf("unable to hijack connection")
		conmonPipeDataChan <- conmonPipeData{-1, err}
		return err
	}

	httpCon, httpBuf, err := hijacker.Hijack()
	if err != nil {
		conmonPipeDataChan <- conmonPipeData{-1, err}
		return errors.Wrapf(err, "error hijacking connection")
	}

	hijackDone <- true

	// Write a header to let the client know what happened
	writeHijackHeader(r, httpBuf)

	// Force a flush after the header is written.
	if err := httpBuf.Flush(); err != nil {
		conmonPipeDataChan <- conmonPipeData{-1, err}
		return errors.Wrapf(err, "error flushing HTTP hijack header")
	}

	go func() {
		// Wait for the conmon command to return, then report the PID (or the
		// failure) back through conmonPipeDataChan.
		if err := execCmd.Wait(); err != nil {
			conmonPipeDataChan <- conmonPipeData{-1, err}
		} else {
			pid, err := readConmonPipeData(runtimeName, pipes.syncPipe, ociLog)
			if err != nil {
				// Also surface the error to the remote client.
				hijackWriteError(err, c.ID(), isTerminal, httpBuf)
			}
			conmonPipeDataChan <- conmonPipeData{pid, err}
		}
		// We need to hold the connection open until the complete exec
		// function has finished. This channel will be closed in a defer
		// in that function, so we can wait for it here.
		// Can't be a defer, because this would block the function from
		// returning.
		<-holdConnOpen
		hijackWriteErrorAndClose(deferredErr, c.ID(), isTerminal, httpCon, httpBuf)
	}()

	// Both channels have room for one value so that the copy goroutines can
	// deliver their result and exit even after this function has returned
	// through another select case and nobody receives from them any more.
	stdoutChan := make(chan error, 1)
	stdinChan := make(chan error, 1)

	// Next, STDIN. Avoid entirely if attachStdin unset.
	if attachStdin {
		go func() {
			logrus.Debugf("Beginning STDIN copy")
			_, err := utils.CopyDetachable(conn, httpBuf, detachKeys)
			logrus.Debugf("STDIN copy completed")
			stdinChan <- err
		}()
	}

	// 4: send start message to child
	// No send on conmonPipeDataChan is needed here: the goroutine above
	// already reports on that channel.
	if err := writeConmonPipeData(pipes.startPipe); err != nil {
		return err
	}

	// Handle STDOUT/STDERR *after* start message is sent
	go func() {
		var err error
		if isTerminal {
			// Hack: return immediately if attachStdout not set to
			// emulate Docker.
			// Basically, when terminal is set, STDERR goes nowhere.
			// Everything does over STDOUT.
			// Therefore, if not attaching STDOUT - we'll never copy
			// anything from here.
			logrus.Debugf("Performing terminal HTTP attach for container %s", c.ID())
			if attachStdout {
				err = httpAttachTerminalCopy(conn, httpBuf, c.ID())
			}
		} else {
			logrus.Debugf("Performing non-terminal HTTP attach for container %s", c.ID())
			err = httpAttachNonTerminalCopy(conn, httpBuf, c.ID(), attachStdin, attachStdout, attachStderr)
		}
		stdoutChan <- err
		logrus.Debugf("STDOUT/ERR copy completed")
	}()

	for {
		select {
		case err := <-stdoutChan:
			// Output is finished (successfully or not): the attach is over.
			return err
		case err := <-stdinChan:
			if err != nil {
				return err
			}
			// copy stdin is done, close it
			if connErr := conn.CloseWrite(); connErr != nil {
				logrus.Errorf("Unable to close conn: %v", connErr)
			}
		case <-cancel:
			return nil
		}
	}
}
// prepareProcessExec writes the process spec for an exec session to a new
// temporary file ("exec-process-*") inside the session's exec bundle path and
// returns that file. The file is what is later passed to the runtime as the
// process JSON (e.g. runc exec -p).
//
// The spec starts as a deep copy of the container's process spec and is then
// adjusted from options: command, terminal, extra environment (env plus the
// container's secret environment variables), working directory, user and
// supplemental groups, capabilities, and a HOME default.
//
// The caller is responsible for closing the returned *os.File. If an error is
// returned, the temporary file has already been closed and removed.
func prepareProcessExec(c *Container, options *ExecOptions, env []string, sessionID string) (_ *os.File, deferredErr error) {
	f, err := ioutil.TempFile(c.execBundlePath(sessionID), "exec-process-")
	if err != nil {
		return nil, err
	}
	// Do not leak the open descriptor or leave a partial file behind when
	// any of the steps below fails.
	defer func() {
		if deferredErr == nil {
			return
		}
		errorhandling.CloseQuiet(f)
		if rmErr := os.Remove(f.Name()); rmErr != nil && !os.IsNotExist(rmErr) {
			logrus.Debugf("Unable to remove exec process spec %s: %v", f.Name(), rmErr)
		}
	}()

	pspec := new(spec.Process)
	if err := JSONDeepCopy(c.config.Spec.Process, pspec); err != nil {
		return nil, err
	}
	pspec.SelinuxLabel = c.config.ProcessLabel
	pspec.Args = options.Cmd

	// Always take the terminal setting from the exec options; otherwise the
	// copied spec would inherit the container's terminal setting.
	pspec.Terminal = options.Terminal

	if len(env) > 0 {
		pspec.Env = append(pspec.Env, env...)
	}

	// Add secret envs if they exist
	manager, err := c.runtime.SecretsManager()
	if err != nil {
		return nil, err
	}
	for name, secr := range c.config.EnvSecrets {
		_, data, err := manager.LookupSecretData(secr.Name)
		if err != nil {
			return nil, err
		}
		pspec.Env = append(pspec.Env, fmt.Sprintf("%s=%s", name, string(data)))
	}

	if options.Cwd != "" {
		pspec.Cwd = options.Cwd
	}

	var addGroups []string
	var sgids []uint32

	// if the user is empty, we should inherit the user that the container is currently running with
	user := options.User
	if user == "" {
		logrus.Debugf("Set user to %s", c.config.User)
		user = c.config.User
		addGroups = c.config.Groups
	}

	overrides := c.getUserOverrides()
	execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, user, overrides)
	if err != nil {
		return nil, err
	}

	if len(addGroups) > 0 {
		sgids, err = lookup.GetContainerGroups(addGroups, c.state.Mountpoint, overrides)
		if err != nil {
			return nil, errors.Wrapf(err, "error looking up supplemental groups for container %s exec session %s", c.ID(), sessionID)
		}
	}

	// If user was set, look it up in the container to get a UID to use on
	// the host
	if user != "" || len(sgids) > 0 {
		if user != "" {
			for _, sgid := range execUser.Sgids {
				sgids = append(sgids, uint32(sgid))
			}
		}
		processUser := spec.User{
			UID:            uint32(execUser.Uid),
			GID:            uint32(execUser.Gid),
			AdditionalGids: sgids,
		}

		pspec.User = processUser
	}

	ctrSpec, err := c.specFromState()
	if err != nil {
		return nil, err
	}

	allCaps, err := capabilities.BoundingSet()
	if err != nil {
		return nil, err
	}
	// A privileged exec gets the full bounding set; otherwise the bounding
	// set of the container is reused.
	if options.Privileged {
		pspec.Capabilities.Bounding = allCaps
	} else {
		pspec.Capabilities.Bounding = ctrSpec.Process.Capabilities.Bounding
	}

	// Always unset the inheritable capabilities similarly to what the Linux kernel does
	// They are used only when using capabilities with uid != 0.
	pspec.Capabilities.Inheritable = []string{}

	if execUser.Uid == 0 {
		// Root: effective and permitted follow the bounding set.
		pspec.Capabilities.Effective = pspec.Capabilities.Bounding
		pspec.Capabilities.Permitted = pspec.Capabilities.Bounding
	} else if user == c.config.User {
		// Non-root exec as the container's own user: every set, including
		// inheritable and ambient, is taken from the container's effective
		// set.
		pspec.Capabilities.Effective = ctrSpec.Process.Capabilities.Effective
		pspec.Capabilities.Inheritable = ctrSpec.Process.Capabilities.Effective
		pspec.Capabilities.Permitted = ctrSpec.Process.Capabilities.Effective
		pspec.Capabilities.Ambient = ctrSpec.Process.Capabilities.Effective
	}

	// Default HOME to the exec user's home directory unless the environment
	// already provides one.
	hasHomeSet := false
	for _, s := range pspec.Env {
		if strings.HasPrefix(s, "HOME=") {
			hasHomeSet = true
			break
		}
	}
	if !hasHomeSet {
		pspec.Env = append(pspec.Env, fmt.Sprintf("HOME=%s", execUser.Home))
	}

	processJSON, err := json.Marshal(pspec)
	if err != nil {
		return nil, err
	}

	if err := ioutil.WriteFile(f.Name(), processJSON, 0644); err != nil {
		return nil, err
	}
	return f, nil
}