podman/libpod/container_internal_linux.go
OpenShift Merge Robot bce4a93575
Merge pull request #3297 from rhatdan/systemd
Accidently removed /run/lock from systemd mounts
2019-06-17 21:26:33 +02:00

1152 lines
34 KiB
Go

// +build linux
package libpod
import (
"context"
"fmt"
"io"
"io/ioutil"
"net"
"os"
"path"
"path/filepath"
"strconv"
"strings"
"sync"
"syscall"
"time"
cnitypes "github.com/containernetworking/cni/pkg/types/current"
"github.com/containernetworking/plugins/pkg/ns"
"github.com/containers/buildah/pkg/secrets"
crioAnnotations "github.com/containers/libpod/pkg/annotations"
"github.com/containers/libpod/pkg/apparmor"
"github.com/containers/libpod/pkg/criu"
"github.com/containers/libpod/pkg/lookup"
"github.com/containers/libpod/pkg/resolvconf"
"github.com/containers/libpod/pkg/rootless"
"github.com/containers/storage/pkg/archive"
securejoin "github.com/cyphar/filepath-securejoin"
"github.com/opencontainers/runc/libcontainer/user"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/runtime-tools/generate"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/opentracing/opentracing-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
func (c *Container) mountSHM(shmOptions string) error {
if err := unix.Mount("shm", c.config.ShmDir, "tmpfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV,
label.FormatMountLabel(shmOptions, c.config.MountLabel)); err != nil {
return errors.Wrapf(err, "failed to mount shm tmpfs %q", c.config.ShmDir)
}
return nil
}
func (c *Container) unmountSHM(mount string) error {
if err := unix.Unmount(mount, unix.MNT_DETACH); err != nil {
if err != syscall.EINVAL {
logrus.Warnf("container %s failed to unmount %s : %v", c.ID(), mount, err)
} else {
logrus.Debugf("container %s failed to unmount %s : %v", c.ID(), mount, err)
}
}
return nil
}
// prepare mounts the container and sets up other required resources like net
// namespaces
func (c *Container) prepare() (err error) {
var (
wg sync.WaitGroup
netNS ns.NetNS
networkStatus []*cnitypes.Result
createNetNSErr, mountStorageErr error
mountPoint string
tmpStateLock sync.Mutex
)
wg.Add(2)
go func() {
defer wg.Done()
// Set up network namespace if not already set up
if c.config.CreateNetNS && c.state.NetNS == nil && !c.config.PostConfigureNetNS {
netNS, networkStatus, createNetNSErr = c.runtime.createNetNS(c)
tmpStateLock.Lock()
defer tmpStateLock.Unlock()
// Assign NetNS attributes to container
if createNetNSErr == nil {
c.state.NetNS = netNS
c.state.NetworkStatus = networkStatus
}
}
}()
// Mount storage if not mounted
go func() {
defer wg.Done()
mountPoint, mountStorageErr = c.mountStorage()
if mountStorageErr != nil {
return
}
tmpStateLock.Lock()
defer tmpStateLock.Unlock()
// Finish up mountStorage
c.state.Mounted = true
c.state.Mountpoint = mountPoint
logrus.Debugf("Created root filesystem for container %s at %s", c.ID(), c.state.Mountpoint)
}()
defer func() {
if err != nil {
if err2 := c.cleanupNetwork(); err2 != nil {
logrus.Errorf("Error cleaning up container %s network: %v", c.ID(), err2)
}
if err2 := c.cleanupStorage(); err2 != nil {
logrus.Errorf("Error cleaning up container %s storage: %v", c.ID(), err2)
}
}
}()
wg.Wait()
if createNetNSErr != nil {
if mountStorageErr != nil {
logrus.Error(createNetNSErr)
return mountStorageErr
}
return createNetNSErr
}
if mountStorageErr != nil {
return mountStorageErr
}
// Save the container
return c.save()
}
// cleanupNetwork unmounts and cleans up the container's network
func (c *Container) cleanupNetwork() error {
if c.config.NetNsCtr != "" {
return nil
}
netDisabled, err := c.NetworkDisabled()
if err != nil {
return err
}
if netDisabled {
return nil
}
if c.state.NetNS == nil {
logrus.Debugf("Network is already cleaned up, skipping...")
return nil
}
// Stop the container's network namespace (if it has one)
if err := c.runtime.teardownNetNS(c); err != nil {
logrus.Errorf("unable to cleanup network for container %s: %q", c.ID(), err)
}
c.state.NetNS = nil
c.state.NetworkStatus = nil
if c.valid {
return c.save()
}
return nil
}
// Generate spec for a container
// Accepts a map of the container's dependencies
func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
span, _ := opentracing.StartSpanFromContext(ctx, "generateSpec")
span.SetTag("type", "container")
defer span.Finish()
execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, c.config.User, nil)
if err != nil {
return nil, err
}
g := generate.NewFromSpec(c.config.Spec)
// If network namespace was requested, add it now
if c.config.CreateNetNS {
if c.config.PostConfigureNetNS {
g.AddOrReplaceLinuxNamespace(spec.NetworkNamespace, "")
} else {
g.AddOrReplaceLinuxNamespace(spec.NetworkNamespace, c.state.NetNS.Path())
}
}
// Apply AppArmor checks and load the default profile if needed.
if !c.config.Privileged {
updatedProfile, err := apparmor.CheckProfileAndLoadDefault(c.config.Spec.Process.ApparmorProfile)
if err != nil {
return nil, err
}
g.SetProcessApparmorProfile(updatedProfile)
}
if err := c.makeBindMounts(); err != nil {
return nil, err
}
// Check if the spec file mounts contain the label Relabel flags z or Z.
// If they do, relabel the source directory and then remove the option.
for i := range g.Config.Mounts {
m := &g.Config.Mounts[i]
var options []string
for _, o := range m.Options {
switch o {
case "z":
fallthrough
case "Z":
if err := label.Relabel(m.Source, c.MountLabel(), label.IsShared(o)); err != nil {
return nil, errors.Wrapf(err, "relabel failed %q", m.Source)
}
default:
options = append(options, o)
}
}
m.Options = options
}
g.SetProcessSelinuxLabel(c.ProcessLabel())
g.SetLinuxMountLabel(c.MountLabel())
// Add named volumes
for _, namedVol := range c.config.NamedVolumes {
volume, err := c.runtime.GetVolume(namedVol.Name)
if err != nil {
return nil, errors.Wrapf(err, "error retrieving volume %s to add to container %s", namedVol.Name, c.ID())
}
mountPoint := volume.MountPoint()
volMount := spec.Mount{
Type: "bind",
Source: mountPoint,
Destination: namedVol.Dest,
Options: namedVol.Options,
}
g.AddMount(volMount)
}
// Add bind mounts to container
for dstPath, srcPath := range c.state.BindMounts {
newMount := spec.Mount{
Type: "bind",
Source: srcPath,
Destination: dstPath,
Options: []string{"bind", "private"},
}
if c.IsReadOnly() && dstPath != "/dev/shm" {
newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
}
if !MountExists(g.Mounts(), dstPath) {
g.AddMount(newMount)
} else {
logrus.Warnf("User mount overriding libpod mount at %q", dstPath)
}
}
if c.config.User != "" {
// User and Group must go together
g.SetProcessUID(uint32(execUser.Uid))
g.SetProcessGID(uint32(execUser.Gid))
}
// Add addition groups if c.config.GroupAdd is not empty
if len(c.config.Groups) > 0 {
gids, _ := lookup.GetContainerGroups(c.config.Groups, c.state.Mountpoint, nil)
for _, gid := range gids {
g.AddProcessAdditionalGid(gid)
}
}
if c.config.Systemd {
if err := c.setupSystemd(g.Mounts(), g); err != nil {
return nil, errors.Wrapf(err, "error adding systemd-specific mounts")
}
}
// Look up and add groups the user belongs to, if a group wasn't directly specified
if !rootless.IsRootless() && !strings.Contains(c.config.User, ":") {
for _, gid := range execUser.Sgids {
g.AddProcessAdditionalGid(uint32(gid))
}
}
// Add shared namespaces from other containers
if c.config.IPCNsCtr != "" {
if err := c.addNamespaceContainer(&g, IPCNS, c.config.IPCNsCtr, spec.IPCNamespace); err != nil {
return nil, err
}
}
if c.config.MountNsCtr != "" {
if err := c.addNamespaceContainer(&g, MountNS, c.config.MountNsCtr, spec.MountNamespace); err != nil {
return nil, err
}
}
if c.config.NetNsCtr != "" {
if err := c.addNamespaceContainer(&g, NetNS, c.config.NetNsCtr, spec.NetworkNamespace); err != nil {
return nil, err
}
}
if c.config.PIDNsCtr != "" {
if err := c.addNamespaceContainer(&g, PIDNS, c.config.PIDNsCtr, string(spec.PIDNamespace)); err != nil {
return nil, err
}
}
if c.config.UserNsCtr != "" {
if err := c.addNamespaceContainer(&g, UserNS, c.config.UserNsCtr, spec.UserNamespace); err != nil {
return nil, err
}
}
if c.config.UTSNsCtr != "" {
if err := c.addNamespaceContainer(&g, UTSNS, c.config.UTSNsCtr, spec.UTSNamespace); err != nil {
return nil, err
}
}
if c.config.CgroupNsCtr != "" {
if err := c.addNamespaceContainer(&g, CgroupNS, c.config.CgroupNsCtr, spec.CgroupNamespace); err != nil {
return nil, err
}
}
g.SetRootPath(c.state.Mountpoint)
g.AddAnnotation(crioAnnotations.Created, c.config.CreatedTime.Format(time.RFC3339Nano))
g.AddAnnotation("org.opencontainers.image.stopSignal", fmt.Sprintf("%d", c.config.StopSignal))
for _, i := range c.config.Spec.Linux.Namespaces {
if string(i.Type) == spec.UTSNamespace {
hostname := c.Hostname()
g.SetHostname(hostname)
g.AddProcessEnv("HOSTNAME", hostname)
break
}
}
// Only add container environment variable if not already present
foundContainerEnv := false
for _, env := range g.Config.Process.Env {
if strings.HasPrefix(env, "container=") {
foundContainerEnv = true
break
}
}
if !foundContainerEnv {
g.AddProcessEnv("container", "libpod")
}
if rootless.IsRootless() {
g.SetLinuxCgroupsPath("")
} else if c.runtime.config.CgroupManager == SystemdCgroupsManager {
// When runc is set to use Systemd as a cgroup manager, it
// expects cgroups to be passed as follows:
// slice:prefix:name
systemdCgroups := fmt.Sprintf("%s:libpod:%s", path.Base(c.config.CgroupParent), c.ID())
logrus.Debugf("Setting CGroups for container %s to %s", c.ID(), systemdCgroups)
g.SetLinuxCgroupsPath(systemdCgroups)
} else {
cgroupPath, err := c.CGroupPath()
if err != nil {
return nil, err
}
logrus.Debugf("Setting CGroup path for container %s to %s", c.ID(), cgroupPath)
g.SetLinuxCgroupsPath(cgroupPath)
}
// Mounts need to be sorted so paths will not cover other paths
mounts := sortMounts(g.Mounts())
g.ClearMounts()
// Determine property of RootPropagation based on volume properties. If
// a volume is shared, then keep root propagation shared. This should
// work for slave and private volumes too.
//
// For slave volumes, it can be either [r]shared/[r]slave.
//
// For private volumes any root propagation value should work.
rootPropagation := ""
for _, m := range mounts {
// We need to remove all symlinks from tmpfs mounts.
// Runc and other runtimes may choke on them.
// Easy solution: use securejoin to do a scoped evaluation of
// the links, then trim off the mount prefix.
if m.Type == "tmpfs" {
finalPath, err := securejoin.SecureJoin(c.state.Mountpoint, m.Destination)
if err != nil {
return nil, errors.Wrapf(err, "error resolving symlinks for mount destination %s", m.Destination)
}
trimmedPath := strings.TrimPrefix(finalPath, strings.TrimSuffix(c.state.Mountpoint, "/"))
m.Destination = trimmedPath
}
g.AddMount(m)
for _, opt := range m.Options {
switch opt {
case MountShared, MountRShared:
if rootPropagation != MountShared && rootPropagation != MountRShared {
rootPropagation = MountShared
}
case MountSlave, MountRSlave:
if rootPropagation != MountShared && rootPropagation != MountRShared && rootPropagation != MountSlave && rootPropagation != MountRSlave {
rootPropagation = MountRSlave
}
}
}
}
if rootPropagation != "" {
logrus.Debugf("set root propagation to %q", rootPropagation)
g.SetLinuxRootPropagation(rootPropagation)
}
// Warning: precreate hooks may alter g.Config in place.
if c.state.ExtensionStageHooks, err = c.setupOCIHooks(ctx, g.Config); err != nil {
return nil, errors.Wrapf(err, "error setting up OCI Hooks")
}
return g.Config, nil
}
// systemd expects to have /run, /run/lock and /tmp on tmpfs
// It also expects to be able to write to /sys/fs/cgroup/systemd and /var/log/journal
func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) error {
options := []string{"rw", "rprivate", "noexec", "nosuid", "nodev"}
for _, dest := range []string{"/run", "/run/lock"} {
if MountExists(mounts, dest) {
continue
}
tmpfsMnt := spec.Mount{
Destination: dest,
Type: "tmpfs",
Source: "tmpfs",
Options: append(options, "tmpcopyup", "size=65536k"),
}
g.AddMount(tmpfsMnt)
}
for _, dest := range []string{"/tmp", "/var/log/journal"} {
if MountExists(mounts, dest) {
continue
}
tmpfsMnt := spec.Mount{
Destination: dest,
Type: "tmpfs",
Source: "tmpfs",
Options: append(options, "tmpcopyup"),
}
g.AddMount(tmpfsMnt)
}
// rootless containers have no write access to /sys/fs/cgroup, so don't
// add any mount into the container.
if !rootless.IsRootless() {
cgroupPath, err := c.CGroupPath()
if err != nil {
return err
}
sourcePath := filepath.Join("/sys/fs/cgroup/systemd", cgroupPath)
systemdMnt := spec.Mount{
Destination: "/sys/fs/cgroup/systemd",
Type: "bind",
Source: sourcePath,
Options: []string{"bind", "private"},
}
g.AddMount(systemdMnt)
} else {
systemdMnt := spec.Mount{
Destination: "/sys/fs/cgroup/systemd",
Type: "bind",
Source: "/sys/fs/cgroup/systemd",
Options: []string{"bind", "nodev", "noexec", "nosuid"},
}
g.AddMount(systemdMnt)
}
return nil
}
// Add an existing container's namespace to the spec
func (c *Container) addNamespaceContainer(g *generate.Generator, ns LinuxNS, ctr string, specNS string) error {
nsCtr, err := c.runtime.state.Container(ctr)
if err != nil {
return errors.Wrapf(err, "error retrieving dependency %s of container %s from state", ctr, c.ID())
}
// TODO need unlocked version of this for use in pods
nsPath, err := nsCtr.NamespacePath(ns)
if err != nil {
return err
}
if err := g.AddOrReplaceLinuxNamespace(specNS, nsPath); err != nil {
return err
}
return nil
}
func (c *Container) exportCheckpoint(dest string) (err error) {
if (len(c.config.NamedVolumes) > 0) || (len(c.Dependencies()) > 0) {
return errors.Errorf("Cannot export checkpoints of containers with named volumes or dependencies")
}
logrus.Debugf("Exporting checkpoint image of container %q to %q", c.ID(), dest)
input, err := archive.TarWithOptions(c.bundlePath(), &archive.TarOptions{
Compression: archive.Gzip,
IncludeSourceDir: true,
IncludeFiles: []string{
"checkpoint",
"artifacts",
"ctr.log",
"config.dump",
"spec.dump",
"network.status"},
})
if err != nil {
return errors.Wrapf(err, "error reading checkpoint directory %q", c.ID())
}
outFile, err := os.Create(dest)
if err != nil {
return errors.Wrapf(err, "error creating checkpoint export file %q", dest)
}
defer outFile.Close()
if err := os.Chmod(dest, 0600); err != nil {
return errors.Wrapf(err, "cannot chmod %q", dest)
}
_, err = io.Copy(outFile, input)
if err != nil {
return err
}
return nil
}
func (c *Container) checkpointRestoreSupported() (err error) {
if !criu.CheckForCriu() {
return errors.Errorf("Checkpoint/Restore requires at least CRIU %d", criu.MinCriuVersion)
}
if !c.runtime.ociRuntime.featureCheckCheckpointing() {
return errors.Errorf("Configured runtime does not support checkpoint/restore")
}
return nil
}
func (c *Container) checkpointRestoreLabelLog(fileName string) (err error) {
// Create the CRIU log file and label it
dumpLog := filepath.Join(c.bundlePath(), fileName)
logFile, err := os.OpenFile(dumpLog, os.O_CREATE, 0600)
if err != nil {
return errors.Wrapf(err, "failed to create CRIU log file %q", dumpLog)
}
logFile.Close()
if err = label.SetFileLabel(dumpLog, c.MountLabel()); err != nil {
return errors.Wrapf(err, "failed to label CRIU log file %q", dumpLog)
}
return nil
}
func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) (err error) {
if err := c.checkpointRestoreSupported(); err != nil {
return err
}
if c.state.State != ContainerStateRunning {
return errors.Wrapf(ErrCtrStateInvalid, "%q is not running, cannot checkpoint", c.state.State)
}
if err := c.checkpointRestoreLabelLog("dump.log"); err != nil {
return err
}
if err := c.runtime.ociRuntime.checkpointContainer(c, options); err != nil {
return err
}
// Save network.status. This is needed to restore the container with
// the same IP. Currently limited to one IP address in a container
// with one interface.
formatJSON, err := json.MarshalIndent(c.state.NetworkStatus, "", " ")
if err != nil {
return err
}
if err := ioutil.WriteFile(filepath.Join(c.bundlePath(), "network.status"), formatJSON, 0644); err != nil {
return err
}
if options.TargetFile != "" {
if err = c.exportCheckpoint(options.TargetFile); err != nil {
return err
}
}
logrus.Debugf("Checkpointed container %s", c.ID())
if !options.KeepRunning {
c.state.State = ContainerStateStopped
// Cleanup Storage and Network
if err := c.cleanup(ctx); err != nil {
return err
}
}
if !options.Keep {
cleanup := []string{
"dump.log",
"stats-dump",
"config.dump",
"spec.dump",
}
for _, delete := range cleanup {
file := filepath.Join(c.bundlePath(), delete)
os.Remove(file)
}
}
return c.save()
}
func (c *Container) importCheckpoint(input string) (err error) {
archiveFile, err := os.Open(input)
if err != nil {
return errors.Wrapf(err, "Failed to open checkpoint archive %s for import", input)
}
defer archiveFile.Close()
options := &archive.TarOptions{
ExcludePatterns: []string{
// config.dump and spec.dump are only required
// container creation
"config.dump",
"spec.dump",
},
}
err = archive.Untar(archiveFile, c.bundlePath(), options)
if err != nil {
return errors.Wrapf(err, "Unpacking of checkpoint archive %s failed", input)
}
// Make sure the newly created config.json exists on disk
g := generate.NewFromSpec(c.config.Spec)
if err = c.saveSpec(g.Spec()); err != nil {
return errors.Wrap(err, "Saving imported container specification for restore failed")
}
return nil
}
func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (err error) {
if err := c.checkpointRestoreSupported(); err != nil {
return err
}
if (c.state.State != ContainerStateConfigured) && (c.state.State != ContainerStateExited) {
return errors.Wrapf(ErrCtrStateInvalid, "container %s is running or paused, cannot restore", c.ID())
}
if options.TargetFile != "" {
if err = c.importCheckpoint(options.TargetFile); err != nil {
return err
}
}
// Let's try to stat() CRIU's inventory file. If it does not exist, it makes
// no sense to try a restore. This is a minimal check if a checkpoint exist.
if _, err := os.Stat(filepath.Join(c.CheckpointPath(), "inventory.img")); os.IsNotExist(err) {
return errors.Wrapf(err, "A complete checkpoint for this container cannot be found, cannot restore")
}
if err := c.checkpointRestoreLabelLog("restore.log"); err != nil {
return err
}
// Read network configuration from checkpoint
// Currently only one interface with one IP is supported.
networkStatusFile, err := os.Open(filepath.Join(c.bundlePath(), "network.status"))
// If the restored container should get a new name, the IP address of
// the container will not be restored. This assumes that if a new name is
// specified, the container is restored multiple times.
// TODO: This implicit restoring with or without IP depending on an
// unrelated restore parameter (--name) does not seem like the
// best solution.
if err == nil && options.Name == "" {
// The file with the network.status does exist. Let's restore the
// container with the same IP address as during checkpointing.
defer networkStatusFile.Close()
var networkStatus []*cnitypes.Result
networkJSON, err := ioutil.ReadAll(networkStatusFile)
if err != nil {
return err
}
json.Unmarshal(networkJSON, &networkStatus)
// Take the first IP address
var IP net.IP
if len(networkStatus) > 0 {
if len(networkStatus[0].IPs) > 0 {
IP = networkStatus[0].IPs[0].Address.IP
}
}
if IP != nil {
// Tell CNI which IP address we want.
c.requestedIP = IP
}
}
defer func() {
if err != nil {
if err2 := c.cleanup(ctx); err2 != nil {
logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2)
}
}
}()
if err := c.prepare(); err != nil {
return err
}
// Read config
jsonPath := filepath.Join(c.bundlePath(), "config.json")
logrus.Debugf("generate.NewFromFile at %v", jsonPath)
g, err := generate.NewFromFile(jsonPath)
if err != nil {
logrus.Debugf("generate.NewFromFile failed with %v", err)
return err
}
// Restoring from an import means that we are doing migration
if options.TargetFile != "" {
g.SetRootPath(c.state.Mountpoint)
}
// We want to have the same network namespace as before.
if c.config.CreateNetNS {
g.AddOrReplaceLinuxNamespace(spec.NetworkNamespace, c.state.NetNS.Path())
}
if err := c.makeBindMounts(); err != nil {
return err
}
if options.TargetFile != "" {
for dstPath, srcPath := range c.state.BindMounts {
newMount := spec.Mount{
Type: "bind",
Source: srcPath,
Destination: dstPath,
Options: []string{"bind", "private"},
}
if c.IsReadOnly() && dstPath != "/dev/shm" {
newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
}
if !MountExists(g.Mounts(), dstPath) {
g.AddMount(newMount)
}
}
}
// Cleanup for a working restore.
c.removeConmonFiles()
// Save the OCI spec to disk
if err := c.saveSpec(g.Spec()); err != nil {
return err
}
if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent, &options); err != nil {
return err
}
logrus.Debugf("Restored container %s", c.ID())
c.state.State = ContainerStateRunning
if !options.Keep {
// Delete all checkpoint related files. At this point, in theory, all files
// should exist. Still ignoring errors for now as the container should be
// restored and running. Not erroring out just because some cleanup operation
// failed. Starting with the checkpoint directory
err = os.RemoveAll(c.CheckpointPath())
if err != nil {
logrus.Debugf("Non-fatal: removal of checkpoint directory (%s) failed: %v", c.CheckpointPath(), err)
}
cleanup := [...]string{"restore.log", "dump.log", "stats-dump", "stats-restore", "network.status"}
for _, delete := range cleanup {
file := filepath.Join(c.bundlePath(), delete)
err = os.Remove(file)
if err != nil {
logrus.Debugf("Non-fatal: removal of checkpoint file (%s) failed: %v", file, err)
}
}
}
return c.save()
}
// Make standard bind mounts to include in the container
func (c *Container) makeBindMounts() error {
if err := os.Chown(c.state.RunDir, c.RootUID(), c.RootGID()); err != nil {
return errors.Wrapf(err, "cannot chown run directory %s", c.state.RunDir)
}
if c.state.BindMounts == nil {
c.state.BindMounts = make(map[string]string)
}
netDisabled, err := c.NetworkDisabled()
if err != nil {
return err
}
if !netDisabled {
// If /etc/resolv.conf and /etc/hosts exist, delete them so we
// will recreate. Only do this if we aren't sharing them with
// another container.
if c.config.NetNsCtr == "" {
if path, ok := c.state.BindMounts["/etc/resolv.conf"]; ok {
if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
return errors.Wrapf(err, "error removing container %s resolv.conf", c.ID())
}
delete(c.state.BindMounts, "/etc/resolv.conf")
}
if path, ok := c.state.BindMounts["/etc/hosts"]; ok {
if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
return errors.Wrapf(err, "error removing container %s hosts", c.ID())
}
delete(c.state.BindMounts, "/etc/hosts")
}
}
if c.config.NetNsCtr != "" && (!c.config.UseImageResolvConf || !c.config.UseImageHosts) {
// We share a net namespace.
// We want /etc/resolv.conf and /etc/hosts from the
// other container. Unless we're not creating both of
// them.
depCtr, err := c.runtime.state.Container(c.config.NetNsCtr)
if err != nil {
return errors.Wrapf(err, "error fetching dependency %s of container %s", c.config.NetNsCtr, c.ID())
}
// We need that container's bind mounts
bindMounts, err := depCtr.BindMounts()
if err != nil {
return errors.Wrapf(err, "error fetching bind mounts from dependency %s of container %s", depCtr.ID(), c.ID())
}
if !c.config.UseImageResolvConf {
// The other container may not have a resolv.conf or /etc/hosts
// If it doesn't, don't copy them
resolvPath, exists := bindMounts["/etc/resolv.conf"]
if exists {
c.state.BindMounts["/etc/resolv.conf"] = resolvPath
}
}
if !c.config.UseImageHosts {
// check if dependency container has an /etc/hosts file
hostsPath, exists := bindMounts["/etc/hosts"]
if !exists {
return errors.Errorf("error finding hosts file of dependency container %s for container %s", depCtr.ID(), c.ID())
}
depCtr.lock.Lock()
// generate a hosts file for the dependency container,
// based on either its old hosts file, or the default,
// and add the relevant information from the new container (hosts and IP)
hostsPath, err = depCtr.appendHosts(hostsPath, c)
if err != nil {
depCtr.lock.Unlock()
return errors.Wrapf(err, "error creating hosts file for container %s which depends on container %s", c.ID(), depCtr.ID())
}
depCtr.lock.Unlock()
// finally, save it in the new container
c.state.BindMounts["/etc/hosts"] = hostsPath
}
} else {
if !c.config.UseImageResolvConf {
newResolv, err := c.generateResolvConf()
if err != nil {
return errors.Wrapf(err, "error creating resolv.conf for container %s", c.ID())
}
c.state.BindMounts["/etc/resolv.conf"] = newResolv
}
if !c.config.UseImageHosts {
newHosts, err := c.generateHosts("/etc/hosts")
if err != nil {
return errors.Wrapf(err, "error creating hosts file for container %s", c.ID())
}
c.state.BindMounts["/etc/hosts"] = newHosts
}
}
if c.state.BindMounts["/etc/hosts"] != "" {
if err := label.Relabel(c.state.BindMounts["/etc/hosts"], c.config.MountLabel, true); err != nil {
return err
}
}
if c.state.BindMounts["/etc/resolv.conf"] != "" {
if err := label.Relabel(c.state.BindMounts["/etc/resolv.conf"], c.config.MountLabel, true); err != nil {
return err
}
}
}
// SHM is always added when we mount the container
c.state.BindMounts["/dev/shm"] = c.config.ShmDir
newPasswd, err := c.generatePasswd()
if err != nil {
return errors.Wrapf(err, "error creating temporary passwd file for container %s", c.ID())
}
if newPasswd != "" {
// Make /etc/passwd
if _, ok := c.state.BindMounts["/etc/passwd"]; ok {
// If it already exists, delete so we can recreate
delete(c.state.BindMounts, "/etc/passwd")
}
logrus.Debugf("adding entry to /etc/passwd for non existent default user")
c.state.BindMounts["/etc/passwd"] = newPasswd
}
// Make /etc/hostname
// This should never change, so no need to recreate if it exists
if _, ok := c.state.BindMounts["/etc/hostname"]; !ok {
hostnamePath, err := c.writeStringToRundir("hostname", c.Hostname())
if err != nil {
return errors.Wrapf(err, "error creating hostname file for container %s", c.ID())
}
c.state.BindMounts["/etc/hostname"] = hostnamePath
}
// Make .containerenv
// Empty file, so no need to recreate if it exists
if _, ok := c.state.BindMounts["/run/.containerenv"]; !ok {
// Empty string for now, but we may consider populating this later
containerenvPath, err := c.writeStringToRundir(".containerenv", "")
if err != nil {
return errors.Wrapf(err, "error creating containerenv file for container %s", c.ID())
}
c.state.BindMounts["/run/.containerenv"] = containerenvPath
}
// Add Secret Mounts
secretMounts := secrets.SecretMountsWithUIDGID(c.config.MountLabel, c.state.RunDir, c.runtime.config.DefaultMountsFile, c.state.RunDir, c.RootUID(), c.RootGID(), rootless.IsRootless())
for _, mount := range secretMounts {
if _, ok := c.state.BindMounts[mount.Destination]; !ok {
c.state.BindMounts[mount.Destination] = mount.Source
}
}
return nil
}
// generateResolvConf generates a containers resolv.conf
func (c *Container) generateResolvConf() (string, error) {
resolvConf := "/etc/resolv.conf"
for _, ns := range c.config.Spec.Linux.Namespaces {
if ns.Type == spec.NetworkNamespace {
if ns.Path != "" && !strings.HasPrefix(ns.Path, "/proc/") {
definedPath := filepath.Join("/etc/netns", filepath.Base(ns.Path), "resolv.conf")
_, err := os.Stat(definedPath)
if err == nil {
resolvConf = definedPath
} else if !os.IsNotExist(err) {
return "", errors.Wrapf(err, "failed to stat %s", definedPath)
}
}
break
}
}
// Determine the endpoint for resolv.conf in case it is a symlink
resolvPath, err := filepath.EvalSymlinks(resolvConf)
if err != nil {
return "", err
}
contents, err := ioutil.ReadFile(resolvPath)
if err != nil {
return "", errors.Wrapf(err, "unable to read %s", resolvPath)
}
// Ensure that the container's /etc/resolv.conf is compatible with its
// network configuration.
// TODO: set ipv6 enable bool more sanely
resolv, err := resolvconf.FilterResolvDNS(contents, true, c.config.CreateNetNS)
if err != nil {
return "", errors.Wrapf(err, "error parsing host resolv.conf")
}
// Make a new resolv.conf
nameservers := resolvconf.GetNameservers(resolv.Content)
// slirp4netns has a built in DNS server.
if c.config.NetMode.IsSlirp4netns() {
nameservers = append([]string{"10.0.2.3"}, nameservers...)
}
if len(c.config.DNSServer) > 0 {
// We store DNS servers as net.IP, so need to convert to string
nameservers = []string{}
for _, server := range c.config.DNSServer {
nameservers = append(nameservers, server.String())
}
}
search := resolvconf.GetSearchDomains(resolv.Content)
if len(c.config.DNSSearch) > 0 {
search = c.config.DNSSearch
}
options := resolvconf.GetOptions(resolv.Content)
if len(c.config.DNSOption) > 0 {
options = c.config.DNSOption
}
destPath := filepath.Join(c.state.RunDir, "resolv.conf")
if err := os.Remove(destPath); err != nil && !os.IsNotExist(err) {
return "", errors.Wrapf(err, "error removing resolv.conf for container %s", c.ID())
}
// Build resolv.conf
if _, err = resolvconf.Build(destPath, nameservers, search, options); err != nil {
return "", errors.Wrapf(err, "error building resolv.conf for container %s", c.ID())
}
// Relabel resolv.conf for the container
if err := label.Relabel(destPath, c.config.MountLabel, true); err != nil {
return "", err
}
return filepath.Join(c.state.RunDir, "resolv.conf"), nil
}
// generateHosts creates a containers hosts file
func (c *Container) generateHosts(path string) (string, error) {
orig, err := ioutil.ReadFile(path)
if err != nil {
return "", errors.Wrapf(err, "unable to read %s", path)
}
hosts := string(orig)
hosts += c.getHosts()
return c.writeStringToRundir("hosts", hosts)
}
// appendHosts appends a container's config and state pertaining to hosts to a container's
// local hosts file. netCtr is the container from which the netNS information is
// taken.
// path is the basis of the hosts file, into which netCtr's netNS information will be appended.
func (c *Container) appendHosts(path string, netCtr *Container) (string, error) {
return c.appendStringToRundir("hosts", netCtr.getHosts())
}
// getHosts finds the pertinent information for a container's host file in its config and state
// and returns a string in a format that can be written to the host file
func (c *Container) getHosts() string {
var hosts string
if len(c.config.HostAdd) > 0 {
for _, host := range c.config.HostAdd {
// the host format has already been verified at this point
fields := strings.SplitN(host, ":", 2)
hosts += fmt.Sprintf("%s %s\n", fields[1], fields[0])
}
}
if len(c.state.NetworkStatus) > 0 && len(c.state.NetworkStatus[0].IPs) > 0 {
ipAddress := strings.Split(c.state.NetworkStatus[0].IPs[0].Address.String(), "/")[0]
hosts += fmt.Sprintf("%s\t%s\n", ipAddress, c.Hostname())
}
return hosts
}
// generatePasswd generates a container specific passwd file,
// iff g.config.User is a number
func (c *Container) generatePasswd() (string, error) {
var (
groupspec string
gid int
)
if c.config.User == "" {
return "", nil
}
spec := strings.SplitN(c.config.User, ":", 2)
userspec := spec[0]
if len(spec) > 1 {
groupspec = spec[1]
}
// If a non numeric User, then don't generate passwd
uid, err := strconv.ParseUint(userspec, 10, 32)
if err != nil {
return "", nil
}
// Lookup the user to see if it exists in the container image
_, err = lookup.GetUser(c.state.Mountpoint, userspec)
if err != nil && err != user.ErrNoPasswdEntries {
return "", err
}
if err == nil {
return "", nil
}
if groupspec != "" {
ugid, err := strconv.ParseUint(groupspec, 10, 32)
if err == nil {
gid = int(ugid)
} else {
group, err := lookup.GetGroup(c.state.Mountpoint, groupspec)
if err != nil {
return "", errors.Wrapf(err, "unable to get gid %s from group file", groupspec)
}
gid = group.Gid
}
}
originPasswdFile := filepath.Join(c.state.Mountpoint, "/etc/passwd")
orig, err := ioutil.ReadFile(originPasswdFile)
if err != nil && !os.IsNotExist(err) {
return "", errors.Wrapf(err, "unable to read passwd file %s", originPasswdFile)
}
pwd := fmt.Sprintf("%s%d:x:%d:%d:container user:%s:/bin/sh\n", orig, uid, uid, gid, c.WorkingDir())
passwdFile, err := c.writeStringToRundir("passwd", pwd)
if err != nil {
return "", errors.Wrapf(err, "failed to create temporary passwd file")
}
if os.Chmod(passwdFile, 0644); err != nil {
return "", err
}
return passwdFile, nil
}
func (c *Container) copyOwnerAndPerms(source, dest string) error {
info, err := os.Stat(source)
if err != nil {
if os.IsNotExist(err) {
return nil
}
return errors.Wrapf(err, "cannot stat `%s`", dest)
}
if err := os.Chmod(dest, info.Mode()); err != nil {
return errors.Wrapf(err, "cannot chmod `%s`", dest)
}
if err := os.Chown(dest, int(info.Sys().(*syscall.Stat_t).Uid), int(info.Sys().(*syscall.Stat_t).Gid)); err != nil {
return errors.Wrapf(err, "cannot chown `%s`", dest)
}
return nil
}