podman/libpod/container_internal_linux.go
commit 3875040f13 (Matthew Heon): Ensure rootless containers without a passwd can start
We want to modify /etc/passwd to add an entry for the user in
question, but at the same time we don't want to require that the
container provide a /etc/passwd (a container with a single,
statically linked binary and nothing else is perfectly fine, for
example, and should be allowed). We could create the passwd file
if it does not exist, but if the container doesn't provide one,
it's probably better not to make one at all. Gate changes to
/etc/passwd behind a stat() of the file in the container
returning cleanly.
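
A minimal sketch of the gate as it appears in generatePasswd below
(error wrapping trimmed; securejoin resolves the in-container path
without letting it escape the rootfs):

    passwdPath, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/passwd")
    if err != nil {
        return "", err
    }
    if _, err := os.Stat(passwdPath); err != nil {
        if os.IsNotExist(err) {
            // No /etc/passwd in the container: do not create one.
            return "", nil
        }
        return "", err
    }
    // ...safe to modify the container's /etc/passwd from here...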

Fixes #7515

Signed-off-by: Matthew Heon <mheon@redhat.com>
2020-08-31 18:15:43 -04:00

// +build linux

package libpod
import (
"context"
"fmt"
"io"
"io/ioutil"
"net"
"os"
"os/user"
"path"
"path/filepath"
"strconv"
"strings"
"sync"
"syscall"
"time"
cnitypes "github.com/containernetworking/cni/pkg/types/current"
"github.com/containernetworking/plugins/pkg/ns"
"github.com/containers/buildah/pkg/overlay"
"github.com/containers/buildah/pkg/secrets"
"github.com/containers/common/pkg/apparmor"
"github.com/containers/common/pkg/config"
"github.com/containers/podman/v2/libpod/define"
"github.com/containers/podman/v2/libpod/events"
"github.com/containers/podman/v2/pkg/annotations"
"github.com/containers/podman/v2/pkg/cgroups"
"github.com/containers/podman/v2/pkg/criu"
"github.com/containers/podman/v2/pkg/lookup"
"github.com/containers/podman/v2/pkg/resolvconf"
"github.com/containers/podman/v2/pkg/rootless"
"github.com/containers/podman/v2/pkg/util"
"github.com/containers/podman/v2/utils"
"github.com/containers/storage/pkg/archive"
securejoin "github.com/cyphar/filepath-securejoin"
User "github.com/opencontainers/runc/libcontainer/user"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/runtime-tools/generate"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/opentracing/opentracing-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
func (c *Container) mountSHM(shmOptions string) error {
if err := unix.Mount("shm", c.config.ShmDir, "tmpfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV,
label.FormatMountLabel(shmOptions, c.config.MountLabel)); err != nil {
return errors.Wrapf(err, "failed to mount shm tmpfs %q", c.config.ShmDir)
}
return nil
}
func (c *Container) unmountSHM(mount string) error {
if err := unix.Unmount(mount, 0); err != nil {
if err != syscall.EINVAL && err != syscall.ENOENT {
return errors.Wrapf(err, "error unmounting container %s SHM mount %s", c.ID(), mount)
}
// If it's just an EINVAL or ENOENT, debug logs only
logrus.Debugf("container %s failed to unmount %s : %v", c.ID(), mount, err)
}
return nil
}
// prepare mounts the container and sets up other required resources like net
// namespaces
func (c *Container) prepare() error {
var (
wg sync.WaitGroup
netNS ns.NetNS
networkStatus []*cnitypes.Result
createNetNSErr, mountStorageErr error
mountPoint string
tmpStateLock sync.Mutex
)
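// Network namespace creation and storage mounting run in parallel
// below; tmpStateLock serializes their writes to c.state.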
wg.Add(2)
go func() {
defer wg.Done()
// Set up network namespace if not already set up
noNetNS := c.state.NetNS == nil
if c.config.CreateNetNS && noNetNS && !c.config.PostConfigureNetNS {
netNS, networkStatus, createNetNSErr = c.runtime.createNetNS(c)
if createNetNSErr != nil {
return
}
tmpStateLock.Lock()
defer tmpStateLock.Unlock()
// Assign NetNS attributes to container
c.state.NetNS = netNS
c.state.NetworkStatus = networkStatus
}
// handle rootless network namespace setup
if noNetNS && c.config.NetMode.IsSlirp4netns() && !c.config.PostConfigureNetNS {
createNetNSErr = c.runtime.setupRootlessNetNS(c)
}
}()
// Mount storage if not mounted
go func() {
defer wg.Done()
mountPoint, mountStorageErr = c.mountStorage()
if mountStorageErr != nil {
return
}
tmpStateLock.Lock()
defer tmpStateLock.Unlock()
// Finish up mountStorage
c.state.Mounted = true
c.state.Mountpoint = mountPoint
logrus.Debugf("Created root filesystem for container %s at %s", c.ID(), c.state.Mountpoint)
}()
wg.Wait()
var createErr error
if createNetNSErr != nil {
createErr = createNetNSErr
}
if mountStorageErr != nil {
if createErr != nil {
logrus.Errorf("Error preparing container %s: %v", c.ID(), createErr)
}
createErr = mountStorageErr
}
// Only trigger storage cleanup if mountStorage was successful.
// Otherwise, we may mess up mount counters.
if createNetNSErr != nil && mountStorageErr == nil {
if err := c.cleanupStorage(); err != nil {
// createErr is guaranteed non-nil, so print
// unconditionally
logrus.Errorf("Error preparing container %s: %v", c.ID(), createErr)
createErr = errors.Wrapf(err, "error unmounting storage for container %s after network create failure", c.ID())
}
}
// It's OK to unconditionally trigger network cleanup. If the network
// isn't ready it will do nothing.
if createErr != nil {
if err := c.cleanupNetwork(); err != nil {
logrus.Errorf("Error preparing container %s: %v", c.ID(), createErr)
createErr = errors.Wrapf(err, "error cleaning up container %s network after setup failure", c.ID())
}
}
if createErr != nil {
return createErr
}
// Save changes to container state
if err := c.save(); err != nil {
return err
}
// Ensure the container's working directory is created (if required)
if c.config.CreateWorkingDir {
workdir, err := securejoin.SecureJoin(c.state.Mountpoint, c.WorkingDir())
if err != nil {
return errors.Wrapf(err, "error creating path to container %s working dir", c.ID())
}
rootUID := c.RootUID()
rootGID := c.RootGID()
// MkdirAll succeeds if workdir already exists as a directory, so an
// error here is a real failure (e.g. a path component exists as a
// regular file) and must not be swallowed.
if err := os.MkdirAll(workdir, 0755); err != nil {
return errors.Wrapf(err, "error creating container %s working dir", c.ID())
}
if err := os.Chown(workdir, rootUID, rootGID); err != nil {
return errors.Wrapf(err, "error chowning container %s working directory to container root", c.ID())
}
}
return nil
}
// cleanupNetwork unmounts and cleans up the container's network
func (c *Container) cleanupNetwork() error {
if c.config.NetNsCtr != "" {
return nil
}
netDisabled, err := c.NetworkDisabled()
if err != nil {
return err
}
if netDisabled {
return nil
}
if c.state.NetNS == nil {
logrus.Debugf("Network is already cleaned up, skipping...")
return nil
}
// Stop the container's network namespace (if it has one)
if err := c.runtime.teardownNetNS(c); err != nil {
logrus.Errorf("unable to cleanup network for container %s: %q", c.ID(), err)
}
c.state.NetNS = nil
c.state.NetworkStatus = nil
if c.valid {
return c.save()
}
return nil
}
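// getUserOverrides returns overrides for user and group lookup, pointing at
// any /etc/passwd, /etc/group, or /etc mounts the user supplied, so that
// resolution honors those files instead of the image's copies.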
func (c *Container) getUserOverrides() *lookup.Overrides {
var hasPasswdFile, hasGroupFile bool
overrides := lookup.Overrides{}
for _, m := range c.config.Spec.Mounts {
if m.Destination == "/etc/passwd" {
overrides.ContainerEtcPasswdPath = m.Source
hasPasswdFile = true
}
if m.Destination == "/etc/group" {
overrides.ContainerEtcGroupPath = m.Source
hasGroupFile = true
}
if m.Destination == "/etc" {
if !hasPasswdFile {
overrides.ContainerEtcPasswdPath = filepath.Join(m.Source, "passwd")
}
if !hasGroupFile {
overrides.ContainerEtcGroupPath = filepath.Join(m.Source, "group")
}
}
}
if path, ok := c.state.BindMounts["/etc/passwd"]; ok {
overrides.ContainerEtcPasswdPath = path
}
return &overrides
}
// Generate spec for a container
// Accepts a map of the container's dependencies
func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
span, _ := opentracing.StartSpanFromContext(ctx, "generateSpec")
span.SetTag("type", "container")
defer span.Finish()
overrides := c.getUserOverrides()
execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, c.config.User, overrides)
if err != nil {
return nil, err
}
g := generate.NewFromSpec(c.config.Spec)
// If network namespace was requested, add it now
if c.config.CreateNetNS {
if c.config.PostConfigureNetNS {
if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), ""); err != nil {
return nil, err
}
} else {
if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), c.state.NetNS.Path()); err != nil {
return nil, err
}
}
}
// Apply AppArmor checks and load the default profile if needed.
if len(c.config.Spec.Process.ApparmorProfile) > 0 {
updatedProfile, err := apparmor.CheckProfileAndLoadDefault(c.config.Spec.Process.ApparmorProfile)
if err != nil {
return nil, err
}
g.SetProcessApparmorProfile(updatedProfile)
}
if err := c.makeBindMounts(); err != nil {
return nil, err
}
// Check if the spec file mounts contain the label Relabel flags z or Z.
// If they do, relabel the source directory and then remove the option.
for i := range g.Config.Mounts {
m := &g.Config.Mounts[i]
var options []string
for _, o := range m.Options {
switch o {
case "z":
fallthrough
case "Z":
if err := label.Relabel(m.Source, c.MountLabel(), label.IsShared(o)); err != nil {
return nil, errors.Wrapf(err, "relabel failed %q", m.Source)
}
default:
options = append(options, o)
}
}
m.Options = options
}
g.SetProcessSelinuxLabel(c.ProcessLabel())
g.SetLinuxMountLabel(c.MountLabel())
// Add named volumes
for _, namedVol := range c.config.NamedVolumes {
volume, err := c.runtime.GetVolume(namedVol.Name)
if err != nil {
return nil, errors.Wrapf(err, "error retrieving volume %s to add to container %s", namedVol.Name, c.ID())
}
mountPoint := volume.MountPoint()
volMount := spec.Mount{
Type: "bind",
Source: mountPoint,
Destination: namedVol.Dest,
Options: namedVol.Options,
}
g.AddMount(volMount)
}
// Add bind mounts to container
for dstPath, srcPath := range c.state.BindMounts {
newMount := spec.Mount{
Type: "bind",
Source: srcPath,
Destination: dstPath,
Options: []string{"bind", "private"},
}
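// Leave /dev/shm writable even for read-only containers.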
if c.IsReadOnly() && dstPath != "/dev/shm" {
newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
}
if !MountExists(g.Mounts(), dstPath) {
g.AddMount(newMount)
} else {
logrus.Warnf("User mount overriding libpod mount at %q", dstPath)
}
}
// Add overlay volumes
for _, overlayVol := range c.config.OverlayVolumes {
contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
if err != nil {
return nil, errors.Wrapf(err, "failed to create TempDir in the %s directory", c.config.StaticDir)
}
overlayMount, err := overlay.Mount(contentDir, overlayVol.Source, overlayVol.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions())
if err != nil {
return nil, errors.Wrapf(err, "creating overlay failed %q", overlayVol.Source)
}
g.AddMount(overlayMount)
}
hasHomeSet := false
for _, s := range c.config.Spec.Process.Env {
if strings.HasPrefix(s, "HOME=") {
hasHomeSet = true
break
}
}
if !hasHomeSet {
c.config.Spec.Process.Env = append(c.config.Spec.Process.Env, fmt.Sprintf("HOME=%s", execUser.Home))
}
if c.config.User != "" {
if rootless.IsRootless() {
if err := util.CheckRootlessUIDRange(execUser.Uid); err != nil {
return nil, err
}
}
// User and Group must go together
g.SetProcessUID(uint32(execUser.Uid))
g.SetProcessGID(uint32(execUser.Gid))
}
if c.config.Umask != "" {
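// The umask is configured as an octal string (e.g. "0022"), hence base 8.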
decVal, err := strconv.ParseUint(c.config.Umask, 8, 32)
if err != nil {
return nil, errors.Wrapf(err, "Invalid Umask Value")
}
umask := uint32(decVal)
g.Config.Process.User.Umask = &umask
}
// Add additional groups if c.config.Groups is not empty
if len(c.config.Groups) > 0 {
gids, err := lookup.GetContainerGroups(c.config.Groups, c.state.Mountpoint, overrides)
if err != nil {
return nil, errors.Wrapf(err, "error looking up supplemental groups for container %s", c.ID())
}
for _, gid := range gids {
g.AddProcessAdditionalGid(gid)
}
}
if c.config.Systemd {
if err := c.setupSystemd(g.Mounts(), g); err != nil {
return nil, errors.Wrapf(err, "error adding systemd-specific mounts")
}
}
// Look up and add groups the user belongs to, if a group wasn't directly specified
if !rootless.IsRootless() && !strings.Contains(c.config.User, ":") {
for _, gid := range execUser.Sgids {
g.AddProcessAdditionalGid(uint32(gid))
}
}
// Add shared namespaces from other containers
if c.config.IPCNsCtr != "" {
if err := c.addNamespaceContainer(&g, IPCNS, c.config.IPCNsCtr, spec.IPCNamespace); err != nil {
return nil, err
}
}
if c.config.MountNsCtr != "" {
if err := c.addNamespaceContainer(&g, MountNS, c.config.MountNsCtr, spec.MountNamespace); err != nil {
return nil, err
}
}
if c.config.NetNsCtr != "" {
if err := c.addNamespaceContainer(&g, NetNS, c.config.NetNsCtr, spec.NetworkNamespace); err != nil {
return nil, err
}
}
if c.config.PIDNsCtr != "" {
if err := c.addNamespaceContainer(&g, PIDNS, c.config.PIDNsCtr, spec.PIDNamespace); err != nil {
return nil, err
}
}
if c.config.UserNsCtr != "" {
if err := c.addNamespaceContainer(&g, UserNS, c.config.UserNsCtr, spec.UserNamespace); err != nil {
return nil, err
}
if len(g.Config.Linux.UIDMappings) == 0 {
// runc complains if no mapping is specified, even if we join another ns. So provide a dummy mapping
g.AddLinuxUIDMapping(uint32(0), uint32(0), uint32(1))
g.AddLinuxGIDMapping(uint32(0), uint32(0), uint32(1))
}
}
for _, i := range c.config.Spec.Linux.Namespaces {
if i.Type == spec.UTSNamespace && i.Path == "" {
hostname := c.Hostname()
g.SetHostname(hostname)
g.AddProcessEnv("HOSTNAME", hostname)
break
}
}
if c.config.UTSNsCtr != "" {
if err := c.addNamespaceContainer(&g, UTSNS, c.config.UTSNsCtr, spec.UTSNamespace); err != nil {
return nil, err
}
}
if c.config.CgroupNsCtr != "" {
if err := c.addNamespaceContainer(&g, CgroupNS, c.config.CgroupNsCtr, spec.CgroupNamespace); err != nil {
return nil, err
}
}
if c.config.IDMappings.AutoUserNs {
if err := g.AddOrReplaceLinuxNamespace(string(spec.UserNamespace), ""); err != nil {
return nil, err
}
g.ClearLinuxUIDMappings()
for _, uidmap := range c.config.IDMappings.UIDMap {
g.AddLinuxUIDMapping(uint32(uidmap.HostID), uint32(uidmap.ContainerID), uint32(uidmap.Size))
}
g.ClearLinuxGIDMappings()
for _, gidmap := range c.config.IDMappings.GIDMap {
g.AddLinuxGIDMapping(uint32(gidmap.HostID), uint32(gidmap.ContainerID), uint32(gidmap.Size))
}
}
g.SetRootPath(c.state.Mountpoint)
g.AddAnnotation(annotations.Created, c.config.CreatedTime.Format(time.RFC3339Nano))
g.AddAnnotation("org.opencontainers.image.stopSignal", fmt.Sprintf("%d", c.config.StopSignal))
if _, exists := g.Config.Annotations[annotations.ContainerManager]; !exists {
g.AddAnnotation(annotations.ContainerManager, annotations.ContainerManagerLibpod)
}
// Only add container environment variable if not already present
foundContainerEnv := false
for _, env := range g.Config.Process.Env {
if strings.HasPrefix(env, "container=") {
foundContainerEnv = true
break
}
}
if !foundContainerEnv {
g.AddProcessEnv("container", "libpod")
}
cgroupPath, err := c.getOCICgroupPath()
if err != nil {
return nil, err
}
g.SetLinuxCgroupsPath(cgroupPath)
// Mounts need to be sorted so paths will not cover other paths
mounts := sortMounts(g.Mounts())
g.ClearMounts()
// Determine property of RootPropagation based on volume properties. If
// a volume is shared, then keep root propagation shared. This should
// work for slave and private volumes too.
//
// For slave volumes, it can be either [r]shared/[r]slave.
//
// For private volumes any root propagation value should work.
rootPropagation := ""
for _, m := range mounts {
// We need to remove all symlinks from tmpfs mounts.
// Runc and other runtimes may choke on them.
// Easy solution: use securejoin to do a scoped evaluation of
// the links, then trim off the mount prefix.
if m.Type == "tmpfs" {
finalPath, err := securejoin.SecureJoin(c.state.Mountpoint, m.Destination)
if err != nil {
return nil, errors.Wrapf(err, "error resolving symlinks for mount destination %s", m.Destination)
}
trimmedPath := strings.TrimPrefix(finalPath, strings.TrimSuffix(c.state.Mountpoint, "/"))
m.Destination = trimmedPath
}
g.AddMount(m)
for _, opt := range m.Options {
switch opt {
case MountShared, MountRShared:
if rootPropagation != MountShared && rootPropagation != MountRShared {
rootPropagation = MountShared
}
case MountSlave, MountRSlave:
if rootPropagation != MountShared && rootPropagation != MountRShared && rootPropagation != MountSlave && rootPropagation != MountRSlave {
rootPropagation = MountRSlave
}
}
}
}
if rootPropagation != "" {
logrus.Debugf("set root propagation to %q", rootPropagation)
if err := g.SetLinuxRootPropagation(rootPropagation); err != nil {
return nil, err
}
}
// Warning: precreate hooks may alter g.Config in place.
if c.state.ExtensionStageHooks, err = c.setupOCIHooks(ctx, g.Config); err != nil {
return nil, errors.Wrapf(err, "error setting up OCI Hooks")
}
return g.Config, nil
}
// systemd expects to have /run, /run/lock and /tmp on tmpfs
// It also expects to be able to write to /sys/fs/cgroup/systemd and /var/log/journal
func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) error {
options := []string{"rw", "rprivate", "noexec", "nosuid", "nodev"}
for _, dest := range []string{"/run", "/run/lock"} {
if MountExists(mounts, dest) {
continue
}
tmpfsMnt := spec.Mount{
Destination: dest,
Type: "tmpfs",
Source: "tmpfs",
Options: append(options, "tmpcopyup"),
}
g.AddMount(tmpfsMnt)
}
for _, dest := range []string{"/tmp", "/var/log/journal"} {
if MountExists(mounts, dest) {
continue
}
tmpfsMnt := spec.Mount{
Destination: dest,
Type: "tmpfs",
Source: "tmpfs",
Options: append(options, "tmpcopyup"),
}
g.AddMount(tmpfsMnt)
}
unified, err := cgroups.IsCgroup2UnifiedMode()
if err != nil {
return err
}
if unified {
g.RemoveMount("/sys/fs/cgroup")
hasCgroupNs := false
for _, ns := range c.config.Spec.Linux.Namespaces {
if ns.Type == spec.CgroupNamespace {
hasCgroupNs = true
break
}
}
var systemdMnt spec.Mount
if hasCgroupNs {
systemdMnt = spec.Mount{
Destination: "/sys/fs/cgroup",
Type: "cgroup",
Source: "cgroup",
Options: []string{"private", "rw"},
}
} else {
systemdMnt = spec.Mount{
Destination: "/sys/fs/cgroup",
Type: "bind",
Source: "/sys/fs/cgroup",
Options: []string{"bind", "private", "rw"},
}
}
g.AddMount(systemdMnt)
} else {
systemdMnt := spec.Mount{
Destination: "/sys/fs/cgroup/systemd",
Type: "bind",
Source: "/sys/fs/cgroup/systemd",
Options: []string{"bind", "nodev", "noexec", "nosuid", "rprivate"},
}
g.AddMount(systemdMnt)
g.AddLinuxMaskedPaths("/sys/fs/cgroup/systemd/release_agent")
}
return nil
}
// Add an existing container's namespace to the spec
func (c *Container) addNamespaceContainer(g *generate.Generator, ns LinuxNS, ctr string, specNS spec.LinuxNamespaceType) error {
nsCtr, err := c.runtime.state.Container(ctr)
if err != nil {
return errors.Wrapf(err, "error retrieving dependency %s of container %s from state", ctr, c.ID())
}
if specNS == spec.UTSNamespace {
hostname := nsCtr.Hostname()
// Joining an existing namespace, cannot set the hostname
g.SetHostname("")
g.AddProcessEnv("HOSTNAME", hostname)
}
// TODO need unlocked version of this for use in pods
nsPath, err := nsCtr.NamespacePath(ns)
if err != nil {
return err
}
if err := g.AddOrReplaceLinuxNamespace(string(specNS), nsPath); err != nil {
return err
}
return nil
}
func (c *Container) exportCheckpoint(dest string, ignoreRootfs bool) error {
if (len(c.config.NamedVolumes) > 0) || (len(c.Dependencies()) > 0) {
return errors.Errorf("Cannot export checkpoints of containers with named volumes or dependencies")
}
logrus.Debugf("Exporting checkpoint image of container %q to %q", c.ID(), dest)
includeFiles := []string{
"checkpoint",
"artifacts",
"ctr.log",
"config.dump",
"spec.dump",
"network.status"}
// Get root file-system changes included in the checkpoint archive
rootfsDiffPath := filepath.Join(c.bundlePath(), "rootfs-diff.tar")
deleteFilesList := filepath.Join(c.bundlePath(), "deleted.files")
if !ignoreRootfs {
// To correctly track deleted files, let's go through the output of 'podman diff'
tarFiles, err := c.runtime.GetDiff("", c.ID())
if err != nil {
return errors.Wrapf(err, "error exporting root file-system diff to %q", rootfsDiffPath)
}
var rootfsIncludeFiles []string
var deletedFiles []string
for _, file := range tarFiles {
if file.Kind == archive.ChangeAdd {
rootfsIncludeFiles = append(rootfsIncludeFiles, file.Path)
continue
}
if file.Kind == archive.ChangeDelete {
deletedFiles = append(deletedFiles, file.Path)
continue
}
fileInfo, err := os.Stat(file.Path)
if err != nil {
continue
}
if !fileInfo.IsDir() && file.Kind == archive.ChangeModify {
rootfsIncludeFiles = append(rootfsIncludeFiles, file.Path)
continue
}
}
if len(rootfsIncludeFiles) > 0 {
rootfsTar, err := archive.TarWithOptions(c.state.Mountpoint, &archive.TarOptions{
Compression: archive.Uncompressed,
IncludeSourceDir: true,
IncludeFiles: rootfsIncludeFiles,
})
if err != nil {
return errors.Wrapf(err, "error exporting root file-system diff to %q", rootfsDiffPath)
}
rootfsDiffFile, err := os.Create(rootfsDiffPath)
if err != nil {
return errors.Wrapf(err, "error creating root file-system diff file %q", rootfsDiffPath)
}
defer rootfsDiffFile.Close()
_, err = io.Copy(rootfsDiffFile, rootfsTar)
if err != nil {
return err
}
includeFiles = append(includeFiles, "rootfs-diff.tar")
}
if len(deletedFiles) > 0 {
formatJSON, err := json.MarshalIndent(deletedFiles, "", " ")
if err != nil {
return errors.Wrapf(err, "error creating delete files list file %q", deleteFilesList)
}
if err := ioutil.WriteFile(deleteFilesList, formatJSON, 0600); err != nil {
return errors.Wrapf(err, "error creating delete files list file %q", deleteFilesList)
}
includeFiles = append(includeFiles, "deleted.files")
}
}
input, err := archive.TarWithOptions(c.bundlePath(), &archive.TarOptions{
Compression: archive.Gzip,
IncludeSourceDir: true,
IncludeFiles: includeFiles,
})
if err != nil {
return errors.Wrapf(err, "error reading checkpoint directory %q", c.ID())
}
outFile, err := os.Create(dest)
if err != nil {
return errors.Wrapf(err, "error creating checkpoint export file %q", dest)
}
defer outFile.Close()
if err := os.Chmod(dest, 0600); err != nil {
return errors.Wrapf(err, "cannot chmod %q", dest)
}
_, err = io.Copy(outFile, input)
if err != nil {
return err
}
os.Remove(rootfsDiffPath)
os.Remove(deleteFilesList)
return nil
}
func (c *Container) checkpointRestoreSupported() error {
if !criu.CheckForCriu() {
return errors.Errorf("Checkpoint/Restore requires at least CRIU %d", criu.MinCriuVersion)
}
if !c.ociRuntime.SupportsCheckpoint() {
return errors.Errorf("Configured runtime does not support checkpoint/restore")
}
return nil
}
func (c *Container) checkpointRestoreLabelLog(fileName string) error {
// Create the CRIU log file and label it
dumpLog := filepath.Join(c.bundlePath(), fileName)
logFile, err := os.OpenFile(dumpLog, os.O_CREATE, 0600)
if err != nil {
return errors.Wrapf(err, "failed to create CRIU log file %q", dumpLog)
}
if err := logFile.Close(); err != nil {
logrus.Errorf("unable to close log file: %q", err)
}
if err = label.SetFileLabel(dumpLog, c.MountLabel()); err != nil {
return errors.Wrapf(err, "failed to label CRIU log file %q", dumpLog)
}
return nil
}
func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) error {
if err := c.checkpointRestoreSupported(); err != nil {
return err
}
if c.state.State != define.ContainerStateRunning {
return errors.Wrapf(define.ErrCtrStateInvalid, "%q is not running, cannot checkpoint", c.state.State)
}
if c.AutoRemove() && options.TargetFile == "" {
return errors.Errorf("Cannot checkpoint containers that have been started with '--rm' unless '--export' is used")
}
if err := c.checkpointRestoreLabelLog("dump.log"); err != nil {
return err
}
if err := c.ociRuntime.CheckpointContainer(c, options); err != nil {
return err
}
// Save network.status. This is needed to restore the container with
// the same IP. Currently limited to one IP address in a container
// with one interface.
formatJSON, err := json.MarshalIndent(c.state.NetworkStatus, "", " ")
if err != nil {
return err
}
if err := ioutil.WriteFile(filepath.Join(c.bundlePath(), "network.status"), formatJSON, 0644); err != nil {
return err
}
defer c.newContainerEvent(events.Checkpoint)
if options.TargetFile != "" {
if err = c.exportCheckpoint(options.TargetFile, options.IgnoreRootfs); err != nil {
return err
}
}
logrus.Debugf("Checkpointed container %s", c.ID())
if !options.KeepRunning {
c.state.State = define.ContainerStateStopped
// Cleanup Storage and Network
if err := c.cleanup(ctx); err != nil {
return err
}
}
if !options.Keep {
cleanup := []string{
"dump.log",
"stats-dump",
"config.dump",
"spec.dump",
}
for _, del := range cleanup {
file := filepath.Join(c.bundlePath(), del)
if err := os.Remove(file); err != nil {
logrus.Debugf("unable to remove file %s", file)
}
}
}
c.state.FinishedTime = time.Now()
return c.save()
}
func (c *Container) importCheckpoint(input string) error {
archiveFile, err := os.Open(input)
if err != nil {
return errors.Wrapf(err, "Failed to open checkpoint archive %s for import", input)
}
defer archiveFile.Close()
options := &archive.TarOptions{
ExcludePatterns: []string{
// config.dump and spec.dump are only required for
// container creation
"config.dump",
"spec.dump",
},
}
err = archive.Untar(archiveFile, c.bundlePath(), options)
if err != nil {
return errors.Wrapf(err, "Unpacking of checkpoint archive %s failed", input)
}
// Make sure the newly created config.json exists on disk
g := generate.Generator{Config: c.config.Spec}
if err = c.saveSpec(g.Config); err != nil {
return errors.Wrap(err, "Saving imported container specification for restore failed")
}
return nil
}
func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (retErr error) {
if err := c.checkpointRestoreSupported(); err != nil {
return err
}
if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) {
return errors.Wrapf(define.ErrCtrStateInvalid, "container %s is running or paused, cannot restore", c.ID())
}
if options.TargetFile != "" {
if err := c.importCheckpoint(options.TargetFile); err != nil {
return err
}
}
// Let's try to stat() CRIU's inventory file. If it does not exist, it makes
// no sense to try a restore. This is a minimal check that a checkpoint exists.
if _, err := os.Stat(filepath.Join(c.CheckpointPath(), "inventory.img")); os.IsNotExist(err) {
return errors.Wrapf(err, "A complete checkpoint for this container cannot be found, cannot restore")
}
if err := c.checkpointRestoreLabelLog("restore.log"); err != nil {
return err
}
// If a container is restored multiple times from an exported checkpoint with
// the help of '--import --name', the restore will fail if during 'podman run'
// a static container IP was set with '--ip'. The user can tell the restore
// process to ignore the static IP with '--ignore-static-ip'
if options.IgnoreStaticIP {
c.config.StaticIP = nil
}
// If a container is restored multiple times from an exported checkpoint with
// the help of '--import --name', the restore will fail if during 'podman run'
// a static container MAC address was set with '--mac-address'. The user
// can tell the restore process to ignore the static MAC with
// '--ignore-static-mac'
if options.IgnoreStaticMAC {
c.config.StaticMAC = nil
}
// Read network configuration from checkpoint
// Currently only one interface with one IP is supported.
networkStatusFile, err := os.Open(filepath.Join(c.bundlePath(), "network.status"))
// If the restored container should get a new name, the IP address of
// the container will not be restored. This assumes that if a new name is
// specified, the container is restored multiple times.
// TODO: This implicit restoring with or without IP depending on an
// unrelated restore parameter (--name) does not seem like the
// best solution.
if err == nil && options.Name == "" && (!options.IgnoreStaticIP || !options.IgnoreStaticMAC) {
// The file with the network.status does exist. Let's restore the
// container with the same IP address / MAC address as during checkpointing.
defer networkStatusFile.Close()
var networkStatus []*cnitypes.Result
networkJSON, err := ioutil.ReadAll(networkStatusFile)
if err != nil {
return err
}
if err := json.Unmarshal(networkJSON, &networkStatus); err != nil {
return err
}
if !options.IgnoreStaticIP {
// Take the first IP address
var IP net.IP
if len(networkStatus) > 0 {
if len(networkStatus[0].IPs) > 0 {
IP = networkStatus[0].IPs[0].Address.IP
}
}
if IP != nil {
// Tell CNI which IP address we want.
c.requestedIP = IP
}
}
if !options.IgnoreStaticMAC {
// Take the first device with a defined sandbox.
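// Note: this indexes networkStatus[0] without re-checking its length,
// unlike the IP branch above.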
var MAC net.HardwareAddr
for _, n := range networkStatus[0].Interfaces {
if n.Sandbox != "" {
MAC, err = net.ParseMAC(n.Mac)
if err != nil {
return errors.Wrapf(err, "failed to parse MAC %v", n.Mac)
}
break
}
}
if MAC != nil {
// Tell CNI which MAC address we want.
c.requestedMAC = MAC
}
}
}
defer func() {
if retErr != nil {
if err := c.cleanup(ctx); err != nil {
logrus.Errorf("error cleaning up container %s: %v", c.ID(), err)
}
}
}()
if err := c.prepare(); err != nil {
return err
}
// Read config
jsonPath := filepath.Join(c.bundlePath(), "config.json")
logrus.Debugf("generate.NewFromFile at %v", jsonPath)
g, err := generate.NewFromFile(jsonPath)
if err != nil {
logrus.Debugf("generate.NewFromFile failed with %v", err)
return err
}
// Restoring from an import means that we are doing migration
if options.TargetFile != "" {
g.SetRootPath(c.state.Mountpoint)
}
// We want to have the same network namespace as before.
if c.config.CreateNetNS {
netNSPath := ""
if !c.config.PostConfigureNetNS {
netNSPath = c.state.NetNS.Path()
}
if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), netNSPath); err != nil {
return err
}
}
if err := c.makeBindMounts(); err != nil {
return err
}
if options.TargetFile != "" {
for dstPath, srcPath := range c.state.BindMounts {
newMount := spec.Mount{
Type: "bind",
Source: srcPath,
Destination: dstPath,
Options: []string{"bind", "private"},
}
if c.IsReadOnly() && dstPath != "/dev/shm" {
newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
}
if !MountExists(g.Mounts(), dstPath) {
g.AddMount(newMount)
}
}
}
// Cleanup for a working restore.
if err := c.removeConmonFiles(); err != nil {
return err
}
// Save the OCI spec to disk
if err := c.saveSpec(g.Config); err != nil {
return err
}
// Before actually restarting the container, apply the root file-system changes
if !options.IgnoreRootfs {
rootfsDiffPath := filepath.Join(c.bundlePath(), "rootfs-diff.tar")
if _, err := os.Stat(rootfsDiffPath); err == nil {
// Only do this if a rootfs-diff.tar actually exists
rootfsDiffFile, err := os.Open(rootfsDiffPath)
if err != nil {
return errors.Wrapf(err, "Failed to open root file-system diff file %s", rootfsDiffPath)
}
defer rootfsDiffFile.Close()
if err := c.runtime.ApplyDiffTarStream(c.ID(), rootfsDiffFile); err != nil {
return errors.Wrapf(err, "Failed to apply root file-system diff file %s", rootfsDiffPath)
}
}
deletedFilesPath := filepath.Join(c.bundlePath(), "deleted.files")
if _, err := os.Stat(deletedFilesPath); err == nil {
deletedFilesFile, err := os.Open(deletedFilesPath)
if err != nil {
return errors.Wrapf(err, "Failed to open deleted files file %s", deletedFilesPath)
}
defer deletedFilesFile.Close()
var deletedFiles []string
deletedFilesJSON, err := ioutil.ReadAll(deletedFilesFile)
if err != nil {
return errors.Wrapf(err, "Failed to read deleted files file %s", deletedFilesPath)
}
if err := json.Unmarshal(deletedFilesJSON, &deletedFiles); err != nil {
return errors.Wrapf(err, "Failed to read deleted files file %s", deletedFilesPath)
}
for _, deleteFile := range deletedFiles {
// Using RemoveAll as deletedFiles, which is generated from 'podman diff'
// lists completely deleted directories as a single entry: 'D /root'.
err = os.RemoveAll(filepath.Join(c.state.Mountpoint, deleteFile))
if err != nil {
return errors.Wrapf(err, "Failed to delete file %s from container %s during restore", deletedFilesPath, c.ID())
}
}
}
}
if err := c.ociRuntime.CreateContainer(c, &options); err != nil {
return err
}
logrus.Debugf("Restored container %s", c.ID())
c.state.State = define.ContainerStateRunning
if !options.Keep {
// Delete all checkpoint related files. At this point, in theory, all files
// should exist. Still ignoring errors for now as the container should be
// restored and running. Not erroring out just because some cleanup operation
// failed. Starting with the checkpoint directory
err = os.RemoveAll(c.CheckpointPath())
if err != nil {
logrus.Debugf("Non-fatal: removal of checkpoint directory (%s) failed: %v", c.CheckpointPath(), err)
}
cleanup := [...]string{"restore.log", "dump.log", "stats-dump", "stats-restore", "network.status", "rootfs-diff.tar", "deleted.files"}
for _, del := range cleanup {
file := filepath.Join(c.bundlePath(), del)
err = os.Remove(file)
if err != nil {
logrus.Debugf("Non-fatal: removal of checkpoint file (%s) failed: %v", file, err)
}
}
}
return c.save()
}
// Make standard bind mounts to include in the container
func (c *Container) makeBindMounts() error {
if err := os.Chown(c.state.RunDir, c.RootUID(), c.RootGID()); err != nil {
return errors.Wrapf(err, "cannot chown run directory %s", c.state.RunDir)
}
if c.state.BindMounts == nil {
c.state.BindMounts = make(map[string]string)
}
netDisabled, err := c.NetworkDisabled()
if err != nil {
return err
}
if !netDisabled {
// If /etc/resolv.conf and /etc/hosts exist, delete them so we
// will recreate. Only do this if we aren't sharing them with
// another container.
if c.config.NetNsCtr == "" {
if resolvePath, ok := c.state.BindMounts["/etc/resolv.conf"]; ok {
if err := os.Remove(resolvePath); err != nil && !os.IsNotExist(err) {
return errors.Wrapf(err, "error removing container %s resolv.conf", c.ID())
}
delete(c.state.BindMounts, "/etc/resolv.conf")
}
if hostsPath, ok := c.state.BindMounts["/etc/hosts"]; ok {
if err := os.Remove(hostsPath); err != nil && !os.IsNotExist(err) {
return errors.Wrapf(err, "error removing container %s hosts", c.ID())
}
delete(c.state.BindMounts, "/etc/hosts")
}
}
if c.config.NetNsCtr != "" && (!c.config.UseImageResolvConf || !c.config.UseImageHosts) {
// We share a net namespace.
// We want /etc/resolv.conf and /etc/hosts from the
// other container. Unless we're not creating both of
// them.
var (
depCtr *Container
nextCtr string
)
// I don't like infinite loops, but I don't think there's
// a serious risk of looping dependencies - too many
// protections against that elsewhere.
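// Walk the chain of NetNsCtr dependencies to its end; the final
// container actually owns the network namespace and its bind mounts.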
nextCtr = c.config.NetNsCtr
for {
depCtr, err = c.runtime.state.Container(nextCtr)
if err != nil {
return errors.Wrapf(err, "error fetching dependency %s of container %s", c.config.NetNsCtr, c.ID())
}
nextCtr = depCtr.config.NetNsCtr
if nextCtr == "" {
break
}
}
// We need that container's bind mounts
bindMounts, err := depCtr.BindMounts()
if err != nil {
return errors.Wrapf(err, "error fetching bind mounts from dependency %s of container %s", depCtr.ID(), c.ID())
}
// The other container may not have a resolv.conf or /etc/hosts
// If it doesn't, don't copy them
resolvPath, exists := bindMounts["/etc/resolv.conf"]
if !c.config.UseImageResolvConf && exists {
c.state.BindMounts["/etc/resolv.conf"] = resolvPath
}
// check if dependency container has an /etc/hosts file.
// It may not have one, so only use it if it does.
hostsPath, exists := bindMounts["/etc/hosts"]
if !c.config.UseImageHosts && exists {
depCtr.lock.Lock()
// generate a hosts file for the dependency container,
// based on either its old hosts file, or the default,
// and add the relevant information from the new container (hosts and IP)
hostsPath, err = depCtr.appendHosts(hostsPath, c)
if err != nil {
depCtr.lock.Unlock()
return errors.Wrapf(err, "error creating hosts file for container %s which depends on container %s", c.ID(), depCtr.ID())
}
depCtr.lock.Unlock()
// finally, save it in the new container
c.state.BindMounts["/etc/hosts"] = hostsPath
}
if !hasCurrentUserMapped(c) {
if err := makeAccessible(resolvPath, c.RootUID(), c.RootGID()); err != nil {
return err
}
if err := makeAccessible(hostsPath, c.RootUID(), c.RootGID()); err != nil {
return err
}
}
} else {
if !c.config.UseImageResolvConf {
newResolv, err := c.generateResolvConf()
if err != nil {
return errors.Wrapf(err, "error creating resolv.conf for container %s", c.ID())
}
c.state.BindMounts["/etc/resolv.conf"] = newResolv
}
if !c.config.UseImageHosts {
newHosts, err := c.generateHosts("/etc/hosts")
if err != nil {
return errors.Wrapf(err, "error creating hosts file for container %s", c.ID())
}
c.state.BindMounts["/etc/hosts"] = newHosts
}
}
if c.state.BindMounts["/etc/hosts"] != "" {
if err := label.Relabel(c.state.BindMounts["/etc/hosts"], c.config.MountLabel, true); err != nil {
return err
}
}
if c.state.BindMounts["/etc/resolv.conf"] != "" {
if err := label.Relabel(c.state.BindMounts["/etc/resolv.conf"], c.config.MountLabel, true); err != nil {
return err
}
}
}
// SHM is always added when we mount the container
c.state.BindMounts["/dev/shm"] = c.config.ShmDir
newPasswd, err := c.generatePasswd()
if err != nil {
return errors.Wrapf(err, "error creating temporary passwd file for container %s", c.ID())
}
if newPasswd != "" {
// Make /etc/passwd
if _, ok := c.state.BindMounts["/etc/passwd"]; ok {
// If it already exists, delete so we can recreate
delete(c.state.BindMounts, "/etc/passwd")
}
logrus.Debugf("adding entry to /etc/passwd for non existent default user")
c.state.BindMounts["/etc/passwd"] = newPasswd
}
// Make /etc/hostname
// This should never change, so no need to recreate if it exists
if _, ok := c.state.BindMounts["/etc/hostname"]; !ok {
hostnamePath, err := c.writeStringToRundir("hostname", c.Hostname())
if err != nil {
return errors.Wrapf(err, "error creating hostname file for container %s", c.ID())
}
c.state.BindMounts["/etc/hostname"] = hostnamePath
}
// Make /etc/localtime
if c.Timezone() != "" {
if _, ok := c.state.BindMounts["/etc/localtime"]; !ok {
var zonePath string
if c.Timezone() == "local" {
zonePath, err = filepath.EvalSymlinks("/etc/localtime")
if err != nil {
return errors.Wrapf(err, "error finding local timezone for container %s", c.ID())
}
} else {
zone := filepath.Join("/usr/share/zoneinfo", c.Timezone())
zonePath, err = filepath.EvalSymlinks(zone)
if err != nil {
return errors.Wrapf(err, "error setting timezone for container %s", c.ID())
}
}
localtimePath, err := c.copyTimezoneFile(zonePath)
if err != nil {
return errors.Wrapf(err, "error setting timezone for container %s", c.ID())
}
c.state.BindMounts["/etc/localtime"] = localtimePath
}
}
// Make .containerenv
// Empty file, so no need to recreate if it exists
if _, ok := c.state.BindMounts["/run/.containerenv"]; !ok {
// Empty string for now, but we may consider populating this later
containerenvPath, err := c.writeStringToRundir(".containerenv", "")
if err != nil {
return errors.Wrapf(err, "error creating containerenv file for container %s", c.ID())
}
c.state.BindMounts["/run/.containerenv"] = containerenvPath
}
// Add Secret Mounts
secretMounts := secrets.SecretMountsWithUIDGID(c.config.MountLabel, c.state.RunDir, c.runtime.config.Containers.DefaultMountsFile, c.state.Mountpoint, c.RootUID(), c.RootGID(), rootless.IsRootless(), false)
for _, mount := range secretMounts {
if _, ok := c.state.BindMounts[mount.Destination]; !ok {
c.state.BindMounts[mount.Destination] = mount.Source
}
}
return nil
}
// generateResolvConf generates a container's resolv.conf
func (c *Container) generateResolvConf() (string, error) {
var (
nameservers []string
cniNameServers []string
)
resolvConf := "/etc/resolv.conf"
for _, namespace := range c.config.Spec.Linux.Namespaces {
if namespace.Type == spec.NetworkNamespace {
if namespace.Path != "" && !strings.HasPrefix(namespace.Path, "/proc/") {
definedPath := filepath.Join("/etc/netns", filepath.Base(namespace.Path), "resolv.conf")
_, err := os.Stat(definedPath)
if err == nil {
resolvConf = definedPath
} else if !os.IsNotExist(err) {
return "", errors.Wrapf(err, "failed to stat %s", definedPath)
}
}
break
}
}
// Determine the endpoint for resolv.conf in case it is a symlink
resolvPath, err := filepath.EvalSymlinks(resolvConf)
if err != nil {
return "", err
}
contents, err := ioutil.ReadFile(resolvPath)
if err != nil {
return "", errors.Wrapf(err, "unable to read %s", resolvPath)
}
// Ensure that the container's /etc/resolv.conf is compatible with its
// network configuration.
// TODO: set ipv6 enable bool more sanely
resolv, err := resolvconf.FilterResolvDNS(contents, true, c.config.CreateNetNS)
if err != nil {
return "", errors.Wrapf(err, "error parsing host resolv.conf")
}
// Check if CNI gave back any DNS servers for us to add in
cniResponse := c.state.NetworkStatus
for _, i := range cniResponse {
if i.DNS.Nameservers != nil {
cniNameServers = append(cniNameServers, i.DNS.Nameservers...)
logrus.Debugf("adding nameserver(s) from cni response of '%q'", i.DNS.Nameservers)
}
}
dns := make([]net.IP, 0, len(c.runtime.config.Containers.DNSServers))
for _, i := range c.runtime.config.Containers.DNSServers {
result := net.ParseIP(i)
if result == nil {
return "", errors.Wrapf(define.ErrInvalidArg, "invalid IP address %s", i)
}
dns = append(dns, result)
}
dnsServers := append(dns, c.config.DNSServer...)
// If the user provided DNS servers, they trump all; then CNI-provided servers; then the host's resolv.conf
switch {
case len(dnsServers) > 0:
// We store DNS servers as net.IP, so need to convert to string
for _, server := range dnsServers {
nameservers = append(nameservers, server.String())
}
case len(cniNameServers) > 0:
nameservers = append(nameservers, cniNameServers...)
default:
// Make a new resolv.conf
nameservers = resolvconf.GetNameservers(resolv.Content)
// slirp4netns has a built in DNS server.
if c.config.NetMode.IsSlirp4netns() {
nameservers = append([]string{"10.0.2.3"}, nameservers...)
}
}
var search []string
if len(c.config.DNSSearch) > 0 || len(c.runtime.config.Containers.DNSSearches) > 0 {
if !util.StringInSlice(".", c.config.DNSSearch) {
search = c.runtime.config.Containers.DNSSearches
search = append(search, c.config.DNSSearch...)
}
} else {
search = resolvconf.GetSearchDomains(resolv.Content)
}
var options []string
if len(c.config.DNSOption) > 0 || len(c.runtime.config.Containers.DNSOptions) > 0 {
options = c.runtime.config.Containers.DNSOptions
options = append(options, c.config.DNSOption...)
} else {
options = resolvconf.GetOptions(resolv.Content)
}
destPath := filepath.Join(c.state.RunDir, "resolv.conf")
if err := os.Remove(destPath); err != nil && !os.IsNotExist(err) {
return "", errors.Wrapf(err, "error removing resolv.conf for container %s", c.ID())
}
// Build resolv.conf
if _, err = resolvconf.Build(destPath, nameservers, search, options); err != nil {
return "", errors.Wrapf(err, "error building resolv.conf for container %s", c.ID())
}
// Relabel resolv.conf for the container
if err := label.Relabel(destPath, c.config.MountLabel, true); err != nil {
return "", err
}
return filepath.Join(c.state.RunDir, "resolv.conf"), nil
}
// generateHosts creates a container's hosts file
func (c *Container) generateHosts(path string) (string, error) {
orig, err := ioutil.ReadFile(path)
if err != nil {
return "", errors.Wrapf(err, "unable to read %s", path)
}
hosts := string(orig)
hosts += c.getHosts()
return c.writeStringToRundir("hosts", hosts)
}
// appendHosts appends a container's config and state pertaining to hosts to a container's
// local hosts file. netCtr is the container from which the netNS information is
// taken.
// path is the basis of the hosts file, into which netCtr's netNS information will be appended.
// FIXME: path should be used by this function, but I am not sure what is
// correct; remove //nolint once this is fixed
func (c *Container) appendHosts(path string, netCtr *Container) (string, error) { //nolint
return c.appendStringToRundir("hosts", netCtr.getHosts())
}
// getHosts finds the pertinent information for a container's host file in its config and state
// and returns a string in a format that can be written to the host file
func (c *Container) getHosts() string {
var hosts string
if len(c.config.HostAdd) > 0 {
for _, host := range c.config.HostAdd {
// the host format has already been verified at this point
fields := strings.SplitN(host, ":", 2)
hosts += fmt.Sprintf("%s %s\n", fields[1], fields[0])
}
}
if c.config.NetMode.IsSlirp4netns() {
// When using slirp4netns, the interface gets a static IP
hosts += fmt.Sprintf("# used by slirp4netns\n%s\t%s %s\n", "10.0.2.100", c.Hostname(), c.Config().Name)
}
if len(c.state.NetworkStatus) > 0 && len(c.state.NetworkStatus[0].IPs) > 0 {
ipAddress := strings.Split(c.state.NetworkStatus[0].IPs[0].Address.String(), "/")[0]
hosts += fmt.Sprintf("%s\t%s %s\n", ipAddress, c.Hostname(), c.Config().Name)
}
return hosts
}
// generateCurrentUserPasswdEntry generates an /etc/passwd entry for the user
// running the container engine
func (c *Container) generateCurrentUserPasswdEntry() (string, error) {
uid := rootless.GetRootlessUID()
if uid == 0 {
return "", nil
}
u, err := user.LookupId(strconv.Itoa(rootless.GetRootlessUID()))
if err != nil {
return "", errors.Wrapf(err, "failed to get current user")
}
// Lookup the user to see if it exists in the container image.
_, err = lookup.GetUser(c.state.Mountpoint, u.Username)
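// A nil error means the user already exists in the image's /etc/passwd,
// so no entry is needed; any error other than ErrNoPasswdEntries is fatal.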
if err != User.ErrNoPasswdEntries {
return "", err
}
// If the user's actual home directory exists, or was mounted in - use
// that.
homeDir := c.WorkingDir()
if MountExists(c.config.Spec.Mounts, u.HomeDir) {
homeDir = u.HomeDir
}
return fmt.Sprintf("%s:x:%s:%s:%s:%s:/bin/sh\n", u.Username, u.Uid, u.Gid, u.Username, homeDir), nil
}
// generateUserPasswdEntry generates an /etc/passwd entry for the container user
// to run in the container.
func (c *Container) generateUserPasswdEntry() (string, error) {
var (
groupspec string
gid int
)
if c.config.User == "" {
return "", nil
}
splitSpec := strings.SplitN(c.config.User, ":", 2)
userspec := splitSpec[0]
if len(splitSpec) > 1 {
groupspec = splitSpec[1]
}
// If a non-numeric User, then don't generate passwd
uid, err := strconv.ParseUint(userspec, 10, 32)
if err != nil {
return "", nil
}
// Lookup the user to see if it exists in the container image
_, err = lookup.GetUser(c.state.Mountpoint, userspec)
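// As above: a nil error means the user already exists in the image, so
// no entry is needed; any error other than ErrNoPasswdEntries is fatal.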
if err != User.ErrNoPasswdEntries {
return "", err
}
if groupspec != "" {
ugid, err := strconv.ParseUint(groupspec, 10, 32)
if err == nil {
gid = int(ugid)
} else {
group, err := lookup.GetGroup(c.state.Mountpoint, groupspec)
if err != nil {
return "", errors.Wrapf(err, "unable to get gid %s from group file", groupspec)
}
gid = group.Gid
}
}
return fmt.Sprintf("%d:x:%d:%d:container user:%s:/bin/sh\n", uid, uid, gid, c.WorkingDir()), nil
}
// generatePasswd generates a container-specific passwd file,
// iff c.config.User is a numeric UID or AddCurrentUserPasswdEntry is set
func (c *Container) generatePasswd() (string, error) {
if !c.config.AddCurrentUserPasswdEntry && c.config.User == "" {
return "", nil
}
if MountExists(c.config.Spec.Mounts, "/etc/passwd") {
return "", nil
}
// Re-use passwd if possible
passwdPath := filepath.Join(c.config.StaticDir, "passwd")
if _, err := os.Stat(passwdPath); err == nil {
return passwdPath, nil
}
// Check if container has a /etc/passwd - if it doesn't, do nothing.
passwdPath, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/passwd")
if err != nil {
return "", errors.Wrapf(err, "error creating path to container %s /etc/passwd", c.ID())
}
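// Gate all passwd handling on the container actually providing an
// /etc/passwd; an image containing only a static binary is valid and
// should not have one created for it (#7515).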
if _, err := os.Stat(passwdPath); err != nil {
if os.IsNotExist(err) {
return "", nil
}
return "", errors.Wrapf(err, "unable to access container %s /etc/passwd", c.ID())
}
pwd := ""
if c.config.User != "" {
entry, err := c.generateUserPasswdEntry()
if err != nil {
return "", err
}
pwd += entry
}
if c.config.AddCurrentUserPasswdEntry {
entry, err := c.generateCurrentUserPasswdEntry()
if err != nil {
return "", err
}
pwd += entry
}
if pwd == "" {
return "", nil
}
// If we are *not* read-only - edit /etc/passwd in the container.
// This is *gross* (shows up in changes to the container, will be
// committed to images based on the container) but it actually allows us
// to add users to the container (a bind mount breaks useradd).
// We should never get here twice, because generateUserPasswdEntry will
// not return anything if the user already exists in /etc/passwd.
if !c.IsReadOnly() {
containerPasswd, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/passwd")
if err != nil {
return "", errors.Wrapf(err, "error looking up location of container %s /etc/passwd", c.ID())
}
f, err := os.OpenFile(containerPasswd, os.O_APPEND|os.O_WRONLY, 0600)
if err != nil {
return "", errors.Wrapf(err, "error opening container %s /etc/passwd", c.ID())
}
defer f.Close()
if _, err := f.WriteString(pwd); err != nil {
return "", errors.Wrapf(err, "unable to append to container %s /etc/passwd", c.ID())
}
return "", nil
}
originPasswdFile := filepath.Join(c.state.Mountpoint, "/etc/passwd")
orig, err := ioutil.ReadFile(originPasswdFile)
if err != nil && !os.IsNotExist(err) {
return "", errors.Wrapf(err, "unable to read passwd file %s", originPasswdFile)
}
passwdFile, err := c.writeStringToStaticDir("passwd", string(orig)+pwd)
if err != nil {
return "", errors.Wrapf(err, "failed to create temporary passwd file")
}
if err := os.Chmod(passwdFile, 0644); err != nil {
return "", err
}
return passwdFile, nil
}
func (c *Container) copyOwnerAndPerms(source, dest string) error {
info, err := os.Stat(source)
if err != nil {
if os.IsNotExist(err) {
return nil
}
return errors.Wrapf(err, "cannot stat `%s`", dest)
}
if err := os.Chmod(dest, info.Mode()); err != nil {
return errors.Wrapf(err, "cannot chmod `%s`", dest)
}
if err := os.Chown(dest, int(info.Sys().(*syscall.Stat_t).Uid), int(info.Sys().(*syscall.Stat_t).Gid)); err != nil {
return errors.Wrapf(err, "cannot chown `%s`", dest)
}
return nil
}
// Get cgroup path in a format suitable for the OCI spec
func (c *Container) getOCICgroupPath() (string, error) {
unified, err := cgroups.IsCgroup2UnifiedMode()
if err != nil {
return "", err
}
switch {
case (rootless.IsRootless() && !unified) || c.config.NoCgroups:
return "", nil
case c.config.CgroupsMode == cgroupSplit:
if c.config.CgroupParent != "" {
return c.config.CgroupParent, nil
}
selfCgroup, err := utils.GetOwnCgroup()
if err != nil {
return "", err
}
return filepath.Join(selfCgroup, "container"), nil
case c.runtime.config.Engine.CgroupManager == config.SystemdCgroupsManager:
// When the OCI runtime is set to use Systemd as a cgroup manager, it
// expects cgroups to be passed as follows:
// slice:prefix:name
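// e.g. "machine.slice:libpod:<full container ID>" with the default parent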
systemdCgroups := fmt.Sprintf("%s:libpod:%s", path.Base(c.config.CgroupParent), c.ID())
logrus.Debugf("Setting CGroups for container %s to %s", c.ID(), systemdCgroups)
return systemdCgroups, nil
case c.runtime.config.Engine.CgroupManager == config.CgroupfsCgroupsManager:
cgroupPath, err := c.CGroupPath()
if err != nil {
return "", err
}
logrus.Debugf("Setting CGroup path for container %s to %s", c.ID(), cgroupPath)
return cgroupPath, nil
default:
return "", errors.Wrapf(define.ErrInvalidArg, "invalid cgroup manager %s requested", c.runtime.config.Engine.CgroupManager)
}
}
func (c *Container) copyTimezoneFile(zonePath string) (string, error) {
localtimeCopy := filepath.Join(c.state.RunDir, "localtime")
file, err := os.Stat(zonePath)
if err != nil {
return "", err
}
if file.IsDir() {
return "", errors.New("Invalid timezone: is a directory")
}
src, err := os.Open(zonePath)
if err != nil {
return "", err
}
defer src.Close()
dest, err := os.Create(localtimeCopy)
if err != nil {
return "", err
}
defer dest.Close()
_, err = io.Copy(dest, src)
if err != nil {
return "", err
}
if err := label.Relabel(localtimeCopy, c.config.MountLabel, false); err != nil {
return "", err
}
if err := dest.Chown(c.RootUID(), c.RootGID()); err != nil {
return "", err
}
return localtimeCopy, err
}
func (c *Container) cleanupOverlayMounts() error {
return overlay.CleanupContent(c.config.StaticDir)
}