podman/libpod/runtime_ctr.go
Matthew Heon 140f87c474 EXPERIMENTAL: Do not call out to runc for sync
When syncing container state, we normally call out to runc to see
the container's status. This does have significant performance
implications, though, and we've seen issues with large amounts of
runc processes being spawned.

This patch attempts to use stat calls on the container exit file
created by Conmon instead to sync state. This massively decreases
the cost of calling updateContainer (it has gone from an
almost-unconditional fork/exec of runc to a single stat call that
can be avoided in most states).

Signed-off-by: Matthew Heon <matthew.heon@gmail.com>
2018-11-07 11:36:01 -05:00

465 lines
13 KiB
Go

package libpod
import (
"context"
"os"
"path"
"path/filepath"
"strings"
"time"
"github.com/containers/storage"
"github.com/containers/storage/pkg/stringid"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/ulule/deepcopier"
)
// CtrRemoveTimeout is the default number of seconds to wait after stopping a container
// before sending the kill signal
const CtrRemoveTimeout = 10
// Contains the public Runtime API for containers
// A CtrCreateOption is a functional option which alters the Container created
// by NewContainer
type CtrCreateOption func(*Container) error
// ContainerFilter is a function to determine whether a container is included
// in command output. Containers to be outputted are tested using the function.
// A true return will include the container, a false return will exclude it.
type ContainerFilter func(*Container) bool
// NewContainer creates a new container from a given OCI config
func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, options ...CtrCreateOption) (c *Container, err error) {
r.lock.Lock()
defer r.lock.Unlock()
if !r.valid {
return nil, ErrRuntimeStopped
}
return r.newContainer(ctx, rSpec, options...)
}
func (r *Runtime) newContainer(ctx context.Context, rSpec *spec.Spec, options ...CtrCreateOption) (c *Container, err error) {
if rSpec == nil {
return nil, errors.Wrapf(ErrInvalidArg, "must provide a valid runtime spec to create container")
}
ctr := new(Container)
ctr.config = new(ContainerConfig)
ctr.state = new(containerState)
ctr.config.ID = stringid.GenerateNonCryptoID()
ctr.config.Spec = new(spec.Spec)
deepcopier.Copy(rSpec).To(ctr.config.Spec)
ctr.config.CreatedTime = time.Now()
ctr.config.ShmSize = DefaultShmSize
ctr.state.BindMounts = make(map[string]string)
// Path our lock file will reside at
lockPath := filepath.Join(r.lockDir, ctr.config.ID)
// Grab a lockfile at the given path
lock, err := storage.GetLockfile(lockPath)
if err != nil {
return nil, errors.Wrapf(err, "error creating lockfile for new container")
}
ctr.lock = lock
ctr.config.StopTimeout = CtrRemoveTimeout
// Set namespace based on current runtime namespace
// Do so before options run so they can override it
if r.config.Namespace != "" {
ctr.config.Namespace = r.config.Namespace
}
ctr.runtime = r
for _, option := range options {
if err := option(ctr); err != nil {
return nil, errors.Wrapf(err, "error running container create option")
}
}
ctr.valid = true
ctr.state.State = ContainerStateConfigured
var pod *Pod
if ctr.config.Pod != "" {
// Get the pod from state
pod, err = r.state.Pod(ctr.config.Pod)
if err != nil {
return nil, errors.Wrapf(err, "cannot add container %s to pod %s", ctr.ID(), ctr.config.Pod)
}
}
if ctr.config.Name == "" {
name, err := r.generateName()
if err != nil {
return nil, err
}
ctr.config.Name = name
}
// Check CGroup parent sanity, and set it if it was not set
switch r.config.CgroupManager {
case CgroupfsCgroupsManager:
if ctr.config.CgroupParent == "" {
if pod != nil && pod.config.UsePodCgroup {
podCgroup, err := pod.CgroupPath()
if err != nil {
return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID())
}
if podCgroup == "" {
return nil, errors.Wrapf(ErrInternal, "pod %s cgroup is not set", pod.ID())
}
ctr.config.CgroupParent = podCgroup
} else {
ctr.config.CgroupParent = CgroupfsDefaultCgroupParent
}
} else if strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") {
return nil, errors.Wrapf(ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs")
}
case SystemdCgroupsManager:
if ctr.config.CgroupParent == "" {
if pod != nil && pod.config.UsePodCgroup {
podCgroup, err := pod.CgroupPath()
if err != nil {
return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID())
}
ctr.config.CgroupParent = podCgroup
} else {
ctr.config.CgroupParent = SystemdDefaultCgroupParent
}
} else if len(ctr.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") {
return nil, errors.Wrapf(ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups")
}
default:
return nil, errors.Wrapf(ErrInvalidArg, "unsupported CGroup manager: %s - cannot validate cgroup parent", r.config.CgroupManager)
}
// Set up storage for the container
if err := ctr.setupStorage(ctx); err != nil {
return nil, err
}
defer func() {
if err != nil {
if err2 := ctr.teardownStorage(); err2 != nil {
logrus.Errorf("Error removing partially-created container root filesystem: %s", err2)
}
}
}()
if ctr.config.LogPath == "" {
ctr.config.LogPath = filepath.Join(ctr.config.StaticDir, "ctr.log")
}
if ctr.config.ShmDir == "" {
if ctr.state.UserNSRoot == "" {
ctr.config.ShmDir = filepath.Join(ctr.bundlePath(), "shm")
} else {
ctr.config.ShmDir = filepath.Join(ctr.state.UserNSRoot, "shm")
}
if err := os.MkdirAll(ctr.config.ShmDir, 0700); err != nil {
if !os.IsExist(err) {
return nil, errors.Wrapf(err, "unable to create shm %q dir", ctr.config.ShmDir)
}
}
ctr.config.Mounts = append(ctr.config.Mounts, ctr.config.ShmDir)
}
// Add the container to the state
// TODO: May be worth looking into recovering from name/ID collisions here
if ctr.config.Pod != "" {
// Lock the pod to ensure we can't add containers to pods
// being removed
pod.lock.Lock()
defer pod.lock.Unlock()
if err := r.state.AddContainerToPod(pod, ctr); err != nil {
return nil, err
}
} else {
if err := r.state.AddContainer(ctr); err != nil {
return nil, err
}
}
return ctr, nil
}
// RemoveContainer removes the given container
// If force is specified, the container will be stopped first
// Otherwise, RemoveContainer will return an error if the container is running
func (r *Runtime) RemoveContainer(ctx context.Context, c *Container, force bool) error {
r.lock.Lock()
defer r.lock.Unlock()
return r.removeContainer(ctx, c, force)
}
// Internal function to remove a container
// Locks the container, but does not lock the runtime
func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool) error {
if !c.valid {
if ok, _ := r.state.HasContainer(c.ID()); !ok {
// Container probably already removed
// Or was never in the runtime to begin with
return nil
}
}
// We need to lock the pod before we lock the container
// To avoid races around removing a container and the pod it is in
var pod *Pod
var err error
if c.config.Pod != "" {
pod, err = r.state.Pod(c.config.Pod)
if err != nil {
return errors.Wrapf(err, "container %s is in pod %s, but pod cannot be retrieved", c.ID(), pod.ID())
}
// Lock the pod while we're removing container
pod.lock.Lock()
defer pod.lock.Unlock()
if err := pod.updatePod(); err != nil {
return err
}
infraID := pod.state.InfraContainerID
if c.ID() == infraID {
return errors.Errorf("container %s is the infra container of pod %s and cannot be removed without removing the pod", c.ID(), pod.ID())
}
}
c.lock.Lock()
defer c.lock.Unlock()
if !r.valid {
return ErrRuntimeStopped
}
// Update the container to get current state
if err := c.syncContainer(); err != nil {
return err
}
if c.state.State == ContainerStatePaused {
return errors.Wrapf(ErrCtrStateInvalid, "container %s is paused, cannot remove until unpaused", c.ID())
}
// Check that the container's in a good state to be removed
if c.state.State == ContainerStateRunning && force {
if err := r.ociRuntime.stopContainer(c, c.StopTimeout()); err != nil {
return errors.Wrapf(err, "cannot remove container %s as it could not be stopped", c.ID())
}
// Need to update container state to make sure we know it's stopped
if err := c.waitForExitFileAndSync(); err != nil {
return err
}
} else if !(c.state.State == ContainerStateConfigured ||
c.state.State == ContainerStateCreated ||
c.state.State == ContainerStateStopped ||
c.state.State == ContainerStateExited) {
return errors.Wrapf(ErrCtrStateInvalid, "cannot remove container %s as it is %s - running or paused containers cannot be removed", c.ID(), c.state.State.String())
}
// Check that all of our exec sessions have finished
if len(c.state.ExecSessions) != 0 {
if force {
if err := r.ociRuntime.execStopContainer(c, c.StopTimeout()); err != nil {
return err
}
} else {
return errors.Wrapf(ErrCtrStateInvalid, "cannot remove container %s as it has active exec sessions", c.ID())
}
}
// Check that no other containers depend on the container
deps, err := r.state.ContainerInUse(c)
if err != nil {
return err
}
if len(deps) != 0 {
depsStr := strings.Join(deps, ", ")
return errors.Wrapf(ErrCtrExists, "container %s has dependent containers which must be removed before it: %s", c.ID(), depsStr)
}
var cleanupErr error
// Remove the container from the state
if c.config.Pod != "" {
if err := r.state.RemoveContainerFromPod(pod, c); err != nil {
if cleanupErr == nil {
cleanupErr = err
} else {
logrus.Errorf("removing container from pod: %v", err)
}
}
} else {
if err := r.state.RemoveContainer(c); err != nil {
if cleanupErr == nil {
cleanupErr = err
} else {
logrus.Errorf("removing container: %v", err)
}
}
}
// Set container as invalid so it can no longer be used
c.valid = false
// Clean up network namespace, cgroups, mounts
if err := c.cleanup(ctx); err != nil {
if cleanupErr == nil {
cleanupErr = err
} else {
logrus.Errorf("cleanup network, cgroups, mounts: %v", err)
}
}
// Stop the container's storage
if err := c.teardownStorage(); err != nil {
if cleanupErr == nil {
cleanupErr = err
} else {
logrus.Errorf("cleanup storage: %v", err)
}
}
// Delete the container.
// Not needed in Configured and Exited states, where the container
// doesn't exist in the runtime
if c.state.State != ContainerStateConfigured &&
c.state.State != ContainerStateExited {
if err := c.delete(ctx); err != nil {
if cleanupErr == nil {
cleanupErr = err
} else {
logrus.Errorf("delete container: %v", err)
}
}
}
return cleanupErr
}
// GetContainer retrieves a container by its ID
func (r *Runtime) GetContainer(id string) (*Container, error) {
r.lock.RLock()
defer r.lock.RUnlock()
if !r.valid {
return nil, ErrRuntimeStopped
}
return r.state.Container(id)
}
// HasContainer checks if a container with the given ID is present
func (r *Runtime) HasContainer(id string) (bool, error) {
r.lock.RLock()
defer r.lock.RUnlock()
if !r.valid {
return false, ErrRuntimeStopped
}
return r.state.HasContainer(id)
}
// LookupContainer looks up a container by its name or a partial ID
// If a partial ID is not unique, an error will be returned
func (r *Runtime) LookupContainer(idOrName string) (*Container, error) {
r.lock.RLock()
defer r.lock.RUnlock()
if !r.valid {
return nil, ErrRuntimeStopped
}
return r.state.LookupContainer(idOrName)
}
// GetContainers retrieves all containers from the state
// Filters can be provided which will determine what containers are included in
// the output. Multiple filters are handled by ANDing their output, so only
// containers matching all filters are returned
func (r *Runtime) GetContainers(filters ...ContainerFilter) ([]*Container, error) {
r.lock.RLock()
defer r.lock.RUnlock()
if !r.valid {
return nil, ErrRuntimeStopped
}
ctrs, err := r.state.AllContainers()
if err != nil {
return nil, err
}
ctrsFiltered := make([]*Container, 0, len(ctrs))
for _, ctr := range ctrs {
include := true
for _, filter := range filters {
include = include && filter(ctr)
}
if include {
ctrsFiltered = append(ctrsFiltered, ctr)
}
}
return ctrsFiltered, nil
}
// GetAllContainers is a helper function for GetContainers
func (r *Runtime) GetAllContainers() ([]*Container, error) {
return r.state.AllContainers()
}
// GetRunningContainers is a helper function for GetContainers
func (r *Runtime) GetRunningContainers() ([]*Container, error) {
running := func(c *Container) bool {
state, _ := c.State()
return state == ContainerStateRunning
}
return r.GetContainers(running)
}
// GetContainersByList is a helper function for GetContainers
// which takes a []string of container IDs or names
func (r *Runtime) GetContainersByList(containers []string) ([]*Container, error) {
var ctrs []*Container
for _, inputContainer := range containers {
ctr, err := r.LookupContainer(inputContainer)
if err != nil {
return ctrs, errors.Wrapf(err, "unable to lookup container %s", inputContainer)
}
ctrs = append(ctrs, ctr)
}
return ctrs, nil
}
// GetLatestContainer returns a container object of the latest created container.
func (r *Runtime) GetLatestContainer() (*Container, error) {
lastCreatedIndex := -1
var lastCreatedTime time.Time
ctrs, err := r.GetAllContainers()
if err != nil {
return nil, errors.Wrapf(err, "unable to find latest container")
}
if len(ctrs) == 0 {
return nil, ErrNoSuchCtr
}
for containerIndex, ctr := range ctrs {
createdTime := ctr.config.CreatedTime
if createdTime.After(lastCreatedTime) {
lastCreatedTime = createdTime
lastCreatedIndex = containerIndex
}
}
return ctrs[lastCreatedIndex], nil
}