podman/cmd/kpod/spec.go
umohnani8 34696c55e9 Add Linux Root Propagation to kpod create and run
Add [r]shared, [r]private, [r]slave functionality to the --volume flag
for kpod create and kpod run
This sets the root propagation for each bind mount

Signed-off-by: umohnani8 <umohnani@redhat.com>

Closes: #87
Approved by: rhatdan
2017-12-05 10:23:13 +00:00

558 lines
15 KiB
Go

package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"strings"
"github.com/docker/docker/daemon/caps"
"github.com/docker/docker/pkg/mount"
"github.com/docker/go-units"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/runtime-tools/generate"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
"github.com/projectatomic/libpod/libpod"
ann "github.com/projectatomic/libpod/pkg/annotations"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
func blockAccessToKernelFilesystems(config *createConfig, g *generate.Generator) {
if !config.privileged {
for _, mp := range []string{
"/proc/kcore",
"/proc/latency_stats",
"/proc/timer_list",
"/proc/timer_stats",
"/proc/sched_debug",
"/proc/scsi",
"/sys/firmware",
} {
g.AddLinuxMaskedPaths(mp)
}
for _, rp := range []string{
"/proc/asound",
"/proc/bus",
"/proc/fs",
"/proc/irq",
"/proc/sys",
"/proc/sysrq-trigger",
} {
g.AddLinuxReadonlyPaths(rp)
}
}
}
func addPidNS(config *createConfig, g *generate.Generator) error {
pidMode := config.pidMode
if pidMode.IsHost() {
return g.RemoveLinuxNamespace(libpod.PIDNamespace)
}
if pidMode.IsContainer() {
ctr, err := config.runtime.LookupContainer(pidMode.Container())
if err != nil {
return errors.Wrapf(err, "container %q not found", pidMode.Container())
}
pid, err := ctr.PID()
if err != nil {
return errors.Wrapf(err, "Failed to get pid of container %q", pidMode.Container())
}
pidNsPath := fmt.Sprintf("/proc/%d/ns/pid", pid)
if err := g.AddOrReplaceLinuxNamespace(libpod.PIDNamespace, pidNsPath); err != nil {
return err
}
}
return nil
}
func addNetNS(config *createConfig, g *generate.Generator) error {
netMode := config.netMode
if netMode.IsHost() {
return g.RemoveLinuxNamespace(libpod.NetNamespace)
}
if netMode.IsNone() {
return libpod.ErrNotImplemented
}
if netMode.IsBridge() {
return libpod.ErrNotImplemented
}
if netMode.IsContainer() {
ctr, err := config.runtime.LookupContainer(netMode.ConnectedContainer())
if err != nil {
return errors.Wrapf(err, "container %q not found", netMode.ConnectedContainer())
}
pid, err := ctr.PID()
if err != nil {
return errors.Wrapf(err, "Failed to get pid of container %q", netMode.ConnectedContainer())
}
nsPath := fmt.Sprintf("/proc/%d/ns/net", pid)
if err := g.AddOrReplaceLinuxNamespace(libpod.NetNamespace, nsPath); err != nil {
return err
}
}
return nil
}
func addUTSNS(config *createConfig, g *generate.Generator) error {
utsMode := config.utsMode
if utsMode.IsHost() {
return g.RemoveLinuxNamespace(libpod.UTSNamespace)
}
return nil
}
func addIpcNS(config *createConfig, g *generate.Generator) error {
ipcMode := config.ipcMode
if ipcMode.IsHost() {
return g.RemoveLinuxNamespace(libpod.IPCNamespace)
}
if ipcMode.IsContainer() {
ctr, err := config.runtime.LookupContainer(ipcMode.Container())
if err != nil {
return errors.Wrapf(err, "container %q not found", ipcMode.Container())
}
pid, err := ctr.PID()
if err != nil {
return errors.Wrapf(err, "Failed to get pid of container %q", ipcMode.Container())
}
nsPath := fmt.Sprintf("/proc/%d/ns/ipc", pid)
if err := g.AddOrReplaceLinuxNamespace(libpod.IPCNamespace, nsPath); err != nil {
return err
}
}
return nil
}
func addRlimits(config *createConfig, g *generate.Generator) error {
var (
ul *units.Ulimit
err error
)
for _, u := range config.resources.ulimit {
if ul, err = units.ParseUlimit(u); err != nil {
return errors.Wrapf(err, "ulimit option %q requires name=SOFT:HARD, failed to be parsed", u)
}
g.AddProcessRlimits("RLIMIT_"+strings.ToUpper(ul.Name), uint64(ul.Soft), uint64(ul.Hard))
}
return nil
}
func setupCapabilities(config *createConfig, configSpec *spec.Spec) error {
var err error
var caplist []string
if config.privileged {
caplist = caps.GetAllCapabilities()
} else {
caplist, err = caps.TweakCapabilities(configSpec.Process.Capabilities.Bounding, config.capAdd, config.capDrop)
if err != nil {
return err
}
}
configSpec.Process.Capabilities.Bounding = caplist
configSpec.Process.Capabilities.Permitted = caplist
configSpec.Process.Capabilities.Inheritable = caplist
configSpec.Process.Capabilities.Effective = caplist
return nil
}
// Parses information needed to create a container into an OCI runtime spec
func createConfigToOCISpec(config *createConfig) (*spec.Spec, error) {
g := generate.New()
g.AddCgroupsMount("ro")
g.SetProcessCwd(config.workDir)
g.SetProcessArgs(config.command)
g.SetProcessTerminal(config.tty)
// User and Group must go together
g.SetProcessUID(config.user)
g.SetProcessGID(config.group)
for _, gid := range config.groupAdd {
g.AddProcessAdditionalGid(gid)
}
for key, val := range config.GetAnnotations() {
g.AddAnnotation(key, val)
}
g.SetRootReadonly(config.readOnlyRootfs)
g.SetHostname(config.hostname)
if config.hostname != "" {
g.AddProcessEnv("HOSTNAME", config.hostname)
}
for _, sysctl := range config.sysctl {
s := strings.SplitN(sysctl, "=", 2)
g.AddLinuxSysctl(s[0], s[1])
}
// RESOURCES - MEMORY
if config.resources.memory != 0 {
g.SetLinuxResourcesMemoryLimit(config.resources.memory)
}
if config.resources.memoryReservation != 0 {
g.SetLinuxResourcesMemoryReservation(config.resources.memoryReservation)
}
if config.resources.memorySwap != 0 {
g.SetLinuxResourcesMemorySwap(config.resources.memorySwap)
}
if config.resources.kernelMemory != 0 {
g.SetLinuxResourcesMemoryKernel(config.resources.kernelMemory)
}
if config.resources.memorySwappiness != -1 {
g.SetLinuxResourcesMemorySwappiness(uint64(config.resources.memorySwappiness))
}
g.SetLinuxResourcesMemoryDisableOOMKiller(config.resources.disableOomKiller)
g.SetProcessOOMScoreAdj(config.resources.oomScoreAdj)
// RESOURCES - CPU
if config.resources.cpuShares != 0 {
g.SetLinuxResourcesCPUShares(config.resources.cpuShares)
}
if config.resources.cpuQuota != 0 {
g.SetLinuxResourcesCPUQuota(config.resources.cpuQuota)
}
if config.resources.cpuPeriod != 0 {
g.SetLinuxResourcesCPUPeriod(config.resources.cpuPeriod)
}
if config.resources.cpuRtRuntime != 0 {
g.SetLinuxResourcesCPURealtimeRuntime(config.resources.cpuRtRuntime)
}
if config.resources.cpuRtPeriod != 0 {
g.SetLinuxResourcesCPURealtimePeriod(config.resources.cpuRtPeriod)
}
if config.resources.cpus != "" {
g.SetLinuxResourcesCPUCpus(config.resources.cpus)
}
if config.resources.cpusetMems != "" {
g.SetLinuxResourcesCPUMems(config.resources.cpusetMems)
}
// SECURITY OPTS
g.SetProcessNoNewPrivileges(config.noNewPrivileges)
g.SetProcessApparmorProfile(config.apparmorProfile)
g.SetProcessSelinuxLabel(config.processLabel)
g.SetLinuxMountLabel(config.mountLabel)
blockAccessToKernelFilesystems(config, &g)
// RESOURCES - PIDS
if config.resources.pidsLimit != 0 {
g.SetLinuxResourcesPidsLimit(config.resources.pidsLimit)
}
for _, i := range config.tmpfs {
options := []string{"rw", "noexec", "nosuid", "nodev", "size=65536k"}
spliti := strings.SplitN(i, ":", 2)
if len(spliti) > 1 {
if _, _, err := mount.ParseTmpfsOptions(spliti[1]); err != nil {
return nil, err
}
options = strings.Split(spliti[1], ",")
}
// Default options if nothing passed
g.AddTmpfsMount(spliti[0], options)
}
for name, val := range config.env {
g.AddProcessEnv(name, val)
}
if err := addRlimits(config, &g); err != nil {
return nil, err
}
if err := addPidNS(config, &g); err != nil {
return nil, err
}
if err := addNetNS(config, &g); err != nil {
return nil, err
}
if err := addUTSNS(config, &g); err != nil {
return nil, err
}
if err := addIpcNS(config, &g); err != nil {
return nil, err
}
configSpec := g.Spec()
if config.seccompProfilePath != "" && config.seccompProfilePath != "unconfined" {
seccompProfile, err := ioutil.ReadFile(config.seccompProfilePath)
if err != nil {
return nil, errors.Wrapf(err, "opening seccomp profile (%s) failed", config.seccompProfilePath)
}
var seccompConfig spec.LinuxSeccomp
if err := json.Unmarshal(seccompProfile, &seccompConfig); err != nil {
return nil, errors.Wrapf(err, "decoding seccomp profile (%s) failed", config.seccompProfilePath)
}
configSpec.Linux.Seccomp = &seccompConfig
}
// BIND MOUNTS
mounts, err := config.GetVolumeMounts()
if err != nil {
return nil, errors.Wrapf(err, "error getting volume mounts")
}
configSpec.Mounts = append(configSpec.Mounts, mounts...)
for _, mount := range configSpec.Mounts {
for _, opt := range mount.Options {
switch opt {
case "private", "rprivate", "slave", "rslave", "shared", "rshared":
if err := g.SetLinuxRootPropagation(opt); err != nil {
return nil, errors.Wrapf(err, "error setting root propagation for %q", mount.Destination)
}
}
}
}
// HANDLE CAPABILITIES
if err := setupCapabilities(config, configSpec); err != nil {
return nil, err
}
/*
Hooks: &configSpec.Hooks{},
//Annotations
Resources: &configSpec.LinuxResources{
Devices: config.GetDefaultDevices(),
BlockIO: &blkio,
//HugepageLimits:
Network: &configSpec.LinuxNetwork{
// ClassID *uint32
// Priorites []LinuxInterfacePriority
},
},
//CgroupsPath:
//Namespaces: []LinuxNamespace
//Devices
// DefaultAction:
// Architectures
// Syscalls:
},
// RootfsPropagation
// MaskedPaths
// ReadonlyPaths:
// IntelRdt
},
}
*/
return configSpec, nil
}
func (c *createConfig) CreateBlockIO() (spec.LinuxBlockIO, error) {
bio := spec.LinuxBlockIO{}
bio.Weight = &c.resources.blkioWeight
if len(c.resources.blkioWeightDevice) > 0 {
var lwds []spec.LinuxWeightDevice
for _, i := range c.resources.blkioWeightDevice {
wd, err := validateweightDevice(i)
if err != nil {
return bio, errors.Wrapf(err, "invalid values for blkio-weight-device")
}
wdStat := getStatFromPath(wd.path)
lwd := spec.LinuxWeightDevice{
Weight: &wd.weight,
}
lwd.Major = int64(unix.Major(wdStat.Rdev))
lwd.Minor = int64(unix.Minor(wdStat.Rdev))
lwds = append(lwds, lwd)
}
}
if len(c.resources.deviceReadBps) > 0 {
readBps, err := makeThrottleArray(c.resources.deviceReadBps)
if err != nil {
return bio, err
}
bio.ThrottleReadBpsDevice = readBps
}
if len(c.resources.deviceWriteBps) > 0 {
writeBpds, err := makeThrottleArray(c.resources.deviceWriteBps)
if err != nil {
return bio, err
}
bio.ThrottleWriteBpsDevice = writeBpds
}
if len(c.resources.deviceReadIOps) > 0 {
readIOps, err := makeThrottleArray(c.resources.deviceReadIOps)
if err != nil {
return bio, err
}
bio.ThrottleReadIOPSDevice = readIOps
}
if len(c.resources.deviceWriteIOps) > 0 {
writeIOps, err := makeThrottleArray(c.resources.deviceWriteIOps)
if err != nil {
return bio, err
}
bio.ThrottleWriteIOPSDevice = writeIOps
}
return bio, nil
}
// GetAnnotations returns the all the annotations for the container
func (c *createConfig) GetAnnotations() map[string]string {
a := getDefaultAnnotations()
// TODO - Which annotations do we want added by default
// TODO - This should be added to the DB long term
if c.tty {
a["io.kubernetes.cri-o.TTY"] = "true"
}
return a
}
func getDefaultAnnotations() map[string]string {
var annotations map[string]string
annotations = make(map[string]string)
annotations[ann.Annotations] = ""
annotations[ann.ContainerID] = ""
annotations[ann.ContainerName] = ""
annotations[ann.ContainerType] = ""
annotations[ann.Created] = ""
annotations[ann.HostName] = ""
annotations[ann.IP] = ""
annotations[ann.Image] = ""
annotations[ann.ImageName] = ""
annotations[ann.ImageRef] = ""
annotations[ann.KubeName] = ""
annotations[ann.Labels] = ""
annotations[ann.LogPath] = ""
annotations[ann.Metadata] = ""
annotations[ann.Name] = ""
annotations[ann.PrivilegedRuntime] = ""
annotations[ann.ResolvPath] = ""
annotations[ann.HostnamePath] = ""
annotations[ann.SandboxID] = ""
annotations[ann.SandboxName] = ""
annotations[ann.ShmPath] = ""
annotations[ann.MountPoint] = ""
annotations[ann.TrustedSandbox] = ""
annotations[ann.TTY] = "false"
annotations[ann.Stdin] = ""
annotations[ann.StdinOnce] = ""
annotations[ann.Volumes] = ""
return annotations
}
//GetVolumeMounts takes user provided input for bind mounts and creates Mount structs
func (c *createConfig) GetVolumeMounts() ([]spec.Mount, error) {
var m []spec.Mount
var options []string
for _, i := range c.volumes {
// We need to handle SELinux options better here, specifically :Z
spliti := strings.Split(i, ":")
if len(spliti) > 2 {
options = strings.Split(spliti[2], ",")
}
options = append(options, "rbind")
var foundrw, foundro, foundz, foundZ bool
var rootProp string
for _, opt := range options {
switch opt {
case "rw":
foundrw = true
case "ro":
foundro = true
case "z":
foundz = true
case "Z":
foundZ = true
case "private", "rprivate", "slave", "rslave", "shared", "rshared":
rootProp = opt
}
}
if !foundrw && !foundro {
options = append(options, "rw")
}
if foundz {
if err := label.Relabel(spliti[0], c.mountLabel, true); err != nil {
return nil, errors.Wrapf(err, "relabel failed %q", spliti[0])
}
}
if foundZ {
if err := label.Relabel(spliti[0], c.mountLabel, false); err != nil {
return nil, errors.Wrapf(err, "relabel failed %q", spliti[0])
}
}
if rootProp == "" {
options = append(options, "rprivate")
}
m = append(m, spec.Mount{
Destination: spliti[1],
Type: string(TypeBind),
Source: spliti[0],
Options: options,
})
}
return m, nil
}
//GetTmpfsMounts takes user provided input for tmpfs mounts and creates Mount structs
func (c *createConfig) GetTmpfsMounts() []spec.Mount {
var m []spec.Mount
for _, i := range c.tmpfs {
// Default options if nothing passed
options := []string{"rw", "noexec", "nosuid", "nodev", "size=65536k"}
spliti := strings.Split(i, ":")
destPath := spliti[0]
if len(spliti) > 1 {
options = strings.Split(spliti[1], ",")
}
m = append(m, spec.Mount{
Destination: destPath,
Type: string(TypeTmpfs),
Options: options,
Source: string(TypeTmpfs),
})
}
return m
}
func (c *createConfig) GetContainerCreateOptions() ([]libpod.CtrCreateOption, error) {
var options []libpod.CtrCreateOption
// Uncomment after talking to mheon about unimplemented funcs
// options = append(options, libpod.WithLabels(c.labels))
if c.interactive {
options = append(options, libpod.WithStdin())
}
if c.name != "" {
logrus.Debug("appending name %s", c.name)
options = append(options, libpod.WithName(c.name))
}
return options, nil
}
func getStatFromPath(path string) unix.Stat_t {
s := unix.Stat_t{}
_ = unix.Stat(path, &s)
return s
}
func makeThrottleArray(throttleInput []string) ([]spec.LinuxThrottleDevice, error) {
var ltds []spec.LinuxThrottleDevice
for _, i := range throttleInput {
t, err := validateBpsDevice(i)
if err != nil {
return []spec.LinuxThrottleDevice{}, err
}
ltd := spec.LinuxThrottleDevice{}
ltd.Rate = t.rate
ltdStat := getStatFromPath(t.path)
ltd.Major = int64(unix.Major(ltdStat.Rdev))
ltd.Minor = int64(unix.Major(ltdStat.Rdev))
ltds = append(ltds, ltd)
}
return ltds, nil
}