Privileged containers can now restart if the host devices change

If a privileged container is running, stops, and the devices on the host
change, such as a USB device is unplugged, then a container would no
longer start. Previously, the devices from the host were only being
added to the container once: when the container was created. Now, this
happens every time the container starts.

I did this by adding a boolean to the container config that indicates
whether to mount all of the devices or not, which can be set via an option.

During spec generation, if the `MountAllDevices` option is set in the
container config, all host devices are added to the container.

Additionally, a couple of functions from `pkg/specgen/generate/config_linux.go`
were moved into `pkg/util/utils_linux.go` as they were needed in
multiple packages.

Closes #13899

Signed-off-by: Jake Correnti <jcorrenti13@gmail.com>
This commit is contained in:
Jake Correnti 2022-06-03 11:25:45 -04:00
parent f5389dbb44
commit 8533ea0004
8 changed files with 199 additions and 145 deletions

View file

@ -412,6 +412,9 @@ type ContainerMiscConfig struct {
InitContainerType string `json:"init_container_type,omitempty"`
// PasswdEntry specifies arbitrary data to append to a file.
PasswdEntry string `json:"passwd_entry,omitempty"`
// MountAllDevices is an option to indicate whether a privileged container
// will mount all the host's devices
MountAllDevices bool `json:"mountAllDevices"`
}
// InfraInherit contains the compatible options inheritable from the infra container

View file

@ -407,6 +407,14 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
//nolint:staticcheck
g := generate.NewFromSpec(c.config.Spec)
// If the flag to mount all devices is set for a privileged container, add
// all the devices from the host's machine into the container
if c.config.MountAllDevices {
if err := util.AddPrivilegedDevices(&g); err != nil {
return nil, err
}
}
// If network namespace was requested, add it now
if c.config.CreateNetNS {
if c.config.PostConfigureNetNS {

View file

@ -2159,3 +2159,17 @@ func WithPasswdEntry(passwdEntry string) CtrCreateOption {
return nil
}
}
// WithMountAllDevices sets the option to mount all of a privileged container's
// host devices
func WithMountAllDevices() CtrCreateOption {
return func(ctr *Container) error {
if ctr.valid {
return define.ErrCtrFinalized
}
ctr.config.MountAllDevices = true
return nil
}
}

View file

@ -3,7 +3,6 @@ package generate
import (
"fmt"
"io/fs"
"io/ioutil"
"os"
"path"
"path/filepath"
@ -11,6 +10,7 @@ import (
"github.com/containers/podman/v4/libpod/define"
"github.com/containers/podman/v4/pkg/rootless"
"github.com/containers/podman/v4/pkg/util"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/runtime-tools/generate"
"github.com/pkg/errors"
@ -18,56 +18,6 @@ import (
"golang.org/x/sys/unix"
)
var (
errNotADevice = errors.New("not a device node")
)
func addPrivilegedDevices(g *generate.Generator) error {
hostDevices, err := getDevices("/dev")
if err != nil {
return err
}
g.ClearLinuxDevices()
if rootless.IsRootless() {
mounts := make(map[string]interface{})
for _, m := range g.Mounts() {
mounts[m.Destination] = true
}
newMounts := []spec.Mount{}
for _, d := range hostDevices {
devMnt := spec.Mount{
Destination: d.Path,
Type: define.TypeBind,
Source: d.Path,
Options: []string{"slave", "nosuid", "noexec", "rw", "rbind"},
}
if d.Path == "/dev/ptmx" || strings.HasPrefix(d.Path, "/dev/tty") {
continue
}
if _, found := mounts[d.Path]; found {
continue
}
newMounts = append(newMounts, devMnt)
}
g.Config.Mounts = append(newMounts, g.Config.Mounts...)
if g.Config.Linux.Resources != nil {
g.Config.Linux.Resources.Devices = nil
}
} else {
for _, d := range hostDevices {
g.AddDevice(d)
}
// Add resources device - need to clear the existing one first.
if g.Config.Linux.Resources != nil {
g.Config.Linux.Resources.Devices = nil
}
g.AddLinuxResourcesDevice(true, "", nil, nil, "rwm")
}
return nil
}
// DevicesFromPath computes a list of devices
func DevicesFromPath(g *generate.Generator, devicePath string) error {
devs := strings.Split(devicePath, ":")
@ -174,60 +124,12 @@ func BlockAccessToKernelFilesystems(privileged, pidModeIsHost bool, mask, unmask
}
}
// based on getDevices from runc (libcontainer/devices/devices.go)
func getDevices(path string) ([]spec.LinuxDevice, error) {
files, err := ioutil.ReadDir(path)
if err != nil {
if rootless.IsRootless() && os.IsPermission(err) {
return nil, nil
}
return nil, err
}
out := []spec.LinuxDevice{}
for _, f := range files {
switch {
case f.IsDir():
switch f.Name() {
// ".lxc" & ".lxd-mounts" added to address https://github.com/lxc/lxd/issues/2825
case "pts", "shm", "fd", "mqueue", ".lxc", ".lxd-mounts":
continue
default:
sub, err := getDevices(filepath.Join(path, f.Name()))
if err != nil {
return nil, err
}
if sub != nil {
out = append(out, sub...)
}
continue
}
case f.Name() == "console":
continue
case f.Mode()&os.ModeSymlink != 0:
continue
}
device, err := deviceFromPath(filepath.Join(path, f.Name()))
if err != nil {
if err == errNotADevice {
continue
}
if os.IsNotExist(err) {
continue
}
return nil, err
}
out = append(out, *device)
}
return out, nil
}
func addDevice(g *generate.Generator, device string) error {
src, dst, permissions, err := ParseDevice(device)
if err != nil {
return err
}
dev, err := deviceFromPath(src)
dev, err := util.DeviceFromPath(src)
if err != nil {
return errors.Wrapf(err, "%s is not a valid device", src)
}
@ -316,43 +218,6 @@ func IsValidDeviceMode(mode string) bool {
return true
}
// Copied from github.com/opencontainers/runc/libcontainer/devices
// Given the path to a device look up the information about a linux device
func deviceFromPath(path string) (*spec.LinuxDevice, error) {
var stat unix.Stat_t
err := unix.Lstat(path, &stat)
if err != nil {
return nil, err
}
var (
devType string
mode = stat.Mode
devNumber = uint64(stat.Rdev) // nolint: unconvert
m = os.FileMode(mode)
)
switch {
case mode&unix.S_IFBLK == unix.S_IFBLK:
devType = "b"
case mode&unix.S_IFCHR == unix.S_IFCHR:
devType = "c"
case mode&unix.S_IFIFO == unix.S_IFIFO:
devType = "p"
default:
return nil, errNotADevice
}
return &spec.LinuxDevice{
Type: devType,
Path: path,
FileMode: &m,
UID: &stat.Uid,
GID: &stat.Gid,
Major: int64(unix.Major(devNumber)),
Minor: int64(unix.Minor(devNumber)),
}, nil
}
func supportAmbientCapabilities() bool {
err := unix.Prctl(unix.PR_CAP_AMBIENT, unix.PR_CAP_AMBIENT_IS_SET, 0, 0, 0)
return err == nil

View file

@ -278,6 +278,10 @@ func createContainerOptions(rt *libpod.Runtime, s *specgen.SpecGenerator, pod *l
options = append(options, libpod.WithPasswdEntry(s.PasswdEntry))
}
if s.Privileged {
options = append(options, libpod.WithMountAllDevices())
}
useSystemd := false
switch s.Systemd {
case "always":

View file

@ -337,14 +337,8 @@ func SpecGenToOCI(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Runt
}
var userDevices []spec.LinuxDevice
if s.Privileged {
// If privileged, we need to add all the host devices to the
// spec. We do not add the user provided ones because we are
// already adding them all.
if err := addPrivilegedDevices(&g); err != nil {
return nil, err
}
} else {
if !s.Privileged {
// add default devices from containers.conf
for _, device := range rtc.Containers.Devices {
if err = DevicesFromPath(&g, device); err != nil {

View file

@ -3,13 +3,24 @@ package util
import (
"fmt"
"io/fs"
"io/ioutil"
"os"
"path/filepath"
"strings"
"syscall"
"github.com/containers/podman/v4/libpod/define"
"github.com/containers/podman/v4/pkg/rootless"
"github.com/containers/psgo"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/runtime-tools/generate"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
var (
errNotADevice = errors.New("not a device node")
)
// GetContainerPidInformationDescriptors returns a string slice of all supported
@ -59,3 +70,134 @@ func FindDeviceNodes() (map[string]string, error) {
return nodes, nil
}
func AddPrivilegedDevices(g *generate.Generator) error {
hostDevices, err := getDevices("/dev")
if err != nil {
return err
}
g.ClearLinuxDevices()
if rootless.IsRootless() {
mounts := make(map[string]interface{})
for _, m := range g.Mounts() {
mounts[m.Destination] = true
}
newMounts := []spec.Mount{}
for _, d := range hostDevices {
devMnt := spec.Mount{
Destination: d.Path,
Type: define.TypeBind,
Source: d.Path,
Options: []string{"slave", "nosuid", "noexec", "rw", "rbind"},
}
if d.Path == "/dev/ptmx" || strings.HasPrefix(d.Path, "/dev/tty") {
continue
}
if _, found := mounts[d.Path]; found {
continue
}
newMounts = append(newMounts, devMnt)
}
g.Config.Mounts = append(newMounts, g.Config.Mounts...)
if g.Config.Linux.Resources != nil {
g.Config.Linux.Resources.Devices = nil
}
} else {
for _, d := range hostDevices {
g.AddDevice(d)
}
// Add resources device - need to clear the existing one first.
if g.Config.Linux.Resources != nil {
g.Config.Linux.Resources.Devices = nil
}
g.AddLinuxResourcesDevice(true, "", nil, nil, "rwm")
}
return nil
}
// based on getDevices from runc (libcontainer/devices/devices.go)
func getDevices(path string) ([]spec.LinuxDevice, error) {
files, err := ioutil.ReadDir(path)
if err != nil {
if rootless.IsRootless() && os.IsPermission(err) {
return nil, nil
}
return nil, err
}
out := []spec.LinuxDevice{}
for _, f := range files {
switch {
case f.IsDir():
switch f.Name() {
// ".lxc" & ".lxd-mounts" added to address https://github.com/lxc/lxd/issues/2825
case "pts", "shm", "fd", "mqueue", ".lxc", ".lxd-mounts":
continue
default:
sub, err := getDevices(filepath.Join(path, f.Name()))
if err != nil {
return nil, err
}
if sub != nil {
out = append(out, sub...)
}
continue
}
case f.Name() == "console":
continue
case f.Mode()&os.ModeSymlink != 0:
continue
}
device, err := DeviceFromPath(filepath.Join(path, f.Name()))
if err != nil {
if err == errNotADevice {
continue
}
if os.IsNotExist(err) {
continue
}
return nil, err
}
out = append(out, *device)
}
return out, nil
}
// Copied from github.com/opencontainers/runc/libcontainer/devices
// Given the path to a device look up the information about a linux device
func DeviceFromPath(path string) (*spec.LinuxDevice, error) {
var stat unix.Stat_t
err := unix.Lstat(path, &stat)
if err != nil {
return nil, err
}
var (
devType string
mode = stat.Mode
devNumber = uint64(stat.Rdev) // nolint: unconvert
m = os.FileMode(mode)
)
switch {
case mode&unix.S_IFBLK == unix.S_IFBLK:
devType = "b"
case mode&unix.S_IFCHR == unix.S_IFCHR:
devType = "c"
case mode&unix.S_IFIFO == unix.S_IFIFO:
devType = "p"
default:
return nil, errNotADevice
}
return &spec.LinuxDevice{
Type: devType,
Path: path,
FileMode: &m,
UID: &stat.Uid,
GID: &stat.Gid,
Major: int64(unix.Major(devNumber)),
Minor: int64(unix.Minor(devNumber)),
}, nil
}

View file

@ -131,6 +131,30 @@ var _ = Describe("Podman privileged container tests", func() {
Expect(len(session.OutputToStringArray())).To(BeNumerically(">", 20))
})
It("podman privileged should restart after host devices change", func() {
containerName := "privileged-restart-test"
SkipIfRootless("Cannot create devices in /dev in rootless mode")
Expect(os.MkdirAll("/dev/foodevdir", os.ModePerm)).To(BeNil())
mknod := SystemExec("mknod", []string{"/dev/foodevdir/null", "c", "1", "3"})
mknod.WaitWithDefaultTimeout()
Expect(mknod).Should(Exit(0))
session := podmanTest.Podman([]string{"run", "--name=" + containerName, "--privileged", "-it", fedoraMinimal, "ls", "/dev"})
session.WaitWithDefaultTimeout()
Expect(session).Should(Exit(0))
deviceFiles := session.OutputToStringArray()
os.RemoveAll("/dev/foodevdir")
session = podmanTest.Podman([]string{"start", "--attach", containerName})
session.WaitWithDefaultTimeout()
Expect(session).Should(Exit(0))
deviceFilesAfterRemoval := session.OutputToStringArray()
Expect(deviceFiles).To(Not(Equal(deviceFilesAfterRemoval)))
})
It("run no-new-privileges test", func() {
// Check if our kernel is new enough
k, err := IsKernelNewerThan("4.14")