nspawn: expose a dir in the container where it can bind AF_UNIX sockets that will appear on the host

This commit is contained in:
Lennart Poettering 2024-01-05 16:43:41 +01:00
parent 0abd510f7f
commit 613fb4b601

View file

@ -2,7 +2,6 @@
#include <errno.h>
#include <getopt.h>
#include <linux/fs.h>
#include <linux/loop.h>
#if HAVE_SELINUX
#include <selinux/selinux.h>
@ -10,6 +9,7 @@
#include <stdlib.h>
#include <sys/file.h>
#include <sys/ioctl.h>
#include <sys/mount.h>
#include <sys/personality.h>
#include <sys/prctl.h>
#include <sys/types.h>
@ -17,6 +17,8 @@
#include <termios.h>
#include <unistd.h>
#include <linux/fs.h> /* Must be included after <sys/mount.h> */
#include "sd-bus.h"
#include "sd-daemon.h"
#include "sd-id128.h"
@ -3608,6 +3610,102 @@ static int setup_notify_child(void) {
return TAKE_FD(fd);
}
static int setup_unix_export_dir_outside(char **ret) {
int r;
assert(ret);
_cleanup_free_ char *p = NULL;
p = path_join("/run/systemd/nspawn/unix-export", arg_machine);
if (!p)
return log_oom();
r = path_is_mount_point(p, /* root= */ NULL, 0);
if (r > 0)
return log_error_errno(SYNTHETIC_ERRNO(EEXIST), "Mount point '%s' exists already, refusing.", p);
if (r < 0 && r != -ENOENT)
return log_error_errno(r, "Failed to detect if '%s' is a mount point: %m", p);
r = mkdir_p(p, 0755);
if (r < 0)
return log_error_errno(r, "Failed to create '%s': %m", p);
_cleanup_(rmdir_and_freep) char *q = TAKE_PTR(p);
/* Mount the "unix export" directory really tiny, just 64 inodes. We mark the superblock writable
* (since the container shall bind sockets into it). */
r = mount_nofollow_verbose(
LOG_ERR,
"tmpfs",
q,
"tmpfs",
MS_NODEV|MS_NOEXEC|MS_NOSUID|ms_nosymfollow_supported(),
"size=4M,nr_inodes=64,mode=0755");
if (r < 0)
return r;
_cleanup_(umount_and_rmdir_and_freep) char *w = TAKE_PTR(q);
/* After creating the superblock we change the bind mount to be read-only. This means that the fs
* itself is writable, but not through the mount accessible from the host. */
r = mount_nofollow_verbose(
LOG_ERR,
/* source= */ NULL,
w,
/* fstype= */ NULL,
MS_BIND|MS_REMOUNT|MS_RDONLY|MS_NODEV|MS_NOEXEC|MS_NOSUID|ms_nosymfollow_supported(),
/* options= */ NULL);
if (r < 0)
return r;
*ret = TAKE_PTR(w);
return 0;
}
static int setup_unix_export_host_inside(const char *directory, const char *unix_export_path) {
int r;
assert(directory);
assert(unix_export_path);
r = make_run_host(directory);
if (r < 0)
return r;
_cleanup_free_ char *p = path_join(directory, "run/host/unix-export");
if (!p)
return log_oom();
if (mkdir(p, 0755) < 0)
return log_error_errno(errno, "Failed to create '%s': %m", p);
r = mount_nofollow_verbose(
LOG_ERR,
unix_export_path,
p,
/* fstype= */ NULL,
MS_BIND,
/* options= */ NULL);
if (r < 0)
return r;
r = mount_nofollow_verbose(
LOG_ERR,
/* source= */ NULL,
p,
/* fstype= */ NULL,
MS_BIND|MS_REMOUNT|MS_NODEV|MS_NOEXEC|MS_NOSUID|ms_nosymfollow_supported(),
/* options= */ NULL);
if (r < 0)
return r;
r = userns_lchown(p, 0, 0);
if (r < 0)
return log_error_errno(r, "Failed to chown '%s': %m", p);
return 0;
}
static int outer_child(
Barrier *barrier,
const char *directory,
@ -3615,7 +3713,8 @@ static int outer_child(
int fd_outer_socket,
int fd_inner_socket,
FDSet *fds,
int netns_fd) {
int netns_fd,
const char *unix_export_path) {
_cleanup_(bind_user_context_freep) BindUserContext *bind_user_context = NULL;
_cleanup_strv_free_ char **os_release_pairs = NULL;
@ -3909,6 +4008,10 @@ static int outer_child(
p = prefix_roota(directory, "/run/host");
(void) make_inaccessible_nodes(p, arg_uid_shift, arg_uid_shift);
r = setup_unix_export_host_inside(directory, unix_export_path);
if (r < 0)
return r;
r = setup_pts(directory);
if (r < 0)
return r;
@ -4760,6 +4863,7 @@ static int run_container(
_cleanup_close_ int notify_socket = -EBADF, mntns_fd = -EBADF, fd_kmsg_fifo = -EBADF;
_cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
_cleanup_(sd_event_source_unrefp) sd_event_source *notify_event_source = NULL;
_cleanup_(umount_and_rmdir_and_freep) char *unix_export_host_dir = NULL;
_cleanup_(sd_event_unrefp) sd_event *event = NULL;
_cleanup_(pty_forward_freep) PTYForward *forward = NULL;
_cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
@ -4775,6 +4879,11 @@ static int run_container(
assert_se(sigemptyset(&mask_chld) == 0);
assert_se(sigaddset(&mask_chld, SIGCHLD) == 0);
/* Set up the unix export host directory on the host first */
r = setup_unix_export_dir_outside(&unix_export_host_dir);
if (r < 0)
return r;
if (arg_userns_mode == USER_NAMESPACE_PICK) {
/* When we shall pick the UID/GID range, let's first lock /etc/passwd, so that we can safely
* check with getpwuid() if the specific user already exists. Note that /etc might be
@ -4845,7 +4954,8 @@ static int run_container(
fd_outer_socket_pair[1],
fd_inner_socket_pair[1],
fds,
child_netns_fd);
child_netns_fd,
unix_export_host_dir);
if (r < 0)
_exit(EXIT_FAILURE);
@ -5919,6 +6029,10 @@ finish:
p = strjoina("/run/systemd/nspawn/propagate/", arg_machine);
(void) rm_rf(p, REMOVE_ROOT);
p = strjoina("/run/systemd/nspawn/unix-export/", arg_machine);
(void) umount2(p, MNT_DETACH|UMOUNT_NOFOLLOW);
(void) rmdir(p);
}
expose_port_flush(&fw_ctx, arg_expose_ports, AF_INET, &expose_args.address4);