nspawn: add filesystem id mapping support to --bind and --bind-ro

This commit is contained in:
Andreas Valder 2021-06-05 18:39:38 +02:00 committed by Lennart Poettering
parent 5433d425b4
commit c0c8f71800
4 changed files with 55 additions and 37 deletions

View file

@ -1357,17 +1357,21 @@ After=sys-subsystem-net-devices-ens1.device</programlisting>
source path is taken relative to the image's root directory. This permits setting up bind mounts within the
container image. The source path may be specified as empty string, in which case a temporary directory below
the host's <filename>/var/tmp/</filename> directory is used. It is automatically removed when the container is
shut down. Mount options are comma-separated and currently, only <option>rbind</option> and
<option>norbind</option> are allowed, controlling whether to create a recursive or a regular bind
mount. Defaults to "rbind". Backslash escapes are interpreted, so <literal>\:</literal> may be used to embed
colons in either path. This option may be specified multiple times for creating multiple independent bind
mount points. The <option>--bind-ro=</option> option creates read-only bind mounts.</para>
shut down. The <option>--bind-ro=</option> option creates read-only bind mounts. Backslash escapes are interpreted,
so <literal>\:</literal> may be used to embed colons in either path. This option may be specified
multiple times for creating multiple independent bind mount points.</para>
<para>Mount options are comma-separated. <option>rbind</option> and <option>norbind</option> control whether
to create a recursive or a regular bind mount. Defaults to "rbind". <option>idmap</option> and <option>noidmap</option>
control whether the bind mount should use filesystem ID mappings. Using <option>idmap</option> requires that the source
filesystem supports ID mappings. Defaults to "noidmap".</para>
<para>Note that when this option is used in combination with <option>--private-users</option>, the resulting
mount points will be owned by the <constant>nobody</constant> user. That's because the mount and its files and
directories continue to be owned by the relevant host users and groups, which do not exist in the container,
and thus show up under the wildcard UID 65534 (nobody). If such bind mounts are created, it is recommended to
make them read-only, using <option>--bind-ro=</option>.</para></listitem>
make them read-only, using <option>--bind-ro=</option>. Alternatively, the <option>idmap</option> mount option may be
used to map the filesystem IDs.</para></listitem>
</varlistentry>
<varlistentry>

View file

@ -672,9 +672,10 @@ int mount_all(const char *dest,
return 0;
}
static int parse_mount_bind_options(const char *options, unsigned long *mount_flags, char **mount_opts) {
static int parse_mount_bind_options(const char *options, unsigned long *mount_flags, char **mount_opts, bool *idmapped) {
unsigned long flags = *mount_flags;
char *opts = NULL;
bool flag_idmapped = *idmapped;
int r;
assert(options);
@ -692,29 +693,35 @@ static int parse_mount_bind_options(const char *options, unsigned long *mount_fl
flags |= MS_REC;
else if (streq(word, "norbind"))
flags &= ~MS_REC;
else if (streq(word, "idmap"))
flag_idmapped = true;
else if (streq(word, "noidmap"))
flag_idmapped = false;
else
return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
"Invalid bind mount option: %s", word);
}
*mount_flags = flags;
*idmapped = flag_idmapped;
/* in the future mount_opts will hold string options for mount(2) */
*mount_opts = opts;
return 0;
}
static int mount_bind(const char *dest, CustomMount *m) {
static int mount_bind(const char *dest, CustomMount *m, uid_t uid_shift, uid_t uid_range) {
_cleanup_free_ char *mount_opts = NULL, *where = NULL;
unsigned long mount_flags = MS_BIND | MS_REC;
struct stat source_st, dest_st;
int r;
bool idmapped = false;
assert(dest);
assert(m);
if (m->options) {
r = parse_mount_bind_options(m->options, &mount_flags, &mount_opts);
r = parse_mount_bind_options(m->options, &mount_flags, &mount_opts, &idmapped);
if (r < 0)
return r;
}
@ -767,6 +774,12 @@ static int mount_bind(const char *dest, CustomMount *m) {
return log_error_errno(r, "Read-only bind mount failed: %m");
}
if (idmapped) {
r = remount_idmap(where, uid_shift, uid_range);
if (r < 0)
return log_error_errno(r, "Failed to map ids for bind mount %s: %m", where);
}
return 0;
}
@ -906,6 +919,7 @@ int mount_custom(
const char *dest,
CustomMount *mounts, size_t n,
uid_t uid_shift,
uid_t uid_range,
const char *selinux_apifs_context,
MountSettingsMask mount_settings) {
int r;
@ -927,7 +941,7 @@ int mount_custom(
switch (m->type) {
case CUSTOM_MOUNT_BIND:
r = mount_bind(dest, m);
r = mount_bind(dest, m, uid_shift, uid_range);
break;
case CUSTOM_MOUNT_TMPFS:

View file

@ -58,7 +58,7 @@ int inaccessible_mount_parse(CustomMount **l, size_t *n, const char *s);
int mount_all(const char *dest, MountSettingsMask mount_settings, uid_t uid_shift, const char *selinux_apifs_context);
int mount_sysfs(const char *dest, MountSettingsMask mount_settings);
int mount_custom(const char *dest, CustomMount *mounts, size_t n, uid_t uid_shift, const char *selinux_apifs_context, MountSettingsMask mount_settings);
int mount_custom(const char *dest, CustomMount *mounts, size_t n, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context, MountSettingsMask mount_settings);
bool has_custom_root_mount(const CustomMount *mounts, size_t n);
int setup_volatile_mode(const char *directory, VolatileMode mode, uid_t uid_shift, const char *selinux_apifs_context);

View file

@ -3314,6 +3314,7 @@ static int inner_child(
arg_custom_mounts,
arg_n_custom_mounts,
0,
0,
arg_selinux_apifs_context,
MOUNT_NON_ROOT_ONLY | MOUNT_IN_USERNS);
if (r < 0)
@ -3719,32 +3720,6 @@ static int outer_child(
directory = "/run/systemd/nspawn-root";
}
if (arg_userns_mode != USER_NAMESPACE_NO &&
IN_SET(arg_userns_ownership, USER_NAMESPACE_OWNERSHIP_MAP, USER_NAMESPACE_OWNERSHIP_AUTO) &&
arg_uid_shift != 0) {
r = make_mount_point(directory);
if (r < 0)
return r;
r = remount_idmap(directory, arg_uid_shift, arg_uid_range);
if (r == -EINVAL || ERRNO_IS_NOT_SUPPORTED(r)) {
/* This might fail because the kernel or file system doesn't support idmapping. We
* can't really distinguish this nicely, nor do we have any guarantees about the
* error codes we see, could be EOPNOTSUPP or EINVAL. */
if (arg_userns_ownership != USER_NAMESPACE_OWNERSHIP_AUTO)
return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
"ID mapped mounts are apparently not available, sorry.");
log_debug("ID mapped mounts are apparently not available on this kernel or for the selected file system, reverting to recursive chown()ing.");
arg_userns_ownership = USER_NAMESPACE_OWNERSHIP_CHOWN;
} else if (r < 0)
return log_error_errno(r, "Failed to set up ID mapped mounts: %m");
else {
log_debug("ID mapped mounts available, making use of them.");
idmap = true;
}
}
r = setup_pivot_root(
directory,
arg_pivot_root_new,
@ -3795,6 +3770,7 @@ static int outer_child(
arg_custom_mounts,
arg_n_custom_mounts,
arg_uid_shift,
arg_uid_range,
arg_selinux_apifs_context,
MOUNT_ROOT_ONLY);
if (r < 0)
@ -3805,6 +3781,29 @@ static int outer_child(
if (r < 0)
return r;
if (arg_userns_mode != USER_NAMESPACE_NO &&
IN_SET(arg_userns_ownership, USER_NAMESPACE_OWNERSHIP_MAP, USER_NAMESPACE_OWNERSHIP_AUTO) &&
arg_uid_shift != 0) {
r = remount_idmap(directory, arg_uid_shift, arg_uid_range);
if (r == -EINVAL || ERRNO_IS_NOT_SUPPORTED(r)) {
/* This might fail because the kernel or file system doesn't support idmapping. We
* can't really distinguish this nicely, nor do we have any guarantees about the
* error codes we see, could be EOPNOTSUPP or EINVAL. */
if (arg_userns_ownership != USER_NAMESPACE_OWNERSHIP_AUTO)
return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
"ID mapped mounts are apparently not available, sorry.");
log_debug("ID mapped mounts are apparently not available on this kernel or for the selected file system, reverting to recursive chown()ing.");
arg_userns_ownership = USER_NAMESPACE_OWNERSHIP_CHOWN;
} else if (r < 0)
return log_error_errno(r, "Failed to set up ID mapped mounts: %m");
else {
log_debug("ID mapped mounts available, making use of them.");
idmap = true;
}
}
if (dissected_image) {
/* Now we know the uid shift, let's now mount everything else that might be in the image. */
r = dissected_image_mount(
@ -3915,6 +3914,7 @@ static int outer_child(
arg_custom_mounts,
arg_n_custom_mounts,
arg_uid_shift,
arg_uid_range,
arg_selinux_apifs_context,
MOUNT_NON_ROOT_ONLY);
if (r < 0)