nspawn: add support for owneridmap bind option

The owneridmap bind option maps the owner of the target directory inside the
container to the owner of the directory bound from the host filesystem.
This ensures that files and directories created in the container are owned
by the directory owner on the host filesystem. All other users remain
unmapped, so writing files as any other user in the container is not
allowed.

Resolves: #27037
This commit is contained in:
Benjamin Franzke 2023-11-17 08:03:57 +01:00 committed by Lennart Poettering
parent 4dcfdd210c
commit 614d09a37d
7 changed files with 113 additions and 17 deletions

View file

@ -1491,12 +1491,12 @@ After=sys-subsystem-net-devices-ens1.device</programlisting>
<para>Mount options are comma-separated. <option>rbind</option> and <option>norbind</option> control whether
to create a recursive or a regular bind mount. Defaults to <option>rbind</option>. <option>noidmap</option>,
<option>idmap</option>, and <option>rootidmap</option> control ID mapping.</para>
<option>idmap</option>, <option>rootidmap</option> and <option>owneridmap</option> control ID mapping.</para>
<para>Using <option>idmap</option> or <option>rootidmap</option> requires support by the source filesystem
for user/group ID mapped mounts. Defaults to <option>noidmap</option>. With <option>x</option> being the container's UID range
offset, <option>y</option> being the length of the container's UID range, and <option>p</option> being the
owner UID of the bind mount source inode on the host:
<para>Using <option>idmap</option>, <option>rootidmap</option> or <option>owneridmap</option> requires support
by the source filesystem for user/group ID mapped mounts. Defaults to <option>noidmap</option>. With
<option>x</option> being the container's UID range offset, <option>y</option> being the length of the
container's UID range, and <option>p</option> being the owner UID of the bind mount source inode on the host:
<itemizedlist>
<listitem><para>If <option>noidmap</option> is used, any user <option>z</option> in the range
@ -1512,10 +1512,15 @@ After=sys-subsystem-net-devices-ens1.device</programlisting>
<listitem><para>If <option>rootidmap</option> is used, the user <option>0</option> seen from inside
of the container is mapped to <option>p</option> on the host. Other host users are mapped to
<option>nobody</option> inside the container.</para></listitem>
<listitem><para>If <option>owneridmap</option> is used, the owner of the target directory inside of the
container is mapped to <option>p</option> on the host. Other host users are mapped to
<option>nobody</option> inside the container.</para></listitem>
</itemizedlist></para>
<para>Whichever ID mapping option is used, the same mapping will be used for user and group IDs. If
<option>rootidmap</option> is used, the group owning the bind mounted directory will have no effect.</para>
<option>rootidmap</option> or <option>owneridmap</option> is used, the group owning the bind mounted directory
will have no effect.</para>
<para>Note that when this option is used in combination with <option>--private-users</option>, the resulting
mount points will be owned by the <constant>nobody</constant> user. That's because the mount and its files and

View file

@ -742,6 +742,8 @@ static int parse_mount_bind_options(const char *options, unsigned long *mount_fl
new_idmapping = REMOUNT_IDMAPPING_NONE;
else if (streq(word, "rootidmap"))
new_idmapping = REMOUNT_IDMAPPING_HOST_OWNER;
else if (streq(word, "owneridmap"))
new_idmapping = REMOUNT_IDMAPPING_HOST_OWNER_TO_TARGET_OWNER;
else
return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
"Invalid bind mount option: %s", word);
@ -759,6 +761,7 @@ static int mount_bind(const char *dest, CustomMount *m, uid_t uid_shift, uid_t u
_cleanup_free_ char *mount_opts = NULL, *where = NULL;
unsigned long mount_flags = MS_BIND | MS_REC;
struct stat source_st, dest_st;
uid_t dest_uid = UID_INVALID;
int r;
RemountIdmapping idmapping = REMOUNT_IDMAPPING_NONE;
@ -787,6 +790,8 @@ static int mount_bind(const char *dest, CustomMount *m, uid_t uid_shift, uid_t u
if (stat(where, &dest_st) < 0)
return log_error_errno(errno, "Failed to stat %s: %m", where);
dest_uid = dest_st.st_uid;
if (S_ISDIR(source_st.st_mode) && !S_ISDIR(dest_st.st_mode))
return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
"Cannot bind mount directory %s on file %s.",
@ -815,6 +820,8 @@ static int mount_bind(const char *dest, CustomMount *m, uid_t uid_shift, uid_t u
if (chown(where, uid_shift, uid_shift) < 0)
return log_error_errno(errno, "Failed to chown %s: %m", where);
dest_uid = uid_shift;
}
r = mount_nofollow_verbose(LOG_ERR, m->source, where, NULL, mount_flags, mount_opts);
@ -828,7 +835,7 @@ static int mount_bind(const char *dest, CustomMount *m, uid_t uid_shift, uid_t u
}
if (idmapping != REMOUNT_IDMAPPING_NONE) {
r = remount_idmap(STRV_MAKE(where), uid_shift, uid_range, source_st.st_uid, idmapping);
r = remount_idmap(STRV_MAKE(where), uid_shift, uid_range, source_st.st_uid, dest_uid, idmapping);
if (r < 0)
return log_error_errno(r, "Failed to map ids for bind mount %s: %m", where);
}

View file

@ -3931,7 +3931,7 @@ static int outer_child(
dirs[i] = NULL;
r = remount_idmap(dirs, arg_uid_shift, arg_uid_range, UID_INVALID, REMOUNT_IDMAPPING_HOST_ROOT);
r = remount_idmap(dirs, arg_uid_shift, arg_uid_range, UID_INVALID, UID_INVALID, REMOUNT_IDMAPPING_HOST_ROOT);
if (r == -EINVAL || ERRNO_IS_NEG_NOT_SUPPORTED(r)) {
/* This might fail because the kernel or file system doesn't support idmapping. We
* can't really distinguish this nicely, nor do we have any guarantees about the

View file

@ -2144,7 +2144,7 @@ int dissected_image_mount(
if (userns_fd < 0 && need_user_mapping(uid_shift, uid_range) && FLAGS_SET(flags, DISSECT_IMAGE_MOUNT_IDMAPPED)) {
my_userns_fd = make_userns(uid_shift, uid_range, UID_INVALID, REMOUNT_IDMAPPING_HOST_ROOT);
my_userns_fd = make_userns(uid_shift, uid_range, UID_INVALID, UID_INVALID, REMOUNT_IDMAPPING_HOST_ROOT);
if (my_userns_fd < 0)
return my_userns_fd;

View file

@ -1314,7 +1314,7 @@ int fd_make_mount_point(int fd) {
return 1;
}
int make_userns(uid_t uid_shift, uid_t uid_range, uid_t owner, RemountIdmapping idmapping) {
int make_userns(uid_t uid_shift, uid_t uid_range, uid_t source_owner, uid_t dest_owner, RemountIdmapping idmapping) {
_cleanup_close_ int userns_fd = -EBADF;
_cleanup_free_ char *line = NULL;
@ -1349,8 +1349,20 @@ int make_userns(uid_t uid_shift, uid_t uid_range, uid_t owner, RemountIdmapping
if (idmapping == REMOUNT_IDMAPPING_HOST_OWNER) {
/* Remap the owner of the bind mounted directory to the root user within the container. This
* way every file written by root within the container to the bind-mounted directory will
* be owned by the original user. All other user will remain unmapped. */
if (asprintf(&line, UID_FMT " " UID_FMT " " UID_FMT "\n", owner, uid_shift, 1u) < 0)
* be owned by the original user from the host. All other users will remain unmapped. */
if (asprintf(&line, UID_FMT " " UID_FMT " " UID_FMT "\n", source_owner, uid_shift, 1u) < 0)
return log_oom_debug();
}
if (idmapping == REMOUNT_IDMAPPING_HOST_OWNER_TO_TARGET_OWNER) {
/* Remap the owner of the bind mounted directory to the owner of the target directory
* within the container. This way every file written by target directory owner within the
* container to the bind-mounted directory will be owned by the original host user.
* All other users will remain unmapped. */
if (asprintf(
&line,
UID_FMT " " UID_FMT " " UID_FMT "\n",
source_owner, dest_owner, 1u) < 0)
return log_oom_debug();
}
@ -1424,10 +1436,10 @@ int remount_idmap_fd(
return 0;
}
int remount_idmap(char **p, uid_t uid_shift, uid_t uid_range, uid_t owner, RemountIdmapping idmapping) {
int remount_idmap(char **p, uid_t uid_shift, uid_t uid_range, uid_t source_owner, uid_t dest_owner,RemountIdmapping idmapping) {
_cleanup_close_ int userns_fd = -EBADF;
userns_fd = make_userns(uid_shift, uid_range, owner, idmapping);
userns_fd = make_userns(uid_shift, uid_range, source_owner, dest_owner, idmapping);
if (userns_fd < 0)
return userns_fd;

View file

@ -116,16 +116,19 @@ typedef enum RemountIdmapping {
* certain security implications defaults to off, and requires explicit opt-in. */
REMOUNT_IDMAPPING_HOST_ROOT,
/* Define a mapping from root user within the container to the owner of the bind mounted directory.
* This ensure no root-owned files will be written in a bind-mounted directory owned by a different
* This ensures no root-owned files will be written in a bind-mounted directory owned by a different
* user. No other users are mapped. */
REMOUNT_IDMAPPING_HOST_OWNER,
/* Define a mapping from bind-target owner within the container to the host owner of the bind mounted
* directory. No other users are mapped. */
REMOUNT_IDMAPPING_HOST_OWNER_TO_TARGET_OWNER,
_REMOUNT_IDMAPPING_MAX,
_REMOUNT_IDMAPPING_INVALID = -EINVAL,
} RemountIdmapping;
int make_userns(uid_t uid_shift, uid_t uid_range, uid_t owner, RemountIdmapping idmapping);
int make_userns(uid_t uid_shift, uid_t uid_range, uid_t host_owner, uid_t dest_owner, RemountIdmapping idmapping);
int remount_idmap_fd(char **p, int userns_fd);
int remount_idmap(char **p, uid_t uid_shift, uid_t uid_range, uid_t owner, RemountIdmapping idmapping);
int remount_idmap(char **p, uid_t uid_shift, uid_t uid_range, uid_t host_owner, uid_t dest_owner, RemountIdmapping idmapping);
int bind_mount_submounts(
const char *source,

View file

@ -622,6 +622,75 @@ testcase_rootidmap() {
fi
}
owneridmap_cleanup() {
    # Tear down the scratch area used by the owneridmap test case.
    #
    # $1 - scratch directory (mandatory); its "bind" subdirectory is
    #      unmounted first if it is currently a mount point, then the whole
    #      directory tree is removed.
    local scratch="${1:?}"
    local bind_mount="$scratch/bind"

    if mountpoint -q "$bind_mount"; then
        umount "$bind_mount"
    fi

    rm -fr "$scratch"
}
testcase_owneridmap() {
    # Exercise the "owneridmap" bind mount option of systemd-nspawn.
    #
    # A host directory owned by $owner is bind mounted onto /home/testuser
    # (owned by UID/GID 1010 in the container tree). The test then checks:
    #   1. inside the container the pre-existing file shows up as 1010:1010;
    #   2. a file created from inside the container ends up owned by
    #      $owner:$owner on the host.
    #
    # Returns 0 on success or when idmapped mounts are unsupported, 1 otherwise.
    local root cmd permissions
    local owner=1000

    root="$(mktemp -d /var/lib/machines/testsuite-13.owneridmap-path.XXX)"
    # Create ext4 image, as ext4 supports idmapped-mounts.
    mkdir -p /tmp/owneridmap/bind
    dd if=/dev/zero of=/tmp/owneridmap/ext4.img bs=4k count=2048
    mkfs.ext4 /tmp/owneridmap/ext4.img
    mount /tmp/owneridmap/ext4.img /tmp/owneridmap/bind
    # Unmount and remove the scratch area whenever this function returns.
    trap "owneridmap_cleanup /tmp/owneridmap/" RETURN

    touch /tmp/owneridmap/bind/file
    chown -R "$owner:$owner" /tmp/owneridmap/bind

    # Allow users to read and execute / in order to execute binaries
    chmod o+rx "$root"

    create_dummy_container "$root"

    # --user=
    # "Fake" getent passwd's bare minimum, so we don't have to pull it in
    # with all the DSO shenanigans
    cat >"$root/bin/getent" <<\EOF
#!/bin/bash
if [[ $# -eq 0 ]]; then
:
elif [[ $1 == passwd ]]; then
echo "testuser:x:1010:1010:testuser:/:/bin/sh"
elif [[ $1 == initgroups ]]; then
echo "testuser"
fi
EOF
    chmod +x "$root/bin/getent"

    mkdir -p "$root/home/testuser"
    chown 1010:1010 "$root/home/testuser"

    # This snippet runs via `bash -c`, i.e. outside of any function, so it has
    # to use `exit` to report failure: `return` is an error at that level and
    # would let the script carry on to the `touch`, masking the failure.
    cmd='PERMISSIONS=$(stat -c "%u:%g" /home/testuser/file); if [[ $PERMISSIONS != "1010:1010" ]]; then echo "*** wrong permissions: $PERMISSIONS"; exit 1; fi; touch /home/testuser/other_file'
    # NOTE(review): the `if !` below only reflects nspawn's exit status when
    # `pipefail` is enabled by the enclosing script - confirm.
    if ! SYSTEMD_LOG_TARGET=console \
            systemd-nspawn --register=no \
                           --directory="$root" \
                           -U \
                           --user=testuser \
                           --bind=/tmp/owneridmap/bind:/home/testuser:owneridmap \
                           /usr/bin/bash -c "$cmd" |& tee nspawn.out; then
        if grep -q "Failed to map ids for bind mount.*: Function not implemented" nspawn.out; then
            echo "idmapped mounts are not supported, skipping the test..."
            return 0
        fi

        return 1
    fi

    # The file created inside the container must belong to the host-side owner.
    permissions=$(stat -c "%u:%g" /tmp/owneridmap/bind/other_file)
    if [[ $permissions != "$owner:$owner" ]]; then
        echo "*** wrong permissions: $permissions"
        # Use an explicit `if` rather than `[[ ... ]] && return 1`: the latter
        # would leave a non-zero status as the function's result even when
        # user namespaces are unsupported and the mismatch should be tolerated.
        if [[ "$IS_USERNS_SUPPORTED" == "yes" ]]; then
            return 1
        fi
    fi
}
testcase_notification_socket() {
# https://github.com/systemd/systemd/issues/4944
local root