Merge pull request #26704 from poettering/mnt-nosymlinks

Set MS_NOSYMFOLLOW for ESP + XBOOTLDR and many mount option clean-ups
This commit is contained in:
Lennart Poettering 2023-03-10 09:34:04 +01:00 committed by GitHub
commit 96c96fb250
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 423 additions and 91 deletions

View file

@ -629,6 +629,7 @@ foreach ident : [
['open_tree', '''#include <sys/mount.h>'''],
['fsopen', '''#include <sys/mount.h>'''],
['fsconfig', '''#include <sys/mount.h>'''],
['fsmount', '''#include <sys/mount.h>'''],
['getdents64', '''#include <dirent.h>'''],
]

View file

@ -591,10 +591,22 @@ static inline int missing_fsopen(const char *fsname, unsigned flags) {
#if !HAVE_FSCONFIG
#ifndef FSCONFIG_SET_FLAG
#define FSCONFIG_SET_FLAG 0 /* Set parameter, supplying no value */
#endif
#ifndef FSCONFIG_SET_STRING
#define FSCONFIG_SET_STRING 1 /* Set parameter, supplying a string value */
#endif
#ifndef FSCONFIG_SET_FD
#define FSCONFIG_SET_FD 5 /* Set parameter, supplying an object by fd */
#endif
#ifndef FSCONFIG_CMD_CREATE
#define FSCONFIG_CMD_CREATE 6 /* Invoke superblock creation */
#endif
static inline int missing_fsconfig(int fd, unsigned cmd, const char *key, const void *value, int aux) {
# if defined __NR_fsconfig && __NR_fsconfig >= 0
return syscall(__NR_fsconfig, fd, cmd, key, value, aux);
@ -609,6 +621,26 @@ static inline int missing_fsconfig(int fd, unsigned cmd, const char *key, const
/* ======================================================================= */
#if !HAVE_FSMOUNT
#ifndef FSMOUNT_CLOEXEC
#define FSMOUNT_CLOEXEC 0x00000001
#endif
/* Fallback for fsmount() when the C library does not provide it: issues the raw system call if a usable
 * syscall number is known at build time, and otherwise fails with errno set to ENOSYS. */
static inline int missing_fsmount(int fd, unsigned flags, unsigned ms_flags) {
#  if !defined __NR_fsmount || __NR_fsmount < 0
        /* No usable syscall number: report the call as not implemented. */
        errno = ENOSYS;
        return -1;
#  else
        return syscall(__NR_fsmount, fd, flags, ms_flags);
#  endif
}
# define fsmount missing_fsmount
#endif
/* ======================================================================= */
#if !HAVE_GETDENTS64
static inline ssize_t missing_getdents64(int fd, void *buffer, size_t length) {

View file

@ -3,6 +3,9 @@
#include <errno.h>
#include <fcntl.h>
#include <sys/mount.h>
#if WANT_LINUX_FS_H
#include <linux/fs.h>
#endif
#include "alloc-util.h"
#include "chase-symlinks.h"
@ -10,6 +13,8 @@
#include "fileio.h"
#include "filesystems.h"
#include "fs-util.h"
#include "missing_fs.h"
#include "missing_mount.h"
#include "missing_stat.h"
#include "missing_syscall.h"
#include "mkdir.h"
@ -456,6 +461,15 @@ bool fstype_is_ro(const char *fstype) {
}
bool fstype_can_discard(const char *fstype) {
int r;
assert(fstype);
/* On new kernels we can just ask the kernel */
r = mount_option_supported(fstype, "discard", NULL);
if (r >= 0)
return r;
return STR_IN_SET(fstype,
"btrfs",
"f2fs",
@ -464,10 +478,42 @@ bool fstype_can_discard(const char *fstype) {
"xfs");
}
bool fstype_can_uid_gid(const char *fstype) {
bool fstype_can_norecovery(const char *fstype) {
        int supported;

        assert(fstype);

        /* Ask the kernel first. A negative result means it could not give us an answer. */
        supported = mount_option_supported(fstype, "norecovery", NULL);
        if (supported < 0)
                /* No answer from the kernel: fall back to the built-in list of file systems. */
                return STR_IN_SET(fstype,
                                  "ext3",
                                  "ext4",
                                  "xfs",
                                  "btrfs");

        return supported;
}
bool fstype_can_umask(const char *fstype) {
        int supported;

        assert(fstype);

        /* Ask the kernel first, probing with a concrete value for the option. */
        supported = mount_option_supported(fstype, "umask", "0077");
        if (supported < 0)
                /* No answer from the kernel: fall back to the one file system we know about. */
                return streq(fstype, "vfat");

        return supported;
}
bool fstype_can_uid_gid(const char *fstype) {
/* All file systems that have a uid=/gid= mount option that fixates the owners of all files and
* directories, current and future. Note that this does *not* ask the kernel via
* mount_option_supported() here because the uid=/gid= setting of various file systems mean different
* things: some apply it only to the root dir inode, others to all inodes in the file system. Thus we
* maintain the curated list below. 😢 */
return STR_IN_SET(fstype,
"adfs",
@ -602,3 +648,111 @@ int mount_propagation_flag_from_string(const char *name, unsigned long *ret) {
/* Returns true if 'flag' is zero or exactly one of MS_SHARED, MS_PRIVATE or MS_SLAVE. */
bool mount_propagation_flag_is_valid(unsigned long flag) {
        switch (flag) {

        case 0:
        case MS_SHARED:
        case MS_PRIVATE:
        case MS_SLAVE:
                return true;

        default:
                return false;
        }
}
unsigned long ms_nosymfollow_supported(void) {
_cleanup_close_ int fsfd = -EBADF, mntfd = -EBADF;
static int cache = -1;
/* Returns MS_NOSYMFOLLOW if it is supported, zero otherwise. */
if (cache >= 0)
return cache ? MS_NOSYMFOLLOW : 0;
/* Checks if MS_NOSYMFOLLOW is supported (which was added in 5.10). We use the new mount API's
* mount_setattr() call for that, which was added in 5.12, which is close enough. */
fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC);
if (fsfd < 0) {
if (ERRNO_IS_NOT_SUPPORTED(errno))
goto not_supported;
log_debug_errno(errno, "Failed to open superblock context for tmpfs: %m");
return 0;
}
if (fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0) {
if (ERRNO_IS_NOT_SUPPORTED(errno))
goto not_supported;
log_debug_errno(errno, "Failed to create tmpfs superblock: %m");
return 0;
}
mntfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0);
if (mntfd < 0) {
if (ERRNO_IS_NOT_SUPPORTED(errno))
goto not_supported;
log_debug_errno(errno, "Failed to turn superblock fd into mount fd: %m");
return 0;
}
if (mount_setattr(mntfd, "", AT_EMPTY_PATH|AT_RECURSIVE,
&(struct mount_attr) {
.attr_set = MOUNT_ATTR_NOSYMFOLLOW,
}, sizeof(struct mount_attr)) < 0) {
if (ERRNO_IS_NOT_SUPPORTED(errno))
goto not_supported;
log_debug_errno(errno, "Failed to set MOUNT_ATTR_NOSYMFOLLOW mount attribute: %m");
return 0;
}
cache = true;
return MS_NOSYMFOLLOW;
not_supported:
cache = false;
return 0;
}
/* Checks whether file system type 'fstype' supports the mount option 'key', either with the string
 * 'value' or, if 'value' is NULL, as a plain flag.
 *
 * Returns > 0 if the option is supported, == 0 if it is not, -EAGAIN if we cannot determine it, and any
 * other negative error otherwise. */
int mount_option_supported(const char *fstype, const char *key, const char *value) {
        _cleanup_close_ int fd = -EBADF;
        int r;

        assert(fstype);
        assert(key);

        fd = fsopen(fstype, FSOPEN_CLOEXEC);
        if (fd < 0) {
                if (!ERRNO_IS_NOT_SUPPORTED(errno))
                        return log_debug_errno(errno, "Failed to open superblock context for '%s': %m", fstype);

                return -EAGAIN; /* new mount API not available → don't know */
        }

        /* Not every file system has been converted to the new mount API yet, and for unconverted ones
         * fsconfig() with FSCONFIG_SET_STRING/FSCONFIG_SET_FLAG never fails, which makes it useless for
         * probing. Hence first find out whether this file system is converted, by passing an option that
         * certainly does not exist via FSCONFIG_SET_FD. */
        if (fsconfig(fd, FSCONFIG_SET_FD, "adefinitelynotexistingmountoption", NULL, fd) >= 0)
                return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN), "FSCONFIG_SET_FD worked unexpectedly for '%s', whoa!", fstype);

        switch (errno) {

        case EOPNOTSUPP:
                /* FSCONFIG_SET_FD not supported on the fs, hence not converted to new mount API → don't know */
                return -EAGAIN;

        case EINVAL:
                /* The bogus option was rejected as unknown, so the fs is converted. Carry on and ask the
                 * question we actually care about. */
                break;

        default:
                return log_debug_errno(errno, "Failed to check if file system has been converted to new mount API: %m");
        }

        r = value ? fsconfig(fd, FSCONFIG_SET_STRING, key, value, 0)
                  : fsconfig(fd, FSCONFIG_SET_FLAG, key, NULL, 0);
        if (r >= 0)
                return true; /* works! */

        if (errno == EINVAL)
                return false; /* EINVAL means option not supported. */

        return log_debug_errno(errno, "Failed to set '%s%s%s' on '%s' superblock context: %m",
                               key, value ? "=" : "", strempty(value), fstype);
}

View file

@ -49,6 +49,8 @@ bool fstype_is_blockdev_backed(const char *fstype);
bool fstype_is_ro(const char *fsype);
bool fstype_can_discard(const char *fstype);
bool fstype_can_uid_gid(const char *fstype);
bool fstype_can_norecovery(const char *fstype);
bool fstype_can_umask(const char *fstype);
int dev_is_devtmpfs(void);
@ -58,3 +60,7 @@ int mount_nofollow(const char *source, const char *target, const char *filesyste
const char *mount_propagation_flag_to_string(unsigned long flags);
int mount_propagation_flag_from_string(const char *name, unsigned long *ret);
bool mount_propagation_flag_is_valid(unsigned long flag);
unsigned long ms_nosymfollow_supported(void);
int mount_option_supported(const char *fstype, const char *key, const char *value);

View file

@ -1099,27 +1099,6 @@ static int mount_bind_sysfs(const MountEntry *m) {
return 1;
}
static bool mount_option_supported(const char *fstype, const char *key, const char *value) {
_cleanup_close_ int fd = -EBADF;
int r;
/* This function assumes support by default. Only if the fsconfig() call fails with -EINVAL/-EOPNOTSUPP
* will it report that the option/value is not supported. */
fd = fsopen(fstype, FSOPEN_CLOEXEC);
if (fd < 0) {
if (errno != ENOSYS)
log_debug_errno(errno, "Failed to open superblock context for '%s': %m", fstype);
return true; /* If fsopen() fails for whatever reason, assume the value is supported. */
}
r = fsconfig(fd, FSCONFIG_SET_STRING, key, value, 0);
if (r < 0 && !IN_SET(errno, EINVAL, EOPNOTSUPP, ENOSYS))
log_debug_errno(errno, "Failed to set '%s=%s' on '%s' superblock context: %m", key, value, fstype);
return r >= 0 || !IN_SET(errno, EINVAL, EOPNOTSUPP);
}
static int mount_procfs(const MountEntry *m, const NamespaceInfo *ns_info) {
_cleanup_free_ char *opts = NULL;
const char *entry_path;
@ -1147,13 +1126,14 @@ static int mount_procfs(const MountEntry *m, const NamespaceInfo *ns_info) {
* fsopen()/fsconfig() was also backported on some distros which allows us to detect
* hidepid=/subset= support in even more scenarios. */
if (mount_option_supported("proc", "hidepid", hpv)) {
if (mount_option_supported("proc", "hidepid", hpv) != 0) {
opts = strjoin("hidepid=", hpv);
if (!opts)
return -ENOMEM;
}
if (ns_info->proc_subset == PROC_SUBSET_PID && mount_option_supported("proc", "subset", "pid"))
if (ns_info->proc_subset == PROC_SUBSET_PID &&
mount_option_supported("proc", "subset", "pid") != 0)
if (!strextend_with_separator(&opts, ",", "subset=pid"))
return -ENOMEM;
}

View file

@ -245,9 +245,7 @@ static int add_mount(
fprintf(f, "Type=%s\n", fstype);
if (options)
fprintf(f, "Options=%s,%s\n", options, rw ? "rw" : "ro");
else
fprintf(f, "Options=%s\n", rw ? "rw" : "ro");
fprintf(f, "Options=%s\n", options);
r = fflush_and_check(f);
if (r < 0)
@ -301,18 +299,31 @@ static int path_is_busy(const char *where) {
}
static int add_partition_mount(
PartitionDesignator d,
DissectedPartition *p,
const char *id,
const char *where,
const char *description) {
_cleanup_free_ char *options = NULL;
int r;
assert(p);
r = path_is_busy(where);
if (r != 0)
return r < 0 ? r : 0;
r = partition_pick_mount_options(
d,
dissected_partition_fstype(p),
p->rw,
/* discard= */ true,
&options,
/* ret_ms_flags= */ NULL);
if (r < 0)
return r;
return add_mount(
id,
p->node,
@ -321,7 +332,7 @@ static int add_partition_mount(
p->rw,
p->growfs,
/* measure= */ STR_IN_SET(id, "root", "var"), /* by default measure rootfs and /var, since they contain the "identity" of the system */
NULL,
options,
description,
SPECIAL_LOCAL_FS_TARGET);
}
@ -452,20 +463,8 @@ static int add_automount(
return generator_add_symlink(arg_dest, SPECIAL_LOCAL_FS_TARGET, "wants", unit);
}
static const char *esp_or_xbootldr_options(const DissectedPartition *p) {
assert(p);
/* Discovered ESP and XBOOTLDR partition are always hardened with "noexec,nosuid,nodev".
* If we probed vfat or have no idea about the file system then assume these file systems are vfat
* and thus understand "umask=0077". */
if (!p->fstype || streq(p->fstype, "vfat"))
return "umask=0077,noexec,nosuid,nodev";
return "noexec,nosuid,nodev";
}
static int add_partition_xbootldr(DissectedPartition *p) {
_cleanup_free_ char *options = NULL;
int r;
assert(p);
@ -489,13 +488,23 @@ static int add_partition_xbootldr(DissectedPartition *p) {
if (r > 0)
return 0;
r = partition_pick_mount_options(
PARTITION_XBOOTLDR,
dissected_partition_fstype(p),
/* rw= */ true,
/* discard= */ false,
&options,
/* ret_ms_flags= */ NULL);
if (r < 0)
return log_error_errno(r, "Failed to determine default mount options for Boot Loader Partition: %m");
return add_automount("boot",
p->node,
"/boot",
p->fstype,
/* rw= */ true,
/* growfs= */ false,
esp_or_xbootldr_options(p),
options,
"Boot Loader Partition",
120 * USEC_PER_SEC);
}
@ -503,6 +512,7 @@ static int add_partition_xbootldr(DissectedPartition *p) {
#if ENABLE_EFI
static int add_partition_esp(DissectedPartition *p, bool has_xbootldr) {
const char *esp_path = NULL, *id = NULL;
_cleanup_free_ char *options = NULL;
int r;
assert(p);
@ -569,13 +579,23 @@ static int add_partition_esp(DissectedPartition *p, bool has_xbootldr) {
} else
log_debug("Not an EFI boot, skipping ESP check.");
r = partition_pick_mount_options(
PARTITION_ESP,
dissected_partition_fstype(p),
/* rw= */ true,
/* discard= */ false,
&options,
/* ret_ms_flags= */ NULL);
if (r < 0)
return log_error_errno(r, "Failed to determine default mount options for EFI System Partition: %m");
return add_automount(id,
p->node,
esp_path,
p->fstype,
/* rw= */ true,
/* growfs= */ false,
esp_or_xbootldr_options(p),
options,
"EFI System Partition Automount",
120 * USEC_PER_SEC);
}
@ -637,6 +657,7 @@ static int add_root_cryptsetup(void) {
static int add_root_mount(void) {
#if ENABLE_EFI
_cleanup_free_ char *options = NULL;
int r;
if (!is_efi_boot()) {
@ -668,6 +689,20 @@ static int add_root_mount(void) {
/* Note that we do not need to enable systemd-remount-fs.service here. If
* /etc/fstab exists, systemd-fstab-generator will pull it in for us. */
r = partition_pick_mount_options(
PARTITION_ROOT,
arg_root_fstype,
arg_root_rw > 0,
/* discard= */ true,
&options,
/* ret_ms_flags= */ NULL);
if (r < 0)
return log_error_errno(r, "Failed to pick root mount options: %m");
if (arg_root_options)
if (!strextend_with_separator(&options, ",", arg_root_options))
return log_oom();
return add_mount(
"root",
"/dev/gpt-auto-root",
@ -676,7 +711,7 @@ static int add_root_mount(void) {
/* rw= */ arg_root_rw > 0,
/* growfs= */ false,
/* measure= */ true,
arg_root_options,
options,
"Root Partition",
in_initrd() ? SPECIAL_INITRD_ROOT_FS_TARGET : SPECIAL_LOCAL_FS_TARGET);
#else
@ -745,25 +780,25 @@ static int enumerate_partitions(dev_t devnum) {
}
if (m->partitions[PARTITION_HOME].found) {
k = add_partition_mount(m->partitions + PARTITION_HOME, "home", "/home", "Home Partition");
k = add_partition_mount(PARTITION_HOME, m->partitions + PARTITION_HOME, "home", "/home", "Home Partition");
if (k < 0)
r = k;
}
if (m->partitions[PARTITION_SRV].found) {
k = add_partition_mount(m->partitions + PARTITION_SRV, "srv", "/srv", "Server Data Partition");
k = add_partition_mount(PARTITION_SRV, m->partitions + PARTITION_SRV, "srv", "/srv", "Server Data Partition");
if (k < 0)
r = k;
}
if (m->partitions[PARTITION_VAR].found) {
k = add_partition_mount(m->partitions + PARTITION_VAR, "var", "/var", "Variable Data Partition");
k = add_partition_mount(PARTITION_VAR, m->partitions + PARTITION_VAR, "var", "/var", "Variable Data Partition");
if (k < 0)
r = k;
}
if (m->partitions[PARTITION_TMP].found) {
k = add_partition_mount(m->partitions + PARTITION_TMP, "var-tmp", "/var/tmp", "Temporary Data Partition");
k = add_partition_mount(PARTITION_TMP, m->partitions + PARTITION_TMP, "var-tmp", "/var/tmp", "Temporary Data Partition");
if (k < 0)
r = k;
}

View file

@ -50,6 +50,7 @@
#include "id128-util.h"
#include "import-util.h"
#include "io-util.h"
#include "missing_mount.h"
#include "mkdir-label.h"
#include "mount-util.h"
#include "mountpoint-util.h"
@ -1502,7 +1503,99 @@ static int fs_grow(const char *node_path, const char *mount_path) {
return 0;
}
/* Picks default mount options for a partition with designator 'd' and file system type 'fstype' (which
 * may be NULL if unknown).
 *
 * On success returns 0 and stores a newly allocated option string (possibly NULL if there are no options)
 * in *ret_options. If 'ret_ms_flags' is non-NULL the MS_* mount flags are stored there; if it is NULL the
 * flags are folded into the option string in textual form instead. Returns -ENOMEM on allocation failure. */
int partition_pick_mount_options(
                PartitionDesignator d,
                const char *fstype,
                bool rw,
                bool discard,
                char **ret_options,
                unsigned long *ret_ms_flags) {

        _cleanup_free_ char *options = NULL;
        unsigned long flags;

        assert(ret_options);

        /* Baseline of mount flags that should always apply: MS_NODEV goes on every mount, since we don't
         * want to allow device nodes outside of /dev/. Read-only is added when requested. */
        flags = MS_NODEV | (rw ? 0 : MS_RDONLY);

        if (d == PARTITION_ESP || d == PARTITION_XBOOTLDR) {
                /* ESP and XBOOTLDR are generally untrusted (i.e. not encrypted or authenticated) and
                 * typically VFAT, hence be as restrictive as possible: no suid, no execution, and no
                 * symlinks if the kernel allows us to turn them off. This shouldn't hurt, since the
                 * functionality is not available there anyway. */
                flags |= MS_NOSUID|MS_NOEXEC|ms_nosymfollow_supported();

                /* The ESP might contain a pre-boot random seed. Make it inaccessible to regular
                 * userspace. ESP/XBOOTLDR is almost certainly VFAT, hence if we don't know the file
                 * system type assume it is. */
                if ((!fstype || fstype_can_umask(fstype)) &&
                    !strextend_with_separator(&options, ",", "umask=0077"))
                        return -ENOMEM;

        } else if (d == PARTITION_TMP)
                /* On /var/tmp/ also set MS_NOSUID, same as we set for /tmp/ on the host. */
                flags |= MS_NOSUID;

        /* Requesting MS_RDONLY from ext4 does not stop it from writing to the backing storage, and
         * neither BLKRO[GS]ET nor (for loopback devices) LO_FLAGS_READ_ONLY help, since they only affect
         * userspace accesses. To actually get ext4/xfs/btrfs to stop writing to the device "norecovery"
         * has to be specified in addition to MS_RDONLY.
         *
         * Note that this means we might not be able to mount corrupted file systems read-only anymore
         * (in some cases the kernel refuses to mount when corrupted, read-only and "norecovery" are
         * combined). For automatically determined mount options of loopback devices this is still the
         * right choice: otherwise using the same loopback file twice, even in read-only mode, is going to
         * fail badly sooner or later. If people want their file systems fixed up they should open them
         * in writable mode, where all these problems don't exist. */
        if (!rw && fstype && fstype_can_norecovery(fstype) &&
            !strextend_with_separator(&options, ",", "norecovery"))
                return -ENOMEM;

        if (discard && fstype && fstype_can_discard(fstype) &&
            !strextend_with_separator(&options, ",", "discard"))
                return -ENOMEM;

        if (ret_ms_flags)
                *ret_ms_flags = flags;
        else if (!strextend_with_separator(&options, ",",
                                           FLAGS_SET(flags, MS_RDONLY) ? "ro" : "rw",
                                           FLAGS_SET(flags, MS_NODEV) ? "nodev" : "dev",
                                           FLAGS_SET(flags, MS_NOSUID) ? "nosuid" : "suid",
                                           FLAGS_SET(flags, MS_NOEXEC) ? "noexec" : "exec",
                                           FLAGS_SET(flags, MS_NOSYMFOLLOW) ? "nosymfollow" : NULL))
                /* The caller did not ask for flags, hence fold them into the option string.
                 * NB: 'symfollow' is suppressed, since it's the default, and old /bin/mount might not
                 * know it. */
                return -ENOMEM;

        *ret_options = TAKE_PTR(options);
        return 0;
}
static int mount_partition(
PartitionDesignator d,
DissectedPartition *m,
const char *where,
const char *directory,
@ -1511,8 +1604,9 @@ static int mount_partition(
DissectImageFlags flags) {
_cleanup_free_ char *chased = NULL, *options = NULL;
bool rw, discard, remap_uid_gid = false;
const char *p, *node, *fstype;
bool rw, remap_uid_gid = false;
unsigned long ms_flags;
int r;
assert(m);
@ -1523,7 +1617,7 @@ static int mount_partition(
/* Use decrypted node and matching fstype if available, otherwise use the original device */
node = FORMAT_PROC_FD_PATH(m->mount_node_fd);
fstype = m->decrypted_node ? m->decrypted_fstype: m->fstype;
fstype = dissected_partition_fstype(m);
if (!fstype)
return -EAFNOSUPPORT;
@ -1541,6 +1635,9 @@ static int mount_partition(
rw = m->rw && !(flags & DISSECT_IMAGE_MOUNT_READ_ONLY);
discard = ((flags & DISSECT_IMAGE_DISCARD) ||
((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node) > 0));
if (FLAGS_SET(flags, DISSECT_IMAGE_FSCK) && rw) {
r = run_fsck(m->mount_node_fd, fstype);
if (r < 0)
@ -1571,14 +1668,9 @@ static int mount_partition(
p = where;
}
/* If requested, turn on discard support. */
if (fstype_can_discard(fstype) &&
((flags & DISSECT_IMAGE_DISCARD) ||
((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node) > 0))) {
options = strdup("discard");
if (!options)
return -ENOMEM;
}
r = partition_pick_mount_options(d, dissected_partition_fstype(m), rw, discard, &options, &ms_flags);
if (r < 0)
return r;
if (uid_is_valid(uid_shift) && uid_shift != 0) {
@ -1598,28 +1690,7 @@ static int mount_partition(
if (!strextend_with_separator(&options, ",", m->mount_options))
return -ENOMEM;
/* So, when you request MS_RDONLY from ext4, then this means nothing. It happily still writes to the
* backing storage. What's worse, the BLKRO[GS]ET flag and (in case of loopback devices)
* LO_FLAGS_READ_ONLY don't mean anything, they affect userspace accesses only, and write accesses
* from the upper file system still get propagated through to the underlying file system,
* unrestricted. To actually get ext4/xfs/btrfs to stop writing to the device we need to specify
* "norecovery" as mount option, in addition to MS_RDONLY. Yes, this sucks, since it means we need to
* carry a per file system table here.
*
* Note that this means that we might not be able to mount corrupted file systems as read-only
* anymore (since in some cases the kernel implementations will refuse mounting when corrupted,
* read-only and "norecovery" is specified). But I think for the case of automatically determined
* mount options for loopback devices this is the right choice, since otherwise using the same
* loopback file twice even in read-only mode, is going to fail badly sooner or later. The usecase of
* making reuse of the immutable images "just work" is more relevant to us than having read-only
* access that actually modifies stuff work on such image files. Or to say this differently: if
* people want their file systems to be fixed up they should just open them in writable mode, where
* all these problems don't exist. */
if (!rw && STRPTR_IN_SET(fstype, "ext3", "ext4", "xfs", "btrfs"))
if (!strextend_with_separator(&options, ",", "norecovery"))
return -ENOMEM;
r = mount_nofollow_verbose(LOG_DEBUG, node, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options);
r = mount_nofollow_verbose(LOG_DEBUG, node, p, fstype, ms_flags, options);
if (r < 0)
return r;
@ -1692,14 +1763,14 @@ int dissected_image_mount(
/* First mount the root fs. If there's none we use a tmpfs. */
if (m->partitions[PARTITION_ROOT].found)
r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, uid_shift, uid_range, flags);
r = mount_partition(PARTITION_ROOT, m->partitions + PARTITION_ROOT, where, NULL, uid_shift, uid_range, flags);
else
r = mount_root_tmpfs(where, uid_shift, flags);
if (r < 0)
return r;
/* For us mounting root always means mounting /usr as well */
r = mount_partition(m->partitions + PARTITION_USR, where, "/usr", uid_shift, uid_range, flags);
r = mount_partition(PARTITION_USR, m->partitions + PARTITION_USR, where, "/usr", uid_shift, uid_range, flags);
if (r < 0)
return r;
@ -1731,23 +1802,23 @@ int dissected_image_mount(
if (flags & DISSECT_IMAGE_MOUNT_ROOT_ONLY)
return 0;
r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", uid_shift, uid_range, flags);
r = mount_partition(PARTITION_HOME, m->partitions + PARTITION_HOME, where, "/home", uid_shift, uid_range, flags);
if (r < 0)
return r;
r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", uid_shift, uid_range, flags);
r = mount_partition(PARTITION_SRV, m->partitions + PARTITION_SRV, where, "/srv", uid_shift, uid_range, flags);
if (r < 0)
return r;
r = mount_partition(m->partitions + PARTITION_VAR, where, "/var", uid_shift, uid_range, flags);
r = mount_partition(PARTITION_VAR, m->partitions + PARTITION_VAR, where, "/var", uid_shift, uid_range, flags);
if (r < 0)
return r;
r = mount_partition(m->partitions + PARTITION_TMP, where, "/var/tmp", uid_shift, uid_range, flags);
r = mount_partition(PARTITION_TMP, m->partitions + PARTITION_TMP, where, "/var/tmp", uid_shift, uid_range, flags);
if (r < 0)
return r;
xbootldr_mounted = mount_partition(m->partitions + PARTITION_XBOOTLDR, where, "/boot", uid_shift, uid_range, flags);
xbootldr_mounted = mount_partition(PARTITION_XBOOTLDR, m->partitions + PARTITION_XBOOTLDR, where, "/boot", uid_shift, uid_range, flags);
if (xbootldr_mounted < 0)
return xbootldr_mounted;
@ -1773,7 +1844,7 @@ int dissected_image_mount(
return r;
} else if (dir_is_empty(p, /* ignore_hidden_or_backup= */ false) > 0) {
/* It exists and is an empty directory. Let's mount the ESP there. */
r = mount_partition(m->partitions + PARTITION_ESP, where, "/boot", uid_shift, uid_range, flags);
r = mount_partition(PARTITION_ESP, m->partitions + PARTITION_ESP, where, "/boot", uid_shift, uid_range, flags);
if (r < 0)
return r;
@ -1785,7 +1856,7 @@ int dissected_image_mount(
if (!esp_done) {
/* OK, let's mount the ESP now to /efi (possibly creating the dir if missing) */
r = mount_partition(m->partitions + PARTITION_ESP, where, "/efi", uid_shift, uid_range, flags);
r = mount_partition(PARTITION_ESP, m->partitions + PARTITION_ESP, where, "/efi", uid_shift, uid_range, flags);
if (r < 0)
return r;
}

View file

@ -193,3 +193,11 @@ int dissect_fstype_ok(const char *fstype);
int probe_sector_size(int fd, uint32_t *ret);
int probe_sector_size_prefer_ioctl(int fd, uint32_t *ret);
int partition_pick_mount_options(PartitionDesignator d, const char *fstype, bool rw, bool discard, char **ret_options, unsigned long *ret_ms_flags);
/* Returns the file system type to use for the partition: the decrypted one if a decrypted node is set,
 * the plain one otherwise. */
static inline const char *dissected_partition_fstype(const DissectedPartition *m) {
        assert(m);

        if (m->decrypted_node)
                return m->decrypted_fstype;

        return m->fstype;
}

View file

@ -321,6 +321,51 @@ TEST(fd_is_mount_point) {
assert_se(fd_is_mount_point(fd, "", 0) == -EINVAL);
}
/* Smoke test: calls ms_nosymfollow_supported() and logs the result. Nothing is asserted, so either
 * answer is accepted. */
TEST(ms_nosymfollow_supported) {
log_info("MS_NOSYMFOLLOW supported: %s", yes_no(ms_nosymfollow_supported()));
}
/* Exercises mount_option_supported() with options that are expected to exist and ones that are not.
 * Every check also accepts a negative result for which ERRNO_IS_PRIVILEGE() is true. */
TEST(mount_option_supported) {
int r;
/* Must be reported as supported. */
r = mount_option_supported("tmpfs", "size", "64M");
log_info("tmpfs supports size=64M: %s (%i)", r < 0 ? "dont know" : yes_no(r), r);
assert_se(r > 0 || (r < 0 && ERRNO_IS_PRIVILEGE(r)));
/* Flag-style option (no value). Here -EAGAIN ("cannot determine") is accepted as well. */
r = mount_option_supported("ext4", "discard", NULL);
log_info("ext4 supports discard: %s (%i)", r < 0 ? "dont know" : yes_no(r), r);
assert_se(r > 0 || r == -EAGAIN || (r < 0 && ERRNO_IS_PRIVILEGE(r)));
/* Made-up option passed with a value: must be reported as unsupported. */
r = mount_option_supported("tmpfs", "idontexist", "64M");
log_info("tmpfs supports idontexist: %s (%i)", r < 0 ? "dont know" : yes_no(r), r);
assert_se(r == 0 || (r < 0 && ERRNO_IS_PRIVILEGE(r)));
/* Made-up option passed as a flag: must be reported as unsupported, too. */
r = mount_option_supported("tmpfs", "ialsodontexist", NULL);
log_info("tmpfs supports ialsodontexist: %s (%i)", r < 0 ? "dont know" : yes_no(r), r);
assert_se(r == 0 || (r < 0 && ERRNO_IS_PRIVILEGE(r)));
/* Both "supported" and "not supported" are accepted here; only unexpected errors fail the test. */
r = mount_option_supported("proc", "hidepid", "1");
log_info("proc supports hidepid=1: %s (%i)", r < 0 ? "dont know" : yes_no(r), r);
assert_se(r >= 0 || (r < 0 && ERRNO_IS_PRIVILEGE(r)));
}
/* fstype_can_discard() must report support for ext4, and none for squashfs and iso9660. */
TEST(fstype_can_discard) {
assert_se(fstype_can_discard("ext4"));
assert_se(!fstype_can_discard("squashfs"));
assert_se(!fstype_can_discard("iso9660"));
}
/* fstype_can_norecovery() must report support for ext4, and none for vfat and tmpfs. */
TEST(fstype_can_norecovery) {
assert_se(fstype_can_norecovery("ext4"));
assert_se(!fstype_can_norecovery("vfat"));
assert_se(!fstype_can_norecovery("tmpfs"));
}
/* fstype_can_umask() must report support for vfat, and none for tmpfs. */
TEST(fstype_can_umask) {
assert_se(fstype_can_umask("vfat"));
assert_se(!fstype_can_umask("tmpfs"));
}
static int intro(void) {
/* let's move into our own mount namespace with all propagation from the host turned off, so
* that /proc/self/mountinfo is static and constant for the whole time our test runs. */