mirror of
https://github.com/systemd/systemd
synced 2024-09-16 06:43:18 +00:00
Merge pull request #26704 from poettering/mnt-nosymlinks
Set MS_NOSYMFOLLOW for ESP + XBOOTLDR and many mount option clean-ups
This commit is contained in:
commit
96c96fb250
|
@ -629,6 +629,7 @@ foreach ident : [
|
|||
['open_tree', '''#include <sys/mount.h>'''],
|
||||
['fsopen', '''#include <sys/mount.h>'''],
|
||||
['fsconfig', '''#include <sys/mount.h>'''],
|
||||
['fsmount', '''#include <sys/mount.h>'''],
|
||||
['getdents64', '''#include <dirent.h>'''],
|
||||
]
|
||||
|
||||
|
|
|
@ -591,10 +591,22 @@ static inline int missing_fsopen(const char *fsname, unsigned flags) {
|
|||
|
||||
#if !HAVE_FSCONFIG
|
||||
|
||||
#ifndef FSCONFIG_SET_FLAG
|
||||
#define FSCONFIG_SET_FLAG 0 /* Set parameter, supplying no value */
|
||||
#endif
|
||||
|
||||
#ifndef FSCONFIG_SET_STRING
|
||||
#define FSCONFIG_SET_STRING 1 /* Set parameter, supplying a string value */
|
||||
#endif
|
||||
|
||||
#ifndef FSCONFIG_SET_FD
|
||||
#define FSCONFIG_SET_FD 5 /* Set parameter, supplying an object by fd */
|
||||
#endif
|
||||
|
||||
#ifndef FSCONFIG_CMD_CREATE
|
||||
#define FSCONFIG_CMD_CREATE 6 /* Invoke superblock creation */
|
||||
#endif
|
||||
|
||||
static inline int missing_fsconfig(int fd, unsigned cmd, const char *key, const void *value, int aux) {
|
||||
# if defined __NR_fsconfig && __NR_fsconfig >= 0
|
||||
return syscall(__NR_fsconfig, fd, cmd, key, value, aux);
|
||||
|
@ -609,6 +621,26 @@ static inline int missing_fsconfig(int fd, unsigned cmd, const char *key, const
|
|||
|
||||
/* ======================================================================= */
|
||||
|
||||
#if !HAVE_FSMOUNT
|
||||
|
||||
#ifndef FSMOUNT_CLOEXEC
|
||||
#define FSMOUNT_CLOEXEC 0x00000001
|
||||
#endif
|
||||
|
||||
/* Fallback wrapper for fsmount(), used when the C library does not provide one. If the build
 * environment knows the syscall number we invoke the raw syscall, otherwise we fail with ENOSYS. */
static inline int missing_fsmount(int fd, unsigned flags, unsigned ms_flags) {
#  if !defined __NR_fsmount || __NR_fsmount < 0
        /* No usable syscall number at build time: report "not implemented" the usual libc way. */
        errno = ENOSYS;
        return -1;
#  else
        return syscall(__NR_fsmount, fd, flags, ms_flags);
#  endif
}
|
||||
|
||||
# define fsmount missing_fsmount
|
||||
#endif
|
||||
|
||||
/* ======================================================================= */
|
||||
|
||||
#if !HAVE_GETDENTS64
|
||||
|
||||
static inline ssize_t missing_getdents64(int fd, void *buffer, size_t length) {
|
||||
|
|
|
@ -3,6 +3,9 @@
|
|||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/mount.h>
|
||||
#if WANT_LINUX_FS_H
|
||||
#include <linux/fs.h>
|
||||
#endif
|
||||
|
||||
#include "alloc-util.h"
|
||||
#include "chase-symlinks.h"
|
||||
|
@ -10,6 +13,8 @@
|
|||
#include "fileio.h"
|
||||
#include "filesystems.h"
|
||||
#include "fs-util.h"
|
||||
#include "missing_fs.h"
|
||||
#include "missing_mount.h"
|
||||
#include "missing_stat.h"
|
||||
#include "missing_syscall.h"
|
||||
#include "mkdir.h"
|
||||
|
@ -456,6 +461,15 @@ bool fstype_is_ro(const char *fstype) {
|
|||
}
|
||||
|
||||
bool fstype_can_discard(const char *fstype) {
|
||||
int r;
|
||||
|
||||
assert(fstype);
|
||||
|
||||
/* On new kernels we can just ask the kernel */
|
||||
r = mount_option_supported(fstype, "discard", NULL);
|
||||
if (r >= 0)
|
||||
return r;
|
||||
|
||||
return STR_IN_SET(fstype,
|
||||
"btrfs",
|
||||
"f2fs",
|
||||
|
@ -464,10 +478,42 @@ bool fstype_can_discard(const char *fstype) {
|
|||
"xfs");
|
||||
}
|
||||
|
||||
bool fstype_can_uid_gid(const char *fstype) {
|
||||
bool fstype_can_norecovery(const char *fstype) {
|
||||
int r;
|
||||
|
||||
/* All file systems that have a uid=/gid= mount option that fixates the owners of all files and directories,
|
||||
* current and future. */
|
||||
assert(fstype);
|
||||
|
||||
/* On new kernels we can just ask the kernel */
|
||||
r = mount_option_supported(fstype, "norecovery", NULL);
|
||||
if (r >= 0)
|
||||
return r;
|
||||
|
||||
return STR_IN_SET(fstype,
|
||||
"ext3",
|
||||
"ext4",
|
||||
"xfs",
|
||||
"btrfs");
|
||||
}
|
||||
|
||||
/* Reports whether the given file system type understands the umask= mount option. */
bool fstype_can_umask(const char *fstype) {
        int supported;

        assert(fstype);

        /* Preferably ask the kernel directly; a negative result means it could not tell us. */
        supported = mount_option_supported(fstype, "umask", "0077");
        if (supported < 0)
                /* No answer from the kernel, fall back to what we know statically. */
                return streq(fstype, "vfat");

        return supported;
}
|
||||
|
||||
bool fstype_can_uid_gid(const char *fstype) {
|
||||
/* All file systems that have a uid=/gid= mount option that fixates the owners of all files and
|
||||
* directories, current and future. Note that this does *not* ask the kernel via
|
||||
* mount_option_supported() here because the uid=/gid= setting of various file systems mean different
|
||||
* things: some apply it only to the root dir inode, others to all inodes in the file system. Thus we
|
||||
* maintain the curated list below. 😢 */
|
||||
|
||||
return STR_IN_SET(fstype,
|
||||
"adfs",
|
||||
|
@ -602,3 +648,111 @@ int mount_propagation_flag_from_string(const char *name, unsigned long *ret) {
|
|||
bool mount_propagation_flag_is_valid(unsigned long flag) {
|
||||
return IN_SET(flag, 0, MS_SHARED, MS_PRIVATE, MS_SLAVE);
|
||||
}
|
||||
|
||||
unsigned long ms_nosymfollow_supported(void) {
|
||||
_cleanup_close_ int fsfd = -EBADF, mntfd = -EBADF;
|
||||
static int cache = -1;
|
||||
|
||||
/* Returns MS_NOSYMFOLLOW if it is supported, zero otherwise. */
|
||||
|
||||
if (cache >= 0)
|
||||
return cache ? MS_NOSYMFOLLOW : 0;
|
||||
|
||||
/* Checks if MS_NOSYMFOLLOW is supported (which was added in 5.10). We use the new mount API's
|
||||
* mount_setattr() call for that, which was added in 5.12, which is close enough. */
|
||||
|
||||
fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC);
|
||||
if (fsfd < 0) {
|
||||
if (ERRNO_IS_NOT_SUPPORTED(errno))
|
||||
goto not_supported;
|
||||
|
||||
log_debug_errno(errno, "Failed to open superblock context for tmpfs: %m");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0) {
|
||||
if (ERRNO_IS_NOT_SUPPORTED(errno))
|
||||
goto not_supported;
|
||||
|
||||
log_debug_errno(errno, "Failed to create tmpfs superblock: %m");
|
||||
return 0;
|
||||
}
|
||||
|
||||
mntfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0);
|
||||
if (mntfd < 0) {
|
||||
if (ERRNO_IS_NOT_SUPPORTED(errno))
|
||||
goto not_supported;
|
||||
|
||||
log_debug_errno(errno, "Failed to turn superblock fd into mount fd: %m");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (mount_setattr(mntfd, "", AT_EMPTY_PATH|AT_RECURSIVE,
|
||||
&(struct mount_attr) {
|
||||
.attr_set = MOUNT_ATTR_NOSYMFOLLOW,
|
||||
}, sizeof(struct mount_attr)) < 0) {
|
||||
if (ERRNO_IS_NOT_SUPPORTED(errno))
|
||||
goto not_supported;
|
||||
|
||||
log_debug_errno(errno, "Failed to set MOUNT_ATTR_NOSYMFOLLOW mount attribute: %m");
|
||||
return 0;
|
||||
}
|
||||
|
||||
cache = true;
|
||||
return MS_NOSYMFOLLOW;
|
||||
|
||||
not_supported:
|
||||
cache = false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mount_option_supported(const char *fstype, const char *key, const char *value) {
|
||||
_cleanup_close_ int fd = -EBADF;
|
||||
int r;
|
||||
|
||||
/* Checks if the specified file system supports a mount option. Returns > 0 if it suppors it, == 0 if
|
||||
* it does not. Return -EAGAIN if we can't determine it. And any other error otherwise. */
|
||||
|
||||
assert(fstype);
|
||||
assert(key);
|
||||
|
||||
fd = fsopen(fstype, FSOPEN_CLOEXEC);
|
||||
if (fd < 0) {
|
||||
if (ERRNO_IS_NOT_SUPPORTED(errno))
|
||||
return -EAGAIN; /* new mount API not available → don't know */
|
||||
|
||||
return log_debug_errno(errno, "Failed to open superblock context for '%s': %m", fstype);
|
||||
}
|
||||
|
||||
/* Various file systems have not been converted to the new mount API yet. For such file systems
|
||||
* fsconfig() with FSCONFIG_SET_STRING/FSCONFIG_SET_FLAG never fail. Which sucks, because we want to
|
||||
* use it for testing support, after all. Let's hence do a check if the file system got converted yet
|
||||
* first. */
|
||||
if (fsconfig(fd, FSCONFIG_SET_FD, "adefinitelynotexistingmountoption", NULL, fd) < 0) {
|
||||
/* If FSCONFIG_SET_FD is not supported for the fs, then the file system was not converted to
|
||||
* the new mount API yet. If it returns EINVAL the mount option doesn't exist, but the fstype
|
||||
* is converted. */
|
||||
if (errno == EOPNOTSUPP)
|
||||
return -EAGAIN; /* FSCONFIG_SET_FD not supported on the fs, hence not converted to new mount API → don't know */
|
||||
if (errno != EINVAL)
|
||||
return log_debug_errno(errno, "Failed to check if file system has been converted to new mount API: %m");
|
||||
|
||||
/* So FSCONFIG_SET_FD worked, but the option didn't exist (we got EINVAL), this means the fs
|
||||
* is converted. Let's now ask the actual question we wonder about. */
|
||||
} else
|
||||
return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN), "FSCONFIG_SET_FD worked unexpectedly for '%s', whoa!", fstype);
|
||||
|
||||
if (value)
|
||||
r = fsconfig(fd, FSCONFIG_SET_STRING, key, value, 0);
|
||||
else
|
||||
r = fsconfig(fd, FSCONFIG_SET_FLAG, key, NULL, 0);
|
||||
if (r < 0) {
|
||||
if (errno == EINVAL)
|
||||
return false; /* EINVAL means option not supported. */
|
||||
|
||||
return log_debug_errno(errno, "Failed to set '%s%s%s' on '%s' superblock context: %m",
|
||||
key, value ? "=" : "", strempty(value), fstype);
|
||||
}
|
||||
|
||||
return true; /* works! */
|
||||
}
|
||||
|
|
|
@ -49,6 +49,8 @@ bool fstype_is_blockdev_backed(const char *fstype);
|
|||
bool fstype_is_ro(const char *fsype);
|
||||
bool fstype_can_discard(const char *fstype);
|
||||
bool fstype_can_uid_gid(const char *fstype);
|
||||
bool fstype_can_norecovery(const char *fstype);
|
||||
bool fstype_can_umask(const char *fstype);
|
||||
|
||||
int dev_is_devtmpfs(void);
|
||||
|
||||
|
@ -58,3 +60,7 @@ int mount_nofollow(const char *source, const char *target, const char *filesyste
|
|||
const char *mount_propagation_flag_to_string(unsigned long flags);
|
||||
int mount_propagation_flag_from_string(const char *name, unsigned long *ret);
|
||||
bool mount_propagation_flag_is_valid(unsigned long flag);
|
||||
|
||||
unsigned long ms_nosymfollow_supported(void);
|
||||
|
||||
int mount_option_supported(const char *fstype, const char *key, const char *value);
|
||||
|
|
|
@ -1099,27 +1099,6 @@ static int mount_bind_sysfs(const MountEntry *m) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
static bool mount_option_supported(const char *fstype, const char *key, const char *value) {
|
||||
_cleanup_close_ int fd = -EBADF;
|
||||
int r;
|
||||
|
||||
/* This function assumes support by default. Only if the fsconfig() call fails with -EINVAL/-EOPNOTSUPP
|
||||
* will it report that the option/value is not supported. */
|
||||
|
||||
fd = fsopen(fstype, FSOPEN_CLOEXEC);
|
||||
if (fd < 0) {
|
||||
if (errno != ENOSYS)
|
||||
log_debug_errno(errno, "Failed to open superblock context for '%s': %m", fstype);
|
||||
return true; /* If fsopen() fails for whatever reason, assume the value is supported. */
|
||||
}
|
||||
|
||||
r = fsconfig(fd, FSCONFIG_SET_STRING, key, value, 0);
|
||||
if (r < 0 && !IN_SET(errno, EINVAL, EOPNOTSUPP, ENOSYS))
|
||||
log_debug_errno(errno, "Failed to set '%s=%s' on '%s' superblock context: %m", key, value, fstype);
|
||||
|
||||
return r >= 0 || !IN_SET(errno, EINVAL, EOPNOTSUPP);
|
||||
}
|
||||
|
||||
static int mount_procfs(const MountEntry *m, const NamespaceInfo *ns_info) {
|
||||
_cleanup_free_ char *opts = NULL;
|
||||
const char *entry_path;
|
||||
|
@ -1147,13 +1126,14 @@ static int mount_procfs(const MountEntry *m, const NamespaceInfo *ns_info) {
|
|||
* fsopen()/fsconfig() was also backported on some distros which allows us to detect
|
||||
* hidepid=/subset= support in even more scenarios. */
|
||||
|
||||
if (mount_option_supported("proc", "hidepid", hpv)) {
|
||||
if (mount_option_supported("proc", "hidepid", hpv) != 0) {
|
||||
opts = strjoin("hidepid=", hpv);
|
||||
if (!opts)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
if (ns_info->proc_subset == PROC_SUBSET_PID && mount_option_supported("proc", "subset", "pid"))
|
||||
if (ns_info->proc_subset == PROC_SUBSET_PID &&
|
||||
mount_option_supported("proc", "subset", "pid") != 0)
|
||||
if (!strextend_with_separator(&opts, ",", "subset=pid"))
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
|
|
@ -245,9 +245,7 @@ static int add_mount(
|
|||
fprintf(f, "Type=%s\n", fstype);
|
||||
|
||||
if (options)
|
||||
fprintf(f, "Options=%s,%s\n", options, rw ? "rw" : "ro");
|
||||
else
|
||||
fprintf(f, "Options=%s\n", rw ? "rw" : "ro");
|
||||
fprintf(f, "Options=%s\n", options);
|
||||
|
||||
r = fflush_and_check(f);
|
||||
if (r < 0)
|
||||
|
@ -301,18 +299,31 @@ static int path_is_busy(const char *where) {
|
|||
}
|
||||
|
||||
static int add_partition_mount(
|
||||
PartitionDesignator d,
|
||||
DissectedPartition *p,
|
||||
const char *id,
|
||||
const char *where,
|
||||
const char *description) {
|
||||
|
||||
_cleanup_free_ char *options = NULL;
|
||||
int r;
|
||||
|
||||
assert(p);
|
||||
|
||||
r = path_is_busy(where);
|
||||
if (r != 0)
|
||||
return r < 0 ? r : 0;
|
||||
|
||||
r = partition_pick_mount_options(
|
||||
d,
|
||||
dissected_partition_fstype(p),
|
||||
p->rw,
|
||||
/* discard= */ true,
|
||||
&options,
|
||||
/* ret_ms_flags= */ NULL);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
return add_mount(
|
||||
id,
|
||||
p->node,
|
||||
|
@ -321,7 +332,7 @@ static int add_partition_mount(
|
|||
p->rw,
|
||||
p->growfs,
|
||||
/* measure= */ STR_IN_SET(id, "root", "var"), /* by default measure rootfs and /var, since they contain the "identity" of the system */
|
||||
NULL,
|
||||
options,
|
||||
description,
|
||||
SPECIAL_LOCAL_FS_TARGET);
|
||||
}
|
||||
|
@ -452,20 +463,8 @@ static int add_automount(
|
|||
return generator_add_symlink(arg_dest, SPECIAL_LOCAL_FS_TARGET, "wants", unit);
|
||||
}
|
||||
|
||||
static const char *esp_or_xbootldr_options(const DissectedPartition *p) {
|
||||
assert(p);
|
||||
|
||||
/* Discovered ESP and XBOOTLDR partition are always hardened with "noexec,nosuid,nodev".
|
||||
* If we probed vfat or have no idea about the file system then assume these file systems are vfat
|
||||
* and thus understand "umask=0077". */
|
||||
|
||||
if (!p->fstype || streq(p->fstype, "vfat"))
|
||||
return "umask=0077,noexec,nosuid,nodev";
|
||||
|
||||
return "noexec,nosuid,nodev";
|
||||
}
|
||||
|
||||
static int add_partition_xbootldr(DissectedPartition *p) {
|
||||
_cleanup_free_ char *options = NULL;
|
||||
int r;
|
||||
|
||||
assert(p);
|
||||
|
@ -489,13 +488,23 @@ static int add_partition_xbootldr(DissectedPartition *p) {
|
|||
if (r > 0)
|
||||
return 0;
|
||||
|
||||
r = partition_pick_mount_options(
|
||||
PARTITION_XBOOTLDR,
|
||||
dissected_partition_fstype(p),
|
||||
/* rw= */ true,
|
||||
/* discard= */ false,
|
||||
&options,
|
||||
/* ret_ms_flags= */ NULL);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to determine default mount options for Boot Loader Partition: %m");
|
||||
|
||||
return add_automount("boot",
|
||||
p->node,
|
||||
"/boot",
|
||||
p->fstype,
|
||||
/* rw= */ true,
|
||||
/* growfs= */ false,
|
||||
esp_or_xbootldr_options(p),
|
||||
options,
|
||||
"Boot Loader Partition",
|
||||
120 * USEC_PER_SEC);
|
||||
}
|
||||
|
@ -503,6 +512,7 @@ static int add_partition_xbootldr(DissectedPartition *p) {
|
|||
#if ENABLE_EFI
|
||||
static int add_partition_esp(DissectedPartition *p, bool has_xbootldr) {
|
||||
const char *esp_path = NULL, *id = NULL;
|
||||
_cleanup_free_ char *options = NULL;
|
||||
int r;
|
||||
|
||||
assert(p);
|
||||
|
@ -569,13 +579,23 @@ static int add_partition_esp(DissectedPartition *p, bool has_xbootldr) {
|
|||
} else
|
||||
log_debug("Not an EFI boot, skipping ESP check.");
|
||||
|
||||
r = partition_pick_mount_options(
|
||||
PARTITION_ESP,
|
||||
dissected_partition_fstype(p),
|
||||
/* rw= */ true,
|
||||
/* discard= */ false,
|
||||
&options,
|
||||
/* ret_ms_flags= */ NULL);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to determine default mount options for EFI System Partition: %m");
|
||||
|
||||
return add_automount(id,
|
||||
p->node,
|
||||
esp_path,
|
||||
p->fstype,
|
||||
/* rw= */ true,
|
||||
/* growfs= */ false,
|
||||
esp_or_xbootldr_options(p),
|
||||
options,
|
||||
"EFI System Partition Automount",
|
||||
120 * USEC_PER_SEC);
|
||||
}
|
||||
|
@ -637,6 +657,7 @@ static int add_root_cryptsetup(void) {
|
|||
|
||||
static int add_root_mount(void) {
|
||||
#if ENABLE_EFI
|
||||
_cleanup_free_ char *options = NULL;
|
||||
int r;
|
||||
|
||||
if (!is_efi_boot()) {
|
||||
|
@ -668,6 +689,20 @@ static int add_root_mount(void) {
|
|||
/* Note that we do not need to enable systemd-remount-fs.service here. If
|
||||
* /etc/fstab exists, systemd-fstab-generator will pull it in for us. */
|
||||
|
||||
r = partition_pick_mount_options(
|
||||
PARTITION_ROOT,
|
||||
arg_root_fstype,
|
||||
arg_root_rw > 0,
|
||||
/* discard= */ true,
|
||||
&options,
|
||||
/* ret_ms_flags= */ NULL);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to pick root mount options: %m");
|
||||
|
||||
if (arg_root_options)
|
||||
if (!strextend_with_separator(&options, ",", arg_root_options))
|
||||
return log_oom();
|
||||
|
||||
return add_mount(
|
||||
"root",
|
||||
"/dev/gpt-auto-root",
|
||||
|
@ -676,7 +711,7 @@ static int add_root_mount(void) {
|
|||
/* rw= */ arg_root_rw > 0,
|
||||
/* growfs= */ false,
|
||||
/* measure= */ true,
|
||||
arg_root_options,
|
||||
options,
|
||||
"Root Partition",
|
||||
in_initrd() ? SPECIAL_INITRD_ROOT_FS_TARGET : SPECIAL_LOCAL_FS_TARGET);
|
||||
#else
|
||||
|
@ -745,25 +780,25 @@ static int enumerate_partitions(dev_t devnum) {
|
|||
}
|
||||
|
||||
if (m->partitions[PARTITION_HOME].found) {
|
||||
k = add_partition_mount(m->partitions + PARTITION_HOME, "home", "/home", "Home Partition");
|
||||
k = add_partition_mount(PARTITION_HOME, m->partitions + PARTITION_HOME, "home", "/home", "Home Partition");
|
||||
if (k < 0)
|
||||
r = k;
|
||||
}
|
||||
|
||||
if (m->partitions[PARTITION_SRV].found) {
|
||||
k = add_partition_mount(m->partitions + PARTITION_SRV, "srv", "/srv", "Server Data Partition");
|
||||
k = add_partition_mount(PARTITION_SRV, m->partitions + PARTITION_SRV, "srv", "/srv", "Server Data Partition");
|
||||
if (k < 0)
|
||||
r = k;
|
||||
}
|
||||
|
||||
if (m->partitions[PARTITION_VAR].found) {
|
||||
k = add_partition_mount(m->partitions + PARTITION_VAR, "var", "/var", "Variable Data Partition");
|
||||
k = add_partition_mount(PARTITION_VAR, m->partitions + PARTITION_VAR, "var", "/var", "Variable Data Partition");
|
||||
if (k < 0)
|
||||
r = k;
|
||||
}
|
||||
|
||||
if (m->partitions[PARTITION_TMP].found) {
|
||||
k = add_partition_mount(m->partitions + PARTITION_TMP, "var-tmp", "/var/tmp", "Temporary Data Partition");
|
||||
k = add_partition_mount(PARTITION_TMP, m->partitions + PARTITION_TMP, "var-tmp", "/var/tmp", "Temporary Data Partition");
|
||||
if (k < 0)
|
||||
r = k;
|
||||
}
|
||||
|
|
|
@ -50,6 +50,7 @@
|
|||
#include "id128-util.h"
|
||||
#include "import-util.h"
|
||||
#include "io-util.h"
|
||||
#include "missing_mount.h"
|
||||
#include "mkdir-label.h"
|
||||
#include "mount-util.h"
|
||||
#include "mountpoint-util.h"
|
||||
|
@ -1502,7 +1503,99 @@ static int fs_grow(const char *node_path, const char *mount_path) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
int partition_pick_mount_options(
|
||||
PartitionDesignator d,
|
||||
const char *fstype,
|
||||
bool rw,
|
||||
bool discard,
|
||||
char **ret_options,
|
||||
unsigned long *ret_ms_flags) {
|
||||
|
||||
_cleanup_free_ char *options = NULL;
|
||||
|
||||
assert(ret_options);
|
||||
|
||||
/* Selects a baseline of bind mount flags, that should always apply.
|
||||
*
|
||||
* Firstly, we set MS_NODEV universally on all mounts, since we don't want to allow device nodes outside of /dev/.
|
||||
*
|
||||
* On /var/tmp/ we'll also set MS_NOSUID, same as we set for /tmp/ on the host.
|
||||
*
|
||||
* On the ESP and XBOOTLDR partitions we'll also disable symlinks, and execution. These file systems
|
||||
* are generally untrusted (i.e. not encrypted or authenticated), and typically VFAT hence we should
|
||||
* be as restrictive as possible, and this shouldn't hurt, since the functionality is not available
|
||||
* there anyway. */
|
||||
|
||||
unsigned long flags = MS_NODEV;
|
||||
|
||||
if (!rw)
|
||||
flags |= MS_RDONLY;
|
||||
|
||||
switch (d) {
|
||||
|
||||
case PARTITION_ESP:
|
||||
case PARTITION_XBOOTLDR:
|
||||
flags |= MS_NOSUID|MS_NOEXEC|ms_nosymfollow_supported();
|
||||
|
||||
/* The ESP might contain a pre-boot random seed. Let's make this unaccessible to regular
|
||||
* userspace. ESP/XBOOTLDR is almost certainly VFAT, hence if we don't know assume it is. */
|
||||
if (!fstype || fstype_can_umask(fstype))
|
||||
if (!strextend_with_separator(&options, ",", "umask=0077"))
|
||||
return -ENOMEM;
|
||||
break;
|
||||
|
||||
case PARTITION_TMP:
|
||||
flags |= MS_NOSUID;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* So, when you request MS_RDONLY from ext4, then this means nothing. It happily still writes to the
|
||||
* backing storage. What's worse, the BLKRO[GS]ET flag and (in case of loopback devices)
|
||||
* LO_FLAGS_READ_ONLY don't mean anything, they affect userspace accesses only, and write accesses
|
||||
* from the upper file system still get propagated through to the underlying file system,
|
||||
* unrestricted. To actually get ext4/xfs/btrfs to stop writing to the device we need to specify
|
||||
* "norecovery" as mount option, in addition to MS_RDONLY. Yes, this sucks, since it means we need to
|
||||
* carry a per file system table here.
|
||||
*
|
||||
* Note that this means that we might not be able to mount corrupted file systems as read-only
|
||||
* anymore (since in some cases the kernel implementations will refuse mounting when corrupted,
|
||||
* read-only and "norecovery" is specified). But I think for the case of automatically determined
|
||||
* mount options for loopback devices this is the right choice, since otherwise using the same
|
||||
* loopback file twice even in read-only mode, is going to fail badly sooner or later. The usecase of
|
||||
* making reuse of the immutable images "just work" is more relevant to us than having read-only
|
||||
* access that actually modifies stuff work on such image files. Or to say this differently: if
|
||||
* people want their file systems to be fixed up they should just open them in writable mode, where
|
||||
* all these problems don't exist. */
|
||||
if (!rw && fstype && fstype_can_norecovery(fstype))
|
||||
if (!strextend_with_separator(&options, ",", "norecovery"))
|
||||
return -ENOMEM;
|
||||
|
||||
if (discard && fstype && fstype_can_discard(fstype))
|
||||
if (!strextend_with_separator(&options, ",", "discard"))
|
||||
return -ENOMEM;
|
||||
|
||||
if (!ret_ms_flags) /* Fold flags into option string if ret_flags specified as NULL */
|
||||
if (!strextend_with_separator(&options, ",",
|
||||
FLAGS_SET(flags, MS_RDONLY) ? "ro" : "rw",
|
||||
FLAGS_SET(flags, MS_NODEV) ? "nodev" : "dev",
|
||||
FLAGS_SET(flags, MS_NOSUID) ? "nosuid" : "suid",
|
||||
FLAGS_SET(flags, MS_NOEXEC) ? "noexec" : "exec",
|
||||
FLAGS_SET(flags, MS_NOSYMFOLLOW) ? "nosymfollow" : NULL))
|
||||
/* NB: we suppress 'symfollow' here, since it's the default, and old /bin/mount might not know it */
|
||||
return -ENOMEM;
|
||||
|
||||
if (ret_ms_flags)
|
||||
*ret_ms_flags = flags;
|
||||
|
||||
*ret_options = TAKE_PTR(options);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mount_partition(
|
||||
PartitionDesignator d,
|
||||
DissectedPartition *m,
|
||||
const char *where,
|
||||
const char *directory,
|
||||
|
@ -1511,8 +1604,9 @@ static int mount_partition(
|
|||
DissectImageFlags flags) {
|
||||
|
||||
_cleanup_free_ char *chased = NULL, *options = NULL;
|
||||
bool rw, discard, remap_uid_gid = false;
|
||||
const char *p, *node, *fstype;
|
||||
bool rw, remap_uid_gid = false;
|
||||
unsigned long ms_flags;
|
||||
int r;
|
||||
|
||||
assert(m);
|
||||
|
@ -1523,7 +1617,7 @@ static int mount_partition(
|
|||
|
||||
/* Use decrypted node and matching fstype if available, otherwise use the original device */
|
||||
node = FORMAT_PROC_FD_PATH(m->mount_node_fd);
|
||||
fstype = m->decrypted_node ? m->decrypted_fstype: m->fstype;
|
||||
fstype = dissected_partition_fstype(m);
|
||||
|
||||
if (!fstype)
|
||||
return -EAFNOSUPPORT;
|
||||
|
@ -1541,6 +1635,9 @@ static int mount_partition(
|
|||
|
||||
rw = m->rw && !(flags & DISSECT_IMAGE_MOUNT_READ_ONLY);
|
||||
|
||||
discard = ((flags & DISSECT_IMAGE_DISCARD) ||
|
||||
((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node) > 0));
|
||||
|
||||
if (FLAGS_SET(flags, DISSECT_IMAGE_FSCK) && rw) {
|
||||
r = run_fsck(m->mount_node_fd, fstype);
|
||||
if (r < 0)
|
||||
|
@ -1571,14 +1668,9 @@ static int mount_partition(
|
|||
p = where;
|
||||
}
|
||||
|
||||
/* If requested, turn on discard support. */
|
||||
if (fstype_can_discard(fstype) &&
|
||||
((flags & DISSECT_IMAGE_DISCARD) ||
|
||||
((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node) > 0))) {
|
||||
options = strdup("discard");
|
||||
if (!options)
|
||||
return -ENOMEM;
|
||||
}
|
||||
r = partition_pick_mount_options(d, dissected_partition_fstype(m), rw, discard, &options, &ms_flags);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (uid_is_valid(uid_shift) && uid_shift != 0) {
|
||||
|
||||
|
@ -1598,28 +1690,7 @@ static int mount_partition(
|
|||
if (!strextend_with_separator(&options, ",", m->mount_options))
|
||||
return -ENOMEM;
|
||||
|
||||
/* So, when you request MS_RDONLY from ext4, then this means nothing. It happily still writes to the
|
||||
* backing storage. What's worse, the BLKRO[GS]ET flag and (in case of loopback devices)
|
||||
* LO_FLAGS_READ_ONLY don't mean anything, they affect userspace accesses only, and write accesses
|
||||
* from the upper file system still get propagated through to the underlying file system,
|
||||
* unrestricted. To actually get ext4/xfs/btrfs to stop writing to the device we need to specify
|
||||
* "norecovery" as mount option, in addition to MS_RDONLY. Yes, this sucks, since it means we need to
|
||||
* carry a per file system table here.
|
||||
*
|
||||
* Note that this means that we might not be able to mount corrupted file systems as read-only
|
||||
* anymore (since in some cases the kernel implementations will refuse mounting when corrupted,
|
||||
* read-only and "norecovery" is specified). But I think for the case of automatically determined
|
||||
* mount options for loopback devices this is the right choice, since otherwise using the same
|
||||
* loopback file twice even in read-only mode, is going to fail badly sooner or later. The usecase of
|
||||
* making reuse of the immutable images "just work" is more relevant to us than having read-only
|
||||
* access that actually modifies stuff work on such image files. Or to say this differently: if
|
||||
* people want their file systems to be fixed up they should just open them in writable mode, where
|
||||
* all these problems don't exist. */
|
||||
if (!rw && STRPTR_IN_SET(fstype, "ext3", "ext4", "xfs", "btrfs"))
|
||||
if (!strextend_with_separator(&options, ",", "norecovery"))
|
||||
return -ENOMEM;
|
||||
|
||||
r = mount_nofollow_verbose(LOG_DEBUG, node, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options);
|
||||
r = mount_nofollow_verbose(LOG_DEBUG, node, p, fstype, ms_flags, options);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
|
@ -1692,14 +1763,14 @@ int dissected_image_mount(
|
|||
|
||||
/* First mount the root fs. If there's none we use a tmpfs. */
|
||||
if (m->partitions[PARTITION_ROOT].found)
|
||||
r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, uid_shift, uid_range, flags);
|
||||
r = mount_partition(PARTITION_ROOT, m->partitions + PARTITION_ROOT, where, NULL, uid_shift, uid_range, flags);
|
||||
else
|
||||
r = mount_root_tmpfs(where, uid_shift, flags);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
/* For us mounting root always means mounting /usr as well */
|
||||
r = mount_partition(m->partitions + PARTITION_USR, where, "/usr", uid_shift, uid_range, flags);
|
||||
r = mount_partition(PARTITION_USR, m->partitions + PARTITION_USR, where, "/usr", uid_shift, uid_range, flags);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
|
@ -1731,23 +1802,23 @@ int dissected_image_mount(
|
|||
if (flags & DISSECT_IMAGE_MOUNT_ROOT_ONLY)
|
||||
return 0;
|
||||
|
||||
r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", uid_shift, uid_range, flags);
|
||||
r = mount_partition(PARTITION_HOME, m->partitions + PARTITION_HOME, where, "/home", uid_shift, uid_range, flags);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", uid_shift, uid_range, flags);
|
||||
r = mount_partition(PARTITION_SRV, m->partitions + PARTITION_SRV, where, "/srv", uid_shift, uid_range, flags);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = mount_partition(m->partitions + PARTITION_VAR, where, "/var", uid_shift, uid_range, flags);
|
||||
r = mount_partition(PARTITION_VAR, m->partitions + PARTITION_VAR, where, "/var", uid_shift, uid_range, flags);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = mount_partition(m->partitions + PARTITION_TMP, where, "/var/tmp", uid_shift, uid_range, flags);
|
||||
r = mount_partition(PARTITION_TMP, m->partitions + PARTITION_TMP, where, "/var/tmp", uid_shift, uid_range, flags);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
xbootldr_mounted = mount_partition(m->partitions + PARTITION_XBOOTLDR, where, "/boot", uid_shift, uid_range, flags);
|
||||
xbootldr_mounted = mount_partition(PARTITION_XBOOTLDR, m->partitions + PARTITION_XBOOTLDR, where, "/boot", uid_shift, uid_range, flags);
|
||||
if (xbootldr_mounted < 0)
|
||||
return xbootldr_mounted;
|
||||
|
||||
|
@ -1773,7 +1844,7 @@ int dissected_image_mount(
|
|||
return r;
|
||||
} else if (dir_is_empty(p, /* ignore_hidden_or_backup= */ false) > 0) {
|
||||
/* It exists and is an empty directory. Let's mount the ESP there. */
|
||||
r = mount_partition(m->partitions + PARTITION_ESP, where, "/boot", uid_shift, uid_range, flags);
|
||||
r = mount_partition(PARTITION_ESP, m->partitions + PARTITION_ESP, where, "/boot", uid_shift, uid_range, flags);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
|
@ -1785,7 +1856,7 @@ int dissected_image_mount(
|
|||
if (!esp_done) {
|
||||
/* OK, let's mount the ESP now to /efi (possibly creating the dir if missing) */
|
||||
|
||||
r = mount_partition(m->partitions + PARTITION_ESP, where, "/efi", uid_shift, uid_range, flags);
|
||||
r = mount_partition(PARTITION_ESP, m->partitions + PARTITION_ESP, where, "/efi", uid_shift, uid_range, flags);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
|
|
@ -193,3 +193,11 @@ int dissect_fstype_ok(const char *fstype);
|
|||
|
||||
int probe_sector_size(int fd, uint32_t *ret);
|
||||
int probe_sector_size_prefer_ioctl(int fd, uint32_t *ret);
|
||||
|
||||
int partition_pick_mount_options(PartitionDesignator d, const char *fstype, bool rw, bool discard, char **ret_options, unsigned long *ret_ms_flags);
|
||||
|
||||
static inline const char *dissected_partition_fstype(const DissectedPartition *m) {
|
||||
assert(m);
|
||||
|
||||
return m->decrypted_node ? m->decrypted_fstype : m->fstype;
|
||||
}
|
||||
|
|
|
@ -321,6 +321,51 @@ TEST(fd_is_mount_point) {
|
|||
assert_se(fd_is_mount_point(fd, "", 0) == -EINVAL);
|
||||
}
|
||||
|
||||
TEST(ms_nosymfollow_supported) {
|
||||
log_info("MS_NOSYMFOLLOW supported: %s", yes_no(ms_nosymfollow_supported()));
|
||||
}
|
||||
|
||||
TEST(mount_option_supported) {
|
||||
int r;
|
||||
|
||||
r = mount_option_supported("tmpfs", "size", "64M");
|
||||
log_info("tmpfs supports size=64M: %s (%i)", r < 0 ? "dont know" : yes_no(r), r);
|
||||
assert_se(r > 0 || (r < 0 && ERRNO_IS_PRIVILEGE(r)));
|
||||
|
||||
r = mount_option_supported("ext4", "discard", NULL);
|
||||
log_info("ext4 supports discard: %s (%i)", r < 0 ? "dont know" : yes_no(r), r);
|
||||
assert_se(r > 0 || r == -EAGAIN || (r < 0 && ERRNO_IS_PRIVILEGE(r)));
|
||||
|
||||
r = mount_option_supported("tmpfs", "idontexist", "64M");
|
||||
log_info("tmpfs supports idontexist: %s (%i)", r < 0 ? "dont know" : yes_no(r), r);
|
||||
assert_se(r == 0 || (r < 0 && ERRNO_IS_PRIVILEGE(r)));
|
||||
|
||||
r = mount_option_supported("tmpfs", "ialsodontexist", NULL);
|
||||
log_info("tmpfs supports ialsodontexist: %s (%i)", r < 0 ? "dont know" : yes_no(r), r);
|
||||
assert_se(r == 0 || (r < 0 && ERRNO_IS_PRIVILEGE(r)));
|
||||
|
||||
r = mount_option_supported("proc", "hidepid", "1");
|
||||
log_info("proc supports hidepid=1: %s (%i)", r < 0 ? "dont know" : yes_no(r), r);
|
||||
assert_se(r >= 0 || (r < 0 && ERRNO_IS_PRIVILEGE(r)));
|
||||
}
|
||||
|
||||
TEST(fstype_can_discard) {
|
||||
assert_se(fstype_can_discard("ext4"));
|
||||
assert_se(!fstype_can_discard("squashfs"));
|
||||
assert_se(!fstype_can_discard("iso9660"));
|
||||
}
|
||||
|
||||
TEST(fstype_can_norecovery) {
|
||||
assert_se(fstype_can_norecovery("ext4"));
|
||||
assert_se(!fstype_can_norecovery("vfat"));
|
||||
assert_se(!fstype_can_norecovery("tmpfs"));
|
||||
}
|
||||
|
||||
TEST(fstype_can_umask) {
|
||||
assert_se(fstype_can_umask("vfat"));
|
||||
assert_se(!fstype_can_umask("tmpfs"));
|
||||
}
|
||||
|
||||
static int intro(void) {
|
||||
/* let's move into our own mount namespace with all propagation from the host turned off, so
|
||||
* that /proc/self/mountinfo is static and constant for the whole time our test runs. */
|
||||
|
|
Loading…
Reference in a new issue