switch-root: always use MS_BIND to move api vfs over

Previously, we would use MS_MOVE to move the old procfs, sysfs, /dev/ and
/run/ to the new root in some code paths, and MS_BIND in others.

The logic when to use MS_MOVE and when to use MS_BIND was pretty
arbitrary so far: we'd use MS_MOVE during the initrd → host transition
and MS_BIND when transitioning from host into the exitrd during
shutdown.

Traditionally, using MS_MOVE was preferable, because we didn't bother
with unmounting the old mount hierarchy before the switch root, and thus
using MS_MOVE did some clean-up as side-effect (because the old mounts
went away this way). But nowadays we properly unmount all remaining
mount points when transitioning (since 268d1244e8), so this clean-up
is pointless.

Let's just use MS_BIND always. Let's tweak it though: let's use
MS_BIND|MS_REC for the kernel API VFS, and MS_BIND without MS_REC for
/run/. The latter reflects the fact that the submounts /run/ usually has
are not so much about accessing kernel APIs as about auxiliary user
resources. Hence let's only move the main mount over for that.

While we are at it, also set up the base filesystem *before* we move the
mounts from the old to the new root, since the base filesystem setup
logic creates various needed inodes for us, which we really should make
use of instead of creating on our own.
This commit is contained in:
Lennart Poettering 2023-05-16 14:57:31 +02:00
parent 34f21ff610
commit 7c764d4599
4 changed files with 40 additions and 31 deletions

View file

@ -1838,10 +1838,8 @@ static int do_reexecute(
} }
if (switch_root_dir) { if (switch_root_dir) {
/* And switch root with MS_MOVE, because we remove the old directory afterwards and detach it. */
r = switch_root(/* new_root= */ switch_root_dir, r = switch_root(/* new_root= */ switch_root_dir,
/* old_root_after= */ NULL, /* old_root_after= */ NULL,
MS_MOVE,
/* destroy_old_root= */ objective == MANAGER_SWITCH_ROOT); /* destroy_old_root= */ objective == MANAGER_SWITCH_ROOT);
if (r < 0) if (r < 0)
log_error_errno(r, "Failed to switch root, trying to continue: %m"); log_error_errno(r, "Failed to switch root, trying to continue: %m");

View file

@ -10,6 +10,7 @@
#include "base-filesystem.h" #include "base-filesystem.h"
#include "chase.h" #include "chase.h"
#include "creds-util.h"
#include "fd-util.h" #include "fd-util.h"
#include "initrd-util.h" #include "initrd-util.h"
#include "log.h" #include "log.h"
@ -27,15 +28,26 @@
int switch_root(const char *new_root, int switch_root(const char *new_root,
const char *old_root_after, /* path below the new root, where to place the old root after the transition; may be NULL to unmount it */ const char *old_root_after, /* path below the new root, where to place the old root after the transition; may be NULL to unmount it */
unsigned long mount_flags, /* MS_MOVE or MS_BIND used for /proc/, /dev/, /run/, /sys/ */
bool destroy_old_root) { bool destroy_old_root) {
struct {
const char *path;
unsigned long mount_flags;
} transfer_table[] = {
{ "/dev", MS_BIND|MS_REC }, /* Recursive, because we want to save the original /dev/shm + /dev/pts and similar */
{ "/sys", MS_BIND|MS_REC }, /* Similar, we want to retain various API VFS, or the cgroupv1 /sys/fs/cgroup/ tree */
{ "/proc", MS_BIND|MS_REC }, /* Similar */
{ "/run", MS_BIND }, /* Stuff mounted below this we don't save, as it might have lost its relevance, i.e. credentials, removable media and such, we rather want that the new boot mounts this fresh */
{ SYSTEM_CREDENTIALS_DIRECTORY, MS_BIND }, /* Credentials passed into the system should survive */
{ ENCRYPTED_SYSTEM_CREDENTIALS_DIRECTORY, MS_BIND }, /* Similar */
{ "/run/host", MS_BIND|MS_REC }, /* Host supplied hierarchy should also survive */
};
_cleanup_close_ int old_root_fd = -EBADF, new_root_fd = -EBADF; _cleanup_close_ int old_root_fd = -EBADF, new_root_fd = -EBADF;
_cleanup_free_ char *resolved_old_root_after = NULL; _cleanup_free_ char *resolved_old_root_after = NULL;
int r, istmp; int r, istmp;
assert(new_root); assert(new_root);
assert(IN_SET(mount_flags, MS_MOVE, MS_BIND));
/* Check if we shall remove the contents of the old root */ /* Check if we shall remove the contents of the old root */
old_root_fd = open("/", O_DIRECTORY|O_CLOEXEC); old_root_fd = open("/", O_DIRECTORY|O_CLOEXEC);
@ -83,32 +95,35 @@ int switch_root(const char *new_root,
if (mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL) < 0) if (mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL) < 0)
return log_error_errno(errno, "Failed to set \"/\" mount propagation to private: %m"); return log_error_errno(errno, "Failed to set \"/\" mount propagation to private: %m");
FOREACH_STRING(path, "/sys", "/dev", "/run", "/proc") {
_cleanup_free_ char *chased = NULL;
r = chase(path, new_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &chased, NULL);
if (r < 0)
return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, path);
if (r > 0) {
/* Already exists. Let's see if it is a mount point already. */
r = path_is_mount_point(chased, NULL, 0);
if (r < 0)
return log_error_errno(r, "Failed to determine whether %s is a mount point: %m", chased);
if (r > 0) /* If it is already mounted, then do nothing */
continue;
} else
/* Doesn't exist yet? */
(void) mkdir_p_label(chased, 0755);
if (mount(path, chased, NULL, mount_flags, NULL) < 0)
return log_error_errno(errno, "Failed to mount %s to %s: %m", path, chased);
}
/* Do not fail if base_filesystem_create() fails. Not all switch roots are like base_filesystem_create() wants /* Do not fail if base_filesystem_create() fails. Not all switch roots are like base_filesystem_create() wants
* them to look like. They might even boot, if they are RO and don't have the FS layout. Just ignore the error * them to look like. They might even boot, if they are RO and don't have the FS layout. Just ignore the error
* and switch_root() nevertheless. */ * and switch_root() nevertheless. */
(void) base_filesystem_create_fd(new_root_fd, new_root, UID_INVALID, GID_INVALID); (void) base_filesystem_create_fd(new_root_fd, new_root, UID_INVALID, GID_INVALID);
FOREACH_ARRAY(transfer, transfer_table, ELEMENTSOF(transfer_table)) {
_cleanup_free_ char *chased = NULL;
if (access(transfer->path, F_OK) < 0) {
log_debug_errno(errno, "Path '%s' to move to target root directory, not found, ignoring: %m", transfer->path);
continue;
}
r = chase(transfer->path, new_root, CHASE_PREFIX_ROOT, &chased, NULL);
if (r < 0)
return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, transfer->path);
/* Let's see if it is a mount point already. */
r = path_is_mount_point(chased, NULL, 0);
if (r < 0)
return log_error_errno(r, "Failed to determine whether %s is a mount point: %m", chased);
if (r > 0) /* If it is already mounted, then do nothing */
continue;
r = mount_nofollow_verbose(LOG_ERR, transfer->path, chased, NULL, transfer->mount_flags, NULL);
if (r < 0)
return r;
}
if (fchdir(new_root_fd) < 0) if (fchdir(new_root_fd) < 0)
return log_error_errno(errno, "Failed to change directory to %s: %m", new_root); return log_error_errno(errno, "Failed to change directory to %s: %m", new_root);

View file

@ -3,4 +3,4 @@
#include <stdbool.h> #include <stdbool.h>
int switch_root(const char *new_root, const char *old_root_after, unsigned long mount_flags, bool destroy_old_root); int switch_root(const char *new_root, const char *old_root_after, bool destroy_old_root);

View file

@ -165,14 +165,10 @@ static int switch_root_initramfs(void) {
if (mount(NULL, "/run/initramfs", NULL, MS_PRIVATE, NULL) < 0) if (mount(NULL, "/run/initramfs", NULL, MS_PRIVATE, NULL) < 0)
return log_error_errno(errno, "Failed to make /run/initramfs private mount: %m"); return log_error_errno(errno, "Failed to make /run/initramfs private mount: %m");
/* switch_root with MS_BIND, because there might still be processes lurking around, which have open file descriptors. /* Do not detach the old root, because /run/initramfs/shutdown needs to access it. */
* /run/initramfs/shutdown will take care of these.
* Also do not detach the old root, because /run/initramfs/shutdown needs to access it.
*/
return switch_root( return switch_root(
/* new_root= */ "/run/initramfs", /* new_root= */ "/run/initramfs",
/* old_root_after= */ "/oldroot", /* old_root_after= */ "/oldroot",
MS_BIND,
/* destroy_old_root= */ false); /* destroy_old_root= */ false);
} }