From f2c1d491a539035d6cc1fa53a7cef0cbc8d52902 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 28 Apr 2023 18:35:31 +0200 Subject: [PATCH] switch-root: don't require /mnt/ when switching root into host OS So far, we invoked pivot_root() specifying /mnt/ as the second argument, which we then unmounted right after. We'd create /mnt/ if needed. This sucks, because it means /mnt/ must strictly be pre-created on immutable images. Remove this limitation, by using pivot_root() with "." as source and target, which will result in two stacked mounts afterwards: the new one underneath, the old one on top. We can then simply unmount the top one, and have what we want without needing any extra /mnt/ dir. Since we don't need /mnt/ anymore we can get rid of the extra unmount_old_root parameter and simply specify old_root_after as NULL if we don't want the old mount to stick around. --- src/core/main.c | 2 +- src/shared/switch-root.c | 83 ++++++++++++++++++++++------------------ src/shared/switch-root.h | 2 +- src/shutdown/shutdown.c | 2 +- 4 files changed, 49 insertions(+), 40 deletions(-) diff --git a/src/core/main.c b/src/core/main.c index 932ea64e45..af88fe34bb 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -1797,7 +1797,7 @@ static int do_reexecute( broadcast_signal(SIGTERM, false, true, arg_default_timeout_stop_usec); /* And switch root with MS_MOVE, because we remove the old directory afterwards and detach it. 
*/ - r = switch_root(switch_root_dir, "/mnt", true, MS_MOVE); + r = switch_root(switch_root_dir, /* old_root_after= */ NULL, MS_MOVE); if (r < 0) log_error_errno(r, "Failed to switch root, trying to continue: %m"); } diff --git a/src/shared/switch-root.c b/src/shared/switch-root.c index 8dc93c2822..33e918b7d0 100644 --- a/src/shared/switch-root.c +++ b/src/shared/switch-root.c @@ -26,41 +26,45 @@ #include "user-util.h" int switch_root(const char *new_root, - const char *old_root_after, /* path below the new root, where to place the old root after the transition */ - bool unmount_old_root, - unsigned long mount_flags) { /* MS_MOVE or MS_BIND */ + const char *old_root_after, /* path below the new root, where to place the old root after the transition; may be NULL to unmount it */ + unsigned long mount_flags) { /* MS_MOVE or MS_BIND used for /proc/, /dev/, /run/, /sys/ */ + _cleanup_close_ int old_root_fd = -EBADF, new_root_fd = -EBADF; _cleanup_free_ char *resolved_old_root_after = NULL; - _cleanup_close_ int old_root_fd = -EBADF; - int r; + int r, istmp; assert(new_root); - assert(old_root_after); + assert(IN_SET(mount_flags, MS_MOVE, MS_BIND)); if (path_equal(new_root, "/")) return 0; /* Check if we shall remove the contents of the old root */ - old_root_fd = open("/", O_RDONLY | O_CLOEXEC | O_DIRECTORY); + old_root_fd = open("/", O_DIRECTORY|O_CLOEXEC); if (old_root_fd < 0) return log_error_errno(errno, "Failed to open root directory: %m"); - r = fd_is_temporary_fs(old_root_fd); - if (r < 0) - return log_error_errno(r, "Failed to stat root directory: %m"); - if (r > 0) + + istmp = fd_is_temporary_fs(old_root_fd); + if (istmp < 0) + return log_error_errno(istmp, "Failed to stat root directory: %m"); + if (istmp > 0) log_debug("Root directory is on tmpfs, will do cleanup later."); - else - old_root_fd = safe_close(old_root_fd); - /* Determine where we shall place the old root after the transition */ - r = chase(old_root_after, new_root, 
CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &resolved_old_root_after, NULL); - if (r < 0) - return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, old_root_after); - if (r == 0) /* Doesn't exist yet. Let's create it */ - (void) mkdir_p_label(resolved_old_root_after, 0755); + new_root_fd = open(new_root, O_DIRECTORY|O_CLOEXEC); + if (new_root_fd < 0) + return log_error_errno(errno, "Failed to open target directory '%s': %m", new_root); - /* Work-around for kernel design: the kernel refuses MS_MOVE if any file systems are mounted MS_SHARED. Hence - * remount them MS_PRIVATE here as a work-around. + if (old_root_after) { + /* Determine where we shall place the old root after the transition */ + r = chase(old_root_after, new_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &resolved_old_root_after, NULL); + if (r < 0) + return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, old_root_after); + if (r == 0) /* Doesn't exist yet. Let's create it */ + (void) mkdir_p_label(resolved_old_root_after, 0755); + } + + /* Work-around for kernel design: the kernel refuses MS_MOVE if any file systems are mounted + * MS_SHARED. Hence remount them MS_PRIVATE here as a work-around. * * https://bugzilla.redhat.com/show_bug.cgi?id=847418 */ if (mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL) < 0) @@ -92,35 +96,40 @@ int switch_root(const char *new_root, * and switch_root() nevertheless. */ (void) base_filesystem_create(new_root, UID_INVALID, GID_INVALID); - if (chdir(new_root) < 0) + if (fchdir(new_root_fd) < 0) return log_error_errno(errno, "Failed to change directory to %s: %m", new_root); /* We first try a pivot_root() so that we can umount the old root dir. In many cases (i.e. where rootfs is /), * that's not possible however, and hence we simply overmount root */ - if (pivot_root(new_root, resolved_old_root_after) >= 0) { - - /* Immediately get rid of the old root, if detach_oldroot is set. - * Since we are running off it we need to do this lazily. 
*/ - if (unmount_old_root) { - r = umount_recursive(old_root_after, MNT_DETACH); - if (r < 0) - log_warning_errno(r, "Failed to unmount old root directory tree, ignoring: %m"); + if (resolved_old_root_after) + r = RET_NERRNO(pivot_root(".", resolved_old_root_after)); + else { + r = RET_NERRNO(pivot_root(".", ".")); + if (r >= 0) { + /* Now unmount the upper of the two stacked file systems */ + if (umount2(".", MNT_DETACH) < 0) + return log_error_errno(errno, "Failed to unmount the old root: %m"); } + } + if (r < 0) { + log_debug_errno(r, "Pivoting root file system failed, moving mounts instead: %m"); - } else if (mount(new_root, "/", NULL, MS_MOVE, NULL) < 0) - return log_error_errno(errno, "Failed to move %s to /: %m", new_root); + if (mount(".", "/", NULL, MS_MOVE, NULL) < 0) + return log_error_errno(errno, "Failed to move %s to /: %m", new_root); - if (chroot(".") < 0) - return log_error_errno(errno, "Failed to change root: %m"); + if (chroot(".") < 0) + return log_error_errno(errno, "Failed to change root: %m"); - if (chdir("/") < 0) - return log_error_errno(errno, "Failed to change directory: %m"); + if (chdir(".") < 0) + return log_error_errno(errno, "Failed to change directory: %m"); + } - if (old_root_fd >= 0) { + if (istmp) { struct stat rb; if (fstat(old_root_fd, &rb) < 0) return log_error_errno(errno, "Failed to stat old root directory: %m"); + (void) rm_rf_children(TAKE_FD(old_root_fd), 0, &rb); /* takes possession of the dir fd, even on failure */ } diff --git a/src/shared/switch-root.h b/src/shared/switch-root.h index 4e04283e53..e3fabae3d9 100644 --- a/src/shared/switch-root.h +++ b/src/shared/switch-root.h @@ -3,4 +3,4 @@ #include -int switch_root(const char *new_root, const char *oldroot, bool detach_oldroot, unsigned long mountflags); +int switch_root(const char *new_root, const char *old_root_after, unsigned long mount_flags); diff --git a/src/shutdown/shutdown.c b/src/shutdown/shutdown.c index 802be44a79..cf0351cf78 100644 --- 
a/src/shutdown/shutdown.c +++ b/src/shutdown/shutdown.c @@ -169,7 +169,7 @@ static int switch_root_initramfs(void) { * /run/initramfs/shutdown will take care of these. * Also do not detach the old root, because /run/initramfs/shutdown needs to access it. */ - return switch_root("/run/initramfs", "/oldroot", false, MS_BIND); + return switch_root("/run/initramfs", "/oldroot", MS_BIND); } /* Read the following fields from /proc/meminfo: