execute: split out mounting of credentials fs

Let's add two new helpers: mount_credentials_fs() and
credentials_fs_mount_flags(). The former mounts a file system suitable
for storing unencrypted credentials at runtime (i.e. a ramfs or
tmpfs). The latter determines the right mount flags to use for such a
mount.

Both functions mostly just take code from execute.c, but make two
changes:

1. If the kernel supports it we'll use a tmpfs with the new "noswap"
   mount option instead of ramfs. The option was added in kernel 6.4
   and hence is very recent, but tmpfs is so much less crappy than
   ramfs that it is worth it.

2. We'll set MS_NOSYMFOLLOW on the mounts if supported. These file
   systems should only contain regular files, hence there is no need to
   allow symlinks.
This commit is contained in:
Lennart Poettering 2023-06-29 12:32:44 +02:00
parent 7ca59e67b1
commit 1155f44f48
3 changed files with 81 additions and 42 deletions

View file

@ -3389,7 +3389,7 @@ static int setup_credentials_internal(
if (r < 0)
return r;
r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL);
r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ false), NULL);
if (r < 0)
return r;
@ -3400,57 +3400,34 @@ static int setup_credentials_internal(
if (workspace_mounted < 0) {
/* Nothing is mounted on the workspace yet, let's try to mount something now */
for (int try = 0;; try++) {
if (try == 0) {
/* Try "ramfs" first, since it's not swap backed */
r = mount_nofollow_verbose(LOG_DEBUG, "ramfs", workspace, "ramfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, "mode=0700");
if (r >= 0) {
workspace_mounted = true;
break;
}
r = mount_credentials_fs(workspace, CREDENTIALS_TOTAL_SIZE_MAX, /* ro= */ false);
if (r < 0) {
/* If that didn't work, try to make a bind mount from the final to the workspace, so that we can make it writable there. */
r = mount_nofollow_verbose(LOG_DEBUG, final, workspace, NULL, MS_BIND|MS_REC, NULL);
if (r < 0) {
if (!ERRNO_IS_PRIVILEGE(r)) /* Propagate anything that isn't a permission problem */
return r;
} else if (try == 1) {
_cleanup_free_ char *opts = NULL;
if (must_mount) /* If we it's not OK to use the plain directory
* fallback, propagate all errors too */
return r;
if (asprintf(&opts, "mode=0700,nr_inodes=1024,size=%zu", (size_t) CREDENTIALS_TOTAL_SIZE_MAX) < 0)
return -ENOMEM;
/* Fall back to "tmpfs" otherwise */
r = mount_nofollow_verbose(LOG_DEBUG, "tmpfs", workspace, "tmpfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, opts);
if (r >= 0) {
workspace_mounted = true;
break;
}
/* If we lack privileges to bind mount stuff, then let's gracefully
* proceed for compat with container envs, and just use the final dir
* as is. */
workspace_mounted = false;
} else {
/* If that didn't work, try to make a bind mount from the final to the workspace, so that we can make it writable there. */
r = mount_nofollow_verbose(LOG_DEBUG, final, workspace, NULL, MS_BIND|MS_REC, NULL);
if (r < 0) {
if (!ERRNO_IS_PRIVILEGE(r)) /* Propagate anything that isn't a permission problem */
return r;
if (must_mount) /* If we it's not OK to use the plain directory
* fallback, propagate all errors too */
return r;
/* If we lack privileges to bind mount stuff, then let's gracefully
* proceed for compat with container envs, and just use the final dir
* as is. */
workspace_mounted = false;
break;
}
/* Make the new bind mount writable (i.e. drop MS_RDONLY) */
r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL);
r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ false), NULL);
if (r < 0)
return r;
workspace_mounted = true;
break;
}
}
} else
workspace_mounted = true;
}
assert(!must_mount || workspace_mounted > 0);
@ -3482,7 +3459,7 @@ static int setup_credentials_internal(
if (install) {
/* Make workspace read-only now, so that any bind mount we make from it defaults to read-only too */
r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL);
r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ true), NULL);
if (r < 0)
return r;

View file

@ -1469,3 +1469,62 @@ int trigger_automount_at(int dir_fd, const char *path) {
return 0;
}
unsigned long credentials_fs_mount_flags(bool ro) {
        /* Computes the restrictive flag set used for every credentials mount: device nodes, execution
         * and SUID are always disabled. Whatever ms_nosymfollow_supported() reports is OR-ed in on top
         * (presumably MS_NOSYMFOLLOW on kernels that know it and 0 otherwise — confirm against that
         * helper), and MS_RDONLY is added when a read-only mount is requested. */
        unsigned long flags = MS_NODEV|MS_NOEXEC|MS_NOSUID;

        flags |= ms_nosymfollow_supported();

        if (ro)
                flags |= MS_RDONLY;

        return flags;
}
int mount_credentials_fs(const char *path, size_t size, bool ro) {
_cleanup_free_ char *opts = NULL;
int r, noswap_supported;
/* Mounts a file system we can place credentials in, i.e. with tight access modes right from the
* beginning, and ideally swapping turned off. In order of preference:
*
* 1. tmpfs if it supports "noswap"
* 2. ramfs
* 3. tmpfs if it doesn't support "noswap"
*/
noswap_supported = mount_option_supported("tmpfs", "noswap", NULL); /* Check explicitly to avoid kmsg noise */
if (noswap_supported > 0) {
_cleanup_free_ char *noswap_opts = NULL;
if (asprintf(&noswap_opts, "mode=0700,nr_inodes=1024,size=%zu,noswap", size) < 0)
return -ENOMEM;
/* Best case: tmpfs with noswap (needs kernel >= 6.3) */
r = mount_nofollow_verbose(
LOG_DEBUG,
"tmpfs",
path,
"tmpfs",
credentials_fs_mount_flags(ro),
noswap_opts);
if (r >= 0)
return r;
}
r = mount_nofollow_verbose(
LOG_DEBUG,
"ramfs",
path,
"ramfs",
credentials_fs_mount_flags(ro),
"mode=0700");
if (r >= 0)
return r;
if (asprintf(&opts, "mode=0700,nr_inodes=1024,size=%zu", size) < 0)
return -ENOMEM;
return mount_nofollow_verbose(
LOG_DEBUG,
"tmpfs",
path,
"tmpfs",
credentials_fs_mount_flags(ro),
opts);
}

View file

@ -141,3 +141,6 @@ int make_mount_point_inode_from_stat(const struct stat *st, const char *dest, mo
int make_mount_point_inode_from_path(const char *source, const char *dest, mode_t mode);
int trigger_automount_at(int dir_fd, const char *path);
/* Returns the mount flags to use for credentials mounts: MS_NODEV|MS_NOEXEC|MS_NOSUID, combined with
 * whatever ms_nosymfollow_supported() reports, plus MS_RDONLY if 'ro' is true. */
unsigned long credentials_fs_mount_flags(bool ro);
/* Mounts a file system for storing credentials on 'path', using the flags from
 * credentials_fs_mount_flags(ro). Tries tmpfs with "noswap" first (if the option is supported), then
 * ramfs, then tmpfs without "noswap". 'size' is passed as the size= limit of the tmpfs variants only.
 * Returns the result of the mount attempt, or -ENOMEM if the option string cannot be allocated. */
int mount_credentials_fs(const char *path, size_t size, bool ro);