1
0
mirror of https://github.com/systemd/systemd synced 2024-07-01 07:34:28 +00:00

core: do not imply PrivateTmp with DynamicUser, create a private tmpfs instead

DynamicUser= enables PrivateTmp= implicitly to avoid files owned by reusable uids
leaking into the host. Change it to instead create a fully private tmpfs instance
instead, which also ensures the same result, since it has less impactful semantics
with respect to PrivateTmp=yes, which links the mount namespace to the host's /tmp
instead. If a user specifies PrivateTmp manually, let the existing behaviour
unchanged to ensure backward compatibility is not broken.
This commit is contained in:
Luca Boccassi 2024-05-08 20:12:57 +01:00
parent a2979bb842
commit 0e551b04ef
15 changed files with 244 additions and 60 deletions

View File

@ -671,12 +671,13 @@
part of a unit for which dynamic users/groups are enabled do not leave files or directories owned by
these users/groups around, as a different unit might get the same UID/GID assigned later on, and thus
gain access to these files or directories. If <varname>DynamicUser=</varname> is enabled,
<varname>RemoveIPC=</varname> and <varname>PrivateTmp=</varname> are implied (and cannot be turned
off). This ensures that the lifetime of IPC objects and temporary files created by the executed
processes is bound to the runtime of the service, and hence the lifetime of the dynamic
user/group. Since <filename>/tmp/</filename> and <filename>/var/tmp/</filename> are usually the only
world-writable directories on a system this ensures that a unit making use of dynamic user/group
allocation cannot leave files around after unit termination. Furthermore
<varname>RemoveIPC=</varname> is implied (and cannot be turned off). This ensures that the lifetime
of IPC objects and temporary files created by the executed processes is bound to the runtime of the
service, and hence the lifetime of the dynamic user/group. Since <filename>/tmp/</filename> and
<filename>/var/tmp/</filename> are usually the only world-writable directories on a system, unless
<varname>PrivateTmp=</varname> is manually enabled, those directories will be placed on a private
tmpfs filesystem, as this ensures that a unit making use of dynamic user/group allocation cannot
leave files around after unit termination. Furthermore
<varname>NoNewPrivileges=</varname> and <varname>RestrictSUIDSGID=</varname> are implicitly enabled
(and cannot be disabled), to ensure that processes invoked cannot take benefit or create SUID/SGID
files or directories. Moreover <varname>ProtectSystem=strict</varname> and

View File

@ -1055,7 +1055,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("NoExecPaths", "as", NULL, offsetof(ExecContext, no_exec_paths), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ExecSearchPath", "as", NULL, offsetof(ExecContext, exec_search_path), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("MountFlags", "t", bus_property_get_ulong, offsetof(ExecContext, mount_propagation_flag), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_private_tmp, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectClock", "b", bus_property_get_bool, offsetof(ExecContext, protect_clock), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectKernelTunables", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_tunables), SD_BUS_VTABLE_PROPERTY_CONST),
@ -1736,7 +1736,7 @@ int bus_exec_context_set_transient_property(
return bus_set_transient_unsigned(u, name, &c->tty_cols, message, flags, error);
if (streq(name, "PrivateTmp"))
return bus_set_transient_bool(u, name, &c->private_tmp, message, flags, error);
return bus_set_transient_private_tmp(u, name, &c->private_tmp, message, flags, error);
if (streq(name, "PrivateDevices"))
return bus_set_transient_bool(u, name, &c->private_devices, message, flags, error);

View File

@ -150,6 +150,45 @@ int bus_set_transient_usec_internal(
return 1;
}
int bus_set_transient_private_tmp(
Unit *u,
const char *name,
PrivateTmp *p,
sd_bus_message *message,
UnitWriteFlags flags,
sd_bus_error *error) {
int v, r;
assert(p);
r = sd_bus_message_read(message, "b", &v);
if (r < 0)
return r;
if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
*p = v ? PRIVATE_TMP_CONNECTED : PRIVATE_TMP_OFF;
unit_write_settingf(u, flags, name, "%s=%s", name, yes_no(v));
}
return 1;
}
int bus_property_get_private_tmp(
sd_bus *bus,
const char *path,
const char *interface,
const char *property,
sd_bus_message *reply,
void *userdata,
sd_bus_error *error) {
PrivateTmp *p = ASSERT_PTR(userdata);
int b = *p != PRIVATE_TMP_OFF;
return sd_bus_message_append_basic(reply, 'b', &b);
}
int bus_verify_manage_units_async_full(
Unit *u,
const char *verb,

View File

@ -4,6 +4,7 @@
#include "sd-bus.h"
#include "dissect-image.h"
#include "execute.h"
#include "unit.h"
int bus_property_get_triggered_unit(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
@ -244,6 +245,7 @@ int bus_set_transient_string(Unit *u, const char *name, char **p, sd_bus_message
int bus_set_transient_bool(Unit *u, const char *name, bool *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
int bus_set_transient_tristate(Unit *u, const char *name, int *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
int bus_set_transient_usec_internal(Unit *u, const char *name, usec_t *p, bool fix_0, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
int bus_set_transient_private_tmp(Unit *u, const char *name, PrivateTmp *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
static inline int bus_set_transient_usec(Unit *u, const char *name, usec_t *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error) {
return bus_set_transient_usec_internal(u, name, p, false, message, flags, error);
}
@ -255,3 +257,4 @@ int bus_verify_manage_units_async_full(Unit *u, const char *verb, const char *po
int bus_read_mount_options(sd_bus_message *message, sd_bus_error *error, MountOptions **ret_options, char **ret_format_str, const char *separator);
int bus_property_get_activation_details(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
int bus_property_get_private_tmp(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);

View File

@ -3054,7 +3054,7 @@ static int apply_mount_namespace(
_cleanup_strv_free_ char **empty_directories = NULL, **symlinks = NULL,
**read_write_paths_cleanup = NULL;
_cleanup_free_ char *creds_path = NULL, *incoming_dir = NULL, *propagate_dir = NULL,
*extension_dir = NULL, *host_os_release_stage = NULL, *root_image = NULL, *root_dir = NULL;
*private_namespace_dir = NULL, *host_os_release_stage = NULL, *root_image = NULL, *root_dir = NULL;
const char *tmp_dir = NULL, *var_tmp_dir = NULL;
char **read_write_paths;
bool setup_os_release_symlink;
@ -3108,7 +3108,7 @@ static int apply_mount_namespace(
* to world users. Inside of it there's a /tmp that is sticky, and that's the one we want to
* use here. This does not apply when we are using /run/systemd/empty as fallback. */
if (context->private_tmp && runtime && runtime->shared) {
if (context->private_tmp == PRIVATE_TMP_CONNECTED && runtime && runtime->shared) {
if (streq_ptr(runtime->shared->tmp_dir, RUN_SYSTEMD_EMPTY))
tmp_dir = runtime->shared->tmp_dir;
else if (runtime->shared->tmp_dir)
@ -3145,8 +3145,8 @@ static int apply_mount_namespace(
if (!incoming_dir)
return -ENOMEM;
extension_dir = strdup("/run/systemd/unit-extensions");
if (!extension_dir)
private_namespace_dir = strdup("/run/systemd");
if (!private_namespace_dir)
return -ENOMEM;
/* If running under a different root filesystem, propagate the host's os-release. We make a
@ -3159,7 +3159,7 @@ static int apply_mount_namespace(
} else {
assert(params->runtime_scope == RUNTIME_SCOPE_USER);
if (asprintf(&extension_dir, "/run/user/" UID_FMT "/systemd/unit-extensions", geteuid()) < 0)
if (asprintf(&private_namespace_dir, "/run/user/" UID_FMT "/systemd", geteuid()) < 0)
return -ENOMEM;
if (setup_os_release_symlink) {
@ -3205,6 +3205,8 @@ static int apply_mount_namespace(
.temporary_filesystems = context->temporary_filesystems,
.n_temporary_filesystems = context->n_temporary_filesystems,
.private_tmp = context->private_tmp,
.mount_images = context->mount_images,
.n_mount_images = context->n_mount_images,
.mount_image_policy = context->mount_image_policy ?: &image_policy_service,
@ -3225,7 +3227,7 @@ static int apply_mount_namespace(
.propagate_dir = propagate_dir,
.incoming_dir = incoming_dir,
.extension_dir = extension_dir,
.private_namespace_dir = private_namespace_dir,
.notify_socket = root_dir || root_image ? params->notify_socket : NULL,
.host_os_release_stage = host_os_release_stage,
@ -3857,7 +3859,7 @@ static bool exec_context_need_unprivileged_private_users(
return false;
return context->private_users ||
context->private_tmp ||
context->private_tmp != PRIVATE_TMP_OFF ||
context->private_devices ||
context->private_network ||
context->network_namespace_path ||

View File

@ -1840,7 +1840,7 @@ static int exec_context_serialize(const ExecContext *c, FILE *f) {
if (r < 0)
return r;
r = serialize_bool_elide(f, "exec-context-private-tmp", c->private_tmp);
r = serialize_item(f, "exec-context-private-tmp", private_tmp_to_string(c->private_tmp));
if (r < 0)
return r;
@ -2720,10 +2720,9 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
if (r < 0)
return r;
} else if ((val = startswith(l, "exec-context-private-tmp="))) {
r = parse_boolean(val);
if (r < 0)
return r;
c->private_tmp = r;
c->private_tmp = private_tmp_from_string(val);
if (c->private_tmp < 0)
return c->private_tmp;
} else if ((val = startswith(l, "exec-context-private-devices="))) {
r = parse_boolean(val);
if (r < 0)

View File

@ -231,7 +231,10 @@ bool exec_needs_mount_namespace(
if (!IN_SET(context->mount_propagation_flag, 0, MS_SHARED))
return true;
if (context->private_tmp && runtime && runtime->shared && (runtime->shared->tmp_dir || runtime->shared->var_tmp_dir))
if (context->private_tmp == PRIVATE_TMP_DISCONNECTED)
return true;
if (context->private_tmp == PRIVATE_TMP_CONNECTED && runtime && runtime->shared && (runtime->shared->tmp_dir || runtime->shared->var_tmp_dir))
return true;
if (context->private_devices ||
@ -964,7 +967,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
prefix, empty_to_root(c->root_directory),
prefix, yes_no(c->root_ephemeral),
prefix, yes_no(c->non_blocking),
prefix, yes_no(c->private_tmp),
prefix, private_tmp_to_string(c->private_tmp),
prefix, yes_no(c->private_devices),
prefix, yes_no(c->protect_kernel_tunables),
prefix, yes_no(c->protect_kernel_modules),
@ -2155,12 +2158,12 @@ static int exec_shared_runtime_make(
assert(id);
/* It is not necessary to create ExecSharedRuntime object. */
if (!exec_needs_network_namespace(c) && !exec_needs_ipc_namespace(c) && !c->private_tmp) {
if (!exec_needs_network_namespace(c) && !exec_needs_ipc_namespace(c) && c->private_tmp != PRIVATE_TMP_CONNECTED) {
*ret = NULL;
return 0;
}
if (c->private_tmp &&
if (c->private_tmp == PRIVATE_TMP_CONNECTED &&
!(prefixed_path_strv_contains(c->inaccessible_paths, "/tmp") &&
(prefixed_path_strv_contains(c->inaccessible_paths, "/var/tmp") ||
prefixed_path_strv_contains(c->inaccessible_paths, "/var")))) {

View File

@ -314,7 +314,7 @@ struct ExecContext {
int private_mounts;
int mount_apivfs;
int memory_ksm;
bool private_tmp;
PrivateTmp private_tmp;
bool private_network;
bool private_devices;
bool private_users;

View File

@ -119,7 +119,7 @@
{{type}}.BindPaths, config_parse_bind_paths, 0, offsetof({{type}}, exec_context)
{{type}}.BindReadOnlyPaths, config_parse_bind_paths, 0, offsetof({{type}}, exec_context)
{{type}}.TemporaryFileSystem, config_parse_temporary_filesystems, 0, offsetof({{type}}, exec_context)
{{type}}.PrivateTmp, config_parse_bool, 0, offsetof({{type}}, exec_context.private_tmp)
{{type}}.PrivateTmp, config_parse_private_tmp, 0, offsetof({{type}}, exec_context)
{{type}}.PrivateDevices, config_parse_bool, 0, offsetof({{type}}, exec_context.private_devices)
{{type}}.ProtectKernelTunables, config_parse_bool, 0, offsetof({{type}}, exec_context.protect_kernel_tunables)
{{type}}.ProtectKernelModules, config_parse_bool, 0, offsetof({{type}}, exec_context.protect_kernel_modules)

View File

@ -5199,6 +5199,34 @@ int config_parse_temporary_filesystems(
}
}
int config_parse_private_tmp(
const char* unit,
const char *filename,
unsigned line,
const char *section,
unsigned section_line,
const char *lvalue,
int ltype,
const char *rvalue,
void *data,
void *userdata) {
ExecContext *c = ASSERT_PTR(data);
int r;
assert(filename);
assert(rvalue);
r = parse_boolean(rvalue);
if (r < 0) {
log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse boolean value: %s ignoring", rvalue);
return 0;
}
c->private_tmp = r ? PRIVATE_TMP_CONNECTED : PRIVATE_TMP_OFF;
return 0;
}
int config_parse_bind_paths(
const char *unit,
const char *filename,

View File

@ -113,6 +113,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_import_credential);
CONFIG_PARSER_PROTOTYPE(config_parse_set_status);
CONFIG_PARSER_PROTOTYPE(config_parse_namespace_path_strv);
CONFIG_PARSER_PROTOTYPE(config_parse_temporary_filesystems);
CONFIG_PARSER_PROTOTYPE(config_parse_private_tmp);
CONFIG_PARSER_PROTOTYPE(config_parse_cpu_quota);
CONFIG_PARSER_PROTOTYPE(config_parse_allowed_cpuset);
CONFIG_PARSER_PROTOTYPE(config_parse_protect_home);

View File

@ -72,6 +72,7 @@ typedef enum MountMode {
MOUNT_EXEC,
MOUNT_TMPFS,
MOUNT_RUN,
MOUNT_PRIVATE_TMPFS, /* Mounted outside the root directory, and used by subsequent mounts */
MOUNT_EXTENSION_DIRECTORY, /* Bind-mounted outside the root directory, and used by subsequent mounts */
MOUNT_EXTENSION_IMAGE, /* Mounted outside the root directory, and used by subsequent mounts */
MOUNT_MQUEUEFS,
@ -97,6 +98,8 @@ typedef struct MountEntry {
bool nosuid:1; /* Shall set MS_NOSUID on the mount itself */
bool noexec:1; /* Shall set MS_NOEXEC on the mount itself */
bool exec:1; /* Shall clear MS_NOEXEC on the mount itself */
bool create_source_dir:1; /* Create the source directory if it doesn't exist - for implicit bind mounts */
mode_t source_dir_mode; /* Mode for the source directory, if it is to be created */
MountEntryState state; /* Whether it was already processed or skipped */
char *path_malloc; /* Use this instead of 'path_const' if we had to allocate memory */
const char *unprefixed_path_const; /* If the path was amended with a prefix, these will save the original */
@ -246,6 +249,7 @@ static const char * const mount_mode_table[_MOUNT_MODE_MAX] = {
[MOUNT_EXEC] = "exec",
[MOUNT_TMPFS] = "tmpfs",
[MOUNT_RUN] = "run",
[MOUNT_PRIVATE_TMPFS] = "private-tmpfs",
[MOUNT_EXTENSION_DIRECTORY] = "extension-directory",
[MOUNT_EXTENSION_IMAGE] = "extension-image",
[MOUNT_MQUEUEFS] = "mqueuefs",
@ -467,7 +471,7 @@ static int append_mount_images(MountList *ml, const MountImage *mount_images, si
static int append_extensions(
MountList *ml,
const char *root,
const char *extension_dir,
const char *private_namespace_dir,
char **hierarchies,
const MountImage *mount_images,
size_t n_mount_images,
@ -482,7 +486,7 @@ static int append_extensions(
if (n_mount_images == 0 && strv_isempty(extension_directories))
return 0;
assert(extension_dir);
assert(private_namespace_dir);
n_overlays = strv_length(hierarchies);
if (n_overlays == 0)
@ -519,7 +523,7 @@ static int append_extensions(
"No matching entry in .v/ directory %s found.",
m->source);
if (asprintf(&mount_point, "%s/%zu", extension_dir, i) < 0)
if (asprintf(&mount_point, "%s/unit-extensions/%zu", private_namespace_dir, i) < 0)
return -ENOMEM;
for (size_t j = 0; hierarchies && hierarchies[j]; ++j) {
@ -556,7 +560,7 @@ static int append_extensions(
bool ignore_enoent = false;
/* Pick up the counter where the ExtensionImages left it. */
if (asprintf(&mount_point, "%s/%zu", extension_dir, n_mount_images++) < 0)
if (asprintf(&mount_point, "%s/unit-extensions/%zu", private_namespace_dir, n_mount_images++) < 0)
return -ENOMEM;
/* Look for any prefixes */
@ -888,8 +892,12 @@ static void drop_outside_root(MountList *ml, const char *root_directory) {
for (f = ml->mounts, t = ml->mounts; f < ml->mounts + ml->n_mounts; f++) {
/* ExtensionImages/Directories bases are opened in /run/systemd/unit-extensions on the host */
if (!IN_SET(f->mode, MOUNT_EXTENSION_IMAGE, MOUNT_EXTENSION_DIRECTORY) && !path_startswith(mount_entry_path(f), root_directory)) {
/* ExtensionImages/Directories bases are opened in /run/[user/xyz/]systemd/unit-extensions
* on the host, and a private (invisible to the guest) tmpfs instance is mounted on
* /run/[user/xyz/]systemd/unit-private-tmp as the storage backend of private /tmp and
* /var/tmp. */
if (!IN_SET(f->mode, MOUNT_EXTENSION_IMAGE, MOUNT_EXTENSION_DIRECTORY, MOUNT_PRIVATE_TMPFS) &&
!path_startswith(mount_entry_path(f), root_directory)) {
log_debug("%s is outside of root directory.", mount_entry_path(f));
mount_entry_done(f);
continue;
@ -1618,6 +1626,14 @@ static int apply_one_mount(
* that bind mount source paths are always relative to the host root, hence we pass NULL as
* root directory to chase() here. */
/* When we create implicit mounts, we might need to create the path ourselves as it is on a
* just-created tmpfs, for example. */
if (m->create_source_dir) {
r = mkdir_p(mount_entry_source(m), m->source_dir_mode);
if (r < 0)
return log_debug_errno(r, "Failed to create source directory %s: %m", mount_entry_source(m));
}
r = chase(mount_entry_source(m), NULL, CHASE_TRAIL_SLASH, &chased, NULL);
if (r == -ENOENT && m->ignore) {
log_debug_errno(r, "Path %s does not exist, ignoring.", mount_entry_source(m));
@ -1637,6 +1653,7 @@ static int apply_one_mount(
}
case MOUNT_EMPTY_DIR:
case MOUNT_PRIVATE_TMPFS:
case MOUNT_TMPFS:
return mount_tmpfs(m);
@ -1743,7 +1760,7 @@ static int make_read_only(const MountEntry *m, char **deny_list, FILE *proc_self
* and running Linux <= 4.17. */
submounts =
mount_entry_read_only(m) &&
!IN_SET(m->mode, MOUNT_EMPTY_DIR, MOUNT_TMPFS);
!IN_SET(m->mode, MOUNT_EMPTY_DIR, MOUNT_TMPFS, MOUNT_PRIVATE_TMPFS);
if (submounts)
r = bind_remount_recursive_with_mountinfo(mount_entry_path(m), new_flags, flags_mask, deny_list, proc_self_mountinfo);
else
@ -1783,7 +1800,7 @@ static int make_noexec(const MountEntry *m, char **deny_list, FILE *proc_self_mo
if (flags_mask == 0) /* No Change? */
return 0;
submounts = !IN_SET(m->mode, MOUNT_EMPTY_DIR, MOUNT_TMPFS);
submounts = !IN_SET(m->mode, MOUNT_EMPTY_DIR, MOUNT_TMPFS, MOUNT_PRIVATE_TMPFS);
if (submounts)
r = bind_remount_recursive_with_mountinfo(mount_entry_path(m), new_flags, flags_mask, deny_list, proc_self_mountinfo);
@ -1808,7 +1825,7 @@ static int make_nosuid(const MountEntry *m, FILE *proc_self_mountinfo) {
if (m->state != MOUNT_APPLIED)
return 0;
submounts = !IN_SET(m->mode, MOUNT_EMPTY_DIR, MOUNT_TMPFS);
submounts = !IN_SET(m->mode, MOUNT_EMPTY_DIR, MOUNT_TMPFS, MOUNT_PRIVATE_TMPFS);
if (submounts)
r = bind_remount_recursive_with_mountinfo(mount_entry_path(m), MS_NOSUID, MS_NOSUID, NULL, proc_self_mountinfo);
else
@ -1959,8 +1976,11 @@ static int apply_mounts(
if (m->state != MOUNT_PENDING)
continue;
/* ExtensionImages/Directories are first opened in the propagate directory, not in the root_directory */
r = follow_symlink(!IN_SET(m->mode, MOUNT_EXTENSION_IMAGE, MOUNT_EXTENSION_DIRECTORY) ? root : NULL, m);
/* ExtensionImages/Directories are first opened in the propagate directory, not in
* the root_directory. A private (invisible to the guest) tmpfs instance is mounted
* on /run/[user/xyz/]systemd/unit-private-tmp as the storage backend of private
* /tmp and /var/tmp. */
r = follow_symlink(!IN_SET(m->mode, MOUNT_EXTENSION_IMAGE, MOUNT_EXTENSION_DIRECTORY, MOUNT_PRIVATE_TMPFS) ? root : NULL, m);
if (r < 0) {
mount_entry_path_debug_string(root, m, error_path);
return r;
@ -2245,39 +2265,88 @@ int setup_namespace(const NamespaceParameters *p, char **error_path) {
if (r < 0)
return r;
if (p->tmp_dir) {
bool ro = streq(p->tmp_dir, RUN_SYSTEMD_EMPTY);
/* When DynamicUser=yes enforce that /tmp/ and /var/tmp/ are disconnected from the host's
* directories, as they are world writable and ephemeral uid/gid will be used. */
if (p->private_tmp == PRIVATE_TMP_DISCONNECTED) {
_cleanup_free_ char *tmpfs_dir = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL;
MountEntry *tmpfs_entry, *tmp_entry, *var_tmp_entry;
MountEntry *me = mount_list_extend(&ml);
if (!me)
tmpfs_dir = path_join(p->private_namespace_dir, "unit-private-tmp");
tmp_dir = path_join(tmpfs_dir, "tmp");
var_tmp_dir = path_join(tmpfs_dir, "var-tmp");
if (!tmpfs_dir || !tmp_dir || !var_tmp_dir)
return log_oom_debug();
*me = (MountEntry) {
tmpfs_entry = mount_list_extend(&ml);
if (!tmpfs_entry)
return log_oom_debug();
*tmpfs_entry = (MountEntry) {
.path_malloc = TAKE_PTR(tmpfs_dir),
.mode = MOUNT_PRIVATE_TMPFS,
.options_const = "mode=0700" NESTED_TMPFS_LIMITS,
.flags = MS_NODEV|MS_STRICTATIME,
.has_prefix = true,
};
tmp_entry = mount_list_extend(&ml);
if (!tmp_entry)
return log_oom_debug();
*tmp_entry = (MountEntry) {
.source_malloc = TAKE_PTR(tmp_dir),
.path_const = "/tmp",
.mode = ro ? MOUNT_PRIVATE_TMP_READ_ONLY : MOUNT_PRIVATE_TMP,
.source_const = p->tmp_dir,
.mode = MOUNT_BIND,
.source_dir_mode = 01777,
.create_source_dir = true,
};
}
if (p->var_tmp_dir) {
bool ro = streq(p->var_tmp_dir, RUN_SYSTEMD_EMPTY);
MountEntry *me = mount_list_extend(&ml);
if (!me)
var_tmp_entry = mount_list_extend(&ml);
if (!var_tmp_entry)
return log_oom_debug();
*me = (MountEntry) {
*var_tmp_entry = (MountEntry) {
.source_malloc = TAKE_PTR(var_tmp_dir),
.path_const = "/var/tmp",
.mode = ro ? MOUNT_PRIVATE_TMP_READ_ONLY : MOUNT_PRIVATE_TMP,
.source_const = p->var_tmp_dir,
.mode = MOUNT_BIND,
.source_dir_mode = 01777,
.create_source_dir = true,
};
/* Ensure that the tmpfs is mounted first, and bind mounts are added later. */
assert_cc(MOUNT_BIND < MOUNT_PRIVATE_TMPFS);
} else {
if (p->tmp_dir) {
bool ro = streq(p->tmp_dir, RUN_SYSTEMD_EMPTY);
MountEntry *me = mount_list_extend(&ml);
if (!me)
return log_oom_debug();
*me = (MountEntry) {
.path_const = "/tmp",
.mode = ro ? MOUNT_PRIVATE_TMP_READ_ONLY : MOUNT_PRIVATE_TMP,
.source_const = p->tmp_dir,
};
}
if (p->var_tmp_dir) {
bool ro = streq(p->var_tmp_dir, RUN_SYSTEMD_EMPTY);
MountEntry *me = mount_list_extend(&ml);
if (!me)
return log_oom_debug();
*me = (MountEntry) {
.path_const = "/var/tmp",
.mode = ro ? MOUNT_PRIVATE_TMP_READ_ONLY : MOUNT_PRIVATE_TMP,
.source_const = p->var_tmp_dir,
};
}
}
r = append_mount_images(&ml, p->mount_images, p->n_mount_images);
if (r < 0)
return r;
r = append_extensions(&ml, root, p->extension_dir, hierarchies, p->extension_images, p->n_extension_images, p->extension_directories);
r = append_extensions(&ml, root, p->private_namespace_dir, hierarchies, p->extension_images, p->n_extension_images, p->extension_directories);
if (r < 0)
return r;
@ -2530,10 +2599,12 @@ int setup_namespace(const NamespaceParameters *p, char **error_path) {
if (setup_propagate)
(void) mkdir_p(p->propagate_dir, 0600);
if (p->n_extension_images > 0 || !strv_isempty(p->extension_directories))
if (p->n_extension_images > 0 || !strv_isempty(p->extension_directories)) {
/* ExtensionImages/Directories mountpoint directories will be created while parsing the
* mounts to create, so have the parent ready */
(void) mkdir_p(p->extension_dir, 0600);
char *extension_dir = strjoina(p->private_namespace_dir, "/unit-extensions");
(void) mkdir_p(extension_dir, 0600);
}
/* Remount / as SLAVE so that nothing now mounted in the namespace
* shows up in the parent */
@ -3074,3 +3145,11 @@ static const char* const proc_subset_table[_PROC_SUBSET_MAX] = {
};
DEFINE_STRING_TABLE_LOOKUP(proc_subset, ProcSubset);
static const char* const private_tmp_table[_PRIVATE_TMP_MAX] = {
[PRIVATE_TMP_OFF] = "off",
[PRIVATE_TMP_CONNECTED] = "connected",
[PRIVATE_TMP_DISCONNECTED] = "disconnected",
};
DEFINE_STRING_TABLE_LOOKUP(private_tmp, PrivateTmp);

View File

@ -53,6 +53,14 @@ typedef enum ProcSubset {
_PROC_SUBSET_INVALID = -EINVAL,
} ProcSubset;
typedef enum PrivateTmp {
PRIVATE_TMP_OFF,
PRIVATE_TMP_CONNECTED, /* Bind mounted from the host's filesystem */
PRIVATE_TMP_DISCONNECTED, /* A completely private tmpfs, invisible from the host */
_PRIVATE_TMP_MAX,
_PRIVATE_TMP_INVALID = -EINVAL,
} PrivateTmp;
struct BindMount {
char *source;
char *destination;
@ -127,7 +135,7 @@ struct NamespaceParameters {
const char *propagate_dir;
const char *incoming_dir;
const char *extension_dir;
const char *private_namespace_dir;
const char *notify_socket;
const char *host_os_release_stage;
@ -150,6 +158,7 @@ struct NamespaceParameters {
ProtectSystem protect_system;
ProtectProc protect_proc;
ProcSubset proc_subset;
PrivateTmp private_tmp;
};
int setup_namespace(const NamespaceParameters *p, char **error_path);
@ -184,6 +193,9 @@ ProtectProc protect_proc_from_string(const char *s) _pure_;
const char* proc_subset_to_string(ProcSubset i) _const_;
ProcSubset proc_subset_from_string(const char *s) _pure_;
const char* private_tmp_to_string(PrivateTmp i) _const_;
PrivateTmp private_tmp_from_string(const char *s) _pure_;
void bind_mount_free_many(BindMount *b, size_t n);
int bind_mount_add(BindMount **b, size_t *n, const BindMount *item);

View File

@ -1275,7 +1275,7 @@ int unit_add_exec_dependencies(Unit *u, ExecContext *c) {
return r;
}
if (c->private_tmp) {
if (c->private_tmp == PRIVATE_TMP_CONNECTED) {
r = unit_add_mounts_for(u, "/tmp", UNIT_DEPENDENCY_FILE, UNIT_MOUNT_WANTS);
if (r < 0)
return r;
@ -4287,7 +4287,11 @@ int unit_patch_contexts(Unit *u) {
* UID/GID around in the file system or on IPC objects. Hence enforce a strict
* sandbox. */
ec->private_tmp = true;
/* With DynamicUser= we want private directories, so if the user hasn't manually
* selected PrivateTmp=, enable it, but to a fully private (disconnected) tmpfs
* instance. */
if (ec->private_tmp == PRIVATE_TMP_OFF)
ec->private_tmp = PRIVATE_TMP_DISCONNECTED;
ec->remove_ipc = true;
ec->protect_system = PROTECT_SYSTEM_STRICT;
if (ec->protect_home == PROTECT_HOME_NO)

View File

@ -340,6 +340,19 @@ if [[ ! -v ASAN_OPTIONS ]] && systemctl --version | grep "+BPF_FRAMEWORK" && ker
(! systemd-run --wait --pipe -p RestrictFileSystems="~proc devtmpfs sysfs" ls /sys)
fi
if [[ ! -v ASAN_OPTIONS ]]; then
# Ensure DynamicUser=yes does not imply PrivateTmp=yes if TemporaryFileSystem=/tmp /var/tmp is set
systemd-run --unit test-07-dynamic-user-tmp.service \
--service-type=notify \
-p DynamicUser=yes \
-p NotifyAccess=all \
sh -c 'touch /tmp/a && touch /var/tmp/b && ! test -f /tmp/b && ! test -f /var/tmp/a && systemd-notify --ready && sleep infinity'
(! ls /tmp/systemd-private-"$(tr -d '-' < /proc/sys/kernel/random/boot_id)"-test-07-dynamic-user-tmp.service-* &>/dev/null)
(! ls /var/tmp/systemd-private-"$(tr -d '-' < /proc/sys/kernel/random/boot_id)"-test-07-dynamic-user-tmp.service-* &>/dev/null)
systemctl is-active test-07-dynamic-user-tmp.service
systemctl stop test-07-dynamic-user-tmp.service
fi
# Make sure we properly (de)serialize various string arrays, including whitespaces
# See: https://github.com/systemd/systemd/issues/31214
systemd-run --wait --pipe -p Environment="FOO='bar4 '" \