Merge pull request #32516 from YHNdnzj/core-cleanup

core: several cleanups
This commit is contained in:
Mike Yuan 2024-04-27 19:43:27 +08:00 committed by GitHub
commit 119bc912a8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 105 additions and 169 deletions

View file

@ -125,9 +125,10 @@
<listitem><para>Takes a directory path relative to the host's root directory (i.e. the root of the system
running the service manager). Sets the root directory for executed processes, with the <citerefentry
project='man-pages'><refentrytitle>chroot</refentrytitle><manvolnum>2</manvolnum></citerefentry> system
call. If this is used, it must be ensured that the process binary and all its auxiliary files are available in
the <function>chroot()</function> jail. Note that setting this parameter might result in additional
project='man-pages'><refentrytitle>pivot_root</refentrytitle><manvolnum>2</manvolnum></citerefentry>
or <citerefentry project='man-pages'><refentrytitle>chroot</refentrytitle><manvolnum>2</manvolnum></citerefentry>
system call. If this is used, it must be ensured that the process binary and all its auxiliary files
are available in the new root. Note that setting this parameter might result in additional
dependencies to be added to the unit (see above).</para>
<para>The <varname>MountAPIVFS=</varname> and <varname>PrivateUsers=</varname> settings are particularly useful

View file

@ -1744,6 +1744,9 @@ int bus_exec_context_set_transient_property(
if (streq(name, "PrivateMounts"))
return bus_set_transient_tristate(u, name, &c->private_mounts, message, flags, error);
if (streq(name, "MountAPIVFS"))
return bus_set_transient_tristate(u, name, &c->mount_apivfs, message, flags, error);
if (streq(name, "PrivateNetwork"))
return bus_set_transient_bool(u, name, &c->private_network, message, flags, error);
@ -2711,20 +2714,6 @@ int bus_exec_context_set_transient_property(
return 1;
} else if (streq(name, "MountAPIVFS")) {
bool b;
r = bus_set_transient_bool(u, name, &b, message, flags, error);
if (r < 0)
return r;
if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
c->mount_apivfs = b;
c->mount_apivfs_set = true;
}
return 1;
} else if (streq(name, "WorkingDirectory")) {
_cleanup_free_ char *simplified = NULL;
bool missing_ok, is_home;

View file

@ -3861,7 +3861,7 @@ static bool exec_context_need_unprivileged_private_users(
context->private_ipc ||
context->ipc_namespace_path ||
context->private_mounts > 0 ||
context->mount_apivfs ||
context->mount_apivfs > 0 ||
context->n_bind_mounts > 0 ||
context->n_temporary_filesystems > 0 ||
context->root_directory ||

View file

@ -1597,7 +1597,7 @@ static int exec_parameters_deserialize(ExecParameters *p, FILE *f, FDSet *fds) {
if (fd < 0)
continue;
p->stdin_fd = fd;
close_and_replace(p->stdin_fd, fd);
} else if ((val = startswith(l, "exec-parameters-stdout-fd="))) {
int fd;
@ -1606,7 +1606,7 @@ static int exec_parameters_deserialize(ExecParameters *p, FILE *f, FDSet *fds) {
if (fd < 0)
continue;
p->stdout_fd = fd;
close_and_replace(p->stdout_fd, fd);
} else if ((val = startswith(l, "exec-parameters-stderr-fd="))) {
int fd;
@ -1615,7 +1615,7 @@ static int exec_parameters_deserialize(ExecParameters *p, FILE *f, FDSet *fds) {
if (fd < 0)
continue;
p->stderr_fd = fd;
close_and_replace(p->stderr_fd, fd);
} else if ((val = startswith(l, "exec-parameters-exec-fd="))) {
int fd;
@ -1623,7 +1623,7 @@ static int exec_parameters_deserialize(ExecParameters *p, FILE *f, FDSet *fds) {
if (fd < 0)
continue;
p->exec_fd = fd;
close_and_replace(p->exec_fd, fd);
} else if ((val = startswith(l, "exec-parameters-handoff-timestamp-fd="))) {
int fd;
@ -1639,13 +1639,13 @@ static int exec_parameters_deserialize(ExecParameters *p, FILE *f, FDSet *fds) {
if (fd < 0)
continue;
p->bpf_restrict_fs_map_fd = fd;
close_and_replace(p->bpf_restrict_fs_map_fd, fd);
} else if ((val = startswith(l, "exec-parameters-notify-socket="))) {
r = free_and_strdup(&p->notify_socket, val);
if (r < 0)
return r;
} else if ((val = startswith(l, "exec-parameters-open-file="))) {
OpenFile *of = NULL;
OpenFile *of;
r = open_file_parse(val, &of);
if (r < 0)
@ -1663,7 +1663,7 @@ static int exec_parameters_deserialize(ExecParameters *p, FILE *f, FDSet *fds) {
if (fd < 0)
continue;
p->user_lookup_fd = fd;
close_and_replace(p->user_lookup_fd, fd);
} else if ((val = startswith(l, "exec-parameters-files-env="))) {
r = deserialize_strv(val, &p->files_env);
if (r < 0)
@ -1832,6 +1832,10 @@ static int exec_context_serialize(const ExecContext *c, FILE *f) {
if (r < 0)
return r;
r = serialize_item_tristate(f, "exec-context-mount-api-vfs", c->mount_apivfs);
if (r < 0)
return r;
r = serialize_item_tristate(f, "exec-context-memory-ksm", c->memory_ksm);
if (r < 0)
return r;
@ -1888,12 +1892,6 @@ static int exec_context_serialize(const ExecContext *c, FILE *f) {
if (r < 0)
return r;
if (c->mount_apivfs_set) {
r = serialize_bool(f, "exec-context-mount-api-vfs", c->mount_apivfs);
if (r < 0)
return r;
}
r = serialize_bool_elide(f, "exec-context-same-pgrp", c->same_pgrp);
if (r < 0)
return r;
@ -2713,6 +2711,10 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
r = safe_atoi(val, &c->private_mounts);
if (r < 0)
return r;
} else if ((val = startswith(l, "exec-context-mount-api-vfs="))) {
r = safe_atoi(val, &c->mount_apivfs);
if (r < 0)
return r;
} else if ((val = startswith(l, "exec-context-memory-ksm="))) {
r = safe_atoi(val, &c->memory_ksm);
if (r < 0)
@ -2780,12 +2782,6 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
c->protect_system = protect_system_from_string(val);
if (c->protect_system < 0)
return -EINVAL;
} else if ((val = startswith(l, "exec-context-mount-api-vfs="))) {
r = parse_boolean(val);
if (r < 0)
return r;
c->mount_apivfs = r;
c->mount_apivfs_set = true;
} else if ((val = startswith(l, "exec-context-same-pgrp="))) {
r = parse_boolean(val);
if (r < 0)

View file

@ -504,6 +504,7 @@ void exec_context_init(ExecContext *c) {
.tty_rows = UINT_MAX,
.tty_cols = UINT_MAX,
.private_mounts = -1,
.mount_apivfs = -1,
.memory_ksm = -1,
.set_login_environment = -1,
};
@ -1440,8 +1441,8 @@ bool exec_context_get_effective_mount_apivfs(const ExecContext *c) {
assert(c);
/* Explicit setting wins */
if (c->mount_apivfs_set)
return c->mount_apivfs;
if (c->mount_apivfs >= 0)
return c->mount_apivfs > 0;
/* Default to "yes" if root directory or image are specified */
if (exec_context_with_rootfs(c))

View file

@ -200,7 +200,6 @@ struct ExecContext {
bool nice_set:1;
bool ioprio_set:1;
bool cpu_sched_set:1;
bool mount_apivfs_set:1;
/* This is not exposed to the user but available internally. We need it to make sure that whenever we
* spawn /usr/bin/mount it is run in the same process group as us so that the autofs logic detects
@ -313,6 +312,7 @@ struct ExecContext {
ProcSubset proc_subset; /* subset= */
int private_mounts;
int mount_apivfs;
int memory_ksm;
bool private_tmp;
bool private_network;
@ -327,7 +327,6 @@ struct ExecContext {
ProtectSystem protect_system;
ProtectHome protect_home;
bool protect_hostname;
bool mount_apivfs;
bool dynamic_user;
bool remove_ipc;

View file

@ -136,7 +136,7 @@
{{type}}.ProtectSystem, config_parse_protect_system, 0, offsetof({{type}}, exec_context.protect_system)
{{type}}.ProtectHome, config_parse_protect_home, 0, offsetof({{type}}, exec_context.protect_home)
{{type}}.MountFlags, config_parse_exec_mount_propagation_flag, 0, offsetof({{type}}, exec_context.mount_propagation_flag)
{{type}}.MountAPIVFS, config_parse_exec_mount_apivfs, 0, offsetof({{type}}, exec_context)
{{type}}.MountAPIVFS, config_parse_tristate, 0, offsetof({{type}}, exec_context.mount_apivfs)
{{type}}.Personality, config_parse_personality, 0, offsetof({{type}}, exec_context.personality)
{{type}}.RuntimeDirectoryPreserve, config_parse_exec_preserve_mode, 0, offsetof({{type}}, exec_context.runtime_directory_preserve_mode)
{{type}}.RuntimeDirectoryMode, config_parse_mode, 0, offsetof({{type}}, exec_context.directories[EXEC_DIRECTORY_RUNTIME].mode)

View file

@ -1496,43 +1496,6 @@ int config_parse_exec_cpu_sched_policy(const char *unit,
return 0;
}
/* Config parser callback for the MountAPIVFS= setting. 'data' must point to the ExecContext to update.
 *
 * An empty right-hand side clears both the stored value and the "explicitly set" marker. A value that
 * parse_boolean() rejects is logged via log_syntax() and otherwise ignored. Any other value is stored and
 * flagged as explicitly set. Returns 0 in all of these cases. */
int config_parse_exec_mount_apivfs(const char *unit,
                                   const char *filename,
                                   unsigned line,
                                   const char *section,
                                   unsigned section_line,
                                   const char *lvalue,
                                   int ltype,
                                   const char *rvalue,
                                   void *data,
                                   void *userdata) {

        ExecContext *context = ASSERT_PTR(data);

        assert(filename);
        assert(lvalue);
        assert(rvalue);

        /* Empty assignment: forget any earlier explicit choice. */
        if (isempty(rvalue)) {
                context->mount_apivfs = false;
                context->mount_apivfs_set = false;
                return 0;
        }

        int parsed = parse_boolean(rvalue);
        if (parsed < 0) {
                /* Bad input is not fatal; the previous state is left untouched. */
                log_syntax(unit, LOG_WARNING, filename, line, parsed,
                           "Failed to parse boolean value, ignoring: %s",
                           rvalue);
                return 0;
        }

        context->mount_apivfs = parsed;
        context->mount_apivfs_set = true;

        return 0;
}
int config_parse_numa_mask(const char *unit,
const char *filename,
unsigned line,
@ -5213,7 +5176,7 @@ int config_parse_bind_paths(
void *userdata) {
ExecContext *c = ASSERT_PTR(data);
const Unit *u = userdata;
const Unit *u = ASSERT_PTR(userdata);
int r;
assert(filename);

View file

@ -456,8 +456,7 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
fd = deserialize_fd(fds, val);
if (fd >= 0) {
m->notify_event_source = sd_event_source_disable_unref(m->notify_event_source);
safe_close(m->notify_fd);
m->notify_fd = fd;
close_and_replace(m->notify_fd, fd);
}
} else if ((val = startswith(l, "notify-socket="))) {
@ -471,8 +470,7 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
fd = deserialize_fd(fds, val);
if (fd >= 0) {
m->cgroups_agent_event_source = sd_event_source_disable_unref(m->cgroups_agent_event_source);
safe_close(m->cgroups_agent_fd);
m->cgroups_agent_fd = fd;
close_and_replace(m->cgroups_agent_fd, fd);
}
} else if ((val = startswith(l, "user-lookup="))) {
@ -484,6 +482,15 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
if (r < 0)
log_warning_errno(r, "Failed to parse user-lookup fds: \"%s\", ignoring: %m", val);
} else if ((val = startswith(l, "handoff-timestamp-fds="))) {
m->handoff_timestamp_event_source = sd_event_source_disable_unref(m->handoff_timestamp_event_source);
safe_close_pair(m->handoff_timestamp_fds);
r = deserialize_fd_many(fds, val, 2, m->handoff_timestamp_fds);
if (r < 0)
log_warning_errno(r, "Failed to parse handoff-timestamp fds: \"%s\", ignoring: %m", val);
} else if ((val = startswith(l, "dynamic-user=")))
dynamic_user_deserialize_one(m, val, fds, NULL);
else if ((val = startswith(l, "destroy-ipc-uid=")))

View file

@ -902,42 +902,41 @@ static void drop_outside_root(MountList *ml, const char *root_directory) {
ml->n_mounts = t - ml->mounts;
}
static int clone_device_node(
const char *d,
const char *temporary_mount,
bool *make_devnode) {
static int clone_device_node(const char *node, const char *temporary_mount, bool *make_devnode) {
_cleanup_free_ char *sl = NULL;
const char *dn, *bn, *t;
const char *dn, *bn;
struct stat st;
int r;
if (stat(d, &st) < 0) {
assert(node);
assert(path_is_absolute(node));
assert(temporary_mount);
assert(make_devnode);
if (stat(node, &st) < 0) {
if (errno == ENOENT) {
log_debug_errno(errno, "Device node '%s' to clone does not exist, ignoring.", d);
log_debug_errno(errno, "Device node '%s' to clone does not exist.", node);
return -ENXIO;
}
return log_debug_errno(errno, "Failed to stat() device node '%s' to clone, ignoring: %m", d);
return log_debug_errno(errno, "Failed to stat() device node '%s' to clone: %m", node);
}
if (!S_ISBLK(st.st_mode) &&
!S_ISCHR(st.st_mode))
return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
"Device node '%s' to clone is not a device node, ignoring.",
d);
r = stat_verify_device_node(&st);
if (r < 0)
return log_debug_errno(r, "Cannot clone device node '%s': %m", node);
dn = strjoina(temporary_mount, d);
dn = strjoina(temporary_mount, node);
/* First, try to create device node properly */
if (*make_devnode) {
mac_selinux_create_file_prepare(d, st.st_mode);
mac_selinux_create_file_prepare(node, st.st_mode);
r = mknod(dn, st.st_mode, st.st_rdev);
mac_selinux_create_file_clear();
if (r >= 0)
goto add_symlink;
if (errno != EPERM)
return log_debug_errno(errno, "mknod failed for %s: %m", d);
return log_debug_errno(errno, "Failed to mknod '%s': %m", node);
/* This didn't work, let's not try this again for the next iterations. */
*make_devnode = false;
@ -947,17 +946,17 @@ static int clone_device_node(
* Do not prepare device-node SELinux label (see issue 13762) */
r = mknod(dn, S_IFREG, 0);
if (r < 0 && errno != EEXIST)
return log_debug_errno(errno, "mknod() fallback failed for '%s': %m", d);
return log_debug_errno(errno, "Failed to mknod dummy device node for '%s': %m", node);
/* Fallback to bind-mounting: The assumption here is that all used device nodes carry standard
* properties. Specifically, the device nodes we bind-mount should either be owned by root:root or
* root:tty (e.g. /dev/tty, /dev/ptmx) and should not carry ACLs. */
r = mount_nofollow_verbose(LOG_DEBUG, d, dn, NULL, MS_BIND, NULL);
r = mount_nofollow_verbose(LOG_DEBUG, node, dn, NULL, MS_BIND, NULL);
if (r < 0)
return r;
add_symlink:
bn = path_startswith(d, "/dev/");
bn = path_startswith(node, "/dev/");
if (!bn)
return 0;
@ -970,14 +969,27 @@ add_symlink:
(void) mkdir_parents(sl, 0755);
t = strjoina("../", bn);
const char *t = strjoina("../", bn);
if (symlink(t, sl) < 0)
log_debug_errno(errno, "Failed to symlink '%s' to '%s', ignoring: %m", t, sl);
return 0;
}
static char *settle_runtime_dir(RuntimeScope scope) {
/* Bind mounts the absolute host directory 'dir' onto the path formed by joining 'temporary_mount' and 'dir'.
 * The target directory is created first on a best-effort basis (the mkdir() result is deliberately
 * ignored). Returns the result of mount_nofollow_verbose(). */
static int bind_mount_device_dir(const char *temporary_mount, const char *dir) {
        assert(temporary_mount);
        assert(dir);
        assert(path_is_absolute(dir));

        /* 'dir' is asserted to be absolute above, so it already carries the leading slash for the join. */
        const char *target = strjoina(temporary_mount, dir);

        (void) mkdir(target, 0755);

        int r = mount_nofollow_verbose(LOG_DEBUG, dir, target, NULL, MS_BIND, NULL);
        return r;
}
static char* settle_runtime_dir(RuntimeScope scope) {
char *runtime_dir;
if (scope != RUNTIME_SCOPE_USER)
@ -1018,8 +1030,8 @@ static int mount_private_dev(MountEntry *m, RuntimeScope scope) {
"/dev/urandom\0"
"/dev/tty\0";
_cleanup_free_ char *temporary_mount = NULL;
const char *dev = NULL, *devpts = NULL, *devshm = NULL, *devhugepages = NULL, *devmqueue = NULL, *devlog = NULL, *devptmx = NULL;
_cleanup_(rmdir_and_freep) char *temporary_mount = NULL;
_cleanup_(umount_and_rmdir_and_freep) char *dev = NULL;
bool can_mknod = true;
int r;
@ -1029,67 +1041,56 @@ static int mount_private_dev(MountEntry *m, RuntimeScope scope) {
if (r < 0)
return r;
dev = strjoina(temporary_mount, "/dev");
dev = path_join(temporary_mount, "dev");
if (!dev)
return -ENOMEM;
(void) mkdir(dev, 0755);
r = mount_nofollow_verbose(LOG_DEBUG, "tmpfs", dev, "tmpfs", DEV_MOUNT_OPTIONS, "mode=0755" TMPFS_LIMITS_PRIVATE_DEV);
if (r < 0)
goto fail;
return r;
r = label_fix_full(AT_FDCWD, dev, "/dev", 0);
if (r < 0) {
log_debug_errno(r, "Failed to fix label of '%s' as /dev: %m", dev);
goto fail;
}
devpts = strjoina(temporary_mount, "/dev/pts");
(void) mkdir(devpts, 0755);
r = mount_nofollow_verbose(LOG_DEBUG, "/dev/pts", devpts, NULL, MS_BIND, NULL);
if (r < 0)
goto fail;
return log_debug_errno(r, "Failed to fix label of '%s' as /dev/: %m", dev);
r = bind_mount_device_dir(temporary_mount, "/dev/pts");
if (r < 0)
return r;
/* /dev/ptmx can either be a device node or a symlink to /dev/pts/ptmx.
* When /dev/ptmx is a device node, /dev/pts/ptmx has 000 permissions making it inaccessible.
* Thus, in that case make a clone.
* In nspawn and other containers it will be a symlink, in that case make it a symlink. */
r = is_symlink("/dev/ptmx");
if (r < 0) {
log_debug_errno(r, "Failed to detect whether /dev/ptmx is a symlink or not: %m");
goto fail;
} else if (r > 0) {
devptmx = strjoina(temporary_mount, "/dev/ptmx");
if (symlink("pts/ptmx", devptmx) < 0) {
r = log_debug_errno(errno, "Failed to create a symlink '%s' to pts/ptmx: %m", devptmx);
goto fail;
}
if (r < 0)
return log_debug_errno(r, "Failed to detect whether /dev/ptmx is a symlink or not: %m");
if (r > 0) {
const char *devptmx = strjoina(temporary_mount, "/dev/ptmx");
if (symlink("pts/ptmx", devptmx) < 0)
return log_debug_errno(errno, "Failed to create symlink '%s' to pts/ptmx: %m", devptmx);
} else {
r = clone_device_node("/dev/ptmx", temporary_mount, &can_mknod);
if (r < 0)
goto fail;
return r;
}
devshm = strjoina(temporary_mount, "/dev/shm");
(void) mkdir(devshm, 0755);
r = mount_nofollow_verbose(LOG_DEBUG, "/dev/shm", devshm, NULL, MS_BIND, NULL);
r = bind_mount_device_dir(temporary_mount, "/dev/shm");
if (r < 0)
goto fail;
return r;
devmqueue = strjoina(temporary_mount, "/dev/mqueue");
(void) mkdir(devmqueue, 0755);
(void) mount_nofollow_verbose(LOG_DEBUG, "/dev/mqueue", devmqueue, NULL, MS_BIND, NULL);
FOREACH_STRING(d, "/dev/mqueue", "/dev/hugepages")
(void) bind_mount_device_dir(temporary_mount, d);
devhugepages = strjoina(temporary_mount, "/dev/hugepages");
(void) mkdir(devhugepages, 0755);
(void) mount_nofollow_verbose(LOG_DEBUG, "/dev/hugepages", devhugepages, NULL, MS_BIND, NULL);
devlog = strjoina(temporary_mount, "/dev/log");
const char *devlog = strjoina(temporary_mount, "/dev/log");
if (symlink("/run/systemd/journal/dev-log", devlog) < 0)
log_debug_errno(errno, "Failed to create a symlink '%s' to /run/systemd/journal/dev-log, ignoring: %m", devlog);
log_debug_errno(errno, "Failed to create symlink '%s' to /run/systemd/journal/dev-log, ignoring: %m", devlog);
NULSTR_FOREACH(d, devnodes) {
r = clone_device_node(d, temporary_mount, &can_mknod);
/* ENXIO means the *source* is not a device file, skip creation in that case */
if (r < 0 && r != -ENXIO)
goto fail;
return r;
}
r = dev_setup(temporary_mount, UID_INVALID, GID_INVALID);
@ -1107,31 +1108,10 @@ static int mount_private_dev(MountEntry *m, RuntimeScope scope) {
r = mount_nofollow_verbose(LOG_DEBUG, dev, mount_entry_path(m), NULL, MS_MOVE, NULL);
if (r < 0)
goto fail;
(void) rmdir(dev);
(void) rmdir(temporary_mount);
return r;
dev = rmdir_and_free(dev); /* Mount is successfully moved, do not umount() */
return 1;
fail:
if (devpts)
(void) umount_verbose(LOG_DEBUG, devpts, UMOUNT_NOFOLLOW);
if (devshm)
(void) umount_verbose(LOG_DEBUG, devshm, UMOUNT_NOFOLLOW);
if (devhugepages)
(void) umount_verbose(LOG_DEBUG, devhugepages, UMOUNT_NOFOLLOW);
if (devmqueue)
(void) umount_verbose(LOG_DEBUG, devmqueue, UMOUNT_NOFOLLOW);
(void) umount_verbose(LOG_DEBUG, dev, UMOUNT_NOFOLLOW);
(void) rmdir(dev);
(void) rmdir(temporary_mount);
return r;
}
static int mount_bind_dev(const MountEntry *m) {
@ -2644,9 +2624,9 @@ int setup_namespace(const NamespaceParameters *p, char **error_path) {
void bind_mount_free_many(BindMount *b, size_t n) {
assert(b || n == 0);
for (size_t i = 0; i < n; i++) {
free(b[i].source);
free(b[i].destination);
FOREACH_ARRAY(i, b, n) {
free(i->source);
free(i->destination);
}
free(b);