Merge pull request #27890 from bluca/executor

core: add systemd-executor binary
This commit is contained in:
Luca Boccassi 2023-10-13 22:01:16 +01:00 committed by GitHub
commit ccba67f494
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
42 changed files with 10314 additions and 5107 deletions

View file

@ -201,3 +201,25 @@ can be found under various directories such as `factory/`, `modprobe.d/`, `netwo
`tools/`, `coccinelle/`, `.github/`, `.semaphore/`, `.mkosi/` host various
utilities and scripts that are used by maintainers and developers. They are not
shipped or installed.
# Service Manager Overview
The Service Manager takes configuration in the form of unit files, credentials,
kernel command line options and D-Bus commands, and based on those manages the
system and spawns other processes. It runs in system mode as PID1, and in user
mode with one instance per user session.
When starting a unit requires forking a new process, configuration for the new
process will be serialized and passed over to the new process, created via a
posix_spawn() call. This is done in order to avoid excessive processing after
a fork() but before an exec(), which is against glibc's best practices and can
also result in a copy-on-write trap. The new process will start as the
`systemd-executor` binary, which will deserialize the configuration and apply
all the options (sandboxing, namespacing, cgroup, etc.) before exec'ing the
configured executable.
```
┌──────┐posix_spawn() ┌───────────┐execve() ┌────────┐
│ PID1 ├─────────────►│sd-executor├────────►│program │
└──────┘ (memfd) └───────────┘ └────────┘
```

View file

@ -225,6 +225,7 @@ conf.set_quoted('SYSCONF_DIR', sysconfdir)
conf.set_quoted('SYSCTL_DIR', sysctldir)
conf.set_quoted('SYSTEMCTL_BINARY_PATH', bindir / 'systemctl')
conf.set_quoted('SYSTEMD_BINARY_PATH', libexecdir / 'systemd')
conf.set_quoted('SYSTEMD_EXECUTOR_BINARY_PATH', libexecdir / 'systemd-executor')
conf.set_quoted('SYSTEMD_CATALOG_DIR', catalogdir)
conf.set_quoted('SYSTEMD_CGROUPS_AGENT_PATH', libexecdir / 'systemd-cgroups-agent')
conf.set_quoted('SYSTEMD_CRYPTSETUP_PATH', bindir / 'systemd-cryptsetup')

View file

@ -36,7 +36,7 @@ typedef enum CGroupController {
CGROUP_CONTROLLER_BPF_SOCKET_BIND,
CGROUP_CONTROLLER_BPF_RESTRICT_NETWORK_INTERFACES,
/* The BPF hook implementing RestrictFileSystems= is not defined here.
* It's applied as late as possible in exec_child() so we don't block
* It's applied as late as possible in exec_invoke() so we don't block
* our own unit setup code. */
_CGROUP_CONTROLLER_MAX,

View file

@ -1074,3 +1074,22 @@ int getenv_steal_erase(const char *name, char **ret) {
return 1;
}
int set_full_environment(char **env) {
int r;
clearenv();
STRV_FOREACH(e, env) {
_cleanup_free_ char *k = NULL, *v = NULL;
r = split_pair(*e, "=", &k, &v);
if (r < 0)
return r;
if (setenv(k, v, /* overwrite= */ true) < 0)
return -errno;
}
return 0;
}

View file

@ -77,3 +77,5 @@ int setenv_systemd_exec_pid(bool update_only);
int getenv_path_list(const char *name, char ***ret_paths);
int getenv_steal_erase(const char *name, char **ret);
int set_full_environment(char **env);

View file

@ -5,6 +5,7 @@
#include <limits.h>
#include <linux/oom.h>
#include <pthread.h>
#include <spawn.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
@ -1732,6 +1733,51 @@ int make_reaper_process(bool b) {
return 0;
}
/* Spawns 'path' with argument vector 'argv' and environment 'envp' through posix_spawn() and stores the PID
 * of the new process in 'ret_pid'. The child is started with all signals blocked. Returns 0 on success, or
 * a negative errno-style value on failure, in which case 'ret_pid' is not written. */
int posix_spawn_wrapper(const char *path, char *const *argv, char *const *envp, pid_t *ret_pid) {
        posix_spawnattr_t attr;
        sigset_t mask;
        pid_t pid;
        int r;

        /* Forks and invokes 'path' with 'argv' and 'envp' using CLONE_VM and CLONE_VFORK, which means the
         * caller will be blocked until the child either exits or exec's. The memory of the child will be
         * fully shared with the memory of the parent, so that there are no copy-on-write or memory.max
         * issues. */

        assert(path);
        assert(argv);
        assert(ret_pid);

        assert_se(sigfillset(&mask) >= 0);

        r = posix_spawnattr_init(&attr);
        if (r != 0)
                return -r; /* These functions return a positive errno on failure */

        /* posix_spawnattr_setflags() replaces the complete flag set instead of OR-ing into it, so both flags
         * have to be passed in one call. Setting them in two calls would drop POSIX_SPAWN_SETSIGMASK and the
         * mask configured below would never be applied.
         * NOTE(review): posix_spawnattr_setsigdefault() is not called, so POSIX_SPAWN_SETSIGDEF acts on the
         * default-initialised sigdefault set only; confirm whether that is intended. */
        r = posix_spawnattr_setflags(&attr, POSIX_SPAWN_SETSIGMASK|POSIX_SPAWN_SETSIGDEF);
        if (r != 0)
                goto fail;

        r = posix_spawnattr_setsigmask(&attr, &mask);
        if (r != 0)
                goto fail;

        r = posix_spawn(&pid, path, NULL, &attr, argv, envp);
        if (r != 0)
                goto fail;

        *ret_pid = pid;

        posix_spawnattr_destroy(&attr);
        return 0;

fail:
        assert(r > 0);
        posix_spawnattr_destroy(&attr);
        return -r;
}
static const char *const sigchld_code_table[] = {
[CLD_EXITED] = "exited",
[CLD_KILLED] = "killed",

View file

@ -210,3 +210,5 @@ int get_process_threads(pid_t pid);
int is_reaper_process(void);
int make_reaper_process(bool b);
int posix_spawn_wrapper(const char *path, char *const *argv, char *const *envp, pid_t *ret_pid);

View file

@ -154,18 +154,14 @@ int lsm_bpf_setup(Manager *m) {
return 0;
}
int lsm_bpf_unit_restrict_filesystems(Unit *u, const Set *filesystems, bool allow_list) {
int lsm_bpf_restrict_filesystems(const Set *filesystems, uint64_t cgroup_id, int outer_map_fd, bool allow_list) {
uint32_t dummy_value = 1, zero = 0;
const char *fs;
const statfs_f_type_t *magic;
int r;
assert(filesystems);
assert(u);
if (!u->manager->restrict_fs)
return log_unit_error_errno(u, SYNTHETIC_ERRNO(EINVAL),
"bpf-lsm: BPF LSM object is not installed, has setup failed?");
assert(outer_map_fd >= 0);
int inner_map_fd = compat_bpf_map_create(
BPF_MAP_TYPE_HASH,
@ -175,39 +171,35 @@ int lsm_bpf_unit_restrict_filesystems(Unit *u, const Set *filesystems, bool allo
128U, /* Should be enough for all filesystem types */
NULL);
if (inner_map_fd < 0)
return log_unit_error_errno(u, errno, "bpf-lsm: Failed to create inner BPF map: %m");
return log_error_errno(errno, "bpf-lsm: Failed to create inner BPF map: %m");
int outer_map_fd = sym_bpf_map__fd(u->manager->restrict_fs->maps.cgroup_hash);
if (outer_map_fd < 0)
return log_unit_error_errno(u, errno, "bpf-lsm: Failed to get BPF map fd: %m");
if (sym_bpf_map_update_elem(outer_map_fd, &u->cgroup_id, &inner_map_fd, BPF_ANY) != 0)
return log_unit_error_errno(u, errno, "bpf-lsm: Error populating BPF map: %m");
if (sym_bpf_map_update_elem(outer_map_fd, &cgroup_id, &inner_map_fd, BPF_ANY) != 0)
return log_error_errno(errno, "bpf-lsm: Error populating BPF map: %m");
uint32_t allow = allow_list;
/* Use key 0 to store whether this is an allow list or a deny list */
if (sym_bpf_map_update_elem(inner_map_fd, &zero, &allow, BPF_ANY) != 0)
return log_unit_error_errno(u, errno, "bpf-lsm: Error initializing map: %m");
return log_error_errno(errno, "bpf-lsm: Error initializing map: %m");
SET_FOREACH(fs, filesystems) {
r = fs_type_from_string(fs, &magic);
if (r < 0) {
log_unit_warning(u, "bpf-lsm: Invalid filesystem name '%s', ignoring.", fs);
log_warning("bpf-lsm: Invalid filesystem name '%s', ignoring.", fs);
continue;
}
log_unit_debug(u, "bpf-lsm: Restricting filesystem access to '%s'", fs);
log_debug("bpf-lsm: Restricting filesystem access to '%s'", fs);
for (int i = 0; i < FILESYSTEM_MAGIC_MAX; i++) {
if (magic[i] == 0)
break;
if (sym_bpf_map_update_elem(inner_map_fd, &magic[i], &dummy_value, BPF_ANY) != 0) {
r = log_unit_error_errno(u, errno, "bpf-lsm: Failed to update BPF map: %m");
r = log_error_errno(errno, "bpf-lsm: Failed to update BPF map: %m");
if (sym_bpf_map_delete_elem(outer_map_fd, &u->cgroup_id) != 0)
log_unit_debug_errno(u, errno, "bpf-lsm: Failed to delete cgroup entry from BPF map: %m");
if (sym_bpf_map_delete_elem(outer_map_fd, &cgroup_id) != 0)
log_debug_errno(errno, "bpf-lsm: Failed to delete cgroup entry from BPF map: %m");
return r;
}
@ -263,8 +255,8 @@ int lsm_bpf_setup(Manager *m) {
return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "bpf-lsm: Failed to set up LSM BPF: %m");
}
int lsm_bpf_unit_restrict_filesystems(Unit *u, const Set *filesystems, const bool allow_list) {
return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), "bpf-lsm: Failed to restrict filesystems using LSM BPF: %m");
int lsm_bpf_restrict_filesystems(const Set *filesystems, uint64_t cgroup_id, int outer_map_fd, const bool allow_list) {
return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "bpf-lsm: Failed to restrict filesystems using LSM BPF: %m");
}
int lsm_bpf_cleanup(const Unit *u) {

View file

@ -16,7 +16,7 @@ typedef struct restrict_fs_bpf restrict_fs_bpf;
bool lsm_bpf_supported(bool initialize);
int lsm_bpf_setup(Manager *m);
int lsm_bpf_unit_restrict_filesystems(Unit *u, const Set *filesystems, bool allow_list);
int lsm_bpf_restrict_filesystems(const Set *filesystems, uint64_t cgroup_id, int outer_map_fd, bool allow_list);
int lsm_bpf_cleanup(const Unit *u);
int lsm_bpf_map_restrict_fs_fd(Unit *u);
void lsm_bpf_destroy(struct restrict_fs_bpf *prog);

View file

@ -143,7 +143,14 @@ static void cgroup_compat_warn(void) {
void cgroup_context_init(CGroupContext *c) {
assert(c);
/* Initialize everything to the kernel defaults. */
/* Initialize everything to the kernel defaults. When initializing a bool member to 'true', make
* sure to serialize in execute-serialize.c using serialize_bool() instead of
* serialize_bool_elide(), as sd-executor will initialize here to 'true', but serialize_bool_elide()
* skips serialization if the value is 'false' (as that's the common default), so if the value at
* runtime is zero it would be lost after deserialization. Same when initializing uint64_t and other
* values, update/add a conditional serialization check. This is to minimize the amount of
* serialized data that is sent to the sd-executor, so that there is less work to do on the default
* cases. */
*c = (CGroupContext) {
.cpu_weight = CGROUP_WEIGHT_INVALID,
@ -724,6 +731,23 @@ int cgroup_context_add_device_allow(CGroupContext *c, const char *dev, const cha
return 0;
}
/* Sets the access mode of device 'dev' in the device allow list of 'c'. If an entry with an equal path
 * already exists, its r/w/m flags are overwritten from 'mode' and 0 is returned. Otherwise the request is
 * handed to cgroup_context_add_device_allow() and its result is returned. A 'mode' for which isempty() is
 * true enables all three flags. */
int cgroup_context_add_or_update_device_allow(CGroupContext *c, const char *dev, const char *mode) {
        assert(c);
        assert(dev);
        assert(isempty(mode) || in_charset(mode, "rwm"));

        bool everything = isempty(mode);

        LIST_FOREACH(device_allow, entry, c->device_allow) {
                if (!path_equal(entry->path, dev))
                        continue;

                /* 'mode' is only inspected when it is non-empty */
                entry->r = everything || strchr(mode, 'r');
                entry->w = everything || strchr(mode, 'w');
                entry->m = everything || strchr(mode, 'm');
                return 0;
        }

        return cgroup_context_add_device_allow(c, dev, mode);
}
int cgroup_context_add_bpf_foreign_program(CGroupContext *c, uint32_t attach_type, const char *bpffs_path) {
CGroupBPFForeignProgram *p;
_cleanup_free_ char *d = NULL;

View file

@ -279,6 +279,7 @@ static inline bool cgroup_context_want_memory_pressure(const CGroupContext *c) {
}
int cgroup_context_add_device_allow(CGroupContext *c, const char *dev, const char *mode);
int cgroup_context_add_or_update_device_allow(CGroupContext *c, const char *dev, const char *mode);
int cgroup_context_add_bpf_foreign_program(CGroupContext *c, uint32_t attach_type, const char *path);
void unit_modify_nft_set(Unit *u, bool add);

View file

@ -1820,31 +1820,9 @@ int bus_cgroup_set_property(
return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "DeviceAllow= requires combination of rwm flags");
if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
CGroupDeviceAllow *a = NULL;
LIST_FOREACH(device_allow, b, c->device_allow)
if (path_equal(b->path, path)) {
a = b;
break;
}
if (!a) {
a = new0(CGroupDeviceAllow, 1);
if (!a)
return -ENOMEM;
a->path = strdup(path);
if (!a->path) {
free(a);
return -ENOMEM;
}
LIST_PREPEND(device_allow, c->device_allow, a);
}
a->r = strchr(rwm, 'r');
a->w = strchr(rwm, 'w');
a->m = strchr(rwm, 'm');
r = cgroup_context_add_or_update_device_allow(c, path, rwm);
if (r < 0)
return r;
}
n++;

View file

@ -26,9 +26,9 @@
/* Takes a value generated randomly or by hashing and turns it into a UID in the right range */
#define UID_CLAMP_INTO_RANGE(rnd) (((uid_t) (rnd) % (DYNAMIC_UID_MAX - DYNAMIC_UID_MIN + 1)) + DYNAMIC_UID_MIN)
DEFINE_PRIVATE_TRIVIAL_REF_FUNC(DynamicUser, dynamic_user);
DEFINE_TRIVIAL_REF_FUNC(DynamicUser, dynamic_user);
static DynamicUser* dynamic_user_free(DynamicUser *d) {
DynamicUser* dynamic_user_free(DynamicUser *d) {
if (!d)
return NULL;
@ -43,13 +43,15 @@ static int dynamic_user_add(Manager *m, const char *name, int storage_socket[sta
DynamicUser *d;
int r;
assert(m);
assert(m || ret);
assert(name);
assert(storage_socket);
r = hashmap_ensure_allocated(&m->dynamic_users, &string_hash_ops);
if (r < 0)
return r;
if (m) { /* Might be called in sd-executor with no manager object */
r = hashmap_ensure_allocated(&m->dynamic_users, &string_hash_ops);
if (r < 0)
return r;
}
d = malloc0(offsetof(DynamicUser, name) + strlen(name) + 1);
if (!d)
@ -60,10 +62,12 @@ static int dynamic_user_add(Manager *m, const char *name, int storage_socket[sta
d->storage_socket[0] = storage_socket[0];
d->storage_socket[1] = storage_socket[1];
r = hashmap_put(m->dynamic_users, d->name, d);
if (r < 0) {
free(d);
return r;
if (m) { /* Might be called in sd-executor with no manager object */
r = hashmap_put(m->dynamic_users, d->name, d);
if (r < 0) {
free(d);
return r;
}
}
d->manager = m;
@ -603,37 +607,49 @@ static DynamicUser* dynamic_user_destroy(DynamicUser *d) {
return dynamic_user_free(d);
}
int dynamic_user_serialize(Manager *m, FILE *f, FDSet *fds) {
DynamicUser *d;
int dynamic_user_serialize_one(DynamicUser *d, const char *key, FILE *f, FDSet *fds) {
int copy0, copy1;
assert(m);
assert(key);
assert(f);
assert(fds);
/* Dump the dynamic user database into the manager serialization, to deal with daemon reloads. */
if (!d)
return 0;
HASHMAP_FOREACH(d, m->dynamic_users) {
int copy0, copy1;
if (d->storage_socket[0] < 0 || d->storage_socket[1] < 0)
return 0;
copy0 = fdset_put_dup(fds, d->storage_socket[0]);
if (copy0 < 0)
return log_error_errno(copy0, "Failed to add dynamic user storage fd to serialization: %m");
copy0 = fdset_put_dup(fds, d->storage_socket[0]);
if (copy0 < 0)
return log_error_errno(copy0, "Failed to add dynamic user storage fd to serialization: %m");
copy1 = fdset_put_dup(fds, d->storage_socket[1]);
if (copy1 < 0)
return log_error_errno(copy1, "Failed to add dynamic user storage fd to serialization: %m");
copy1 = fdset_put_dup(fds, d->storage_socket[1]);
if (copy1 < 0)
return log_error_errno(copy1, "Failed to add dynamic user storage fd to serialization: %m");
(void) serialize_item_format(f, "dynamic-user", "%s %i %i", d->name, copy0, copy1);
}
(void) serialize_item_format(f, key, "%s %i %i", d->name, copy0, copy1);
return 0;
}
void dynamic_user_deserialize_one(Manager *m, const char *value, FDSet *fds) {
int dynamic_user_serialize(Manager *m, FILE *f, FDSet *fds) {
DynamicUser *d;
assert(m);
/* Dump the dynamic user database into the manager serialization, to deal with daemon reloads. */
HASHMAP_FOREACH(d, m->dynamic_users)
(void) dynamic_user_serialize_one(d, "dynamic-user", f, fds);
return 0;
}
void dynamic_user_deserialize_one(Manager *m, const char *value, FDSet *fds, DynamicUser **ret) {
_cleanup_free_ char *name = NULL, *s0 = NULL, *s1 = NULL;
int r, fd0, fd1;
assert(m);
assert(value);
assert(fds);
@ -655,7 +671,7 @@ void dynamic_user_deserialize_one(Manager *m, const char *value, FDSet *fds) {
return;
}
r = dynamic_user_add(m, name, (int[]) { fd0, fd1 }, NULL);
r = dynamic_user_add(m, name, (int[]) { fd0, fd1 }, ret);
if (r < 0) {
log_debug_errno(r, "Failed to add dynamic user: %m");
return;
@ -663,6 +679,9 @@ void dynamic_user_deserialize_one(Manager *m, const char *value, FDSet *fds) {
(void) fdset_remove(fds, fd0);
(void) fdset_remove(fds, fd1);
if (ret) /* If the caller uses it directly, increment the refcount */
(*ret)->n_ref++;
}
void dynamic_user_vacuum(Manager *m, bool close_user) {
@ -831,3 +850,12 @@ DynamicCreds* dynamic_creds_destroy(DynamicCreds *creds) {
return mfree(creds);
}
/* Releases the DynamicUser objects referenced by 'creds' through dynamic_user_free(), without freeing
 * 'creds' itself. If user and group refer to the same object it is freed only once. Both fields are then
 * set to the value returned by dynamic_user_free() for the user. A NULL 'creds' is a no-op. */
void dynamic_creds_done(DynamicCreds *creds) {
        DynamicUser *user, *group;

        if (!creds)
                return;

        user = creds->user;
        group = creds->group;

        if (group != user)
                dynamic_user_free(group);

        creds->user = dynamic_user_free(user);
        creds->group = creds->user;
}

View file

@ -26,7 +26,9 @@ struct DynamicUser {
};
int dynamic_user_serialize(Manager *m, FILE *f, FDSet *fds);
void dynamic_user_deserialize_one(Manager *m, const char *value, FDSet *fds);
int dynamic_user_serialize_one(DynamicUser *d, const char *key, FILE *f, FDSet *fds);
void dynamic_user_deserialize_one(Manager *m, const char *value, FDSet *fds, DynamicUser **ret);
DynamicUser* dynamic_user_free(DynamicUser *d);
void dynamic_user_vacuum(Manager *m, bool close_user);
int dynamic_user_current(DynamicUser *d, uid_t *ret);
@ -38,6 +40,9 @@ int dynamic_creds_realize(DynamicCreds *creds, char **suggested_paths, uid_t *ui
DynamicCreds *dynamic_creds_unref(DynamicCreds *creds);
DynamicCreds *dynamic_creds_destroy(DynamicCreds *creds);
void dynamic_creds_done(DynamicCreds *creds);
DEFINE_TRIVIAL_CLEANUP_FUNC(DynamicCreds*, dynamic_creds_unref);
DEFINE_TRIVIAL_CLEANUP_FUNC(DynamicCreds*, dynamic_creds_destroy);
DynamicUser *dynamic_user_ref(DynamicUser *user);

5082
src/core/exec-invoke.c Normal file

File diff suppressed because it is too large Load diff

16
src/core/exec-invoke.h Normal file
View file

@ -0,0 +1,16 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#pragma once
typedef struct ExecCommand ExecCommand;
typedef struct ExecContext ExecContext;
typedef struct ExecParameters ExecParameters;
typedef struct ExecRuntime ExecRuntime;
typedef struct CGroupContext CGroupContext;
/* Invokes 'command' using the given execution context, parameters, runtime and cgroup context. The
 * implementation lives in exec-invoke.c.
 * NOTE(review): judging by the caller in executor.c, a negative return value signals failure and
 * '*exit_status' then identifies the step that failed, while a non-negative return leaves '*exit_status'
 * at EXIT_SUCCESS (e.g. when 'skip' is chosen in the confirm spawn prompt). Confirm against exec-invoke.c. */
int exec_invoke(
const ExecCommand *command,
const ExecContext *context,
ExecParameters *params,
ExecRuntime *runtime,
const CGroupContext *cgroup_context,
int *exit_status);

4084
src/core/execute-serialize.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,23 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#pragma once
#include "execute.h"
/* These functions serialize/deserialize for invocation purposes (i.e.: serialized object is passed to a
* child process) rather than to save state across reload/reexec. */
int exec_serialize_invocation(FILE *f,
FDSet *fds,
const ExecContext *ctx,
const ExecCommand *cmd,
const ExecParameters *p,
const ExecRuntime *rt,
const CGroupContext *cg);
int exec_deserialize_invocation(FILE *f,
FDSet *fds,
ExecContext *ctx,
ExecCommand *cmd,
ExecParameters *p,
ExecRuntime *rt,
CGroupContext *cg);

File diff suppressed because it is too large Load diff

View file

@ -100,7 +100,7 @@ struct ExecStatus {
struct ExecCommand {
char *path;
char **argv;
ExecStatus exec_status;
ExecStatus exec_status; /* Note that this is not serialized to sd-executor */
ExecCommandFlags flags;
LIST_FIELDS(ExecCommand, command); /* useful for chaining commands */
};
@ -420,13 +420,15 @@ struct ExecParameters {
bool selinux_context_net:1;
CGroupMask cgroup_supported;
const char *cgroup_path;
char *cgroup_path;
uint64_t cgroup_id;
char **prefix;
const char *received_credentials_directory;
const char *received_encrypted_credentials_directory;
char *received_credentials_directory;
char *received_encrypted_credentials_directory;
const char *confirm_spawn;
char *confirm_spawn;
bool shall_confirm_spawn;
usec_t watchdog_usec;
@ -439,22 +441,45 @@ struct ExecParameters {
/* An fd that is closed by the execve(), and thus will result in EOF when the execve() is done */
int exec_fd;
const char *notify_socket;
char *notify_socket;
LIST_HEAD(OpenFile, open_files);
char *fallback_smack_process_label;
char **files_env;
int user_lookup_fd;
int bpf_outer_map_fd;
/* Used for logging in the executor functions */
char *unit_id;
sd_id128_t invocation_id;
char invocation_id_string[SD_ID128_STRING_MAX];
};
/* Evaluates to an ExecParameters compound literal with 'flags' set to '_flags' and the file descriptor
 * fields listed below set to -EBADF; all remaining fields are zero-initialized. The expansion does not end
 * in a semicolon, so that the macro can be used wherever an expression is valid (initializers, nested
 * designated initializers, function arguments); the caller terminates the statement. */
#define EXEC_PARAMETERS_INIT(_flags)                    \
        (ExecParameters) {                              \
                .flags = (_flags),                      \
                .stdin_fd = -EBADF,                     \
                .stdout_fd = -EBADF,                    \
                .stderr_fd = -EBADF,                    \
                .exec_fd = -EBADF,                      \
                .bpf_outer_map_fd = -EBADF,             \
                .user_lookup_fd = -EBADF,               \
        }
#include "unit.h"
#include "dynamic-user.h"
int exec_spawn(Unit *unit,
ExecCommand *command,
const ExecContext *context,
const ExecParameters *exec_params,
ExecParameters *exec_params,
ExecRuntime *runtime,
const CGroupContext *cgroup_context,
pid_t *ret);
void exec_command_done(ExecCommand *c);
void exec_command_done_array(ExecCommand *c, size_t n);
ExecCommand* exec_command_free_list(ExecCommand *c);
void exec_command_free_array(ExecCommand **c, size_t n);
@ -487,6 +512,10 @@ void exec_context_revert_tty(ExecContext *c);
int exec_context_get_clean_directories(ExecContext *c, char **prefix, ExecCleanMask mask, char ***ret);
int exec_context_get_clean_mask(ExecContext *c, ExecCleanMask *ret);
const char *exec_context_tty_path(const ExecContext *context);
int exec_context_tty_size(const ExecContext *context, unsigned *ret_rows, unsigned *ret_cols);
void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p);
void exec_status_start(ExecStatus *s, pid_t pid);
void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status);
void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix);
@ -500,20 +529,26 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(ExecSharedRuntime*, exec_shared_runtime_unref);
int exec_shared_runtime_serialize(const Manager *m, FILE *f, FDSet *fds);
int exec_shared_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds);
int exec_shared_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds);
void exec_shared_runtime_done(ExecSharedRuntime *rt);
void exec_shared_runtime_vacuum(Manager *m);
int exec_runtime_make(const Unit *unit, const ExecContext *context, ExecSharedRuntime *shared, DynamicCreds *creds, ExecRuntime **ret);
ExecRuntime* exec_runtime_free(ExecRuntime *rt);
DEFINE_TRIVIAL_CLEANUP_FUNC(ExecRuntime*, exec_runtime_free);
ExecRuntime* exec_runtime_destroy(ExecRuntime *rt);
void exec_runtime_clear(ExecRuntime *rt);
int exec_params_get_cgroup_path(const ExecParameters *params, const CGroupContext *c, char **ret);
void exec_params_clear(ExecParameters *p);
void exec_params_dump(const ExecParameters *p, FILE* f, const char *prefix);
void exec_params_serialized_done(ExecParameters *p);
bool exec_context_get_cpu_affinity_from_numa(const ExecContext *c);
void exec_directory_done(ExecDirectory *d);
int exec_directory_add(ExecDirectory *d, const char *path, const char *symlink);
void exec_directory_sort(ExecDirectory *d);
bool exec_directory_is_private(const ExecContext *context, ExecDirectoryType type);
ExecCleanMask exec_clean_mask_from_string(const char *s);
@ -543,3 +578,104 @@ ExecDirectoryType exec_resource_type_from_string(const char *s) _pure_;
bool exec_needs_mount_namespace(const ExecContext *context, const ExecParameters *params, const ExecRuntime *runtime);
bool exec_needs_network_namespace(const ExecContext *context);
bool exec_needs_ipc_namespace(const ExecContext *context);
/* These logging macros do the same logging as those in unit.h, but using ExecContext and ExecParameters
* instead of the unit object, so that it can be used in the sd-executor context (where the unit object is
* not available). */
#define LOG_EXEC_ID_FIELD(ep) \
((ep)->runtime_scope == RUNTIME_SCOPE_USER ? "USER_UNIT=" : "UNIT=")
#define LOG_EXEC_ID_FIELD_FORMAT(ep) \
((ep)->runtime_scope == RUNTIME_SCOPE_USER ? "USER_UNIT=%s" : "UNIT=%s")
#define LOG_EXEC_INVOCATION_ID_FIELD(ep) \
((ep)->runtime_scope == RUNTIME_SCOPE_USER ? "USER_INVOCATION_ID=" : "INVOCATION_ID=")
#define LOG_EXEC_INVOCATION_ID_FIELD_FORMAT(ep) \
((ep)->runtime_scope == RUNTIME_SCOPE_USER ? "USER_INVOCATION_ID=%s" : "INVOCATION_ID=%s")
/* Core logging macro for code that only has an ExecContext ('ec') and ExecParameters ('ep') at hand.
 * 'ec', 'ep' and 'level' are each evaluated once into locals. The message is logged only if the global
 * maximum log level permits the priority of 'level' AND the context's log_level_max is either negative
 * or at least that priority. The context's extra log fields are passed to LOG_CONTEXT_PUSH_IOV() in either
 * case. When logging, the macro evaluates to the result of log_object_internal(), called with the unit ID
 * and invocation ID fields taken from 'ep'; otherwise it evaluates to -ERRNO_VALUE(error). As the name
 * says, 'error' may be zero here (log_exec_full() passes 0). */
#define log_exec_full_errno_zerook(ec, ep, level, error, ...) \
({ \
const ExecContext *_c = (ec); \
const ExecParameters *_p = (ep); \
const int _l = (level); \
bool _do_log = !(log_get_max_level() < LOG_PRI(_l) || \
!(_c->log_level_max < 0 || \
_c->log_level_max >= LOG_PRI(_l))); \
LOG_CONTEXT_PUSH_IOV(_c->log_extra_fields, \
_c->n_log_extra_fields); \
!_do_log ? -ERRNO_VALUE(error) : \
log_object_internal(_l, error, PROJECT_FILE, \
__LINE__, __func__, \
LOG_EXEC_ID_FIELD(_p), \
_p->unit_id, \
LOG_EXEC_INVOCATION_ID_FIELD(_p), \
_p->invocation_id_string, ##__VA_ARGS__); \
})
/* Like log_exec_full_errno_zerook(), but 'error' is evaluated once into a local and checked with
 * ASSERT_NON_ZERO() before being passed on. */
#define log_exec_full_errno(ec, ep, level, error, ...) \
({ \
int _error = (error); \
ASSERT_NON_ZERO(_error); \
log_exec_full_errno_zerook(ec, ep, level, _error, ##__VA_ARGS__); \
})
/* Logs without an error code (0 is passed as the error); the result is discarded. */
#define log_exec_full(ec, ep, level, ...) (void) log_exec_full_errno_zerook(ec, ep, level, 0, __VA_ARGS__)
/* Fixed-level shorthands for log_exec_full(). */
#define log_exec_debug(ec, ep, ...) log_exec_full(ec, ep, LOG_DEBUG, __VA_ARGS__)
#define log_exec_info(ec, ep, ...) log_exec_full(ec, ep, LOG_INFO, __VA_ARGS__)
#define log_exec_notice(ec, ep, ...) log_exec_full(ec, ep, LOG_NOTICE, __VA_ARGS__)
#define log_exec_warning(ec, ep, ...) log_exec_full(ec, ep, LOG_WARNING, __VA_ARGS__)
#define log_exec_error(ec, ep, ...) log_exec_full(ec, ep, LOG_ERR, __VA_ARGS__)
/* Fixed-level shorthands for log_exec_full_errno(); these evaluate to its result. */
#define log_exec_debug_errno(ec, ep, error, ...) log_exec_full_errno(ec, ep, LOG_DEBUG, error, __VA_ARGS__)
#define log_exec_info_errno(ec, ep, error, ...) log_exec_full_errno(ec, ep, LOG_INFO, error, __VA_ARGS__)
#define log_exec_notice_errno(ec, ep, error, ...) log_exec_full_errno(ec, ep, LOG_NOTICE, error, __VA_ARGS__)
#define log_exec_warning_errno(ec, ep, error, ...) log_exec_full_errno(ec, ep, LOG_WARNING, error, __VA_ARGS__)
#define log_exec_error_errno(ec, ep, error, ...) log_exec_full_errno(ec, ep, LOG_ERR, error, __VA_ARGS__)
#define log_exec_struct_errno(ec, ep, level, error, ...) \
({ \
const ExecContext *_c = (ec); \
const ExecParameters *_p = (ep); \
const int _l = (level); \
bool _do_log = !(_c->log_level_max < 0 || \
_c->log_level_max >= LOG_PRI(_l)); \
LOG_CONTEXT_PUSH_IOV(_c->log_extra_fields, \
_c->n_log_extra_fields); \
_do_log ? \
log_struct_errno(_l, error, __VA_ARGS__, LOG_EXEC_ID_FIELD_FORMAT(_p), _p->unit_id) : \
-ERRNO_VALUE(error); \
})
#define log_exec_struct(ec, ep, level, ...) log_exec_struct_errno(ec, ep, level, 0, __VA_ARGS__)
#define log_exec_struct_iovec_errno(ec, ep, level, error, iovec, n_iovec) \
({ \
const ExecContext *_c = (ec); \
const ExecParameters *_p = (ep); \
const int _l = (level); \
bool _do_log = !(_c->log_level_max < 0 || \
_c->log_level_max >= LOG_PRI(_l)); \
LOG_CONTEXT_PUSH_IOV(_c->log_extra_fields, \
_c->n_log_extra_fields); \
_do_log ? \
log_struct_iovec_errno(_l, error, iovec, n_iovec) : \
-ERRNO_VALUE(error); \
})
#define log_exec_struct_iovec(ec, ep, level, iovec, n_iovec) log_exec_struct_iovec_errno(ec, ep, level, 0, iovec, n_iovec)
/* Like LOG_MESSAGE(), but with the unit name prefixed. */
#define LOG_EXEC_MESSAGE(ep, fmt, ...) LOG_MESSAGE("%s: " fmt, (ep)->unit_id, ##__VA_ARGS__)
/* Expands to two comma-separated arguments: the scope-dependent unit field format string and the unit ID,
 * for use in the field list of a structured log call. */
#define LOG_EXEC_ID(ep) LOG_EXEC_ID_FIELD_FORMAT(ep), (ep)->unit_id
/* Same as LOG_EXEC_ID(), but for the invocation ID field and the invocation ID string. */
#define LOG_EXEC_INVOCATION_ID(ep) LOG_EXEC_INVOCATION_ID_FIELD_FORMAT(ep), (ep)->invocation_id_string
/* Helper for LOG_CONTEXT_PUSH_EXEC(): declares locals named 'c' and 'p' holding 'ec' and 'ep', then passes
 * the unit ID field, the invocation ID field and the context's extra log fields to the
 * LOG_CONTEXT_PUSH_*() macros. It expands to several declarations/statements rather than one expression. */
#define _LOG_CONTEXT_PUSH_EXEC(ec, ep, p, c) \
const ExecContext *c = (ec); \
const ExecParameters *p = (ep); \
LOG_CONTEXT_PUSH_KEY_VALUE(LOG_EXEC_ID_FIELD(p), p->unit_id); \
LOG_CONTEXT_PUSH_KEY_VALUE(LOG_EXEC_INVOCATION_ID_FIELD(p), p->invocation_id_string); \
LOG_CONTEXT_PUSH_IOV(c->log_extra_fields, c->n_log_extra_fields)
/* Public entry point; the names of the helper locals are generated with UNIQ_T() (presumably so that
 * repeated use in one scope does not clash; confirm against the UNIQ_T definition). */
#define LOG_CONTEXT_PUSH_EXEC(ec, ep) \
_LOG_CONTEXT_PUSH_EXEC(ec, ep, UNIQ_T(p, UNIQ), UNIQ_T(c, UNIQ))

252
src/core/executor.c Normal file
View file

@ -0,0 +1,252 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#include <getopt.h>
#include <unistd.h>
#include "sd-messages.h"
#include "alloc-util.h"
#include "build.h"
#include "exec-invoke.h"
#include "execute-serialize.h"
#include "execute.h"
#include "exit-status.h"
#include "fdset.h"
#include "fd-util.h"
#include "fileio.h"
#include "getopt-defs.h"
#include "parse-util.h"
#include "pretty-print.h"
#include "static-destruct.h"
/* Stream the serialized invocation is read from. It is opened from the fd given with --deserialize= in
 * parse_argv() and closed by main() once deserialization is done. It is also registered with the static
 * destructor machinery, using fclosep as the destructor. */
static FILE* arg_serialization = NULL;
STATIC_DESTRUCTOR_REGISTER(arg_serialization, fclosep);
static int help(void) {
_cleanup_free_ char *link = NULL;
int r;
r = terminal_urlify_man("systemd", "1", &link);
if (r < 0)
return log_oom();
printf("%s [OPTIONS...]\n\n"
"%sSandbox and execute processes.%s\n\n"
" -h --help Show this help and exit\n"
" --version Print version string and exit\n"
" --log-target=TARGET Set log target (console, journal,\n"
" journal-or-kmsg,\n"
" kmsg, null)\n"
" --log-level=LEVEL Set log level (debug, info, notice,\n"
" warning, err, crit,\n"
" alert, emerg)\n"
" --log-color=BOOL Highlight important messages\n"
" --log-location=BOOL Include code location in messages\n"
" --log-time=BOOL Prefix messages with current time\n"
" --deserialize=FD Deserialize process config from FD\n"
"\nSee the %s for details.\n",
program_invocation_short_name,
ansi_highlight(),
ansi_normal(),
link);
return 0;
}
/* Parses the command line. Logging options are applied immediately; --deserialize=FD marks the fd
 * close-on-exec, opens it for reading and stores the stream in arg_serialization (closing a previously
 * given one). Returns a negative errno-style value on error, 0 if the program should exit successfully
 * without doing anything further (--help, --version), and 1 if there is work to do. */
static int parse_argv(int argc, char *argv[]) {
        enum {
                COMMON_GETOPT_ARGS,
                ARG_VERSION,
                ARG_DESERIALIZE,
        };

        static const struct option options[] = {
                { "log-level",    required_argument, NULL, ARG_LOG_LEVEL    },
                { "log-target",   required_argument, NULL, ARG_LOG_TARGET   },
                { "log-color",    required_argument, NULL, ARG_LOG_COLOR    },
                { "log-location", required_argument, NULL, ARG_LOG_LOCATION },
                { "log-time",     required_argument, NULL, ARG_LOG_TIME     },
                { "help",         no_argument,       NULL, 'h'              },
                { "version",      no_argument,       NULL, ARG_VERSION      },
                { "deserialize",  required_argument, NULL, ARG_DESERIALIZE  },
                {}
        };

        assert(argc >= 0);
        assert(argv);

        for (;;) {
                int opt, q;

                opt = getopt_long(argc, argv, "h", options, NULL);
                if (opt < 0)
                        break;

                switch (opt) {

                case 'h':
                        return help();

                case ARG_VERSION:
                        return version();

                case ARG_LOG_LEVEL:
                        q = log_set_max_level_from_string(optarg);
                        if (q < 0)
                                return log_error_errno(q, "Failed to parse log level \"%s\": %m", optarg);
                        break;

                case ARG_LOG_TARGET:
                        q = log_set_target_from_string(optarg);
                        if (q < 0)
                                return log_error_errno(q, "Failed to parse log target \"%s\": %m", optarg);
                        break;

                case ARG_LOG_COLOR:
                        q = log_show_color_from_string(optarg);
                        if (q < 0)
                                return log_error_errno(q, "Failed to parse log color setting \"%s\": %m", optarg);
                        break;

                case ARG_LOG_LOCATION:
                        q = log_show_location_from_string(optarg);
                        if (q < 0)
                                return log_error_errno(q, "Failed to parse log location setting \"%s\": %m", optarg);
                        break;

                case ARG_LOG_TIME:
                        q = log_show_time_from_string(optarg);
                        if (q < 0)
                                return log_error_errno(q, "Failed to parse log time setting \"%s\": %m", optarg);
                        break;

                case ARG_DESERIALIZE: {
                        FILE *stream;
                        int serialization_fd;

                        serialization_fd = parse_fd(optarg);
                        if (serialization_fd < 0)
                                return log_error_errno(serialization_fd, "Failed to parse serialization fd \"%s\": %m", optarg);

                        /* The serialization fd must not leak into the program that is eventually executed */
                        q = fd_cloexec(serialization_fd, /* cloexec= */ true);
                        if (q < 0)
                                return log_error_errno(q, "Failed to set serialization fd \"%s\" to close-on-exec: %m", optarg);

                        stream = fdopen(serialization_fd, "r");
                        if (!stream)
                                return log_error_errno(errno, "Failed to open serialization fd %d: %m", serialization_fd);

                        /* If the option is given more than once the last one wins */
                        safe_fclose(arg_serialization);
                        arg_serialization = stream;
                        break;
                }

                case '?':
                        return -EINVAL;

                default:
                        assert_not_reached();
                }
        }

        if (!arg_serialization)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "No serialization fd specified.");

        return 1 /* work to do */;
}
/* Does the actual work of the executor: collects the inherited fds, parses the command line, deserializes
 * the invocation from the fd given with --deserialize= and hands it to exec_invoke(). Returns a negative
 * errno-style value if setup fails before exec_invoke() is reached, 0 if there was nothing to do (--help,
 * --version), and otherwise the exit status reported by exec_invoke(). */
static int run(int argc, char *argv[]) {
        _cleanup_fdset_free_ FDSet *fdset = NULL;
        int exit_status = EXIT_SUCCESS, r;
        _cleanup_(cgroup_context_done) CGroupContext cgroup_context = {};
        _cleanup_(exec_context_done) ExecContext context = {};
        _cleanup_(exec_command_done) ExecCommand command = {};
        _cleanup_(exec_params_serialized_done) ExecParameters params = EXEC_PARAMETERS_INIT(/* flags= */ 0);
        _cleanup_(exec_shared_runtime_done) ExecSharedRuntime shared = {
                .netns_storage_socket = PIPE_EBADF,
                .ipcns_storage_socket = PIPE_EBADF,
        };
        _cleanup_(dynamic_creds_done) DynamicCreds dynamic_creds = {};
        _cleanup_(exec_runtime_clear) ExecRuntime runtime = {
                .ephemeral_storage_socket = PIPE_EBADF,
                .shared = &shared,
                .dynamic_creds = &dynamic_creds,
        };

        exec_context_init(&context);
        cgroup_context_init(&cgroup_context);

        /* We might be starting the journal itself, we'll be told by the caller what to do */
        log_set_always_reopen_console(true);
        log_set_prohibit_ipc(true);
        log_setup();

        r = fdset_new_fill(/* filter_cloexec= */ 0, &fdset);
        if (r < 0)
                return log_error_errno(r, "Failed to create fd set: %m");

        r = parse_argv(argc, argv);
        if (r <= 0)
                return r;

        /* Now try again if we were told it's fine to use a different target */
        if (log_get_target() != LOG_TARGET_KMSG) {
                log_set_prohibit_ipc(false);
                log_open();
        }

        /* The serialization fd is owned by arg_serialization, keep it out of the set handed to the
         * deserializer. */
        r = fdset_remove(fdset, fileno(arg_serialization));
        if (r < 0)
                return log_error_errno(r, "Failed to remove serialization fd from fd set: %m");

        r = exec_deserialize_invocation(arg_serialization,
                                        fdset,
                                        &context,
                                        &command,
                                        &params,
                                        &runtime,
                                        &cgroup_context);
        if (r < 0)
                return log_error_errno(r, "Failed to deserialize: %m");

        arg_serialization = safe_fclose(arg_serialization);
        fdset = fdset_free(fdset);

        r = exec_invoke(&command,
                        &context,
                        &params,
                        &runtime,
                        &cgroup_context,
                        &exit_status);
        if (r < 0) {
                const char *status = ASSERT_PTR(
                                exit_status_to_string(exit_status, EXIT_STATUS_LIBC | EXIT_STATUS_SYSTEMD));

                log_exec_struct_errno(&context, &params, LOG_ERR, r,
                                      "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
                                      LOG_EXEC_INVOCATION_ID(&params),
                                      LOG_EXEC_MESSAGE(&params, "Failed at step %s spawning %s: %m",
                                                       status, command.path),
                                      "EXECUTABLE=%s", command.path);
        } else
                assert(exit_status == EXIT_SUCCESS); /* When 'skip' is chosen in the confirm spawn prompt */

        return exit_status;
}

/* Entry point of systemd-executor. Runs the executor logic, then the registered static destructors, and
 * converts the result into a process exit status. */
int main(int argc, char *argv[]) {
        int r;

        r = run(argc, argv);

        /* Release what was registered with STATIC_DESTRUCTOR_REGISTER(), e.g. a serialization stream that
         * is still open after an early failure. */
        static_destruct();

        /* A negative errno must not be returned from main(): only the low 8 bits of the value reach the
         * parent, so it would show up as an unrelated large exit code that the parent could mistake for
         * one of the exit statuses identifying a failed execution step. */
        return r < 0 ? EXIT_FAILURE : r;
}

View file

@ -24,7 +24,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
log_set_target(LOG_TARGET_NULL);
}
assert_se(manager_new(RUNTIME_SCOPE_SYSTEM, MANAGER_TEST_RUN_MINIMAL, &m) >= 0);
assert_se(manager_new(RUNTIME_SCOPE_SYSTEM, MANAGER_TEST_RUN_MINIMAL|MANAGER_TEST_DONT_OPEN_EXECUTOR, &m) >= 0);
/* Set log overrides as well to make it harder for a serialization file
* to switch log levels/targets during fuzzing */
manager_override_log_level(m, log_get_max_level());

View file

@ -65,7 +65,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
if (!getenv("SYSTEMD_LOG_LEVEL"))
log_set_max_level(LOG_CRIT);
assert_se(manager_new(RUNTIME_SCOPE_SYSTEM, MANAGER_TEST_RUN_MINIMAL, &m) >= 0);
assert_se(manager_new(RUNTIME_SCOPE_SYSTEM, MANAGER_TEST_RUN_MINIMAL|MANAGER_TEST_DONT_OPEN_EXECUTOR, &m) >= 0);
name = strjoina("a.", unit_type_to_string(t));
assert_se(unit_new_for_name(m, unit_vtable[t]->object_size, name, &u) >= 0);

View file

@ -18,21 +18,9 @@
#include "varlink-internal.h"
int manager_open_serialization(Manager *m, FILE **ret_f) {
_cleanup_close_ int fd = -EBADF;
FILE *f;
assert(ret_f);
fd = open_serialization_fd("systemd-state");
if (fd < 0)
return fd;
f = take_fdopen(&fd, "w+");
if (!f)
return -errno;
*ret_f = f;
return 0;
return open_serialization_file("systemd-state", ret_f);
}
static bool manager_timestamp_shall_serialize(ManagerTimestamp t) {
@ -195,7 +183,7 @@ int manager_serialize(
if (u->id != t)
continue;
r = unit_serialize(u, f, fds, switching_root);
r = unit_serialize_state(u, f, fds, switching_root);
if (r < 0)
return r;
}
@ -222,7 +210,7 @@ static int manager_deserialize_one_unit(Manager *m, const char *name, FILE *f, F
return log_notice_errno(r, "Failed to load unit \"%s\", skipping deserialization: %m", name);
}
r = unit_deserialize(u, f, fds);
r = unit_deserialize_state(u, f, fds);
if (r < 0) {
if (r == -ENOMEM)
return r;
@ -251,7 +239,7 @@ static int manager_deserialize_units(Manager *m, FILE *f, FDSet *fds) {
if (r == -ENOMEM)
return r;
if (r < 0) {
r = unit_deserialize_skip(f);
r = unit_deserialize_state_skip(f);
if (r < 0)
return r;
}
@ -507,7 +495,7 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
}
} else if ((val = startswith(l, "dynamic-user=")))
dynamic_user_deserialize_one(m, val, fds);
dynamic_user_deserialize_one(m, val, fds, NULL);
else if ((val = startswith(l, "destroy-ipc-uid=")))
manager_deserialize_uid_refs_one(m, val);
else if ((val = startswith(l, "destroy-ipc-gid=")))

View file

@ -144,6 +144,15 @@ static usec_t manager_watch_jobs_next_time(Manager *m) {
return usec_add(now(CLOCK_MONOTONIC), timeout);
}
/* Reports whether confirm-spawn is off for this manager: either nothing is configured in
 * m->confirm_spawn, or the flag file /run/systemd/confirm_spawn_disabled is present. */
static bool manager_is_confirm_spawn_disabled(Manager *m) {
        assert(m);

        if (m->confirm_spawn)
                /* Configured, hence the flag file alone decides */
                return access("/run/systemd/confirm_spawn_disabled", F_OK) >= 0;

        return true;
}
static void manager_watch_jobs_in_progress(Manager *m) {
usec_t next;
int r;
@ -912,6 +921,8 @@ int manager_new(RuntimeScope runtime_scope, ManagerTestRunFlags test_run_flags,
.interval = 10 * USEC_PER_MINUTE,
.burst = 10,
},
.executor_fd = -EBADF,
};
unit_defaults_init(&m->defaults, runtime_scope);
@ -1030,6 +1041,42 @@ int manager_new(RuntimeScope runtime_scope, ManagerTestRunFlags test_run_flags,
if (r < 0 && r != -EEXIST)
return r;
m->executor_fd = open(SYSTEMD_EXECUTOR_BINARY_PATH, O_CLOEXEC|O_PATH);
if (m->executor_fd < 0)
return log_warning_errno(errno,
"Failed to open executor binary '%s': %m",
SYSTEMD_EXECUTOR_BINARY_PATH);
} else if (!FLAGS_SET(test_run_flags, MANAGER_TEST_DONT_OPEN_EXECUTOR)) {
_cleanup_free_ char *self_exe = NULL, *executor_path = NULL;
_cleanup_close_ int self_dir_fd = -EBADF;
int level = LOG_DEBUG;
/* Prefer sd-executor from the same directory as the test, e.g.: when running unit tests from the
* build directory. Fallback to working directory and then the installation path. */
r = readlink_and_make_absolute("/proc/self/exe", &self_exe);
if (r < 0)
return r;
self_dir_fd = open_parent(self_exe, O_CLOEXEC|O_DIRECTORY, 0);
if (self_dir_fd < 0)
return -errno;
m->executor_fd = openat(self_dir_fd, "systemd-executor", O_CLOEXEC|O_PATH);
if (m->executor_fd < 0 && errno == ENOENT)
m->executor_fd = openat(AT_FDCWD, "systemd-executor", O_CLOEXEC|O_PATH);
if (m->executor_fd < 0 && errno == ENOENT) {
m->executor_fd = open(SYSTEMD_EXECUTOR_BINARY_PATH, O_CLOEXEC|O_PATH);
level = LOG_WARNING; /* Tests should normally use local builds */
}
if (m->executor_fd < 0)
return -errno;
r = fd_get_path(m->executor_fd, &executor_path);
if (r < 0)
return r;
log_full(level, "Using systemd-executor binary from '%s'", executor_path);
}
/* Note that we do not set up the notify fd here. We do that after deserialization,
@ -1692,6 +1739,8 @@ Manager* manager_free(Manager *m) {
lsm_bpf_destroy(m->restrict_fs);
#endif
safe_close(m->executor_fd);
return mfree(m);
}
@ -4430,13 +4479,6 @@ void manager_disable_confirm_spawn(void) {
(void) touch("/run/systemd/confirm_spawn_disabled");
}
/* Reports whether confirm-spawn is off: either nothing is configured in m->confirm_spawn, or the flag
 * file /run/systemd/confirm_spawn_disabled is present. */
bool manager_is_confirm_spawn_disabled(Manager *m) {
        if (m->confirm_spawn)
                return access("/run/systemd/confirm_spawn_disabled", F_OK) >= 0;

        return true;
}
static bool manager_should_show_status(Manager *m, StatusType type) {
assert(m);
@ -4954,6 +4996,17 @@ void unit_defaults_done(UnitDefaults *defaults) {
rlimit_free_all(defaults->rlimit);
}
/* Picks the log target to hand to sd-executor: the current log target while the journal is running,
 * LOG_TARGET_KMSG otherwise. */
LogTarget manager_get_executor_log_target(Manager *m) {
        assert(m);

        /* If journald is not available tell sd-executor to go to kmsg, as it might be starting journald */
        return manager_journal_is_running(m) ? log_get_target() : LOG_TARGET_KMSG;
}
static const char *const manager_state_table[_MANAGER_STATE_MAX] = {
[MANAGER_INITIALIZING] = "initializing",
[MANAGER_STARTING] = "starting",

View file

@ -145,6 +145,7 @@ typedef enum ManagerTestRunFlags {
MANAGER_TEST_RUN_ENV_GENERATORS = 1 << 2, /* also run env generators */
MANAGER_TEST_RUN_GENERATORS = 1 << 3, /* also run unit generators */
MANAGER_TEST_RUN_IGNORE_DEPENDENCIES = 1 << 4, /* run while ignoring dependencies */
MANAGER_TEST_DONT_OPEN_EXECUTOR = 1 << 5, /* avoid trying to load sd-executor */
MANAGER_TEST_FULL = MANAGER_TEST_RUN_BASIC | MANAGER_TEST_RUN_ENV_GENERATORS | MANAGER_TEST_RUN_GENERATORS,
} ManagerTestRunFlags;
@ -496,6 +497,10 @@ struct Manager {
/* For NFTSet= */
FirewallContext *fw_ctx;
/* Pin the systemd-executor binary, so that it never changes until re-exec, ensuring we don't have
* serialization/deserialization compatibility issues during upgrades. */
int executor_fd;
};
static inline usec_t manager_default_timeout_abort_usec(Manager *m) {
@ -616,7 +621,6 @@ const char *manager_state_to_string(ManagerState m) _const_;
ManagerState manager_state_from_string(const char *s) _pure_;
const char *manager_get_confirm_spawn(Manager *m);
bool manager_is_confirm_spawn_disabled(Manager *m);
void manager_disable_confirm_spawn(void);
const char *manager_timestamp_to_string(ManagerTimestamp m) _const_;
@ -629,6 +633,8 @@ void manager_override_watchdog(Manager *m, WatchdogType t, usec_t timeout);
int manager_set_watchdog_pretimeout_governor(Manager *m, const char *governor);
int manager_override_watchdog_pretimeout_governor(Manager *m, const char *governor);
LogTarget manager_get_executor_log_target(Manager *m);
const char* oom_policy_to_string(OOMPolicy i) _const_;
OOMPolicy oom_policy_from_string(const char *s) _pure_;

View file

@ -36,6 +36,7 @@ libcore_sources = files(
'emergency-action.c',
'exec-credential.c',
'execute.c',
'execute-serialize.c',
'generator-setup.c',
'ima-setup.c',
'import-creds.c',
@ -143,6 +144,11 @@ systemd_sources = files(
'crash-handler.c',
)
systemd_executor_sources = files(
'executor.c',
'exec-invoke.c',
)
executables += [
libexec_template + {
'name' : 'systemd',
@ -155,6 +161,22 @@ executables += [
],
'dependencies' : libseccomp,
},
libexec_template + {
'name' : 'systemd-executor',
'public' : true,
'sources' : systemd_executor_sources,
'include_directories' : core_includes,
'link_with' : [
libcore,
libshared,
],
'dependencies' : [
libapparmor,
libpam,
libseccomp,
libselinux,
],
},
fuzz_template + {
'sources' : files('fuzz-unit-file.c'),
'link_with' : [

View file

@ -889,13 +889,8 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) {
static int mount_spawn(Mount *m, ExecCommand *c, PidRef *ret_pid) {
_cleanup_(exec_params_clear) ExecParameters exec_params = {
.flags = EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN,
.stdin_fd = -EBADF,
.stdout_fd = -EBADF,
.stderr_fd = -EBADF,
.exec_fd = -EBADF,
};
_cleanup_(exec_params_clear) ExecParameters exec_params = EXEC_PARAMETERS_INIT(
EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN);
_cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
pid_t pid;
int r;

View file

@ -1602,13 +1602,7 @@ static int service_spawn_internal(
ExecFlags flags,
PidRef *ret_pid) {
_cleanup_(exec_params_clear) ExecParameters exec_params = {
.flags = flags,
.stdin_fd = -EBADF,
.stdout_fd = -EBADF,
.stderr_fd = -EBADF,
.exec_fd = -EBADF,
};
_cleanup_(exec_params_clear) ExecParameters exec_params = EXEC_PARAMETERS_INIT(flags);
_cleanup_(sd_event_source_unrefp) sd_event_source *exec_fd_source = NULL;
_cleanup_strv_free_ char **final_env = NULL, **our_env = NULL;
_cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;

View file

@ -1913,13 +1913,8 @@ static int socket_coldplug(Unit *u) {
static int socket_spawn(Socket *s, ExecCommand *c, PidRef *ret_pid) {
_cleanup_(exec_params_clear) ExecParameters exec_params = {
.flags = EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN,
.stdin_fd = -EBADF,
.stdout_fd = -EBADF,
.stderr_fd = -EBADF,
.exec_fd = -EBADF,
};
_cleanup_(exec_params_clear) ExecParameters exec_params = EXEC_PARAMETERS_INIT(
EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN);
_cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
pid_t pid;
int r;

View file

@ -632,13 +632,8 @@ static void swap_dump(Unit *u, FILE *f, const char *prefix) {
static int swap_spawn(Swap *s, ExecCommand *c, PidRef *ret_pid) {
_cleanup_(exec_params_clear) ExecParameters exec_params = {
.flags = EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN,
.stdin_fd = -EBADF,
.stdout_fd = -EBADF,
.stderr_fd = -EBADF,
.exec_fd = -EBADF,
};
_cleanup_(exec_params_clear) ExecParameters exec_params = EXEC_PARAMETERS_INIT(
EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN);
_cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
pid_t pid;
int r;

View file

@ -90,7 +90,7 @@ static const char *const io_accounting_metric_field_last[_CGROUP_IO_ACCOUNTING_M
[CGROUP_IO_WRITE_OPERATIONS] = "io-accounting-write-operations-last",
};
int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool switching_root) {
int unit_serialize_state(Unit *u, FILE *f, FDSet *fds, bool switching_root) {
int r;
assert(u);
@ -264,7 +264,7 @@ static int unit_deserialize_job(Unit *u, FILE *f) {
_deserialize_matched; \
})
int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
int unit_deserialize_state(Unit *u, FILE *f, FDSet *fds) {
int r;
assert(u);
@ -552,7 +552,7 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
return 0;
}
int unit_deserialize_skip(FILE *f) {
int unit_deserialize_state_skip(FILE *f) {
int r;
assert(f);

View file

@ -6,8 +6,11 @@
#include "unit.h"
#include "fdset.h"
int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs);
int unit_deserialize(Unit *u, FILE *f, FDSet *fds);
int unit_deserialize_skip(FILE *f);
/* These functions serialize state for our own usage, i.e.: across a reload/reexec, rather than for being
* passed to a child process. */
int unit_serialize_state(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs);
int unit_deserialize_state(Unit *u, FILE *f, FDSet *fds);
int unit_deserialize_state_skip(FILE *f);
void unit_dump(Unit *u, FILE *f, const char *prefix);

View file

@ -1854,18 +1854,6 @@ int unit_test_start_limit(Unit *u) {
return -ECANCELED;
}
/* Tells whether spawning for this unit should go through the confirmation prompt: confirm-spawn must
 * not be disabled on the manager, and the unit's exec context must not have same_pgrp set. */
bool unit_shall_confirm_spawn(Unit *u) {
        assert(u);

        /* For some reasons units remaining in the same process group
         * as PID 1 fail to acquire the console even if it's not used
         * by any process. So skip the confirmation question for them. */
        return !manager_is_confirm_spawn_disabled(u->manager) &&
                !unit_get_exec_context(u)->same_pgrp;
}
static bool unit_verify_deps(Unit *u) {
Unit *other;
@ -5369,6 +5357,7 @@ int unit_acquire_invocation_id(Unit *u) {
}
int unit_set_exec_params(Unit *u, ExecParameters *p) {
const char *confirm_spawn;
int r;
assert(u);
@ -5381,7 +5370,13 @@ int unit_set_exec_params(Unit *u, ExecParameters *p) {
p->runtime_scope = u->manager->runtime_scope;
p->confirm_spawn = manager_get_confirm_spawn(u->manager);
confirm_spawn = manager_get_confirm_spawn(u->manager);
if (confirm_spawn) {
p->confirm_spawn = strdup(confirm_spawn);
if (!p->confirm_spawn)
return -ENOMEM;
}
p->cgroup_supported = u->manager->cgroup_supported;
p->prefix = u->manager->prefix;
SET_FLAG(p->flags, EXEC_PASS_LOG_UNIT|EXEC_CHOWN_DIRECTORIES, MANAGER_IS_SYSTEM(u->manager));
@ -5393,6 +5388,27 @@ int unit_set_exec_params(Unit *u, ExecParameters *p) {
p->received_credentials_directory = u->manager->received_credentials_directory;
p->received_encrypted_credentials_directory = u->manager->received_encrypted_credentials_directory;
p->shall_confirm_spawn = !!u->manager->confirm_spawn;
p->fallback_smack_process_label = u->manager->defaults.smack_process_label;
if (u->manager->restrict_fs && p->bpf_outer_map_fd < 0) {
int fd = lsm_bpf_map_restrict_fs_fd(u);
if (fd < 0)
return fd;
p->bpf_outer_map_fd = fd;
}
p->user_lookup_fd = u->manager->user_lookup_fds[1];
p->cgroup_id = u->cgroup_id;
p->invocation_id = u->invocation_id;
sd_id128_to_string(p->invocation_id, p->invocation_id_string);
p->unit_id = strdup(u->id);
if (!p->unit_id)
return -ENOMEM;
return 0;
}

View file

@ -1024,8 +1024,6 @@ void unit_notify_user_lookup(Unit *u, uid_t uid, gid_t gid);
int unit_set_invocation_id(Unit *u, sd_id128_t id);
int unit_acquire_invocation_id(Unit *u);
bool unit_shall_confirm_spawn(Unit *u);
int unit_set_exec_params(Unit *s, ExecParameters *p);
int unit_fork_helper_process(Unit *u, const char *name, PidRef *ret);

View file

@ -7,6 +7,7 @@
#include "escape.h"
#include "fd-util.h"
#include "fileio.h"
#include "hexdecoct.h"
#include "memfd-util.h"
#include "missing_mman.h"
#include "missing_syscall.h"
@ -173,6 +174,93 @@ int serialize_pidref(FILE *f, FDSet *fds, const char *key, PidRef *pidref) {
return serialize_item_format(f, key, "@%i", copy);
}
int serialize_item_hexmem(FILE *f, const char *key, const void *p, size_t l) {
_cleanup_free_ char *encoded = NULL;
int r;
assert(f);
assert(key);
assert(p || l == 0);
if (l == 0)
return 0;
encoded = hexmem(p, l);
if (!encoded)
return log_oom_debug();
r = serialize_item(f, key, encoded);
if (r < 0)
return r;
return 1;
}
int serialize_item_base64mem(FILE *f, const char *key, const void *p, size_t l) {
_cleanup_free_ char *encoded = NULL;
ssize_t len;
int r;
assert(f);
assert(key);
assert(p || l == 0);
if (l == 0)
return 0;
len = base64mem(p, l, &encoded);
if (len <= 0)
return log_oom_debug();
r = serialize_item(f, key, encoded);
if (r < 0)
return r;
return 1;
}
/* Writes every string in s to f, each as its own item under the same key.
 *
 * Returns 0 if the set is empty (nothing is written), 1 after all entries were written, or the first
 * negative error reported by serialize_item(). */
int serialize_string_set(FILE *f, const char *key, Set *s) {
        const char *entry;

        assert(f);
        assert(key);

        if (set_isempty(s))
                return 0;

        /* Serialize as individual items, as each element might contain separators and escapes */
        SET_FOREACH(entry, s) {
                int r = serialize_item(f, key, entry);
                if (r < 0)
                        return r;
        }

        return 1;
}
int serialize_image_policy(FILE *f, const char *key, const ImagePolicy *p) {
_cleanup_free_ char *policy = NULL;
int r;
assert(f);
assert(key);
if (!p)
return 0;
r = image_policy_to_string(p, /* simplify= */ false, &policy);
if (r < 0)
return r;
r = serialize_item(f, key, policy);
if (r < 0)
return r;
return 1;
}
int deserialize_read_line(FILE *f, char **ret) {
_cleanup_free_ char *line = NULL;
int r;
@ -333,3 +421,22 @@ int open_serialization_fd(const char *ident) {
return fd;
}
/* Opens a serialization fd for the given identifier and wraps it in a stdio stream opened in "w+" mode.
 *
 * On success stores the stream in *ret (ownership passes to the caller) and returns 0. On failure
 * returns the negative error from open_serialization_fd(), or -errno if the stream could not be set up;
 * *ret is left untouched in that case. */
int open_serialization_file(const char *ident, FILE **ret) {
        _cleanup_fclose_ FILE *f = NULL;
        /* Start out as an invalid fd, so that the cleanup handler never sees an indeterminate value */
        _cleanup_close_ int fd = -EBADF;

        assert(ret);

        fd = open_serialization_fd(ident);
        if (fd < 0)
                return fd;

        f = take_fdopen(&fd, "w+");
        if (!f)
                return -errno;

        *ret = TAKE_PTR(f);

        return 0;
}

View file

@ -4,19 +4,25 @@
#include <stdio.h>
#include "fdset.h"
#include "image-policy.h"
#include "macro.h"
#include "pidref.h"
#include "set.h"
#include "string-util.h"
#include "time-util.h"
int serialize_item(FILE *f, const char *key, const char *value);
int serialize_item_escaped(FILE *f, const char *key, const char *value);
int serialize_item_format(FILE *f, const char *key, const char *value, ...) _printf_(3,4);
int serialize_item_hexmem(FILE *f, const char *key, const void *p, size_t l);
int serialize_item_base64mem(FILE *f, const char *key, const void *p, size_t l);
int serialize_fd(FILE *f, FDSet *fds, const char *key, int fd);
int serialize_usec(FILE *f, const char *key, usec_t usec);
int serialize_dual_timestamp(FILE *f, const char *key, const dual_timestamp *t);
int serialize_strv(FILE *f, const char *key, char **l);
int serialize_pidref(FILE *f, FDSet *fds, const char *key, PidRef *pidref);
int serialize_string_set(FILE *f, const char *key, Set *s);
int serialize_image_policy(FILE *f, const char *key, const ImagePolicy *p);
static inline int serialize_bool(FILE *f, const char *key, bool b) {
return serialize_item(f, key, yes_no(b));
@ -25,6 +31,10 @@ static inline int serialize_bool_elide(FILE *f, const char *key, bool b) {
return b ? serialize_item(f, key, yes_no(b)) : 0;
}
/* Serializes value as a decimal integer under key; negative values are not written and yield 0. */
static inline int serialize_item_tristate(FILE *f, const char *key, int value) {
        if (value < 0)
                return 0;

        return serialize_item_format(f, key, "%i", value);
}
int deserialize_read_line(FILE *f, char **ret);
int deserialize_usec(const char *value, usec_t *timestamp);
@ -34,3 +44,4 @@ int deserialize_strv(char ***l, const char *value);
int deserialize_pidref(FDSet *fds, const char *value, PidRef *ret);
int open_serialization_fd(const char *ident);
int open_serialization_file(const char *ident, FILE **ret);

View file

@ -100,6 +100,17 @@ TEST(open_serialization_fd) {
assert_se(write(fd, "test\n", 5) == 5);
}
/* open_serialization_file() must succeed and hand back a stream that accepts writes. */
TEST(open_serialization_file) {
        _cleanup_fclose_ FILE *f = NULL;

        assert_se(open_serialization_file("test", &f) >= 0);
        assert_se(f);

        assert_se(fwrite("test\n", 1, 5, f) == 5);
}
TEST(fd_move_above_stdio) {
int original_stdin, new_fd;

View file

@ -190,6 +190,72 @@ TEST(serialize_environment) {
assert_se(strv_equal(env, env2));
}
/* An empty buffer writes nothing and returns 0; three 0xff bytes are written as "a=ffffff". */
TEST(serialize_item_hexmem) {
        static const uint8_t data[] = { 0xff, 0xff, 0xff };
        _cleanup_(unlink_tempfilep) char fn[] = "/tmp/test-serialize.XXXXXX";
        _cleanup_fclose_ FILE *f = NULL;
        _cleanup_free_ char *line = NULL;

        assert_se(fmkostemp_safe(fn, "r+", &f) == 0);
        log_info("/* %s (%s) */", __func__, fn);

        assert_se(serialize_item_hexmem(f, "a", NULL, 0) == 0);
        assert_se(serialize_item_hexmem(f, "a", data, sizeof(data)) == 1);

        rewind(f);

        assert_se(read_line(f, LONG_LINE_MAX, &line) > 0);
        assert_se(streq(line, "a=ffffff"));
}
/* An empty buffer writes nothing and returns 0; three 0xff bytes are written as "a=////". */
TEST(serialize_item_base64mem) {
        static const uint8_t data[] = { 0xff, 0xff, 0xff };
        _cleanup_(unlink_tempfilep) char fn[] = "/tmp/test-serialize.XXXXXX";
        _cleanup_fclose_ FILE *f = NULL;
        _cleanup_free_ char *line = NULL;

        assert_se(fmkostemp_safe(fn, "r+", &f) == 0);
        log_info("/* %s (%s) */", __func__, fn);

        assert_se(serialize_item_base64mem(f, "a", NULL, 0) == 0);
        assert_se(serialize_item_base64mem(f, "a", data, sizeof(data)) == 1);

        rewind(f);

        assert_se(read_line(f, LONG_LINE_MAX, &line) > 0);
        assert_se(streq(line, "a=////"));
}
/* An empty set writes nothing and returns 0; a two-element set yields two distinct "a=" lines, in
 * either order, whose values may contain spaces. */
TEST(serialize_string_set) {
        _cleanup_(unlink_tempfilep) char fn[] = "/tmp/test-serialize.XXXXXX";
        _cleanup_fclose_ FILE *f = NULL;
        _cleanup_set_free_free_ Set *s = NULL;
        _cleanup_free_ char *line1 = NULL, *line2 = NULL;
        char *first, *second;

        assert_se(fmkostemp_safe(fn, "r+", &f) == 0);
        log_info("/* %s (%s) */", __func__, fn);

        assert_se(set_ensure_allocated(&s, &string_hash_ops) >= 0);

        /* Nothing in the set yet */
        assert_se(serialize_string_set(f, "a", s) == 0);

        assert_se(set_put_strsplit(s, "abc def,ghi jkl", ",", 0) >= 0);

        assert_se(serialize_string_set(f, "a", s) == 1);

        rewind(f);

        assert_se(read_line(f, LONG_LINE_MAX, &line1) > 0);
        first = startswith(line1, "a=");
        assert_se(first);

        assert_se(read_line(f, LONG_LINE_MAX, &line2) > 0);
        second = startswith(line2, "a=");
        assert_se(second);

        /* Set iteration order is not fixed, only require that both values show up once each */
        assert_se(!streq(first, second));
        assert_se(STR_IN_SET(first, "abc def", "ghi jkl"));
        assert_se(STR_IN_SET(second, "abc def", "ghi jkl"));
}
static int intro(void) {
memset(long_string, 'x', sizeof(long_string)-1);
char_array_0(long_string);

View file

@ -2,5 +2,5 @@
Description=Log filtering unit
[Service]
ExecStart=sh -c 'while true; do echo "Logging from the service, and ~more~"; sleep .25; done'
ExecStart=sh -c 'while true; do echo "Logging from the service, and ~more~ foo bar"; sleep .25; done'
SyslogLevel=notice

View file

@ -78,6 +78,10 @@ if is_xattr_supported; then
add_logs_filtering_override "logs-filtering.service" "10-allow-with-escape-char" "\\\\x7emore~"
[[ -n $(run_service_and_fetch_logs "logs-filtering.service") ]]
add_logs_filtering_override "logs-filtering.service" "11-reset" ""
add_logs_filtering_override "logs-filtering.service" "12-allow-with-spaces" "foo bar"
[[ -n $(run_service_and_fetch_logs "logs-filtering.service") ]]
add_logs_filtering_override "delegated-cgroup-filtering.service" "00-allow-all" ".*"
[[ -n $(run_service_and_fetch_logs "delegated-cgroup-filtering.service") ]]

View file

@ -68,6 +68,12 @@ if systemctl is-active systemd-oomd.service; then
systemctl restart systemd-oomd.service
fi
# Ensure that we can start services even with a very low hard memory cap without oom-kills, but skip under
# sanitizers as they balloon memory usage.
if ! [[ -v ASAN_OPTIONS || -v UBSAN_OPTIONS ]]; then
systemd-run -t -p MemoryMax=10M -p MemorySwapMax=0 -p MemoryZSwapMax=0 /bin/true
fi
systemctl start testsuite-55-testchill.service
systemctl start testsuite-55-testbloat.service