udev: run the main process, workers, and spawned commands in /udev subcgroup

And enable cgroup delegation for udevd.
Then, processes invoked through ExecReload= are assigned .control
subcgroup, and they are not killed by cg_kill().

Fixes #16867 and #22686.
This commit is contained in:
Yu Watanabe 2022-03-16 20:46:49 +09:00
parent 4267084642
commit a1f4fd3876
2 changed files with 59 additions and 18 deletions

View file

@ -28,6 +28,7 @@
#include "sd-event.h"
#include "alloc-util.h"
#include "cgroup-setup.h"
#include "cgroup-util.h"
#include "cpu-set-util.h"
#include "dev-setup.h"
@ -48,6 +49,7 @@
#include "mkdir.h"
#include "netlink-util.h"
#include "parse-util.h"
#include "path-util.h"
#include "pretty-print.h"
#include "proc-cmdline.h"
#include "process-util.h"
@ -85,7 +87,7 @@ typedef struct Manager {
sd_event *event;
Hashmap *workers;
LIST_HEAD(Event, events);
const char *cgroup;
char *cgroup;
pid_t pid; /* the process that originally allocated the manager object */
int log_level;
@ -238,6 +240,7 @@ static Manager* manager_free(Manager *manager) {
safe_close(manager->inotify_fd);
safe_close_pair(manager->worker_watch);
free(manager->cgroup);
return mfree(manager);
}
@ -1722,12 +1725,63 @@ static int parse_argv(int argc, char *argv[]) {
return 1;
}
static int manager_new(Manager **ret, int fd_ctrl, int fd_uevent, const char *cgroup) {
static int create_subcgroup(char **ret) {
_cleanup_free_ char *cgroup = NULL, *subcgroup = NULL;
int r;
if (getppid() != 1)
return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Not invoked by PID1.");
r = sd_booted();
if (r < 0)
return log_debug_errno(r, "Failed to check if systemd is running: %m");
if (r == 0)
return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "systemd is not running.");
/* Get our own cgroup, we regularly kill everything udev has left behind.
* We only do this on systemd systems, and only if we are directly spawned
* by PID1. Otherwise we are not guaranteed to have a dedicated cgroup. */
r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup);
if (r < 0) {
if (IN_SET(r, -ENOENT, -ENOMEDIUM))
return log_debug_errno(r, "Dedicated cgroup not found: %m");
return log_debug_errno(r, "Failed to get cgroup: %m");
}
r = cg_get_xattr_bool(SYSTEMD_CGROUP_CONTROLLER, cgroup, "trusted.delegate");
if (IN_SET(r, 0, -ENODATA))
return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "The cgroup %s is not delegated to us.", cgroup);
if (r < 0)
return log_debug_errno(r, "Failed to read trusted.delegate attribute: %m");
/* We are invoked with our own delegated cgroup tree, let's move us one level down, so that we
* don't collide with the "no processes in inner nodes" rule of cgroups, when the service
* manager invokes the ExecReload= job in the .control/ subcgroup. */
subcgroup = path_join(cgroup, "/udev");
if (!subcgroup)
return log_oom_debug();
r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup, 0);
if (r < 0)
return log_debug_errno(r, "Failed to create %s subcgroup: %m", subcgroup);
log_debug("Created %s subcgroup.", subcgroup);
if (ret)
*ret = TAKE_PTR(subcgroup);
return 0;
}
static int manager_new(Manager **ret, int fd_ctrl, int fd_uevent) {
_cleanup_(manager_freep) Manager *manager = NULL;
_cleanup_free_ char *cgroup = NULL;
int r;
assert(ret);
(void) create_subcgroup(&cgroup);
manager = new(Manager, 1);
if (!manager)
return log_oom();
@ -1735,7 +1789,7 @@ static int manager_new(Manager **ret, int fd_ctrl, int fd_uevent, const char *cg
*manager = (Manager) {
.inotify_fd = -1,
.worker_watch = { -1, -1 },
.cgroup = cgroup,
.cgroup = TAKE_PTR(cgroup),
};
r = udev_ctrl_new_from_fd(&manager->ctrl, fd_ctrl);
@ -1880,7 +1934,6 @@ static int main_loop(Manager *manager) {
}
int run_udevd(int argc, char *argv[]) {
_cleanup_free_ char *cgroup = NULL;
_cleanup_(manager_freep) Manager *manager = NULL;
int fd_ctrl = -1, fd_uevent = -1;
int r;
@ -1937,24 +1990,11 @@ int run_udevd(int argc, char *argv[]) {
if (r < 0 && r != -EEXIST)
return log_error_errno(r, "Failed to create /run/udev: %m");
if (getppid() == 1 && sd_booted() > 0) {
/* Get our own cgroup, we regularly kill everything udev has left behind.
* We only do this on systemd systems, and only if we are directly spawned
* by PID1. Otherwise we are not guaranteed to have a dedicated cgroup. */
r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup);
if (r < 0) {
if (IN_SET(r, -ENOENT, -ENOMEDIUM))
log_debug_errno(r, "Dedicated cgroup not found: %m");
else
log_warning_errno(r, "Failed to get cgroup: %m");
}
}
r = listen_fds(&fd_ctrl, &fd_uevent);
if (r < 0)
return log_error_errno(r, "Failed to listen on fds: %m");
r = manager_new(&manager, fd_ctrl, fd_uevent, cgroup);
r = manager_new(&manager, fd_ctrl, fd_uevent);
if (r < 0)
return log_error_errno(r, "Failed to create manager: %m");

View file

@ -16,6 +16,7 @@ Before=sysinit.target
ConditionPathIsReadWrite=/sys
[Service]
Delegate=pids
DeviceAllow=block-* rwm
DeviceAllow=char-* rwm
Type=notify