Merge pull request #30532 from yuwata/udev-extend-timeout-kill-worker

udev: extend timeout to prevent kill worker
This commit is contained in:
Yu Watanabe 2024-01-04 05:21:50 +09:00 committed by GitHub
commit 124c712692
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 119 additions and 35 deletions

View file

@ -249,6 +249,12 @@ All tools:
devices sysfs path are actually backed by sysfs. Relaxing this verification
is useful for testing purposes.
* `$SYSTEMD_UDEV_EXTRA_TIMEOUT_SEC=` — Specifies an extra timespan that the
udev manager process waits for a worker process kills slow programs specified
by IMPORT{program}=, PROGRAM=, or RUN=, and finalizes the processing event.
If the worker process cannot finalize the event within the specified timespan,
the worker process is killed by the manager process. Defaults to 10 seconds.
`udevadm` and `systemd-hwdb`:
* `SYSTEMD_HWDB_UPDATE_BYPASS=` — If set to "1", execution of hwdb updates is skipped

View file

@ -332,6 +332,28 @@ static int on_event_timeout_warning(sd_event_source *s, uint64_t usec, void *use
return 1;
}
static usec_t extra_timeout_usec(void) {
static usec_t saved = 10 * USEC_PER_SEC;
static bool parsed = false;
const char *e;
int r;
if (parsed)
return saved;
parsed = true;
e = getenv("SYSTEMD_UDEV_EXTRA_TIMEOUT_SEC");
if (!e)
return saved;
r = parse_sec(e, &saved);
if (r < 0)
log_debug_errno(r, "Failed to parse $SYSTEMD_UDEV_EXTRA_TIMEOUT_SEC=%s, ignoring: %m", e);
return saved;
}
static void worker_attach_event(Worker *worker, Event *event) {
Manager *manager = ASSERT_PTR(ASSERT_PTR(worker)->manager);
sd_event *e = ASSERT_PTR(manager->event);
@ -349,8 +371,12 @@ static void worker_attach_event(Worker *worker, Event *event) {
udev_warn_timeout(manager->timeout_usec), USEC_PER_SEC,
on_event_timeout_warning, event);
/* Manager.timeout_usec is also used as the timeout for running programs specified in
* IMPORT{program}=, PROGRAM=, or RUN=. Here, let's add an extra time before the manager
* kills a worker, to make it possible that the worker detects timed out of spawned programs,
* kills them, and finalizes the event. */
(void) sd_event_add_time_relative(e, &event->timeout_event, CLOCK_MONOTONIC,
manager->timeout_usec, USEC_PER_SEC,
usec_add(manager->timeout_usec, extra_timeout_usec()), USEC_PER_SEC,
on_event_timeout, event);
}

View file

@ -23,6 +23,7 @@ typedef struct Spawn {
usec_t timeout_usec;
int timeout_signal;
usec_t event_birth_usec;
usec_t cmd_birth_usec;
bool accept_failure;
int fd_stdout;
int fd_stderr;
@ -163,31 +164,20 @@ static int spawn_wait(Spawn *spawn) {
if (r < 0)
return log_device_debug_errno(spawn->device, r, "Failed to allocate sd-event object: %m");
if (spawn->timeout_usec > 0) {
usec_t usec, age_usec;
usec = now(CLOCK_MONOTONIC);
age_usec = usec - spawn->event_birth_usec;
if (age_usec < spawn->timeout_usec) {
if (spawn->timeout_warn_usec > 0 &&
spawn->timeout_warn_usec < spawn->timeout_usec &&
spawn->timeout_warn_usec > age_usec) {
spawn->timeout_warn_usec -= age_usec;
r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
usec + spawn->timeout_warn_usec, USEC_PER_SEC,
on_spawn_timeout_warning, spawn);
if (r < 0)
return log_device_debug_errno(spawn->device, r, "Failed to create timeout warning event source: %m");
}
spawn->timeout_usec -= age_usec;
if (spawn->timeout_usec != USEC_INFINITY) {
if (spawn->timeout_warn_usec < spawn->timeout_usec) {
r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
usec + spawn->timeout_usec, USEC_PER_SEC, on_spawn_timeout, spawn);
usec_add(spawn->cmd_birth_usec, spawn->timeout_warn_usec), USEC_PER_SEC,
on_spawn_timeout_warning, spawn);
if (r < 0)
return log_device_debug_errno(spawn->device, r, "Failed to create timeout event source: %m");
return log_device_debug_errno(spawn->device, r, "Failed to create timeout warning event source: %m");
}
r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
usec_add(spawn->cmd_birth_usec, spawn->timeout_usec), USEC_PER_SEC,
on_spawn_timeout, spawn);
if (r < 0)
return log_device_debug_errno(spawn->device, r, "Failed to create timeout event source: %m");
}
if (spawn->fd_stdout >= 0) {
@ -240,6 +230,13 @@ int udev_event_spawn(
int timeout_signal = event->worker ? event->worker->timeout_signal : SIGKILL;
usec_t timeout_usec = event->worker ? event->worker->timeout_usec : DEFAULT_WORKER_TIMEOUT_USEC;
usec_t now_usec = now(CLOCK_MONOTONIC);
usec_t age_usec = usec_sub_unsigned(now_usec, event->birth_usec);
usec_t cmd_timeout_usec = usec_sub_unsigned(timeout_usec, age_usec);
if (cmd_timeout_usec <= 0)
return log_device_warning_errno(event->dev, SYNTHETIC_ERRNO(ETIME),
"The event already takes longer (%s) than the timeout (%s), skipping execution of '%s'.",
FORMAT_TIMESPAN(age_usec, 1), FORMAT_TIMESPAN(timeout_usec, 1), cmd);
/* pipes from child to parent */
if (result || log_get_max_level() >= LOG_INFO)
@ -300,10 +297,11 @@ int udev_event_spawn(
.cmd = cmd,
.pid = pid,
.accept_failure = accept_failure,
.timeout_warn_usec = udev_warn_timeout(timeout_usec),
.timeout_usec = timeout_usec,
.timeout_warn_usec = udev_warn_timeout(cmd_timeout_usec),
.timeout_usec = cmd_timeout_usec,
.timeout_signal = timeout_signal,
.event_birth_usec = event->birth_usec,
.cmd_birth_usec = now_usec,
.fd_stdout = outpipe[READ_END],
.fd_stderr = errpipe[READ_END],
.result = result,
@ -340,6 +338,17 @@ void udev_event_execute_run(UdevEvent *event) {
log_device_debug_errno(event->dev, r, "Failed to run built-in command \"%s\", ignoring: %m", command);
} else {
if (event->worker && event->worker->exec_delay_usec > 0) {
usec_t timeout_usec = event->worker ? event->worker->timeout_usec : DEFAULT_WORKER_TIMEOUT_USEC;
usec_t now_usec = now(CLOCK_MONOTONIC);
usec_t age_usec = usec_sub_unsigned(now_usec, event->birth_usec);
if (event->worker->exec_delay_usec >= usec_sub_unsigned(timeout_usec, age_usec)) {
log_device_warning(event->dev,
"Cannot delaying execution of \"%s\" for %s, skipping.",
command, FORMAT_TIMESPAN(event->worker->exec_delay_usec, USEC_PER_SEC));
continue;
}
log_device_debug(event->dev, "Delaying execution of \"%s\" for %s.",
command, FORMAT_TIMESPAN(event->worker->exec_delay_usec, USEC_PER_SEC));
(void) usleep_safe(event->worker->exec_delay_usec);

View file

@ -1,7 +1,8 @@
#!/usr/bin/env bash
# SPDX-License-Identifier: LGPL-2.1-or-later
set -ex
set -eux
TMPDIR=
TEST_RULE="/run/udev/rules.d/49-test.rules"
KILL_PID=
@ -10,8 +11,15 @@ setup() {
[[ -e /etc/udev/udev.conf ]] && cp -f /etc/udev/udev.conf /etc/udev/udev.conf.bak
cat >"${TEST_RULE}" <<EOF
ACTION=="add", SUBSYSTEM=="mem", KERNEL=="null", OPTIONS="log_level=debug"
ACTION=="add", SUBSYSTEM=="mem", KERNEL=="null", PROGRAM=="/bin/sleep 60"
ACTION!="add", GOTO="test_end"
SUBSYSTEM!="mem", GOTO="test_end"
KERNEL!="null", GOTO="test_end"
OPTIONS="log_level=debug"
PROGRAM=="/bin/touch /tmp/test-udev-marker"
PROGRAM!="/bin/sleep 60", ENV{PROGRAM_RESULT}="KILLED"
LABEL="test_end"
EOF
cat >/etc/udev/udev.conf <<EOF
event_timeout=10
@ -35,11 +43,7 @@ teardown() {
systemctl restart systemd-udevd.service
}
run_test() {
local since
since="$(date '+%F %T')"
run_test_timeout() {
TMPDIR=$(mktemp -d -p /tmp udev-tests.XXXXXX)
udevadm monitor --udev --property --subsystem-match=mem >"$TMPDIR"/monitor.txt &
KILL_PID="$!"
@ -47,7 +51,43 @@ run_test() {
SYSTEMD_LOG_LEVEL=debug udevadm trigger --verbose --action add /dev/null
for _ in {1..40}; do
if coredumpctl --since "$since" --no-legend --no-pager | grep /bin/udevadm ; then
if grep -q 'PROGRAM_RESULT=KILLED' "$TMPDIR"/monitor.txt; then
sleep .5
kill "$KILL_PID"
KILL_PID=
cat "$TMPDIR"/monitor.txt
(! grep -q 'UDEV_WORKER_FAILED=1' "$TMPDIR"/monitor.txt)
(! grep -q 'UDEV_WORKER_SIGNAL=6' "$TMPDIR"/monitor.txt)
(! grep -q 'UDEV_WORKER_SIGNAL_NAME=ABRT' "$TMPDIR"/monitor.txt)
grep -q 'PROGRAM_RESULT=KILLED' "$TMPDIR"/monitor.txt
rm -rf "$TMPDIR"
return 0
fi
sleep .5
done
return 1
}
run_test_killed() {
local killed=
TMPDIR=$(mktemp -d -p /tmp udev-tests.XXXXXX)
udevadm monitor --udev --property --subsystem-match=mem >"$TMPDIR"/monitor.txt &
KILL_PID="$!"
rm -f /tmp/test-udev-marker
SYSTEMD_LOG_LEVEL=debug udevadm trigger --verbose --action add /dev/null
for _ in {1..40}; do
if [[ -z "$killed" ]]; then
if [[ -e /tmp/test-udev-marker ]]; then
killall --signal ABRT --regexp udev-worker
killed=1
fi
elif grep -q 'UDEV_WORKER_FAILED=1' "$TMPDIR"/monitor.txt; then
sleep .5
kill "$KILL_PID"
KILL_PID=
@ -55,6 +95,8 @@ run_test() {
grep -q 'UDEV_WORKER_FAILED=1' "$TMPDIR"/monitor.txt
grep -q 'UDEV_WORKER_SIGNAL=6' "$TMPDIR"/monitor.txt
grep -q 'UDEV_WORKER_SIGNAL_NAME=ABRT' "$TMPDIR"/monitor.txt
(! grep -q 'PROGRAM_RESULT=KILLED' "$TMPDIR"/monitor.txt)
rm -rf "$TMPDIR"
return 0
fi
sleep .5
@ -66,6 +108,7 @@ run_test() {
trap teardown EXIT
setup
run_test
run_test_timeout
run_test_killed
exit 0