core: Check unit start rate limiting earlier

Fixes #17433. Currently, if any of the validations we do before we
check start rate limiting fail, we can still enter a busy loop as
no rate limiting gets applied. A common occurence of this scenario
is path units triggering a service that fails a condition check.

To fix the issue, we simply move up start rate limiting checks to
be the first thing we do when starting a unit. To achieve this,
we add a new method to the unit vtable and implement it for the
relevant unit types so that we can do the start rate limit checks
earlier on.
This commit is contained in:
Daan De Meyer 2021-08-24 16:46:47 +01:00
parent a243128d1f
commit 9727f2427f
16 changed files with 169 additions and 43 deletions

View file

@ -813,12 +813,6 @@ static int automount_start(Unit *u) {
if (r < 0)
return r;
r = unit_test_start_limit(u);
if (r < 0) {
automount_enter_dead(a, AUTOMOUNT_FAILURE_START_LIMIT_HIT);
return r;
}
r = unit_acquire_invocation_id(u);
if (r < 0)
return r;
@ -1064,6 +1058,21 @@ static bool automount_supported(void) {
return supported;
}
static int automount_test_start_limit(Unit *u) {
Automount *a = AUTOMOUNT(u);
int r;
assert(a);
r = unit_test_start_limit(u);
if (r < 0) {
automount_enter_dead(a, AUTOMOUNT_FAILURE_START_LIMIT_HIT);
return r;
}
return 0;
}
static const char* const automount_result_table[_AUTOMOUNT_RESULT_MAX] = {
[AUTOMOUNT_SUCCESS] = "success",
[AUTOMOUNT_FAILURE_RESOURCES] = "resources",
@ -1126,4 +1135,6 @@ const UnitVTable automount_vtable = {
[JOB_FAILED] = "Failed to unset automount %s.",
},
},
.test_start_limit = automount_test_start_limit,
};

View file

@ -1167,12 +1167,6 @@ static int mount_start(Unit *u) {
assert(IN_SET(m->state, MOUNT_DEAD, MOUNT_FAILED));
r = unit_test_start_limit(u);
if (r < 0) {
mount_enter_dead(m, MOUNT_FAILURE_START_LIMIT_HIT);
return r;
}
r = unit_acquire_invocation_id(u);
if (r < 0)
return r;
@ -2138,6 +2132,21 @@ static int mount_can_clean(Unit *u, ExecCleanMask *ret) {
return exec_context_get_clean_mask(&m->exec_context, ret);
}
static int mount_test_start_limit(Unit *u) {
Mount *m = MOUNT(u);
int r;
assert(m);
r = unit_test_start_limit(u);
if (r < 0) {
mount_enter_dead(m, MOUNT_FAILURE_START_LIMIT_HIT);
return r;
}
return 0;
}
static const char* const mount_exec_command_table[_MOUNT_EXEC_COMMAND_MAX] = {
[MOUNT_EXEC_MOUNT] = "ExecMount",
[MOUNT_EXEC_UNMOUNT] = "ExecUnmount",
@ -2235,4 +2244,6 @@ const UnitVTable mount_vtable = {
[JOB_TIMEOUT] = "Timed out unmounting %s.",
},
},
.test_start_limit = mount_test_start_limit,
};

View file

@ -590,12 +590,6 @@ static int path_start(Unit *u) {
if (r < 0)
return r;
r = unit_test_start_limit(u);
if (r < 0) {
path_enter_dead(p, PATH_FAILURE_START_LIMIT_HIT);
return r;
}
r = unit_acquire_invocation_id(u);
if (r < 0)
return r;
@ -812,6 +806,21 @@ static void path_reset_failed(Unit *u) {
p->result = PATH_SUCCESS;
}
static int path_test_start_limit(Unit *u) {
Path *p = PATH(u);
int r;
assert(p);
r = unit_test_start_limit(u);
if (r < 0) {
path_enter_dead(p, PATH_FAILURE_START_LIMIT_HIT);
return r;
}
return 0;
}
static const char* const path_type_table[_PATH_TYPE_MAX] = {
[PATH_EXISTS] = "PathExists",
[PATH_EXISTS_GLOB] = "PathExistsGlob",
@ -866,4 +875,6 @@ const UnitVTable path_vtable = {
.reset_failed = path_reset_failed,
.bus_set_property = bus_path_set_property,
.test_start_limit = path_test_start_limit,
};

View file

@ -2436,13 +2436,6 @@ static int service_start(Unit *u) {
assert(IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED));
/* Make sure we don't enter a busy loop of some kind. */
r = unit_test_start_limit(u);
if (r < 0) {
service_enter_dead(s, SERVICE_FAILURE_START_LIMIT_HIT, false);
return r;
}
r = unit_acquire_invocation_id(u);
if (r < 0)
return r;
@ -4445,6 +4438,22 @@ static const char *service_finished_job(Unit *u, JobType t, JobResult result) {
return NULL;
}
static int service_test_start_limit(Unit *u) {
Service *s = SERVICE(u);
int r;
assert(s);
/* Make sure we don't enter a busy loop of some kind. */
r = unit_test_start_limit(u);
if (r < 0) {
service_enter_dead(s, SERVICE_FAILURE_START_LIMIT_HIT, false);
return r;
}
return 0;
}
static const char* const service_restart_table[_SERVICE_RESTART_MAX] = {
[SERVICE_RESTART_NO] = "no",
[SERVICE_RESTART_ON_SUCCESS] = "on-success",
@ -4608,4 +4617,6 @@ const UnitVTable service_vtable = {
},
.finished_job = service_finished_job,
},
.test_start_limit = service_test_start_limit,
};

View file

@ -2513,12 +2513,6 @@ static int socket_start(Unit *u) {
assert(IN_SET(s->state, SOCKET_DEAD, SOCKET_FAILED));
r = unit_test_start_limit(u);
if (r < 0) {
socket_enter_dead(s, SOCKET_FAILURE_START_LIMIT_HIT);
return r;
}
r = unit_acquire_invocation_id(u);
if (r < 0)
return r;
@ -3425,6 +3419,21 @@ static int socket_can_clean(Unit *u, ExecCleanMask *ret) {
return exec_context_get_clean_mask(&s->exec_context, ret);
}
static int socket_test_start_limit(Unit *u) {
Socket *s = SOCKET(u);
int r;
assert(s);
r = unit_test_start_limit(u);
if (r < 0) {
socket_enter_dead(s, SOCKET_FAILURE_START_LIMIT_HIT);
return r;
}
return 0;
}
static const char* const socket_exec_command_table[_SOCKET_EXEC_COMMAND_MAX] = {
[SOCKET_EXEC_START_PRE] = "ExecStartPre",
[SOCKET_EXEC_START_CHOWN] = "ExecStartChown",
@ -3551,4 +3560,6 @@ const UnitVTable socket_vtable = {
[JOB_TIMEOUT] = "Timed out stopping %s.",
},
},
.test_start_limit = socket_test_start_limit,
};

View file

@ -932,12 +932,6 @@ static int swap_start(Unit *u) {
if (UNIT(other)->job && UNIT(other)->job->state == JOB_RUNNING)
return -EAGAIN;
r = unit_test_start_limit(u);
if (r < 0) {
swap_enter_dead(s, SWAP_FAILURE_START_LIMIT_HIT);
return r;
}
r = unit_acquire_invocation_id(u);
if (r < 0)
return r;
@ -1588,6 +1582,21 @@ static int swap_can_clean(Unit *u, ExecCleanMask *ret) {
return exec_context_get_clean_mask(&s->exec_context, ret);
}
static int swap_test_start_limit(Unit *u) {
Swap *s = SWAP(u);
int r;
assert(s);
r = unit_test_start_limit(u);
if (r < 0) {
swap_enter_dead(s, SWAP_FAILURE_START_LIMIT_HIT);
return r;
}
return 0;
}
static const char* const swap_exec_command_table[_SWAP_EXEC_COMMAND_MAX] = {
[SWAP_EXEC_ACTIVATE] = "ExecActivate",
[SWAP_EXEC_DEACTIVATE] = "ExecDeactivate",
@ -1683,4 +1692,6 @@ const UnitVTable swap_vtable = {
[JOB_TIMEOUT] = "Timed out deactivating swap %s.",
},
},
.test_start_limit = swap_test_start_limit,
};

View file

@ -627,12 +627,6 @@ static int timer_start(Unit *u) {
if (r < 0)
return r;
r = unit_test_start_limit(u);
if (r < 0) {
timer_enter_dead(t, TIMER_FAILURE_START_LIMIT_HIT);
return r;
}
r = unit_acquire_invocation_id(u);
if (r < 0)
return r;
@ -890,6 +884,21 @@ static int timer_can_clean(Unit *u, ExecCleanMask *ret) {
return 0;
}
static int timer_test_start_limit(Unit *u) {
Timer *t = TIMER(u);
int r;
assert(t);
r = unit_test_start_limit(u);
if (r < 0) {
timer_enter_dead(t, TIMER_FAILURE_START_LIMIT_HIT);
return r;
}
return 0;
}
static const char* const timer_base_table[_TIMER_BASE_MAX] = {
[TIMER_ACTIVE] = "OnActiveSec",
[TIMER_BOOT] = "OnBootSec",
@ -949,4 +958,6 @@ const UnitVTable timer_vtable = {
.timezone_change = timer_timezone_change,
.bus_set_property = bus_timer_set_property,
.test_start_limit = timer_test_start_limit,
};

View file

@ -1857,6 +1857,13 @@ int unit_start(Unit *u) {
assert(u);
/* Check start rate limiting early so that failure conditions don't cause us to enter a busy loop. */
if (UNIT_VTABLE(u)->test_start_limit) {
int r = UNIT_VTABLE(u)->test_start_limit(u);
if (r < 0)
return r;
}
/* If this is already started, then this will succeed. Note that this will even succeed if this unit
* is not startable by the user. This is relied on to detect when we need to wait for units and when
* waiting is finished. */

View file

@ -658,6 +658,10 @@ typedef struct UnitVTable {
* of this type will immediately fail. */
bool (*supported)(void);
/* If this function is set, it's invoked first as part of starting a unit to allow start rate
* limiting checks to occur before we do anything else. */
int (*test_start_limit)(Unit *u);
/* The strings to print in status messages */
UnitStatusMessageFormats status_message_formats;

View file

@ -0,0 +1 @@
../TEST-01-BASIC/Makefile

View file

@ -0,0 +1,9 @@
#!/usr/bin/env bash
set -e
TEST_DESCRIPTION="https://github.com/systemd/systemd/issues/17433"
# shellcheck source=test/test-functions
. "${TEST_BASE_DIR:?}/test-functions"
do_test "$@"

View file

@ -33,6 +33,8 @@ if install_tests
install_dir : testdata_dir)
install_subdir('testsuite-52.units',
install_dir : testdata_dir)
install_subdir('testsuite-63.units',
install_dir : testdata_dir)
testsuite08_dir = testdata_dir + '/testsuite-08.units'
install_data('testsuite-08.units/-.mount',

View file

@ -1,6 +1,9 @@
[Unit]
Requires=test10.socket
ConditionPathExistsGlob=/tmp/nonexistent
# Make sure we hit the socket trigger limit in the test and not the service start limit.
StartLimitInterval=1000
StartLimitBurst=1000
[Service]
ExecStart=true

View file

@ -0,0 +1,2 @@
[Path]
PathExists=/tmp/test63

View file

@ -0,0 +1,5 @@
[Unit]
ConditionPathExists=!/tmp/nonexistent
[Service]
ExecStart=true

View file

@ -0,0 +1,16 @@
[Unit]
Description=TEST-63-ISSUE-17433
[Service]
ExecStartPre=rm -f /failed /testok
Type=oneshot
ExecStart=rm -f /tmp/nonexistent
ExecStart=systemctl start test63.path
ExecStart=touch /tmp/test63
# Make sure systemd has sufficient time to hit the start limit for test63.service.
ExecStart=sleep 2
ExecStart=sh -x -c 'test "$(systemctl show test63.service -P ActiveState)" = failed'
ExecStart=sh -x -c 'test "$(systemctl show test63.service -P Result)" = start-limit-hit'
ExecStart=sh -x -c 'test "$(systemctl show test63.path -P ActiveState)" = failed'
ExecStart=sh -x -c 'test "$(systemctl show test63.path -P Result)" = unit-start-limit-hit'
ExecStart=sh -x -c 'echo OK >/testok'