core: introduce a new job mode JOB_RESTART_DEPENDENCIES

This new job mode will enqueue a start job for a unit, and all units
depending on the unit will get a restart job enqueued. This is then used
for automatic sevice restarts: the unit itself is only started, the
depending units restarted. This way the unit will not go down
unnecessarily, triggering OnSuccess= needlessly.

This also introduces a new state SERVICE_AUTO_RESTART_QUEUED that is
entered once the restart jobs are enqueued. Previously we'd stay in
SERVICE_AUTO_RESTART, but that's problematic, since we'd lose
information whether we still need to enqueue the restart job during a
serialization/deserialization cycle or not. By having an explicit state
for this we know exactly whether we still need to enqueue the job or
not. It's also good since when we are in SERVICE_AUTO_RESTART_QUEUED we
want to act on unit_start(), but on SERVICE_AUTO_RESTART we want to wait
for the holdoff time to pass before we act on unit_start().

Fixes: #27722
This commit is contained in:
Lennart Poettering 2023-06-30 18:17:06 +02:00
parent 0c59d2e4ab
commit 09d04ad325
8 changed files with 54 additions and 38 deletions

View file

@ -204,6 +204,7 @@ static const char* const service_state_table[_SERVICE_STATE_MAX] = {
[SERVICE_FAILED_BEFORE_AUTO_RESTART] = "failed-before-auto-restart",
[SERVICE_DEAD_RESOURCES_PINNED] = "dead-resources-pinned",
[SERVICE_AUTO_RESTART] = "auto-restart",
[SERVICE_AUTO_RESTART_QUEUED] = "auto-restart-queued",
[SERVICE_CLEANING] = "cleaning",
};

View file

@ -149,6 +149,7 @@ typedef enum ServiceState {
SERVICE_FAILED_BEFORE_AUTO_RESTART,
SERVICE_DEAD_RESOURCES_PINNED, /* Like SERVICE_DEAD, but with pinned resources */
SERVICE_AUTO_RESTART,
SERVICE_AUTO_RESTART_QUEUED,
SERVICE_CLEANING,
_SERVICE_STATE_MAX,
_SERVICE_STATE_INVALID = -EINVAL,

View file

@ -1634,6 +1634,7 @@ static const char* const job_mode_table[_JOB_MODE_MAX] = {
[JOB_IGNORE_DEPENDENCIES] = "ignore-dependencies",
[JOB_IGNORE_REQUIREMENTS] = "ignore-requirements",
[JOB_TRIGGERING] = "triggering",
[JOB_RESTART_DEPENDENCIES] = "restart-dependencies",
};
DEFINE_STRING_TABLE_LOOKUP(job_mode, JobMode);

View file

@ -79,6 +79,7 @@ enum JobMode {
JOB_IGNORE_DEPENDENCIES, /* Ignore both requirement and ordering dependencies */
JOB_IGNORE_REQUIREMENTS, /* Ignore requirement dependencies */
JOB_TRIGGERING, /* Adds TRIGGERED_BY dependencies to the same transaction */
JOB_RESTART_DEPENDENCIES,/* A "start" job for the specified unit becomes "restart" for depending units */
_JOB_MODE_MAX,
_JOB_MODE_INVALID = -EINVAL,
};

View file

@ -2044,6 +2044,9 @@ int manager_add_job(
if (mode == JOB_TRIGGERING && type != JOB_STOP)
return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "--job-mode=triggering is only valid for stop.");
if (mode == JOB_RESTART_DEPENDENCIES && type != JOB_START)
return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "--job-mode=restart-dependencies is only valid for start.");
log_unit_debug(unit, "Trying to enqueue job %s/%s/%s", unit->id, job_type_to_string(type), job_mode_to_string(mode));
type = job_type_collapse(type, unit);
@ -2059,7 +2062,8 @@ int manager_add_job(
/* by= */ NULL,
TRANSACTION_MATTERS |
(IN_SET(mode, JOB_IGNORE_DEPENDENCIES, JOB_IGNORE_REQUIREMENTS) ? TRANSACTION_IGNORE_REQUIREMENTS : 0) |
(mode == JOB_IGNORE_DEPENDENCIES ? TRANSACTION_IGNORE_ORDER : 0),
(mode == JOB_IGNORE_DEPENDENCIES ? TRANSACTION_IGNORE_ORDER : 0) |
(mode == JOB_RESTART_DEPENDENCIES ? TRANSACTION_PROPAGATE_START_AS_RESTART : 0),
error);
if (r < 0)
return r;

View file

@ -72,6 +72,7 @@ static const UnitActiveState state_translation_table[_SERVICE_STATE_MAX] = {
[SERVICE_FAILED_BEFORE_AUTO_RESTART] = UNIT_FAILED,
[SERVICE_DEAD_RESOURCES_PINNED] = UNIT_INACTIVE,
[SERVICE_AUTO_RESTART] = UNIT_ACTIVATING,
[SERVICE_AUTO_RESTART_QUEUED] = UNIT_ACTIVATING,
[SERVICE_CLEANING] = UNIT_MAINTENANCE,
};
@ -101,6 +102,7 @@ static const UnitActiveState state_translation_table_idle[_SERVICE_STATE_MAX] =
[SERVICE_FAILED_BEFORE_AUTO_RESTART] = UNIT_FAILED,
[SERVICE_DEAD_RESOURCES_PINNED] = UNIT_INACTIVE,
[SERVICE_AUTO_RESTART] = UNIT_ACTIVATING,
[SERVICE_AUTO_RESTART_QUEUED] = UNIT_ACTIVATING,
[SERVICE_CLEANING] = UNIT_MAINTENANCE,
};
@ -1258,7 +1260,7 @@ static void service_set_state(Service *s, ServiceState state) {
if (IN_SET(state,
SERVICE_DEAD, SERVICE_FAILED,
SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART,
SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART, SERVICE_AUTO_RESTART_QUEUED,
SERVICE_DEAD_RESOURCES_PINNED)) {
unit_unwatch_all_pids(UNIT(s));
unit_dequeue_rewatch_pids(UNIT(s));
@ -1363,7 +1365,7 @@ static int service_coldplug(Unit *u) {
if (!IN_SET(s->deserialized_state,
SERVICE_DEAD, SERVICE_FAILED,
SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART,
SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART, SERVICE_AUTO_RESTART_QUEUED,
SERVICE_CLEANING,
SERVICE_DEAD_RESOURCES_PINNED)) {
(void) unit_enqueue_rewatch_pids(u);
@ -1945,7 +1947,7 @@ static bool service_will_restart(Unit *u) {
assert(s);
if (IN_SET(s->state, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART))
if (IN_SET(s->state, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART, SERVICE_AUTO_RESTART_QUEUED))
return true;
return unit_will_restart_default(u);
@ -2511,11 +2513,9 @@ static void service_enter_restart(Service *s) {
return;
}
/* Any units that are bound to this service must also be
* restarted. We use JOB_RESTART (instead of the more obvious
* JOB_START) here so that those dependency jobs will be added
* as well. */
r = manager_add_job(UNIT(s)->manager, JOB_RESTART, UNIT(s), JOB_REPLACE, NULL, &error, NULL);
/* Any units that are bound to this service must also be restarted. We use JOB_START for ourselves
* but then set JOB_RESTART_DEPENDENCIES which will enqueue JOB_RESTART for those dependency jobs. */
r = manager_add_job(UNIT(s)->manager, JOB_START, UNIT(s), JOB_RESTART_DEPENDENCIES, NULL, &error, NULL);
if (r < 0)
goto fail;
@ -2534,11 +2534,10 @@ static void service_enter_restart(Service *s) {
"Scheduled restart job, restart counter is at %u.", s->n_restarts),
"N_RESTARTS=%u", s->n_restarts);
service_set_state(s, SERVICE_AUTO_RESTART_QUEUED);
/* Notify clients about changed restart counter */
unit_add_to_dbus_queue(UNIT(s));
/* Note that we stay in the SERVICE_AUTO_RESTART state here, it will be canceled as part of the
* service_stop() call that is executed as part of JOB_RESTART. */
return;
fail:
@ -2717,7 +2716,7 @@ static int service_start(Unit *u) {
if (IN_SET(s->state, SERVICE_AUTO_RESTART, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART))
return -EAGAIN;
assert(IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_DEAD_RESOURCES_PINNED));
assert(IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_DEAD_RESOURCES_PINNED, SERVICE_AUTO_RESTART_QUEUED));
r = unit_acquire_invocation_id(u);
if (r < 0)
@ -2775,7 +2774,8 @@ static int service_stop(Unit *u) {
return 0;
case SERVICE_AUTO_RESTART:
/* A restart will be scheduled or is in progress. */
case SERVICE_AUTO_RESTART_QUEUED:
/* Give up on the auto restart */
service_set_state(s, service_determine_dead_state(s));
return 0;
@ -3648,6 +3648,7 @@ static void service_notify_cgroup_empty_event(Unit *u) {
/* If the cgroup empty notification comes when the unit is not active, we must have failed to clean
* up the cgroup earlier and should do it now. */
case SERVICE_AUTO_RESTART:
case SERVICE_AUTO_RESTART_QUEUED:
unit_prune_cgroup(u);
break;

View file

@ -1054,34 +1054,38 @@ int transaction_add_job_and_dependencies(
}
}
if (IN_SET(type, JOB_STOP, JOB_RESTART)) {
_cleanup_set_free_ Set *propagated_restart = NULL;
/* We propagate STOP as STOP, but RESTART only as TRY_RESTART, in order not to start
* dependencies that are not around. */
_cleanup_set_free_ Set *propagated_restart = NULL;
if (type == JOB_RESTART)
UNIT_FOREACH_DEPENDENCY(dep, ret->unit, UNIT_ATOM_PROPAGATE_RESTART) {
JobType nt;
if (type == JOB_RESTART || (type == JOB_START && FLAGS_SET(flags, TRANSACTION_PROPAGATE_START_AS_RESTART))) {
r = set_ensure_put(&propagated_restart, NULL, dep);
if (r < 0)
/* We propagate RESTART only as TRY_RESTART, in order not to start dependencies that
* are not around. */
UNIT_FOREACH_DEPENDENCY(dep, ret->unit, UNIT_ATOM_PROPAGATE_RESTART) {
JobType nt;
r = set_ensure_put(&propagated_restart, NULL, dep);
if (r < 0)
return r;
nt = job_type_collapse(JOB_TRY_RESTART, dep);
if (nt == JOB_NOP)
continue;
r = transaction_add_job_and_dependencies(tr, nt, dep, ret, TRANSACTION_MATTERS | (flags & TRANSACTION_IGNORE_ORDER), e);
if (r < 0) {
if (r != -EBADR) /* job type not applicable */
return r;
nt = job_type_collapse(JOB_TRY_RESTART, dep);
if (nt == JOB_NOP)
continue;
r = transaction_add_job_and_dependencies(tr, nt, dep, ret, TRANSACTION_MATTERS | (flags & TRANSACTION_IGNORE_ORDER), e);
if (r < 0) {
if (r != -EBADR) /* job type not applicable */
return r;
sd_bus_error_free(e);
}
sd_bus_error_free(e);
}
}
}
if (type == JOB_STOP) {
/* The 'stop' part of a restart job is also propagated to units with
* UNIT_ATOM_PROPAGATE_STOP */
UNIT_FOREACH_DEPENDENCY(dep, ret->unit, UNIT_ATOM_PROPAGATE_STOP) {
/* Units experienced restart propagation are skipped */
if (set_contains(propagated_restart, dep))

View file

@ -21,10 +21,13 @@ Transaction *transaction_abort_and_free(Transaction *tr);
DEFINE_TRIVIAL_CLEANUP_FUNC(Transaction*, transaction_abort_and_free);
typedef enum TransactionAddFlags {
TRANSACTION_MATTERS = 1 << 0,
TRANSACTION_CONFLICTS = 1 << 1,
TRANSACTION_IGNORE_REQUIREMENTS = 1 << 2,
TRANSACTION_IGNORE_ORDER = 1 << 3,
TRANSACTION_MATTERS = 1 << 0,
TRANSACTION_CONFLICTS = 1 << 1,
TRANSACTION_IGNORE_REQUIREMENTS = 1 << 2,
TRANSACTION_IGNORE_ORDER = 1 << 3,
/* Propagate a START job to other units like a RESTART */
TRANSACTION_PROPAGATE_START_AS_RESTART = 1 << 4,
} TransactionAddFlags;
void transaction_add_propagate_reload_jobs(