core: implement Uphold= dependency type

This is like a really strong version of Wants=, that keeps starting the
specified unit if it is ever found inactive.

This is an alternative to Restart= inside a unit, acknowledging the fact
that whether to keep restarting the unit is sometimes not a property of
the unit itself but the state of the system.

This implements a part of what #4263 requests. i.e. there's no
distinction between "always" and "opportunistic". We just dumbly
implement "always" and become active whenever we see no job queued for
an inactive unit that is supposed to be upheld.
This commit is contained in:
Lennart Poettering 2021-04-14 15:24:08 +02:00
parent 294446dcb9
commit 0bc488c99a
12 changed files with 180 additions and 7 deletions

View file

@ -708,6 +708,24 @@
</listitem>
</varlistentry>
<varlistentry>
<term><varname>Upholds=</varname></term>
<listitem><para>Configures dependencies similar to <varname>Wants=</varname>, but as long a this unit
is up, all units listed in <varname>Upholds=</varname> are started whenever found to be inactive or
failed, and no job is queued for them. While a <varname>Wants=</varname> dependency on another unit
has a one-time effect when this units started, a <varname>Upholds=</varname> dependency on it has a
continuous effect, constantly restarting the unit if necessary. This is an alternative to the
<varname>Restart=</varname> setting of service units, to ensure they are kept running whatever
happens.</para>
<para>When <varname>Upholds=b.service</varname> is used on <filename>a.service</filename>, this
dependency will show as <varname>UpheldBy=a.service</varname> in the property listing of
<filename>b.service</filename>. The <varname>UpheldBy=</varname> dependency cannot be specified
directly.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>Conflicts=</varname></term>

View file

@ -265,10 +265,12 @@ static const char* const unit_dependency_table[_UNIT_DEPENDENCY_MAX] = {
[UNIT_WANTS] = "Wants",
[UNIT_BINDS_TO] = "BindsTo",
[UNIT_PART_OF] = "PartOf",
[UNIT_UPHOLDS] = "Upholds",
[UNIT_REQUIRED_BY] = "RequiredBy",
[UNIT_REQUISITE_OF] = "RequisiteOf",
[UNIT_WANTED_BY] = "WantedBy",
[UNIT_BOUND_BY] = "BoundBy",
[UNIT_UPHELD_BY] = "UpheldBy",
[UNIT_CONSISTS_OF] = "ConsistsOf",
[UNIT_CONFLICTS] = "Conflicts",
[UNIT_CONFLICTED_BY] = "ConflictedBy",

View file

@ -211,6 +211,7 @@ typedef enum UnitDependency {
UNIT_WANTS,
UNIT_BINDS_TO,
UNIT_PART_OF,
UNIT_UPHOLDS,
/* Inverse of the above */
UNIT_REQUIRED_BY, /* inverse of 'requires' is 'required_by' */
@ -218,6 +219,7 @@ typedef enum UnitDependency {
UNIT_WANTED_BY, /* inverse of 'wants' */
UNIT_BOUND_BY, /* inverse of 'binds_to' */
UNIT_CONSISTS_OF, /* inverse of 'part_of' */
UNIT_UPHELD_BY, /* inverse of 'uphold' */
/* Negative dependencies */
UNIT_CONFLICTS, /* inverse of 'conflicts' is 'conflicted_by' */

View file

@ -2304,6 +2304,7 @@ static int bus_unit_set_transient_property(
UNIT_WANTS,
UNIT_BINDS_TO,
UNIT_PART_OF,
UNIT_UPHOLDS,
UNIT_CONFLICTS,
UNIT_BEFORE,
UNIT_AFTER,

View file

@ -261,6 +261,7 @@ Unit.Requisite, config_parse_unit_deps,
Unit.Wants, config_parse_unit_deps, UNIT_WANTS, 0
Unit.BindsTo, config_parse_unit_deps, UNIT_BINDS_TO, 0
Unit.BindTo, config_parse_unit_deps, UNIT_BINDS_TO, 0
Unit.Upholds, config_parse_unit_deps, UNIT_UPHOLDS, 0
Unit.Conflicts, config_parse_unit_deps, UNIT_CONFLICTS, 0
Unit.Before, config_parse_unit_deps, UNIT_BEFORE, 0
Unit.After, config_parse_unit_deps, UNIT_AFTER, 0

View file

@ -1295,7 +1295,7 @@ static unsigned manager_dispatch_stop_when_unneeded_queue(Manager *m) {
/* If stopping a unit fails continuously we might enter a stop loop here, hence stop acting on the
* service being unnecessary after a while. */
if (!ratelimit_below(&u->auto_stop_ratelimit)) {
if (!ratelimit_below(&u->auto_start_stop_ratelimit)) {
log_unit_warning(u, "Unit not needed anymore, but not stopping since we tried this too often recently.");
continue;
}
@ -1309,6 +1309,44 @@ static unsigned manager_dispatch_stop_when_unneeded_queue(Manager *m) {
return n;
}
static unsigned manager_dispatch_start_when_upheld_queue(Manager *m) {
unsigned n = 0;
Unit *u;
int r;
assert(m);
while ((u = m->start_when_upheld_queue)) {
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
Unit *culprit = NULL;
assert(u->in_start_when_upheld_queue);
LIST_REMOVE(start_when_upheld_queue, m->start_when_upheld_queue, u);
u->in_start_when_upheld_queue = false;
n++;
if (!unit_is_upheld_by_active(u, &culprit))
continue;
log_unit_debug(u, "Unit is started because upheld by active unit %s.", culprit->id);
/* If stopping a unit fails continuously we might enter a stop loop here, hence stop acting on the
* service being unnecessary after a while. */
if (!ratelimit_below(&u->auto_start_stop_ratelimit)) {
log_unit_warning(u, "Unit needs to be started because active unit %s upholds it, but not starting since we tried this too often recently.", culprit->id);
continue;
}
r = manager_add_job(u->manager, JOB_START, u, JOB_FAIL, NULL, &error, NULL);
if (r < 0)
log_unit_warning_errno(u, r, "Failed to enqueue start job, ignoring: %s", bus_error_message(&error, r));
}
return n;
}
static void manager_clear_jobs_and_units(Manager *m) {
Unit *u;
@ -1327,6 +1365,7 @@ static void manager_clear_jobs_and_units(Manager *m) {
assert(!m->gc_unit_queue);
assert(!m->gc_job_queue);
assert(!m->stop_when_unneeded_queue);
assert(!m->start_when_upheld_queue);
assert(hashmap_isempty(m->jobs));
assert(hashmap_isempty(m->units));
@ -2954,6 +2993,9 @@ int manager_loop(Manager *m) {
if (manager_dispatch_cgroup_realize_queue(m) > 0)
continue;
if (manager_dispatch_start_when_upheld_queue(m) > 0)
continue;
if (manager_dispatch_stop_when_unneeded_queue(m) > 0)
continue;

View file

@ -187,6 +187,9 @@ struct Manager {
/* Units that might be subject to StopWhenUnneeded= clean-up */
LIST_HEAD(Unit, stop_when_unneeded_queue);
/* Units which are upheld by another other which we might need to act on */
LIST_HEAD(Unit, start_when_upheld_queue);
sd_event *event;
/* This maps PIDs we care about to units that are interested in. We allow multiple units to he interested in

View file

@ -35,6 +35,12 @@ static const UnitDependencyAtom atom_map[_UNIT_DEPENDENCY_MAX] = {
[UNIT_PART_OF] = UNIT_ATOM_ADD_DEFAULT_TARGET_DEPENDENCY_QUEUE,
[UNIT_UPHOLDS] = UNIT_ATOM_PULL_IN_START_IGNORED |
UNIT_ATOM_RETROACTIVE_START_REPLACE |
UNIT_ATOM_ADD_START_WHEN_UPHELD_QUEUE |
UNIT_ATOM_ADD_STOP_WHEN_UNNEEDED_QUEUE |
UNIT_ATOM_ADD_DEFAULT_TARGET_DEPENDENCY_QUEUE,
[UNIT_REQUIRED_BY] = UNIT_ATOM_PROPAGATE_STOP |
UNIT_ATOM_PROPAGATE_RESTART |
UNIT_ATOM_PROPAGATE_START_FAILURE |
@ -58,6 +64,10 @@ static const UnitDependencyAtom atom_map[_UNIT_DEPENDENCY_MAX] = {
UNIT_ATOM_PINS_STOP_WHEN_UNNEEDED |
UNIT_ATOM_DEFAULT_TARGET_DEPENDENCIES,
[UNIT_UPHELD_BY] = UNIT_ATOM_START_STEADILY |
UNIT_ATOM_DEFAULT_TARGET_DEPENDENCIES |
UNIT_ATOM_PINS_STOP_WHEN_UNNEEDED,
[UNIT_CONSISTS_OF] = UNIT_ATOM_PROPAGATE_STOP |
UNIT_ATOM_PROPAGATE_RESTART,
@ -140,6 +150,14 @@ UnitDependency unit_dependency_from_unique_atom(UnitDependencyAtom atom) {
case UNIT_ATOM_CANNOT_BE_ACTIVE_WITHOUT:
return UNIT_BINDS_TO;
case UNIT_ATOM_PULL_IN_START_IGNORED |
UNIT_ATOM_RETROACTIVE_START_REPLACE |
UNIT_ATOM_ADD_START_WHEN_UPHELD_QUEUE |
UNIT_ATOM_ADD_STOP_WHEN_UNNEEDED_QUEUE |
UNIT_ATOM_ADD_DEFAULT_TARGET_DEPENDENCY_QUEUE:
case UNIT_ATOM_ADD_START_WHEN_UPHELD_QUEUE:
return UNIT_UPHOLDS;
case UNIT_ATOM_PROPAGATE_STOP |
UNIT_ATOM_PROPAGATE_RESTART |
UNIT_ATOM_PROPAGATE_START_FAILURE |
@ -157,6 +175,12 @@ UnitDependency unit_dependency_from_unique_atom(UnitDependencyAtom atom) {
UNIT_ATOM_DEFAULT_TARGET_DEPENDENCIES:
return UNIT_BOUND_BY;
case UNIT_ATOM_START_STEADILY |
UNIT_ATOM_DEFAULT_TARGET_DEPENDENCIES |
UNIT_ATOM_PINS_STOP_WHEN_UNNEEDED:
case UNIT_ATOM_START_STEADILY:
return UNIT_UPHELD_BY;
case UNIT_ATOM_PULL_IN_STOP |
UNIT_ATOM_RETROACTIVE_STOP_ON_START:
case UNIT_ATOM_PULL_IN_STOP:

View file

@ -32,6 +32,11 @@ typedef enum UnitDependencyAtom {
/* Stop our unit if the other unit happens to inactive */
UNIT_ATOM_CANNOT_BE_ACTIVE_WITHOUT = UINT64_C(1) << 7,
/* Start this unit whenever we find it inactive and the other unit active */
UNIT_ATOM_START_STEADILY = UINT64_C(1) << 9,
/* Whenever our unit becomes active, add other unit to start_when_upheld_queue */
UNIT_ATOM_ADD_START_WHEN_UPHELD_QUEUE = UINT64_C(1) << 10,
/* If our unit unexpectedly becomes active, retroactively start the other unit too, in "replace" job
* mode */
UNIT_ATOM_RETROACTIVE_START_REPLACE = UINT64_C(1) << 11,

View file

@ -122,7 +122,7 @@ Unit* unit_new(Manager *m, size_t size) {
u->last_section_private = -1;
u->start_ratelimit = (RateLimit) { m->default_start_limit_interval, m->default_start_limit_burst };
u->auto_stop_ratelimit = (RateLimit) { 10 * USEC_PER_SEC, 16 };
u->auto_start_stop_ratelimit = (RateLimit) { 10 * USEC_PER_SEC, 16 };
for (CGroupIOAccountingMetric i = 0; i < _CGROUP_IO_ACCOUNTING_METRIC_MAX; i++)
u->io_accounting_last[i] = UINT64_MAX;
@ -507,6 +507,22 @@ void unit_submit_to_stop_when_unneeded_queue(Unit *u) {
u->in_stop_when_unneeded_queue = true;
}
void unit_submit_to_start_when_upheld_queue(Unit *u) {
assert(u);
if (u->in_start_when_upheld_queue)
return;
if (!UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(u)))
return;
if (!unit_has_dependency(u, UNIT_ATOM_START_STEADILY, NULL))
return;
LIST_PREPEND(start_when_upheld_queue, u->manager->start_when_upheld_queue, u);
u->in_start_when_upheld_queue = true;
}
static void unit_clear_dependencies(Unit *u) {
assert(u);
@ -719,6 +735,9 @@ Unit* unit_free(Unit *u) {
if (u->in_stop_when_unneeded_queue)
LIST_REMOVE(stop_when_unneeded_queue, u->manager->stop_when_unneeded_queue, u);
if (u->in_start_when_upheld_queue)
LIST_REMOVE(start_when_upheld_queue, u->manager->start_when_upheld_queue, u);
safe_close(u->ip_accounting_ingress_map_fd);
safe_close(u->ip_accounting_egress_map_fd);
@ -2011,6 +2030,36 @@ bool unit_is_unneeded(Unit *u) {
return true;
}
bool unit_is_upheld_by_active(Unit *u, Unit **ret_culprit) {
Unit *other;
assert(u);
/* Checks if the unit needs to be started because it currently is not running, but some other unit
* that is active declared an Uphold= dependencies on it */
if (!UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(u)) || u->job) {
if (ret_culprit)
*ret_culprit = NULL;
return false;
}
UNIT_FOREACH_DEPENDENCY(other, u, UNIT_ATOM_START_STEADILY) {
if (other->job)
continue;
if (UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(other))) {
if (ret_culprit)
*ret_culprit = other;
return true;
}
}
if (ret_culprit)
*ret_culprit = NULL;
return false;
}
static void check_unneeded_dependencies(Unit *u) {
Unit *other;
assert(u);
@ -2021,6 +2070,16 @@ static void check_unneeded_dependencies(Unit *u) {
unit_submit_to_stop_when_unneeded_queue(other);
}
static void check_uphold_dependencies(Unit *u) {
Unit *other;
assert(u);
/* Add all units this unit depends on to the queue that processes Uphold= behaviour. */
UNIT_FOREACH_DEPENDENCY(other, u, UNIT_ATOM_ADD_START_WHEN_UPHELD_QUEUE)
unit_submit_to_start_when_upheld_queue(other);
}
static void unit_check_binds_to(Unit *u) {
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
bool stop = false;
@ -2056,7 +2115,7 @@ static void unit_check_binds_to(Unit *u) {
/* If stopping a unit fails continuously we might enter a stop
* loop here, hence stop acting on the service being
* unnecessary after a while. */
if (!ratelimit_below(&u->auto_stop_ratelimit)) {
if (!ratelimit_below(&u->auto_start_stop_ratelimit)) {
log_unit_warning(u, "Unit is bound to inactive unit %s, but not stopping since we tried this too often recently.", other->id);
return;
}
@ -2595,10 +2654,14 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlag
retroactively_stop_dependencies(u);
}
/* stop unneeded units regardless if going down was expected or not */
/* Stop unneeded units regardless if going down was expected or not */
if (UNIT_IS_INACTIVE_OR_FAILED(ns))
check_unneeded_dependencies(u);
/* Start uphold units regardless if going up was expected or not */
if (UNIT_IS_ACTIVE_OR_RELOADING(ns))
check_uphold_dependencies(u);
if (ns != os && ns == UNIT_FAILED) {
log_unit_debug(u, "Unit entered failed state.");
@ -2634,6 +2697,9 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlag
/* Maybe we finished startup and are now ready for being stopped because unneeded? */
unit_submit_to_stop_when_unneeded_queue(u);
/* Maybe someone wants us to remain up? */
unit_submit_to_start_when_upheld_queue(u);
/* Maybe we finished startup, but something we needed has vanished? Let's die then. (This happens when
* something BindsTo= to a Type=oneshot unit, as these units go directly from starting to inactive,
* without ever entering started.) */
@ -2901,11 +2967,13 @@ int unit_add_dependency(
[UNIT_REQUISITE] = UNIT_REQUISITE_OF,
[UNIT_BINDS_TO] = UNIT_BOUND_BY,
[UNIT_PART_OF] = UNIT_CONSISTS_OF,
[UNIT_UPHOLDS] = UNIT_UPHELD_BY,
[UNIT_REQUIRED_BY] = UNIT_REQUIRES,
[UNIT_REQUISITE_OF] = UNIT_REQUISITE,
[UNIT_WANTED_BY] = UNIT_WANTS,
[UNIT_BOUND_BY] = UNIT_BINDS_TO,
[UNIT_CONSISTS_OF] = UNIT_PART_OF,
[UNIT_UPHELD_BY] = UNIT_UPHOLDS,
[UNIT_CONFLICTS] = UNIT_CONFLICTED_BY,
[UNIT_CONFLICTED_BY] = UNIT_CONFLICTS,
[UNIT_BEFORE] = UNIT_AFTER,

View file

@ -229,9 +229,12 @@ typedef struct Unit {
/* Target dependencies queue */
LIST_FIELDS(Unit, target_deps_queue);
/* Queue of units with StopWhenUnneeded set that shell be checked for clean-up. */
/* Queue of units with StopWhenUnneeded= set that shall be checked for clean-up. */
LIST_FIELDS(Unit, stop_when_unneeded_queue);
/* Queue of units that have an Uphold= dependency from some other unit, and should be checked for starting */
LIST_FIELDS(Unit, start_when_upheld_queue);
/* PIDs we keep an eye on. Note that a unit might have many
* more, but these are the ones we care enough about to
* process SIGCHLD for */
@ -260,8 +263,8 @@ typedef struct Unit {
int success_action_exit_status, failure_action_exit_status;
char *reboot_arg;
/* Make sure we never enter endless loops with the check unneeded logic, or the BindsTo= logic */
RateLimit auto_stop_ratelimit;
/* Make sure we never enter endless loops with the StopWhenUnneeded=, BindsTo=, Uphold= logic */
RateLimit auto_start_stop_ratelimit;
/* Reference to a specific UID/GID */
uid_t ref_uid;
@ -383,6 +386,7 @@ typedef struct Unit {
bool in_cgroup_oom_queue:1;
bool in_target_deps_queue:1;
bool in_stop_when_unneeded_queue:1;
bool in_start_when_upheld_queue:1;
bool sent_dbus_new_signal:1;
@ -740,6 +744,7 @@ void unit_add_to_cleanup_queue(Unit *u);
void unit_add_to_gc_queue(Unit *u);
void unit_add_to_target_deps_queue(Unit *u);
void unit_submit_to_stop_when_unneeded_queue(Unit *u);
void unit_submit_to_start_when_upheld_queue(Unit *u);
int unit_merge(Unit *u, Unit *other);
int unit_merge_by_name(Unit *u, const char *other);
@ -869,6 +874,7 @@ bool unit_type_supported(UnitType t);
bool unit_is_pristine(Unit *u);
bool unit_is_unneeded(Unit *u);
bool unit_is_upheld_by_active(Unit *u, Unit **ret_culprit);
pid_t unit_control_pid(Unit *u);
pid_t unit_main_pid(Unit *u);

View file

@ -103,6 +103,7 @@ StopWhenUnneeded=
StopPropagatedFrom=
SuccessAction=
SuccessActionExitStatus=
Upholds=
Wants=
[Install]
Alias=