core: introduce a restart counter (#6495)

This adds a per-service restart counter. Each time an automatic
restart is scheduled (due to Restart=) it is increased by one. Its
current value is exposed over the bus as NRestarts=. It is also logged
(in a structured, recognizable way) on each restart.

Note that this really only counts automatic restarts triggered by Restart=
(which it nicely complements). Manual restarts will reset the counter,
as will explicit calls to "systemctl reset-failed". It's supposed to be
a tool for measuring the automatic restart feature, and nothing else.

Fixes: #4126
This commit is contained in:
Lennart Poettering 2017-08-09 21:12:55 +02:00 committed by GitHub
parent 97f7e3663e
commit 7a0019d373
4 changed files with 53 additions and 2 deletions

View file

@ -67,6 +67,7 @@ const sd_bus_vtable bus_service_vtable[] = {
SD_BUS_PROPERTY("USBFunctionStrings", "s", NULL, offsetof(Service, usb_function_strings), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_PROPERTY("NRestarts", "u", bus_property_get_unsigned, offsetof(Service, n_restarts), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
BUS_EXEC_STATUS_VTABLE("ExecMain", offsetof(Service, main_exec_status), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPre", offsetof(Service, exec_command[SERVICE_EXEC_START_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),

View file

@ -21,6 +21,8 @@
#include <signal.h>
#include <unistd.h>
#include "sd-messages.h"
#include "alloc-util.h"
#include "async.h"
#include "bus-error.h"
@ -1514,7 +1516,10 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
goto fail;
service_set_state(s, SERVICE_AUTO_RESTART);
}
} else
/* If we shan't restart, then flush out the restart counter. But don't do that immediately, so that the
* user can still introspect the counter. Do so on the next start. */
s->flush_n_restarts = true;
/* The next restart might not be a manual stop, hence reset the flag indicating manual stops */
s->forbid_restart = false;
@ -1932,11 +1937,26 @@ static void service_enter_restart(Service *s) {
if (r < 0)
goto fail;
/* Count the jobs we enqueue for restarting. This counter is maintained as long as the unit isn't fully
* stopped, i.e. as long as it remains up or remains in auto-restart states. The user can reset the counter
* explicitly however via the usual "systemctl reset-failed" logic. */
s->n_restarts ++;
s->flush_n_restarts = false;
log_struct(LOG_INFO,
"MESSAGE_ID=" SD_MESSAGE_UNIT_RESTART_SCHEDULED_STR,
LOG_UNIT_ID(UNIT(s)),
LOG_UNIT_MESSAGE(UNIT(s), "Scheduled restart job, restart counter is at %u.", s->n_restarts),
"N_RESTARTS=%u", s->n_restarts,
NULL);
/* Notify clients about changed restart counter */
unit_add_to_dbus_queue(UNIT(s));
/* Note that we stay in the SERVICE_AUTO_RESTART state here,
* it will be canceled as part of the service_stop() call that
* is executed as part of JOB_RESTART. */
log_unit_debug(UNIT(s), "Scheduled restart job.");
return;
fail:
@ -2119,6 +2139,12 @@ static int service_start(Unit *u) {
s->watchdog_override_enable = false;
s->watchdog_override_usec = 0;
/* This is not an automatic restart? Flush the restart counter then */
if (s->flush_n_restarts) {
s->n_restarts = 0;
s->flush_n_restarts = false;
}
service_enter_start_pre(s);
return 1;
}
@ -2271,6 +2297,9 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) {
unit_serialize_item(u, f, "bus-name-good", yes_no(s->bus_name_good));
unit_serialize_item(u, f, "bus-name-owner", s->bus_name_owner);
/* Persist the automatic-restart bookkeeping. The keys must match what service_deserialize_item() parses:
 * "n-restarts" carries the unsigned counter, "flush-n-restarts" carries the boolean telling whether the
 * counter is to be zeroed on the next start. Each field needs its own key; writing the flag under
 * "n-restarts" would leave it unrestored and make the counter parser reject the yes/no value. */
unit_serialize_item_format(u, f, "n-restarts", "%u", s->n_restarts);
unit_serialize_item(u, f, "flush-n-restarts", yes_no(s->flush_n_restarts));
r = unit_serialize_item_escaped(u, f, "status-text", s->status_text);
if (r < 0)
return r;
@ -2636,6 +2665,18 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
r = service_deserialize_exec_command(u, key, value);
if (r < 0)
log_unit_debug_errno(u, r, "Failed to parse serialized command \"%s\": %m", value);
} else if (streq(key, "n-restarts")) {
r = safe_atou(value, &s->n_restarts);
if (r < 0)
log_unit_debug_errno(u, r, "Failed to parse serialized restart counter '%s': %m", value);
} else if (streq(key, "flush-n-restarts")) {
r = parse_boolean(value);
if (r < 0)
log_unit_debug_errno(u, r, "Failed to parse serialized flush restart counter setting '%s': %m", value);
else
s->flush_n_restarts = r;
} else
log_unit_debug(u, "Unknown serialization key: %s", key);
@ -3548,6 +3589,8 @@ static void service_reset_failed(Unit *u) {
s->result = SERVICE_SUCCESS;
s->reload_result = SERVICE_SUCCESS;
s->n_restarts = 0;
s->flush_n_restarts = false;
}
static int service_kill(Unit *u, KillWho who, int signo, sd_bus_error *error) {

View file

@ -193,6 +193,9 @@ struct Service {
int stdin_fd;
int stdout_fd;
int stderr_fd;
unsigned n_restarts;
bool flush_n_restarts;
};
extern const UnitVTable service_vtable;

View file

@ -99,6 +99,10 @@ _SD_BEGIN_DECLARATIONS;
#define SD_MESSAGE_UNIT_RELOADED SD_ID128_MAKE(7b,05,eb,c6,68,38,42,22,ba,a8,88,11,79,cf,da,54)
#define SD_MESSAGE_UNIT_RELOADED_STR SD_ID128_MAKE_STR(7b,05,eb,c6,68,38,42,22,ba,a8,88,11,79,cf,da,54)
#define SD_MESSAGE_UNIT_RESTART_SCHEDULED SD_ID128_MAKE(5e,b0,34,94,b6,58,48,70,a5,36,b3,37,29,08,09,b3)
#define SD_MESSAGE_UNIT_RESTART_SCHEDULED_STR \
SD_ID128_MAKE_STR(5e,b0,34,94,b6,58,48,70,a5,36,b3,37,29,08,09,b3)
#define SD_MESSAGE_SPAWN_FAILED SD_ID128_MAKE(64,12,57,65,1c,1b,4e,c9,a8,62,4d,7a,40,a9,e1,e7)
#define SD_MESSAGE_SPAWN_FAILED_STR SD_ID128_MAKE_STR(64,12,57,65,1c,1b,4e,c9,a8,62,4d,7a,40,a9,e1,e7)