watchdog: Add watchdog pretimeout support

Add support for managing and configuring watchdog pretimeout values if
the watchdog hardware supports it. The ping interval is adjusted to
account for a pretimeout so that it will still ping at half the timeout
interval before a pretimeout event would be triggered. By default the
pretimeout is 0s (disabled).

The RuntimeWatchdogPreSec config option is added to allow the pretimeout
to be specified (similar to RuntimeWatchdogSec). The
RuntimeWatchdogPreUSec dbus property is added to override the pretimeout
value at runtime (similar to RuntimeWatchdogUSec). Setting the
pretimeout to 0s will disable the pretimeout.
This commit is contained in:
Curtis Klein 2021-06-27 15:36:49 -07:00 committed by Luca Boccassi
parent b491d74064
commit 5717062e93
12 changed files with 233 additions and 10 deletions

View file

@ -428,6 +428,18 @@
</listitem>
</varlistentry>
<varlistentry>
<term><varname>systemd.watchdog_pre_sec=</varname></term>
<listitem>
<para>Overrides the watchdog pre-timeout settings otherwise configured with
<varname>RuntimeWatchdogPreSec=</varname>. Takes a time value (if no unit is specified, seconds is the
implicitly assumed time unit) or the special strings <literal>off</literal> or
<literal>default</literal>. For details, see
<citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>systemd.cpu_affinity=</varname></term>

View file

@ -402,6 +402,9 @@ node /org/freedesktop/systemd1 {
readwrite t RuntimeWatchdogUSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
@org.freedesktop.systemd1.Privileged("true")
readwrite t RuntimeWatchdogPreUSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
@org.freedesktop.systemd1.Privileged("true")
readwrite t RebootWatchdogUSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
@org.freedesktop.systemd1.Privileged("true")
@ -650,6 +653,8 @@ node /org/freedesktop/systemd1 {
<!--property RuntimeWatchdogUSec is not documented!-->
<!--property RuntimeWatchdogPreUSec is not documented!-->
<!--property RebootWatchdogUSec is not documented!-->
<!--property KExecWatchdogUSec is not documented!-->
@ -1052,6 +1057,8 @@ node /org/freedesktop/systemd1 {
<variablelist class="dbus-property" generated="True" extra-ref="RuntimeWatchdogUSec"/>
<variablelist class="dbus-property" generated="True" extra-ref="RuntimeWatchdogPreUSec"/>
<variablelist class="dbus-property" generated="True" extra-ref="RebootWatchdogUSec"/>
<variablelist class="dbus-property" generated="True" extra-ref="KExecWatchdogUSec"/>

View file

@ -177,6 +177,40 @@
<para>These settings have no effect if a hardware watchdog is not available.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>RuntimeWatchdogPreSec=</varname></term>
<listitem><para>Configure the hardware watchdog device pre-timeout value.
Takes a timeout value in seconds (or in other time units similar to
<varname>RuntimeWatchdogSec=</varname>). A watchdog pre-timeout is a
notification generated by the watchdog before the watchdog reset might
occur in the event the watchdog has not been serviced. This notification
is handled by the kernel and can be configured to take an action (e.g.
generate a kernel panic) using the
<filename>/sys/class/watchdog/watchdog0/pretimeout_governor</filename>
sysfs file for your watchdog device. The available actions (or
governors) are listed in the
<filename>/sys/class/watchdog/watchdog0/pretimeout_available_governors</filename>
sysfs file for your watchdog device. The default action for the
pre-timeout event is to log a kernel message but that can be changed in
the kernel's configuration. Not all watchdog hardware or drivers support
generating a pre-timeout and depending on the state of the system, the
kernel may be unable to take the configured action before the watchdog
reboot. The watchdog will be configured to generate the pre-timeout event
at the amount of time specified by <varname>RuntimeWatchdogPreSec=</varname>
before the runtime watchdog timeout (set by
<varname>RuntimeWatchdogSec=</varname>). For example, if we have
<varname>RuntimeWatchdogSec=30</varname> and
<varname>RuntimeWatchdogPreSec=10</varname>, then the pre-timeout event
will occur if the watchdog has not been pinged for 20s (10s before the
watchdog would fire). By default, <varname>RuntimeWatchdogPreSec=</varname>
is set to 0 (off). The value set for <varname>RuntimeWatchdogPreSec=</varname>
must be smaller than the timeout value for <varname>RuntimeWatchdogSec=</varname>.
This setting has no effect if a hardware watchdog is not available or the
hardware watchdog does not support a pre-timeout and will be ignored by the
kernel if the setting is greater than the actual watchdog timeout.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>WatchdogDevice=</varname></term>

View file

@ -265,6 +265,24 @@ static int property_get_runtime_watchdog(
return sd_bus_message_append(reply, "t", manager_get_watchdog(m, WATCHDOG_RUNTIME));
}
/* D-Bus property getter for the manager's pretimeout watchdog value.
 *
 * Looks up the WATCHDOG_PRETIMEOUT entry via manager_get_watchdog() and appends
 * it to the reply message as a "t" field. 'userdata' must point to the Manager.
 * Returns whatever sd_bus_message_append() returns. */
static int property_get_pretimeout_watchdog(
                sd_bus *bus,
                const char *path,
                const char *interface,
                const char *property,
                sd_bus_message *reply,
                void *userdata,
                sd_bus_error *error) {

        Manager *m = userdata;
        int r;

        assert(m);
        assert(bus);
        assert(reply);

        r = sd_bus_message_append(reply, "t", manager_get_watchdog(m, WATCHDOG_PRETIMEOUT));

        return r;
}
static int property_get_reboot_watchdog(
sd_bus *bus,
const char *path,
@ -330,6 +348,18 @@ static int property_set_runtime_watchdog(
return property_set_watchdog(userdata, WATCHDOG_RUNTIME, value);
}
/* D-Bus property setter for the manager's pretimeout watchdog value.
 *
 * Forwards the incoming message and 'userdata' to property_set_watchdog() with
 * the WATCHDOG_PRETIMEOUT type and returns its result unchanged. */
static int property_set_pretimeout_watchdog(
                sd_bus *bus,
                const char *path,
                const char *interface,
                const char *property,
                sd_bus_message *value,
                void *userdata,
                sd_bus_error *error) {

        int r;

        r = property_set_watchdog(userdata, WATCHDOG_PRETIMEOUT, value);

        return r;
}
static int property_set_reboot_watchdog(
sd_bus *bus,
const char *path,
@ -2696,6 +2726,7 @@ const sd_bus_vtable bus_manager_vtable[] = {
SD_BUS_PROPERTY("DefaultStandardOutput", "s", bus_property_get_exec_output, offsetof(Manager, default_std_output), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("DefaultStandardError", "s", bus_property_get_exec_output, offsetof(Manager, default_std_error), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_WRITABLE_PROPERTY("RuntimeWatchdogUSec", "t", property_get_runtime_watchdog, property_set_runtime_watchdog, 0, 0),
SD_BUS_WRITABLE_PROPERTY("RuntimeWatchdogPreUSec", "t", property_get_pretimeout_watchdog, property_set_pretimeout_watchdog, 0, 0),
SD_BUS_WRITABLE_PROPERTY("RebootWatchdogUSec", "t", property_get_reboot_watchdog, property_set_reboot_watchdog, 0, 0),
/* The following item is an obsolete alias */
SD_BUS_WRITABLE_PROPERTY("ShutdownWatchdogUSec", "t", property_get_reboot_watchdog, property_set_reboot_watchdog, 0, SD_BUS_VTABLE_HIDDEN),

View file

@ -138,6 +138,7 @@ static unsigned arg_default_start_limit_burst;
static usec_t arg_runtime_watchdog;
static usec_t arg_reboot_watchdog;
static usec_t arg_kexec_watchdog;
static usec_t arg_pretimeout_watchdog;
static char *arg_early_core_pattern;
static char *arg_watchdog_device;
static char **arg_default_environment;
@ -557,6 +558,23 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
arg_kexec_watchdog = arg_reboot_watchdog = arg_runtime_watchdog;
} else if (proc_cmdline_key_streq(key, "systemd.watchdog_pre_sec")) {
if (proc_cmdline_value_missing(key, value))
return 0;
if (streq(value, "default"))
arg_pretimeout_watchdog = USEC_INFINITY;
else if (streq(value, "off"))
arg_pretimeout_watchdog = 0;
else {
r = parse_sec(value, &arg_pretimeout_watchdog);
if (r < 0) {
log_warning_errno(r, "Failed to parse systemd.watchdog_pre_sec= argument '%s', ignoring: %m", value);
return 0;
}
}
} else if (proc_cmdline_key_streq(key, "systemd.clock_usec")) {
if (proc_cmdline_value_missing(key, value))
@ -709,6 +727,7 @@ static int parse_config_file(void) {
{ "Manager", "NUMAMask", config_parse_numa_mask, 0, &arg_numa_policy },
{ "Manager", "JoinControllers", config_parse_warn_compat, DISABLED_CONFIGURATION, NULL },
{ "Manager", "RuntimeWatchdogSec", config_parse_watchdog_sec, 0, &arg_runtime_watchdog },
{ "Manager", "RuntimeWatchdogPreSec", config_parse_watchdog_sec, 0, &arg_pretimeout_watchdog },
{ "Manager", "RebootWatchdogSec", config_parse_watchdog_sec, 0, &arg_reboot_watchdog },
{ "Manager", "ShutdownWatchdogSec", config_parse_watchdog_sec, 0, &arg_reboot_watchdog }, /* obsolete alias */
{ "Manager", "KExecWatchdogSec", config_parse_watchdog_sec, 0, &arg_kexec_watchdog },
@ -851,6 +870,7 @@ static void set_manager_settings(Manager *m) {
manager_set_watchdog(m, WATCHDOG_RUNTIME, arg_runtime_watchdog);
manager_set_watchdog(m, WATCHDOG_REBOOT, arg_reboot_watchdog);
manager_set_watchdog(m, WATCHDOG_KEXEC, arg_kexec_watchdog);
manager_set_watchdog(m, WATCHDOG_PRETIMEOUT, arg_pretimeout_watchdog);
manager_set_show_status(m, arg_show_status, "commandline");
m->status_unit_format = arg_status_unit_format;
@ -1595,7 +1615,9 @@ static int become_shutdown(
watchdog_timer = arg_kexec_watchdog;
/* If we reboot or kexec let's set the shutdown watchdog and tell the
* shutdown binary to repeatedly ping it */
* shutdown binary to repeatedly ping it.
* Disable the pretimeout watchdog, as we do not support it from the shutdown binary. */
(void) watchdog_setup_pretimeout(0);
r = watchdog_setup(watchdog_timer);
watchdog_close(r < 0);
@ -2448,6 +2470,7 @@ static void reset_arguments(void) {
arg_runtime_watchdog = 0;
arg_reboot_watchdog = 10 * USEC_PER_MINUTE;
arg_kexec_watchdog = 0;
arg_pretimeout_watchdog = 0;
arg_early_core_pattern = NULL;
arg_watchdog_device = NULL;

View file

@ -118,6 +118,7 @@ int manager_serialize(
(void) serialize_usec(f, "runtime-watchdog-overridden", m->watchdog_overridden[WATCHDOG_RUNTIME]);
(void) serialize_usec(f, "reboot-watchdog-overridden", m->watchdog_overridden[WATCHDOG_REBOOT]);
(void) serialize_usec(f, "kexec-watchdog-overridden", m->watchdog_overridden[WATCHDOG_KEXEC]);
(void) serialize_usec(f, "pretimeout-watchdog-overridden", m->watchdog_overridden[WATCHDOG_PRETIMEOUT]);
for (ManagerTimestamp q = 0; q < _MANAGER_TIMESTAMP_MAX; q++) {
_cleanup_free_ char *joined = NULL;
@ -455,6 +456,14 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
else
manager_override_watchdog(m, WATCHDOG_KEXEC, t);
} else if ((val = startswith(l, "pretimeout-watchdog-overridden="))) {
usec_t t;
if (deserialize_usec(val, &t) < 0)
log_notice("Failed to parse pretimeout-watchdog-overridden value '%s', ignoring.", val);
else
manager_override_watchdog(m, WATCHDOG_PRETIMEOUT, t);
} else if (startswith(l, "env=")) {
r = deserialize_environment(l + 4, &m->client_environment);
if (r < 0)

View file

@ -813,6 +813,7 @@ int manager_new(UnitFileScope scope, ManagerTestRunFlags test_run_flags, Manager
.watchdog_overridden[WATCHDOG_RUNTIME] = USEC_INFINITY,
.watchdog_overridden[WATCHDOG_REBOOT] = USEC_INFINITY,
.watchdog_overridden[WATCHDOG_KEXEC] = USEC_INFINITY,
.watchdog_overridden[WATCHDOG_PRETIMEOUT] = USEC_INFINITY,
.show_status_overridden = _SHOW_STATUS_INVALID,
@ -3232,9 +3233,12 @@ void manager_set_watchdog(Manager *m, WatchdogType t, usec_t timeout) {
if (m->watchdog[t] == timeout)
return;
if (t == WATCHDOG_RUNTIME)
if (t == WATCHDOG_RUNTIME) {
if (!timestamp_is_set(m->watchdog_overridden[WATCHDOG_RUNTIME]))
(void) watchdog_setup(timeout);
} else if (t == WATCHDOG_PRETIMEOUT)
if (m->watchdog_overridden[WATCHDOG_PRETIMEOUT] == USEC_INFINITY)
(void) watchdog_setup_pretimeout(timeout);
m->watchdog[t] = timeout;
}
@ -3253,7 +3257,8 @@ void manager_override_watchdog(Manager *m, WatchdogType t, usec_t timeout) {
usec_t usec = timestamp_is_set(timeout) ? timeout : m->watchdog[t];
(void) watchdog_setup(usec);
}
} else if (t == WATCHDOG_PRETIMEOUT)
(void) watchdog_setup_pretimeout(timeout);
m->watchdog_overridden[t] = timeout;
}

View file

@ -118,6 +118,7 @@ typedef enum WatchdogType {
WATCHDOG_RUNTIME,
WATCHDOG_REBOOT,
WATCHDOG_KEXEC,
WATCHDOG_PRETIMEOUT,
_WATCHDOG_TYPE_MAX,
} WatchdogType;

View file

@ -30,6 +30,7 @@
#NUMAPolicy=default
#NUMAMask=
#RuntimeWatchdogSec=off
#RuntimeWatchdogPreSec=off
#RebootWatchdogSec=10min
#KExecWatchdogSec=off
#WatchdogDevice=

View file

@ -18,6 +18,7 @@
static int watchdog_fd = -1;
static char *watchdog_device;
static usec_t watchdog_timeout; /* 0 → close device and USEC_INFINITY → don't change timeout */
static usec_t watchdog_pretimeout; /* 0 → disable pretimeout and USEC_INFINITY → don't change pretimeout */
static usec_t watchdog_last_ping = USEC_INFINITY;
/* Starting from kernel version 4.5, the maximum allowable watchdog timeout is
@ -84,6 +85,46 @@ static int watchdog_set_timeout(void) {
return 0;
}
/* Query the pretimeout currently programmed in the watchdog device and cache it
 * (in microseconds) in watchdog_pretimeout.
 *
 * Requires an open watchdog device. On ioctl failure the cached pretimeout is
 * reset to 0 and the logged (negative) error is returned; the log level is
 * LOG_DEBUG when the operation is simply unsupported, LOG_WARNING otherwise.
 * Returns 0 on success. */
static int watchdog_get_pretimeout(void) {
        int sec = 0;

        assert(watchdog_fd >= 0);

        if (ioctl(watchdog_fd, WDIOC_GETPRETIMEOUT, &sec) >= 0) {
                /* The ioctl reports whole seconds; keep our cached copy in microseconds. */
                watchdog_pretimeout = sec * USEC_PER_SEC;
                return 0;
        }

        watchdog_pretimeout = 0;

        return log_full_errno(ERRNO_IS_NOT_SUPPORTED(errno) ? LOG_DEBUG : LOG_WARNING, errno, "Failed to get pretimeout value, ignoring: %m");
}
/* Program the pretimeout held in watchdog_pretimeout into the watchdog device.
 *
 * Requires an open watchdog device and a concrete pretimeout (not
 * USEC_INFINITY). The value is converted to whole seconds with
 * saturated_usec_to_sec() before being handed to the ioctl.
 *
 * Returns 0 on success, and also when the device reports that pretimeouts are
 * unsupported (the cached pretimeout is then 0). Any other ioctl failure resets
 * the cached pretimeout to 0 and returns the logged (negative) error. */
static int watchdog_set_pretimeout(void) {
        int sec;

        assert(watchdog_fd >= 0);
        assert(watchdog_pretimeout != USEC_INFINITY);

        sec = saturated_usec_to_sec(watchdog_pretimeout);

        if (ioctl(watchdog_fd, WDIOC_SETPRETIMEOUT, &sec) < 0) {
                /* The request failed, so record the pretimeout as disabled (0). */
                watchdog_pretimeout = 0;

                if (ERRNO_IS_NOT_SUPPORTED(errno)) {
                        log_info("Watchdog does not support pretimeouts.");
                        return 0;
                }

                /* 'sec' is in seconds while FORMAT_TIMESPAN() is given microseconds elsewhere in
                 * this file, so convert before formatting; otherwise the message shows a value
                 * that is a factor of 10^6 too small. */
                return log_error_errno(errno, "Failed to set pretimeout to %s: %m", FORMAT_TIMESPAN(sec * USEC_PER_SEC, USEC_PER_SEC));
        }

        /* The set ioctl does not return the actual value set so get it now. */
        (void) watchdog_get_pretimeout();

        return 0;
}
static int watchdog_ping_now(void) {
assert(watchdog_fd >= 0);
@ -95,6 +136,34 @@ static int watchdog_ping_now(void) {
return 0;
}
/* Validate the cached pretimeout against the cached watchdog timeout and, if it
 * is acceptable, program it into the device.
 *
 * Does nothing (returns 0) when no device is open or when either the timeout or
 * the pretimeout is USEC_INFINITY. A pretimeout that is not strictly shorter
 * than the timeout (compared in whole seconds) is rejected with -EINVAL, and
 * the pretimeout is re-read from the device instead. The resulting pretimeout
 * state is logged in either case. */
static int update_pretimeout(void) {
        int timeout_s, pretimeout_s, ret;

        if (watchdog_fd < 0 ||
            watchdog_timeout == USEC_INFINITY ||
            watchdog_pretimeout == USEC_INFINITY)
                return 0;

        /* Determine if the pretimeout is valid for the current watchdog timeout. */
        timeout_s = saturated_usec_to_sec(watchdog_timeout);
        pretimeout_s = saturated_usec_to_sec(watchdog_pretimeout);

        if (pretimeout_s < timeout_s)
                ret = watchdog_set_pretimeout();
        else {
                ret = log_error_errno(SYNTHETIC_ERRNO(EINVAL),
                                      "Cannot set watchdog pretimeout to %is (%s watchdog timeout of %is)",
                                      pretimeout_s, pretimeout_s == timeout_s ? "same as" : "longer than", timeout_s);

                /* Refresh the cached value from the device since the requested one was rejected. */
                (void) watchdog_get_pretimeout();
        }

        if (watchdog_pretimeout != 0)
                log_info("Watchdog running with a pretimeout of %s.", FORMAT_TIMESPAN(watchdog_pretimeout, 0));
        else
                log_info("Watchdog pretimeout is disabled.");

        return ret;
}
static int update_timeout(void) {
int r;
@ -121,6 +190,12 @@ static int update_timeout(void) {
return log_error_errno(r, "Failed to query watchdog HW timeout: %m");
}
/* If the watchdog timeout was changed, the pretimeout could have been
* changed as well by the driver or the kernel so we need to update the
* pretimeout now. Or if the watchdog is being configured for the first
* time, we want to configure the pretimeout before it is enabled. */
(void) update_pretimeout();
r = watchdog_set_enable(true);
if (r < 0)
return r;
@ -210,9 +285,31 @@ int watchdog_setup(usec_t timeout) {
return r;
}
usec_t watchdog_runtime_wait(void) {
int watchdog_setup_pretimeout(usec_t timeout) {
/* timeout=0 disables the pretimeout whereas timeout=USEC_INFINITY is a nop. */
if ((watchdog_fd >= 0 && timeout == watchdog_pretimeout) || timeout == USEC_INFINITY)
return 0;
if (!timestamp_is_set(watchdog_timeout))
/* Initialize the watchdog pretimeout with the caller value. This value is
* going to be updated by update_pretimeout() with the running value,
* even if it fails to update the pretimeout. */
watchdog_pretimeout = timeout;
return update_pretimeout();
}
/* Return the effective watchdog timeout used for ping scheduling: the watchdog
 * timeout minus the pretimeout when a pretimeout is set and does not exceed the
 * timeout, otherwise the plain watchdog timeout. */
static usec_t calc_timeout(void) {
        /* No usable pretimeout (unset, or larger than the timeout): nothing to subtract. */
        if (!timestamp_is_set(watchdog_pretimeout) || watchdog_timeout < watchdog_pretimeout)
                return watchdog_timeout;

        return watchdog_timeout - watchdog_pretimeout;
}
usec_t watchdog_runtime_wait(void) {
usec_t timeout = calc_timeout();
if (!timestamp_is_set(timeout))
return USEC_INFINITY;
/* Sleep half the watchdog timeout since the last successful ping at most */
@ -220,14 +317,14 @@ usec_t watchdog_runtime_wait(void) {
usec_t ntime = now(clock_boottime_or_monotonic());
assert(ntime >= watchdog_last_ping);
return usec_sub_unsigned(watchdog_last_ping + (watchdog_timeout / 2), ntime);
return usec_sub_unsigned(watchdog_last_ping + (timeout / 2), ntime);
}
return watchdog_timeout / 2;
return timeout / 2;
}
int watchdog_ping(void) {
usec_t ntime;
usec_t ntime, timeout;
if (watchdog_timeout == 0)
return 0;
@ -237,12 +334,13 @@ int watchdog_ping(void) {
return open_watchdog();
ntime = now(clock_boottime_or_monotonic());
timeout = calc_timeout();
/* Never ping earlier than watchdog_timeout/4 and try to ping
* by watchdog_timeout/2 plus scheduling latencies the latest */
* by watchdog_timeout/2 plus scheduling latencies at the latest */
if (timestamp_is_set(watchdog_last_ping)) {
assert(ntime >= watchdog_last_ping);
if ((ntime - watchdog_last_ping) < (watchdog_timeout / 4))
if ((ntime - watchdog_last_ping) < (timeout / 4))
return 0;
}

View file

@ -8,6 +8,7 @@
int watchdog_set_device(const char *path);
int watchdog_setup(usec_t timeout);
int watchdog_setup_pretimeout(usec_t usec);
int watchdog_ping(void);
void watchdog_close(bool disarm);
usec_t watchdog_runtime_wait(void);

View file

@ -737,6 +737,7 @@ LogLevel=
LogLocation=
LogTarget=
RuntimeWatchdogSec=
RuntimeWatchdogPreSec=
ShowStatus=
RebootWatchdogSec=
ShutdownWatchdogSec=