Merge pull request #22629 from nishalkulkarni/oomd_service_result

core/oomd: Use oom-kill ServiceResult for oomd
This commit is contained in:
Lennart Poettering 2022-03-23 10:11:45 +01:00 committed by GitHub
commit 5b39139582
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 58 additions and 19 deletions

View file

@ -505,7 +505,7 @@
<term><varname>DefaultOOMPolicy=</varname></term>
<listitem><para>Configure the default policy for reacting to processes being killed by the Linux
Out-Of-Memory (OOM) killer. This may be used to pick a global default for the per-unit
Out-Of-Memory (OOM) killer or <command>systemd-oomd</command>. This may be used to pick a global default for the per-unit
<varname>OOMPolicy=</varname> setting. See
<citerefentry><refentrytitle>systemd.service</refentrytitle><manvolnum>5</manvolnum></citerefentry>
for details. Note that this default is not used for services that have <varname>Delegate=</varname>

View file

@ -1039,7 +1039,7 @@ CapabilityBoundingSet=~CAP_B CAP_C</programlisting>
normally at 0.</para>
<para>Use the <varname>OOMPolicy=</varname> setting of service units to configure how the service
manager shall react to the kernel OOM killer terminating a process of the service. See
manager shall react to the kernel OOM killer or <command>systemd-oomd</command> terminating a process of the service. See
<citerefentry><refentrytitle>systemd.service</refentrytitle><manvolnum>5</manvolnum></citerefentry>
for details.</para></listitem>
</varlistentry>

View file

@ -1140,7 +1140,11 @@
shall be considered preferred or less preferred candidates for process termination by the Linux OOM
killer logic. See
<citerefentry><refentrytitle>systemd.exec</refentrytitle><manvolnum>5</manvolnum></citerefentry> for
details.</para></listitem>
details.</para>
<para>This setting also applies to <command>systemd-oomd</command>. Similar to kernel OOM kills,
this setting determines the state of the service after <command>systemd-oomd</command> kills a cgroup associated
with the service.</para></listitem>
</varlistentry>
</variablelist>

View file

@ -2958,6 +2958,10 @@ static int on_cgroup_empty_event(sd_event_source *s, void *userdata) {
log_debug_errno(r, "Failed to reenable cgroup empty event source, ignoring: %m");
}
/* Update state based on OOM kills before we notify about cgroup empty event */
(void) unit_check_oom(u);
(void) unit_check_oomd_kill(u);
unit_add_to_gc_queue(u);
if (UNIT_VTABLE(u)->notify_cgroup_empty)
@ -3037,7 +3041,7 @@ int unit_check_oomd_kill(Unit *u) {
else if (r == 0)
return 0;
r = cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "user.oomd_kill", &value);
r = cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "user.oomd_ooms", &value);
if (r < 0 && r != -ENODATA)
return r;
@ -3053,11 +3057,25 @@ int unit_check_oomd_kill(Unit *u) {
if (!increased)
return 0;
n = 0;
value = mfree(value);
r = cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "user.oomd_kill", &value);
if (r >= 0 && !isempty(value))
(void) safe_atou64(value, &n);
if (n > 0)
log_unit_struct(u, LOG_NOTICE,
"MESSAGE_ID=" SD_MESSAGE_UNIT_OOMD_KILL_STR,
LOG_UNIT_INVOCATION_ID(u),
LOG_UNIT_MESSAGE(u, "systemd-oomd killed %"PRIu64" process(es) in this unit.", n));
LOG_UNIT_MESSAGE(u, "systemd-oomd killed %"PRIu64" process(es) in this unit.", n),
"N_PROCESSES=%" PRIu64, n);
else
log_unit_struct(u, LOG_NOTICE,
"MESSAGE_ID=" SD_MESSAGE_UNIT_OOMD_KILL_STR,
LOG_UNIT_INVOCATION_ID(u),
LOG_UNIT_MESSAGE(u, "systemd-oomd killed some process(es) in this unit."));
unit_notify_cgroup_oom(u, /* ManagedOOM= */ true);
return 1;
}
@ -3093,8 +3111,7 @@ int unit_check_oom(Unit *u) {
LOG_UNIT_INVOCATION_ID(u),
LOG_UNIT_MESSAGE(u, "A process of this unit has been killed by the OOM killer."));
if (UNIT_VTABLE(u)->notify_cgroup_oom)
UNIT_VTABLE(u)->notify_cgroup_oom(u);
unit_notify_cgroup_oom(u, /* ManagedOOM= */ false);
return 1;
}

View file

@ -2644,9 +2644,7 @@ static int manager_dispatch_sigchld(sd_event_source *source, void *userdata) {
* We only do this for the cgroup the PID belonged to. */
(void) unit_check_oom(u1);
/* This only logs for now. In the future when the interface for kills/notifications
* is more stable we can extend service results table similar to how kernel oom kills
* are managed. */
/* We check if systemd-oomd performed a kill so that we log and notify appropriately. */
(void) unit_check_oomd_kill(u1);
manager_invoke_sigchld_event(m, u1, &si);

View file

@ -60,9 +60,9 @@ typedef enum StatusType {
} StatusType;
typedef enum OOMPolicy {
OOM_CONTINUE, /* The kernel kills the process it wants to kill, and that's it */
OOM_STOP, /* The kernel kills the process it wants to kill, and we stop the unit */
OOM_KILL, /* The kernel kills the process it wants to kill, and all others in the unit, and we stop the unit */
OOM_CONTINUE, /* The kernel or systemd-oomd kills the process it wants to kill, and that's it */
OOM_STOP, /* The kernel or systemd-oomd kills the process it wants to kill, and we stop the unit */
OOM_KILL, /* The kernel or systemd-oomd kills the process it wants to kill, and all others in the unit, and we stop the unit */
_OOM_POLICY_MAX,
_OOM_POLICY_INVALID = -EINVAL,
} OOMPolicy;

View file

@ -3404,10 +3404,13 @@ static void service_notify_cgroup_empty_event(Unit *u) {
}
}
static void service_notify_cgroup_oom_event(Unit *u) {
static void service_notify_cgroup_oom_event(Unit *u, bool managed_oom) {
Service *s = SERVICE(u);
log_unit_debug(u, "Process of control group was killed by the OOM killer.");
if (managed_oom)
log_unit_debug(u, "Process(es) of control group were killed by systemd-oomd.");
else
log_unit_debug(u, "Process of control group was killed by the OOM killer.");
if (s->oom_policy == OOM_CONTINUE)
return;

View file

@ -75,7 +75,7 @@ typedef enum ServiceResult {
SERVICE_FAILURE_CORE_DUMP,
SERVICE_FAILURE_WATCHDOG,
SERVICE_FAILURE_START_LIMIT_HIT,
SERVICE_FAILURE_OOM_KILL,
SERVICE_FAILURE_OOM_KILL, /* OOM Kill by the Kernel or systemd-oomd */
SERVICE_SKIP_CONDITION,
_SERVICE_RESULT_MAX,
_SERVICE_RESULT_INVALID = -EINVAL,

View file

@ -3801,6 +3801,13 @@ int unit_kill(Unit *u, KillWho w, int signo, sd_bus_error *error) {
return UNIT_VTABLE(u)->kill(u, w, signo, error);
}
/* Forwards an OOM kill notification to the unit type's notify_cgroup_oom() vtable hook, if the
 * unit type provides one. managed_oom is passed through unchanged; callers pass true when the
 * kill was done by systemd-oomd and false for kills by the kernel OOM killer. */
void unit_notify_cgroup_oom(Unit *u, bool managed_oom) {
        assert(u);

        /* Unit types without an OOM hook have nothing to be told. */
        if (!UNIT_VTABLE(u)->notify_cgroup_oom)
                return;

        UNIT_VTABLE(u)->notify_cgroup_oom(u, managed_oom);
}
static Set *unit_pid_set(pid_t main_pid, pid_t control_pid) {
_cleanup_set_free_ Set *pid_set = NULL;
int r;

View file

@ -285,7 +285,7 @@ typedef struct Unit {
nsec_t cpu_usage_base;
nsec_t cpu_usage_last; /* the most recently read value */
/* The current counter of processes sent SIGKILL by systemd-oomd */
/* The current counter of OOM kills initiated by systemd-oomd */
uint64_t managed_oom_kill_last;
/* The current counter of the oom_kill field in the memory.events cgroup attribute */
@ -596,7 +596,7 @@ typedef struct UnitVTable {
void (*notify_cgroup_empty)(Unit *u);
/* Called whenever an OOM kill event on this unit was seen */
void (*notify_cgroup_oom)(Unit *u);
void (*notify_cgroup_oom)(Unit *u, bool managed_oom);
/* Called whenever a process of this unit sends us a message */
void (*notify_message)(Unit *u, const struct ucred *ucred, char * const *tags, FDSet *fds);
@ -811,6 +811,8 @@ int unit_reload(Unit *u);
int unit_kill(Unit *u, KillWho w, int signo, sd_bus_error *error);
int unit_kill_common(Unit *u, KillWho who, int signo, pid_t main_pid, pid_t control_pid, sd_bus_error *error);
void unit_notify_cgroup_oom(Unit *u, bool managed_oom);
typedef enum UnitNotifyFlags {
UNIT_NOTIFY_RELOAD_FAILURE = 1 << 0,
UNIT_NOTIFY_WILL_AUTO_RESTART = 1 << 1,

View file

@ -192,6 +192,10 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run) {
if (!pids_killed)
return -ENOMEM;
r = increment_oomd_xattr(path, "user.oomd_ooms", 1);
if (r < 0)
log_debug_errno(r, "Failed to set user.oomd_ooms before kill: %m");
if (recurse)
r = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, path, SIGKILL, CGROUP_IGNORE_SELF, pids_killed, log_kill, NULL);
else

View file

@ -77,12 +77,16 @@ static void test_oomd_cgroup_kill(void) {
abort();
}
assert_se(cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, cgroup, "user.oomd_ooms", &v) >= 0);
assert_se(streq(v, i == 0 ? "1" : "2"));
v = mfree(v);
/* Wait a bit since processes may take some time to be cleaned up. */
sleep(2);
assert_se(cg_is_empty(SYSTEMD_CGROUP_CONTROLLER, cgroup) == true);
assert_se(cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, cgroup, "user.oomd_kill", &v) >= 0);
assert_se(memcmp(v, i == 0 ? "2" : "4", 2) == 0);
assert_se(streq(v, i == 0 ? "2" : "4"));
}
}