oomd: threshold swap kill candidates to usages of more than 5%

In some instances, particularly with swap on zram, swap usage will be high
while there is still a lot of memory available. FB OOMD handles this by only
killing cgroups whose swap usage exceeds X% of total swap. Let's do the same
thing here.

Anecdotally, with these thresholds on my laptop, which uses swap on zram
exclusively, I can sit at 0K / 4G swap free with most of memory free, and
systemd-oomd doesn't kill anything.

Partially addresses the aggressive kill behavior reported in
https://bugzilla.redhat.com/show_bug.cgi?id=1941170
This commit is contained in:
Anita Zhang 2021-03-26 02:37:01 -07:00
parent cb13961ada
commit 685b0985f0
5 changed files with 16 additions and 11 deletions

View file

@ -52,9 +52,9 @@
<listitem><para>Sets the limit for swap usage on the system before <command>systemd-oomd</command>
will take action. If the fraction of swap used on the system is more than what is defined here,
<command>systemd-oomd</command> will act on eligible descendant control groups, starting from the
ones with the highest swap usage to the lowest swap usage. Which control groups are monitored and
what action gets taken depends on what the unit has configured for
<command>systemd-oomd</command> will act on eligible descendant control groups with swap usage greater
than 5% of total swap, starting from the ones with the highest swap usage. Which
control groups are monitored and what action gets taken depends on what the unit has configured for
<varname>ManagedOOMSwap=</varname>. Takes a value specified in percent (when suffixed with "%"),
permille ("‰") or permyriad ("‱"), between 0% and 100%, inclusive. Defaults to 90%.</para></listitem>
</varlistentry>

View file

@ -345,6 +345,7 @@ static int monitor_swap_contexts_handler(sd_event_source *s, uint64_t usec, void
if (oomd_swap_free_below(&m->system_context, 10000 - m->swap_used_limit_permyriad)) {
_cleanup_hashmap_free_ Hashmap *candidates = NULL;
_cleanup_free_ char *selected = NULL;
uint64_t threshold;
log_debug("Swap used (%"PRIu64") / total (%"PRIu64") is more than " PERMYRIAD_AS_PERCENT_FORMAT_STR,
m->system_context.swap_used, m->system_context.swap_total,
@ -356,7 +357,8 @@ static int monitor_swap_contexts_handler(sd_event_source *s, uint64_t usec, void
if (r < 0)
log_debug_errno(r, "Failed to get monitored swap cgroup candidates, ignoring: %m");
r = oomd_kill_by_swap_usage(candidates, m->dry_run, &selected);
threshold = m->system_context.swap_total * THRESHOLD_SWAP_USED_PERCENT / 100;
r = oomd_kill_by_swap_usage(candidates, threshold, m->dry_run, &selected);
if (r == -ENOMEM)
return log_oom();
if (r < 0)

View file

@ -19,6 +19,9 @@
/* Built-in defaults and fixed thresholds. The *_PERCENT values are percentages; the *_USEC values are
 * expressed as multiples of USEC_PER_SEC. */
#define DEFAULT_MEM_PRESSURE_LIMIT_PERCENT 60
#define DEFAULT_SWAP_USED_LIMIT_PERCENT 90
/* Only tackle candidates with large swap usage: when killing by swap usage, cgroups whose swap usage is
 * not greater than this percentage of total swap are skipped. */
#define THRESHOLD_SWAP_USED_PERCENT 5
#define RECLAIM_DURATION_USEC (30 * USEC_PER_SEC)
#define POST_ACTION_DELAY_USEC (15 * USEC_PER_SEC)

View file

@ -233,7 +233,7 @@ int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run, char
return ret;
}
int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run, char **ret_selected) {
int oomd_kill_by_swap_usage(Hashmap *h, uint64_t threshold_usage, bool dry_run, char **ret_selected) {
_cleanup_free_ OomdCGroupContext **sorted = NULL;
int n, r, ret = 0;
@ -244,12 +244,12 @@ int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run, char **ret_selected) {
if (n < 0)
return n;
/* Try to kill cgroups with non-zero swap usage until we either succeed in
* killing or we get to a cgroup with no swap usage. */
/* Try to kill cgroups until we either succeed in killing one or run out of candidates. Only cgroups
 * whose swap usage is greater than threshold_usage are considered. */
for (int i = 0; i < n; i++) {
/* Skip over cgroups with no resource usage.
* Continue break since there might be "avoid" cgroups at the end. */
if (sorted[i]->swap_usage == 0)
/* Skip over cgroups with not enough swap usage. Don't break since there might be "avoid"
* cgroups at the end. */
if (sorted[i]->swap_usage <= threshold_usage)
continue;
r = oomd_cgroup_kill(sorted[i]->path, true, dry_run);

View file

@ -111,7 +111,7 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run);
* everything in `h` is a candidate.
* Returns the killed cgroup in ret_selected. */
int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run, char **ret_selected);
int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run, char **ret_selected);
int oomd_kill_by_swap_usage(Hashmap *h, uint64_t threshold_usage, bool dry_run, char **ret_selected);
int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret);
int oomd_system_context_acquire(const char *proc_swaps_path, OomdSystemContext *ret);