mirror of
https://github.com/systemd/systemd
synced 2024-10-15 12:34:37 +00:00
oomd: increase accuracy of SwapUsedLimit= to permyriads too
oomd.conf has two parameters that take fractional values: SwapUsedLimit= and DefaultMemoryPressureLimit=. For no apparent reason, one accepts permyriads while the other accepts only percentages, and one carries "Percent" in its name while the other doesn't. Let's clean this up: always accept permyriads, and drop the suffix, given that it is misleading. I figure we should internally try to focus on scaling everything relative to UINT32_MAX, or, if that isn't in the cards, at least relative to 10000, but never to permille or percent unless there's a really, really good reason for it (e.g. an interface defined by someone else).
This commit is contained in:
parent
d9d3f05def
commit
d06e7fb532
|
@ -48,36 +48,38 @@
|
||||||
|
|
||||||
<variablelist class='config-directives'>
|
<variablelist class='config-directives'>
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term><varname>SwapUsedLimitPercent=</varname></term>
|
<term><varname>SwapUsedLimit=</varname></term>
|
||||||
|
|
||||||
<listitem><para>Sets the limit for swap usage on the system before <command>systemd-oomd</command> will
|
<listitem><para>Sets the limit for swap usage on the system before <command>systemd-oomd</command>
|
||||||
take action. If the percentage of swap used on the system is more than what is defined here,
|
will take action. If the fraction of swap used on the system is more than what is defined here,
|
||||||
<command>systemd-oomd</command> will act on eligible descendant cgroups, starting from the ones with the
|
<command>systemd-oomd</command> will act on eligible descendant control groups, starting from the
|
||||||
highest swap usage to the lowest swap usage. Which cgroups are monitored and what
|
ones with the highest swap usage to the lowest swap usage. Which control groups are monitored and
|
||||||
action gets taken depends on what the unit has configured for <varname>ManagedOOMSwap=</varname>.
|
what action gets taken depends on what the unit has configured for
|
||||||
Takes a percentage value between 0% and 100%, inclusive. Defaults to 90%.</para></listitem>
|
<varname>ManagedOOMSwap=</varname>. Takes a value specified in percent (when suffixed with "%"),
|
||||||
|
permille ("‰") or permyriad ("‱"), between 0% and 100%, inclusive. Defaults to 90%.</para></listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term><varname>DefaultMemoryPressureLimit=</varname></term>
|
<term><varname>DefaultMemoryPressureLimit=</varname></term>
|
||||||
|
|
||||||
<listitem><para>Sets the limit for memory pressure on the unit's cgroup before <command>systemd-oomd</command>
|
<listitem><para>Sets the limit for memory pressure on the unit's control group before
|
||||||
will take action. A unit can override this value with <varname>ManagedOOMMemoryPressureLimit=</varname>.
|
<command>systemd-oomd</command> will take action. A unit can override this value with
|
||||||
The memory pressure for this property represents the fraction of time in a 10 second window in which all tasks
|
<varname>ManagedOOMMemoryPressureLimit=</varname>. The memory pressure for this property represents
|
||||||
in the cgroup were delayed. For each monitored cgroup, if the memory pressure on that cgroup exceeds the
|
the fraction of time in a 10 second window in which all tasks in the control group were delayed. For
|
||||||
limit set for longer than the duration set by <varname>DefaultMemoryPressureDurationSec=</varname>,
|
each monitored control group, if the memory pressure on that control group exceeds the limit set for
|
||||||
<command>systemd-oomd</command> will act on eligible descendant cgroups,
|
longer than the duration set by <varname>DefaultMemoryPressureDurationSec=</varname>,
|
||||||
starting from the ones with the most reclaim activity to the least reclaim activity. Which cgroups are
|
<command>systemd-oomd</command> will act on eligible descendant control groups, starting from the
|
||||||
monitored and what action gets taken depends on what the unit has configured for
|
ones with the most reclaim activity to the least reclaim activity. Which control groups are monitored
|
||||||
<varname>ManagedOOMMemoryPressure=</varname>. Takes a percentage value between 0% and 100%, inclusive.
|
and what action gets taken depends on what the unit has configured for
|
||||||
Defaults to 60%.</para></listitem>
|
<varname>ManagedOOMMemoryPressure=</varname>. Takes a fraction specified in the same way as
|
||||||
|
<varname>SwapUsedLimit=</varname> above. Defaults to 60%.</para></listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term><varname>DefaultMemoryPressureDurationSec=</varname></term>
|
<term><varname>DefaultMemoryPressureDurationSec=</varname></term>
|
||||||
|
|
||||||
<listitem><para>Sets the amount of time a unit's cgroup needs to have exceeded memory pressure limits before
|
<listitem><para>Sets the amount of time a unit's control group needs to have exceeded memory pressure
|
||||||
<command>systemd-oomd</command> will take action. Memory pressure limits are defined by
|
limits before <command>systemd-oomd</command> will take action. Memory pressure limits are defined by
|
||||||
<varname>DefaultMemoryPressureLimit=</varname> and <varname>ManagedOOMMemoryPressureLimit=</varname>.
|
<varname>DefaultMemoryPressureLimit=</varname> and <varname>ManagedOOMMemoryPressureLimit=</varname>.
|
||||||
Defaults to 30 seconds when this property is unset or set to 0.</para></listitem>
|
Defaults to 30 seconds when this property is unset or set to 0.</para></listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
|
@ -16,7 +16,7 @@ typedef struct ManagedOOMReply {
|
||||||
ManagedOOMMode mode;
|
ManagedOOMMode mode;
|
||||||
char *path;
|
char *path;
|
||||||
char *property;
|
char *property;
|
||||||
unsigned limit;
|
uint32_t limit;
|
||||||
} ManagedOOMReply;
|
} ManagedOOMReply;
|
||||||
|
|
||||||
static void managed_oom_reply_destroy(ManagedOOMReply *reply) {
|
static void managed_oom_reply_destroy(ManagedOOMReply *reply) {
|
||||||
|
@ -53,10 +53,10 @@ static int process_managed_oom_reply(
|
||||||
assert(m);
|
assert(m);
|
||||||
|
|
||||||
static const JsonDispatch dispatch_table[] = {
|
static const JsonDispatch dispatch_table[] = {
|
||||||
{ "mode", JSON_VARIANT_STRING, managed_oom_mode, offsetof(ManagedOOMReply, mode), JSON_MANDATORY },
|
{ "mode", JSON_VARIANT_STRING, managed_oom_mode, offsetof(ManagedOOMReply, mode), JSON_MANDATORY },
|
||||||
{ "path", JSON_VARIANT_STRING, json_dispatch_string, offsetof(ManagedOOMReply, path), JSON_MANDATORY },
|
{ "path", JSON_VARIANT_STRING, json_dispatch_string, offsetof(ManagedOOMReply, path), JSON_MANDATORY },
|
||||||
{ "property", JSON_VARIANT_STRING, json_dispatch_string, offsetof(ManagedOOMReply, property), JSON_MANDATORY },
|
{ "property", JSON_VARIANT_STRING, json_dispatch_string, offsetof(ManagedOOMReply, property), JSON_MANDATORY },
|
||||||
{ "limit", JSON_VARIANT_UNSIGNED, json_dispatch_unsigned, offsetof(ManagedOOMReply, limit), 0 },
|
{ "limit", JSON_VARIANT_UNSIGNED, json_dispatch_uint32, offsetof(ManagedOOMReply, limit), 0 },
|
||||||
{},
|
{},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -87,7 +87,8 @@ static int process_managed_oom_reply(
|
||||||
if (ret == -ENOMEM) {
|
if (ret == -ENOMEM) {
|
||||||
r = ret;
|
r = ret;
|
||||||
goto finish;
|
goto finish;
|
||||||
} else if (ret < 0)
|
}
|
||||||
|
if (ret < 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
monitor_hm = streq(reply.property, "ManagedOOMSwap") ?
|
monitor_hm = streq(reply.property, "ManagedOOMSwap") ?
|
||||||
|
@ -100,19 +101,15 @@ static int process_managed_oom_reply(
|
||||||
|
|
||||||
limit = m->default_mem_pressure_limit;
|
limit = m->default_mem_pressure_limit;
|
||||||
|
|
||||||
if (streq(reply.property, "ManagedOOMMemoryPressure")) {
|
if (streq(reply.property, "ManagedOOMMemoryPressure") && reply.limit > 0) {
|
||||||
if (reply.limit > UINT32_MAX) /* out of range */
|
int permyriad = UINT32_SCALE_TO_PERMYRIAD(reply.limit);
|
||||||
continue;
|
|
||||||
if (reply.limit != 0) {
|
|
||||||
int permyriad = UINT32_SCALE_TO_PERMYRIAD(reply.limit);
|
|
||||||
|
|
||||||
ret = store_loadavg_fixed_point(
|
ret = store_loadavg_fixed_point(
|
||||||
(unsigned long) permyriad / 100,
|
(unsigned long) permyriad / 100,
|
||||||
(unsigned long) permyriad % 100,
|
(unsigned long) permyriad % 100,
|
||||||
&limit);
|
&limit);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = oomd_insert_cgroup_context(NULL, monitor_hm, empty_to_root(reply.path));
|
ret = oomd_insert_cgroup_context(NULL, monitor_hm, empty_to_root(reply.path));
|
||||||
|
@ -354,11 +351,11 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (oomd_swap_free_below(&m->system_context, (100 - m->swap_used_limit))) {
|
if (oomd_swap_free_below(&m->system_context, 10000 - m->swap_used_limit_permyriad)) {
|
||||||
_cleanup_hashmap_free_ Hashmap *candidates = NULL;
|
_cleanup_hashmap_free_ Hashmap *candidates = NULL;
|
||||||
|
|
||||||
log_notice("Swap used (%"PRIu64") / total (%"PRIu64") is more than %u%%",
|
log_notice("Swap used (%"PRIu64") / total (%"PRIu64") is more than " PERMYRIAD_AS_PERCENT_FORMAT_STR,
|
||||||
m->system_context.swap_used, m->system_context.swap_total, m->swap_used_limit);
|
m->system_context.swap_used, m->system_context.swap_total, PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad));
|
||||||
|
|
||||||
r = get_monitored_cgroup_contexts_candidates(m->monitored_swap_cgroup_contexts, &candidates);
|
r = get_monitored_cgroup_contexts_candidates(m->monitored_swap_cgroup_contexts, &candidates);
|
||||||
if (r == -ENOMEM)
|
if (r == -ENOMEM)
|
||||||
|
@ -484,7 +481,13 @@ static int manager_connect_bus(Manager *m) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int manager_start(Manager *m, bool dry_run, int swap_used_limit, int mem_pressure_limit_permyriad, usec_t mem_pressure_usec) {
|
int manager_start(
|
||||||
|
Manager *m,
|
||||||
|
bool dry_run,
|
||||||
|
int swap_used_limit_permyriad,
|
||||||
|
int mem_pressure_limit_permyriad,
|
||||||
|
usec_t mem_pressure_usec) {
|
||||||
|
|
||||||
unsigned long l, f;
|
unsigned long l, f;
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
|
@ -492,10 +495,10 @@ int manager_start(Manager *m, bool dry_run, int swap_used_limit, int mem_pressur
|
||||||
|
|
||||||
m->dry_run = dry_run;
|
m->dry_run = dry_run;
|
||||||
|
|
||||||
m->swap_used_limit = swap_used_limit != -1 ? swap_used_limit : DEFAULT_SWAP_USED_LIMIT;
|
m->swap_used_limit_permyriad = swap_used_limit_permyriad >= 0 ? swap_used_limit_permyriad : DEFAULT_SWAP_USED_LIMIT_PERCENT * 100;
|
||||||
assert(m->swap_used_limit <= 100);
|
assert(m->swap_used_limit_permyriad <= 10000);
|
||||||
|
|
||||||
if (mem_pressure_limit_permyriad != -1) {
|
if (mem_pressure_limit_permyriad >= 0) {
|
||||||
assert(mem_pressure_limit_permyriad <= 10000);
|
assert(mem_pressure_limit_permyriad <= 10000);
|
||||||
|
|
||||||
l = mem_pressure_limit_permyriad / 100;
|
l = mem_pressure_limit_permyriad / 100;
|
||||||
|
@ -543,12 +546,12 @@ int manager_get_dump_string(Manager *m, char **ret) {
|
||||||
|
|
||||||
fprintf(f,
|
fprintf(f,
|
||||||
"Dry Run: %s\n"
|
"Dry Run: %s\n"
|
||||||
"Swap Used Limit: %u%%\n"
|
"Swap Used Limit: " PERMYRIAD_AS_PERCENT_FORMAT_STR "\n"
|
||||||
"Default Memory Pressure Limit: %lu.%02lu%%\n"
|
"Default Memory Pressure Limit: %lu.%02lu%%\n"
|
||||||
"Default Memory Pressure Duration: %s\n"
|
"Default Memory Pressure Duration: %s\n"
|
||||||
"System Context:\n",
|
"System Context:\n",
|
||||||
yes_no(m->dry_run),
|
yes_no(m->dry_run),
|
||||||
m->swap_used_limit,
|
PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad),
|
||||||
LOAD_INT(m->default_mem_pressure_limit), LOAD_FRAC(m->default_mem_pressure_limit),
|
LOAD_INT(m->default_mem_pressure_limit), LOAD_FRAC(m->default_mem_pressure_limit),
|
||||||
format_timespan(buf, sizeof(buf), m->default_mem_pressure_duration_usec, USEC_PER_SEC));
|
format_timespan(buf, sizeof(buf), m->default_mem_pressure_duration_usec, USEC_PER_SEC));
|
||||||
oomd_dump_system_context(&m->system_context, f, "\t");
|
oomd_dump_system_context(&m->system_context, f, "\t");
|
||||||
|
|
|
@ -18,7 +18,7 @@
|
||||||
* system.slice are assumed to be less latency sensitive. */
|
* system.slice are assumed to be less latency sensitive. */
|
||||||
#define DEFAULT_MEM_PRESSURE_DURATION_USEC (30 * USEC_PER_SEC)
|
#define DEFAULT_MEM_PRESSURE_DURATION_USEC (30 * USEC_PER_SEC)
|
||||||
#define DEFAULT_MEM_PRESSURE_LIMIT_PERCENT 60
|
#define DEFAULT_MEM_PRESSURE_LIMIT_PERCENT 60
|
||||||
#define DEFAULT_SWAP_USED_LIMIT 90
|
#define DEFAULT_SWAP_USED_LIMIT_PERCENT 90
|
||||||
|
|
||||||
#define RECLAIM_DURATION_USEC (30 * USEC_PER_SEC)
|
#define RECLAIM_DURATION_USEC (30 * USEC_PER_SEC)
|
||||||
#define POST_ACTION_DELAY_USEC (15 * USEC_PER_SEC)
|
#define POST_ACTION_DELAY_USEC (15 * USEC_PER_SEC)
|
||||||
|
@ -32,7 +32,7 @@ struct Manager {
|
||||||
Hashmap *polkit_registry;
|
Hashmap *polkit_registry;
|
||||||
|
|
||||||
bool dry_run;
|
bool dry_run;
|
||||||
unsigned swap_used_limit;
|
int swap_used_limit_permyriad;
|
||||||
loadavg_t default_mem_pressure_limit;
|
loadavg_t default_mem_pressure_limit;
|
||||||
usec_t default_mem_pressure_duration_usec;
|
usec_t default_mem_pressure_duration_usec;
|
||||||
|
|
||||||
|
@ -56,7 +56,7 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
|
||||||
|
|
||||||
int manager_new(Manager **ret);
|
int manager_new(Manager **ret);
|
||||||
|
|
||||||
int manager_start(Manager *m, bool dry_run, int swap_used_limit, int mem_pressure_limit_permyriad, usec_t mem_pressure_usec);
|
int manager_start(Manager *m, bool dry_run, int swap_used_limit_permyriad, int mem_pressure_limit_permyriad, usec_t mem_pressure_usec);
|
||||||
|
|
||||||
int manager_get_dump_string(Manager *m, char **ret);
|
int manager_get_dump_string(Manager *m, char **ret);
|
||||||
|
|
||||||
|
|
|
@ -134,13 +134,13 @@ bool oomd_memory_reclaim(Hashmap *h) {
|
||||||
return pgscan_of > last_pgscan_of;
|
return pgscan_of > last_pgscan_of;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool oomd_swap_free_below(const OomdSystemContext *ctx, uint64_t threshold_percent) {
|
bool oomd_swap_free_below(const OomdSystemContext *ctx, int threshold_permyriad) {
|
||||||
uint64_t swap_threshold;
|
uint64_t swap_threshold;
|
||||||
|
|
||||||
assert(ctx);
|
assert(ctx);
|
||||||
assert(threshold_percent <= 100);
|
assert(threshold_permyriad <= 10000);
|
||||||
|
|
||||||
swap_threshold = ctx->swap_total * threshold_percent / ((uint64_t) 100);
|
swap_threshold = ctx->swap_total * threshold_permyriad / (uint64_t) 10000;
|
||||||
return (ctx->swap_total - ctx->swap_used) < swap_threshold;
|
return (ctx->swap_total - ctx->swap_used) < swap_threshold;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -61,8 +61,8 @@ int oomd_pressure_above(Hashmap *h, usec_t duration, Set **ret);
|
||||||
* current sum is higher than the last interval's sum (there was some reclaim activity). */
|
* current sum is higher than the last interval's sum (there was some reclaim activity). */
|
||||||
bool oomd_memory_reclaim(Hashmap *h);
|
bool oomd_memory_reclaim(Hashmap *h);
|
||||||
|
|
||||||
/* Returns true if the amount of swap free is below the percentage of swap specified by `threshold_percent`. */
|
/* Returns true if the amount of free swap is below the fraction of total swap given by `threshold_permyriad` (in units of 1/10000). */
|
||||||
bool oomd_swap_free_below(const OomdSystemContext *ctx, uint64_t threshold_percent);
|
bool oomd_swap_free_below(const OomdSystemContext *ctx, int threshold_permyriad);
|
||||||
|
|
||||||
/* The compare functions will sort from largest to smallest, putting all the contexts with "avoid" at the end
|
/* The compare functions will sort from largest to smallest, putting all the contexts with "avoid" at the end
|
||||||
* (after the smallest values). */
|
* (after the smallest values). */
|
||||||
|
|
|
@ -17,13 +17,13 @@
|
||||||
#include "signal-util.h"
|
#include "signal-util.h"
|
||||||
|
|
||||||
static bool arg_dry_run = false;
|
static bool arg_dry_run = false;
|
||||||
static int arg_swap_used_limit = -1;
|
static int arg_swap_used_limit_permyriad = -1;
|
||||||
static int arg_mem_pressure_limit_permyriad = -1;
|
static int arg_mem_pressure_limit_permyriad = -1;
|
||||||
static usec_t arg_mem_pressure_usec = 0;
|
static usec_t arg_mem_pressure_usec = 0;
|
||||||
|
|
||||||
static int parse_config(void) {
|
static int parse_config(void) {
|
||||||
static const ConfigTableItem items[] = {
|
static const ConfigTableItem items[] = {
|
||||||
{ "OOM", "SwapUsedLimitPercent", config_parse_percent, 0, &arg_swap_used_limit },
|
{ "OOM", "SwapUsedLimit", config_parse_permyriad, 0, &arg_swap_used_limit_permyriad },
|
||||||
{ "OOM", "DefaultMemoryPressureLimit", config_parse_permyriad, 0, &arg_mem_pressure_limit_permyriad },
|
{ "OOM", "DefaultMemoryPressureLimit", config_parse_permyriad, 0, &arg_mem_pressure_limit_permyriad },
|
||||||
{ "OOM", "DefaultMemoryPressureDurationSec", config_parse_sec, 0, &arg_mem_pressure_usec },
|
{ "OOM", "DefaultMemoryPressureDurationSec", config_parse_sec, 0, &arg_mem_pressure_usec },
|
||||||
{}
|
{}
|
||||||
|
@ -159,7 +159,12 @@ static int run(int argc, char *argv[]) {
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
return log_error_errno(r, "Failed to create manager: %m");
|
return log_error_errno(r, "Failed to create manager: %m");
|
||||||
|
|
||||||
r = manager_start(m, arg_dry_run, arg_swap_used_limit, arg_mem_pressure_limit_permyriad, arg_mem_pressure_usec);
|
r = manager_start(
|
||||||
|
m,
|
||||||
|
arg_dry_run,
|
||||||
|
arg_swap_used_limit_permyriad,
|
||||||
|
arg_mem_pressure_limit_permyriad,
|
||||||
|
arg_mem_pressure_usec);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
return log_error_errno(r, "Failed to start up daemon: %m");
|
return log_error_errno(r, "Failed to start up daemon: %m");
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,6 @@
|
||||||
# See oomd.conf(5) for details
|
# See oomd.conf(5) for details
|
||||||
|
|
||||||
[OOM]
|
[OOM]
|
||||||
#SwapUsedLimitPercent=90%
|
#SwapUsedLimit=90%
|
||||||
#DefaultMemoryPressureLimit=60%
|
#DefaultMemoryPressureLimit=60%
|
||||||
#DefaultMemoryPressureDurationSec=30s
|
#DefaultMemoryPressureDurationSec=30s
|
||||||
|
|
|
@ -302,19 +302,19 @@ static void test_oomd_swap_free_below(void) {
|
||||||
.swap_total = 20971512 * 1024U,
|
.swap_total = 20971512 * 1024U,
|
||||||
.swap_used = 20971440 * 1024U,
|
.swap_used = 20971440 * 1024U,
|
||||||
};
|
};
|
||||||
assert_se(oomd_swap_free_below(&ctx, 20) == true);
|
assert_se(oomd_swap_free_below(&ctx, 2000) == true);
|
||||||
|
|
||||||
ctx = (OomdSystemContext) {
|
ctx = (OomdSystemContext) {
|
||||||
.swap_total = 20971512 * 1024U,
|
.swap_total = 20971512 * 1024U,
|
||||||
.swap_used = 3310136 * 1024U,
|
.swap_used = 3310136 * 1024U,
|
||||||
};
|
};
|
||||||
assert_se(oomd_swap_free_below(&ctx, 20) == false);
|
assert_se(oomd_swap_free_below(&ctx, 2000) == false);
|
||||||
|
|
||||||
ctx = (OomdSystemContext) {
|
ctx = (OomdSystemContext) {
|
||||||
.swap_total = 0,
|
.swap_total = 0,
|
||||||
.swap_used = 0,
|
.swap_used = 0,
|
||||||
};
|
};
|
||||||
assert_se(oomd_swap_free_below(&ctx, 20) == false);
|
assert_se(oomd_swap_free_below(&ctx, 2000) == false);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void test_oomd_sort_cgroups(void) {
|
static void test_oomd_sort_cgroups(void) {
|
||||||
|
|
Loading…
Reference in a new issue