diff --git a/man/oomd.conf.xml b/man/oomd.conf.xml index 2a12be8cadf..6156c98fbd9 100644 --- a/man/oomd.conf.xml +++ b/man/oomd.conf.xml @@ -48,36 +48,38 @@ - SwapUsedLimitPercent= + SwapUsedLimit= - Sets the limit for swap usage on the system before systemd-oomd will - take action. If the percentage of swap used on the system is more than what is defined here, - systemd-oomd will act on eligible descendant cgroups, starting from the ones with the - highest swap usage to the lowest swap usage. Which cgroups are monitored and what - action gets taken depends on what the unit has configured for ManagedOOMSwap=. - Takes a percentage value between 0% and 100%, inclusive. Defaults to 90%. + Sets the limit for swap usage on the system before systemd-oomd + will take action. If the fraction of swap used on the system is more than what is defined here, + systemd-oomd will act on eligible descendant control groups, starting from the + ones with the highest swap usage to the lowest swap usage. Which control groups are monitored and + what action gets taken depends on what the unit has configured for + ManagedOOMSwap=. Takes a value specified in percent (when suffixed with "%"), + permille ("‰") or permyriad ("‱"), between 0% and 100%, inclusive. Defaults to 90%. DefaultMemoryPressureLimit= - Sets the limit for memory pressure on the unit's cgroup before systemd-oomd - will take action. A unit can override this value with ManagedOOMMemoryPressureLimit=. - The memory pressure for this property represents the fraction of time in a 10 second window in which all tasks - in the cgroup were delayed. For each monitored cgroup, if the memory pressure on that cgroup exceeds the - limit set for longer than the duration set by DefaultMemoryPressureDurationSec=, - systemd-oomd will act on eligible descendant cgroups, - starting from the ones with the most reclaim activity to the least reclaim activity. Which cgroups are - monitored and what action gets taken depends on what the unit has configured for - ManagedOOMMemoryPressure=. Takes a percentage value between 0% and 100%, inclusive. - Defaults to 60%. + Sets the limit for memory pressure on the unit's control group before + systemd-oomd will take action. A unit can override this value with + ManagedOOMMemoryPressureLimit=. The memory pressure for this property represents + the fraction of time in a 10 second window in which all tasks in the control group were delayed. For + each monitored control group, if the memory pressure on that control group exceeds the limit set for + longer than the duration set by DefaultMemoryPressureDurationSec=, + systemd-oomd will act on eligible descendant control groups, starting from the + ones with the most reclaim activity to the least reclaim activity. Which control groups are monitored + and what action gets taken depends on what the unit has configured for + ManagedOOMMemoryPressure=. Takes a fraction specified in the same way as + SwapUsedLimit= above. Defaults to 60%. DefaultMemoryPressureDurationSec= - Sets the amount of time a unit's cgroup needs to have exceeded memory pressure limits before - systemd-oomd will take action. Memory pressure limits are defined by + Sets the amount of time a unit's control group needs to have exceeded memory pressure + limits before systemd-oomd will take action. Memory pressure limits are defined by DefaultMemoryPressureLimit= and ManagedOOMMemoryPressureLimit=. Defaults to 30 seconds when this property is unset or set to 0. diff --git a/src/oom/oomd-manager.c b/src/oom/oomd-manager.c index 65c0bfac614..fad1fb0d452 100644 --- a/src/oom/oomd-manager.c +++ b/src/oom/oomd-manager.c @@ -16,7 +16,7 @@ typedef struct ManagedOOMReply { ManagedOOMMode mode; char *path; char *property; - unsigned limit; + uint32_t limit; } ManagedOOMReply; static void managed_oom_reply_destroy(ManagedOOMReply *reply) { @@ -53,10 +53,10 @@ static int process_managed_oom_reply( assert(m); static const JsonDispatch dispatch_table[] = { - { "mode", JSON_VARIANT_STRING, managed_oom_mode, offsetof(ManagedOOMReply, mode), JSON_MANDATORY }, - { "path", JSON_VARIANT_STRING, json_dispatch_string, offsetof(ManagedOOMReply, path), JSON_MANDATORY }, - { "property", JSON_VARIANT_STRING, json_dispatch_string, offsetof(ManagedOOMReply, property), JSON_MANDATORY }, - { "limit", JSON_VARIANT_UNSIGNED, json_dispatch_unsigned, offsetof(ManagedOOMReply, limit), 0 }, + { "mode", JSON_VARIANT_STRING, managed_oom_mode, offsetof(ManagedOOMReply, mode), JSON_MANDATORY }, + { "path", JSON_VARIANT_STRING, json_dispatch_string, offsetof(ManagedOOMReply, path), JSON_MANDATORY }, + { "property", JSON_VARIANT_STRING, json_dispatch_string, offsetof(ManagedOOMReply, property), JSON_MANDATORY }, + { "limit", JSON_VARIANT_UNSIGNED, json_dispatch_uint32, offsetof(ManagedOOMReply, limit), 0 }, {}, }; @@ -87,7 +87,8 @@ static int process_managed_oom_reply( if (ret == -ENOMEM) { r = ret; goto finish; - } else if (ret < 0) + } + if (ret < 0) continue; monitor_hm = streq(reply.property, "ManagedOOMSwap") ? @@ -100,19 +101,15 @@ static int process_managed_oom_reply( limit = m->default_mem_pressure_limit; - if (streq(reply.property, "ManagedOOMMemoryPressure")) { - if (reply.limit > UINT32_MAX) /* out of range */ - continue; - if (reply.limit != 0) { - int permyriad = UINT32_SCALE_TO_PERMYRIAD(reply.limit); + if (streq(reply.property, "ManagedOOMMemoryPressure") && reply.limit > 0) { + int permyriad = UINT32_SCALE_TO_PERMYRIAD(reply.limit); - ret = store_loadavg_fixed_point( - (unsigned long) permyriad / 100, - (unsigned long) permyriad % 100, - &limit); - if (ret < 0) - continue; - } + ret = store_loadavg_fixed_point( + (unsigned long) permyriad / 100, + (unsigned long) permyriad % 100, + &limit); + if (ret < 0) + continue; } ret = oomd_insert_cgroup_context(NULL, monitor_hm, empty_to_root(reply.path)); @@ -354,11 +351,11 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo } } - if (oomd_swap_free_below(&m->system_context, (100 - m->swap_used_limit))) { + if (oomd_swap_free_below(&m->system_context, 10000 - m->swap_used_limit_permyriad)) { _cleanup_hashmap_free_ Hashmap *candidates = NULL; - log_notice("Swap used (%"PRIu64") / total (%"PRIu64") is more than %u%%", - m->system_context.swap_used, m->system_context.swap_total, m->swap_used_limit); + log_notice("Swap used (%"PRIu64") / total (%"PRIu64") is more than " PERMYRIAD_AS_PERCENT_FORMAT_STR, + m->system_context.swap_used, m->system_context.swap_total, PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad)); r = get_monitored_cgroup_contexts_candidates(m->monitored_swap_cgroup_contexts, &candidates); if (r == -ENOMEM) @@ -484,7 +481,13 @@ static int manager_connect_bus(Manager *m) { return 0; } -int manager_start(Manager *m, bool dry_run, int swap_used_limit, int mem_pressure_limit_permyriad, usec_t mem_pressure_usec) { +int manager_start( + Manager *m, + bool dry_run, + int swap_used_limit_permyriad, + int mem_pressure_limit_permyriad, + usec_t mem_pressure_usec) { + unsigned long l, f; int r; @@ -492,10 +495,10 @@ int manager_start(Manager *m, bool dry_run, int swap_used_limit, int mem_pressur m->dry_run = dry_run; - m->swap_used_limit = swap_used_limit != -1 ? swap_used_limit : DEFAULT_SWAP_USED_LIMIT; - assert(m->swap_used_limit <= 100); + m->swap_used_limit_permyriad = swap_used_limit_permyriad >= 0 ? swap_used_limit_permyriad : DEFAULT_SWAP_USED_LIMIT_PERCENT * 100; + assert(m->swap_used_limit_permyriad <= 10000); - if (mem_pressure_limit_permyriad != -1) { + if (mem_pressure_limit_permyriad >= 0) { assert(mem_pressure_limit_permyriad <= 10000); l = mem_pressure_limit_permyriad / 100; @@ -543,12 +546,12 @@ int manager_get_dump_string(Manager *m, char **ret) { fprintf(f, "Dry Run: %s\n" - "Swap Used Limit: %u%%\n" + "Swap Used Limit: " PERMYRIAD_AS_PERCENT_FORMAT_STR "\n" "Default Memory Pressure Limit: %lu.%02lu%%\n" "Default Memory Pressure Duration: %s\n" "System Context:\n", yes_no(m->dry_run), - m->swap_used_limit, + PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad), LOAD_INT(m->default_mem_pressure_limit), LOAD_FRAC(m->default_mem_pressure_limit), format_timespan(buf, sizeof(buf), m->default_mem_pressure_duration_usec, USEC_PER_SEC)); oomd_dump_system_context(&m->system_context, f, "\t"); diff --git a/src/oom/oomd-manager.h b/src/oom/oomd-manager.h index 50f10021c73..9ab8494c6d3 100644 --- a/src/oom/oomd-manager.h +++ b/src/oom/oomd-manager.h @@ -18,7 +18,7 @@ * system.slice are assumed to be less latency sensitive. */ #define DEFAULT_MEM_PRESSURE_DURATION_USEC (30 * USEC_PER_SEC) #define DEFAULT_MEM_PRESSURE_LIMIT_PERCENT 60 -#define DEFAULT_SWAP_USED_LIMIT 90 +#define DEFAULT_SWAP_USED_LIMIT_PERCENT 90 #define RECLAIM_DURATION_USEC (30 * USEC_PER_SEC) #define POST_ACTION_DELAY_USEC (15 * USEC_PER_SEC) @@ -32,7 +32,7 @@ struct Manager { Hashmap *polkit_registry; bool dry_run; - unsigned swap_used_limit; + int swap_used_limit_permyriad; loadavg_t default_mem_pressure_limit; usec_t default_mem_pressure_duration_usec; @@ -56,7 +56,7 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free); int manager_new(Manager **ret); -int manager_start(Manager *m, bool dry_run, int swap_used_limit, int mem_pressure_limit_permyriad, usec_t mem_pressure_usec); +int manager_start(Manager *m, bool dry_run, int swap_used_limit_permyriad, int mem_pressure_limit_permyriad, usec_t mem_pressure_usec); int manager_get_dump_string(Manager *m, char **ret); diff --git a/src/oom/oomd-util.c b/src/oom/oomd-util.c index 9dd9b17c6d3..b054ccacc48 100644 --- a/src/oom/oomd-util.c +++ b/src/oom/oomd-util.c @@ -134,13 +134,13 @@ bool oomd_memory_reclaim(Hashmap *h) { return pgscan_of > last_pgscan_of; } -bool oomd_swap_free_below(const OomdSystemContext *ctx, uint64_t threshold_percent) { +bool oomd_swap_free_below(const OomdSystemContext *ctx, int threshold_permyriad) { uint64_t swap_threshold; assert(ctx); - assert(threshold_percent <= 100); + assert(threshold_permyriad <= 10000); - swap_threshold = ctx->swap_total * threshold_percent / ((uint64_t) 100); + swap_threshold = ctx->swap_total * threshold_permyriad / (uint64_t) 10000; return (ctx->swap_total - ctx->swap_used) < swap_threshold; } diff --git a/src/oom/oomd-util.h b/src/oom/oomd-util.h index bffccf75da7..181443ae7a6 100644 --- a/src/oom/oomd-util.h +++ b/src/oom/oomd-util.h @@ -61,8 +61,8 @@ int oomd_pressure_above(Hashmap *h, usec_t duration, Set **ret); * current sum is higher than the last interval's sum (there was some reclaim activity). */ bool oomd_memory_reclaim(Hashmap *h); -/* Returns true if the amount of swap free is below the percentage of swap specified by `threshold_percent`. */ -bool oomd_swap_free_below(const OomdSystemContext *ctx, uint64_t threshold_percent); +/* Returns true if the amount of swap free is below the permyriad of swap specified by `threshold_permyriad`. */ +bool oomd_swap_free_below(const OomdSystemContext *ctx, int threshold_permyriad); /* The compare functions will sort from largest to smallest, putting all the contexts with "avoid" at the end * (after the smallest values). */ diff --git a/src/oom/oomd.c b/src/oom/oomd.c index 2e331e267f7..674d53fdcfe 100644 --- a/src/oom/oomd.c +++ b/src/oom/oomd.c @@ -17,13 +17,13 @@ #include "signal-util.h" static bool arg_dry_run = false; -static int arg_swap_used_limit = -1; +static int arg_swap_used_limit_permyriad = -1; static int arg_mem_pressure_limit_permyriad = -1; static usec_t arg_mem_pressure_usec = 0; static int parse_config(void) { static const ConfigTableItem items[] = { - { "OOM", "SwapUsedLimitPercent", config_parse_percent, 0, &arg_swap_used_limit }, + { "OOM", "SwapUsedLimit", config_parse_permyriad, 0, &arg_swap_used_limit_permyriad }, { "OOM", "DefaultMemoryPressureLimit", config_parse_permyriad, 0, &arg_mem_pressure_limit_permyriad }, { "OOM", "DefaultMemoryPressureDurationSec", config_parse_sec, 0, &arg_mem_pressure_usec }, {} @@ -159,7 +159,12 @@ static int run(int argc, char *argv[]) { if (r < 0) return log_error_errno(r, "Failed to create manager: %m"); - r = manager_start(m, arg_dry_run, arg_swap_used_limit, arg_mem_pressure_limit_permyriad, arg_mem_pressure_usec); + r = manager_start( + m, + arg_dry_run, + arg_swap_used_limit_permyriad, + arg_mem_pressure_limit_permyriad, + arg_mem_pressure_usec); if (r < 0) return log_error_errno(r, "Failed to start up daemon: %m"); diff --git a/src/oom/oomd.conf b/src/oom/oomd.conf index bd6a9391c6e..35ba8474573 100644 --- a/src/oom/oomd.conf +++ b/src/oom/oomd.conf @@ -12,6 +12,6 @@ # See oomd.conf(5) for details [OOM] -#SwapUsedLimitPercent=90% +#SwapUsedLimit=90% #DefaultMemoryPressureLimit=60% #DefaultMemoryPressureDurationSec=30s diff --git a/src/oom/test-oomd-util.c b/src/oom/test-oomd-util.c index 49a02f94246..a0e583ac6b3 100644 --- a/src/oom/test-oomd-util.c +++ b/src/oom/test-oomd-util.c @@ -302,19 +302,19 @@ static void test_oomd_swap_free_below(void) { .swap_total = 20971512 * 1024U, .swap_used = 20971440 * 1024U, }; - assert_se(oomd_swap_free_below(&ctx, 20) == true); + assert_se(oomd_swap_free_below(&ctx, 2000) == true); ctx = (OomdSystemContext) { .swap_total = 20971512 * 1024U, .swap_used = 3310136 * 1024U, }; - assert_se(oomd_swap_free_below(&ctx, 20) == false); + assert_se(oomd_swap_free_below(&ctx, 2000) == false); ctx = (OomdSystemContext) { .swap_total = 0, .swap_used = 0, }; - assert_se(oomd_swap_free_below(&ctx, 20) == false); + assert_se(oomd_swap_free_below(&ctx, 2000) == false); } static void test_oomd_sort_cgroups(void) {