From 4fb0d2dc140c9a2c01c236d2a8dc09a44157e896 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Koutn=C3=BD?=
Date: Fri, 11 Aug 2023 13:51:20 +0200
Subject: [PATCH 1/4] cgroup: Add EffectiveMemoryMax=, EffectiveMemoryHigh= and EffectiveTasksMax= properties

Users become perplexed when they run their workload in a unit with no
explicit limits configured (moreover, listing the limit property would
even show that it is infinity) and yet they experience unexpected resource
limitation.

Memory and PID limits are the most visible ones, therefore add new
read-only unit properties:
  - EffectiveMemoryMax=,
  - EffectiveMemoryHigh=,
  - EffectiveTasksMax=.

These properties represent the most stringent limit systemd is aware of
for the given unit -- and that is typically(*) the effective value.

Implement the properties by simply traversing all parents in the
leaf-slice tree and picking the minimum value. Note that effective limits
are thus defined even for units that don't enable explicit accounting
(because of the hierarchy).

(*) The exception is when systemd runs in a cgroup namespace (cgroupns)
and cannot reason about the outer setup. A complete solution would need
kernel support.

--- man/org.freedesktop.systemd1.xml | 126 ++++++++++++++++++++++++++++++ man/systemd.resource-control.xml | 11 ++- src/core/cgroup.c | 48 ++++++++++++ src/core/cgroup.h | 13 +++ src/core/dbus-unit.c | 25 ++++++ src/shared/bus-print-properties.c | 6 +- 6 files changed, 224 insertions(+), 5 deletions(-) diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml index ced9bea7db..383ae8d4ac 100644 --- a/man/org.freedesktop.systemd1.xml +++ b/man/org.freedesktop.systemd1.xml @@ -2792,6 +2792,10 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryAvailable = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t EffectiveMemoryMax = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t EffectiveMemoryHigh = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t CPUUsageNSec = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly ay EffectiveCPUs = [...]; @@ -2800,6 +2804,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t TasksCurrent = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t EffectiveTasksMax = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t IPIngressBytes = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t IPIngressPackets = ...; @@ -3425,6 +3431,10 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + + + @@ -3433,6 +3443,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + @@ -4067,6 +4079,10 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + + + @@ -4075,6 +4091,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + @@ -4871,6 +4889,10 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { 
@org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryAvailable = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t EffectiveMemoryMax = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t EffectiveMemoryHigh = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t CPUUsageNSec = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly ay EffectiveCPUs = [...]; @@ -4879,6 +4901,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t TasksCurrent = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t EffectiveTasksMax = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t IPIngressBytes = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t IPIngressPackets = ...; @@ -5514,6 +5538,10 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + + + + @@ -5522,6 +5550,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + + @@ -6138,6 +6168,10 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + + + + @@ -6146,6 +6180,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + + @@ -6816,6 +6852,10 @@ node /org/freedesktop/systemd1/unit/home_2emount { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryAvailable = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t EffectiveMemoryMax = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t EffectiveMemoryHigh = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t CPUUsageNSec = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly ay EffectiveCPUs = [...]; @@ -6824,6 +6864,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t 
TasksCurrent = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t EffectiveTasksMax = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t IPIngressBytes = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t IPIngressPackets = ...; @@ -7387,6 +7429,10 @@ node /org/freedesktop/systemd1/unit/home_2emount { + + + + @@ -7395,6 +7441,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { + + @@ -7925,6 +7973,10 @@ node /org/freedesktop/systemd1/unit/home_2emount { + + + + @@ -7933,6 +7985,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { + + @@ -8726,6 +8780,10 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryAvailable = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t EffectiveMemoryMax = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t EffectiveMemoryHigh = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t CPUUsageNSec = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly ay EffectiveCPUs = [...]; @@ -8734,6 +8792,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t TasksCurrent = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t EffectiveTasksMax = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t IPIngressBytes = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t IPIngressPackets = ...; @@ -9283,6 +9343,10 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { + + + + @@ -9291,6 +9355,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { + + @@ -9807,6 +9873,10 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { + + + + @@ -9815,6 +9885,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { + + @@ -10467,6 +10539,10 @@ 
node /org/freedesktop/systemd1/unit/system_2eslice { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryAvailable = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t EffectiveMemoryMax = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t EffectiveMemoryHigh = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t CPUUsageNSec = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly ay EffectiveCPUs = [...]; @@ -10475,6 +10551,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t TasksCurrent = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t EffectiveTasksMax = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t IPIngressBytes = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t IPIngressPackets = ...; @@ -10650,6 +10728,10 @@ node /org/freedesktop/systemd1/unit/system_2eslice { + + + + @@ -10658,6 +10740,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice { + + @@ -10838,6 +10922,10 @@ node /org/freedesktop/systemd1/unit/system_2eslice { + + + + @@ -10846,6 +10934,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice { + + @@ -11052,6 +11142,10 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryAvailable = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t EffectiveMemoryMax = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t EffectiveMemoryHigh = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t CPUUsageNSec = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly ay EffectiveCPUs = [...]; @@ -11060,6 +11154,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope { 
@org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t TasksCurrent = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t EffectiveTasksMax = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t IPIngressBytes = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t IPIngressPackets = ...; @@ -11255,6 +11351,10 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope { + + + + @@ -11263,6 +11363,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope { + + @@ -11473,6 +11575,10 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope { + + + + @@ -11481,6 +11587,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope { + + @@ -11873,6 +11981,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ MemorySwapCurrent, MemorySwapPeak, and MemoryZSwapCurrent were added in version 255. + EffectiveMemoryHigh, + EffectiveMemoryMax, + EffectiveTasksMax were added in version 256. Socket Unit Objects @@ -11904,6 +12015,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ MemorySwapCurrent, MemorySwapPeak, and MemoryZSwapCurrent were added in version 255. + EffectiveMemoryHigh, + EffectiveMemoryMax, + EffectiveTasksMax were added in version 256. Mount Unit Objects @@ -11933,6 +12047,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ MemorySwapCurrent, MemorySwapPeak, and MemoryZSwapCurrent were added in version 255. + EffectiveMemoryHigh, + EffectiveMemoryMax, + EffectiveTasksMax were added in version 256. Swap Unit Objects @@ -11962,6 +12079,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ MemorySwapCurrent, MemorySwapPeak, and MemoryZSwapCurrent were added in version 255. + EffectiveMemoryHigh, + EffectiveMemoryMax, + EffectiveTasksMax were added in version 256. 
Slice Unit Objects @@ -11982,6 +12102,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ MemorySwapCurrent, MemorySwapPeak, and MemoryZSwapCurrent were added in version 255. + EffectiveMemoryHigh, + EffectiveMemoryMax, + EffectiveTasksMax were added in version 256. Scope Unit Objects @@ -12003,6 +12126,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ MemorySwapCurrent, MemorySwapPeak, and MemoryZSwapCurrent were added in version 255. + EffectiveMemoryHigh, + EffectiveMemoryMax, + EffectiveTasksMax were added in version 256. Job Objects diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml index 42f265c950..bd8b6a5719 100644 --- a/man/systemd.resource-control.xml +++ b/man/systemd.resource-control.xml @@ -406,7 +406,9 @@ CPUWeight=20 DisableControllers=cpu / \ system. If assigned the special value infinity, no memory throttling is applied. This controls the memory.high control group attribute. For details about this control group attribute, see - Memory Interface Files. + Memory Interface Files. + The effective configuration is reported as EffectiveMemoryHigh= + (see also EffectiveMemoryMax=). While StartupMemoryHigh= applies to the startup and shutdown phases of the system, MemoryHigh= applies to normal runtime of the system, and if the former is not set also to @@ -434,7 +436,9 @@ CPUWeight=20 DisableControllers=cpu / \ percentage value may be specified, which is taken relative to the installed physical memory on the system. If assigned the special value infinity, no memory limit is applied. This controls the memory.max control group attribute. For details about this control group attribute, see - Memory Interface Files. + Memory Interface Files. + The effective configuration is reported as EffectiveMemoryMax= (the value is + the most stringent limit of the unit and parent slices). 
While StartupMemoryMax= applies to the startup and shutdown phases of the system, MemoryMax= applies to normal runtime of the system, and if the former is not set also to @@ -560,7 +564,8 @@ CPUWeight=20 DisableControllers=cpu / \ limit is applied. This controls the pids.max control group attribute. For details about this control group attribute, the pids controller - . + . + The effective configuration is reported as EffectiveTasksMax=. The system default for this setting may be controlled with DefaultTasksMax= in diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 5e66ef76b5..f7b776337a 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -4247,6 +4247,46 @@ int unit_get_ip_accounting( return r; } +static uint64_t unit_get_effective_limit_one(Unit *u, CGroupLimitType type) { + CGroupContext *cc; + + assert(u); + assert(UNIT_HAS_CGROUP_CONTEXT(u)); + + cc = unit_get_cgroup_context(u); + switch (type) { + /* Note: on legacy/hybrid hierarchies memory_max stays CGROUP_LIMIT_MAX unless configured + * explicitly. Effective value of MemoryLimit= (cgroup v1) is not implemented. 
*/ + case CGROUP_LIMIT_MEMORY_MAX: + return cc->memory_max; + case CGROUP_LIMIT_MEMORY_HIGH: + return cc->memory_high; + case CGROUP_LIMIT_TASKS_MAX: + return cgroup_tasks_max_resolve(&cc->tasks_max); + default: + assert_not_reached(); + } +} + +int unit_get_effective_limit(Unit *u, CGroupLimitType type, uint64_t *ret) { + uint64_t infimum; + + assert(u); + assert(ret); + assert(type >= 0); + assert(type < _CGROUP_LIMIT_TYPE_MAX); + + if (!UNIT_HAS_CGROUP_CONTEXT(u)) + return -EINVAL; + + infimum = unit_get_effective_limit_one(u, type); + for (Unit *slice = UNIT_GET_SLICE(u); slice; slice = UNIT_GET_SLICE(slice)) + infimum = MIN(infimum, unit_get_effective_limit_one(slice, type)); + + *ret = infimum; + return 0; +} + static int unit_get_io_accounting_raw(Unit *u, uint64_t ret[static _CGROUP_IO_ACCOUNTING_METRIC_MAX]) { static const char *const field_names[_CGROUP_IO_ACCOUNTING_METRIC_MAX] = { [CGROUP_IO_READ_BYTES] = "rbytes=", @@ -4667,3 +4707,11 @@ static const char* const cgroup_memory_accounting_metric_table[_CGROUP_MEMORY_AC }; DEFINE_STRING_TABLE_LOOKUP(cgroup_memory_accounting_metric, CGroupMemoryAccountingMetric); + +static const char *const cgroup_limit_type_table[_CGROUP_LIMIT_TYPE_MAX] = { + [CGROUP_LIMIT_MEMORY_MAX] = "EffectiveMemoryMax", + [CGROUP_LIMIT_MEMORY_HIGH] = "EffectiveMemoryHigh", + [CGROUP_LIMIT_TASKS_MAX] = "EffectiveTasksMax", +}; + +DEFINE_STRING_TABLE_LOOKUP(cgroup_limit_type, CGroupLimitType); diff --git a/src/core/cgroup.h b/src/core/cgroup.h index f1b674b4b7..54bce91ea1 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -276,6 +276,15 @@ typedef enum CGroupMemoryAccountingMetric { _CGROUP_MEMORY_ACCOUNTING_METRIC_INVALID = -EINVAL, } CGroupMemoryAccountingMetric; +/* Used for limits whose value sets have infimum */ +typedef enum CGroupLimitType { + CGROUP_LIMIT_MEMORY_MAX, + CGROUP_LIMIT_MEMORY_HIGH, + CGROUP_LIMIT_TASKS_MAX, + _CGROUP_LIMIT_TYPE_MAX, + _CGROUP_LIMIT_INVALID = -EINVAL, +} CGroupLimitType; + typedef struct Unit 
Unit; typedef struct Manager Manager; typedef enum ManagerState ManagerState; @@ -374,6 +383,7 @@ int unit_get_tasks_current(Unit *u, uint64_t *ret); int unit_get_cpu_usage(Unit *u, nsec_t *ret); int unit_get_io_accounting(Unit *u, CGroupIOAccountingMetric metric, bool allow_cache, uint64_t *ret); int unit_get_ip_accounting(Unit *u, CGroupIPAccountingMetric metric, uint64_t *ret); +int unit_get_effective_limit(Unit *u, CGroupLimitType type, uint64_t *ret); int unit_reset_cpu_accounting(Unit *u); void unit_reset_memory_accounting_last(Unit *u); @@ -425,5 +435,8 @@ CGroupIPAccountingMetric cgroup_ip_accounting_metric_from_string(const char *s) const char* cgroup_io_accounting_metric_to_string(CGroupIOAccountingMetric m) _const_; CGroupIOAccountingMetric cgroup_io_accounting_metric_from_string(const char *s) _pure_; +const char* cgroup_limit_type_to_string(CGroupLimitType m) _const_; +CGroupLimitType cgroup_limit_type_from_string(const char *s) _pure_; + const char* cgroup_memory_accounting_metric_to_string(CGroupMemoryAccountingMetric m) _const_; CGroupMemoryAccountingMetric cgroup_memory_accounting_metric_from_string(const char *s) _pure_; diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c index 48b7e10ea5..23f675de0c 100644 --- a/src/core/dbus-unit.c +++ b/src/core/dbus-unit.c @@ -1442,6 +1442,28 @@ static int property_get_io_counter( return sd_bus_message_append(reply, "t", value); } +static int property_get_effective_limit( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + uint64_t value = CGROUP_LIMIT_MAX; + Unit *u = ASSERT_PTR(userdata); + ssize_t type; + + assert(bus); + assert(reply); + assert(property); + + assert_se((type = cgroup_limit_type_from_string(property)) >= 0); + (void) unit_get_effective_limit(u, type, &value); + return sd_bus_message_append(reply, "t", value); +} + int bus_unit_method_attach_processes(sd_bus_message *message, void 
*userdata, sd_bus_error *error) { _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL; _cleanup_set_free_ Set *pids = NULL; @@ -1563,10 +1585,13 @@ const sd_bus_vtable bus_unit_cgroup_vtable[] = { SD_BUS_PROPERTY("MemorySwapPeak", "t", property_get_memory_accounting, 0, 0), SD_BUS_PROPERTY("MemoryZSwapCurrent", "t", property_get_memory_accounting, 0, 0), SD_BUS_PROPERTY("MemoryAvailable", "t", property_get_available_memory, 0, 0), + SD_BUS_PROPERTY("EffectiveMemoryMax", "t", property_get_effective_limit, 0, 0), + SD_BUS_PROPERTY("EffectiveMemoryHigh", "t", property_get_effective_limit, 0, 0), SD_BUS_PROPERTY("CPUUsageNSec", "t", property_get_cpu_usage, 0, 0), SD_BUS_PROPERTY("EffectiveCPUs", "ay", property_get_cpuset_cpus, 0, 0), SD_BUS_PROPERTY("EffectiveMemoryNodes", "ay", property_get_cpuset_mems, 0, 0), SD_BUS_PROPERTY("TasksCurrent", "t", property_get_current_tasks, 0, 0), + SD_BUS_PROPERTY("EffectiveTasksMax", "t", property_get_effective_limit, 0, 0), SD_BUS_PROPERTY("IPIngressBytes", "t", property_get_ip_counter, 0, 0), SD_BUS_PROPERTY("IPIngressPackets", "t", property_get_ip_counter, 0, 0), SD_BUS_PROPERTY("IPEgressBytes", "t", property_get_ip_counter, 0, 0), diff --git a/src/shared/bus-print-properties.c b/src/shared/bus-print-properties.c index 6704e1ef3d..99b1cc7c70 100644 --- a/src/shared/bus-print-properties.c +++ b/src/shared/bus-print-properties.c @@ -164,9 +164,11 @@ static int bus_print_property(const char *name, const char *expected_value, sd_b bus_print_property_value(name, expected_value, flags, "[not set]"); - else if ((ENDSWITH_SET(name, "MemoryLow", "MemoryMin", "MemoryHigh", "MemoryMax", "MemorySwapMax", "MemoryZSwapMax", "MemoryLimit") && + else if ((ENDSWITH_SET(name, "MemoryLow", "MemoryMin", + "MemoryHigh", "MemoryMax", + "MemorySwapMax", "MemoryZSwapMax", "MemoryLimit") && u == CGROUP_LIMIT_MAX) || - (STR_IN_SET(name, "TasksMax", "DefaultTasksMax") && u == UINT64_MAX) || + (endswith(name, "TasksMax") && u == UINT64_MAX) || 
(startswith(name, "Limit") && u == UINT64_MAX) || (startswith(name, "DefaultLimit") && u == UINT64_MAX)) From 834ca54624ae1d61ec4fcf3a63b10271c38c4860 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Wed, 9 Aug 2023 22:42:36 +0200 Subject: [PATCH 2/4] test: Convert rlimit test to subtest of generic limit testing No functional change intended. Preparation for new tests. --- test/units/testsuite-05.rlimit.sh | 25 +++++++++++++++++++++++++ test/units/testsuite-05.service | 2 +- test/units/testsuite-05.sh | 22 +++------------------- 3 files changed, 29 insertions(+), 20 deletions(-) create mode 100755 test/units/testsuite-05.rlimit.sh diff --git a/test/units/testsuite-05.rlimit.sh b/test/units/testsuite-05.rlimit.sh new file mode 100755 index 0000000000..bbf3adbe65 --- /dev/null +++ b/test/units/testsuite-05.rlimit.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: LGPL-2.1-or-later +set -eux +set -o pipefail + +P=/run/systemd/system.conf.d +mkdir $P + +cat >$P/rlimits.conf <$P/rlimits.conf < Date: Wed, 9 Aug 2023 22:43:31 +0200 Subject: [PATCH 3/4] test: Add effective cgroup limits testing --- test/units/testsuite-05.effective-limit.sh | 68 ++++++++++++++++++++++ test/units/util.sh | 9 +++ 2 files changed, 77 insertions(+) create mode 100755 test/units/testsuite-05.effective-limit.sh diff --git a/test/units/testsuite-05.effective-limit.sh b/test/units/testsuite-05.effective-limit.sh new file mode 100755 index 0000000000..3ff8e83140 --- /dev/null +++ b/test/units/testsuite-05.effective-limit.sh @@ -0,0 +1,68 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: LGPL-2.1-or-later +set -eux +set -o pipefail + +# shellcheck source=test/units/util.sh +. 
"$(dirname "$0")"/util.sh + +pre=test05 +cat >/run/systemd/system/"$pre"alpha.slice </run/systemd/system/"$pre"alpha-beta.slice </run/systemd/system/"$pre"alpha-beta-gamma.slice < '$2'" >&2 + exit 1 + fi +)} + assert_in() {( set +ex From 93f8e88d23bd383b5134f32c1e2ee315ac3a38c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Mon, 14 Aug 2023 19:59:57 +0200 Subject: [PATCH 4/4] cgroup: Restrict effective limits with global resource provision Global resource (whole system or root cg's (e.g. in a container)) is also a well-defined limit for memory and tasks, take it into account when calculating effective limits. --- man/systemd.resource-control.xml | 2 +- src/core/cgroup.c | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml index bd8b6a5719..c2aa5b57e8 100644 --- a/man/systemd.resource-control.xml +++ b/man/systemd.resource-control.xml @@ -438,7 +438,7 @@ CPUWeight=20 DisableControllers=cpu / \ memory.max control group attribute. For details about this control group attribute, see Memory Interface Files. The effective configuration is reported as EffectiveMemoryMax= (the value is - the most stringent limit of the unit and parent slices). + the most stringent limit of the unit and parent slices and it is capped by physical memory). 
While StartupMemoryMax= applies to the startup and shutdown phases of the system, MemoryMax= applies to normal runtime of the system, and if the former is not set also to diff --git a/src/core/cgroup.c b/src/core/cgroup.c index f7b776337a..60a8a55087 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -4253,6 +4253,17 @@ static uint64_t unit_get_effective_limit_one(Unit *u, CGroupLimitType type) { assert(u); assert(UNIT_HAS_CGROUP_CONTEXT(u)); + if (unit_has_name(u, SPECIAL_ROOT_SLICE)) + switch (type) { + case CGROUP_LIMIT_MEMORY_MAX: + case CGROUP_LIMIT_MEMORY_HIGH: + return physical_memory(); + case CGROUP_LIMIT_TASKS_MAX: + return system_tasks_max(); + default: + assert_not_reached(); + } + cc = unit_get_cgroup_context(u); switch (type) { /* Note: on legacy/hybrid hierarchies memory_max stays CGROUP_LIMIT_MAX unless configured