core: add Condition[Memory/CPU/IO]Pressure

By default checks PSI on /proc/pressure, and causes a unit to be skipped
if the avg300 measurement is above the configured threshold.
Also allow to pass a custom timespan, and a particular slice unit to
check under.

Fixes #20139
This commit is contained in:
Luca Boccassi 2021-11-17 10:00:12 +00:00 committed by Zbigniew Jędrzejewski-Szmek
parent 99f8a6d7f5
commit 81513b382b
7 changed files with 338 additions and 0 deletions

View file

@ -1658,6 +1658,29 @@
</listitem>
</varlistentry>
<varlistentry>
<term><varname>ConditionMemoryPressure=</varname></term>
<term><varname>ConditionCPUPressure=</varname></term>
<term><varname>ConditionIOPressure=</varname></term>
<listitem><para>Verify that the overall system (memory, CPU or IO) pressure is below or equal to a threshold.
This setting takes a threshold value as argument. It can be specified as a simple percentage value,
suffixed with <literal>%</literal>, in which case the pressure will be measured as an average over the last
five minutes before the attempt to start the unit is performed.
Alternatively, the average timespan can also be specified using <literal>/</literal> as a separator, for
example: <literal>10%/1min</literal>. The supported timespans match what the kernel provides, and are
limited to <literal>10sec</literal>, <literal>1min</literal> and <literal>5min</literal>. The
<literal>full</literal> PSI line is checked first; if it is not available, the <literal>some</literal>
line is checked instead. For more details, see the documentation on <ulink
url="https://www.kernel.org/doc/html/latest/accounting/psi.html">PSI (Pressure Stall Information)
</ulink>.</para>
<para>Optionally, the threshold value can be prefixed with the slice unit under which the pressure will be checked,
followed by a <literal>:</literal>. If the slice unit is not specified, the overall system pressure will be measured,
instead of a particular cgroup's.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>AssertArchitecture=</varname></term>
<term><varname>AssertVirtualization=</varname></term>
@ -1686,6 +1709,9 @@
<term><varname>AssertMemory=</varname></term>
<term><varname>AssertCPUs=</varname></term>
<term><varname>AssertOSRelease=</varname></term>
<term><varname>AssertMemoryPressure=</varname></term>
<term><varname>AssertCPUPressure=</varname></term>
<term><varname>AssertIOPressure=</varname></term>
<listitem><para>Similar to the <varname>ConditionArchitecture=</varname>,
<varname>ConditionVirtualization=</varname>, …, condition settings described above, these settings

View file

@ -341,6 +341,9 @@ Unit.ConditionUser, config_parse_unit_condition_string,
Unit.ConditionGroup, config_parse_unit_condition_string, CONDITION_GROUP, offsetof(Unit, conditions)
Unit.ConditionControlGroupController, config_parse_unit_condition_string, CONDITION_CONTROL_GROUP_CONTROLLER, offsetof(Unit, conditions)
Unit.ConditionOSRelease, config_parse_unit_condition_string, CONDITION_OS_RELEASE, offsetof(Unit, conditions)
Unit.ConditionMemoryPressure, config_parse_unit_condition_string, CONDITION_MEMORY_PRESSURE, offsetof(Unit, conditions)
Unit.ConditionCPUPressure, config_parse_unit_condition_string, CONDITION_CPU_PRESSURE, offsetof(Unit, conditions)
Unit.ConditionIOPressure, config_parse_unit_condition_string, CONDITION_IO_PRESSURE, offsetof(Unit, conditions)
Unit.AssertPathExists, config_parse_unit_condition_path, CONDITION_PATH_EXISTS, offsetof(Unit, asserts)
Unit.AssertPathExistsGlob, config_parse_unit_condition_path, CONDITION_PATH_EXISTS_GLOB, offsetof(Unit, asserts)
Unit.AssertPathIsDirectory, config_parse_unit_condition_path, CONDITION_PATH_IS_DIRECTORY, offsetof(Unit, asserts)
@ -368,6 +371,9 @@ Unit.AssertUser, config_parse_unit_condition_string,
Unit.AssertGroup, config_parse_unit_condition_string, CONDITION_GROUP, offsetof(Unit, asserts)
Unit.AssertControlGroupController, config_parse_unit_condition_string, CONDITION_CONTROL_GROUP_CONTROLLER, offsetof(Unit, asserts)
Unit.AssertOSRelease, config_parse_unit_condition_string, CONDITION_OS_RELEASE, offsetof(Unit, asserts)
Unit.AssertMemoryPressure, config_parse_unit_condition_string, CONDITION_MEMORY_PRESSURE, offsetof(Unit, asserts)
Unit.AssertCPUPressure, config_parse_unit_condition_string, CONDITION_CPU_PRESSURE, offsetof(Unit, asserts)
Unit.AssertIOPressure, config_parse_unit_condition_string, CONDITION_IO_PRESSURE, offsetof(Unit, asserts)
Unit.CollectMode, config_parse_collect_mode, 0, offsetof(Unit, collect_mode)
Service.PIDFile, config_parse_pid_file, 0, offsetof(Service, pid_file)
Service.ExecCondition, config_parse_exec, SERVICE_EXEC_CONDITION, offsetof(Service, exec_command)

View file

@ -39,8 +39,10 @@
#include "os-util.h"
#include "parse-util.h"
#include "path-util.h"
#include "percent-util.h"
#include "proc-cmdline.h"
#include "process-util.h"
#include "psi-util.h"
#include "selinux-util.h"
#include "smack-util.h"
#include "stat-util.h"
@ -962,6 +964,131 @@ static int condition_test_file_is_executable(Condition *c, char **env) {
(st.st_mode & 0111));
}
/* Evaluates the ConditionMemoryPressure=, ConditionCPUPressure= and ConditionIOPressure= checks.
 *
 * The parameter is parsed as "[SLICE:]PERCENT[/TIMESPAN]":
 *   - without a slice, the system-wide file below /proc/pressure is consulted,
 *   - with a slice, the "<resource>.pressure" attribute of that slice's cgroup is consulted,
 *   - TIMESPAN selects the avg10 ("10sec"), avg60 ("1min") or avg300 ("5min") field, and the
 *     avg300 field is used when no timespan is given.
 *
 * Returns a positive value when the measured pressure is lower than or equal to the configured
 * limit, 0 when it is higher, and the value produced by the logging helpers (an errno-style
 * error) when the parameter cannot be parsed or the pressure cannot be read. Whenever the check
 * cannot be performed at all (no PSI support, no unified hierarchy, controller missing, pressure
 * file absent) the function returns 1, i.e. the condition is treated as satisfied. */
static int condition_test_psi(Condition *c, char **env) {
        _cleanup_free_ char *head = NULL, *tail = NULL, *percent = NULL, *window = NULL, *psi_path = NULL;
        const char *cursor, *spec, *resource;
        loadavg_t *observed = NULL, threshold;
        ResourcePressure stats;
        int r, permyriad;

        assert(c);
        assert(c->parameter);
        assert(IN_SET(c->type, CONDITION_MEMORY_PRESSURE, CONDITION_CPU_PRESSURE, CONDITION_IO_PRESSURE));

        if (!is_pressure_supported()) {
                log_debug("Pressure Stall Information (PSI) is not supported, skipping.");
                return 1;
        }

        /* Name of the resource, used both for the /proc/pressure entry and the cgroup attribute. */
        if (c->type == CONDITION_MEMORY_PRESSURE)
                resource = "memory";
        else if (c->type == CONDITION_CPU_PRESSURE)
                resource = "cpu";
        else
                resource = "io";

        /* Split off the optional "SLICE:" prefix. */
        cursor = c->parameter;
        r = extract_many_words(&cursor, ":", 0, &head, &tail, NULL);
        if (r <= 0)
                return log_debug_errno(r < 0 ? r : SYNTHETIC_ERRNO(EINVAL), "Failed to parse condition parameter %s: %m", c->parameter);

        if (r > 1) {
                /* Two fields: the first names a slice whose cgroup pressure file shall be inspected. */
                const char *attribute = strjoina(resource, ".pressure");
                _cleanup_free_ char *cgroup = NULL;
                CGroupMask available, needed;
                char *slice;

                if (c->type == CONDITION_MEMORY_PRESSURE)
                        needed = CGROUP_MASK_MEMORY;
                else if (c->type == CONDITION_CPU_PRESSURE)
                        needed = CGROUP_MASK_CPU;
                else
                        needed = CGROUP_MASK_IO;

                slice = strstrip(head);
                if (!slice)
                        return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse condition parameter %s: %m", c->parameter);

                r = cg_all_unified();
                if (r < 0)
                        return log_debug_errno(r, "Failed to determine whether the unified cgroups hierarchy is used: %m");
                if (r == 0) {
                        log_debug("PSI condition check requires the unified cgroups hierarchy, skipping.");
                        return 1;
                }

                r = cg_mask_supported(&available);
                if (r < 0)
                        return log_debug_errno(r, "Failed to get supported cgroup controllers: %m");

                if (!FLAGS_SET(available, needed)) {
                        log_debug("Cgroup %s controller not available, skipping PSI condition check.", resource);
                        return 1;
                }

                r = cg_slice_to_path(slice, &cgroup);
                if (r < 0)
                        return log_debug_errno(r, "Cannot determine slice \"%s\" cgroup path: %m", slice);

                r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, cgroup, attribute, &psi_path);
                if (r < 0)
                        return log_debug_errno(r, "Error getting cgroup pressure path from %s: %m", cgroup);

                spec = tail;
        } else {
                /* A single field: no slice was given, hence look at the global system pressure. */
                psi_path = path_join("/proc/pressure", resource);
                if (!psi_path)
                        return log_oom();

                spec = head;
        }

        /* Split "PERCENT[/TIMESPAN]". The timespan, if present, must be one of the averaging
         * windows offered by the kernel; a bare percentage is compared against the average over
         * five minutes. */
        r = extract_many_words(&spec, "/", 0, &percent, &window, NULL);
        if (r <= 0)
                return log_debug_errno(r < 0 ? r : SYNTHETIC_ERRNO(EINVAL), "Failed to parse condition parameter %s: %m", c->parameter);

        if (r != 1) {
                const char *timespan;

                timespan = skip_leading_chars(window, NULL);
                if (!timespan)
                        return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse condition parameter %s: %m", c->parameter);

                if (startswith(timespan, "10sec"))
                        observed = &stats.avg10;
                else if (startswith(timespan, "1min"))
                        observed = &stats.avg60;
                else if (startswith(timespan, "5min"))
                        observed = &stats.avg300;

                if (!observed)
                        return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse condition parameter %s: %m", c->parameter);
        } else
                observed = &stats.avg300;

        spec = strstrip(percent);
        if (!spec)
                return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse condition parameter %s: %m", c->parameter);

        permyriad = parse_permyriad(spec);
        if (permyriad < 0)
                return log_debug_errno(permyriad, "Failed to parse permyriad: %s", c->parameter);

        /* Convert the parsed value into the same fixed-point representation as the pressure
         * averages: the quotient by 100 is the integer part, the remainder the fractional part. */
        r = store_loadavg_fixed_point(permyriad / 100LU, permyriad % 100LU, &threshold);
        if (r < 0)
                return log_debug_errno(r, "Failed to parse loadavg: %s", c->parameter);

        r = read_resource_pressure(psi_path, PRESSURE_TYPE_FULL, &stats);
        if (r == -ENODATA)
                /* The 'full' line of cpu.pressure was added recently, so retry with 'some'. */
                r = read_resource_pressure(psi_path, PRESSURE_TYPE_SOME, &stats);
        if (r == -ENOENT) {
                /* PSI support was verified at the top, so a missing file means the cgroup we were
                 * pointed at does not exist, or no longer exists. */
                log_debug("\"%s\" not found, skipping PSI check.", psi_path);
                return 1;
        }
        if (r < 0)
                return log_debug_errno(r, "Error parsing pressure from %s: %m", psi_path);

        /* The condition holds while the measured average does not exceed the configured limit. */
        return *observed <= threshold;
}
int condition_test(Condition *c, char **env) {
static int (*const condition_tests[_CONDITION_TYPE_MAX])(Condition *c, char **env) = {
@ -994,6 +1121,9 @@ int condition_test(Condition *c, char **env) {
[CONDITION_ENVIRONMENT] = condition_test_environment,
[CONDITION_CPU_FEATURE] = condition_test_cpufeature,
[CONDITION_OS_RELEASE] = condition_test_osrelease,
[CONDITION_MEMORY_PRESSURE] = condition_test_psi,
[CONDITION_CPU_PRESSURE] = condition_test_psi,
[CONDITION_IO_PRESSURE] = condition_test_psi,
};
int r, b;
@ -1119,6 +1249,9 @@ static const char* const condition_type_table[_CONDITION_TYPE_MAX] = {
[CONDITION_ENVIRONMENT] = "ConditionEnvironment",
[CONDITION_CPU_FEATURE] = "ConditionCPUFeature",
[CONDITION_OS_RELEASE] = "ConditionOSRelease",
[CONDITION_MEMORY_PRESSURE] = "ConditionMemoryPressure",
[CONDITION_CPU_PRESSURE] = "ConditionCPUPressure",
[CONDITION_IO_PRESSURE] = "ConditionIOPressure",
};
DEFINE_STRING_TABLE_LOOKUP(condition_type, ConditionType);
@ -1153,6 +1286,9 @@ static const char* const assert_type_table[_CONDITION_TYPE_MAX] = {
[CONDITION_ENVIRONMENT] = "AssertEnvironment",
[CONDITION_CPU_FEATURE] = "AssertCPUFeature",
[CONDITION_OS_RELEASE] = "AssertOSRelease",
[CONDITION_MEMORY_PRESSURE] = "AssertMemoryPressure",
[CONDITION_CPU_PRESSURE] = "AssertCPUPressure",
[CONDITION_IO_PRESSURE] = "AssertIOPressure",
};
DEFINE_STRING_TABLE_LOOKUP(assert_type, ConditionType);

View file

@ -22,6 +22,9 @@ typedef enum ConditionType {
CONDITION_ENVIRONMENT,
CONDITION_CPU_FEATURE,
CONDITION_OS_RELEASE,
CONDITION_MEMORY_PRESSURE,
CONDITION_CPU_PRESSURE,
CONDITION_IO_PRESSURE,
CONDITION_NEEDS_UPDATE,
CONDITION_FIRST_BOOT,

View file

@ -25,6 +25,7 @@
#include "nulstr-util.h"
#include "os-util.h"
#include "process-util.h"
#include "psi-util.h"
#include "selinux-util.h"
#include "set.h"
#include "smack-util.h"
@ -1031,4 +1032,158 @@ TEST(condition_test_os_release) {
condition_free(condition);
}
/* Exercises the ConditionMemoryPressure=/ConditionCPUPressure=/ConditionIOPressure= checks through
 * condition_test(). Malformed parameters are expected to yield a negative result. For well-formed
 * ones only "no error" (>= 0) is asserted, presumably because the actual pressure of the machine
 * running the test is not known in advance. */
TEST(condition_test_psi) {
Condition *condition;
CGroupMask mask;
int r;
/* Nothing can be tested when PSI is unavailable. */
if (!is_pressure_supported())
return (void) log_notice("Pressure Stall Information (PSI) is not supported, skipping %s", __func__);
/* Part 1: malformed parameters, each must be rejected with an error. */
/* Empty parameter. */
condition = condition_new(CONDITION_MEMORY_PRESSURE, "", false, false);
assert_se(condition);
assert_se(condition_test(condition, environ) < 0);
condition_free(condition);
/* Not a percentage at all. */
condition = condition_new(CONDITION_CPU_PRESSURE, "sbarabau", false, false);
assert_se(condition);
assert_se(condition_test(condition, environ) < 0);
condition_free(condition);
/* Trailing garbage directly after the percentage. */
condition = condition_new(CONDITION_MEMORY_PRESSURE, "10%sbarabau", false, false);
assert_se(condition);
assert_se(condition_test(condition, environ) < 0);
condition_free(condition);
/* Trailing garbage separated by a space instead of "/". */
condition = condition_new(CONDITION_CPU_PRESSURE, "10% sbarabau", false, false);
assert_se(condition);
assert_se(condition_test(condition, environ) < 0);
condition_free(condition);
/* Negative value. */
condition = condition_new(CONDITION_CPU_PRESSURE, "-10", false, false);
assert_se(condition);
assert_se(condition_test(condition, environ) < 0);
condition_free(condition);
/* Timespan other than 10sec, 1min or 5min. */
condition = condition_new(CONDITION_CPU_PRESSURE, "10%/10min", false, false);
assert_se(condition);
assert_se(condition_test(condition, environ) < 0);
condition_free(condition);
/* Percentage and timespan in the wrong order. */
condition = condition_new(CONDITION_CPU_PRESSURE, "10min/10%", false, false);
assert_se(condition);
assert_se(condition_test(condition, environ) < 0);
condition_free(condition);
/* Valid timespan, but separated by a space instead of "/". */
condition = condition_new(CONDITION_CPU_PRESSURE, "10% 5min", false, false);
assert_se(condition);
assert_se(condition_test(condition, environ) < 0);
condition_free(condition);
/* Timespan without a percentage. */
condition = condition_new(CONDITION_CPU_PRESSURE, "/5min", false, false);
assert_se(condition);
assert_se(condition_test(condition, environ) < 0);
condition_free(condition);
/* Bogus value followed by a blank timespan. */
condition = condition_new(CONDITION_IO_PRESSURE, "10s / ", false, false);
assert_se(condition);
assert_se(condition_test(condition, environ) < 0);
condition_free(condition);
/* Part 2: well-formed system-wide thresholds, evaluation must not fail. */
condition = condition_new(CONDITION_MEMORY_PRESSURE, "100%", false, false);
assert_se(condition);
assert_se(condition_test(condition, environ) >= 0);
condition_free(condition);
condition = condition_new(CONDITION_MEMORY_PRESSURE, "0%", false, false);
assert_se(condition);
assert_se(condition_test(condition, environ) >= 0);
condition_free(condition);
/* Fractional percentages are accepted. */
condition = condition_new(CONDITION_MEMORY_PRESSURE, "0.0%", false, false);
assert_se(condition);
assert_se(condition_test(condition, environ) >= 0);
condition_free(condition);
condition = condition_new(CONDITION_CPU_PRESSURE, "100%", false, false);
assert_se(condition);
assert_se(condition_test(condition, environ) >= 0);
condition_free(condition);
condition = condition_new(CONDITION_CPU_PRESSURE, "0%", false, false);
assert_se(condition);
assert_se(condition_test(condition, environ) >= 0);
condition_free(condition);
condition = condition_new(CONDITION_CPU_PRESSURE, "0.0%", false, false);
assert_se(condition);
assert_se(condition_test(condition, environ) >= 0);
condition_free(condition);
/* Two fractional digits. */
condition = condition_new(CONDITION_CPU_PRESSURE, "0.01%", false, false);
assert_se(condition);
assert_se(condition_test(condition, environ) >= 0);
condition_free(condition);
/* Explicit timespan, without and with whitespace around the "/". */
condition = condition_new(CONDITION_CPU_PRESSURE, "0.0%/10sec", false, false);
assert_se(condition);
assert_se(condition_test(condition, environ) >= 0);
condition_free(condition);
condition = condition_new(CONDITION_CPU_PRESSURE, "100.0% / 1min", false, false);
assert_se(condition);
assert_se(condition_test(condition, environ) >= 0);
condition_free(condition);
condition = condition_new(CONDITION_IO_PRESSURE, "50.0% / 1min", false, false);
assert_se(condition);
assert_se(condition_test(condition, environ) >= 0);
condition_free(condition);
/* Part 3: per-slice checks. These are only run on the unified cgroup hierarchy with the memory
 * and CPU controllers available; otherwise the rest of the test is skipped. */
r = cg_all_unified();
if (r < 0)
return (void) log_notice("Failed to determine whether the unified cgroups hierarchy is used, skipping %s", __func__);
if (r == 0)
return (void) log_notice("Requires the unified cgroups hierarchy, skipping %s", __func__);
if (cg_mask_supported(&mask) < 0)
return (void) log_notice("Failed to get supported cgroup controllers, skipping %s", __func__);
if (!FLAGS_SET(mask, CGROUP_MASK_MEMORY))
return (void) log_notice("Requires the cgroup memory controller, skipping %s", __func__);
if (!FLAGS_SET(mask, CGROUP_MASK_CPU))
return (void) log_notice("Requires the cgroup CPU controller, skipping %s", __func__);
/* Blank slice name and blank threshold must be rejected. */
condition = condition_new(CONDITION_MEMORY_PRESSURE, " : / ", false, false);
assert_se(condition);
assert_se(condition_test(condition, environ) < 0);
condition_free(condition);
/* A slice that (hopefully) does not exist: the condition is expected to hold (> 0). */
condition = condition_new(CONDITION_CPU_PRESSURE, "hopefullythisisnotarealone.slice:100% / 10sec", false, false);
assert_se(condition);
assert_se(condition_test(condition, environ) > 0);
condition_free(condition);
/* The root slice "-.slice", with and without an explicit timespan. */
condition = condition_new(CONDITION_CPU_PRESSURE, "-.slice:100.0% / 1min", false, false);
assert_se(condition);
assert_se(condition_test(condition, environ) >= 0);
condition_free(condition);
condition = condition_new(CONDITION_MEMORY_PRESSURE, "-.slice:0.0%/5min", false, false);
assert_se(condition);
assert_se(condition_test(condition, environ) >= 0);
condition_free(condition);
condition = condition_new(CONDITION_MEMORY_PRESSURE, "-.slice:100.0%", false, false);
assert_se(condition);
assert_se(condition_test(condition, environ) >= 0);
condition_free(condition);
/* NOTE(review): the IO controller is not checked above, only the memory and CPU ones. */
condition = condition_new(CONDITION_IO_PRESSURE, "-.slice:0.0%", false, false);
assert_se(condition);
assert_se(condition_test(condition, environ) >= 0);
condition_free(condition);
}
DEFINE_TEST_MAIN(LOG_DEBUG);

View file

@ -12,14 +12,17 @@ AssertACPower=
AssertArchitecture=
AssertCapability=
AssertControlGroupController=
AssertCPUPressure=
AssertDirectoryNotEmpty=
AssertFileIsExecutable=
AssertFileNotEmpty=
AssertFirstBoot=
AssertGroup=
AssertHost=
AssertIOPressure=
AssertKernelCommandLine=
AssertKernelVersion=
AssertMemoryPressure=
AssertNeedsUpdate=
AssertOSRelease=
AssertPathExists=
@ -56,14 +59,17 @@ ConditionACPower=
ConditionArchitecture=
ConditionCapability=
ConditionControlGroupController=
ConditionCPUPressure=
ConditionDirectoryNotEmpty=
ConditionFileIsExecutable=
ConditionFileNotEmpty=
ConditionFirstBoot=
ConditionGroup=
ConditionHost=
ConditionIOPressure=
ConditionKernelCommandLine=
ConditionKernelVersion=
ConditionMemoryPressure=
ConditionNeedsUpdate=
ConditionOSRelease=
ConditionPathExists=

View file

@ -4,6 +4,7 @@ After=
AllowIsolate=
AssertACPower=
AssertArchitecture=
AssertCPUPressure=
AssertCPUs=
AssertCapability=
AssertControlGroupController=
@ -14,9 +15,11 @@ AssertFileNotEmpty=
AssertFirstBoot=
AssertGroup=
AssertHost=
AssertIOPressure=
AssertKernelCommandLine=
AssertKernelVersion=
AssertMemory=
AssertMemoryPressure=
AssertNeedsUpdate=
AssertOSRelease=
AssertPathExists=
@ -37,6 +40,7 @@ BindsTo=
CollectMode=
ConditionACPower=
ConditionArchitecture=
ConditionCPUPressure=
ConditionCPUs=
ConditionFirmware=
ConditionCapability=
@ -48,9 +52,11 @@ ConditionFileNotEmpty=
ConditionFirstBoot=
ConditionGroup=
ConditionHost=
ConditionIOPressure=
ConditionKernelCommandLine=
ConditionKernelVersion=
ConditionMemory=
ConditionMemoryPressure=
ConditionNeedsUpdate=
ConditionOSRelease=
ConditionPathExists=