Merge pull request #13481 from Antique/cgroupv2

cgroup: introduce support for cgroup v2 CPUSET controller
This commit is contained in:
Chris Down 2019-09-24 21:57:06 +02:00 committed by GitHub
commit a007d6fc2a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 269 additions and 5 deletions

View file

@ -226,6 +226,8 @@ All cgroup/resource control settings are available for transient units
✓ StartupCPUShares=
✓ CPUQuota=
✓ CPUQuotaPeriodSec=
✓ AllowedCPUs=
✓ AllowedMemoryNodes=
✓ MemoryAccounting=
✓ DefaultMemoryMin=
✓ MemoryMin=

View file

@ -214,6 +214,36 @@
</listitem>
</varlistentry>
<varlistentry>
<term><varname>AllowedCPUs=</varname></term>
<listitem>
<para>Restrict processes to be executed on specific CPUs. Takes a list of CPU indices or ranges separated by either
whitespace or commas. CPU ranges are specified by the lower and upper CPU indices separated by a dash.</para>
<para>Setting <varname>AllowedCPUs=</varname> doesn't guarantee that all of the CPUs will be used by the processes
as it may be limited by parent units. The effective configuration is reported as <varname>EffectiveCPUs=</varname>.</para>
<para>This setting is supported only with the unified control group hierarchy.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>AllowedMemoryNodes=</varname></term>
<listitem>
<para>Restrict processes to be executed on specific memory NUMA nodes. Takes a list of memory NUMA node indices
or ranges separated by either whitespace or commas. Memory NUMA node ranges are specified by the lower and upper
NUMA node indices separated by a dash.</para>
<para>Setting <varname>AllowedMemoryNodes=</varname> doesn't guarantee that all of the memory NUMA nodes will
be used by the processes as it may be limited by parent units. The effective configuration is reported as
<varname>EffectiveMemoryNodes=</varname>.</para>
<para>This setting is supported only with the unified control group hierarchy.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>MemoryAccounting=</varname></term>

View file

@ -2435,8 +2435,7 @@ int cg_mask_supported(CGroupMask *ret) {
if (r < 0)
return r;
/* Currently, we support the cpu, memory, io and pids controller in the unified hierarchy, mask
* everything else off. */
/* Mask controllers that are not supported in unified hierarchy. */
mask &= CGROUP_MASK_V2;
} else {
@ -2905,6 +2904,7 @@ bool fd_is_cgroup_fs(int fd) {
static const char *const cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
[CGROUP_CONTROLLER_CPU] = "cpu",
[CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
[CGROUP_CONTROLLER_CPUSET] = "cpuset",
[CGROUP_CONTROLLER_IO] = "io",
[CGROUP_CONTROLLER_BLKIO] = "blkio",
[CGROUP_CONTROLLER_MEMORY] = "memory",

View file

@ -20,6 +20,7 @@ typedef enum CGroupController {
/* Original cgroup controllers */
CGROUP_CONTROLLER_CPU,
CGROUP_CONTROLLER_CPUACCT, /* v1 only */
CGROUP_CONTROLLER_CPUSET, /* v2 only */
CGROUP_CONTROLLER_IO, /* v2 only */
CGROUP_CONTROLLER_BLKIO, /* v1 only */
CGROUP_CONTROLLER_MEMORY,
@ -40,6 +41,7 @@ typedef enum CGroupController {
typedef enum CGroupMask {
CGROUP_MASK_CPU = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPU),
CGROUP_MASK_CPUACCT = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPUACCT),
CGROUP_MASK_CPUSET = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPUSET),
CGROUP_MASK_IO = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_IO),
CGROUP_MASK_BLKIO = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BLKIO),
CGROUP_MASK_MEMORY = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_MEMORY),
@ -52,7 +54,7 @@ typedef enum CGroupMask {
CGROUP_MASK_V1 = CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT|CGROUP_MASK_BLKIO|CGROUP_MASK_MEMORY|CGROUP_MASK_DEVICES|CGROUP_MASK_PIDS,
/* All real cgroup v2 controllers */
CGROUP_MASK_V2 = CGROUP_MASK_CPU|CGROUP_MASK_IO|CGROUP_MASK_MEMORY|CGROUP_MASK_PIDS,
CGROUP_MASK_V2 = CGROUP_MASK_CPU|CGROUP_MASK_CPUSET|CGROUP_MASK_IO|CGROUP_MASK_MEMORY|CGROUP_MASK_PIDS,
/* All cgroup v2 BPF pseudo-controllers */
CGROUP_MASK_BPF = CGROUP_MASK_BPF_FIREWALL|CGROUP_MASK_BPF_DEVICES,

View file

@ -202,10 +202,15 @@ void cgroup_context_done(CGroupContext *c) {
c->ip_filters_ingress = strv_free(c->ip_filters_ingress);
c->ip_filters_egress = strv_free(c->ip_filters_egress);
cpu_set_reset(&c->cpuset_cpus);
cpu_set_reset(&c->cpuset_mems);
}
void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
_cleanup_free_ char *disable_controllers_str = NULL;
_cleanup_free_ char *cpuset_cpus = NULL;
_cleanup_free_ char *cpuset_mems = NULL;
CGroupIODeviceLimit *il;
CGroupIODeviceWeight *iw;
CGroupIODeviceLatency *l;
@ -224,6 +229,9 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
(void) cg_mask_to_string(c->disable_controllers, &disable_controllers_str);
cpuset_cpus = cpu_set_to_range_string(&c->cpuset_cpus);
cpuset_mems = cpu_set_to_range_string(&c->cpuset_mems);
fprintf(f,
"%sCPUAccounting=%s\n"
"%sIOAccounting=%s\n"
@ -237,6 +245,8 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
"%sStartupCPUShares=%" PRIu64 "\n"
"%sCPUQuotaPerSecSec=%s\n"
"%sCPUQuotaPeriodSec=%s\n"
"%sAllowedCPUs=%s\n"
"%sAllowedMemoryNodes=%s\n"
"%sIOWeight=%" PRIu64 "\n"
"%sStartupIOWeight=%" PRIu64 "\n"
"%sBlockIOWeight=%" PRIu64 "\n"
@ -265,6 +275,8 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
prefix, c->startup_cpu_shares,
prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1),
prefix, format_timespan(v, sizeof(v), c->cpu_quota_period_usec, 1),
prefix, cpuset_cpus,
prefix, cpuset_mems,
prefix, c->io_weight,
prefix, c->startup_io_weight,
prefix, c->blockio_weight,
@ -796,6 +808,16 @@ static uint64_t cgroup_cpu_weight_to_shares(uint64_t weight) {
CGROUP_CPU_SHARES_MIN, CGROUP_CPU_SHARES_MAX);
}
/* Formats the given CPU set as a range string and hands it to set_attribute_and_warn() for the
 * attribute 'name' of the "cpuset" controller of unit 'u'. Nothing is reported back to the caller:
 * if the string cannot be produced the attribute is simply left alone, and the result of the
 * attribute write is discarded. */
static void cgroup_apply_unified_cpuset(Unit *u, CPUSet cpus, const char *name) {
        _cleanup_free_ char *range = cpu_set_to_range_string(&cpus);

        /* Only attempt the write when we actually have a string to write. */
        if (range)
                (void) set_attribute_and_warn(u, "cpuset", name, range);
}
static bool cgroup_context_has_io_config(CGroupContext *c) {
return c->io_accounting ||
c->io_weight != CGROUP_WEIGHT_INVALID ||
@ -1036,6 +1058,11 @@ static void cgroup_context_apply(
}
}
if ((apply_mask & CGROUP_MASK_CPUSET) && !is_local_root) {
cgroup_apply_unified_cpuset(u, c->cpuset_cpus, "cpuset.cpus");
cgroup_apply_unified_cpuset(u, c->cpuset_mems, "cpuset.mems");
}
/* The 'io' controller attributes are not exported on the host's root cgroup (being a pure cgroup v2
* controller), and in case of containers we want to leave control of these attributes to the container manager
* (and we couldn't access that stuff anyway, even if we tried if proper delegation is used). */
@ -1408,6 +1435,9 @@ static CGroupMask unit_get_cgroup_mask(Unit *u) {
c->cpu_quota_per_sec_usec != USEC_INFINITY)
mask |= CGROUP_MASK_CPU;
if (c->cpuset_cpus.set || c->cpuset_mems.set)
mask |= CGROUP_MASK_CPUSET;
if (cgroup_context_has_io_config(c) || cgroup_context_has_blockio_config(c))
mask |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO;
@ -3560,4 +3590,32 @@ static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] =
[CGROUP_STRICT] = "strict",
};
/* Reads the "cpuset" controller attribute 'name' of the unit's cgroup and parses it into 'cpus'.
 *
 * Returns -ENODATA if the unit has no cgroup path, if the cpuset controller is not part of the
 * unit's realized cgroup mask, if cg_all_unified() reports that we are not fully on the unified
 * hierarchy, or if the attribute does not exist (-ENOENT from cg_get_attribute()). Any other
 * negative value from cg_all_unified() or cg_get_attribute() is passed through unchanged.
 * Otherwise the result of parse_cpu_set_full() is returned. */
int unit_get_cpuset(Unit *u, CPUSet *cpus, const char *name) {
        _cleanup_free_ char *v = NULL;
        int r;

        assert(u);
        assert(cpus);

        if (!u->cgroup_path)
                return -ENODATA;

        if ((u->cgroup_realized_mask & CGROUP_MASK_CPUSET) == 0)
                return -ENODATA;

        r = cg_all_unified();
        if (r < 0)
                return r;
        if (r == 0)
                return -ENODATA;

        /* r > 0 is guaranteed at this point, hence no further check is needed before reading. */
        r = cg_get_attribute("cpuset", u->cgroup_path, name, &v);
        if (r == -ENOENT)
                return -ENODATA;
        if (r < 0)
                return r;

        return parse_cpu_set_full(v, cpus, false, NULL, NULL, 0, NULL);
}
DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy);

View file

@ -4,6 +4,7 @@
#include <stdbool.h>
#include "cgroup-util.h"
#include "cpu-set-util.h"
#include "ip-address-access.h"
#include "list.h"
#include "time-util.h"
@ -92,6 +93,9 @@ struct CGroupContext {
usec_t cpu_quota_per_sec_usec;
usec_t cpu_quota_period_usec;
CPUSet cpuset_cpus;
CPUSet cpuset_mems;
uint64_t io_weight;
uint64_t startup_io_weight;
LIST_HEAD(CGroupIODeviceWeight, io_device_weights);
@ -254,3 +258,5 @@ CGroupDevicePolicy cgroup_device_policy_from_string(const char *s) _pure_;
bool unit_cgroup_delegate(Unit *u);
int compare_job_priority(const void *a, const void *b);
int unit_get_cpuset(Unit *u, CPUSet *cpus, const char *name);

View file

@ -71,6 +71,27 @@ static int property_get_delegate_controllers(
return property_get_cgroup_mask(bus, path, interface, property, reply, &c->delegate_controllers, error);
}
/* D-Bus property getter that serializes the CPUSet passed in 'userdata' as a byte array ("ay") and
 * appends it to 'reply'.
 *
 * Returns the negative error from cpu_set_to_dbus() if serialization fails, otherwise the result of
 * sd_bus_message_append_array(). */
static int property_get_cpuset(
                sd_bus *bus,
                const char *path,
                const char *interface,
                const char *property,
                sd_bus_message *reply,
                void *userdata,
                sd_bus_error *error) {

        CPUSet *cpus = userdata;
        _cleanup_free_ uint8_t *array = NULL;
        size_t allocated = 0;
        int r;

        assert(bus);
        assert(reply);
        assert(cpus);

        /* Do not ignore a failure here: on error 'allocated' is not guaranteed to have been written,
         * and it must not be used as the array length below. */
        r = cpu_set_to_dbus(cpus, &array, &allocated);
        if (r < 0)
                return r;

        return sd_bus_message_append_array(reply, 'y', array, allocated);
}
static int property_get_io_device_weight(
sd_bus *bus,
const char *path,
@ -332,6 +353,8 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_PROPERTY("StartupCPUShares", "t", NULL, offsetof(CGroupContext, startup_cpu_shares), 0),
SD_BUS_PROPERTY("CPUQuotaPerSecUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_per_sec_usec), 0),
SD_BUS_PROPERTY("CPUQuotaPeriodUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_period_usec), 0),
SD_BUS_PROPERTY("AllowedCPUs", "ay", property_get_cpuset, offsetof(CGroupContext, cpuset_cpus), 0),
SD_BUS_PROPERTY("AllowedMemoryNodes", "ay", property_get_cpuset, offsetof(CGroupContext, cpuset_mems), 0),
SD_BUS_PROPERTY("IOAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, io_accounting), 0),
SD_BUS_PROPERTY("IOWeight", "t", NULL, offsetof(CGroupContext, io_weight), 0),
SD_BUS_PROPERTY("StartupIOWeight", "t", NULL, offsetof(CGroupContext, startup_io_weight), 0),
@ -856,6 +879,42 @@ int bus_cgroup_set_property(
return 1;
} else if (STR_IN_SET(name, "AllowedCPUs", "AllowedMemoryNodes")) {
const void *a;
size_t n;
_cleanup_(cpu_set_reset) CPUSet new_set = {};
r = sd_bus_message_read_array(message, 'y', &a, &n);
if (r < 0)
return r;
r = cpu_set_from_dbus(a, n, &new_set);
if (r < 0)
return r;
if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
_cleanup_free_ char *setstr = NULL;
_cleanup_free_ char *data = NULL;
CPUSet *set;
setstr = cpu_set_to_range_string(&new_set);
if (streq(name, "AllowedCPUs"))
set = &c->cpuset_cpus;
else
set = &c->cpuset_mems;
if (asprintf(&data, "%s=%s", name, setstr) < 0)
return -ENOMEM;
cpu_set_reset(set);
cpu_set_add_all(set, &new_set);
unit_invalidate_cgroup(u, CGROUP_MASK_CPUSET);
unit_write_setting(u, flags, name, data);
}
return 1;
} else if ((iol_type = cgroup_io_limit_type_from_string(name)) >= 0) {
const char *path;
unsigned n = 0;

View file

@ -957,6 +957,52 @@ static int property_get_cpu_usage(
return sd_bus_message_append(reply, "t", ns);
}
/* D-Bus property getter: queries the unit's "cpuset.cpus.effective" attribute via unit_get_cpuset()
 * and appends the result to 'reply' as a byte array ("ay").
 *
 * The query itself is best-effort: its result is ignored and whatever is in 'cpus' afterwards is
 * serialized (presumably the empty set if the query failed — verify against parse_cpu_set_full()).
 * A failure to serialize the set is returned to the caller. */
static int property_get_cpuset_cpus(
                sd_bus *bus,
                const char *path,
                const char *interface,
                const char *property,
                sd_bus_message *reply,
                void *userdata,
                sd_bus_error *error) {

        Unit *u = userdata;
        _cleanup_(cpu_set_reset) CPUSet cpus = {};
        _cleanup_free_ uint8_t *array = NULL;
        size_t allocated = 0;
        int r;

        assert(bus);
        assert(reply);
        assert(u);

        (void) unit_get_cpuset(u, &cpus, "cpuset.cpus.effective");

        /* Unlike the query above this must not be ignored: on error 'allocated' is not guaranteed to
         * have been written, and it must not be used as the array length below. */
        r = cpu_set_to_dbus(&cpus, &array, &allocated);
        if (r < 0)
                return r;

        return sd_bus_message_append_array(reply, 'y', array, allocated);
}
/* D-Bus property getter: queries the unit's "cpuset.mems.effective" attribute via unit_get_cpuset()
 * and appends the result to 'reply' as a byte array ("ay").
 *
 * The query itself is best-effort: its result is ignored and whatever is in 'mems' afterwards is
 * serialized (presumably the empty set if the query failed — verify against parse_cpu_set_full()).
 * A failure to serialize the set is returned to the caller. */
static int property_get_cpuset_mems(
                sd_bus *bus,
                const char *path,
                const char *interface,
                const char *property,
                sd_bus_message *reply,
                void *userdata,
                sd_bus_error *error) {

        Unit *u = userdata;
        _cleanup_(cpu_set_reset) CPUSet mems = {};
        _cleanup_free_ uint8_t *array = NULL;
        size_t allocated = 0;
        int r;

        assert(bus);
        assert(reply);
        assert(u);

        (void) unit_get_cpuset(u, &mems, "cpuset.mems.effective");

        /* Unlike the query above this must not be ignored: on error 'allocated' is not guaranteed to
         * have been written, and it must not be used as the array length below. */
        r = cpu_set_to_dbus(&mems, &array, &allocated);
        if (r < 0)
                return r;

        return sd_bus_message_append_array(reply, 'y', array, allocated);
}
static int property_get_cgroup(
sd_bus *bus,
const char *path,
@ -1306,6 +1352,8 @@ const sd_bus_vtable bus_unit_cgroup_vtable[] = {
SD_BUS_PROPERTY("ControlGroup", "s", property_get_cgroup, 0, 0),
SD_BUS_PROPERTY("MemoryCurrent", "t", property_get_current_memory, 0, 0),
SD_BUS_PROPERTY("CPUUsageNSec", "t", property_get_cpu_usage, 0, 0),
SD_BUS_PROPERTY("EffectiveCPUs", "ay", property_get_cpuset_cpus, 0, 0),
SD_BUS_PROPERTY("EffectiveMemoryNodes", "ay", property_get_cpuset_mems, 0, 0),
SD_BUS_PROPERTY("TasksCurrent", "t", property_get_current_tasks, 0, 0),
SD_BUS_PROPERTY("IPIngressBytes", "t", property_get_ip_counter, 0, 0),
SD_BUS_PROPERTY("IPIngressPackets", "t", property_get_ip_counter, 0, 0),

View file

@ -173,6 +173,8 @@ $1.CPUShares, config_parse_cpu_shares, 0,
$1.StartupCPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.startup_cpu_shares)
$1.CPUQuota, config_parse_cpu_quota, 0, offsetof($1, cgroup_context)
$1.CPUQuotaPeriodSec, config_parse_sec_def_infinity, 0, offsetof($1, cgroup_context.cpu_quota_period_usec)
$1.AllowedCPUs, config_parse_cpuset_cpus, 0, offsetof($1, cgroup_context)
$1.AllowedMemoryNodes, config_parse_cpuset_mems, 0, offsetof($1, cgroup_context)
$1.MemoryAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.memory_accounting)
$1.MemoryMin, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
$1.DefaultMemoryMin, config_parse_memory_limit, 0, offsetof($1, cgroup_context)

View file

@ -3149,6 +3149,44 @@ int config_parse_cpu_quota(
return 0;
}
int config_parse_cpuset_cpus(
const char *unit,
const char *filename,
unsigned line,
const char *section,
unsigned section_line,
const char *lvalue,
int ltype,
const char *rvalue,
void *data,
void *userdata) {
CGroupContext *c = data;
(void) parse_cpu_set_extend(rvalue, &c->cpuset_cpus, true, unit, filename, line, lvalue);
return 0;
}
int config_parse_cpuset_mems(
const char *unit,
const char *filename,
unsigned line,
const char *section,
unsigned section_line,
const char *lvalue,
int ltype,
const char *rvalue,
void *data,
void *userdata) {
CGroupContext *c = data;
(void) parse_cpu_set_extend(rvalue, &c->cpuset_mems, true, unit, filename, line, lvalue);
return 0;
}
int config_parse_memory_limit(
const char *unit,
const char *filename,

View file

@ -92,6 +92,8 @@ CONFIG_PARSER_PROTOTYPE(config_parse_set_status);
CONFIG_PARSER_PROTOTYPE(config_parse_namespace_path_strv);
CONFIG_PARSER_PROTOTYPE(config_parse_temporary_filesystems);
CONFIG_PARSER_PROTOTYPE(config_parse_cpu_quota);
CONFIG_PARSER_PROTOTYPE(config_parse_cpuset_cpus);
CONFIG_PARSER_PROTOTYPE(config_parse_cpuset_mems);
CONFIG_PARSER_PROTOTYPE(config_parse_protect_home);
CONFIG_PARSER_PROTOTYPE(config_parse_protect_system);
CONFIG_PARSER_PROTOTYPE(config_parse_bus_name);

View file

@ -435,6 +435,22 @@ static int bus_append_cgroup_property(sd_bus_message *m, const char *field, cons
return bus_append_cg_cpu_shares_parse(m, field, eq);
if (STR_IN_SET(field, "AllowedCPUs", "AllowedMemoryNodes")) {
_cleanup_(cpu_set_reset) CPUSet cpuset = {};
_cleanup_free_ uint8_t *array = NULL;
size_t allocated;
r = parse_cpu_set(eq, &cpuset);
if (r < 0)
return log_error_errno(r, "Failed to parse %s value: %s", field, eq);
r = cpu_set_to_dbus(&cpuset, &array, &allocated);
if (r < 0)
return log_error_errno(r, "Failed to serialize CPUSet: %m");
return bus_append_byte_array(m, field, array, allocated);
}
if (STR_IN_SET(field, "BlockIOWeight", "StartupBlockIOWeight"))
return bus_append_cg_blkio_weight_parse(m, field, eq);

View file

@ -5411,7 +5411,7 @@ static int print_property(const char *name, const char *expected_value, sd_bus_m
bus_print_property_value(name, expected_value, value, strempty(fields));
return 1;
} else if (contents[0] == SD_BUS_TYPE_BYTE && STR_IN_SET(name, "CPUAffinity", "NUMAMask")) {
} else if (contents[0] == SD_BUS_TYPE_BYTE && STR_IN_SET(name, "CPUAffinity", "NUMAMask", "AllowedCPUs", "AllowedMemoryNodes", "EffectiveCPUs", "EffectiveMemoryNodes")) {
_cleanup_free_ char *affinity = NULL;
_cleanup_(cpu_set_reset) CPUSet set = {};
const void *a;

View file

@ -129,9 +129,10 @@ static void test_cg_mask_to_string_one(CGroupMask mask, const char *t) {
static void test_cg_mask_to_string(void) {
test_cg_mask_to_string_one(0, NULL);
test_cg_mask_to_string_one(_CGROUP_MASK_ALL, "cpu cpuacct io blkio memory devices pids bpf-firewall bpf-devices");
test_cg_mask_to_string_one(_CGROUP_MASK_ALL, "cpu cpuacct cpuset io blkio memory devices pids bpf-firewall bpf-devices");
test_cg_mask_to_string_one(CGROUP_MASK_CPU, "cpu");
test_cg_mask_to_string_one(CGROUP_MASK_CPUACCT, "cpuacct");
test_cg_mask_to_string_one(CGROUP_MASK_CPUSET, "cpuset");
test_cg_mask_to_string_one(CGROUP_MASK_IO, "io");
test_cg_mask_to_string_one(CGROUP_MASK_BLKIO, "blkio");
test_cg_mask_to_string_one(CGROUP_MASK_MEMORY, "memory");