mirror of
https://github.com/systemd/systemd
synced 2024-07-21 10:17:21 +00:00
Merge pull request #8149 from poettering/fake-root-cgroup
Properly synthesize CPU+memory accounting data for the root cgroup
This commit is contained in:
commit
902c8502ad
9
TODO
9
TODO
|
@ -59,15 +59,6 @@ Features:
|
|||
sd_id128_get_machine_app_specific(). After all on long-running systems both
|
||||
IDs have similar properties.
|
||||
|
||||
* emulate properties of the root cgroup on controllers that don't support such
|
||||
properties natively on cpu/io/memory, the way we already do it for
|
||||
"pids". Also, add the same logic to cgtop.
|
||||
|
||||
* set TasksAccounting=1 on the root slice if we are running on the root cgroup,
|
||||
and similar for the others, as soon as we emulate them properly. After all,
|
||||
Linux keeps these system-wide stats anyway, and it costs nothing to expose
|
||||
them.
|
||||
|
||||
* sd-bus: add vtable flag, that may be used to request client creds implicitly
|
||||
and asynchronously before dispatching the operation
|
||||
|
||||
|
|
|
@ -228,6 +228,12 @@
|
|||
indefinitely.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>-1</option></term>
|
||||
|
||||
<listitem><para>A shortcut for <option>--iterations=1</option>.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>-d</option></term>
|
||||
<term><option>--delay=</option></term>
|
||||
|
|
67
mkosi.build
67
mkosi.build
|
@ -27,43 +27,46 @@ set -ex
|
|||
|
||||
export LC_CTYPE=en_US.UTF-8
|
||||
|
||||
sysvinit_path=`realpath /etc/init.d`
|
||||
if [ ! -f "$BUILDDIR"/build.ninja ] ; then
|
||||
sysvinit_path=`realpath /etc/init.d`
|
||||
|
||||
nobody_user=`id -u -n 65534 2> /dev/null`
|
||||
if [ "$nobody_user" != "" ] ; then
|
||||
# Validate that we can translate forth and back
|
||||
if [ "`id -u $nobody_user`" != 65534 ] ; then
|
||||
nobody_user=""
|
||||
nobody_user=`id -u -n 65534 2> /dev/null`
|
||||
if [ "$nobody_user" != "" ] ; then
|
||||
# Validate that we can translate forth and back
|
||||
if [ "`id -u $nobody_user`" != 65534 ] ; then
|
||||
nobody_user=""
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
if [ "$nobody_user" = "" ] ; then
|
||||
if id -u nobody 2> /dev/null ; then
|
||||
# The "nobody" user is defined already for something else, pick the Fedora name
|
||||
nobody_user=nfsnobody
|
||||
else
|
||||
# The "nobody" user name is free, use it
|
||||
nobody_user=nobody
|
||||
if [ "$nobody_user" = "" ] ; then
|
||||
if id -u nobody 2> /dev/null ; then
|
||||
# The "nobody" user is defined already for something else, pick the Fedora name
|
||||
nobody_user=nfsnobody
|
||||
else
|
||||
# The "nobody" user name is free, use it
|
||||
nobody_user=nobody
|
||||
fi
|
||||
fi
|
||||
|
||||
nobody_group=`id -g -n 65534 2> /dev/null`
|
||||
if [ "$nobody_group" != "" ] ; then
|
||||
# Validate that we can translate forth and back
|
||||
if [ "`id -g $nobody_group`" != 65534 ] ; then
|
||||
nobody_group=""
|
||||
fi
|
||||
fi
|
||||
if [ "$nobody_group" = "" ] ; then
|
||||
if id -u nobody 2> /dev/null ; then
|
||||
# The "nobody" group is defined already for something else, pick the Fedora name
|
||||
nobody_group=nfsnobody
|
||||
else
|
||||
# The "nobody" group name is free, use it
|
||||
nobody_group=nobody
|
||||
fi
|
||||
fi
|
||||
|
||||
meson "$BUILDDIR" -D "sysvinit-path=$sysvinit_path" -D default-hierarchy=unified -D man=false -D "nobody-user=$nobody_user" -D "nobody-group=$nobody_group"
|
||||
fi
|
||||
|
||||
nobody_group=`id -g -n 65534 2> /dev/null`
|
||||
if [ "$nobody_group" != "" ] ; then
|
||||
# Validate that we can translate forth and back
|
||||
if [ "`id -g $nobody_group`" != 65534 ] ; then
|
||||
nobody_group=""
|
||||
fi
|
||||
fi
|
||||
if [ "$nobody_group" = "" ] ; then
|
||||
if id -u nobody 2> /dev/null ; then
|
||||
# The "nobody" group is defined already for something else, pick the Fedora name
|
||||
nobody_group=nfsnobody
|
||||
else
|
||||
# The "nobody" group name is free, use it
|
||||
nobody_group=nobody
|
||||
fi
|
||||
fi
|
||||
|
||||
[ -f "$BUILDDIR"/build.ninja ] || meson "$BUILDDIR" -D "sysvinit-path=$sysvinit_path" -D default-hierarchy=unified -D man=false -D "nobody-user=$nobody_user" -D "nobody-group=$nobody_group"
|
||||
ninja -C "$BUILDDIR" all
|
||||
[ "$WITH_TESTS" = 0 ] || ninja -C "$BUILDDIR" test || ( RET="$?" ; cat "$BUILDDIR"/meson-logs/testlog.txt ; exit "$RET" )
|
||||
ninja -C "$BUILDDIR" install
|
||||
|
|
|
@ -2030,46 +2030,84 @@ int cg_get_attribute(const char *controller, const char *path, const char *attri
|
|||
return read_one_line_file(p, ret);
|
||||
}
|
||||
|
||||
int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values) {
|
||||
_cleanup_free_ char *filename = NULL, *content = NULL;
|
||||
char *line, *p;
|
||||
int i, r;
|
||||
int cg_get_keyed_attribute(
|
||||
const char *controller,
|
||||
const char *path,
|
||||
const char *attribute,
|
||||
char **keys,
|
||||
char **ret_values) {
|
||||
|
||||
for (i = 0; keys[i]; i++)
|
||||
values[i] = NULL;
|
||||
_cleanup_free_ char *filename = NULL, *contents = NULL;
|
||||
_cleanup_fclose_ FILE *f = NULL;
|
||||
const char *p;
|
||||
size_t n, i, n_done = 0;
|
||||
char **v;
|
||||
int r;
|
||||
|
||||
/* Reads one or more fields of a cgroupsv2 keyed attribute file. The 'keys' parameter should be an strv with
|
||||
* all keys to retrieve. The 'ret_values' parameter should be passed as a string array with the same number of
|
||||
* entries as 'keys'. On success each entry will be set to the value of the matching key.
|
||||
*
|
||||
* If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. */
|
||||
|
||||
r = cg_get_path(controller, path, attribute, &filename);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = read_full_file(filename, &content, NULL);
|
||||
r = read_full_file(filename, &contents, NULL);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
p = content;
|
||||
while ((line = strsep(&p, "\n"))) {
|
||||
char *key;
|
||||
n = strv_length(keys);
|
||||
if (n == 0) /* No keys to retrieve? That's easy, we are done then */
|
||||
return 0;
|
||||
|
||||
key = strsep(&line, " ");
|
||||
/* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */
|
||||
v = newa0(char*, n);
|
||||
|
||||
for (i = 0; keys[i]; i++) {
|
||||
if (streq(key, keys[i])) {
|
||||
values[i] = strdup(line);
|
||||
break;
|
||||
for (p = contents; *p;) {
|
||||
const char *w = NULL;
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
if (!v[i]) {
|
||||
w = first_word(p, keys[i]);
|
||||
if (w)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (w) {
|
||||
size_t l;
|
||||
|
||||
l = strcspn(w, NEWLINE);
|
||||
v[i] = strndup(w, l);
|
||||
if (!v[i]) {
|
||||
r = -ENOMEM;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
n_done++;
|
||||
if (n_done >= n)
|
||||
goto done;
|
||||
|
||||
p = w + l;
|
||||
} else
|
||||
p += strcspn(p, NEWLINE);
|
||||
|
||||
p += strspn(p, NEWLINE);
|
||||
}
|
||||
|
||||
for (i = 0; keys[i]; i++) {
|
||||
if (!values[i]) {
|
||||
for (i = 0; keys[i]; i++) {
|
||||
values[i] = mfree(values[i]);
|
||||
}
|
||||
return -ENOENT;
|
||||
}
|
||||
}
|
||||
r = -ENXIO;
|
||||
|
||||
fail:
|
||||
for (i = 0; i < n; i++)
|
||||
free(v[i]);
|
||||
|
||||
return r;
|
||||
|
||||
done:
|
||||
memcpy(ret_values, v, sizeof(char*) * n);
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
|
||||
|
|
|
@ -186,7 +186,7 @@ int cg_create_and_attach(const char *controller, const char *path, pid_t pid);
|
|||
|
||||
int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value);
|
||||
int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret);
|
||||
int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values);
|
||||
int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, char **keys, char **values);
|
||||
|
||||
int cg_set_access(const char *controller, const char *path, uid_t uid, gid_t gid);
|
||||
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
#include <errno.h>
|
||||
|
||||
#include "alloc-util.h"
|
||||
#include "def.h"
|
||||
#include "fd-util.h"
|
||||
#include "fileio.h"
|
||||
#include "parse-util.h"
|
||||
#include "process-util.h"
|
||||
|
@ -136,3 +138,131 @@ int procfs_tasks_get_current(uint64_t *ret) {
|
|||
|
||||
return safe_atou64(nr, ret);
|
||||
}
|
||||
|
||||
static uint64_t calc_gcd64(uint64_t a, uint64_t b) {
|
||||
|
||||
while (b > 0) {
|
||||
uint64_t t;
|
||||
|
||||
t = a % b;
|
||||
|
||||
a = b;
|
||||
b = t;
|
||||
}
|
||||
|
||||
return a;
|
||||
}
|
||||
|
||||
int procfs_cpu_get_usage(nsec_t *ret) {
|
||||
_cleanup_free_ char *first_line = NULL;
|
||||
unsigned long user_ticks = 0, nice_ticks = 0, system_ticks = 0,
|
||||
irq_ticks = 0, softirq_ticks = 0,
|
||||
guest_ticks = 0, guest_nice_ticks = 0;
|
||||
long ticks_per_second;
|
||||
uint64_t sum, gcd, a, b;
|
||||
const char *p;
|
||||
int r;
|
||||
|
||||
assert(ret);
|
||||
|
||||
r = read_one_line_file("/proc/stat", &first_line);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
p = first_word(first_line, "cpu");
|
||||
if (!p)
|
||||
return -EINVAL;
|
||||
|
||||
if (sscanf(p, "%lu %lu %lu %*u %*u %lu %lu %*u %lu %lu",
|
||||
&user_ticks,
|
||||
&nice_ticks,
|
||||
&system_ticks,
|
||||
&irq_ticks,
|
||||
&softirq_ticks,
|
||||
&guest_ticks,
|
||||
&guest_nice_ticks) < 5) /* we only insist on the first five fields */
|
||||
return -EINVAL;
|
||||
|
||||
ticks_per_second = sysconf(_SC_CLK_TCK);
|
||||
if (ticks_per_second < 0)
|
||||
return -errno;
|
||||
assert(ticks_per_second > 0);
|
||||
|
||||
sum = (uint64_t) user_ticks + (uint64_t) nice_ticks + (uint64_t) system_ticks +
|
||||
(uint64_t) irq_ticks + (uint64_t) softirq_ticks +
|
||||
(uint64_t) guest_ticks + (uint64_t) guest_nice_ticks;
|
||||
|
||||
/* Let's reduce this fraction before we apply it to avoid overflows when converting this to nsec */
|
||||
gcd = calc_gcd64(NSEC_PER_SEC, ticks_per_second);
|
||||
|
||||
a = (uint64_t) NSEC_PER_SEC / gcd;
|
||||
b = (uint64_t) ticks_per_second / gcd;
|
||||
|
||||
*ret = DIV_ROUND_UP((nsec_t) sum * (nsec_t) a, (nsec_t) b);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int procfs_memory_get_current(uint64_t *ret) {
|
||||
uint64_t mem_total = UINT64_MAX, mem_free = UINT64_MAX;
|
||||
_cleanup_fclose_ FILE *f = NULL;
|
||||
int r;
|
||||
|
||||
assert(ret);
|
||||
|
||||
f = fopen("/proc/meminfo", "re");
|
||||
if (!f)
|
||||
return -errno;
|
||||
|
||||
for (;;) {
|
||||
_cleanup_free_ char *line = NULL;
|
||||
uint64_t *v;
|
||||
char *p, *e;
|
||||
size_t n;
|
||||
|
||||
r = read_line(f, LONG_LINE_MAX, &line);
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (r == 0)
|
||||
return -EINVAL; /* EOF: Couldn't find one or both fields? */
|
||||
|
||||
p = first_word(line, "MemTotal:");
|
||||
if (p)
|
||||
v = &mem_total;
|
||||
else {
|
||||
p = first_word(line, "MemFree:");
|
||||
if (p)
|
||||
v = &mem_free;
|
||||
else
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Determine length of numeric value */
|
||||
n = strspn(p, DIGITS);
|
||||
if (n == 0)
|
||||
return -EINVAL;
|
||||
e = p + n;
|
||||
|
||||
/* Ensure the line ends in " kB" */
|
||||
n = strspn(e, WHITESPACE);
|
||||
if (n == 0)
|
||||
return -EINVAL;
|
||||
if (!streq(e + n, "kB"))
|
||||
return -EINVAL;
|
||||
|
||||
*e = 0;
|
||||
r = safe_atou64(p, v);
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (*v == UINT64_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
if (mem_total != UINT64_MAX && mem_free != UINT64_MAX)
|
||||
break;
|
||||
}
|
||||
|
||||
if (mem_free > mem_total)
|
||||
return -EINVAL;
|
||||
|
||||
*ret = (mem_total - mem_free) * 1024U;
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -3,6 +3,12 @@
|
|||
|
||||
#include <inttypes.h>
|
||||
|
||||
#include "time-util.h"
|
||||
|
||||
int procfs_tasks_get_limit(uint64_t *ret);
|
||||
int procfs_tasks_set_limit(uint64_t limit);
|
||||
int procfs_tasks_get_current(uint64_t *ret);
|
||||
|
||||
int procfs_cpu_get_usage(nsec_t *ret);
|
||||
|
||||
int procfs_memory_get_current(uint64_t *ret);
|
||||
|
|
|
@ -46,6 +46,7 @@
|
|||
#include "terminal-util.h"
|
||||
#include "unit-name.h"
|
||||
#include "util.h"
|
||||
#include "virt.h"
|
||||
|
||||
typedef struct Group {
|
||||
char *path;
|
||||
|
@ -125,6 +126,30 @@ static const char *maybe_format_bytes(char *buf, size_t l, bool is_valid, uint64
|
|||
return format_bytes(buf, l, t);
|
||||
}
|
||||
|
||||
static bool is_root_cgroup(const char *path) {
|
||||
|
||||
/* Returns true if the specified path belongs to the root cgroup. The root cgroup is special on cgroupsv2 as it
|
||||
* carries only very few attributes in order not to export multiple truths about system state as most
|
||||
* information is available elsewhere in /proc anyway. We need to be able to deal with that, and need to get
|
||||
* our data from different sources in that case.
|
||||
*
|
||||
* There's one extra complication in all of this, though 😣: if the path to the cgroup indicates we are in the
|
||||
* root cgroup this might actually not be the case, because cgroup namespacing might be in effect
|
||||
* (CLONE_NEWCGROUP). Since there's no nice way to distinguish a real cgroup root from a fake namespaced one we
|
||||
* do an explicit container check here, under the assumption that CLONE_NEWCGROUP is generally used when
|
||||
* container managers are used too.
|
||||
*
|
||||
* Note that checking for a container environment is kinda ugly, since in theory people could use cgtop from
|
||||
* inside a container where cgroup namespacing is turned off to watch the host system. However, that's mostly a
|
||||
* theoretical use case, and if people actually try all they'll lose is accounting for the top-level cgroup. Which
|
||||
* isn't too bad. */
|
||||
|
||||
if (detect_container() > 0)
|
||||
return false;
|
||||
|
||||
return isempty(path) || path_equal(path, "/");
|
||||
}
|
||||
|
||||
static int process(
|
||||
const char *controller,
|
||||
const char *path,
|
||||
|
@ -172,7 +197,8 @@ static int process(
|
|||
}
|
||||
}
|
||||
|
||||
if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && IN_SET(arg_count, COUNT_ALL_PROCESSES, COUNT_USERSPACE_PROCESSES)) {
|
||||
if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) &&
|
||||
IN_SET(arg_count, COUNT_ALL_PROCESSES, COUNT_USERSPACE_PROCESSES)) {
|
||||
_cleanup_fclose_ FILE *f = NULL;
|
||||
pid_t pid;
|
||||
|
||||
|
@ -196,7 +222,7 @@ static int process(
|
|||
|
||||
} else if (streq(controller, "pids") && arg_count == COUNT_PIDS) {
|
||||
|
||||
if (isempty(path) || path_equal(path, "/")) {
|
||||
if (is_root_cgroup(path)) {
|
||||
r = procfs_tasks_get_current(&g->n_tasks);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
@ -226,15 +252,18 @@ static int process(
|
|||
uint64_t new_usage;
|
||||
nsec_t timestamp;
|
||||
|
||||
if (all_unified) {
|
||||
const char *keys[] = { "usage_usec", NULL };
|
||||
if (is_root_cgroup(path)) {
|
||||
r = procfs_cpu_get_usage(&new_usage);
|
||||
if (r < 0)
|
||||
return r;
|
||||
} else if (all_unified) {
|
||||
_cleanup_free_ char *val = NULL;
|
||||
|
||||
if (!streq(controller, "cpu"))
|
||||
return 0;
|
||||
|
||||
r = cg_get_keyed_attribute("cpu", path, "cpu.stat", keys, &val);
|
||||
if (r == -ENOENT)
|
||||
r = cg_get_keyed_attribute("cpu", path, "cpu.stat", STRV_MAKE("usage_usec"), &val);
|
||||
if (IN_SET(r, -ENOENT, -ENXIO))
|
||||
return 0;
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
@ -284,24 +313,31 @@ static int process(
|
|||
g->cpu_iteration = iteration;
|
||||
|
||||
} else if (streq(controller, "memory")) {
|
||||
_cleanup_free_ char *p = NULL, *v = NULL;
|
||||
|
||||
if (all_unified)
|
||||
r = cg_get_path(controller, path, "memory.current", &p);
|
||||
else
|
||||
r = cg_get_path(controller, path, "memory.usage_in_bytes", &p);
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (is_root_cgroup(path)) {
|
||||
r = procfs_memory_get_current(&g->memory);
|
||||
if (r < 0)
|
||||
return r;
|
||||
} else {
|
||||
_cleanup_free_ char *p = NULL, *v = NULL;
|
||||
|
||||
r = read_one_line_file(p, &v);
|
||||
if (r == -ENOENT)
|
||||
return 0;
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (all_unified)
|
||||
r = cg_get_path(controller, path, "memory.current", &p);
|
||||
else
|
||||
r = cg_get_path(controller, path, "memory.usage_in_bytes", &p);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = safe_atou64(v, &g->memory);
|
||||
if (r < 0)
|
||||
return r;
|
||||
r = read_one_line_file(p, &v);
|
||||
if (r == -ENOENT)
|
||||
return 0;
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = safe_atou64(v, &g->memory);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
if (g->memory > 0)
|
||||
g->memory_valid = true;
|
||||
|
@ -506,6 +542,10 @@ static int refresh(const char *root, Hashmap *a, Hashmap *b, unsigned iteration)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static const char *empty_to_slash(const char *p) {
|
||||
return isempty(p) ? "/" : p;
|
||||
}
|
||||
|
||||
static int group_compare(const void*a, const void *b) {
|
||||
const Group *x = *(Group**)a, *y = *(Group**)b;
|
||||
|
||||
|
@ -515,9 +555,9 @@ static int group_compare(const void*a, const void *b) {
|
|||
* recursive summing is off, since that is actually
|
||||
* not accumulative for all children. */
|
||||
|
||||
if (path_startswith(y->path, x->path))
|
||||
if (path_startswith(empty_to_slash(y->path), empty_to_slash(x->path)))
|
||||
return -1;
|
||||
if (path_startswith(x->path, y->path))
|
||||
if (path_startswith(empty_to_slash(x->path), empty_to_slash(y->path)))
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -666,7 +706,7 @@ static void display(Hashmap *a) {
|
|||
|
||||
g = array[j];
|
||||
|
||||
path = isempty(g->path) ? "/" : g->path;
|
||||
path = empty_to_slash(g->path);
|
||||
ellipsized = ellipsize(path, path_columns, 33);
|
||||
printf("%-*s", path_columns, ellipsized ?: path);
|
||||
|
||||
|
@ -709,6 +749,7 @@ static void help(void) {
|
|||
" --recursive=BOOL Sum up process count recursively\n"
|
||||
" -d --delay=DELAY Delay between updates\n"
|
||||
" -n --iterations=N Run for N iterations before exiting\n"
|
||||
" -1 Shortcut for --iterations=1\n"
|
||||
" -b --batch Run in batch mode, accepting no input\n"
|
||||
" --depth=DEPTH Maximum traversal depth (default: %u)\n"
|
||||
" -M --machine= Show container\n"
|
||||
|
@ -745,7 +786,7 @@ static int parse_argv(int argc, char *argv[]) {
|
|||
assert(argc >= 1);
|
||||
assert(argv);
|
||||
|
||||
while ((c = getopt_long(argc, argv, "hptcmin:brd:kPM:", options, NULL)) >= 0)
|
||||
while ((c = getopt_long(argc, argv, "hptcmin:brd:kPM:1", options, NULL)) >= 0)
|
||||
|
||||
switch (c) {
|
||||
|
||||
|
@ -773,17 +814,15 @@ static int parse_argv(int argc, char *argv[]) {
|
|||
|
||||
case ARG_DEPTH:
|
||||
r = safe_atou(optarg, &arg_depth);
|
||||
if (r < 0) {
|
||||
log_error("Failed to parse depth parameter.");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to parse depth parameter: %s", optarg);
|
||||
|
||||
break;
|
||||
|
||||
case 'd':
|
||||
r = parse_sec(optarg, &arg_delay);
|
||||
if (r < 0 || arg_delay <= 0) {
|
||||
log_error("Failed to parse delay parameter.");
|
||||
log_error("Failed to parse delay parameter: %s", optarg);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
@ -791,13 +830,15 @@ static int parse_argv(int argc, char *argv[]) {
|
|||
|
||||
case 'n':
|
||||
r = safe_atou(optarg, &arg_iterations);
|
||||
if (r < 0) {
|
||||
log_error("Failed to parse iterations parameter.");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to parse iterations parameter: %s", optarg);
|
||||
|
||||
break;
|
||||
|
||||
case '1':
|
||||
arg_iterations = 1;
|
||||
break;
|
||||
|
||||
case 'b':
|
||||
arg_batch = true;
|
||||
break;
|
||||
|
@ -853,10 +894,8 @@ static int parse_argv(int argc, char *argv[]) {
|
|||
|
||||
case ARG_RECURSIVE:
|
||||
r = parse_boolean(optarg);
|
||||
if (r < 0) {
|
||||
log_error("Failed to parse --recursive= argument: %s", optarg);
|
||||
return r;
|
||||
}
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to parse --recursive= argument: %s", optarg);
|
||||
|
||||
arg_recursive = r;
|
||||
arg_recursive_unset = r == 0;
|
||||
|
|
|
@ -38,19 +38,34 @@
|
|||
#include "stdio-util.h"
|
||||
#include "string-table.h"
|
||||
#include "string-util.h"
|
||||
#include "virt.h"
|
||||
|
||||
#define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
|
||||
|
||||
bool manager_owns_root_cgroup(Manager *m) {
|
||||
assert(m);
|
||||
|
||||
/* Returns true if we are managing the root cgroup. Note that it isn't sufficient to just check whether the
|
||||
* group root path equals "/" since that will also be the case if CLONE_NEWCGROUP is in the mix. Since there
|
||||
* appears to be no nice way to detect whether we are in a CLONE_NEWCGROUP namespace we instead just check if
|
||||
* we run in any kind of container virtualization. */
|
||||
|
||||
if (detect_container() > 0)
|
||||
return false;
|
||||
|
||||
return isempty(m->cgroup_root) || path_equal(m->cgroup_root, "/");
|
||||
}
|
||||
|
||||
bool unit_has_root_cgroup(Unit *u) {
|
||||
assert(u);
|
||||
|
||||
/* Returns whether this unit manages the root cgroup. Note that this is different from being named "-.slice",
|
||||
* as inside of containers the root slice won't be identical to the root cgroup. */
|
||||
/* Returns whether this unit manages the root cgroup. This will return true if this unit is the root slice and
|
||||
* the manager manages the root cgroup. */
|
||||
|
||||
if (!u->cgroup_path)
|
||||
if (!manager_owns_root_cgroup(u->manager))
|
||||
return false;
|
||||
|
||||
return isempty(u->cgroup_path) || path_equal(u->cgroup_path, "/");
|
||||
return unit_has_name(u, SPECIAL_ROOT_SLICE);
|
||||
}
|
||||
|
||||
static void cgroup_compat_warn(void) {
|
||||
|
@ -59,7 +74,9 @@ static void cgroup_compat_warn(void) {
|
|||
if (cgroup_compat_warned)
|
||||
return;
|
||||
|
||||
log_warning("cgroup compatibility translation between legacy and unified hierarchy settings activated. See cgroup-compat debug messages for details.");
|
||||
log_warning("cgroup compatibility translation between legacy and unified hierarchy settings activated. "
|
||||
"See cgroup-compat debug messages for details.");
|
||||
|
||||
cgroup_compat_warned = true;
|
||||
}
|
||||
|
||||
|
@ -2406,6 +2423,10 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) {
|
|||
if (!u->cgroup_path)
|
||||
return -ENODATA;
|
||||
|
||||
/* The root cgroup doesn't expose this information, let's get it from /proc instead */
|
||||
if (unit_has_root_cgroup(u))
|
||||
return procfs_memory_get_current(ret);
|
||||
|
||||
if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0)
|
||||
return -ENODATA;
|
||||
|
||||
|
@ -2437,13 +2458,13 @@ int unit_get_tasks_current(Unit *u, uint64_t *ret) {
|
|||
if (!u->cgroup_path)
|
||||
return -ENODATA;
|
||||
|
||||
if ((u->cgroup_realized_mask & CGROUP_MASK_PIDS) == 0)
|
||||
return -ENODATA;
|
||||
|
||||
/* The root cgroup doesn't expose this information, let's get it from /proc instead */
|
||||
if (unit_has_root_cgroup(u))
|
||||
return procfs_tasks_get_current(ret);
|
||||
|
||||
if ((u->cgroup_realized_mask & CGROUP_MASK_PIDS) == 0)
|
||||
return -ENODATA;
|
||||
|
||||
r = cg_get_attribute("pids", u->cgroup_path, "pids.current", &v);
|
||||
if (r == -ENOENT)
|
||||
return -ENODATA;
|
||||
|
@ -2464,20 +2485,25 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
|
|||
if (!u->cgroup_path)
|
||||
return -ENODATA;
|
||||
|
||||
/* The root cgroup doesn't expose this information, let's get it from /proc instead */
|
||||
if (unit_has_root_cgroup(u))
|
||||
return procfs_cpu_get_usage(ret);
|
||||
|
||||
r = cg_all_unified();
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (r > 0) {
|
||||
const char *keys[] = { "usage_usec", NULL };
|
||||
_cleanup_free_ char *val = NULL;
|
||||
uint64_t us;
|
||||
|
||||
if ((u->cgroup_realized_mask & CGROUP_MASK_CPU) == 0)
|
||||
return -ENODATA;
|
||||
|
||||
r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", keys, &val);
|
||||
r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", STRV_MAKE("usage_usec"), &val);
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (IN_SET(r, -ENOENT, -ENXIO))
|
||||
return -ENODATA;
|
||||
|
||||
r = safe_atou64(val, &us);
|
||||
if (r < 0)
|
||||
|
|
|
@ -209,6 +209,7 @@ int unit_reset_ip_accounting(Unit *u);
|
|||
cc ? cc->name : false; \
|
||||
})
|
||||
|
||||
bool manager_owns_root_cgroup(Manager *m);
|
||||
bool unit_has_root_cgroup(Unit *u);
|
||||
|
||||
int manager_notify_cgroup_empty(Manager *m, const char *group);
|
||||
|
|
|
@ -313,19 +313,18 @@ _pure_ static const char *slice_sub_state_to_string(Unit *u) {
|
|||
return slice_state_to_string(SLICE(u)->state);
|
||||
}
|
||||
|
||||
static void slice_enumerate_perpetual(Manager *m, const char *name) {
|
||||
static int slice_make_perpetual(Manager *m, const char *name, Unit **ret) {
|
||||
Unit *u;
|
||||
int r;
|
||||
|
||||
assert(m);
|
||||
assert(name);
|
||||
|
||||
u = manager_get_unit(m, name);
|
||||
if (!u) {
|
||||
r = unit_new_for_name(m, sizeof(Slice), name, &u);
|
||||
if (r < 0) {
|
||||
log_error_errno(r, "Failed to allocate the special %s unit: %m", name);
|
||||
return;
|
||||
}
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to allocate the special %s unit: %m", name);
|
||||
}
|
||||
|
||||
u->perpetual = true;
|
||||
|
@ -333,15 +332,34 @@ static void slice_enumerate_perpetual(Manager *m, const char *name) {
|
|||
|
||||
unit_add_to_load_queue(u);
|
||||
unit_add_to_dbus_queue(u);
|
||||
|
||||
if (ret)
|
||||
*ret = u;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void slice_enumerate(Manager *m) {
|
||||
Unit *u;
|
||||
int r;
|
||||
|
||||
assert(m);
|
||||
|
||||
slice_enumerate_perpetual(m, SPECIAL_ROOT_SLICE);
|
||||
r = slice_make_perpetual(m, SPECIAL_ROOT_SLICE, &u);
|
||||
if (r >= 0 && manager_owns_root_cgroup(m)) {
|
||||
Slice *s = SLICE(u);
|
||||
|
||||
/* If we are managing the root cgroup then this means our root slice covers the whole system, which
|
||||
* means the kernel will track CPU/tasks/memory for us anyway, and it is all available in /proc. Let's
|
||||
* hence turn accounting on here, so that our APIs to query this data are available. */
|
||||
|
||||
s->cgroup_context.cpu_accounting = true;
|
||||
s->cgroup_context.tasks_accounting = true;
|
||||
s->cgroup_context.memory_accounting = true;
|
||||
}
|
||||
|
||||
if (MANAGER_IS_SYSTEM(m))
|
||||
slice_enumerate_perpetual(m, SPECIAL_SYSTEM_SLICE);
|
||||
(void) slice_make_perpetual(m, SPECIAL_SYSTEM_SLICE, NULL);
|
||||
}
|
||||
|
||||
const UnitVTable slice_vtable = {
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include "special.h"
|
||||
#include "stat-util.h"
|
||||
#include "string-util.h"
|
||||
#include "strv.h"
|
||||
#include "test-helper.h"
|
||||
#include "user-util.h"
|
||||
#include "util.h"
|
||||
|
@ -404,6 +405,45 @@ static void test_cg_tests(void) {
|
|||
assert_se(!systemd);
|
||||
}
|
||||
|
||||
static void test_cg_get_keyed_attribute(void) {
|
||||
_cleanup_free_ char *val = NULL;
|
||||
char *vals3[3] = {}, *vals3a[3] = {};
|
||||
int i;
|
||||
|
||||
assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "no_such_file", STRV_MAKE("no_such_attr"), &val) == -ENOENT);
|
||||
assert_se(val == NULL);
|
||||
|
||||
if (access("/sys/fs/cgroup/init.scope/cpu.stat", R_OK) < 0) {
|
||||
log_info_errno(errno, "Skipping most of %s, /init.scope/cpu.stat not accessible: %m", __func__);
|
||||
return;
|
||||
}
|
||||
|
||||
assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", STRV_MAKE("no_such_attr"), &val) == -ENXIO);
|
||||
assert_se(val == NULL);
|
||||
|
||||
assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", STRV_MAKE("usage_usec"), &val) == 0);
|
||||
log_info("cpu /init.scope cpu.stat [usage_usec] → \"%s\"", val);
|
||||
|
||||
assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", STRV_MAKE("usage_usec", "no_such_attr"), vals3) == -ENXIO);
|
||||
|
||||
assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", STRV_MAKE("usage_usec", "usage_usec"), vals3) == -ENXIO);
|
||||
|
||||
assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat",
|
||||
STRV_MAKE("usage_usec", "user_usec", "system_usec"), vals3) == 0);
|
||||
log_info("cpu /init.scope cpu.stat [usage_usec user_usec system_usec] → \"%s\", \"%s\", \"%s\"",
|
||||
vals3[0], vals3[1], vals3[2]);
|
||||
|
||||
assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat",
|
||||
STRV_MAKE("system_usec", "user_usec", "usage_usec"), vals3a) == 0);
|
||||
log_info("cpu /init.scope cpu.stat [system_usec user_usec usage_usec] → \"%s\", \"%s\", \"%s\"",
|
||||
vals3a[0], vals3a[1], vals3a[2]);
|
||||
|
||||
for (i = 0; i < 3; i++) {
|
||||
free(vals3[i]);
|
||||
free(vals3a[i]);
|
||||
}
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
log_set_max_level(LOG_DEBUG);
|
||||
log_parse_environment();
|
||||
|
@ -429,6 +469,7 @@ int main(void) {
|
|||
test_is_wanted_print(false); /* run twice to test caching */
|
||||
test_is_wanted();
|
||||
test_cg_tests();
|
||||
test_cg_get_keyed_attribute();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -3,15 +3,24 @@
|
|||
#include <errno.h>
|
||||
|
||||
#include "log.h"
|
||||
#include "parse-util.h"
|
||||
#include "procfs-util.h"
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
char buf[CONST_MAX(FORMAT_TIMESPAN_MAX, FORMAT_BYTES_MAX)];
|
||||
nsec_t nsec;
|
||||
uint64_t v;
|
||||
int r;
|
||||
|
||||
log_parse_environment();
|
||||
log_open();
|
||||
|
||||
assert_se(procfs_cpu_get_usage(&nsec) >= 0);
|
||||
log_info("Current sytem CPU time: %s", format_timespan(buf, sizeof(buf), nsec/NSEC_PER_USEC, 1));
|
||||
|
||||
assert_se(procfs_memory_get_current(&v) >= 0);
|
||||
log_info("Current memory usage: %s", format_bytes(buf, sizeof(buf), v));
|
||||
|
||||
assert_se(procfs_tasks_get_current(&v) >= 0);
|
||||
log_info("Current number of tasks: %" PRIu64, v);
|
||||
|
||||
|
|
Loading…
Reference in a new issue