perf report: Sort child tasks by tid

Commit 91e467bc56 ("perf machine: Use hashtable for machine
threads") made the iteration of thread tids unordered. The perf report
--tasks output now shows child threads in an order determined by the
hashing. For example, in this snippet tid 3 appears after tid 256 even
though they have the same ppid 2:

```
$ perf report --tasks
%      pid      tid     ppid  comm
         0        0       -1 |swapper
         2        2        0 | kthreadd
       256      256        2 |  kworker/12:1H-k
    693761   693761        2 |  kworker/10:1-mm
   1301762  1301762        2 |  kworker/1:1-mm_
   1302530  1302530        2 |  kworker/u32:0-k
         3        3        2 |  rcu_gp
...
```

The output is easier to read if threads appear in numerically
increasing order. To allow for this, read all threads into a list then
sort with a comparator that orders by the child tasks of the first common
parent. The list creation and deletion are added as utilities on
machine.  The indentation is possible by counting the number of
parents a child has.

With this change the output for the same data file is now like:
```
$ perf report --tasks
%      pid      tid     ppid  comm
         0        0       -1 |swapper
         1        1        0 | systemd
       823      823        1 |  systemd-journal
       853      853        1 |  systemd-udevd
      3230     3230        1 |  systemd-timesyn
      3236     3236        1 |  auditd
      3239     3239     3236 |   audisp-syslog
      3321     3321        1 |  accounts-daemon
...
```

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Yang Jihong <yangjihong1@huawei.com>
Cc: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240301053646.1449657-2-irogers@google.com
This commit is contained in:
Ian Rogers 2024-02-29 21:36:39 -08:00 committed by Namhyung Kim
parent 498d348637
commit 2f1e20feb9
3 changed files with 172 additions and 93 deletions

View file

@ -59,6 +59,7 @@
#include <linux/ctype.h>
#include <signal.h>
#include <linux/bitmap.h>
#include <linux/list_sort.h>
#include <linux/string.h>
#include <linux/stringify.h>
#include <linux/time64.h>
@ -828,35 +829,6 @@ static void tasks_setup(struct report *rep)
rep->tool.no_warn = true;
}
/* Node of the parent/child task tree used to print the --tasks table. */
struct task {
/* Thread described by this node. */
struct thread *thread;
/* Link into the parent's 'children' list, or into the list of root tasks. */
struct list_head list;
/* Head of the list of tasks whose parent is this task. */
struct list_head children;
};
/*
 * Link 'task' under its parent and keep walking up the ancestry, linking
 * each not-yet-listed ancestor under its own parent.
 *
 * Returns the task at the top of the chain (ppid == -1), NULL when the walk
 * reaches a task that is already linked, or ERR_PTR(-ENOENT) when a parent
 * thread cannot be found in 'machine'.
 */
static struct task *tasks_list(struct task *task, struct machine *machine)
{
	struct task *cur = task;

	for (;;) {
		struct thread *self = cur->thread;
		struct thread *parent;
		struct task *parent_node;

		/* Already listed. */
		if (!list_empty(&cur->list))
			return NULL;

		/* Last one in the chain. */
		if (thread__ppid(self) == -1)
			return cur;

		parent = machine__find_thread(machine, -1, thread__ppid(self));
		if (parent == NULL)
			return ERR_PTR(-ENOENT);

		parent_node = thread__priv(parent);
		thread__put(parent);

		list_add_tail(&cur->list, &parent_node->children);

		/* Continue with the parent instead of recursing. */
		cur = parent_node;
	}
}
struct maps__fprintf_task_args {
int indent;
FILE *fp;
@ -900,10 +872,30 @@ static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp)
return args.printed;
}
static void task__print_level(struct task *task, FILE *fp, int level)
/*
 * Depth of 'thread' in the parent/child hierarchy, used as the indentation
 * level when printing: 0 when the tid is <= 0, 1 when the ppid is <= 0,
 * otherwise one more than the depth of the parent looked up in 'machine'.
 * If the parent cannot be found an error is logged and 0 is returned.
 */
static int thread_level(struct machine *machine, const struct thread *thread)
{
struct thread *thread = task->thread;
struct task *child;
struct thread *parent_thread;
int res;
/* tid <= 0 is treated as the root of the hierarchy. */
if (thread__tid(thread) <= 0)
return 0;
/* No positive parent pid: place directly below the root. */
if (thread__ppid(thread) <= 0)
return 1;
parent_thread = machine__find_thread(machine, -1, thread__ppid(thread));
if (!parent_thread) {
pr_err("Missing parent thread of %d\n", thread__tid(thread));
return 0;
}
res = 1 + thread_level(machine, parent_thread);
/* Drop the parent (assumes machine__find_thread() returned a counted reference). */
thread__put(parent_thread);
return res;
}
static void task__print_level(struct machine *machine, struct thread *thread, FILE *fp)
{
int level = thread_level(machine, thread);
int comm_indent = fprintf(fp, " %8d %8d %8d |%*s",
thread__pid(thread), thread__tid(thread),
thread__ppid(thread), level, "");
@ -911,78 +903,125 @@ static void task__print_level(struct task *task, FILE *fp, int level)
fprintf(fp, "%s\n", thread__comm_str(thread));
maps__fprintf_task(thread__maps(thread), comm_indent, fp);
}
if (!list_empty(&task->children)) {
list_for_each_entry(child, &task->children, list)
task__print_level(child, fp, level + 1);
/*
 * Sort two thread list nodes such that they form a tree. The first node is the
 * root of the tree, its children are ordered numerically after it. If a child
 * has children itself then they appear immediately after their parent. For
 * example, the 4 threads in the order they'd appear in the list:
 * - init with a TID 1 and a parent of 0
 * - systemd with a TID 3000 and a parent of init/1
 * - systemd child thread with TID 4000, the parent is 3000
 * - NetworkManager is a child of init with a TID of 3500.
 *
 * @priv: the struct machine used to look up parent threads.
 * @la, @lb: list nodes embedded in struct thread_list entries.
 *
 * Returns a negative value when la sorts before lb, a positive value when it
 * sorts after, and 0 when both nodes refer to the same tid (or when neither
 * ancestry can be resolved).
 *
 * Whenever a parent lookup fails, the tid for the error message is read
 * before the reference on that thread is dropped, so a thread is never
 * accessed after its thread__put().
 */
static int task_list_cmp(void *priv, const struct list_head *la, const struct list_head *lb)
{
	struct machine *machine = priv;
	struct thread_list *task_a = list_entry(la, struct thread_list, list);
	struct thread_list *task_b = list_entry(lb, struct thread_list, list);
	struct thread *a = task_a->thread;
	struct thread *b = task_b->thread;
	int level_a, level_b, res;

	/* Same thread? */
	if (thread__tid(a) == thread__tid(b))
		return 0;

	/* Compare a and b to root. */
	if (thread__tid(a) == 0)
		return -1;

	if (thread__tid(b) == 0)
		return 1;

	/* If parents match sort by tid. */
	if (thread__ppid(a) == thread__ppid(b))
		return thread__tid(a) < thread__tid(b) ? -1 : 1;

	/*
	 * Find a and b such that if they are a child of each other a and b's
	 * tid's match, otherwise a and b have a common parent and distinct
	 * tid's to sort by. First make the depths of the threads match.
	 */
	level_a = thread_level(machine, a);
	level_b = thread_level(machine, b);

	/* From here on a and b are owned references that must be put. */
	a = thread__get(a);
	b = thread__get(b);

	for (int i = level_a; i > level_b; i--) {
		struct thread *parent = machine__find_thread(machine, -1, thread__ppid(a));

		if (!parent) {
			/* Report while a is still referenced, then release both. */
			pr_err("Missing parent thread of %d\n", thread__tid(a));
			thread__put(a);
			thread__put(b);
			return -1;
		}
		thread__put(a);
		a = parent;
	}

	for (int i = level_b; i > level_a; i--) {
		struct thread *parent = machine__find_thread(machine, -1, thread__ppid(b));

		if (!parent) {
			/* Report while b is still referenced, then release both. */
			pr_err("Missing parent thread of %d\n", thread__tid(b));
			thread__put(b);
			thread__put(a);
			return 1;
		}
		thread__put(b);
		b = parent;
	}

	/* Search up to a common parent. */
	while (thread__ppid(a) != thread__ppid(b)) {
		struct thread *parent_a = machine__find_thread(machine, -1, thread__ppid(a));
		struct thread *parent_b = machine__find_thread(machine, -1, thread__ppid(b));

		/* Log any missing parent before the children are released. */
		if (!parent_a)
			pr_err("Missing parent thread of %d\n", thread__tid(a));
		if (!parent_b)
			pr_err("Missing parent thread of %d\n", thread__tid(b));

		thread__put(a);
		thread__put(b);
		a = parent_a;
		b = parent_b;

		if (!a || !b) {
			/* Handle missing parent (unexpected) with some sanity. */
			thread__put(a);
			thread__put(b);
			return !a && !b ? 0 : (!a ? -1 : 1);
		}
	}

	if (thread__tid(a) == thread__tid(b)) {
		/* a is a child of b or vice-versa, deeper levels appear later. */
		res = level_a < level_b ? -1 : (level_a > level_b ? 1 : 0);
	} else {
		/* Sort by tid now the parent is the same. */
		res = thread__tid(a) < thread__tid(b) ? -1 : 1;
	}

	thread__put(a);
	thread__put(b);
	return res;
}
/*
 * Print the task table for 'rep' to 'fp': a "pid tid ppid comm" header line
 * followed by one entry per thread of the host machine, emitted through
 * task__print_level().
 *
 * NOTE(review): this span appears to interleave statements from two
 * implementations - one that fills a malloc'ed 'struct task' array by walking
 * machine->threads[] and links it with tasks_list(), and one that collects a
 * 'struct thread_list' list with machine__thread_list() and orders it with
 * task_list_cmp(). Confirm which lines are live before relying on it.
 */
static int tasks_print(struct report *rep, FILE *fp)
{
struct perf_session *session = rep->session;
struct machine *machine = &session->machines.host;
struct task *tasks, *task;
unsigned int nr = 0, itask = 0, i;
struct rb_node *nd;
LIST_HEAD(list);
struct machine *machine = &rep->session->machines.host;
LIST_HEAD(tasks);
int ret;
/*
* No locking needed while accessing machine->threads,
* because --tasks is single threaded command.
*/
ret = machine__thread_list(machine, &tasks);
if (!ret) {
struct thread_list *task;
/* Count all the threads. */
for (i = 0; i < THREADS__TABLE_SIZE; i++)
nr += machine->threads[i].nr;
/* Order the collected threads as a tree; 'machine' is handed to the comparator as priv. */
list_sort(machine, &tasks, task_list_cmp);
tasks = malloc(sizeof(*tasks) * nr);
if (!tasks)
return -ENOMEM;
fprintf(fp, "# %8s %8s %8s %s\n", "pid", "tid", "ppid", "comm");
for (i = 0; i < THREADS__TABLE_SIZE; i++) {
struct threads *threads = &machine->threads[i];
for (nd = rb_first_cached(&threads->entries); nd;
nd = rb_next(nd)) {
task = tasks + itask++;
task->thread = rb_entry(nd, struct thread_rb_node, rb_node)->thread;
INIT_LIST_HEAD(&task->children);
INIT_LIST_HEAD(&task->list);
thread__set_priv(task->thread, task);
}
list_for_each_entry(task, &tasks, list)
task__print_level(machine, task->thread, fp);
}
/*
* Iterate every task down to the unprocessed parent
* and link all in task children list. Task with no
* parent is added into 'list'.
*/
for (itask = 0; itask < nr; itask++) {
task = tasks + itask;
if (!list_empty(&task->list))
continue;
task = tasks_list(task, machine);
if (IS_ERR(task)) {
pr_err("Error: failed to process tasks\n");
free(tasks);
return PTR_ERR(task);
}
if (task)
list_add_tail(&task->list, &list);
}
fprintf(fp, "# %8s %8s %8s %s\n", "pid", "tid", "ppid", "comm");
list_for_each_entry(task, &list, list)
task__print_level(task, fp, 0);
free(tasks);
return 0;
/* Release every list node, including any added before a failed machine__thread_list(). */
thread_list__delete(&tasks);
return ret;
}
static int __cmd_report(struct report *rep)

View file

@ -3261,6 +3261,36 @@ int machines__for_each_thread(struct machines *machines,
return rc;
}
/*
 * Per-thread callback for machine__for_each_thread(): allocate a
 * struct thread_list node holding a new reference on 'thread' and append it
 * to the list head passed through 'data'.
 *
 * Returns 0 on success or -ENOMEM when the node cannot be allocated.
 */
static int thread_list_cb(struct thread *thread, void *data)
{
	struct list_head *head = data;
	struct thread_list *node;

	node = malloc(sizeof(*node));
	if (node == NULL)
		return -ENOMEM;

	/* The node keeps its own reference; thread_list__delete() drops it. */
	node->thread = thread__get(thread);
	list_add_tail(&node->list, head);
	return 0;
}
/*
 * Append one struct thread_list node to 'list' for every thread that
 * machine__for_each_thread() visits in 'machine'.
 *
 * Returns whatever machine__for_each_thread() returns. Nodes appended before
 * a failing callback are left on 'list' for the caller to release with
 * thread_list__delete().
 */
int machine__thread_list(struct machine *machine, struct list_head *list)
{
	int err;

	err = machine__for_each_thread(machine, thread_list_cb, list);
	return err;
}
void thread_list__delete(struct list_head *list)
{
struct thread_list *pos, *next;
list_for_each_entry_safe(pos, next, list, list) {
thread__zput(pos->thread);
list_del(&pos->list);
free(pos);
}
}
pid_t machine__get_current_tid(struct machine *machine, int cpu)
{
if (cpu < 0 || (size_t)cpu >= machine->current_tid_sz)

View file

@ -280,6 +280,16 @@ int machines__for_each_thread(struct machines *machines,
int (*fn)(struct thread *thread, void *p),
void *priv);
/* One element of a list of threads, as built by machine__thread_list(). */
struct thread_list {
/* Link into the caller-provided list head. */
struct list_head list;
/* Referenced thread; the reference is dropped by thread_list__delete(). */
struct thread *thread;
};
/* Make a list of struct thread_list based on threads in the machine. */
int machine__thread_list(struct machine *machine, struct list_head *list);
/* Free up the nodes within the thread_list list. */
void thread_list__delete(struct list_head *list);
pid_t machine__get_current_tid(struct machine *machine, int cpu);
int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
pid_t tid);