diff --git a/compat/linux/procinfo.c b/compat/linux/procinfo.c index 0b47d44990..bc2f9382a1 100644 --- a/compat/linux/procinfo.c +++ b/compat/linux/procinfo.c @@ -4,26 +4,145 @@ #include "strvec.h" #include "trace2.h" -static void get_ancestry_names(struct strvec *names) -{ - /* - * NEEDSWORK: We could gather the entire pstree into an array to match - * functionality with compat/win32/trace2_win32_process_info.c. - * To do so, we may want to examine /proc//stat. For now, just - * gather the immediate parent name which is readily accessible from - * /proc/$(getppid())/comm. - */ - struct strbuf procfs_path = STRBUF_INIT; - struct strbuf name = STRBUF_INIT; +/* + * We need more complex parsing in stat_parent_pid() and + * parse_proc_stat() below than a dumb fscanf(). That's because while + * the statcomm field is surrounded by parentheses, the process itself + * is free to insert any arbitrary byte sequence its its name. That + * can include newlines, spaces, closing parentheses etc. + * + * See do_task_stat() in fs/proc/array.c in linux.git, this is in + * contrast with the escaped version of the name found in + * /proc/%d/status. + * + * So instead of using fscanf() we'll read N bytes from it, look for + * the first "(", and then the last ")", anything in-between is our + * process name. + * + * How much N do we need? On Linux /proc/sys/kernel/pid_max is 2^15 by + * default, but it can be raised set to values of up to 2^22. So + * that's 7 digits for a PID. We have 2 PIDs in the first four fields + * we're interested in, so 2 * 7 = 14. + * + * We then have 3 spaces between those four values, and we'd like to + * get to the space between the 4th and the 5th (the "pgrp" field) to + * make sure we read the entire "ppid" field. So that brings us up to + * 14 + 3 + 1 = 18. Add the two parentheses around the "comm" value + * and it's 20. The "state" value itself is then one character (now at + * 21). + * + * Finally the maximum length of the "comm" name itself is 15 + * characters, e.g. a setting of "123456789abcdefg" will be truncated + * to "123456789abcdef". See PR_SET_NAME in prctl(2). So all in all + * we'd need to read 21 + 15 = 36 bytes. + * + * Let's just read 2^6 (64) instead for good measure. If PID_MAX ever + * grows past 2^22 we'll be future-proof. We'll then anchor at the + * last ")" we find to locate the parent PID. + */ +#define STAT_PARENT_PID_READ_N 64 - /* try to use procfs if it's present. */ - strbuf_addf(&procfs_path, "/proc/%d/comm", getppid()); - if (strbuf_read_file(&name, procfs_path.buf, 0) > 0) { - strbuf_trim_trailing_newline(&name); - strvec_push(names, name.buf); +static int parse_proc_stat(struct strbuf *sb, struct strbuf *name, + int *statppid) +{ + const char *comm_lhs = strchr(sb->buf, '('); + const char *comm_rhs = strrchr(sb->buf, ')'); + const char *ppid_lhs, *ppid_rhs; + char *p; + pid_t ppid; + + if (!comm_lhs || !comm_rhs) + goto bad_kernel; + + /* + * We're at the ")", that's followed by " X ", where X is a + * single "state" character. So advance by 4 bytes. + */ + ppid_lhs = comm_rhs + 4; + + /* + * Read until the space between the "ppid" and "pgrp" fields + * to make sure we're anchored after the untruncated "ppid" + * field.. + */ + ppid_rhs = strchr(ppid_lhs, ' '); + if (!ppid_rhs) + goto bad_kernel; + + ppid = strtol(ppid_lhs, &p, 10); + if (ppid_rhs == p) { + const char *comm = comm_lhs + 1; + size_t commlen = comm_rhs - comm; + + strbuf_add(name, comm, commlen); + *statppid = ppid; + + return 0; } +bad_kernel: + /* + * We were able to read our STAT_PARENT_PID_READ_N bytes from + * /proc/%d/stat, but the content is bad. Broken kernel? + * Should not happen, but handle it gracefully. + */ + return -1; +} + +static int stat_parent_pid(pid_t pid, struct strbuf *name, int *statppid) +{ + struct strbuf procfs_path = STRBUF_INIT; + struct strbuf sb = STRBUF_INIT; + FILE *fp; + int ret = -1; + + /* try to use procfs if it's present. */ + strbuf_addf(&procfs_path, "/proc/%d/stat", pid); + fp = fopen(procfs_path.buf, "r"); + if (!fp) + goto cleanup; + + /* + * We could be more strict here and assert that we read at + * least STAT_PARENT_PID_READ_N. My reading of procfs(5) is + * that on any modern kernel (at least since 2.6.0 released in + * 2003) even if all the mandatory numeric fields were zero'd + * out we'd get at least 100 bytes, but let's just check that + * we got anything at all and trust the parse_proc_stat() + * function to handle its "Bad Kernel?" error checking. + */ + if (!strbuf_fread(&sb, STAT_PARENT_PID_READ_N, fp)) + goto cleanup; + if (parse_proc_stat(&sb, name, statppid) < 0) + goto cleanup; + + ret = 0; +cleanup: + if (fp) + fclose(fp); strbuf_release(&procfs_path); + strbuf_release(&sb); + + return ret; +} + +static void push_ancestry_name(struct strvec *names, pid_t pid) +{ + struct strbuf name = STRBUF_INIT; + int ppid; + + if (stat_parent_pid(pid, &name, &ppid) < 0) + goto cleanup; + + strvec_push(names, name.buf); + + /* + * Both errors and reaching the end of the process chain are + * reported as fields of 0 by proc(5) + */ + if (ppid) + push_ancestry_name(names, ppid); +cleanup: strbuf_release(&name); return; @@ -45,11 +164,7 @@ void trace2_collect_process_info(enum trace2_process_info_reason reason) */ break; case TRACE2_PROCESS_INFO_STARTUP: - /* - * NEEDSWORK: we could do the entire ptree in an array instead, - * see compat/win32/trace2_win32_process_info.c. - */ - get_ancestry_names(&names); + push_ancestry_name(&names, getppid()); if (names.nr) trace2_cmd_ancestry(names.v);