udev: add basic set of user-space defined tracepoints (USDT)

Debugging udev issues especially during the early boot is fairly
difficult. Currently, you need to enable (at least) debug logging and
start monitoring uevents, try to reproduce the issue and then analyze
and correlate two (usually) huge log files. This is not ideal.

This patch aims to provide much more focused debugging tool,
tracepoints. More often then not we tend to have at least the basic idea
about the issue we are trying to debug further, e.g. we know it is
storage related. Hence all of the debug data generated for network
devices is useless, adds clutter to the log files and generally
slows things down.

Using this set of tracepoints you can start asking very specific
questions related to event processing for given device or subsystem.
Tracepoints can be used with various tracing tools but I will provide
examples using bpftrace.

Another important aspect to consider is that using tracepoints you can
debug production systems. There is no need to install test packages with
added logging, no debuginfo packages, etc...

Example usage (you might be asking such questions during the debug session),

Q: How can I list all tracepoints?
A: bpftrace -l 'usdt:/usr/lib/systemd/systemd-udevd:udev:*'

Q: What are the arguments for each tracepoint?
A: Look at the code and search for use of DEVICE_TRACE_POINT macro.

Q: How many times we have executed external binary?
A: bpftrace -e 'usdt:/usr/lib/systemd/systemd-udevd:udev:spawn_exec { @cnt = count(); }'

Q: What binaries where executed while handling events for "dm-0" device?
A  bpftrace -e 'usdt:/usr/lib/systemd/systemd-udevd:udev:spawn_exec / str(arg1) == "dm-0"/ { @cmds[str(arg4)] = count(); }'

Thanks to Thomas Weißschuh <thomas@t-8ch.de> for reviewing this patch
and contributions that allowed us to drop the dependency on dtrace tool
and made the resulting code much more concise.
This commit is contained in:
Michal Sekletár 2020-08-24 11:21:44 +02:00 committed by Zbigniew Jędrzejewski-Szmek
parent b2e8fdc896
commit b428efa54b
5 changed files with 53 additions and 0 deletions

View file

@ -660,6 +660,7 @@ foreach header : ['crypt.h',
'valgrind/memcheck.h',
'valgrind/valgrind.h',
'linux/time_types.h',
'sys/sdt.h',
]
conf.set10('HAVE_' + header.underscorify().to_upper(),

View file

@ -1,6 +1,11 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#pragma once
#if HAVE_SYS_SDT_H
#define SDT_USE_VARIADIC
#include <sys/sdt.h>
#endif
#include "sd-device.h"
#include "time-util.h"
@ -46,3 +51,30 @@ int udev_resolve_subsys_kernel(const char *string, char *result, size_t maxsize,
int udev_queue_is_empty(void);
int udev_queue_init(void);
#if HAVE_SYS_SDT_H
/* Each trace point can have different number of additional arguments. Note that when the macro is used only
* additional arguments are listed in the macro invocation!
*
* Default arguments for each trace point are as follows:
* - arg0 - action
* - arg1 - sysname
* - arg2 - syspath
* - arg3 - subsystem
*/
#define DEVICE_TRACE_POINT(name, dev, ...) \
do { \
PROTECT_ERRNO; \
const char *_n = NULL, *_p = NULL, *_s = NULL; \
sd_device *_d = (dev); \
sd_device_action_t _a = _SD_DEVICE_ACTION_INVALID; \
(void) sd_device_get_action(_d, &_a); \
(void) sd_device_get_sysname(_d, &_n); \
(void) sd_device_get_syspath(_d, &_p); \
(void) sd_device_get_subsystem(_d, &_s); \
STAP_PROBEV(udev, name, device_action_to_string(_a), _n, _p, _s __VA_OPT__(,) __VA_ARGS__);\
} while(false);
#else
#define DEVICE_TRACE_POINT(name, dev, ...) ((void) 0)
#endif

View file

@ -603,6 +603,8 @@ static int on_spawn_timeout(sd_event_source *s, uint64_t usec, void *userdata) {
assert(spawn);
DEVICE_TRACE_POINT(spawn_timeout, spawn->device, spawn->cmd);
kill_and_sigcont(spawn->pid, spawn->timeout_signal);
log_device_error(spawn->device, "Spawned process '%s' ["PID_FMT"] timed out after %s, killing",
@ -648,6 +650,8 @@ static int on_spawn_sigchld(sd_event_source *s, const siginfo_t *si, void *userd
log_device_error(spawn->device, "Process '%s' failed due to unknown reason.", spawn->cmd);
}
DEVICE_TRACE_POINT(spawn_exit, spawn->device, spawn->cmd);
sd_event_exit(sd_event_source_get_event(s), ret);
return 1;
}
@ -785,6 +789,8 @@ int udev_event_spawn(UdevEvent *event,
(void) close_all_fds(NULL, 0);
(void) rlimit_nofile_safe();
DEVICE_TRACE_POINT(spawn_exec, event->dev, cmd);
execve(argv[0], argv, envp);
_exit(EXIT_FAILURE);
}
@ -1014,10 +1020,14 @@ int udev_event_execute_rules(
return r;
}
DEVICE_TRACE_POINT(rules_start, dev);
r = udev_rules_apply_to_event(rules, event, timeout_usec, timeout_signal, properties_list);
if (r < 0)
return log_device_debug_errno(dev, r, "Failed to apply udev rules: %m");
DEVICE_TRACE_POINT(rules_finished, dev);
r = rename_netif(event);
if (r < 0)
return r;

View file

@ -29,6 +29,7 @@
#include "udev-builtin.h"
#include "udev-event.h"
#include "udev-rules.h"
#include "udev-util.h"
#include "user-util.h"
#include "virt.h"
@ -2261,6 +2262,9 @@ static int udev_rule_apply_line_to_event(
return 0;
event->esc = ESCAPE_UNSET;
DEVICE_TRACE_POINT(rules_apply_line, event->dev, line->rule_file->filename, line->line_number);
LIST_FOREACH_SAFE(tokens, token, next_token, line->tokens) {
line->current_token = token;

View file

@ -617,6 +617,8 @@ static int worker_spawn(Manager *manager, struct event *event) {
return log_error_errno(r, "Failed to fork() worker: %m");
}
if (r == 0) {
DEVICE_TRACE_POINT(worker_spawned, event->dev, getpid());
/* Worker process */
r = worker_main(manager, worker_monitor, sd_device_ref(event->dev));
log_close();
@ -1045,6 +1047,8 @@ static int on_uevent(sd_device_monitor *monitor, sd_device *dev, void *userdata)
assert(manager);
DEVICE_TRACE_POINT(kernel_uevent_received, dev);
device_ensure_usec_initialized(dev, NULL);
r = event_queue_insert(manager, dev);
@ -1180,6 +1184,8 @@ static int synthesize_change_one(sd_device *dev, sd_device *target) {
if (r < 0)
return log_device_debug_errno(target, r, "Failed to trigger 'change' uevent: %m");
DEVICE_TRACE_POINT(synthetic_change_event, dev);
return 0;
}