From ddf0ed09bd8f83677407db36828aca2c10f419c9 Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Wed, 19 Jun 2024 16:57:09 -0400 Subject: [PATCH] sdt: Implement SDT probes using hot-patching The idea here is to avoid a memory access and conditional branch per probe site. Instead, the probe is represented by an "unreachable" unconditional function call. asm goto is used to store the address of the probe site (represented by a no-op sled) and the address of the function call into a tracepoint record. Each SDT probe carries a list of tracepoints. When the probe is enabled, the no-op sled corresponding to each tracepoint is overwritten with a jmp to the corresponding label. The implementation uses smp_rendezvous() to park all other CPUs while the instruction is being overwritten, as this can't be done atomically in general. The compiler moves argument marshalling code and the sdt_probe() function call out-of-line, i.e., to the end of the function. Per gallatin@ in D43504, this approach has less overhead when probes are disabled. To make the implementation a bit simpler, I removed support for probes with 7 arguments; nothing makes use of this except a regression test case. It could be re-added later if need be. The approach taken in this patch enables some more improvements: 1. We can now automatically fill out the "function" field of SDT probe names. The SDT macros let the programmer specify the function and module names, but this is really a bug and shouldn't have been allowed. The intent was to be able to have the same probe in multiple functions and to let the user restrict which probes actually get enabled by specifying a function name or glob. 2. We can avoid branching on SDT_PROBES_ENABLED() by adding the ability to include blocks of code in the out-of-line path. For example: if (SDT_PROBES_ENABLED()) { int reason = CLD_EXITED; if (WCOREDUMP(signo)) reason = CLD_DUMPED; else if (WIFSIGNALED(signo)) reason = CLD_KILLED; SDT_PROBE1(proc, , , exit, reason); } could be written SDT_PROBE1_EXT(proc, , , exit, reason, int reason; reason = CLD_EXITED; if (WCOREDUMP(signo)) reason = CLD_DUMPED; else if (WIFSIGNALED(signo)) reason = CLD_KILLED; ); In the future I would like to use this mechanism more generally, e.g., to remove branches and marshalling code used by hwpmc, and generally to make it easier to add new tracepoint consumers without having to add more conditional branches to hot code paths. Reviewed by: Domagoj Stolfa, avg MFC after: 2 months Differential Revision: https://reviews.freebsd.org/D44483 --- .../dtrace/test/tst/common/sdt/tst.sdtargs.d | 11 +- sys/amd64/include/sdt_machdep.h | 12 ++ sys/arm/arm/sdt_machdep.c | 63 +++++++++ sys/arm/include/sdt_machdep.h | 12 ++ sys/arm64/arm64/sdt_machdep.c | 77 ++++++++++ sys/arm64/include/sdt_machdep.h | 12 ++ sys/cddl/dev/dtrace/dtrace_test.c | 6 +- sys/cddl/dev/sdt/sdt.c | 97 +++++++++++-- sys/conf/files.arm | 1 + sys/conf/files.arm64 | 1 + sys/conf/files.powerpc | 1 + sys/conf/files.riscv | 1 + sys/conf/files.x86 | 1 + sys/i386/include/sdt_machdep.h | 19 +++ sys/kern/kern_sdt.c | 21 ++- sys/modules/dtrace/Makefile | 4 +- sys/powerpc/include/sdt_machdep.h | 12 ++ sys/powerpc/powerpc/sdt_machdep.c | 59 ++++++++ sys/riscv/include/sdt_machdep.h | 12 ++ sys/riscv/riscv/sdt_machdep.c | 68 +++++++++ sys/sys/sdt.h | 133 ++++++++++-------- sys/x86/x86/sdt_machdep.c | 84 +++++++++++ 22 files changed, 625 insertions(+), 82 deletions(-) create mode 100644 sys/amd64/include/sdt_machdep.h create mode 100644 sys/arm/arm/sdt_machdep.c create mode 100644 sys/arm/include/sdt_machdep.h create mode 100644 sys/arm64/arm64/sdt_machdep.c create mode 100644 sys/arm64/include/sdt_machdep.h create mode 100644 sys/i386/include/sdt_machdep.h create mode 100644 sys/powerpc/include/sdt_machdep.h create mode 100644 sys/powerpc/powerpc/sdt_machdep.c create mode 100644 sys/riscv/include/sdt_machdep.h create mode 100644 sys/riscv/riscv/sdt_machdep.c create mode 100644 sys/x86/x86/sdt_machdep.c diff --git a/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/sdt/tst.sdtargs.d b/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/sdt/tst.sdtargs.d index e965b05f2405..d3f7bfad5db1 100644 --- a/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/sdt/tst.sdtargs.d +++ b/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/sdt/tst.sdtargs.d @@ -27,7 +27,7 @@ #pragma ident "%Z%%M% %I% %E% SMI" /* - * ASSERTION: Verify that argN (1..7) variables are properly remapped. + * ASSERTION: Verify that argN (1..6) variables are properly remapped. */ BEGIN @@ -44,13 +44,12 @@ ERROR } test:::sdttest -/arg0 != 1 || arg1 != 2 || arg2 != 3 || arg3 != 4 || arg4 != 5 || arg5 != 6 || - arg6 != 7/ +/arg0 != 1 || arg1 != 2 || arg2 != 3 || arg3 != 4 || arg4 != 5 || arg5 != 6/ { printf("sdt arg mismatch\n\n"); - printf("args are : %d, %d, %d, %d, %d, %d, %d\n", arg0, arg1, arg2, - arg3, arg4, arg5, arg6); - printf("should be : 1, 2, 3, 4, 5, 6, 7\n"); + printf("args are : %d, %d, %d, %d, %d, %d\n", arg0, arg1, arg2, + arg3, arg4, arg5); + printf("should be : 1, 2, 3, 4, 5, 6\n"); exit(1); } diff --git a/sys/amd64/include/sdt_machdep.h b/sys/amd64/include/sdt_machdep.h new file mode 100644 index 000000000000..2434abed2e0e --- /dev/null +++ b/sys/amd64/include/sdt_machdep.h @@ -0,0 +1,12 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Mark Johnston + */ + +#ifndef _SYS_SDT_MACHDEP_H_ +#define _SYS_SDT_MACHDEP_H_ + +#define _SDT_ASM_PATCH_INSTR "nop; nop; nop; nop; nop" + +#endif diff --git a/sys/arm/arm/sdt_machdep.c b/sys/arm/arm/sdt_machdep.c new file mode 100644 index 000000000000..02361497ee8d --- /dev/null +++ b/sys/arm/arm/sdt_machdep.c @@ -0,0 +1,63 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Mark Johnston + */ + +#include +#include + +#include + +/* + * Return true if we can overwrite a nop at "patchpoint" with a jump to the + * target address. + */ +bool +sdt_tracepoint_valid(uintptr_t patchpoint, uintptr_t target) +{ + int32_t offset; + + if (patchpoint == target || + (patchpoint & (INSN_SIZE - 1)) != 0 || + (target & (INSN_SIZE - 1)) != 0 || + patchpoint + 2 * INSN_SIZE < patchpoint) + return (false); + offset = target - (patchpoint + 2 * INSN_SIZE); + if (offset < -(1 << 24) || offset > (1 >> 24)) + return (false); + return (true); +} + +/* + * Overwrite the copy of _SDT_ASM_PATCH_INSTR at the tracepoint with a jump to + * the target address. + */ +void +sdt_tracepoint_patch(uintptr_t patchpoint, uintptr_t target) +{ + uint32_t instr; + + KASSERT(sdt_tracepoint_valid(patchpoint, target), + ("%s: invalid tracepoint %#x -> %#x", + __func__, patchpoint, target)); + + instr = + (((target - (patchpoint + 2 * INSN_SIZE)) >> 2) & ((1 << 24) - 1)) | + 0xea000000; + memcpy((void *)patchpoint, &instr, sizeof(instr)); + icache_sync(patchpoint, sizeof(instr)); +} + +/* + * Overwrite the patchpoint with a nop instruction. + */ +void +sdt_tracepoint_restore(uintptr_t patchpoint) +{ + uint32_t instr; + + instr = 0xe320f000u; + memcpy((void *)patchpoint, &instr, sizeof(instr)); + icache_sync(patchpoint, sizeof(instr)); +} diff --git a/sys/arm/include/sdt_machdep.h b/sys/arm/include/sdt_machdep.h new file mode 100644 index 000000000000..738d246832a2 --- /dev/null +++ b/sys/arm/include/sdt_machdep.h @@ -0,0 +1,12 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Mark Johnston + */ + +#ifndef _SYS_SDT_MACHDEP_H_ +#define _SYS_SDT_MACHDEP_H_ + +#define _SDT_ASM_PATCH_INSTR "nop" + +#endif /* _SYS_SDT_MACHDEP_H_ */ diff --git a/sys/arm64/arm64/sdt_machdep.c b/sys/arm64/arm64/sdt_machdep.c new file mode 100644 index 000000000000..23324ffbf333 --- /dev/null +++ b/sys/arm64/arm64/sdt_machdep.c @@ -0,0 +1,77 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Mark Johnston + */ + +#include +#include + +#include +#include + +#include +#include +#include + +/* + * Return true if we can overwrite a nop at "patchpoint" with a jump to the + * target address. + */ +bool +sdt_tracepoint_valid(uintptr_t patchpoint, uintptr_t target) +{ + void *addr; + int64_t offset; + + if (!arm64_get_writable_addr((void *)patchpoint, &addr)) + return (false); + + if (patchpoint == target || + (patchpoint & (INSN_SIZE - 1)) != 0 || + (target & (INSN_SIZE - 1)) != 0) + return (false); + offset = target - patchpoint; + if (offset < -(1 << 26) || offset > (1 << 26)) + return (false); + return (true); +} + +/* + * Overwrite the copy of _SDT_ASM_PATCH_INSTR at the tracepoint with a jump to the + * target address. + */ +void +sdt_tracepoint_patch(uintptr_t patchpoint, uintptr_t target) +{ + void *addr; + uint32_t instr; + + KASSERT(sdt_tracepoint_valid(patchpoint, target), + ("%s: invalid tracepoint %#lx -> %#lx", + __func__, patchpoint, target)); + + if (!arm64_get_writable_addr((void *)patchpoint, &addr)) + panic("%s: Unable to write new instruction", __func__); + + instr = (((target - patchpoint) >> 2) & 0x3fffffful) | 0x14000000; + memcpy(addr, &instr, sizeof(instr)); + cpu_icache_sync_range((void *)patchpoint, INSN_SIZE); +} + +/* + * Overwrite the patchpoint with a nop instruction. + */ +void +sdt_tracepoint_restore(uintptr_t patchpoint) +{ + void *addr; + uint32_t instr; + + if (!arm64_get_writable_addr((void *)patchpoint, &addr)) + panic("%s: Unable to write new instruction", __func__); + + instr = 0xd503201f; + memcpy(addr, &instr, sizeof(instr)); + cpu_icache_sync_range((void *)patchpoint, INSN_SIZE); +} diff --git a/sys/arm64/include/sdt_machdep.h b/sys/arm64/include/sdt_machdep.h new file mode 100644 index 000000000000..738d246832a2 --- /dev/null +++ b/sys/arm64/include/sdt_machdep.h @@ -0,0 +1,12 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Mark Johnston + */ + +#ifndef _SYS_SDT_MACHDEP_H_ +#define _SYS_SDT_MACHDEP_H_ + +#define _SDT_ASM_PATCH_INSTR "nop" + +#endif /* _SYS_SDT_MACHDEP_H_ */ diff --git a/sys/cddl/dev/dtrace/dtrace_test.c b/sys/cddl/dev/dtrace/dtrace_test.c index 92ba3a36a24e..c77448670917 100644 --- a/sys/cddl/dev/dtrace/dtrace_test.c +++ b/sys/cddl/dev/dtrace/dtrace_test.c @@ -37,8 +37,8 @@ SDT_PROVIDER_DEFINE(test); -SDT_PROBE_DEFINE7(test, , , sdttest, "int", "int", "int", "int", "int", - "int", "int"); +SDT_PROBE_DEFINE6(test, , , sdttest, "int", "int", "int", "int", "int", + "int"); /* * These are variables that the DTrace test suite references in the @@ -68,7 +68,7 @@ dtrace_test_sdttest(SYSCTL_HANDLER_ARGS) else if (val == 0) return (0); - SDT_PROBE7(test, , , sdttest, 1, 2, 3, 4, 5, 6, 7); + SDT_PROBE6(test, , , sdttest, 1, 2, 3, 4, 5, 6); return (error); } diff --git a/sys/cddl/dev/sdt/sdt.c b/sys/cddl/dev/sdt/sdt.c index 684374848290..51fa0432437c 100644 --- a/sys/cddl/dev/sdt/sdt.c +++ b/sys/cddl/dev/sdt/sdt.c @@ -19,7 +19,7 @@ * CDDL HEADER END * * Portions Copyright 2006-2008 John Birrell jb@freebsd.org - * + * Copyright 2024 Mark Johnston */ /* @@ -187,7 +187,7 @@ sdt_create_probe(struct sdt_probe *probe) if (dtrace_probe_lookup(prov->id, mod, func, name) != DTRACE_IDNONE) return; - (void)dtrace_probe_create(prov->id, mod, func, name, 1, probe); + (void)dtrace_probe_create(prov->id, mod, func, name, 0, probe); } /* @@ -200,10 +200,62 @@ sdt_provide_probes(void *arg, dtrace_probedesc_t *desc) { } +struct sdt_enable_cb_arg { + struct sdt_probe *probe; + int cpu; + int arrived; + int done; + bool enable; +}; + +static void +sdt_probe_update_cb(void *_arg) +{ + struct sdt_enable_cb_arg *arg; + struct sdt_tracepoint *tp; + + arg = _arg; + if (arg->cpu != curcpu) { + atomic_add_rel_int(&arg->arrived, 1); + while (atomic_load_acq_int(&arg->done) == 0) + cpu_spinwait(); + return; + } else { + while (atomic_load_acq_int(&arg->arrived) != mp_ncpus - 1) + cpu_spinwait(); + } + + STAILQ_FOREACH(tp, &arg->probe->tracepoint_list, tracepoint_entry) { + if (arg->enable) + sdt_tracepoint_patch(tp->patchpoint, tp->target); + else + sdt_tracepoint_restore(tp->patchpoint); + } + + atomic_store_rel_int(&arg->done, 1); +} + +static void +sdt_probe_update(struct sdt_probe *probe, bool enable) +{ + struct sdt_enable_cb_arg cbarg; + + sched_pin(); + cbarg.probe = probe; + cbarg.cpu = curcpu; + atomic_store_rel_int(&cbarg.arrived, 0); + atomic_store_rel_int(&cbarg.done, 0); + cbarg.enable = enable; + smp_rendezvous(NULL, sdt_probe_update_cb, NULL, &cbarg); + sched_unpin(); +} + static void sdt_enable(void *arg __unused, dtrace_id_t id, void *parg) { - struct sdt_probe *probe = parg; + struct sdt_probe *probe; + + probe = parg; probe->id = id; probe->sdtp_lf->nenabled++; @@ -215,15 +267,20 @@ sdt_enable(void *arg __unused, dtrace_id_t id, void *parg) sdt_probes_enabled_count++; if (sdt_probes_enabled_count == 1) sdt_probes_enabled = true; + + sdt_probe_update(probe, true); } static void sdt_disable(void *arg __unused, dtrace_id_t id, void *parg) { - struct sdt_probe *probe = parg; + struct sdt_probe *probe; + probe = parg; KASSERT(probe->sdtp_lf->nenabled > 0, ("no probes enabled")); + sdt_probe_update(probe, false); + sdt_probes_enabled_count--; if (sdt_probes_enabled_count == 0) sdt_probes_enabled = false; @@ -284,26 +341,47 @@ sdt_kld_load_providers(struct linker_file *lf) static void sdt_kld_load_probes(struct linker_file *lf) { - struct sdt_probe **probe, **p_begin, **p_end; - struct sdt_argtype **argtype, **a_begin, **a_end; + struct sdt_probe **p_begin, **p_end; + struct sdt_argtype **a_begin, **a_end; + struct sdt_tracepoint *tp_begin, *tp_end; if (linker_file_lookup_set(lf, "sdt_probes_set", &p_begin, &p_end, NULL) == 0) { - for (probe = p_begin; probe < p_end; probe++) { + for (struct sdt_probe **probe = p_begin; probe < p_end; + probe++) { (*probe)->sdtp_lf = lf; sdt_create_probe(*probe); TAILQ_INIT(&(*probe)->argtype_list); + STAILQ_INIT(&(*probe)->tracepoint_list); } } if (linker_file_lookup_set(lf, "sdt_argtypes_set", &a_begin, &a_end, NULL) == 0) { - for (argtype = a_begin; argtype < a_end; argtype++) { + for (struct sdt_argtype **argtype = a_begin; argtype < a_end; + argtype++) { (*argtype)->probe->n_args++; TAILQ_INSERT_TAIL(&(*argtype)->probe->argtype_list, *argtype, argtype_entry); } } + + if (linker_file_lookup_set(lf, __XSTRING(_SDT_TRACEPOINT_SET), + &tp_begin, &tp_end, NULL) == 0) { + for (struct sdt_tracepoint *tp = tp_begin; tp < tp_end; tp++) { + if (!sdt_tracepoint_valid(tp->patchpoint, tp->target)) { + printf( + "invalid tracepoint %#jx->%#jx for %s:%s:%s:%s\n", + (uintmax_t)tp->patchpoint, + (uintmax_t)tp->target, + tp->probe->prov->name, tp->probe->mod, + tp->probe->func, tp->probe->name); + continue; + } + STAILQ_INSERT_TAIL(&tp->probe->tracepoint_list, tp, + tracepoint_entry); + } + } } /* @@ -378,6 +456,7 @@ sdt_load(void) TAILQ_INIT(&sdt_prov_list); sdt_probe_func = dtrace_probe; + sdt_probe6_func = (sdt_probe6_func_t)dtrace_probe; sdt_kld_load_tag = EVENTHANDLER_REGISTER(kld_load, sdt_kld_load, NULL, EVENTHANDLER_PRI_ANY); @@ -403,6 +482,7 @@ sdt_unload(void) EVENTHANDLER_DEREGISTER(kld_unload_try, sdt_kld_unload_try_tag); sdt_probe_func = sdt_probe_stub; + sdt_probe6_func = (sdt_probe6_func_t)sdt_probe_stub; TAILQ_FOREACH_SAFE(prov, &sdt_prov_list, prov_entry, tmp) { ret = dtrace_unregister(prov->id); @@ -419,7 +499,6 @@ sdt_unload(void) static int sdt_modevent(module_t mod __unused, int type, void *data __unused) { - switch (type) { case MOD_LOAD: case MOD_UNLOAD: diff --git a/sys/conf/files.arm b/sys/conf/files.arm index b049479fbe82..825680b73165 100644 --- a/sys/conf/files.arm +++ b/sys/conf/files.arm @@ -58,6 +58,7 @@ arm/arm/pmu.c optional pmu | hwpmc arm/arm/pmu_fdt.c optional fdt pmu | fdt hwpmc arm/arm/ptrace_machdep.c standard arm/arm/sc_machdep.c optional sc +arm/arm/sdt_machdep.c optional kdtrace_hooks arm/arm/setcpsr.S standard arm/arm/setstack.S standard arm/arm/stack_machdep.c optional ddb | stack diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64 index 26f9eaf193af..a6bd1a1ba60a 100644 --- a/sys/conf/files.arm64 +++ b/sys/conf/files.arm64 @@ -68,6 +68,7 @@ arm64/arm64/ptrauth.c standard \ compile-with "${NORMAL_C:N-mbranch-protection*} -mbranch-protection=bti" arm64/arm64/pmap.c standard arm64/arm64/ptrace_machdep.c standard +arm64/arm64/sdt_machdep.c optional kdtrace_hooks arm64/arm64/sigtramp.S standard arm64/arm64/stack_machdep.c optional ddb | stack arm64/arm64/strcmp.S standard diff --git a/sys/conf/files.powerpc b/sys/conf/files.powerpc index 1a0388884ee8..1baacb5628c7 100644 --- a/sys/conf/files.powerpc +++ b/sys/conf/files.powerpc @@ -336,6 +336,7 @@ powerpc/powerpc/platform.c standard powerpc/powerpc/platform_if.m standard powerpc/powerpc/ptrace_machdep.c standard powerpc/powerpc/sc_machdep.c optional sc +powerpc/powerpc/sdt_machdep.c optional powerpc64 kdtrace_hooks powerpc/powerpc/setjmp.S standard powerpc/powerpc/sigcode32.S optional powerpc | powerpcspe | compat_freebsd32 powerpc/powerpc/sigcode64.S optional powerpc64 | powerpc64le diff --git a/sys/conf/files.riscv b/sys/conf/files.riscv index 90e2826ce55d..5b94eb327f93 100644 --- a/sys/conf/files.riscv +++ b/sys/conf/files.riscv @@ -61,6 +61,7 @@ riscv/riscv/riscv_syscon.c optional syscon riscv_syscon fdt riscv/riscv/sigtramp.S standard riscv/riscv/sbi.c standard riscv/riscv/sbi_ipi.c optional smp +riscv/riscv/sdt_machdep.c optional kdtrace_hooks riscv/riscv/stack_machdep.c optional ddb | stack riscv/riscv/support.S standard riscv/riscv/swtch.S standard diff --git a/sys/conf/files.x86 b/sys/conf/files.x86 index de698e82c823..8a68459ff215 100644 --- a/sys/conf/files.x86 +++ b/sys/conf/files.x86 @@ -377,6 +377,7 @@ x86/x86/x86_mem.c optional mem x86/x86/mp_x86.c optional smp x86/x86/nexus.c standard x86/x86/pvclock.c optional kvm_clock | xenhvm +x86/x86/sdt_machdep.c optional kdtrace_hooks x86/x86/stack_machdep.c optional ddb | stack x86/x86/tsc.c standard x86/x86/ucode.c standard diff --git a/sys/i386/include/sdt_machdep.h b/sys/i386/include/sdt_machdep.h new file mode 100644 index 000000000000..550cfa7dd692 --- /dev/null +++ b/sys/i386/include/sdt_machdep.h @@ -0,0 +1,19 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Mark Johnston + */ + +#ifndef _SYS_SDT_MACHDEP_H_ +#define _SYS_SDT_MACHDEP_H_ + +#define _SDT_ASM_PATCH_INSTR "nop; nop; nop; nop; nop" + +/* + * Work around an apparent clang bug or limitation which prevents the use of the + * "i" (immediate) constraint with the probe structure. + */ +#define _SDT_ASM_PROBE_CONSTRAINT "Ws" +#define _SDT_ASM_PROBE_OPERAND "p" + +#endif diff --git a/sys/kern/kern_sdt.c b/sys/kern/kern_sdt.c index b5f30057d535..b7213d2051fc 100644 --- a/sys/kern/kern_sdt.c +++ b/sys/kern/kern_sdt.c @@ -37,6 +37,7 @@ SDT_PROVIDER_DEFINE(sdt); * dtrace_probe() when it loads. */ sdt_probe_func_t sdt_probe_func = sdt_probe_stub; +sdt_probe6_func_t sdt_probe6_func = (sdt_probe6_func_t)sdt_probe_stub; volatile bool __read_frequently sdt_probes_enabled; /* @@ -45,10 +46,24 @@ volatile bool __read_frequently sdt_probes_enabled; * to enable it. */ void -sdt_probe_stub(uint32_t id, uintptr_t arg0, uintptr_t arg1, - uintptr_t arg2, uintptr_t arg3, uintptr_t arg4) +sdt_probe_stub(uint32_t id __unused, uintptr_t arg0 __unused, + uintptr_t arg1 __unused, uintptr_t arg2 __unused, uintptr_t arg3 __unused, + uintptr_t arg4 __unused) { - printf("sdt_probe_stub: unexpectedly called\n"); kdb_backtrace(); } + +void +sdt_probe(uint32_t id, uintptr_t arg0, uintptr_t arg1, + uintptr_t arg2, uintptr_t arg3, uintptr_t arg4) +{ + sdt_probe_func(id, arg0, arg1, arg2, arg3, arg4); +} + +void +sdt_probe6(uint32_t id, uintptr_t arg0, uintptr_t arg1, + uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5) +{ + sdt_probe6_func(id, arg0, arg1, arg2, arg3, arg4, arg5); +} diff --git a/sys/modules/dtrace/Makefile b/sys/modules/dtrace/Makefile index 73c71c5bd1fb..3cc672b62c3b 100644 --- a/sys/modules/dtrace/Makefile +++ b/sys/modules/dtrace/Makefile @@ -10,7 +10,6 @@ SUBDIR= dtaudit \ fbt \ profile \ prototype \ - sdt \ systrace .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386" @@ -31,5 +30,8 @@ SUBDIR+= fasttrap ${MACHINE_ARCH} == "powerpc64" SUBDIR+= systrace_freebsd32 .endif +.if ${MACHINE_CPUARCH} != "powerpc" || ${MACHINE_ARCH} == "powerpc64" +SUBDIR+= sdt +.endif .include diff --git a/sys/powerpc/include/sdt_machdep.h b/sys/powerpc/include/sdt_machdep.h new file mode 100644 index 000000000000..8f6c3d88ea7d --- /dev/null +++ b/sys/powerpc/include/sdt_machdep.h @@ -0,0 +1,12 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Mark Johnston + */ + +#ifndef _SYS_SDT_MACHDEP_H_ +#define _SYS_SDT_MACHDEP_H_ + +#define _SDT_ASM_PATCH_INSTR "nop" + +#endif diff --git a/sys/powerpc/powerpc/sdt_machdep.c b/sys/powerpc/powerpc/sdt_machdep.c new file mode 100644 index 000000000000..8a84016a9571 --- /dev/null +++ b/sys/powerpc/powerpc/sdt_machdep.c @@ -0,0 +1,59 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Mark Johnston + */ + +#include +#include + +#include + +/* + * Return true if we can overwrite a nop at "patchpoint" with a jump to the + * target address. + */ +bool +sdt_tracepoint_valid(uintptr_t patchpoint, uintptr_t target) +{ + int64_t offset; + + if (patchpoint == target || + (patchpoint & 3) != 0 || (target & 3) != 0) + return (false); + offset = target - patchpoint; + if (offset < -(1 << 26) || offset > (1 << 26)) + return (false); + return (true); +} + +/* + * Overwrite the copy of _SDT_ASM_PATCH_INSTR at the tracepoint with a jump to + * the target address. + */ +void +sdt_tracepoint_patch(uintptr_t patchpoint, uintptr_t target) +{ + uint32_t instr; + + KASSERT(sdt_tracepoint_valid(patchpoint, target), + ("%s: invalid tracepoint %#lx -> %#lx", + __func__, patchpoint, target)); + + instr = ((target - patchpoint) & 0x7fffffful) | 0x48000000; + memcpy((void *)patchpoint, &instr, sizeof(instr)); + __syncicache((void *)patchpoint, sizeof(instr)); +} + +/* + * Overwrite the patchpoint with a nop instruction. + */ +void +sdt_tracepoint_restore(uintptr_t patchpoint) +{ + uint32_t instr; + + instr = 0x60000000; + memcpy((void *)patchpoint, &instr, sizeof(instr)); + __syncicache((void *)patchpoint, sizeof(instr)); +} diff --git a/sys/riscv/include/sdt_machdep.h b/sys/riscv/include/sdt_machdep.h new file mode 100644 index 000000000000..160f5d404204 --- /dev/null +++ b/sys/riscv/include/sdt_machdep.h @@ -0,0 +1,12 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Mark Johnston + */ + +#ifndef _SYS_SDT_MACHDEP_H_ +#define _SYS_SDT_MACHDEP_H_ + +#define _SDT_ASM_PATCH_INSTR ".option push; .option norvc; nop; .option pop" + +#endif /* _SYS_SDT_MACHDEP_H_ */ diff --git a/sys/riscv/riscv/sdt_machdep.c b/sys/riscv/riscv/sdt_machdep.c new file mode 100644 index 000000000000..4b139c8edd98 --- /dev/null +++ b/sys/riscv/riscv/sdt_machdep.c @@ -0,0 +1,68 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Mark Johnston + */ + +#include +#include + +#include + +/* + * Return true if we can overwrite a nop at "patchpoint" with a jump to the + * target address. + */ +bool +sdt_tracepoint_valid(uintptr_t patchpoint, uintptr_t target) +{ + int64_t offset; + + if (patchpoint == target || + (patchpoint & (INSN_C_SIZE - 1)) != 0 || + (target & (INSN_C_SIZE - 1)) != 0) + return (false); + offset = target - patchpoint; + if (offset < -(1 << 19) || offset > (1 << 19)) + return (false); + return (true); +} + +/* + * Overwrite the copy of _SDT_ASM_PATCH_INSTR at the tracepoint with a jump to + * the target address. + */ +void +sdt_tracepoint_patch(uintptr_t patchpoint, uintptr_t target) +{ + int32_t imm; + uint32_t instr; + + KASSERT(sdt_tracepoint_valid(patchpoint, target), + ("%s: invalid tracepoint %#lx -> %#lx", + __func__, patchpoint, target)); + + imm = target - patchpoint; + imm = (imm & 0x100000) | + ((imm & 0x7fe) << 8) | + ((imm & 0x800) >> 2) | + ((imm & 0xff000) >> 12); + instr = (imm << 12) | MATCH_JAL; + + memcpy((void *)patchpoint, &instr, sizeof(instr)); + fence_i(); +} + +/* + * Overwrite the patchpoint with a nop instruction. + */ +void +sdt_tracepoint_restore(uintptr_t patchpoint) +{ + uint32_t instr; + + instr = 0x13; /* uncompressed nop */ + + memcpy((void *)patchpoint, &instr, sizeof(instr)); + fence_i(); +} diff --git a/sys/sys/sdt.h b/sys/sys/sdt.h index fabf4d72740c..147d58c53ef4 100644 --- a/sys/sys/sdt.h +++ b/sys/sys/sdt.h @@ -77,8 +77,8 @@ #else /* _KERNEL */ -#include #include +#include extern volatile bool sdt_probes_enabled; @@ -102,8 +102,6 @@ extern volatile bool sdt_probes_enabled; #define SDT_PROBE_DEFINE5(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4) #define SDT_PROBE_DEFINE6(prov, mod, func, name, arg0, arg1, arg2, \ arg3, arg4, arg5) -#define SDT_PROBE_DEFINE7(prov, mod, func, name, arg0, arg1, arg2, \ - arg3, arg4, arg5, arg6) #define SDT_PROBE0(prov, mod, func, name) #define SDT_PROBE1(prov, mod, func, name, arg0) @@ -112,8 +110,6 @@ extern volatile bool sdt_probes_enabled; #define SDT_PROBE4(prov, mod, func, name, arg0, arg1, arg2, arg3) #define SDT_PROBE5(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4) #define SDT_PROBE6(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4, arg5) -#define SDT_PROBE7(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4, arg5, \ - arg6) #define MIB_SDT_PROBE1(...) #define MIB_SDT_PROBE2(...) @@ -130,9 +126,6 @@ extern volatile bool sdt_probes_enabled; arg1, xarg1, arg2, xarg2, arg3, xarg3, arg4, xarg4) #define SDT_PROBE_DEFINE6_XLATE(prov, mod, func, name, arg0, xarg0, \ arg1, xarg1, arg2, xarg2, arg3, xarg3, arg4, xarg4, arg5, xarg5) -#define SDT_PROBE_DEFINE7_XLATE(prov, mod, func, name, arg0, xarg0, \ - arg1, xarg1, arg2, xarg2, arg3, xarg3, arg4, xarg4, arg5, xarg5, arg6, \ - xarg6) #define DTRACE_PROBE(name) #define DTRACE_PROBE1(name, type0, arg0) @@ -144,6 +137,18 @@ extern volatile bool sdt_probes_enabled; #else +void sdt_probe(uint32_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, + uintptr_t); +void sdt_probe6(uint32_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, + uintptr_t, uintptr_t); + +#define _SDT_TRACEPOINT_SET sdt_tracepoint_set +#define _SDT_TRACEPOINT_SECTION "set_sdt_tracepoint_set" + +bool sdt_tracepoint_valid(uintptr_t patchpoint, uintptr_t target); +void sdt_tracepoint_patch(uintptr_t patchpoint, uintptr_t target); +void sdt_tracepoint_restore(uintptr_t patchpoint); + #define __sdt_used SET_DECLARE(sdt_providers_set, struct sdt_provider); @@ -181,14 +186,58 @@ SET_DECLARE(sdt_argtypes_set, struct sdt_argtype); #define SDT_PROBES_ENABLED() __predict_false(sdt_probes_enabled) -#define SDT_PROBE(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4) do { \ - if (SDT_PROBES_ENABLED()) { \ - if (__predict_false(_SDT_PROBE_NAME(prov, mod, func, name)->id)) \ - (*sdt_probe_func)(_SDT_PROBE_NAME(prov, mod, func, name)->id, \ - (uintptr_t) arg0, (uintptr_t) arg1, (uintptr_t) arg2, \ - (uintptr_t) arg3, (uintptr_t) arg4); \ - } \ +#ifdef _ILP32 +#define _SDT_ASM_WORD ".long" +#else +#define _SDT_ASM_WORD ".quad" +#endif + +#ifndef _SDT_ASM_PROBE_CONSTRAINT +#define _SDT_ASM_PROBE_CONSTRAINT "i" +#endif +#ifndef _SDT_ASM_PROBE_OPERAND +#define _SDT_ASM_PROBE_OPERAND "c" +#endif + +/* + * The asm below generates records corresponding to the structure's layout, so + * the two must be kept in sync. + */ +struct sdt_tracepoint { + struct sdt_probe *probe; + uintptr_t patchpoint; + uintptr_t target; + STAILQ_ENTRY(sdt_tracepoint) tracepoint_entry; +}; + +#define __SDT_PROBE(prov, mod, func, name, uniq, f, ...) do { \ + __WEAK(__CONCAT(__start_set_, _SDT_TRACEPOINT_SET)); \ + __WEAK(__CONCAT(__stop_set_, _SDT_TRACEPOINT_SET)); \ + asm goto( \ + "0:\n" \ + _SDT_ASM_PATCH_INSTR "\n" \ + ".pushsection " _SDT_TRACEPOINT_SECTION ", \"aw\"\n" \ + _SDT_ASM_WORD " %" _SDT_ASM_PROBE_OPERAND "0\n" \ + _SDT_ASM_WORD " 0b\n" \ + _SDT_ASM_WORD " %l1\n" \ + _SDT_ASM_WORD " 0\n" \ + ".popsection\n" \ + : \ + : _SDT_ASM_PROBE_CONSTRAINT (_SDT_PROBE_NAME(prov, mod, \ + func, name)) \ + : \ + : __sdt_probe##uniq); \ + if (0) { \ +__sdt_probe##uniq:; \ + f(_SDT_PROBE_NAME(prov, mod, func, name)->id, __VA_ARGS__); \ + } \ } while (0) +#define _SDT_PROBE(prov, mod, func, name, uniq, f, ...) \ + __SDT_PROBE(prov, mod, func, name, uniq, f, __VA_ARGS__) +#define SDT_PROBE(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4) \ + _SDT_PROBE(prov, mod, func, name, __COUNTER__, sdt_probe, \ + (uintptr_t)arg0, (uintptr_t)arg1, (uintptr_t)arg2, \ + (uintptr_t)arg3, (uintptr_t)arg4) #define SDT_PROBE_ARGTYPE(_prov, _mod, _func, _name, _num, _type, _xtype) \ static struct sdt_argtype \ @@ -246,17 +295,6 @@ SET_DECLARE(sdt_argtypes_set, struct sdt_argtype); SDT_PROBE_ARGTYPE(prov, mod, func, name, 4, arg4, NULL); \ SDT_PROBE_ARGTYPE(prov, mod, func, name, 5, arg5, NULL) -#define SDT_PROBE_DEFINE7(prov, mod, func, name, arg0, arg1, arg2, arg3,\ - arg4, arg5, arg6) \ - SDT_PROBE_DEFINE(prov, mod, func, name); \ - SDT_PROBE_ARGTYPE(prov, mod, func, name, 0, arg0, NULL); \ - SDT_PROBE_ARGTYPE(prov, mod, func, name, 1, arg1, NULL); \ - SDT_PROBE_ARGTYPE(prov, mod, func, name, 2, arg2, NULL); \ - SDT_PROBE_ARGTYPE(prov, mod, func, name, 3, arg3, NULL); \ - SDT_PROBE_ARGTYPE(prov, mod, func, name, 4, arg4, NULL); \ - SDT_PROBE_ARGTYPE(prov, mod, func, name, 5, arg5, NULL); \ - SDT_PROBE_ARGTYPE(prov, mod, func, name, 6, arg6, NULL) - #define SDT_PROBE_DEFINE0_XLATE(prov, mod, func, name) \ SDT_PROBE_DEFINE(prov, mod, func, name) @@ -304,18 +342,6 @@ SET_DECLARE(sdt_argtypes_set, struct sdt_argtype); SDT_PROBE_ARGTYPE(prov, mod, func, name, 4, arg4, xarg4); \ SDT_PROBE_ARGTYPE(prov, mod, func, name, 5, arg5, xarg5) -#define SDT_PROBE_DEFINE7_XLATE(prov, mod, func, name, arg0, xarg0, \ - arg1, xarg1, arg2, xarg2, arg3, xarg3, arg4, xarg4, arg5, xarg5, arg6, \ - xarg6) \ - SDT_PROBE_DEFINE(prov, mod, func, name); \ - SDT_PROBE_ARGTYPE(prov, mod, func, name, 0, arg0, xarg0); \ - SDT_PROBE_ARGTYPE(prov, mod, func, name, 1, arg1, xarg1); \ - SDT_PROBE_ARGTYPE(prov, mod, func, name, 2, arg2, xarg2); \ - SDT_PROBE_ARGTYPE(prov, mod, func, name, 3, arg3, xarg3); \ - SDT_PROBE_ARGTYPE(prov, mod, func, name, 4, arg4, xarg4); \ - SDT_PROBE_ARGTYPE(prov, mod, func, name, 5, arg5, xarg5); \ - SDT_PROBE_ARGTYPE(prov, mod, func, name, 6, arg6, xarg6) - #define SDT_PROBE0(prov, mod, func, name) \ SDT_PROBE(prov, mod, func, name, 0, 0, 0, 0, 0) #define SDT_PROBE1(prov, mod, func, name, arg0) \ @@ -328,27 +354,10 @@ SET_DECLARE(sdt_argtypes_set, struct sdt_argtype); SDT_PROBE(prov, mod, func, name, arg0, arg1, arg2, arg3, 0) #define SDT_PROBE5(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4) \ SDT_PROBE(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4) -#define SDT_PROBE6(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4, arg5) \ - do { \ - if (_SDT_PROBE_NAME(prov, mod, func, name)->id) \ - (*(void (*)(uint32_t, uintptr_t, uintptr_t, uintptr_t, \ - uintptr_t, uintptr_t, uintptr_t))sdt_probe_func)( \ - _SDT_PROBE_NAME(prov, mod, func, name)->id, \ - (uintptr_t)arg0, (uintptr_t)arg1, (uintptr_t)arg2, \ - (uintptr_t)arg3, (uintptr_t)arg4, (uintptr_t)arg5);\ - } while (0) -#define SDT_PROBE7(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4, arg5, \ - arg6) \ - do { \ - if (_SDT_PROBE_NAME(prov, mod, func, name)->id) \ - (*(void (*)(uint32_t, uintptr_t, uintptr_t, uintptr_t, \ - uintptr_t, uintptr_t, uintptr_t, uintptr_t)) \ - sdt_probe_func)( \ - _SDT_PROBE_NAME(prov, mod, func, name)->id, \ - (uintptr_t)arg0, (uintptr_t)arg1, (uintptr_t)arg2, \ - (uintptr_t)arg3, (uintptr_t)arg4, (uintptr_t)arg5, \ - (uintptr_t)arg6); \ - } while (0) +#define SDT_PROBE6(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4, arg5) \ + _SDT_PROBE(prov, mod, func, name, __COUNTER__, sdt_probe6, \ + (uintptr_t)arg0, (uintptr_t)arg1, (uintptr_t)arg2, \ + (uintptr_t)arg3, (uintptr_t)arg4, (uintptr_t)arg5) #ifndef KDTRACE_NO_MIB_SDT #define MIB_SDT_PROBE1(...) SDT_PROBE1(mib, __VA_ARGS__) @@ -411,11 +420,14 @@ SET_DECLARE(sdt_argtypes_set, struct sdt_argtype); */ typedef void (*sdt_probe_func_t)(uint32_t, uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3, uintptr_t arg4); +typedef void (*sdt_probe6_func_t)(uint32_t, uintptr_t arg0, uintptr_t arg1, + uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5); /* * The 'sdt' provider will set it to dtrace_probe when it loads. */ -extern sdt_probe_func_t sdt_probe_func; +extern sdt_probe_func_t sdt_probe_func; +extern sdt_probe6_func_t sdt_probe6_func; struct sdt_probe; struct sdt_provider; @@ -436,6 +448,7 @@ struct sdt_probe { TAILQ_ENTRY(sdt_probe) probe_entry; /* SDT probe list entry. */ TAILQ_HEAD(, sdt_argtype) argtype_list; + STAILQ_HEAD(, sdt_tracepoint) tracepoint_list; const char *mod; const char *func; const char *name; diff --git a/sys/x86/x86/sdt_machdep.c b/sys/x86/x86/sdt_machdep.c new file mode 100644 index 000000000000..08b435de556e --- /dev/null +++ b/sys/x86/x86/sdt_machdep.c @@ -0,0 +1,84 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Mark Johnston + */ + +#include +#include + +#include +#include + +#include +#include + +#define SDT_PATCH_SIZE 5 + +/* + * Return true if we can overwrite a nop at "patchpoint" with a jump to the + * target address. + */ +bool +sdt_tracepoint_valid(uintptr_t patchpoint, uintptr_t target) +{ +#ifdef __amd64__ + if (patchpoint < KERNSTART || target < KERNSTART) + return (false); +#endif + if (patchpoint == target || + patchpoint + SDT_PATCH_SIZE < patchpoint) + return (false); +#ifdef __amd64__ + int64_t offset = target - (patchpoint + SDT_PATCH_SIZE); + if (offset < -(1l << 31) || offset > (1l << 31)) + return (false); +#endif + return (true); +} + +/* + * Overwrite the copy of _SDT_ASM_PATCH_INSTR at the tracepoint with a jump to + * the target address. + */ +void +sdt_tracepoint_patch(uintptr_t patchpoint, uintptr_t target) +{ + uint8_t instr[SDT_PATCH_SIZE]; + int32_t disp; + bool old_wp; + + KASSERT(sdt_tracepoint_valid(patchpoint, target), + ("%s: invalid tracepoint %#jx -> %#jx", + __func__, (uintmax_t)patchpoint, (uintmax_t)target)); + + instr[0] = 0xe9; + disp = target - (patchpoint + SDT_PATCH_SIZE); + memcpy(&instr[1], &disp, sizeof(disp)); + + old_wp = disable_wp(); + memcpy((void *)patchpoint, instr, sizeof(instr)); + restore_wp(old_wp); +} + +/* + * Overwrite the patchpoint with a nop instruction. + */ +void +sdt_tracepoint_restore(uintptr_t patchpoint) +{ + uint8_t instr[SDT_PATCH_SIZE]; + bool old_wp; + +#ifdef __amd64__ + KASSERT(patchpoint >= KERNSTART, + ("%s: invalid patchpoint %#lx", __func__, patchpoint)); +#endif + + for (int i = 0; i < SDT_PATCH_SIZE; i++) + instr[i] = 0x90; + + old_wp = disable_wp(); + memcpy((void *)patchpoint, instr, sizeof(instr)); + restore_wp(old_wp); +}