pmc: better distinguish pmu-events allocation path

Background:

The pm_ev field of struct pmc_op_pmcallocate and struct pmc
traditionally contains the index of the chosen event, corresponding to
the __PMC_EVENTS array in pmc_events.h. This is a static list of events,
maintained by FreeBSD.

In the usual case, libpmc translates the user-supplied event name
(string) into the pm_ev index, which is passed as an argument to the
allocation syscall. On the kernel side, the allocation method for the
relevant hwpmc class translates the given index into the event code that
will be written to an event selection register.
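
For illustration, the two-step translation under the traditional scheme
looks roughly like the following standalone sketch; the table contents,
helper names, and values are invented stand-ins for the real
libpmc/hwpmc internals:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* A stand-in for the static event list generated from __PMC_EVENTS. */
    struct event_descr {
            const char *ev_name;    /* user-visible event name */
            int         ev_index;   /* value carried in pm_ev */
            uint32_t    ev_code;    /* hardware event selector code */
    };

    static const struct event_descr events[] = {
            { "dc-misses",     0x1000, 0x41 },
            { "branch-misses", 0x1001, 0xc5 },
    };

    /* libpmc side: event name (string) -> pm_ev index. */
    static int
    name_to_pm_ev(const char *name)
    {
            for (size_t i = 0; i < sizeof(events) / sizeof(events[0]); i++)
                    if (strcmp(events[i].ev_name, name) == 0)
                            return (events[i].ev_index);
            return (-1);
    }

    /* kernel side, class allocation method: pm_ev index -> event code. */
    static uint32_t
    pm_ev_to_code(int pm_ev)
    {
            for (size_t i = 0; i < sizeof(events) / sizeof(events[0]); i++)
                    if (events[i].ev_index == pm_ev)
                            return (events[i].ev_code);
            return (0);
    }

    int
    main(void)
    {
            int pm_ev = name_to_pm_ev("branch-misses");

            printf("pm_ev=%#x -> evsel=%#x\n", (unsigned)pm_ev,
                pm_ev_to_code(pm_ev));
            return (0);
    }

Both sides consult the same static table, which is why the kernel can
validate and translate pm_ev on its own.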

In 2018, a new source of performance event definitions was introduced:
the pmu-events json files, which are maintained by the Linux kernel. The
result was better coverage for newer Intel processors with a reduced
maintenance burden for libpmc/hwpmc. Intel and AMD CPUs were
unconditionally switched to allocate events from pmu-events instead of
the traditional scheme (959826ca1b, 81eb4dcf9e).

Under the pmu-events scheme, the pm_ev field contains an index
corresponding to the selected event from the pmu-events table, something
which the kernel has no knowledge of. The configuration for the
performance counting registers is instead passed via class-dependent
fields (struct pmc_md_op_pmcallocate).
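
Schematically, for arm64 under the pmu-events scheme (a sketch reusing
the field names visible in the diffs below; the numeric values and the
helper function are invented):

    #include <string.h>
    #include <sys/pmc.h>

    /*
     * Sketch of a pmu-events allocation request. The kernel does not
     * interpret pm_ev here; the event selector it must program travels
     * in the class-dependent MD fields instead.
     */
    static void
    fill_request(struct pmc_op_pmcallocate *pa)
    {
            memset(pa, 0, sizeof(*pa));
            pa->pm_class = PMC_CLASS_ARMV8;
            pa->pm_caps = PMC_CAP_READ | PMC_CAP_WRITE;
            pa->pm_ev = 123;                /* pmu-events table index */
            pa->pm_md.pm_md_config = 0x2a;  /* raw event code from json */
    }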

In 2021 I changed the allocation logic so that it would attempt to
pull from the pmu-events table first, and fall back to the traditional
method (dfb4fb4116). Later, pmu-events support for arm64 and power8
CPUs was added (28dd6730a5 and b48a2770d4).

The problem that remains is that the pm_ev field is overloaded, without
a definitive way to determine whether the event allocation came from the
pmu-events table or FreeBSD's statically-defined PMC events. This
resulted in a recent fix, 21f7397a61.

Change:

To disambiguate these two supported but separate use-cases, add a new
flag, PMC_F_EV_PMU, to be set as part of the allocation, indicating that
the event index came from pmu-events.

This is useful in two ways:
 1. On the kernel side, we can validate the syscall arguments better.
    Some classes support only the traditional event scheme (e.g.
    hwpmc_armv7), while others support only the pmu-events method (e.g.
    hwpmc_core for Intel). We can now check for this. The hwpmc_arm64
    class supports both methods, so the new flag supersedes the existing
    MD flag, PM_MD_RAW_EVENT.

 2. The flag will be tracked in struct pmc for the duration of its
    lifetime, meaning it is communicated back to userspace. This allows
    libpmc to perform the reverse index-to-event-name translation
    without speculating about the meaning of the index value, as
    sketched below.
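
As a minimal sketch of point 2, mirroring the pmclog.c hunk below;
static_event_name() is a hypothetical stand-in for the static-table
lookup, while pmc_pmu_event_get_by_idx() is the libpmc routine used in
that hunk:

    #include <pmc.h>

    static const char *
    resolve_event_name(const char *cpuid, uint32_t flags, uint32_t event)
    {
            if ((flags & PMC_F_EV_PMU) != 0)
                    /* event is a row in the pmu-events table. */
                    return (pmc_pmu_event_get_by_idx(cpuid, event));
            /* event is a static PMC event code; helper name invented. */
            return (static_event_name(event));
    }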

Adding the flag is a backwards-incompatible ABI change. We recently
bumped the major version of the hwpmc module, so this breakage is
acceptable.

Reviewed by:	jkoshy
MFC after:	3 days
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D40753
Author:	Mitchell Horne
Date:	2023-06-06 14:26:46 -03:00
Commit:	c190fb35f3 (parent 45dcc17e2f)

11 files changed, 36 insertions(+), 23 deletions(-)

diff --git a/lib/libpmc/libpmc.c b/lib/libpmc/libpmc.c

@@ -33,7 +33,6 @@
 #include <sys/pmc.h>
 #include <sys/syscall.h>
 
-#include <assert.h>
 #include <ctype.h>
 #include <errno.h>
 #include <err.h>
@@ -1083,14 +1082,8 @@ pmc_allocate(const char *ctrspec, enum pmc_mode mode,
 	r = spec_copy = strdup(ctrspec);
 	ctrname = strsep(&r, ",");
 	if (pmc_pmu_enabled()) {
-		if (pmc_pmu_pmcallocate(ctrname, &pmc_config) == 0) {
-			/*
-			 * XXX: pmclog_get_event exploits this to disambiguate
-			 * PMU from PMC event codes in PMCALLOCATE events.
-			 */
-			assert(pmc_config.pm_ev < PMC_EVENT_FIRST);
+		if (pmc_pmu_pmcallocate(ctrname, &pmc_config) == 0)
 			goto found;
-		}
 	}
 	free(spec_copy);
 	spec_copy = NULL;

diff --git a/lib/libpmc/libpmc_pmu_util.c b/lib/libpmc/libpmc_pmu_util.c

@@ -649,7 +649,6 @@ pmc_pmu_pmcallocate_md(const char *event_name, struct pmc_op_pmcallocate *pm)
 	assert(idx >= 0);
 	pm->pm_ev = idx;
 	pm->pm_md.pm_md_config = ped.ped_event;
-	pm->pm_md.pm_md_flags |= PM_MD_RAW_EVENT;
 	pm->pm_class = PMC_CLASS_ARMV8;
 	pm->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE);
@@ -680,5 +679,6 @@ pmc_pmu_pmcallocate(const char *event_name, struct pmc_op_pmcallocate *pm)
 		return (error);
 	}
+	pm->pm_flags |= PMC_F_EV_PMU;
 	return (0);
 }

diff --git a/lib/libpmc/pmclog.c b/lib/libpmc/pmclog.c

@@ -357,15 +357,10 @@ pmclog_get_event(void *cookie, char **data, ssize_t *len,
 		PMCLOG_READ64(le,ev->pl_u.pl_a.pl_rate);
 
 		/*
-		 * Could be either a PMC event code or a PMU event index;
-		 * assume that their encodings don't overlap (i.e. no PMU event
-		 * table is more than 0x1000 entries) to distinguish them here.
-		 * Otherwise pmc_pmu_event_get_by_idx will go out of bounds if
-		 * given a PMC event code when it knows about that CPU.
-		 *
-		 * XXX: Ideally we'd have user flags to give us that context.
+		 * pl_event could contain either a PMC event code or a PMU
+		 * event index.
 		 */
-		if (ev->pl_u.pl_a.pl_event < PMC_EVENT_FIRST)
+		if ((ev->pl_u.pl_a.pl_flags & PMC_F_EV_PMU) != 0)
 			ev->pl_u.pl_a.pl_evname =
 			    pmc_pmu_event_get_by_idx(ps->ps_cpuid,
 				ev->pl_u.pl_a.pl_event);

diff --git a/sys/arm64/include/pmc_mdep.h b/sys/arm64/include/pmc_mdep.h

@@ -45,8 +45,6 @@
 union pmc_md_op_pmcallocate {
 	struct {
 		uint32_t	pm_md_config;
-		uint32_t	pm_md_flags;
-#define	PM_MD_RAW_EVENT	0x1
 	};
 	struct pmc_md_cmn600_pmu_op_pmcallocate	pm_cmn600;
 	struct pmc_md_dmc620_pmu_op_pmcallocate	pm_dmc620;

diff --git a/sys/dev/hwpmc/hwpmc_amd.c b/sys/dev/hwpmc/hwpmc_amd.c

@@ -582,6 +582,9 @@ amd_allocate_pmc(int cpu, int ri, struct pmc *pm,
 	if (pd->pd_class != a->pm_class)
 		return EINVAL;
 
+	if ((a->pm_flags & PMC_F_EV_PMU) == 0)
+		return (EINVAL);
+
 	caps = pm->pm_caps;
 	PMCDBG2(MDP,ALL,1,"amd-allocate ri=%d caps=0x%x", ri, caps);

diff --git a/sys/dev/hwpmc/hwpmc_arm64.c b/sys/dev/hwpmc/hwpmc_arm64.c

@@ -177,9 +177,9 @@ arm64_allocate_pmc(int cpu, int ri, struct pmc *pm,
 	}
 	pe = a->pm_ev;
 
 	/* Adjust the config value if needed. */
-	config = a->pm_md.pm_md_config;
-	if ((a->pm_md.pm_md_flags & PM_MD_RAW_EVENT) == 0) {
+	if ((a->pm_flags & PMC_F_EV_PMU) != 0) {
+		config = a->pm_md.pm_md_config;
+	} else {
 		config = (uint32_t)pe - PMC_EV_ARMV8_FIRST;
 		if (config > (PMC_EV_ARMV8_LAST - PMC_EV_ARMV8_FIRST))
 			return (EINVAL);

diff --git a/sys/dev/hwpmc/hwpmc_core.c b/sys/dev/hwpmc/hwpmc_core.c

@@ -239,6 +239,9 @@ iaf_allocate_pmc(int cpu, int ri, struct pmc *pm,
 	if (a->pm_class != PMC_CLASS_IAF)
 		return (EINVAL);
 
+	if ((a->pm_flags & PMC_F_EV_PMU) == 0)
+		return (EINVAL);
+
 	iap = &a->pm_md.pm_iap;
 	config = iap->pm_iap_config;
 	ev = IAP_EVSEL_GET(config);
@@ -721,6 +724,9 @@ iap_allocate_pmc(int cpu, int ri, struct pmc *pm,
 	if (a->pm_class != PMC_CLASS_IAP)
 		return (EINVAL);
 
+	if ((a->pm_flags & PMC_F_EV_PMU) == 0)
+		return (EINVAL);
+
 	iap = &a->pm_md.pm_iap;
 	ev = IAP_EVSEL_GET(iap->pm_iap_config);

diff --git a/sys/dev/hwpmc/hwpmc_mod.c b/sys/dev/hwpmc/hwpmc_mod.c

@@ -3349,7 +3349,8 @@ pmc_do_op_pmcallocate(struct thread *td, struct pmc_op_pmcallocate *pa)
 	/*
 	 * Look for valid values for 'pm_flags'.
 	 */
 	if ((flags & ~(PMC_F_DESCENDANTS | PMC_F_LOG_PROCCSW |
-	    PMC_F_LOG_PROCEXIT | PMC_F_CALLCHAIN | PMC_F_USERCALLCHAIN)) != 0)
+	    PMC_F_LOG_PROCEXIT | PMC_F_CALLCHAIN | PMC_F_USERCALLCHAIN |
+	    PMC_F_EV_PMU)) != 0)
 		return (EINVAL);
 	/* PMC_F_USERCALLCHAIN is only valid with PMC_F_CALLCHAIN. */

diff --git a/sys/dev/hwpmc/hwpmc_power8.c b/sys/dev/hwpmc/hwpmc_power8.c

@@ -168,6 +168,9 @@ power8_allocate_pmc(int cpu, int ri, struct pmc *pm,
 	if (a->pm_class != PMC_CLASS_POWER8)
 		return (EINVAL);
 
+	if ((a->pm_flags & PMC_F_EV_PMU) == 0)
+		return (EINVAL);
+
 	/*
 	 * PMC5 and PMC6 are not programmable and always count instructions
 	 * completed and cycles, respectively.

diff --git a/sys/dev/hwpmc/hwpmc_uncore.c b/sys/dev/hwpmc/hwpmc_uncore.c

@@ -199,6 +199,9 @@ ucf_allocate_pmc(int cpu, int ri, struct pmc *pm,
 	if (a->pm_class != PMC_CLASS_UCF)
 		return (EINVAL);
 
+	if ((a->pm_flags & PMC_F_EV_PMU) == 0)
+		return (EINVAL);
+
 	flags = UCF_EN;
 
 	pm->pm_md.pm_ucf.pm_ucf_ctrl = (flags << (ri * 4));
@@ -498,6 +501,9 @@ ucp_allocate_pmc(int cpu, int ri, struct pmc *pm,
 	if (a->pm_class != PMC_CLASS_UCP)
 		return (EINVAL);
 
+	if ((a->pm_flags & PMC_F_EV_PMU) == 0)
+		return (EINVAL);
+
 	ucp = &a->pm_md.pm_ucp;
 	ev = UCP_EVSEL(ucp->pm_ucp_config);
 	switch (uncore_cputype) {

diff --git a/sys/sys/pmc.h b/sys/sys/pmc.h

@@ -369,6 +369,14 @@ enum pmc_ops {
 #define	PMC_F_CALLCHAIN		0x00000080 /*OP ALLOCATE capture callchains */
 #define	PMC_F_USERCALLCHAIN	0x00000100 /*OP ALLOCATE use userspace stack */
+/* V10 API */
+#define	PMC_F_EV_PMU		0x00000200 /*
+					    * OP ALLOCATE: pm_ev has special
+					    * userspace meaning; counter
+					    * configuration is communicated
+					    * through class-dependent fields
+					    */
+
 /* internal flags */
 #define	PMC_F_ATTACHED_TO_OWNER	0x00010000 /*attached to owner*/
 #define	PMC_F_NEEDS_LOGFILE	0x00020000 /*needs log file */