2005-06-09 19:45:09 +00:00
|
|
|
/*-
|
2023-05-10 15:40:58 +00:00
|
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
2017-11-26 02:00:33 +00:00
|
|
|
*
|
2007-12-03 11:15:46 +00:00
|
|
|
* Copyright (c) 2005-2007 Joseph Koshy
|
|
|
|
* Copyright (c) 2007 The FreeBSD Foundation
|
2005-06-09 19:45:09 +00:00
|
|
|
* All rights reserved.
|
|
|
|
*
|
2007-12-03 11:15:46 +00:00
|
|
|
* Portions of this software were developed by A. Joseph Koshy under
|
|
|
|
* sponsorship from the FreeBSD Foundation and Google, Inc.
|
|
|
|
*
|
2005-06-09 19:45:09 +00:00
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <sys/param.h>
|
|
|
|
#include <sys/pmc.h>
|
|
|
|
#include <sys/pmclog.h>
|
|
|
|
|
|
|
|
#include <assert.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <pmc.h>
|
|
|
|
#include <pmclog.h>
|
|
|
|
#include <stddef.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <strings.h>
|
|
|
|
#include <unistd.h>
|
2018-05-26 19:29:19 +00:00
|
|
|
#include <stdio.h>
|
2005-06-09 19:45:09 +00:00
|
|
|
|
|
|
|
#include <machine/pmc_mdep.h>
|
|
|
|
|
- Add support for PMCs in Intel CPUs of Family 6, model 0xE (Core Solo
and Core Duo), models 0xF (Core2), model 0x17 (Core2Extreme) and
model 0x1C (Atom).
In these CPUs, the actual numbers, kinds and widths of PMCs present
need to queried at run time. Support for specific "architectural"
events also needs to be queried at run time.
Model 0xE CPUs support programmable PMCs, subsequent CPUs
additionally support "fixed-function" counters.
- Use event names that are close to vendor documentation, taking in
account that:
- events with identical semantics on two or more CPUs in this family
can have differing names in vendor documentation,
- identical vendor event names may map to differing events across
CPUs,
- each type of CPU supports a different subset of measurable
events.
Fixed-function and programmable counters both use the same vendor
names for events. The use of a class name prefix ("iaf-" or
"iap-" respectively) permits these to be distinguished.
- In libpmc, refactor pmc_name_of_event() into a public interface
and an internal helper function, for use by log handling code.
- Minor code tweaks: staticize a global, freshen a few comments.
Tested by: gnn
2008-11-27 09:00:47 +00:00
|
|
|
#include "libpmcinternal.h"
|
|
|
|
|
2018-06-07 02:03:22 +00:00
|
|
|
/*
 * Size of the read buffer used by file-backed parsers.  Parenthesized so
 * the macro expands safely inside any expression.
 */
#define	PMCLOG_BUFFER_SIZE	(512*1024)
|
2005-06-09 19:45:09 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* API NOTES
|
|
|
|
*
|
|
|
|
* The pmclog(3) API is oriented towards parsing an event stream in
|
|
|
|
* "realtime", i.e., from an data source that may or may not preserve
|
|
|
|
* record boundaries -- for example when the data source is elsewhere
|
|
|
|
* on a network. The API allows data to be fed into the parser zero
|
|
|
|
* or more bytes at a time.
|
|
|
|
*
|
|
|
|
* The state for a log file parser is maintained in a 'struct
|
|
|
|
* pmclog_parse_state'. Parser invocations are done by calling
|
|
|
|
* 'pmclog_read()'; this function will inform the caller when a
|
|
|
|
* complete event is parsed.
|
|
|
|
*
|
|
|
|
* The parser first assembles a complete log file event in an internal
|
|
|
|
* work area (see "ps_saved" below). Once a complete log file event
|
|
|
|
* is read, the parser then parses it and converts it to an event
|
|
|
|
* descriptor usable by the client. We could possibly avoid this two
|
|
|
|
* step process by directly parsing the input log to set fields in the
|
|
|
|
* event record. However the parser's state machine would get
|
|
|
|
* insanely complicated, and this code is unlikely to be used in
|
|
|
|
* performance critical paths.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
 * Helper macros for walking a saved log record as a little-endian
 * stream of 32-bit words ('LE' is always a uint32_t cursor).
 */

/* Read the 32-bit record header from the parser's reassembly area. */
#define	PMCLOG_HEADER_FROM_SAVED_STATE(PS)				\
	(* ((uint32_t *) &(PS)->ps_saved))

/* Point cursor LE at the start of saved record A. */
#define	PMCLOG_INITIALIZE_READER(LE,A)	LE = (uint32_t *) &(A)

/* Skip one 32-bit word (e.g. explicit padding in a record). */
#define	PMCLOG_SKIP32(LE)	(LE)++

/* Read a 32-bit field and advance the cursor. */
#define	PMCLOG_READ32(LE,V) 	do {					\
		(V)  = *(LE)++;						\
	} while (0)

/* Read a 64-bit field stored as two 32-bit words, low word first. */
#define	PMCLOG_READ64(LE,V)	do {					\
		uint64_t _v;						\
		_v  = (uint64_t) *(LE)++;				\
		_v |= ((uint64_t) *(LE)++) << 32;			\
		(V) = _v;						\
	} while (0)

/*
 * Copy a NUL-terminated string out of the record; strlcpy() guarantees
 * termination of DST for LEN > 0.  Note: does not advance the cursor.
 */
#define	PMCLOG_READSTRING(LE,DST,LEN)	strlcpy((DST), (char *) (LE), (LEN))
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Assemble a log record from '*len' octets starting from address '*data'.
|
|
|
|
* Update 'data' and 'len' to reflect the number of bytes consumed.
|
|
|
|
*
|
|
|
|
* '*data' is potentially an unaligned address and '*len' octets may
|
|
|
|
* not be enough to complete a event record.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
 * Assemble one complete log record into 'ps->ps_saved' from the '*len'
 * octets starting at '*data', consuming as many bytes as possible.
 * '*data' and '*len' are advanced past the consumed bytes.
 *
 * The parser is a three-state machine:
 *   PL_STATE_NEW_RECORD       - reassembly area empty, start a new record
 *   PL_STATE_EXPECTING_HEADER - have fewer than 4 header bytes saved
 *   PL_STATE_PARTIAL_RECORD   - have the header, record body incomplete
 *
 * Returns the (possibly updated) parser state; PL_STATE_NEW_RECORD on
 * return means a complete record is available in 'ps->ps_saved'.
 */
static enum pmclog_parser_state
pmclog_get_record(struct pmclog_parse_state *ps, char **data, ssize_t *len)
{
	int avail, copylen, recordsize, used;
	uint32_t h;
	const int HEADERSIZE = sizeof(uint32_t);
	char *src, *dst;

	/* A non-positive byte count is a caller error; poison the parser. */
	if ((avail = *len) <= 0)
		return (ps->ps_state = PL_STATE_ERROR);

	src = *data;
	used = 0;

	/* A fresh record starts with an empty reassembly area. */
	if (ps->ps_state == PL_STATE_NEW_RECORD)
		ps->ps_svcount = 0;

	/* Append point inside the reassembly area. */
	dst = (char *) &ps->ps_saved + ps->ps_svcount;

	switch (ps->ps_state) {
	case PL_STATE_NEW_RECORD:

		/*
		 * Transitions:
		 *
		 * Case A: avail < headersize
		 *	-> 'expecting header'
		 *
		 * Case B: avail >= headersize
		 *	B.1: avail < recordsize
		 *		-> 'partial record'
		 *	B.2: avail >= recordsize
		 *		-> 'new record'
		 */

		copylen = avail < HEADERSIZE ? avail : HEADERSIZE;
		bcopy(src, dst, copylen);
		ps->ps_svcount = used = copylen;

		if (copylen < HEADERSIZE) {
			ps->ps_state = PL_STATE_EXPECTING_HEADER;
			goto done;
		}

		src += copylen;
		dst += copylen;

		/* Header complete: the record length is encoded in it. */
		h = PMCLOG_HEADER_FROM_SAVED_STATE(ps);
		recordsize = PMCLOG_HEADER_TO_LENGTH(h);

		if (recordsize <= 0)
			goto error;

		if (recordsize <= avail) { /* full record available */
			bcopy(src, dst, recordsize - copylen);
			ps->ps_svcount = used = recordsize;
			goto done;
		}

		/* header + a partial record is available */
		bcopy(src, dst, avail - copylen);
		ps->ps_svcount = used = avail;
		ps->ps_state = PL_STATE_PARTIAL_RECORD;

		break;

	case PL_STATE_EXPECTING_HEADER:

		/*
		 * Transitions:
		 *
		 * Case C: avail+saved < headersize
		 *	-> 'expecting header'
		 *
		 * Case D: avail+saved >= headersize
		 *	D.1: avail+saved < recordsize
		 *		-> 'partial record'
		 *	D.2: avail+saved >= recordsize
		 *		-> 'new record'
		 *	    (see PARTIAL_RECORD handling below)
		 */

		if (avail + ps->ps_svcount < HEADERSIZE) {
			bcopy(src, dst, avail);
			ps->ps_svcount += avail;
			used = avail;
			break;
		}

		/* Complete the header, then fall through to read the body. */
		used = copylen = HEADERSIZE - ps->ps_svcount;
		bcopy(src, dst, copylen);
		src += copylen;
		dst += copylen;
		avail -= copylen;
		ps->ps_svcount += copylen;

		/*FALLTHROUGH*/

	case PL_STATE_PARTIAL_RECORD:

		/*
		 * Transitions:
		 *
		 * Case E: avail+saved < recordsize
		 *	-> 'partial record'
		 *
		 * Case F: avail+saved >= recordsize
		 *	-> 'new record'
		 */

		h = PMCLOG_HEADER_FROM_SAVED_STATE(ps);
		recordsize = PMCLOG_HEADER_TO_LENGTH(h);

		if (recordsize <= 0)
			goto error;

		if (avail + ps->ps_svcount < recordsize) {
			copylen = avail;
			ps->ps_state = PL_STATE_PARTIAL_RECORD;
		} else {
			copylen = recordsize - ps->ps_svcount;
			ps->ps_state = PL_STATE_NEW_RECORD;
		}

		bcopy(src, dst, copylen);
		ps->ps_svcount += copylen;
		used += copylen;
		break;

	default:
		goto error;
	}

 done:
	*data += used;
	*len -= used;
	return ps->ps_state;

 error:
	ps->ps_state = PL_STATE_ERROR;
	return ps->ps_state;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get an event from the stream pointed to by '*data'. '*len'
|
|
|
|
* indicates the number of bytes available to parse. Arguments
|
|
|
|
* '*data' and '*len' are updated to indicate the number of bytes
|
|
|
|
* consumed.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int
|
|
|
|
pmclog_get_event(void *cookie, char **data, ssize_t *len,
|
|
|
|
struct pmclog_ev *ev)
|
|
|
|
{
|
|
|
|
int evlen, pathlen;
|
2023-06-20 19:31:43 +00:00
|
|
|
uint32_t h, *le, npc;
|
2005-06-09 19:45:09 +00:00
|
|
|
enum pmclog_parser_state e;
|
|
|
|
struct pmclog_parse_state *ps;
|
2018-06-07 02:03:22 +00:00
|
|
|
struct pmclog_header *ph;
|
2005-06-09 19:45:09 +00:00
|
|
|
|
|
|
|
ps = (struct pmclog_parse_state *) cookie;
|
|
|
|
|
|
|
|
assert(ps->ps_state != PL_STATE_ERROR);
|
|
|
|
|
|
|
|
if ((e = pmclog_get_record(ps,data,len)) == PL_STATE_ERROR) {
|
|
|
|
ev->pl_state = PMCLOG_ERROR;
|
2018-05-26 19:29:19 +00:00
|
|
|
printf("state error\n");
|
2005-06-09 19:45:09 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (e != PL_STATE_NEW_RECORD) {
|
|
|
|
ev->pl_state = PMCLOG_REQUIRE_DATA;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
PMCLOG_INITIALIZE_READER(le, ps->ps_saved);
|
2018-06-04 04:59:48 +00:00
|
|
|
ev->pl_data = le;
|
2018-06-07 02:03:22 +00:00
|
|
|
ph = (struct pmclog_header *)(uintptr_t)le;
|
2005-06-09 19:45:09 +00:00
|
|
|
|
2018-06-07 02:03:22 +00:00
|
|
|
h = ph->pl_header;
|
2005-06-09 19:45:09 +00:00
|
|
|
if (!PMCLOG_HEADER_CHECK_MAGIC(h)) {
|
2018-05-26 19:29:19 +00:00
|
|
|
printf("bad magic\n");
|
2005-06-09 19:45:09 +00:00
|
|
|
ps->ps_state = PL_STATE_ERROR;
|
|
|
|
ev->pl_state = PMCLOG_ERROR;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* copy out the time stamp */
|
2018-06-07 02:03:22 +00:00
|
|
|
ev->pl_ts.tv_sec = ph->pl_tsc;
|
|
|
|
le += sizeof(*ph)/4;
|
2005-06-09 19:45:09 +00:00
|
|
|
|
|
|
|
evlen = PMCLOG_HEADER_TO_LENGTH(h);
|
|
|
|
|
|
|
|
#define PMCLOG_GET_PATHLEN(P,E,TYPE) do { \
|
|
|
|
(P) = (E) - offsetof(struct TYPE, pl_pathname); \
|
|
|
|
if ((P) > PATH_MAX || (P) < 0) \
|
|
|
|
goto error; \
|
|
|
|
} while (0)
|
|
|
|
|
2007-12-03 11:15:46 +00:00
|
|
|
#define PMCLOG_GET_CALLCHAIN_SIZE(SZ,E) do { \
|
|
|
|
(SZ) = ((E) - offsetof(struct pmclog_callchain, pl_pc)) \
|
|
|
|
/ sizeof(uintfptr_t); \
|
|
|
|
} while (0);
|
|
|
|
|
2005-06-09 19:45:09 +00:00
|
|
|
switch (ev->pl_type = PMCLOG_HEADER_TO_TYPE(h)) {
|
2007-12-03 11:15:46 +00:00
|
|
|
case PMCLOG_TYPE_CALLCHAIN:
|
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_pid);
|
2018-05-23 17:25:00 +00:00
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_tid);
|
2007-12-03 11:15:46 +00:00
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_pmcid);
|
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_cpuflags);
|
|
|
|
PMCLOG_GET_CALLCHAIN_SIZE(ev->pl_u.pl_cc.pl_npc,evlen);
|
|
|
|
for (npc = 0; npc < ev->pl_u.pl_cc.pl_npc; npc++)
|
|
|
|
PMCLOG_READADDR(le,ev->pl_u.pl_cc.pl_pc[npc]);
|
|
|
|
for (;npc < PMC_CALLCHAIN_DEPTH_MAX; npc++)
|
|
|
|
ev->pl_u.pl_cc.pl_pc[npc] = (uintfptr_t) 0;
|
|
|
|
break;
|
2005-06-09 19:45:09 +00:00
|
|
|
case PMCLOG_TYPE_CLOSELOG:
|
2018-01-17 16:41:22 +00:00
|
|
|
ev->pl_state = PMCLOG_EOF;
|
|
|
|
return (-1);
|
2005-06-09 19:45:09 +00:00
|
|
|
case PMCLOG_TYPE_DROPNOTIFY:
|
|
|
|
/* nothing to do */
|
|
|
|
break;
|
|
|
|
case PMCLOG_TYPE_INITIALIZE:
|
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_i.pl_version);
|
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_i.pl_arch);
|
2018-06-07 02:03:22 +00:00
|
|
|
PMCLOG_READ64(le,ev->pl_u.pl_i.pl_tsc_freq);
|
|
|
|
memcpy(&ev->pl_u.pl_i.pl_ts, le, sizeof(struct timespec));
|
|
|
|
le += sizeof(struct timespec)/4;
|
2018-06-04 02:05:48 +00:00
|
|
|
PMCLOG_READSTRING(le, ev->pl_u.pl_i.pl_cpuid, PMC_CPUID_LEN);
|
2018-06-04 04:59:48 +00:00
|
|
|
memcpy(ev->pl_u.pl_i.pl_cpuid, le, PMC_CPUID_LEN);
|
2018-06-06 02:48:09 +00:00
|
|
|
ps->ps_cpuid = strdup(ev->pl_u.pl_i.pl_cpuid);
|
2005-06-09 19:45:09 +00:00
|
|
|
ps->ps_version = ev->pl_u.pl_i.pl_version;
|
|
|
|
ps->ps_arch = ev->pl_u.pl_i.pl_arch;
|
|
|
|
ps->ps_initialized = 1;
|
|
|
|
break;
|
2006-03-26 12:20:54 +00:00
|
|
|
case PMCLOG_TYPE_MAP_IN:
|
|
|
|
PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_map_in);
|
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_mi.pl_pid);
|
2023-06-20 19:31:43 +00:00
|
|
|
PMCLOG_SKIP32(le);
|
2006-03-26 12:20:54 +00:00
|
|
|
PMCLOG_READADDR(le,ev->pl_u.pl_mi.pl_start);
|
|
|
|
PMCLOG_READSTRING(le, ev->pl_u.pl_mi.pl_pathname, pathlen);
|
|
|
|
break;
|
|
|
|
case PMCLOG_TYPE_MAP_OUT:
|
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_mo.pl_pid);
|
2023-06-20 19:31:43 +00:00
|
|
|
PMCLOG_SKIP32(le);
|
2006-03-26 12:20:54 +00:00
|
|
|
PMCLOG_READADDR(le,ev->pl_u.pl_mo.pl_start);
|
|
|
|
PMCLOG_READADDR(le,ev->pl_u.pl_mo.pl_end);
|
2005-06-09 19:45:09 +00:00
|
|
|
break;
|
|
|
|
case PMCLOG_TYPE_PMCALLOCATE:
|
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_a.pl_pmcid);
|
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_a.pl_event);
|
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_a.pl_flags);
|
2023-06-20 19:31:43 +00:00
|
|
|
PMCLOG_SKIP32(le);
|
2018-06-06 02:48:09 +00:00
|
|
|
PMCLOG_READ64(le,ev->pl_u.pl_a.pl_rate);
|
libpmc: Handle PMCALLOCATE log with PMC code on PMU event system
On an arm64 system that reports as a Cortex A72 r0p3, running
pmcstat -P CPU_CYCLES command
works, but
pmcstat -P cpu-cycles command
does not. This is because the former uses the PMU event from the JSON
source, resulting in pl_event in the log event being a small index
(here, 5) into the generated events table, whilst the latter does not
match any of the JSON events and falls back on PMC's own tables, mapping
it to the PMC event 0x14111, i.e. PMC_EV_ARMV8_EVENT_11H. Then, when
libpmc gets the PMCALLOCATE event, it tries to use the event as an index
into the JSON-derived table, but doing so only makes sense for the
former, whilst for the latter it will go way out of bounds and either
read junk (which may trigger the != NULL assertion) or segfault. As far
as I can tell we don't have anything lying around to tell us which of
the two cases we're in, but we can exploit the fact that the first
0x1000 PMC event codes are reserved, and that none of our PMU events
tables reach that number of entries yet.
PR: 268857
Reviewed by: mhorne
MFC after: 1 month
Differential Revision: https://reviews.freebsd.org/D39592
2023-06-07 14:21:18 +00:00
|
|
|
|
|
|
|
/*
|
pmc: better distinguish pmu-events allocation path
Background:
The pm_ev field of struct pmc_op_pmcallocate and struct pmc
traditionally contains the index of the chosen event, corresponding to
the __PMC_EVENTS array in pmc_events.h. This is a static list of events,
maintained by FreeBSD.
In the usual case, libpmc translates the user supplied event name
(string) into the pm_ev index, which is passed as an argument to the
allocation syscall. On the kernel side, the allocation method for the
relevant hwpmc class translates the given index into the event code that
will be written to an event selection register.
In 2018, a new source of performance event definitions was introduced:
the pmu-events json files, which are maintained by the Linux kernel. The
result was better coverage for newer Intel processors with a reduced
maintenance burden for libpmc/hwpmc. Intel and AMD CPUs were
unconditionally switched to allocate events from pmu-events instead of
the traditional scheme (959826ca1bb0a, 81eb4dcf9e0d).
Under the pmu-events scheme, the pm_ev field contains an index
corresponding to the selected event from the pmu-events table, something
which the kernel has no knowledge of. The configuration for the
performance counting registers is instead passed via class-dependent
fields (struct pmc_md_op_pmcallocate).
In 2021 I changed the allocation logic so that it would attempt to
pull from the pmu-events table first, and fall-back to the traditional
method (dfb4fb41166bc3). Later, pmu-events support for arm64 and power8
CPUs was added (28dd6730a5d6 and b48a2770d48b).
The problem that remains is that the pm_ev field is overloaded, without
a definitive way to determine whether the event allocation came from the
pmu-events table or FreeBSD's statically-defined PMC events. This
resulted in a recent fix, 21f7397a61f7.
Change:
To disambiguate these two supported but separate use-cases, add a new
flag, PMC_F_EV_PMU, to be set as part of the allocation, indicating that
the event index came from pmu-events.
This is useful in two ways:
1. On the kernel side, we can validate the syscall arguments better.
Some classes support only the traditional event scheme (e.g.
hwpmc_armv7), while others support only the pmu-events method (e.g.
hwpmc_core for Intel). We can now check for this. The hwpmc_arm64
class supports both methods, so the new flag supersedes the existing
MD flag, PM_MD_EVENT_RAW.
2. The flag will be tracked in struct pmc for the duration of its
lifetime, meaning it is communicated back to userspace. This allows
libpmc to perform the reverse index-to-event-name translation
without speculating about the meaning of the index value.
Adding the flag is a backwards-incompatible ABI change. We recently
bumped the major version of the hwpmc module, so this breakage is
acceptable.
Reviewed by: jkoshy
MFC after: 3 days
Sponsored by: The FreeBSD Foundation
Differential Revision: https://reviews.freebsd.org/D40753
2023-06-06 17:26:46 +00:00
|
|
|
* pl_event could contain either a PMC event code or a PMU
|
|
|
|
* event index.
|
libpmc: Handle PMCALLOCATE log with PMC code on PMU event system
On an arm64 system that reports as a Cortex A72 r0p3, running
pmcstat -P CPU_CYCLES command
works, but
pmcstat -P cpu-cycles command
does not. This is because the former uses the PMU event from the JSON
source, resulting in pl_event in the log event being a small index
(here, 5) into the generated events table, whilst the latter does not
match any of the JSON events and falls back on PMC's own tables, mapping
it to the PMC event 0x14111, i.e. PMC_EV_ARMV8_EVENT_11H. Then, when
libpmc gets the PMCALLOCATE event, it tries to use the event as an index
into the JSON-derived table, but doing so only makes sense for the
former, whilst for the latter it will go way out of bounds and either
read junk (which may trigger the != NULL assertion) or segfault. As far
as I can tell we don't have anything lying around to tell us which of
the two cases we're in, but we can exploit the fact that the first
0x1000 PMC event codes are reserved, and that none of our PMU events
tables reach that number of entries yet.
PR: 268857
Reviewed by: mhorne
MFC after: 1 month
Differential Revision: https://reviews.freebsd.org/D39592
2023-06-07 14:21:18 +00:00
|
|
|
*/
|
pmc: better distinguish pmu-events allocation path
Background:
The pm_ev field of struct pmc_op_pmcallocate and struct pmc
traditionally contains the index of the chosen event, corresponding to
the __PMC_EVENTS array in pmc_events.h. This is a static list of events,
maintained by FreeBSD.
In the usual case, libpmc translates the user supplied event name
(string) into the pm_ev index, which is passed as an argument to the
allocation syscall. On the kernel side, the allocation method for the
relevant hwpmc class translates the given index into the event code that
will be written to an event selection register.
In 2018, a new source of performance event definitions was introduced:
the pmu-events json files, which are maintained by the Linux kernel. The
result was better coverage for newer Intel processors with a reduced
maintenance burden for libpmc/hwpmc. Intel and AMD CPUs were
unconditionally switched to allocate events from pmu-events instead of
the traditional scheme (959826ca1bb0a, 81eb4dcf9e0d).
Under the pmu-events scheme, the pm_ev field contains an index
corresponding to the selected event from the pmu-events table, something
which the kernel has no knowledge of. The configuration for the
performance counting registers is instead passed via class-dependent
fields (struct pmc_md_op_pmcallocate).
In 2021 I changed the allocation logic so that it would attempt to
pull from the pmu-events table first, and fall-back to the traditional
method (dfb4fb41166bc3). Later, pmu-events support for arm64 and power8
CPUs was added (28dd6730a5d6 and b48a2770d48b).
The problem that remains is that the pm_ev field is overloaded, without
a definitive way to determine whether the event allocation came from the
pmu-events table or FreeBSD's statically-defined PMC events. This
resulted in a recent fix, 21f7397a61f7.
Change:
To disambiguate these two supported but separate use-cases, add a new
flag, PMC_F_EV_PMU, to be set as part of the allocation, indicating that
the event index came from pmu-events.
This is useful in two ways:
1. On the kernel side, we can validate the syscall arguments better.
Some classes support only the traditional event scheme (e.g.
hwpmc_armv7), while others support only the pmu-events method (e.g.
hwpmc_core for Intel). We can now check for this. The hwpmc_arm64
class supports both methods, so the new flag supersedes the existing
MD flag, PM_MD_EVENT_RAW.
2. The flag will be tracked in struct pmc for the duration of its
lifetime, meaning it is communicated back to userspace. This allows
libpmc to perform the reverse index-to-event-name translation
without speculating about the meaning of the index value.
Adding the flag is a backwards-incompatible ABI change. We recently
bumped the major version of the hwpmc module, so this breakage is
acceptable.
Reviewed by: jkoshy
MFC after: 3 days
Sponsored by: The FreeBSD Foundation
Differential Revision: https://reviews.freebsd.org/D40753
2023-06-06 17:26:46 +00:00
|
|
|
if ((ev->pl_u.pl_a.pl_flags & PMC_F_EV_PMU) != 0)
|
libpmc: Handle PMCALLOCATE log with PMC code on PMU event system
On an arm64 system that reports as a Cortex A72 r0p3, running
pmcstat -P CPU_CYCLES command
works, but
pmcstat -P cpu-cycles command
does not. This is because the former uses the PMU event from the JSON
source, resulting in pl_event in the log event being a small index
(here, 5) into the generated events table, whilst the latter does not
match any of the JSON events and falls back on PMC's own tables, mapping
it to the PMC event 0x14111, i.e. PMC_EV_ARMV8_EVENT_11H. Then, when
libpmc gets the PMCALLOCATE event, it tries to use the event as an index
into the JSON-derived table, but doing so only makes sense for the
former, whilst for the latter it will go way out of bounds and either
read junk (which may trigger the != NULL assertion) or segfault. As far
as I can tell we don't have anything lying around to tell us which of
the two cases we're in, but we can exploit the fact that the first
0x1000 PMC event codes are reserved, and that none of our PMU events
tables reach that number of entries yet.
PR: 268857
Reviewed by: mhorne
MFC after: 1 month
Differential Revision: https://reviews.freebsd.org/D39592
2023-06-07 14:21:18 +00:00
|
|
|
ev->pl_u.pl_a.pl_evname =
|
|
|
|
pmc_pmu_event_get_by_idx(ps->ps_cpuid,
|
|
|
|
ev->pl_u.pl_a.pl_event);
|
|
|
|
else if (ev->pl_u.pl_a.pl_event <= PMC_EVENT_LAST)
|
|
|
|
ev->pl_u.pl_a.pl_evname =
|
|
|
|
_pmc_name_of_event(ev->pl_u.pl_a.pl_event,
|
|
|
|
ps->ps_arch);
|
|
|
|
else
|
|
|
|
ev->pl_u.pl_a.pl_evname = NULL;
|
|
|
|
if (ev->pl_u.pl_a.pl_evname == NULL) {
|
2018-05-26 19:29:19 +00:00
|
|
|
printf("unknown event\n");
|
2005-06-09 19:45:09 +00:00
|
|
|
goto error;
|
2018-05-26 19:29:19 +00:00
|
|
|
}
|
2005-06-09 19:45:09 +00:00
|
|
|
break;
|
2012-03-28 20:58:30 +00:00
|
|
|
case PMCLOG_TYPE_PMCALLOCATEDYN:
|
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_ad.pl_pmcid);
|
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_ad.pl_event);
|
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_ad.pl_flags);
|
2023-06-20 19:31:43 +00:00
|
|
|
PMCLOG_SKIP32(le);
|
2012-03-28 20:58:30 +00:00
|
|
|
PMCLOG_READSTRING(le,ev->pl_u.pl_ad.pl_evname,PMC_NAME_MAX);
|
|
|
|
break;
|
2005-06-09 19:45:09 +00:00
|
|
|
case PMCLOG_TYPE_PMCATTACH:
|
|
|
|
PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_pmcattach);
|
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_t.pl_pmcid);
|
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_t.pl_pid);
|
|
|
|
PMCLOG_READSTRING(le,ev->pl_u.pl_t.pl_pathname,pathlen);
|
|
|
|
break;
|
|
|
|
case PMCLOG_TYPE_PMCDETACH:
|
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_d.pl_pmcid);
|
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_d.pl_pid);
|
|
|
|
break;
|
|
|
|
case PMCLOG_TYPE_PROCCSW:
|
|
|
|
PMCLOG_READ64(le,ev->pl_u.pl_c.pl_value);
|
2018-06-07 02:03:22 +00:00
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_c.pl_pmcid);
|
2005-06-09 19:45:09 +00:00
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_c.pl_pid);
|
2018-05-23 17:25:00 +00:00
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_c.pl_tid);
|
2005-06-09 19:45:09 +00:00
|
|
|
break;
|
|
|
|
case PMCLOG_TYPE_PROCEXEC:
|
|
|
|
PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_procexec);
|
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_x.pl_pid);
|
2018-05-26 19:26:19 +00:00
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_x.pl_pmcid);
|
pmc: Rework PROCEXEC event to support PIEs
Currently the PROCEXEC event only reports a single address, entryaddr,
which is the entry point of the interpreter in the typical dynamic case,
and used solely to calculate the base address of the interpreter. For
PDEs this is fine, since the base address is known from the program
headers, but for PIEs the base address varies at run time based on where
the kernel chooses to load it, and so pmcstat has no way of knowing the
real address ranges for the executable. This was less of an issue in the
past since PIEs were rare, but now they're on by default on 64-bit
architectures it's more of a problem.
To solve this, pass through what was picked for et_dyn_addr by the
kernel, and use that as the offset for the executable's start address
just as is done for everything in the kernel. Since we're changing this
interface, sanitise the way we determine the interpreter's base address
by passing it through directly rather than indirectly via the entry
point and having to subtract off whatever the ELF header's e_entry is
(and anything that wants the entry point in future can still add that
back on as needed; this merely changes the interface to directly provide
the underlying variables involved).
This will be followed up by a bump to the pmc major version.
Reviewed by: jhb
Differential Revision: https://reviews.freebsd.org/D39595
2023-05-30 23:20:36 +00:00
|
|
|
PMCLOG_READADDR(le,ev->pl_u.pl_x.pl_baseaddr);
|
|
|
|
PMCLOG_READADDR(le,ev->pl_u.pl_x.pl_dynaddr);
|
2005-06-09 19:45:09 +00:00
|
|
|
PMCLOG_READSTRING(le,ev->pl_u.pl_x.pl_pathname,pathlen);
|
|
|
|
break;
|
|
|
|
case PMCLOG_TYPE_PROCEXIT:
|
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_e.pl_pmcid);
|
2018-05-26 19:26:19 +00:00
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_e.pl_pid);
|
2018-05-26 19:29:19 +00:00
|
|
|
PMCLOG_READ64(le,ev->pl_u.pl_e.pl_value);
|
2005-06-09 19:45:09 +00:00
|
|
|
break;
|
|
|
|
case PMCLOG_TYPE_PROCFORK:
|
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_f.pl_oldpid);
|
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_f.pl_newpid);
|
|
|
|
break;
|
|
|
|
case PMCLOG_TYPE_SYSEXIT:
|
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_se.pl_pid);
|
|
|
|
break;
|
|
|
|
case PMCLOG_TYPE_USERDATA:
|
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_u.pl_userdata);
|
|
|
|
break;
|
2018-06-05 04:26:40 +00:00
|
|
|
case PMCLOG_TYPE_THR_CREATE:
|
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_tc.pl_tid);
|
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_tc.pl_pid);
|
2018-06-06 02:48:09 +00:00
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_tc.pl_flags);
|
2023-06-20 19:31:43 +00:00
|
|
|
PMCLOG_SKIP32(le);
|
2018-06-05 04:26:40 +00:00
|
|
|
memcpy(ev->pl_u.pl_tc.pl_tdname, le, MAXCOMLEN+1);
|
|
|
|
break;
|
|
|
|
case PMCLOG_TYPE_THR_EXIT:
|
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_te.pl_tid);
|
|
|
|
break;
|
|
|
|
case PMCLOG_TYPE_PROC_CREATE:
|
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_pc.pl_pid);
|
2018-06-06 02:48:09 +00:00
|
|
|
PMCLOG_READ32(le,ev->pl_u.pl_pc.pl_flags);
|
2018-06-05 04:26:40 +00:00
|
|
|
memcpy(ev->pl_u.pl_pc.pl_pcomm, le, MAXCOMLEN+1);
|
|
|
|
break;
|
2005-06-09 19:45:09 +00:00
|
|
|
default: /* unknown record type */
|
|
|
|
ps->ps_state = PL_STATE_ERROR;
|
|
|
|
ev->pl_state = PMCLOG_ERROR;
|
2007-12-03 11:15:46 +00:00
|
|
|
return (-1);
|
2005-06-09 19:45:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
ev->pl_offset = (ps->ps_offset += evlen);
|
|
|
|
ev->pl_count = (ps->ps_count += 1);
|
2018-06-04 04:59:48 +00:00
|
|
|
ev->pl_len = evlen;
|
2005-06-09 19:45:09 +00:00
|
|
|
ev->pl_state = PMCLOG_OK;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
error:
|
|
|
|
ev->pl_state = PMCLOG_ERROR;
|
|
|
|
ps->ps_state = PL_STATE_ERROR;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Extract and return the next event from the byte stream.
|
|
|
|
*
|
|
|
|
* Returns 0 and sets the event's state to PMCLOG_OK in case an event
|
|
|
|
* was successfully parsed. Otherwise this function returns -1 and
|
|
|
|
* sets the event's state to one of PMCLOG_REQUIRE_DATA (if more data
|
|
|
|
* is needed) or PMCLOG_EOF (if an EOF was seen) or PMCLOG_ERROR if
|
|
|
|
* a parse error was encountered.
|
|
|
|
*/
|
|
|
|
|
|
|
|
int
|
|
|
|
pmclog_read(void *cookie, struct pmclog_ev *ev)
|
|
|
|
{
|
2005-07-09 17:12:30 +00:00
|
|
|
int retval;
|
2005-06-09 19:45:09 +00:00
|
|
|
ssize_t nread;
|
|
|
|
struct pmclog_parse_state *ps;
|
|
|
|
|
|
|
|
ps = (struct pmclog_parse_state *) cookie;
|
|
|
|
|
|
|
|
if (ps->ps_state == PL_STATE_ERROR) {
|
|
|
|
ev->pl_state = PMCLOG_ERROR;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If there isn't enough data left for a new event try and get
|
|
|
|
* more data.
|
|
|
|
*/
|
|
|
|
if (ps->ps_len == 0) {
|
|
|
|
ev->pl_state = PMCLOG_REQUIRE_DATA;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we have a valid file descriptor to read from, attempt
|
|
|
|
* to read from that. This read may return with an error,
|
|
|
|
* (which may be EAGAIN or other recoverable error), or
|
|
|
|
* can return EOF.
|
|
|
|
*/
|
|
|
|
if (ps->ps_fd != PMCLOG_FD_NONE) {
|
2005-07-09 17:12:30 +00:00
|
|
|
refill:
|
2005-06-09 19:45:09 +00:00
|
|
|
nread = read(ps->ps_fd, ps->ps_buffer,
|
|
|
|
PMCLOG_BUFFER_SIZE);
|
|
|
|
|
|
|
|
if (nread <= 0) {
|
2005-06-30 19:01:26 +00:00
|
|
|
if (nread == 0)
|
|
|
|
ev->pl_state = PMCLOG_EOF;
|
|
|
|
else if (errno != EAGAIN) /* not restartable */
|
|
|
|
ev->pl_state = PMCLOG_ERROR;
|
2005-06-09 19:45:09 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
ps->ps_len = nread;
|
|
|
|
ps->ps_data = ps->ps_buffer;
|
2018-05-26 19:29:19 +00:00
|
|
|
} else {
|
2005-06-09 19:45:09 +00:00
|
|
|
return -1;
|
2018-05-26 19:29:19 +00:00
|
|
|
}
|
2005-06-09 19:45:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
assert(ps->ps_len > 0);
|
|
|
|
|
2005-07-09 17:12:30 +00:00
|
|
|
|
|
|
|
/* Retrieve one event from the byte stream. */
|
|
|
|
retval = pmclog_get_event(ps, &ps->ps_data, &ps->ps_len, ev);
|
2005-06-09 19:45:09 +00:00
|
|
|
/*
|
2005-07-09 17:12:30 +00:00
|
|
|
* If we need more data and we have a configured fd, try read
|
|
|
|
* from it.
|
2005-06-09 19:45:09 +00:00
|
|
|
*/
|
2005-07-09 17:12:30 +00:00
|
|
|
if (retval < 0 && ev->pl_state == PMCLOG_REQUIRE_DATA &&
|
|
|
|
ps->ps_fd != -1) {
|
|
|
|
assert(ps->ps_len == 0);
|
|
|
|
goto refill;
|
|
|
|
}
|
|
|
|
|
|
|
|
return retval;
|
2005-06-09 19:45:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Feed data to a memory based parser.
|
|
|
|
*
|
|
|
|
* The memory area pointed to by 'data' needs to be valid till the
|
|
|
|
* next error return from pmclog_next_event().
|
|
|
|
*/
|
|
|
|
|
|
|
|
int
|
|
|
|
pmclog_feed(void *cookie, char *data, int len)
|
|
|
|
{
|
|
|
|
struct pmclog_parse_state *ps;
|
|
|
|
|
|
|
|
ps = (struct pmclog_parse_state *) cookie;
|
|
|
|
|
|
|
|
if (len < 0 || /* invalid length */
|
|
|
|
ps->ps_buffer || /* called for a file parser */
|
|
|
|
ps->ps_len != 0) /* unnecessary call */
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
ps->ps_data = data;
|
|
|
|
ps->ps_len = len;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Allocate and initialize parser state.
|
|
|
|
*/
|
|
|
|
|
|
|
|
void *
|
|
|
|
pmclog_open(int fd)
|
|
|
|
{
|
|
|
|
struct pmclog_parse_state *ps;
|
|
|
|
|
|
|
|
if ((ps = (struct pmclog_parse_state *) malloc(sizeof(*ps))) == NULL)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
ps->ps_state = PL_STATE_NEW_RECORD;
|
|
|
|
ps->ps_arch = -1;
|
|
|
|
ps->ps_initialized = 0;
|
|
|
|
ps->ps_count = 0;
|
|
|
|
ps->ps_offset = (off_t) 0;
|
|
|
|
bzero(&ps->ps_saved, sizeof(ps->ps_saved));
|
2018-06-06 02:48:09 +00:00
|
|
|
ps->ps_cpuid = NULL;
|
2005-06-09 19:45:09 +00:00
|
|
|
ps->ps_svcount = 0;
|
|
|
|
ps->ps_fd = fd;
|
|
|
|
ps->ps_data = NULL;
|
|
|
|
ps->ps_buffer = NULL;
|
|
|
|
ps->ps_len = 0;
|
|
|
|
|
|
|
|
/* allocate space for a work area */
|
|
|
|
if (ps->ps_fd != PMCLOG_FD_NONE) {
|
2010-06-05 23:00:02 +00:00
|
|
|
if ((ps->ps_buffer = malloc(PMCLOG_BUFFER_SIZE)) == NULL) {
|
|
|
|
free(ps);
|
2005-06-09 19:45:09 +00:00
|
|
|
return NULL;
|
2010-06-05 23:00:02 +00:00
|
|
|
}
|
2005-06-09 19:45:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return ps;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Free up parser state.
|
|
|
|
*/
|
|
|
|
|
|
|
|
void
|
|
|
|
pmclog_close(void *cookie)
|
|
|
|
{
|
|
|
|
struct pmclog_parse_state *ps;
|
|
|
|
|
|
|
|
ps = (struct pmclog_parse_state *) cookie;
|
|
|
|
|
|
|
|
if (ps->ps_buffer)
|
|
|
|
free(ps->ps_buffer);
|
|
|
|
|
|
|
|
free(ps);
|
|
|
|
}
|