Perf events changes for v6.6:

- AMD IBS improvements
 - Intel PMU driver updates
 - Extend core perf facilities & the ARM PMU driver to better handle ARM big.LITTLE events
 - Micro-optimize software events and the ring-buffer code
 - Misc cleanups & fixes
 
 Signed-off-by: Ingo Molnar <mingo@kernel.org>
 -----BEGIN PGP SIGNATURE-----
 
 iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmTtBscRHG1pbmdvQGtl
 cm5lbC5vcmcACgkQEnMQ0APhK1hHoQ/+IBQ8Xi/rcdd40n8OqEB/VBWVuSjNT3uN
 3pHHcTl2Pio9CxBeat42NekNijlRILCKJrZ3Lt3JWBmWyWv5l3KFabelj+lDF2xa
 TVCjTnQNe1+HvrODYnF4ECIs5vaoMVjcJ9jg8+VDgAcOQr1nZs4m5TVAd6TLqPpV
 urBEQVULkkzk7ZRhfrugKhw+wrpWFefgGCx0RV8ijZB7TLMHc2wE+Q/sTxKdKceL
 wNaJaDgV33pZh0aImwR9pKUE532hF1FiBdLuehkh61PZa1L82jzAX1xjw2s1hSa4
 eIWemPHJIYfivRlENbJsDWc4N8gk6ijVHwrxGcr4Axu+NN+zPtQ3ddhaGMAyKdTo
 qUKXH3MZSMIl++jI5Fkc6xM+XLvY1rML62epSzMwu/cc7Z5MeyWdQcri0N9YFuO7
 wUUNnFpU00lwQBLbyyUQ3Zi8E0QV7NuPW4axTkmntiIjMpLagaEvVSf6nf8qLpbE
 WTT16s707t19hUZNazNZ7ONmhly4ALbHFQEH65J2KoYn99fYqy9z68Hwk+xnmykw
 bc3qvfhpw0MImQQ+DqHiBwb4n4UuvY2WlkkZI3FfNeSG63DaM2mZikfpElpXYjn6
 9iOIXvx21Wiq/n0cbLhidI2q/ZzFCzYLCk6ikZ320wb+rhvd7EoSlZil6QSzn3pH
 Qdk+NEZgWQY=
 =ZT6+
 -----END PGP SIGNATURE-----

Merge tag 'perf-core-2023-08-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf event updates from Ingo Molnar:

 - AMD IBS improvements

 - Intel PMU driver updates

 - Extend core perf facilities & the ARM PMU driver to better handle ARM big.LITTLE events

 - Micro-optimize software events and the ring-buffer code

 - Misc cleanups & fixes

* tag 'perf-core-2023-08-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/uncore: Remove unnecessary ?: operator around pcibios_err_to_errno() call
  perf/x86/intel: Add Crestmont PMU
  x86/cpu: Update Hybrids
  x86/cpu: Fix Crestmont uarch
  x86/cpu: Fix Gracemont uarch
  perf: Remove unused extern declaration arch_perf_get_page_size()
  perf: Remove unused PERF_PMU_CAP_HETEROGENEOUS_CPUS capability
  arm_pmu: Remove unused PERF_PMU_CAP_HETEROGENEOUS_CPUS capability
  perf/x86: Remove unused PERF_PMU_CAP_HETEROGENEOUS_CPUS capability
  arm_pmu: Add PERF_PMU_CAP_EXTENDED_HW_TYPE capability
  perf/x86/ibs: Set mem_lvl_num, mem_remote and mem_hops for data_src
  perf/mem: Add PERF_MEM_LVLNUM_NA to PERF_MEM_NA
  perf/mem: Introduce PERF_MEM_LVLNUM_UNC
  perf/ring_buffer: Use local_try_cmpxchg in __perf_output_begin
  locking/arch: Avoid variable shadowing in local_try_cmpxchg()
  perf/core: Use local64_try_cmpxchg in perf_swevent_set_period
  perf/x86: Use local64_try_cmpxchg
  perf/amd: Prevent grouping of IBS events
This commit is contained in:
Linus Torvalds 2023-08-28 16:35:01 -07:00
commit 1a7c611546
29 changed files with 230 additions and 172 deletions

View file

@ -63,8 +63,8 @@ static inline long local_cmpxchg(local_t *l, long old, long new)
static inline bool local_try_cmpxchg(local_t *l, long *old, long new)
{
typeof(l->a.counter) *__old = (typeof(l->a.counter) *) old;
return try_cmpxchg_local(&l->a.counter, __old, new);
return try_cmpxchg_local(&l->a.counter,
(typeof(l->a.counter) *) old, new);
}
#define local_xchg(l, n) (atomic_long_xchg((&(l)->a), (n)))

View file

@ -101,8 +101,8 @@ static __inline__ long local_cmpxchg(local_t *l, long old, long new)
static __inline__ bool local_try_cmpxchg(local_t *l, long *old, long new)
{
typeof(l->a.counter) *__old = (typeof(l->a.counter) *) old;
return try_cmpxchg_local(&l->a.counter, __old, new);
return try_cmpxchg_local(&l->a.counter,
(typeof(l->a.counter) *) old, new);
}
#define local_xchg(l, n) (atomic_long_xchg((&(l)->a), (n)))

View file

@ -156,8 +156,8 @@ perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
* count to the generic event atomically:
*/
prev_raw_count = local64_read(&hwc->prev_count);
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
if (!local64_try_cmpxchg(&hwc->prev_count,
&prev_raw_count, new_raw_count))
return 0;
/*
@ -247,11 +247,33 @@ int forward_event_to_ibs(struct perf_event *event)
return -ENOENT;
}
/*
* Grouping of IBS events is not possible since IBS can have only
* one event active at any point in time.
*/
static int validate_group(struct perf_event *event)
{
struct perf_event *sibling;
if (event->group_leader == event)
return 0;
if (event->group_leader->pmu == event->pmu)
return -EINVAL;
for_each_sibling_event(sibling, event->group_leader) {
if (sibling->pmu == event->pmu)
return -EINVAL;
}
return 0;
}
static int perf_ibs_init(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
struct perf_ibs *perf_ibs;
u64 max_cnt, config;
int ret;
perf_ibs = get_ibs_pmu(event->attr.type);
if (!perf_ibs)
@ -265,6 +287,10 @@ static int perf_ibs_init(struct perf_event *event)
if (config & ~perf_ibs->config_mask)
return -EINVAL;
ret = validate_group(event);
if (ret)
return ret;
if (hwc->sample_period) {
if (config & perf_ibs->cnt_mask)
/* raw max_cnt may not be set */
@ -702,38 +728,63 @@ static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2)
return op_data2->data_src_lo;
}
static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
union ibs_op_data3 *op_data3,
struct perf_sample_data *data)
#define L(x) (PERF_MEM_S(LVL, x) | PERF_MEM_S(LVL, HIT))
#define LN(x) PERF_MEM_S(LVLNUM, x)
#define REM PERF_MEM_S(REMOTE, REMOTE)
#define HOPS(x) PERF_MEM_S(HOPS, x)
static u64 g_data_src[8] = {
[IBS_DATA_SRC_LOC_CACHE] = L(L3) | L(REM_CCE1) | LN(ANY_CACHE) | HOPS(0),
[IBS_DATA_SRC_DRAM] = L(LOC_RAM) | LN(RAM),
[IBS_DATA_SRC_REM_CACHE] = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1),
[IBS_DATA_SRC_IO] = L(IO) | LN(IO),
};
#define RMT_NODE_BITS (1 << IBS_DATA_SRC_DRAM)
#define RMT_NODE_APPLICABLE(x) (RMT_NODE_BITS & (1 << x))
static u64 g_zen4_data_src[32] = {
[IBS_DATA_SRC_EXT_LOC_CACHE] = L(L3) | LN(L3),
[IBS_DATA_SRC_EXT_NEAR_CCX_CACHE] = L(REM_CCE1) | LN(ANY_CACHE) | REM | HOPS(0),
[IBS_DATA_SRC_EXT_DRAM] = L(LOC_RAM) | LN(RAM),
[IBS_DATA_SRC_EXT_FAR_CCX_CACHE] = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1),
[IBS_DATA_SRC_EXT_PMEM] = LN(PMEM),
[IBS_DATA_SRC_EXT_IO] = L(IO) | LN(IO),
[IBS_DATA_SRC_EXT_EXT_MEM] = LN(CXL),
};
#define ZEN4_RMT_NODE_BITS ((1 << IBS_DATA_SRC_EXT_DRAM) | \
(1 << IBS_DATA_SRC_EXT_PMEM) | \
(1 << IBS_DATA_SRC_EXT_EXT_MEM))
#define ZEN4_RMT_NODE_APPLICABLE(x) (ZEN4_RMT_NODE_BITS & (1 << x))
static __u64 perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
union ibs_op_data3 *op_data3,
struct perf_sample_data *data)
{
union perf_mem_data_src *data_src = &data->data_src;
u8 ibs_data_src = perf_ibs_data_src(op_data2);
data_src->mem_lvl = 0;
data_src->mem_lvl_num = 0;
/*
* DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached
* memory accesses. So, check DcUcMemAcc bit early.
*/
if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) {
data_src->mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT;
return;
}
if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO)
return L(UNC) | LN(UNC);
/* L1 Hit */
if (op_data3->dc_miss == 0) {
data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
return;
}
if (op_data3->dc_miss == 0)
return L(L1) | LN(L1);
/* L2 Hit */
if (op_data3->l2_miss == 0) {
/* Erratum #1293 */
if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF ||
!(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) {
data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
return;
}
!(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc))
return L(L2) | LN(L2);
}
/*
@ -743,82 +794,36 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
if (data_src->mem_op != PERF_MEM_OP_LOAD)
goto check_mab;
/* L3 Hit */
if (ibs_caps & IBS_CAPS_ZEN4) {
if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE) {
data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
return;
u64 val = g_zen4_data_src[ibs_data_src];
if (!val)
goto check_mab;
/* HOPS_1 because IBS doesn't provide remote socket detail */
if (op_data2->rmt_node && ZEN4_RMT_NODE_APPLICABLE(ibs_data_src)) {
if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM)
val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1);
else
val |= REM | HOPS(1);
}
return val;
} else {
if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) {
data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_REM_CCE1 |
PERF_MEM_LVL_HIT;
return;
}
}
u64 val = g_data_src[ibs_data_src];
/* A peer cache in a near CCX */
if (ibs_caps & IBS_CAPS_ZEN4 &&
ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE) {
data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
return;
}
if (!val)
goto check_mab;
/* A peer cache in a far CCX */
if (ibs_caps & IBS_CAPS_ZEN4) {
if (ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE) {
data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
return;
/* HOPS_1 because IBS doesn't provide remote socket detail */
if (op_data2->rmt_node && RMT_NODE_APPLICABLE(ibs_data_src)) {
if (ibs_data_src == IBS_DATA_SRC_DRAM)
val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1);
else
val |= REM | HOPS(1);
}
} else {
if (ibs_data_src == IBS_DATA_SRC_REM_CACHE) {
data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
return;
}
}
/* DRAM */
if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM) {
if (op_data2->rmt_node == 0)
data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
else
data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
return;
}
/* PMEM */
if (ibs_caps & IBS_CAPS_ZEN4 && ibs_data_src == IBS_DATA_SRC_EXT_PMEM) {
data_src->mem_lvl_num = PERF_MEM_LVLNUM_PMEM;
if (op_data2->rmt_node) {
data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
/* IBS doesn't provide Remote socket detail */
data_src->mem_hops = PERF_MEM_HOPS_1;
}
return;
}
/* Extension Memory */
if (ibs_caps & IBS_CAPS_ZEN4 &&
ibs_data_src == IBS_DATA_SRC_EXT_EXT_MEM) {
data_src->mem_lvl_num = PERF_MEM_LVLNUM_CXL;
if (op_data2->rmt_node) {
data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
/* IBS doesn't provide Remote socket detail */
data_src->mem_hops = PERF_MEM_HOPS_1;
}
return;
}
/* IO */
if (ibs_data_src == IBS_DATA_SRC_EXT_IO) {
data_src->mem_lvl = PERF_MEM_LVL_IO;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
if (op_data2->rmt_node) {
data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
/* IBS doesn't provide Remote socket detail */
data_src->mem_hops = PERF_MEM_HOPS_1;
}
return;
return val;
}
check_mab:
@ -829,12 +834,11 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
* DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set
* MAB only when IBS fails to provide DataSrc.
*/
if (op_data3->dc_miss_no_mab_alloc) {
data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT;
return;
}
if (op_data3->dc_miss_no_mab_alloc)
return L(LFB) | LN(LFB);
data_src->mem_lvl = PERF_MEM_LVL_NA;
/* Don't set HIT with NA */
return PERF_MEM_S(LVL, NA) | LN(NA);
}
static bool perf_ibs_cache_hit_st_valid(void)
@ -924,7 +928,9 @@ static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data,
union ibs_op_data2 *op_data2,
union ibs_op_data3 *op_data3)
{
perf_ibs_get_mem_lvl(op_data2, op_data3, data);
union perf_mem_data_src *data_src = &data->data_src;
data_src->val |= perf_ibs_get_mem_lvl(op_data2, op_data3, data);
perf_ibs_get_mem_snoop(op_data2, data);
perf_ibs_get_tlb_lvl(op_data3, data);
perf_ibs_get_mem_lock(op_data3, data);

View file

@ -129,13 +129,11 @@ u64 x86_perf_event_update(struct perf_event *event)
* exchange a new raw count - then add that new-prev delta
* count to the generic event atomically:
*/
again:
prev_raw_count = local64_read(&hwc->prev_count);
rdpmcl(hwc->event_base_rdpmc, new_raw_count);
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
goto again;
do {
rdpmcl(hwc->event_base_rdpmc, new_raw_count);
} while (!local64_try_cmpxchg(&hwc->prev_count,
&prev_raw_count, new_raw_count));
/*
* Now we have the new raw value and have updated the prev
@ -2168,7 +2166,6 @@ static int __init init_hw_perf_events(void)
hybrid_pmu->pmu = pmu;
hybrid_pmu->pmu.type = -1;
hybrid_pmu->pmu.attr_update = x86_pmu.attr_update;
hybrid_pmu->pmu.capabilities |= PERF_PMU_CAP_HETEROGENEOUS_CPUS;
hybrid_pmu->pmu.capabilities |= PERF_PMU_CAP_EXTENDED_HW_TYPE;
err = perf_pmu_register(&hybrid_pmu->pmu, hybrid_pmu->name,

View file

@ -2129,6 +2129,17 @@ static struct extra_reg intel_grt_extra_regs[] __read_mostly = {
EVENT_EXTRA_END
};
EVENT_ATTR_STR(topdown-retiring, td_retiring_cmt, "event=0x72,umask=0x0");
EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec_cmt, "event=0x73,umask=0x0");
static struct attribute *cmt_events_attrs[] = {
EVENT_PTR(td_fe_bound_tnt),
EVENT_PTR(td_retiring_cmt),
EVENT_PTR(td_bad_spec_cmt),
EVENT_PTR(td_be_bound_tnt),
NULL
};
static struct extra_reg intel_cmt_extra_regs[] __read_mostly = {
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff3ffffffffffull, RSP_0),
@ -4847,6 +4858,8 @@ PMU_FORMAT_ATTR(ldlat, "config1:0-15");
PMU_FORMAT_ATTR(frontend, "config1:0-23");
PMU_FORMAT_ATTR(snoop_rsp, "config1:0-63");
static struct attribute *intel_arch3_formats_attr[] = {
&format_attr_event.attr,
&format_attr_umask.attr,
@ -4877,6 +4890,13 @@ static struct attribute *slm_format_attr[] = {
NULL
};
static struct attribute *cmt_format_attr[] = {
&format_attr_offcore_rsp.attr,
&format_attr_ldlat.attr,
&format_attr_snoop_rsp.attr,
NULL
};
static struct attribute *skl_format_attr[] = {
&format_attr_frontend.attr,
NULL,
@ -5656,7 +5676,6 @@ static struct attribute *adl_hybrid_extra_attr[] = {
NULL
};
PMU_FORMAT_ATTR_SHOW(snoop_rsp, "config1:0-63");
FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small);
static struct attribute *mtl_hybrid_extra_attr_rtm[] = {
@ -6174,7 +6193,7 @@ __init int intel_pmu_init(void)
name = "Tremont";
break;
case INTEL_FAM6_ALDERLAKE_N:
case INTEL_FAM6_ATOM_GRACEMONT:
x86_pmu.mid_ack = true;
memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@ -6204,6 +6223,37 @@ __init int intel_pmu_init(void)
name = "gracemont";
break;
case INTEL_FAM6_ATOM_CRESTMONT:
case INTEL_FAM6_ATOM_CRESTMONT_X:
x86_pmu.mid_ack = true;
memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs,
sizeof(hw_cache_extra_regs));
hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
x86_pmu.event_constraints = intel_slm_event_constraints;
x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints;
x86_pmu.extra_regs = intel_cmt_extra_regs;
x86_pmu.pebs_aliases = NULL;
x86_pmu.pebs_prec_dist = true;
x86_pmu.lbr_pt_coexist = true;
x86_pmu.pebs_block = true;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
intel_pmu_pebs_data_source_cmt();
x86_pmu.pebs_latency_data = mtl_latency_data_small;
x86_pmu.get_event_constraints = cmt_get_event_constraints;
x86_pmu.limit_period = spr_limit_period;
td_attr = cmt_events_attrs;
mem_attr = grt_mem_attrs;
extra_attr = cmt_format_attr;
pr_cont("Crestmont events, ");
name = "crestmont";
break;
case INTEL_FAM6_WESTMERE:
case INTEL_FAM6_WESTMERE_EP:
case INTEL_FAM6_WESTMERE_EX:

View file

@ -365,13 +365,11 @@ static void cstate_pmu_event_update(struct perf_event *event)
struct hw_perf_event *hwc = &event->hw;
u64 prev_raw_count, new_raw_count;
again:
prev_raw_count = local64_read(&hwc->prev_count);
new_raw_count = cstate_pmu_read_counter(event);
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
goto again;
do {
new_raw_count = cstate_pmu_read_counter(event);
} while (!local64_try_cmpxchg(&hwc->prev_count,
&prev_raw_count, new_raw_count));
local64_add(new_raw_count - prev_raw_count, &event->count);
}
@ -671,6 +669,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &glm_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, &glm_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, &glm_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &adl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_cstates),
@ -686,7 +685,6 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &icl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &adl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_cstates),

View file

@ -144,7 +144,7 @@ void __init intel_pmu_pebs_data_source_adl(void)
__intel_pmu_pebs_data_source_grt(data_source);
}
static void __init intel_pmu_pebs_data_source_cmt(u64 *data_source)
static void __init __intel_pmu_pebs_data_source_cmt(u64 *data_source)
{
data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
@ -164,7 +164,12 @@ void __init intel_pmu_pebs_data_source_mtl(void)
data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
intel_pmu_pebs_data_source_cmt(data_source);
__intel_pmu_pebs_data_source_cmt(data_source);
}
void __init intel_pmu_pebs_data_source_cmt(void)
{
__intel_pmu_pebs_data_source_cmt(pebs_data_source);
}
static u64 precise_store_data(u64 status)

View file

@ -1858,7 +1858,6 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rkl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_uncore_init),
@ -1867,6 +1866,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &spr_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &adl_uncore_init),
{},
};
MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);

View file

@ -1502,7 +1502,7 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
pci_dev_put(ubox_dev);
return err ? pcibios_err_to_errno(err) : 0;
return pcibios_err_to_errno(err);
}
int snbep_uncore_pci_init(void)

View file

@ -106,7 +106,7 @@ static bool test_intel(int idx, void *data)
case INTEL_FAM6_ROCKETLAKE:
case INTEL_FAM6_ALDERLAKE:
case INTEL_FAM6_ALDERLAKE_L:
case INTEL_FAM6_ALDERLAKE_N:
case INTEL_FAM6_ATOM_GRACEMONT:
case INTEL_FAM6_RAPTORLAKE:
case INTEL_FAM6_RAPTORLAKE_P:
case INTEL_FAM6_RAPTORLAKE_S:
@ -244,12 +244,10 @@ static void msr_event_update(struct perf_event *event)
s64 delta;
/* Careful, an NMI might modify the previous event value: */
again:
prev = local64_read(&event->hw.prev_count);
now = msr_read_counter(event);
if (local64_cmpxchg(&event->hw.prev_count, prev, now) != prev)
goto again;
do {
now = msr_read_counter(event);
} while (!local64_try_cmpxchg(&event->hw.prev_count, &prev, now));
delta = now - prev;
if (unlikely(event->hw.event_base == MSR_SMI_COUNT)) {

View file

@ -1606,6 +1606,8 @@ void intel_pmu_pebs_data_source_grt(void);
void intel_pmu_pebs_data_source_mtl(void);
void intel_pmu_pebs_data_source_cmt(void);
int intel_pmu_setup_lbr_filter(struct perf_event *event);
void intel_pt_interrupt(void);

View file

@ -804,7 +804,7 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr),
X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &model_spr),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &model_skl),

View file

@ -98,8 +98,6 @@
#define INTEL_FAM6_ICELAKE_L 0x7E /* Sunny Cove */
#define INTEL_FAM6_ICELAKE_NNPI 0x9D /* Sunny Cove */
#define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */
#define INTEL_FAM6_ROCKETLAKE 0xA7 /* Cypress Cove */
#define INTEL_FAM6_TIGERLAKE_L 0x8C /* Willow Cove */
@ -112,21 +110,24 @@
#define INTEL_FAM6_GRANITERAPIDS_X 0xAD
#define INTEL_FAM6_GRANITERAPIDS_D 0xAE
/* "Hybrid" Processors (P-Core/E-Core) */
#define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */
#define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */
#define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */
#define INTEL_FAM6_ALDERLAKE_N 0xBE
#define INTEL_FAM6_RAPTORLAKE 0xB7
#define INTEL_FAM6_RAPTORLAKE 0xB7 /* Raptor Cove / Enhanced Gracemont */
#define INTEL_FAM6_RAPTORLAKE_P 0xBA
#define INTEL_FAM6_RAPTORLAKE_S 0xBF
#define INTEL_FAM6_METEORLAKE 0xAC
#define INTEL_FAM6_METEORLAKE_L 0xAA
#define INTEL_FAM6_LUNARLAKE_M 0xBD
#define INTEL_FAM6_ARROWLAKE 0xC6
#define INTEL_FAM6_LUNARLAKE_M 0xBD
/* "Small Core" Processors (Atom/E-Core) */
#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
@ -154,9 +155,10 @@
#define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */
#define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */
#define INTEL_FAM6_SIERRAFOREST_X 0xAF
#define INTEL_FAM6_ATOM_GRACEMONT 0xBE /* Alderlake N */
#define INTEL_FAM6_GRANDRIDGE 0xB6
#define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */
#define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */
/* Xeon Phi */

View file

@ -127,8 +127,8 @@ static inline long local_cmpxchg(local_t *l, long old, long new)
static inline bool local_try_cmpxchg(local_t *l, long *old, long new)
{
typeof(l->a.counter) *__old = (typeof(l->a.counter) *) old;
return try_cmpxchg_local(&l->a.counter, __old, new);
return try_cmpxchg_local(&l->a.counter,
(typeof(l->a.counter) *) old, new);
}
/* Always has a lock prefix */

View file

@ -206,7 +206,7 @@ static int intel_epb_offline(unsigned int cpu)
static const struct x86_cpu_id intel_epb_normal[] = {
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,
ENERGY_PERF_BIAS_NORMAL_POWERSAVE),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,
X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT,
ENERGY_PERF_BIAS_NORMAL_POWERSAVE),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P,
ENERGY_PERF_BIAS_NORMAL_POWERSAVE),

View file

@ -272,7 +272,7 @@ static void __init probe_page_size_mask(void)
static const struct x86_cpu_id invlpg_miss_ids[] = {
INTEL_MATCH(INTEL_FAM6_ALDERLAKE ),
INTEL_MATCH(INTEL_FAM6_ALDERLAKE_L ),
INTEL_MATCH(INTEL_FAM6_ALDERLAKE_N ),
INTEL_MATCH(INTEL_FAM6_ATOM_GRACEMONT ),
INTEL_MATCH(INTEL_FAM6_RAPTORLAKE ),
INTEL_MATCH(INTEL_FAM6_RAPTORLAKE_P),
INTEL_MATCH(INTEL_FAM6_RAPTORLAKE_S),

View file

@ -906,7 +906,7 @@ static const struct x86_cpu_id i10nm_cpuids[] = {
X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SAPPHIRERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg),
X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(EMERALDRAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg),
X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(GRANITERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg),
X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SIERRAFOREST_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg),
X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_CRESTMONT_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg),
{}
};
MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids);

View file

@ -923,7 +923,7 @@ static struct cpuidle_state adl_l_cstates[] __initdata = {
.enter = NULL }
};
static struct cpuidle_state adl_n_cstates[] __initdata = {
static struct cpuidle_state gmt_cstates[] __initdata = {
{
.name = "C1",
.desc = "MWAIT 0x00",
@ -1349,8 +1349,8 @@ static const struct idle_cpu idle_cpu_adl_l __initconst = {
.state_table = adl_l_cstates,
};
static const struct idle_cpu idle_cpu_adl_n __initconst = {
.state_table = adl_n_cstates,
static const struct idle_cpu idle_cpu_gmt __initconst = {
.state_table = gmt_cstates,
};
static const struct idle_cpu idle_cpu_spr __initconst = {
@ -1423,7 +1423,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &idle_cpu_adl),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &idle_cpu_adl_l),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &idle_cpu_adl_n),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &idle_cpu_gmt),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr),
X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &idle_cpu_spr),
X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl),
@ -1898,7 +1898,7 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
break;
case INTEL_FAM6_ALDERLAKE:
case INTEL_FAM6_ALDERLAKE_L:
case INTEL_FAM6_ALDERLAKE_N:
case INTEL_FAM6_ATOM_GRACEMONT:
adl_idle_state_table_update();
break;
}

View file

@ -877,11 +877,13 @@ struct arm_pmu *armpmu_alloc(void)
.attr_groups = pmu->attr_groups,
/*
* This is a CPU PMU potentially in a heterogeneous
* configuration (e.g. big.LITTLE). This is not an uncore PMU,
* and we have taken ctx sharing into account (e.g. with our
* pmu::filter callback and pmu::event_init group validation).
* configuration (e.g. big.LITTLE) so
* PERF_PMU_CAP_EXTENDED_HW_TYPE is required to open
* PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE events on a
* specific PMU.
*/
.capabilities = PERF_PMU_CAP_HETEROGENEOUS_CPUS | PERF_PMU_CAP_EXTENDED_REGS,
.capabilities = PERF_PMU_CAP_EXTENDED_REGS |
PERF_PMU_CAP_EXTENDED_HW_TYPE,
};
pmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] =

View file

@ -1123,7 +1123,7 @@ static const struct x86_cpu_id intel_pmc_core_ids[] = {
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, icl_core_init),
X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, tgl_core_init),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, tgl_core_init),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, tgl_core_init),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, tgl_core_init),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, adl_core_init),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, tgl_core_init),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, adl_core_init),

View file

@ -720,7 +720,7 @@ static struct miscdevice isst_if_char_driver = {
static const struct x86_cpu_id hpm_cpu_ids[] = {
X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_X, NULL),
X86_MATCH_INTEL_FAM6_MODEL(SIERRAFOREST_X, NULL),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT_X, NULL),
{}
};

View file

@ -1250,7 +1250,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &rapl_defaults_core),

View file

@ -142,7 +142,7 @@ static const struct x86_cpu_id pl4_support_ids[] = {
X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, NULL),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, NULL),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, NULL),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, NULL),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, NULL),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, NULL),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, NULL),
X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, NULL),

View file

@ -60,7 +60,7 @@ static const struct x86_cpu_id tcc_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, NULL),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, NULL),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, NULL),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, NULL),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, NULL),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, NULL),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, NULL),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, NULL),

View file

@ -288,10 +288,9 @@ struct perf_event_pmu_context;
#define PERF_PMU_CAP_EXTENDED_REGS 0x0008
#define PERF_PMU_CAP_EXCLUSIVE 0x0010
#define PERF_PMU_CAP_ITRACE 0x0020
#define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x0040
#define PERF_PMU_CAP_NO_EXCLUDE 0x0080
#define PERF_PMU_CAP_AUX_OUTPUT 0x0100
#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0200
#define PERF_PMU_CAP_NO_EXCLUDE 0x0040
#define PERF_PMU_CAP_AUX_OUTPUT 0x0080
#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100
struct perf_output_handle;
@ -1194,7 +1193,8 @@ struct perf_sample_data {
PERF_MEM_S(LVL, NA) |\
PERF_MEM_S(SNOOP, NA) |\
PERF_MEM_S(LOCK, NA) |\
PERF_MEM_S(TLB, NA))
PERF_MEM_S(TLB, NA) |\
PERF_MEM_S(LVLNUM, NA))
static inline void perf_sample_data_init(struct perf_sample_data *data,
u64 addr, u64 period)
@ -1860,10 +1860,6 @@ extern void arch_perf_update_userpage(struct perf_event *event,
struct perf_event_mmap_page *userpg,
u64 now);
#ifdef CONFIG_MMU
extern __weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr);
#endif
/*
* Snapshot branch stack on software events.
*

View file

@ -1339,7 +1339,8 @@ union perf_mem_data_src {
#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */
#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */
#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */
/* 5-0x8 available */
/* 5-0x7 available */
#define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */
#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */
#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */
#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */

View file

@ -9595,16 +9595,16 @@ u64 perf_swevent_set_period(struct perf_event *event)
hwc->last_period = hwc->sample_period;
again:
old = val = local64_read(&hwc->period_left);
if (val < 0)
return 0;
old = local64_read(&hwc->period_left);
do {
val = old;
if (val < 0)
return 0;
nr = div64_u64(period + val, period);
offset = nr * period;
val -= offset;
if (local64_cmpxchg(&hwc->period_left, old, val) != old)
goto again;
nr = div64_u64(period + val, period);
offset = nr * period;
val -= offset;
} while (!local64_try_cmpxchg(&hwc->period_left, &old, val));
return nr;
}

View file

@ -191,9 +191,10 @@ __perf_output_begin(struct perf_output_handle *handle,
perf_output_get_handle(handle);
offset = local_read(&rb->head);
do {
head = offset;
tail = READ_ONCE(rb->user_page->data_tail);
offset = head = local_read(&rb->head);
if (!rb->overwrite) {
if (unlikely(!ring_buffer_has_space(head, tail,
perf_data_size(rb),
@ -217,7 +218,7 @@ __perf_output_begin(struct perf_output_handle *handle,
head += size;
else
head -= size;
} while (local_cmpxchg(&rb->head, offset, head) != offset);
} while (!local_try_cmpxchg(&rb->head, &offset, head));
if (backward) {
offset = head;

View file

@ -5447,7 +5447,7 @@ unsigned int intel_model_duplicates(unsigned int model)
case INTEL_FAM6_LAKEFIELD:
case INTEL_FAM6_ALDERLAKE:
case INTEL_FAM6_ALDERLAKE_L:
case INTEL_FAM6_ALDERLAKE_N:
case INTEL_FAM6_ATOM_GRACEMONT:
case INTEL_FAM6_RAPTORLAKE:
case INTEL_FAM6_RAPTORLAKE_P:
case INTEL_FAM6_RAPTORLAKE_S: