Merge branch kvm-arm64/pmu-bl into kvmarm-master/next

* kvm-arm64/pmu-bl:
  : .
  : Improve PMU support on heterogeneous systems, courtesy of Alexandru Elisei
  : .
  KVM: arm64: Refuse to run VCPU if the PMU doesn't match the physical CPU
  KVM: arm64: Add KVM_ARM_VCPU_PMU_V3_SET_PMU attribute
  KVM: arm64: Keep a list of probed PMUs
  KVM: arm64: Keep a per-VM pointer to the default PMU
  perf: Fix wrong name in comment for struct perf_cpu_context
  KVM: arm64: Do not change the PMU event filter after a VCPU has run

Signed-off-by: Marc Zyngier <maz@kernel.org>
This commit is contained in:
Marc Zyngier 2022-02-08 17:54:41 +00:00
commit 00e6dae00e
8 changed files with 189 additions and 34 deletions

View file

@ -70,7 +70,7 @@ irqchip.
-ENODEV PMUv3 not supported or GIC not initialized
-ENXIO PMUv3 not properly configured or in-kernel irqchip not
configured as required prior to calling this attribute
-EBUSY PMUv3 already initialized
-EBUSY PMUv3 already initialized or a VCPU has already run
-EINVAL Invalid filter range
======= ======================================================
@ -104,6 +104,38 @@ hardware event. Filtering event 0x1E (CHAIN) has no effect either, as it
isn't strictly speaking an event. Filtering the cycle counter is possible
using event 0x11 (CPU_CYCLES).
1.4 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_SET_PMU
------------------------------------------
:Parameters: in kvm_device_attr.addr the address to an int representing the PMU
identifier.
:Returns:
======= ====================================================
-EBUSY PMUv3 already initialized, a VCPU has already run or
an event filter has already been set
-EFAULT Error accessing the PMU identifier
-ENXIO PMU not found
-ENODEV PMUv3 not supported or GIC not initialized
-ENOMEM Could not allocate memory
======= ====================================================
Request that the VCPU uses the specified hardware PMU when creating guest events
for the purpose of PMU emulation. The PMU identifier can be read from the "type"
file for the desired PMU instance under /sys/devices (or, equivalent,
/sys/bus/even_source). This attribute is particularly useful on heterogeneous
systems where there are at least two CPU PMUs on the system. The PMU that is set
for one VCPU will be used by all the other VCPUs. It isn't possible to set a PMU
if a PMU event filter is already present.
Note that KVM will not make any attempts to run the VCPU on the physical CPUs
associated with the PMU specified by this attribute. This is entirely left to
userspace. However, attempting to run the VCPU on a physical CPU not supported
by the PMU will fail and KVM_RUN will return with
exit_reason = KVM_EXIT_FAIL_ENTRY and populate the fail_entry struct by setting
hardare_entry_failure_reason field to KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED and
the cpu field to the processor id.
2. GROUP: KVM_ARM_VCPU_TIMER_CTRL
=================================

View file

@ -129,13 +129,16 @@ struct kvm_arch {
* up to 2^10 events (ARMv8.0) or 2^16 events (ARMv8.1+).
*/
unsigned long *pmu_filter;
unsigned int pmuver;
struct arm_pmu *arm_pmu;
cpumask_var_t supported_cpus;
u8 pfr0_csv2;
u8 pfr0_csv3;
/* Memory Tagging Extension enabled for the guest */
bool mte_enabled;
bool ran_once;
};
struct kvm_vcpu_fault_info {
@ -436,6 +439,7 @@ struct kvm_vcpu_arch {
#define KVM_ARM64_DEBUG_STATE_SAVE_SPE (1 << 12) /* Save SPE context if active */
#define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */
#define KVM_ARM64_FP_FOREIGN_FPSTATE (1 << 14)
#define KVM_ARM64_ON_UNSUPPORTED_CPU (1 << 15) /* Physical CPU not in supported_cpus */
#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
KVM_GUESTDBG_USE_SW_BP | \
@ -454,6 +458,15 @@ struct kvm_vcpu_arch {
#define vcpu_has_ptrauth(vcpu) false
#endif
#define vcpu_on_unsupported_cpu(vcpu) \
((vcpu)->arch.flags & KVM_ARM64_ON_UNSUPPORTED_CPU)
#define vcpu_set_on_unsupported_cpu(vcpu) \
((vcpu)->arch.flags |= KVM_ARM64_ON_UNSUPPORTED_CPU)
#define vcpu_clear_on_unsupported_cpu(vcpu) \
((vcpu)->arch.flags &= ~KVM_ARM64_ON_UNSUPPORTED_CPU)
#define vcpu_gp_regs(v) (&(v)->arch.ctxt.regs)
/*

View file

@ -362,6 +362,7 @@ struct kvm_arm_copy_mte_tags {
#define KVM_ARM_VCPU_PMU_V3_IRQ 0
#define KVM_ARM_VCPU_PMU_V3_INIT 1
#define KVM_ARM_VCPU_PMU_V3_FILTER 2
#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3
#define KVM_ARM_VCPU_TIMER_CTRL 1
#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
@ -413,6 +414,9 @@ struct kvm_arm_copy_mte_tags {
#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS
#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED
/* run->fail_entry.hardware_entry_failure_reason codes. */
#define KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED (1ULL << 0)
#endif
#endif /* __ARM_KVM_H__ */

View file

@ -145,6 +145,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (ret)
goto out_free_stage2_pgd;
if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL))
goto out_free_stage2_pgd;
cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask);
kvm_vgic_early_init(kvm);
/* The maximum number of VCPUs is limited by the host's GIC model */
@ -171,6 +175,7 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
bitmap_free(kvm->arch.pmu_filter);
free_cpumask_var(kvm->arch.supported_cpus);
kvm_vgic_destroy(kvm);
@ -406,6 +411,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (vcpu_has_ptrauth(vcpu))
vcpu_ptrauth_disable(vcpu);
kvm_arch_vcpu_load_debug_state_flags(vcpu);
if (!cpumask_test_cpu(smp_processor_id(), vcpu->kvm->arch.supported_cpus))
vcpu_set_on_unsupported_cpu(vcpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@ -419,6 +427,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
kvm_vcpu_pmu_restore_host(vcpu);
kvm_arm_vmid_clear_active();
vcpu_clear_on_unsupported_cpu(vcpu);
vcpu->cpu = -1;
}
@ -549,6 +558,10 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
if (kvm_vm_is_protected(kvm))
kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu);
mutex_lock(&kvm->lock);
kvm->arch.ran_once = true;
mutex_unlock(&kvm->lock);
return ret;
}
@ -707,6 +720,14 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret)
}
}
if (unlikely(vcpu_on_unsupported_cpu(vcpu))) {
run->exit_reason = KVM_EXIT_FAIL_ENTRY;
run->fail_entry.hardware_entry_failure_reason = KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED;
run->fail_entry.cpu = smp_processor_id();
*ret = 0;
return true;
}
return kvm_request_pending(vcpu) ||
xfer_to_guest_mode_work_pending();
}

View file

@ -7,6 +7,7 @@
#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/list.h>
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
@ -16,6 +17,9 @@
DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
static LIST_HEAD(arm_pmus);
static DEFINE_MUTEX(arm_pmus_lock);
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
@ -24,7 +28,11 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
static u32 kvm_pmu_event_mask(struct kvm *kvm)
{
switch (kvm->arch.pmuver) {
unsigned int pmuver;
pmuver = kvm->arch.arm_pmu->pmuver;
switch (pmuver) {
case ID_AA64DFR0_PMUVER_8_0:
return GENMASK(9, 0);
case ID_AA64DFR0_PMUVER_8_1:
@ -33,7 +41,7 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm)
case ID_AA64DFR0_PMUVER_8_7:
return GENMASK(15, 0);
default: /* Shouldn't be here, just for sanity */
WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver);
WARN_ONCE(1, "Unknown PMU version %d\n", pmuver);
return 0;
}
}
@ -600,6 +608,7 @@ static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
*/
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
{
struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu;
struct kvm_pmu *pmu = &vcpu->arch.pmu;
struct kvm_pmc *pmc;
struct perf_event *event;
@ -636,7 +645,7 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
return;
memset(&attr, 0, sizeof(struct perf_event_attr));
attr.type = PERF_TYPE_RAW;
attr.type = arm_pmu->pmu.type;
attr.size = sizeof(attr);
attr.pinned = 1;
attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
@ -745,17 +754,33 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
void kvm_host_pmu_init(struct arm_pmu *pmu)
{
if (pmu->pmuver != 0 && pmu->pmuver != ID_AA64DFR0_PMUVER_IMP_DEF &&
!kvm_arm_support_pmu_v3() && !is_protected_kvm_enabled())
struct arm_pmu_entry *entry;
if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF ||
is_protected_kvm_enabled())
return;
mutex_lock(&arm_pmus_lock);
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
goto out_unlock;
entry->arm_pmu = pmu;
list_add_tail(&entry->entry, &arm_pmus);
if (list_is_singular(&arm_pmus))
static_branch_enable(&kvm_arm_pmu_available);
out_unlock:
mutex_unlock(&arm_pmus_lock);
}
static int kvm_pmu_probe_pmuver(void)
static struct arm_pmu *kvm_pmu_probe_armpmu(void)
{
struct perf_event_attr attr = { };
struct perf_event *event;
struct arm_pmu *pmu;
int pmuver = ID_AA64DFR0_PMUVER_IMP_DEF;
struct arm_pmu *pmu = NULL;
/*
* Create a dummy event that only counts user cycles. As we'll never
@ -780,19 +805,20 @@ static int kvm_pmu_probe_pmuver(void)
if (IS_ERR(event)) {
pr_err_once("kvm: pmu event creation failed %ld\n",
PTR_ERR(event));
return ID_AA64DFR0_PMUVER_IMP_DEF;
return NULL;
}
if (event->pmu) {
pmu = to_arm_pmu(event->pmu);
if (pmu->pmuver)
pmuver = pmu->pmuver;
if (pmu->pmuver == 0 ||
pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
pmu = NULL;
}
perf_event_disable(event);
perf_event_release_kernel(event);
return pmuver;
return pmu;
}
u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
@ -810,7 +836,7 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
* Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
* as RAZ
*/
if (vcpu->kvm->arch.pmuver >= ID_AA64DFR0_PMUVER_8_4)
if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_PMUVER_8_4)
val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
base = 32;
}
@ -922,26 +948,64 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
return true;
}
static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
{
struct kvm *kvm = vcpu->kvm;
struct arm_pmu_entry *entry;
struct arm_pmu *arm_pmu;
int ret = -ENXIO;
mutex_lock(&kvm->lock);
mutex_lock(&arm_pmus_lock);
list_for_each_entry(entry, &arm_pmus, entry) {
arm_pmu = entry->arm_pmu;
if (arm_pmu->pmu.type == pmu_id) {
if (kvm->arch.ran_once ||
(kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) {
ret = -EBUSY;
break;
}
kvm->arch.arm_pmu = arm_pmu;
cpumask_copy(kvm->arch.supported_cpus, &arm_pmu->supported_cpus);
ret = 0;
break;
}
}
mutex_unlock(&arm_pmus_lock);
mutex_unlock(&kvm->lock);
return ret;
}
int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
struct kvm *kvm = vcpu->kvm;
if (!kvm_vcpu_has_pmu(vcpu))
return -ENODEV;
if (vcpu->arch.pmu.created)
return -EBUSY;
if (!vcpu->kvm->arch.pmuver)
vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver();
if (vcpu->kvm->arch.pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
return -ENODEV;
mutex_lock(&kvm->lock);
if (!kvm->arch.arm_pmu) {
/* No PMU set, get the default one */
kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
if (!kvm->arch.arm_pmu) {
mutex_unlock(&kvm->lock);
return -ENODEV;
}
}
mutex_unlock(&kvm->lock);
switch (attr->attr) {
case KVM_ARM_VCPU_PMU_V3_IRQ: {
int __user *uaddr = (int __user *)(long)attr->addr;
int irq;
if (!irqchip_in_kernel(vcpu->kvm))
if (!irqchip_in_kernel(kvm))
return -EINVAL;
if (get_user(irq, uaddr))
@ -951,7 +1015,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
return -EINVAL;
if (!pmu_irq_is_valid(vcpu->kvm, irq))
if (!pmu_irq_is_valid(kvm, irq))
return -EINVAL;
if (kvm_arm_pmu_irq_initialized(vcpu))
@ -966,7 +1030,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
struct kvm_pmu_event_filter filter;
int nr_events;
nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;
nr_events = kvm_pmu_event_mask(kvm) + 1;
uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;
@ -978,12 +1042,17 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
filter.action != KVM_PMU_EVENT_DENY))
return -EINVAL;
mutex_lock(&vcpu->kvm->lock);
mutex_lock(&kvm->lock);
if (!vcpu->kvm->arch.pmu_filter) {
vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
if (!vcpu->kvm->arch.pmu_filter) {
mutex_unlock(&vcpu->kvm->lock);
if (kvm->arch.ran_once) {
mutex_unlock(&kvm->lock);
return -EBUSY;
}
if (!kvm->arch.pmu_filter) {
kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
if (!kvm->arch.pmu_filter) {
mutex_unlock(&kvm->lock);
return -ENOMEM;
}
@ -994,20 +1063,29 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
* events, the default is to allow.
*/
if (filter.action == KVM_PMU_EVENT_ALLOW)
bitmap_zero(vcpu->kvm->arch.pmu_filter, nr_events);
bitmap_zero(kvm->arch.pmu_filter, nr_events);
else
bitmap_fill(vcpu->kvm->arch.pmu_filter, nr_events);
bitmap_fill(kvm->arch.pmu_filter, nr_events);
}
if (filter.action == KVM_PMU_EVENT_ALLOW)
bitmap_set(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
bitmap_set(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
else
bitmap_clear(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
mutex_unlock(&vcpu->kvm->lock);
mutex_unlock(&kvm->lock);
return 0;
}
case KVM_ARM_VCPU_PMU_V3_SET_PMU: {
int __user *uaddr = (int __user *)(long)attr->addr;
int pmu_id;
if (get_user(pmu_id, uaddr))
return -EFAULT;
return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id);
}
case KVM_ARM_VCPU_PMU_V3_INIT:
return kvm_arm_pmu_v3_init(vcpu);
}
@ -1045,6 +1123,7 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
case KVM_ARM_VCPU_PMU_V3_IRQ:
case KVM_ARM_VCPU_PMU_V3_INIT:
case KVM_ARM_VCPU_PMU_V3_FILTER:
case KVM_ARM_VCPU_PMU_V3_SET_PMU:
if (kvm_vcpu_has_pmu(vcpu))
return 0;
}

View file

@ -29,6 +29,11 @@ struct kvm_pmu {
struct irq_work overflow_work;
};
struct arm_pmu_entry {
struct list_head entry;
struct arm_pmu *arm_pmu;
};
DECLARE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
static __always_inline bool kvm_arm_support_pmu_v3(void)

View file

@ -864,7 +864,7 @@ struct perf_event_context {
#define PERF_NR_CONTEXTS 4
/**
* struct perf_event_cpu_context - per cpu event context structure
* struct perf_cpu_context - per cpu event context structure
*/
struct perf_cpu_context {
struct perf_event_context ctx;

View file

@ -362,6 +362,7 @@ struct kvm_arm_copy_mte_tags {
#define KVM_ARM_VCPU_PMU_V3_IRQ 0
#define KVM_ARM_VCPU_PMU_V3_INIT 1
#define KVM_ARM_VCPU_PMU_V3_FILTER 2
#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3
#define KVM_ARM_VCPU_TIMER_CTRL 1
#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1