Merge tag 'kvmarm-6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD

KVM/arm64 updates for Linux 6.6

- Add support for TLB range invalidation of Stage-2 page tables,
  avoiding unnecessary invalidations. Systems that do not implement
  range invalidation still rely on a full invalidation when dealing
  with large ranges.

- Add infrastructure for forwarding traps taken from an L2 guest to
  the L1 guest, with L0 acting as the dispatcher, another baby step
  towards the full nested support.

- Simplify the way we deal with the (long deprecated) 'CPU target',
  resulting in a much needed cleanup.

- Fix another set of PMU bugs, both on the guest and host sides,
  as we seem to never have any shortage of those...

- Relax the alignment requirements of EL2 VA allocations for
  non-stack allocations, as we were otherwise wasting a lot of that
  precious VA space.

- The usual set of non-functional cleanups, although I note the lack
  of spelling fixes...
Paolo Bonzini, 2023-08-31 13:18:53 -04:00
commit e0fb12c673
46 changed files with 2997 additions and 332 deletions


@ -227,6 +227,8 @@ static inline bool kvm_set_pmuserenr(u64 val)
return false;
}
static inline void kvm_vcpu_pmu_resync_el0(void) {}
/* PMU Version in DFR Register */
#define ARMV8_PMU_DFR_VER_NI 0
#define ARMV8_PMU_DFR_VER_V3P4 0x5


@ -18,10 +18,19 @@
#define HCR_DCT (UL(1) << 57)
#define HCR_ATA_SHIFT 56
#define HCR_ATA (UL(1) << HCR_ATA_SHIFT)
#define HCR_TTLBOS (UL(1) << 55)
#define HCR_TTLBIS (UL(1) << 54)
#define HCR_ENSCXT (UL(1) << 53)
#define HCR_TOCU (UL(1) << 52)
#define HCR_AMVOFFEN (UL(1) << 51)
#define HCR_TICAB (UL(1) << 50)
#define HCR_TID4 (UL(1) << 49)
#define HCR_FIEN (UL(1) << 47)
#define HCR_FWB (UL(1) << 46)
#define HCR_NV2 (UL(1) << 45)
#define HCR_AT (UL(1) << 44)
#define HCR_NV1 (UL(1) << 43)
#define HCR_NV (UL(1) << 42)
#define HCR_API (UL(1) << 41)
#define HCR_APK (UL(1) << 40)
#define HCR_TEA (UL(1) << 37)
@ -89,7 +98,6 @@
HCR_BSU_IS | HCR_FB | HCR_TACR | \
HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3)
#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
#define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
@ -324,6 +332,47 @@
BIT(18) | \
GENMASK(16, 15))
/*
* FGT register definitions
*
* RES0 and polarity masks as of DDI0487J.a, to be updated as needed.
* We're not using the generated masks as they are usually ahead of
* the published ARM ARM, which we use as a reference.
*
* Once we get to a point where the two describe the same thing, we'll
* merge the definitions. One day.
*/
#define __HFGRTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51))
#define __HFGRTR_EL2_MASK GENMASK(49, 0)
#define __HFGRTR_EL2_nMASK (GENMASK(55, 54) | BIT(50))
#define __HFGWTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51) | \
BIT(46) | BIT(42) | BIT(40) | BIT(28) | \
GENMASK(26, 25) | BIT(21) | BIT(18) | \
GENMASK(15, 14) | GENMASK(10, 9) | BIT(2))
#define __HFGWTR_EL2_MASK GENMASK(49, 0)
#define __HFGWTR_EL2_nMASK (GENMASK(55, 54) | BIT(50))
#define __HFGITR_EL2_RES0 GENMASK(63, 57)
#define __HFGITR_EL2_MASK GENMASK(54, 0)
#define __HFGITR_EL2_nMASK GENMASK(56, 55)
#define __HDFGRTR_EL2_RES0 (BIT(49) | BIT(42) | GENMASK(39, 38) | \
GENMASK(21, 20) | BIT(8))
#define __HDFGRTR_EL2_MASK ~__HDFGRTR_EL2_nMASK
#define __HDFGRTR_EL2_nMASK GENMASK(62, 59)
#define __HDFGWTR_EL2_RES0 (BIT(63) | GENMASK(59, 58) | BIT(51) | BIT(47) | \
BIT(43) | GENMASK(40, 38) | BIT(34) | BIT(30) | \
BIT(22) | BIT(9) | BIT(6))
#define __HDFGWTR_EL2_MASK ~__HDFGWTR_EL2_nMASK
#define __HDFGWTR_EL2_nMASK GENMASK(62, 60)
/* Similar definitions for HCRX_EL2 */
#define __HCRX_EL2_RES0 (GENMASK(63, 16) | GENMASK(13, 12))
#define __HCRX_EL2_MASK (0)
#define __HCRX_EL2_nMASK (GENMASK(15, 14) | GENMASK(4, 0))
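The polarity convention above is worth spelling out: bits covered by the MASK macros have positive polarity (1 means trap), bits covered by the nMASK macros have negative polarity (0 means trap), and RES0 bits must be stripped before use. The sketch below uses a hypothetical helper name and is not kernel code; it shows how a guest's shadow HFGRTR_EL2 value would be folded into clear/set pairs, mirroring the compute_clr_set() helper that appears later in this diff.
static inline void hfgrtr_guest_clr_set(u64 guest_val, u64 *clr, u64 *set)
{
	u64 hfg = guest_val & ~__HFGRTR_EL2_RES0;	/* drop RES0 bits */

	*set |= hfg & __HFGRTR_EL2_MASK;	/* positive polarity: 1 = trap */
	*clr |= ~hfg & __HFGRTR_EL2_nMASK;	/* negative polarity: 0 = trap */
}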
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
#define HPFAR_MASK (~UL(0xf))
/*


@ -70,6 +70,7 @@ enum __kvm_host_smccc_func {
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa_nsh,
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range,
__KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
__KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr,
@ -229,6 +230,8 @@ extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
extern void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
phys_addr_t ipa,
int level);
extern void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
phys_addr_t start, unsigned long pages);
extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
extern void __kvm_timer_set_cntvoff(u64 cntvoff);


@ -49,6 +49,7 @@
#define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4)
#define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5)
#define KVM_REQ_SUSPEND KVM_ARCH_REQ(6)
#define KVM_REQ_RESYNC_PMU_EL0 KVM_ARCH_REQ(7)
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
KVM_DIRTY_LOG_INITIALLY_SET)
@ -380,6 +381,7 @@ enum vcpu_sysreg {
CPTR_EL2, /* Architectural Feature Trap Register (EL2) */
HSTR_EL2, /* Hypervisor System Trap Register */
HACR_EL2, /* Hypervisor Auxiliary Control Register */
HCRX_EL2, /* Extended Hypervisor Configuration Register */
TTBR0_EL2, /* Translation Table Base Register 0 (EL2) */
TTBR1_EL2, /* Translation Table Base Register 1 (EL2) */
TCR_EL2, /* Translation Control Register (EL2) */
@ -400,6 +402,11 @@ enum vcpu_sysreg {
TPIDR_EL2, /* EL2 Software Thread ID Register */
CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */
SP_EL2, /* EL2 Stack Pointer */
HFGRTR_EL2,
HFGWTR_EL2,
HFGITR_EL2,
HDFGRTR_EL2,
HDFGWTR_EL2,
CNTHP_CTL_EL2,
CNTHP_CVAL_EL2,
CNTHV_CTL_EL2,
@ -567,8 +574,7 @@ struct kvm_vcpu_arch {
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
/* Target CPU and feature flags */
int target;
/* feature flags */
DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
/* Virtual SError ESR to restore when HCR_EL2.VSE is set */
@ -669,6 +675,8 @@ struct kvm_vcpu_arch {
#define VCPU_SVE_FINALIZED __vcpu_single_flag(cflags, BIT(1))
/* PTRAUTH exposed to guest */
#define GUEST_HAS_PTRAUTH __vcpu_single_flag(cflags, BIT(2))
/* KVM_ARM_VCPU_INIT completed */
#define VCPU_INITIALIZED __vcpu_single_flag(cflags, BIT(3))
/* Exception pending */
#define PENDING_EXCEPTION __vcpu_single_flag(iflags, BIT(0))
@ -899,7 +907,6 @@ struct kvm_vcpu_stat {
u64 exits;
};
void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
@ -967,8 +974,6 @@ void kvm_arm_resume_guest(struct kvm *kvm);
#define kvm_call_hyp_nvhe(f, ...) f(__VA_ARGS__)
#endif /* __KVM_NVHE_HYPERVISOR__ */
void force_vm_exit(const cpumask_t *mask);
int handle_exit(struct kvm_vcpu *vcpu, int exception_index);
void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index);
@ -983,6 +988,7 @@ int kvm_handle_cp10_id(struct kvm_vcpu *vcpu);
void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
int __init kvm_sys_reg_table_init(void);
int __init populate_nv_trap_config(void);
bool lock_all_vcpus(struct kvm *kvm);
void unlock_all_vcpus(struct kvm *kvm);
@ -1049,8 +1055,6 @@ static inline bool kvm_system_needs_idmapped_vectors(void)
return cpus_have_const_cap(ARM64_SPECTRE_V3A);
}
void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu);
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
@ -1113,13 +1117,15 @@ int __init kvm_set_ipa_limit(void);
#define __KVM_HAVE_ARCH_VM_ALLOC
struct kvm *kvm_arch_alloc_vm(void);
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
static inline bool kvm_vm_is_protected(struct kvm *kvm)
{
return false;
}
void kvm_init_protected_traps(struct kvm_vcpu *vcpu);
int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);


@ -168,6 +168,7 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
void __iomem **haddr);
int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
void **haddr);
int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr);
void __init free_hyp_pgds(void);
void stage2_unmap_vm(struct kvm *kvm);


@ -11,6 +11,8 @@ static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu)
test_bit(KVM_ARM_VCPU_HAS_EL2, vcpu->arch.features));
}
extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu);
struct sys_reg_params;
struct sys_reg_desc;


@ -746,4 +746,14 @@ enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte);
* kvm_pgtable_prot format.
*/
enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte);
/**
* kvm_tlb_flush_vmid_range() - Invalidate/flush a range of TLB entries
*
* @mmu: Stage-2 KVM MMU struct
* @addr: The base Intermediate physical address from which to invalidate
* @size: Size of the range from the base to invalidate
*/
void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
phys_addr_t addr, size_t size);
#endif /* __ARM64_KVM_PGTABLE_H__ */
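A hedged usage sketch of the helper declared above; 'kvm' and 'ipa' are assumed to be a struct kvm pointer and a block-aligned intermediate physical address, not values taken from this diff. The call invalidates only the Stage-2 TLB entries covering a 2MiB block rather than the whole VMID; on systems without range invalidation, the implementation later in this diff falls back to a full VMID flush.
	/* Invalidate the Stage-2 TLB entries for one 2MiB block at 'ipa' */
	kvm_tlb_flush_vmid_range(&kvm->arch.mmu, ipa, SZ_2M);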


@ -124,6 +124,37 @@
#define SYS_DC_CIGSW sys_insn(1, 0, 7, 14, 4)
#define SYS_DC_CIGDSW sys_insn(1, 0, 7, 14, 6)
#define SYS_IC_IALLUIS sys_insn(1, 0, 7, 1, 0)
#define SYS_IC_IALLU sys_insn(1, 0, 7, 5, 0)
#define SYS_IC_IVAU sys_insn(1, 3, 7, 5, 1)
#define SYS_DC_IVAC sys_insn(1, 0, 7, 6, 1)
#define SYS_DC_IGVAC sys_insn(1, 0, 7, 6, 3)
#define SYS_DC_IGDVAC sys_insn(1, 0, 7, 6, 5)
#define SYS_DC_CVAC sys_insn(1, 3, 7, 10, 1)
#define SYS_DC_CGVAC sys_insn(1, 3, 7, 10, 3)
#define SYS_DC_CGDVAC sys_insn(1, 3, 7, 10, 5)
#define SYS_DC_CVAU sys_insn(1, 3, 7, 11, 1)
#define SYS_DC_CVAP sys_insn(1, 3, 7, 12, 1)
#define SYS_DC_CGVAP sys_insn(1, 3, 7, 12, 3)
#define SYS_DC_CGDVAP sys_insn(1, 3, 7, 12, 5)
#define SYS_DC_CVADP sys_insn(1, 3, 7, 13, 1)
#define SYS_DC_CGVADP sys_insn(1, 3, 7, 13, 3)
#define SYS_DC_CGDVADP sys_insn(1, 3, 7, 13, 5)
#define SYS_DC_CIVAC sys_insn(1, 3, 7, 14, 1)
#define SYS_DC_CIGVAC sys_insn(1, 3, 7, 14, 3)
#define SYS_DC_CIGDVAC sys_insn(1, 3, 7, 14, 5)
/* Data cache zero operations */
#define SYS_DC_ZVA sys_insn(1, 3, 7, 4, 1)
#define SYS_DC_GVA sys_insn(1, 3, 7, 4, 3)
#define SYS_DC_GZVA sys_insn(1, 3, 7, 4, 4)
/*
* Automatically generated definitions for system registers, the
* manual encodings below are in the process of being converted to
@ -163,6 +194,82 @@
#define SYS_DBGDTRTX_EL0 sys_reg(2, 3, 0, 5, 0)
#define SYS_DBGVCR32_EL2 sys_reg(2, 4, 0, 7, 0)
#define SYS_BRBINF_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 0))
#define SYS_BRBINFINJ_EL1 sys_reg(2, 1, 9, 1, 0)
#define SYS_BRBSRC_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 1))
#define SYS_BRBSRCINJ_EL1 sys_reg(2, 1, 9, 1, 1)
#define SYS_BRBTGT_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 2))
#define SYS_BRBTGTINJ_EL1 sys_reg(2, 1, 9, 1, 2)
#define SYS_BRBTS_EL1 sys_reg(2, 1, 9, 0, 2)
#define SYS_BRBCR_EL1 sys_reg(2, 1, 9, 0, 0)
#define SYS_BRBFCR_EL1 sys_reg(2, 1, 9, 0, 1)
#define SYS_BRBIDR0_EL1 sys_reg(2, 1, 9, 2, 0)
#define SYS_TRCITECR_EL1 sys_reg(3, 0, 1, 2, 3)
#define SYS_TRCACATR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (2 | (m >> 3)))
#define SYS_TRCACVR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (0 | (m >> 3)))
#define SYS_TRCAUTHSTATUS sys_reg(2, 1, 7, 14, 6)
#define SYS_TRCAUXCTLR sys_reg(2, 1, 0, 6, 0)
#define SYS_TRCBBCTLR sys_reg(2, 1, 0, 15, 0)
#define SYS_TRCCCCTLR sys_reg(2, 1, 0, 14, 0)
#define SYS_TRCCIDCCTLR0 sys_reg(2, 1, 3, 0, 2)
#define SYS_TRCCIDCCTLR1 sys_reg(2, 1, 3, 1, 2)
#define SYS_TRCCIDCVR(m) sys_reg(2, 1, 3, ((m & 7) << 1), 0)
#define SYS_TRCCLAIMCLR sys_reg(2, 1, 7, 9, 6)
#define SYS_TRCCLAIMSET sys_reg(2, 1, 7, 8, 6)
#define SYS_TRCCNTCTLR(m) sys_reg(2, 1, 0, (4 | (m & 3)), 5)
#define SYS_TRCCNTRLDVR(m) sys_reg(2, 1, 0, (0 | (m & 3)), 5)
#define SYS_TRCCNTVR(m) sys_reg(2, 1, 0, (8 | (m & 3)), 5)
#define SYS_TRCCONFIGR sys_reg(2, 1, 0, 4, 0)
#define SYS_TRCDEVARCH sys_reg(2, 1, 7, 15, 6)
#define SYS_TRCDEVID sys_reg(2, 1, 7, 2, 7)
#define SYS_TRCEVENTCTL0R sys_reg(2, 1, 0, 8, 0)
#define SYS_TRCEVENTCTL1R sys_reg(2, 1, 0, 9, 0)
#define SYS_TRCEXTINSELR(m) sys_reg(2, 1, 0, (8 | (m & 3)), 4)
#define SYS_TRCIDR0 sys_reg(2, 1, 0, 8, 7)
#define SYS_TRCIDR10 sys_reg(2, 1, 0, 2, 6)
#define SYS_TRCIDR11 sys_reg(2, 1, 0, 3, 6)
#define SYS_TRCIDR12 sys_reg(2, 1, 0, 4, 6)
#define SYS_TRCIDR13 sys_reg(2, 1, 0, 5, 6)
#define SYS_TRCIDR1 sys_reg(2, 1, 0, 9, 7)
#define SYS_TRCIDR2 sys_reg(2, 1, 0, 10, 7)
#define SYS_TRCIDR3 sys_reg(2, 1, 0, 11, 7)
#define SYS_TRCIDR4 sys_reg(2, 1, 0, 12, 7)
#define SYS_TRCIDR5 sys_reg(2, 1, 0, 13, 7)
#define SYS_TRCIDR6 sys_reg(2, 1, 0, 14, 7)
#define SYS_TRCIDR7 sys_reg(2, 1, 0, 15, 7)
#define SYS_TRCIDR8 sys_reg(2, 1, 0, 0, 6)
#define SYS_TRCIDR9 sys_reg(2, 1, 0, 1, 6)
#define SYS_TRCIMSPEC(m) sys_reg(2, 1, 0, (m & 7), 7)
#define SYS_TRCITEEDCR sys_reg(2, 1, 0, 2, 1)
#define SYS_TRCOSLSR sys_reg(2, 1, 1, 1, 4)
#define SYS_TRCPRGCTLR sys_reg(2, 1, 0, 1, 0)
#define SYS_TRCQCTLR sys_reg(2, 1, 0, 1, 1)
#define SYS_TRCRSCTLR(m) sys_reg(2, 1, 1, (m & 15), (0 | (m >> 4)))
#define SYS_TRCRSR sys_reg(2, 1, 0, 10, 0)
#define SYS_TRCSEQEVR(m) sys_reg(2, 1, 0, (m & 3), 4)
#define SYS_TRCSEQRSTEVR sys_reg(2, 1, 0, 6, 4)
#define SYS_TRCSEQSTR sys_reg(2, 1, 0, 7, 4)
#define SYS_TRCSSCCR(m) sys_reg(2, 1, 1, (m & 7), 2)
#define SYS_TRCSSCSR(m) sys_reg(2, 1, 1, (8 | (m & 7)), 2)
#define SYS_TRCSSPCICR(m) sys_reg(2, 1, 1, (m & 7), 3)
#define SYS_TRCSTALLCTLR sys_reg(2, 1, 0, 11, 0)
#define SYS_TRCSTATR sys_reg(2, 1, 0, 3, 0)
#define SYS_TRCSYNCPR sys_reg(2, 1, 0, 13, 0)
#define SYS_TRCTRACEIDR sys_reg(2, 1, 0, 0, 1)
#define SYS_TRCTSCTLR sys_reg(2, 1, 0, 12, 0)
#define SYS_TRCVICTLR sys_reg(2, 1, 0, 0, 2)
#define SYS_TRCVIIECTLR sys_reg(2, 1, 0, 1, 2)
#define SYS_TRCVIPCSSCTLR sys_reg(2, 1, 0, 3, 2)
#define SYS_TRCVISSCTLR sys_reg(2, 1, 0, 2, 2)
#define SYS_TRCVMIDCCTLR0 sys_reg(2, 1, 3, 2, 2)
#define SYS_TRCVMIDCCTLR1 sys_reg(2, 1, 3, 3, 2)
#define SYS_TRCVMIDCVR(m) sys_reg(2, 1, 3, ((m & 7) << 1), 1)
/* ETM */
#define SYS_TRCOSLAR sys_reg(2, 1, 1, 0, 4)
#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0)
#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5)
#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6)
@ -203,8 +310,13 @@
#define SYS_ERXCTLR_EL1 sys_reg(3, 0, 5, 4, 1)
#define SYS_ERXSTATUS_EL1 sys_reg(3, 0, 5, 4, 2)
#define SYS_ERXADDR_EL1 sys_reg(3, 0, 5, 4, 3)
#define SYS_ERXPFGF_EL1 sys_reg(3, 0, 5, 4, 4)
#define SYS_ERXPFGCTL_EL1 sys_reg(3, 0, 5, 4, 5)
#define SYS_ERXPFGCDN_EL1 sys_reg(3, 0, 5, 4, 6)
#define SYS_ERXMISC0_EL1 sys_reg(3, 0, 5, 5, 0)
#define SYS_ERXMISC1_EL1 sys_reg(3, 0, 5, 5, 1)
#define SYS_ERXMISC2_EL1 sys_reg(3, 0, 5, 5, 2)
#define SYS_ERXMISC3_EL1 sys_reg(3, 0, 5, 5, 3)
#define SYS_TFSR_EL1 sys_reg(3, 0, 5, 6, 0)
#define SYS_TFSRE0_EL1 sys_reg(3, 0, 5, 6, 1)
@ -275,6 +387,8 @@
#define SYS_ICC_IGRPEN0_EL1 sys_reg(3, 0, 12, 12, 6)
#define SYS_ICC_IGRPEN1_EL1 sys_reg(3, 0, 12, 12, 7)
#define SYS_ACCDATA_EL1 sys_reg(3, 0, 13, 0, 5)
#define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0)
#define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7)
@ -383,8 +497,6 @@
#define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2)
#define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1)
#define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4)
#define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5)
#define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6)
#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0)
#define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1)
@ -478,6 +590,158 @@
#define SYS_SP_EL2 sys_reg(3, 6, 4, 1, 0)
/* AT instructions */
#define AT_Op0 1
#define AT_CRn 7
#define OP_AT_S1E1R sys_insn(AT_Op0, 0, AT_CRn, 8, 0)
#define OP_AT_S1E1W sys_insn(AT_Op0, 0, AT_CRn, 8, 1)
#define OP_AT_S1E0R sys_insn(AT_Op0, 0, AT_CRn, 8, 2)
#define OP_AT_S1E0W sys_insn(AT_Op0, 0, AT_CRn, 8, 3)
#define OP_AT_S1E1RP sys_insn(AT_Op0, 0, AT_CRn, 9, 0)
#define OP_AT_S1E1WP sys_insn(AT_Op0, 0, AT_CRn, 9, 1)
#define OP_AT_S1E2R sys_insn(AT_Op0, 4, AT_CRn, 8, 0)
#define OP_AT_S1E2W sys_insn(AT_Op0, 4, AT_CRn, 8, 1)
#define OP_AT_S12E1R sys_insn(AT_Op0, 4, AT_CRn, 8, 4)
#define OP_AT_S12E1W sys_insn(AT_Op0, 4, AT_CRn, 8, 5)
#define OP_AT_S12E0R sys_insn(AT_Op0, 4, AT_CRn, 8, 6)
#define OP_AT_S12E0W sys_insn(AT_Op0, 4, AT_CRn, 8, 7)
/* TLBI instructions */
#define OP_TLBI_VMALLE1OS sys_insn(1, 0, 8, 1, 0)
#define OP_TLBI_VAE1OS sys_insn(1, 0, 8, 1, 1)
#define OP_TLBI_ASIDE1OS sys_insn(1, 0, 8, 1, 2)
#define OP_TLBI_VAAE1OS sys_insn(1, 0, 8, 1, 3)
#define OP_TLBI_VALE1OS sys_insn(1, 0, 8, 1, 5)
#define OP_TLBI_VAALE1OS sys_insn(1, 0, 8, 1, 7)
#define OP_TLBI_RVAE1IS sys_insn(1, 0, 8, 2, 1)
#define OP_TLBI_RVAAE1IS sys_insn(1, 0, 8, 2, 3)
#define OP_TLBI_RVALE1IS sys_insn(1, 0, 8, 2, 5)
#define OP_TLBI_RVAALE1IS sys_insn(1, 0, 8, 2, 7)
#define OP_TLBI_VMALLE1IS sys_insn(1, 0, 8, 3, 0)
#define OP_TLBI_VAE1IS sys_insn(1, 0, 8, 3, 1)
#define OP_TLBI_ASIDE1IS sys_insn(1, 0, 8, 3, 2)
#define OP_TLBI_VAAE1IS sys_insn(1, 0, 8, 3, 3)
#define OP_TLBI_VALE1IS sys_insn(1, 0, 8, 3, 5)
#define OP_TLBI_VAALE1IS sys_insn(1, 0, 8, 3, 7)
#define OP_TLBI_RVAE1OS sys_insn(1, 0, 8, 5, 1)
#define OP_TLBI_RVAAE1OS sys_insn(1, 0, 8, 5, 3)
#define OP_TLBI_RVALE1OS sys_insn(1, 0, 8, 5, 5)
#define OP_TLBI_RVAALE1OS sys_insn(1, 0, 8, 5, 7)
#define OP_TLBI_RVAE1 sys_insn(1, 0, 8, 6, 1)
#define OP_TLBI_RVAAE1 sys_insn(1, 0, 8, 6, 3)
#define OP_TLBI_RVALE1 sys_insn(1, 0, 8, 6, 5)
#define OP_TLBI_RVAALE1 sys_insn(1, 0, 8, 6, 7)
#define OP_TLBI_VMALLE1 sys_insn(1, 0, 8, 7, 0)
#define OP_TLBI_VAE1 sys_insn(1, 0, 8, 7, 1)
#define OP_TLBI_ASIDE1 sys_insn(1, 0, 8, 7, 2)
#define OP_TLBI_VAAE1 sys_insn(1, 0, 8, 7, 3)
#define OP_TLBI_VALE1 sys_insn(1, 0, 8, 7, 5)
#define OP_TLBI_VAALE1 sys_insn(1, 0, 8, 7, 7)
#define OP_TLBI_VMALLE1OSNXS sys_insn(1, 0, 9, 1, 0)
#define OP_TLBI_VAE1OSNXS sys_insn(1, 0, 9, 1, 1)
#define OP_TLBI_ASIDE1OSNXS sys_insn(1, 0, 9, 1, 2)
#define OP_TLBI_VAAE1OSNXS sys_insn(1, 0, 9, 1, 3)
#define OP_TLBI_VALE1OSNXS sys_insn(1, 0, 9, 1, 5)
#define OP_TLBI_VAALE1OSNXS sys_insn(1, 0, 9, 1, 7)
#define OP_TLBI_RVAE1ISNXS sys_insn(1, 0, 9, 2, 1)
#define OP_TLBI_RVAAE1ISNXS sys_insn(1, 0, 9, 2, 3)
#define OP_TLBI_RVALE1ISNXS sys_insn(1, 0, 9, 2, 5)
#define OP_TLBI_RVAALE1ISNXS sys_insn(1, 0, 9, 2, 7)
#define OP_TLBI_VMALLE1ISNXS sys_insn(1, 0, 9, 3, 0)
#define OP_TLBI_VAE1ISNXS sys_insn(1, 0, 9, 3, 1)
#define OP_TLBI_ASIDE1ISNXS sys_insn(1, 0, 9, 3, 2)
#define OP_TLBI_VAAE1ISNXS sys_insn(1, 0, 9, 3, 3)
#define OP_TLBI_VALE1ISNXS sys_insn(1, 0, 9, 3, 5)
#define OP_TLBI_VAALE1ISNXS sys_insn(1, 0, 9, 3, 7)
#define OP_TLBI_RVAE1OSNXS sys_insn(1, 0, 9, 5, 1)
#define OP_TLBI_RVAAE1OSNXS sys_insn(1, 0, 9, 5, 3)
#define OP_TLBI_RVALE1OSNXS sys_insn(1, 0, 9, 5, 5)
#define OP_TLBI_RVAALE1OSNXS sys_insn(1, 0, 9, 5, 7)
#define OP_TLBI_RVAE1NXS sys_insn(1, 0, 9, 6, 1)
#define OP_TLBI_RVAAE1NXS sys_insn(1, 0, 9, 6, 3)
#define OP_TLBI_RVALE1NXS sys_insn(1, 0, 9, 6, 5)
#define OP_TLBI_RVAALE1NXS sys_insn(1, 0, 9, 6, 7)
#define OP_TLBI_VMALLE1NXS sys_insn(1, 0, 9, 7, 0)
#define OP_TLBI_VAE1NXS sys_insn(1, 0, 9, 7, 1)
#define OP_TLBI_ASIDE1NXS sys_insn(1, 0, 9, 7, 2)
#define OP_TLBI_VAAE1NXS sys_insn(1, 0, 9, 7, 3)
#define OP_TLBI_VALE1NXS sys_insn(1, 0, 9, 7, 5)
#define OP_TLBI_VAALE1NXS sys_insn(1, 0, 9, 7, 7)
#define OP_TLBI_IPAS2E1IS sys_insn(1, 4, 8, 0, 1)
#define OP_TLBI_RIPAS2E1IS sys_insn(1, 4, 8, 0, 2)
#define OP_TLBI_IPAS2LE1IS sys_insn(1, 4, 8, 0, 5)
#define OP_TLBI_RIPAS2LE1IS sys_insn(1, 4, 8, 0, 6)
#define OP_TLBI_ALLE2OS sys_insn(1, 4, 8, 1, 0)
#define OP_TLBI_VAE2OS sys_insn(1, 4, 8, 1, 1)
#define OP_TLBI_ALLE1OS sys_insn(1, 4, 8, 1, 4)
#define OP_TLBI_VALE2OS sys_insn(1, 4, 8, 1, 5)
#define OP_TLBI_VMALLS12E1OS sys_insn(1, 4, 8, 1, 6)
#define OP_TLBI_RVAE2IS sys_insn(1, 4, 8, 2, 1)
#define OP_TLBI_RVALE2IS sys_insn(1, 4, 8, 2, 5)
#define OP_TLBI_ALLE2IS sys_insn(1, 4, 8, 3, 0)
#define OP_TLBI_VAE2IS sys_insn(1, 4, 8, 3, 1)
#define OP_TLBI_ALLE1IS sys_insn(1, 4, 8, 3, 4)
#define OP_TLBI_VALE2IS sys_insn(1, 4, 8, 3, 5)
#define OP_TLBI_VMALLS12E1IS sys_insn(1, 4, 8, 3, 6)
#define OP_TLBI_IPAS2E1OS sys_insn(1, 4, 8, 4, 0)
#define OP_TLBI_IPAS2E1 sys_insn(1, 4, 8, 4, 1)
#define OP_TLBI_RIPAS2E1 sys_insn(1, 4, 8, 4, 2)
#define OP_TLBI_RIPAS2E1OS sys_insn(1, 4, 8, 4, 3)
#define OP_TLBI_IPAS2LE1OS sys_insn(1, 4, 8, 4, 4)
#define OP_TLBI_IPAS2LE1 sys_insn(1, 4, 8, 4, 5)
#define OP_TLBI_RIPAS2LE1 sys_insn(1, 4, 8, 4, 6)
#define OP_TLBI_RIPAS2LE1OS sys_insn(1, 4, 8, 4, 7)
#define OP_TLBI_RVAE2OS sys_insn(1, 4, 8, 5, 1)
#define OP_TLBI_RVALE2OS sys_insn(1, 4, 8, 5, 5)
#define OP_TLBI_RVAE2 sys_insn(1, 4, 8, 6, 1)
#define OP_TLBI_RVALE2 sys_insn(1, 4, 8, 6, 5)
#define OP_TLBI_ALLE2 sys_insn(1, 4, 8, 7, 0)
#define OP_TLBI_VAE2 sys_insn(1, 4, 8, 7, 1)
#define OP_TLBI_ALLE1 sys_insn(1, 4, 8, 7, 4)
#define OP_TLBI_VALE2 sys_insn(1, 4, 8, 7, 5)
#define OP_TLBI_VMALLS12E1 sys_insn(1, 4, 8, 7, 6)
#define OP_TLBI_IPAS2E1ISNXS sys_insn(1, 4, 9, 0, 1)
#define OP_TLBI_RIPAS2E1ISNXS sys_insn(1, 4, 9, 0, 2)
#define OP_TLBI_IPAS2LE1ISNXS sys_insn(1, 4, 9, 0, 5)
#define OP_TLBI_RIPAS2LE1ISNXS sys_insn(1, 4, 9, 0, 6)
#define OP_TLBI_ALLE2OSNXS sys_insn(1, 4, 9, 1, 0)
#define OP_TLBI_VAE2OSNXS sys_insn(1, 4, 9, 1, 1)
#define OP_TLBI_ALLE1OSNXS sys_insn(1, 4, 9, 1, 4)
#define OP_TLBI_VALE2OSNXS sys_insn(1, 4, 9, 1, 5)
#define OP_TLBI_VMALLS12E1OSNXS sys_insn(1, 4, 9, 1, 6)
#define OP_TLBI_RVAE2ISNXS sys_insn(1, 4, 9, 2, 1)
#define OP_TLBI_RVALE2ISNXS sys_insn(1, 4, 9, 2, 5)
#define OP_TLBI_ALLE2ISNXS sys_insn(1, 4, 9, 3, 0)
#define OP_TLBI_VAE2ISNXS sys_insn(1, 4, 9, 3, 1)
#define OP_TLBI_ALLE1ISNXS sys_insn(1, 4, 9, 3, 4)
#define OP_TLBI_VALE2ISNXS sys_insn(1, 4, 9, 3, 5)
#define OP_TLBI_VMALLS12E1ISNXS sys_insn(1, 4, 9, 3, 6)
#define OP_TLBI_IPAS2E1OSNXS sys_insn(1, 4, 9, 4, 0)
#define OP_TLBI_IPAS2E1NXS sys_insn(1, 4, 9, 4, 1)
#define OP_TLBI_RIPAS2E1NXS sys_insn(1, 4, 9, 4, 2)
#define OP_TLBI_RIPAS2E1OSNXS sys_insn(1, 4, 9, 4, 3)
#define OP_TLBI_IPAS2LE1OSNXS sys_insn(1, 4, 9, 4, 4)
#define OP_TLBI_IPAS2LE1NXS sys_insn(1, 4, 9, 4, 5)
#define OP_TLBI_RIPAS2LE1NXS sys_insn(1, 4, 9, 4, 6)
#define OP_TLBI_RIPAS2LE1OSNXS sys_insn(1, 4, 9, 4, 7)
#define OP_TLBI_RVAE2OSNXS sys_insn(1, 4, 9, 5, 1)
#define OP_TLBI_RVALE2OSNXS sys_insn(1, 4, 9, 5, 5)
#define OP_TLBI_RVAE2NXS sys_insn(1, 4, 9, 6, 1)
#define OP_TLBI_RVALE2NXS sys_insn(1, 4, 9, 6, 5)
#define OP_TLBI_ALLE2NXS sys_insn(1, 4, 9, 7, 0)
#define OP_TLBI_VAE2NXS sys_insn(1, 4, 9, 7, 1)
#define OP_TLBI_ALLE1NXS sys_insn(1, 4, 9, 7, 4)
#define OP_TLBI_VALE2NXS sys_insn(1, 4, 9, 7, 5)
#define OP_TLBI_VMALLS12E1NXS sys_insn(1, 4, 9, 7, 6)
/* Misc instructions */
#define OP_BRB_IALL sys_insn(1, 1, 7, 2, 4)
#define OP_BRB_INJ sys_insn(1, 1, 7, 2, 5)
#define OP_CFP_RCTX sys_insn(1, 3, 7, 3, 4)
#define OP_DVP_RCTX sys_insn(1, 3, 7, 3, 5)
#define OP_CPP_RCTX sys_insn(1, 3, 7, 3, 7)
/* Common SCTLR_ELx flags. */
#define SCTLR_ELx_ENTP2 (BIT(60))
#define SCTLR_ELx_DSSBS (BIT(44))


@ -278,14 +278,77 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
*/
#define MAX_TLBI_OPS PTRS_PER_PTE
/*
* __flush_tlb_range_op - Perform TLBI operation upon a range
*
* @op: TLBI instruction that operates on a range (has 'r' prefix)
* @start: The start address of the range
* @pages: Range as the number of pages from 'start'
* @stride: Flush granularity
* @asid: The ASID of the task (0 for IPA instructions)
* @tlb_level: Translation Table level hint, if known
* @tlbi_user: If 'true', call an additional __tlbi_user()
* (typically for user ASIDs). 'false' for IPA instructions
*
* When the CPU does not support TLB range operations, flush the TLB
* entries one by one at the granularity of 'stride'. If the TLB
* range ops are supported, then:
*
* 1. If 'pages' is odd, flush the first page through non-range
* operations;
*
* 2. For remaining pages: the minimum range granularity is decided
* by 'scale', so multiple range TLBI operations may be required.
* Start from scale = 0, flush the corresponding number of pages
* ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
* until no pages left.
*
* Note that certain ranges can be represented by either num = 31 and
* scale or num = 0 and scale + 1. The loop below favours the latter
* since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
*/
#define __flush_tlb_range_op(op, start, pages, stride, \
asid, tlb_level, tlbi_user) \
do { \
int num = 0; \
int scale = 0; \
unsigned long addr; \
\
while (pages > 0) { \
if (!system_supports_tlb_range() || \
pages % 2 == 1) { \
addr = __TLBI_VADDR(start, asid); \
__tlbi_level(op, addr, tlb_level); \
if (tlbi_user) \
__tlbi_user_level(op, addr, tlb_level); \
start += stride; \
pages -= stride >> PAGE_SHIFT; \
continue; \
} \
\
num = __TLBI_RANGE_NUM(pages, scale); \
if (num >= 0) { \
addr = __TLBI_VADDR_RANGE(start, asid, scale, \
num, tlb_level); \
__tlbi(r##op, addr); \
if (tlbi_user) \
__tlbi_user(r##op, addr); \
start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
pages -= __TLBI_RANGE_PAGES(num, scale); \
} \
scale++; \
} \
} while (0)
#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
__flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
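To make the scale/num decomposition above concrete, here is a small stand-alone model of the loop. It is an illustration only: it assumes a stride of PAGE_SIZE and derives num as ((pages >> (5*scale + 1)) & 31) - 1, consistent with the (num+1)*2^(5*scale+1) formula in the comment, rather than using the actual kernel macros.
#include <stdio.h>

int main(void)
{
	unsigned long pages = 513;	/* e.g. 513 pages to invalidate */
	int scale = 0;

	while (pages > 0) {
		if (pages % 2 == 1) {	/* odd leftover: single-page TLBI */
			printf("non-range TLBI: 1 page\n");
			pages -= 1;
			continue;
		}

		/* each range TLBI covers (num + 1) << (5*scale + 1) pages */
		long num = ((pages >> (5 * scale + 1)) & 31) - 1;
		if (num >= 0) {
			unsigned long chunk = (num + 1UL) << (5 * scale + 1);
			printf("range TLBI: scale=%d num=%ld -> %lu pages\n",
			       scale, num, chunk);
			pages -= chunk;
		}
		scale++;
	}
	return 0;
}
For 513 pages this prints one non-range TLBI for the odd page, then a single range TLBI with scale=1, num=7 covering the remaining 512 pages.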
static inline void __flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
unsigned long stride, bool last_level,
int tlb_level)
{
int num = 0;
int scale = 0;
unsigned long asid, addr, pages;
unsigned long asid, pages;
start = round_down(start, stride);
end = round_up(end, stride);
@ -307,56 +370,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
dsb(ishst);
asid = ASID(vma->vm_mm);
/*
* When the CPU does not support TLB range operations, flush the TLB
* entries one by one at the granularity of 'stride'. If the TLB
* range ops are supported, then:
*
* 1. If 'pages' is odd, flush the first page through non-range
* operations;
*
* 2. For remaining pages: the minimum range granularity is decided
* by 'scale', so multiple range TLBI operations may be required.
* Start from scale = 0, flush the corresponding number of pages
* ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
* until no pages left.
*
* Note that certain ranges can be represented by either num = 31 and
* scale or num = 0 and scale + 1. The loop below favours the latter
* since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
*/
while (pages > 0) {
if (!system_supports_tlb_range() ||
pages % 2 == 1) {
addr = __TLBI_VADDR(start, asid);
if (last_level) {
__tlbi_level(vale1is, addr, tlb_level);
__tlbi_user_level(vale1is, addr, tlb_level);
} else {
__tlbi_level(vae1is, addr, tlb_level);
__tlbi_user_level(vae1is, addr, tlb_level);
}
start += stride;
pages -= stride >> PAGE_SHIFT;
continue;
}
if (last_level)
__flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true);
else
__flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
num = __TLBI_RANGE_NUM(pages, scale);
if (num >= 0) {
addr = __TLBI_VADDR_RANGE(start, asid, scale,
num, tlb_level);
if (last_level) {
__tlbi(rvale1is, addr);
__tlbi_user(rvale1is, addr);
} else {
__tlbi(rvae1is, addr);
__tlbi_user(rvae1is, addr);
}
start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
pages -= __TLBI_RANGE_PAGES(num, scale);
}
scale++;
}
dsb(ish);
}


@ -2627,6 +2627,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, LRCPC, IMP)
},
{
.desc = "Fine Grained Traps",
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.capability = ARM64_HAS_FGT,
.matches = has_cpuid_feature,
ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, FGT, IMP)
},
#ifdef CONFIG_ARM64_SME
{
.desc = "Scalable Matrix Extension",


@ -25,7 +25,6 @@ menuconfig KVM
select MMU_NOTIFIER
select PREEMPT_NOTIFIERS
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
select KVM_MMIO
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_XFER_TO_GUEST_WORK
@ -43,6 +42,7 @@ menuconfig KVM
select SCHED_INFO
select GUEST_PERF_EVENTS if PERF_EVENTS
select INTERVAL_TREE
select XARRAY_MULTI
help
Support hosting virtualized guest machines.


@ -36,6 +36,7 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_nested.h>
#include <asm/kvm_pkvm.h>
#include <asm/kvm_emulate.h>
#include <asm/sections.h>
@ -365,7 +366,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
#endif
/* Force users to call KVM_ARM_VCPU_INIT */
vcpu->arch.target = -1;
vcpu_clear_flag(vcpu, VCPU_INITIALIZED);
bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
@ -462,7 +463,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
vcpu_ptrauth_disable(vcpu);
kvm_arch_vcpu_load_debug_state_flags(vcpu);
if (!cpumask_test_cpu(smp_processor_id(), vcpu->kvm->arch.supported_cpus))
if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus))
vcpu_set_on_unsupported_cpu(vcpu);
}
@ -574,7 +575,7 @@ unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
{
return vcpu->arch.target >= 0;
return vcpu_get_flag(vcpu, VCPU_INITIALIZED);
}
/*
@ -803,6 +804,9 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu)
kvm_pmu_handle_pmcr(vcpu,
__vcpu_sys_reg(vcpu, PMCR_EL0));
if (kvm_check_request(KVM_REQ_RESYNC_PMU_EL0, vcpu))
kvm_vcpu_pmu_restore_guest(vcpu);
if (kvm_check_request(KVM_REQ_SUSPEND, vcpu))
return kvm_vcpu_suspend(vcpu);
@ -818,6 +822,9 @@ static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu)
if (likely(!vcpu_mode_is_32bit(vcpu)))
return false;
if (vcpu_has_nv(vcpu))
return true;
return !kvm_supports_32bit_el0();
}
@ -1058,7 +1065,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* invalid. The VMM can try and fix it by issuing a
* KVM_ARM_VCPU_INIT if it really wants to.
*/
vcpu->arch.target = -1;
vcpu_clear_flag(vcpu, VCPU_INITIALIZED);
ret = ARM_EXCEPTION_IL;
}
@ -1219,8 +1226,7 @@ static bool kvm_vcpu_init_changed(struct kvm_vcpu *vcpu,
{
unsigned long features = init->features[0];
return !bitmap_equal(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES) ||
vcpu->arch.target != init->target;
return !bitmap_equal(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES);
}
static int __kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
@ -1236,20 +1242,18 @@ static int __kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
!bitmap_equal(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES))
goto out_unlock;
vcpu->arch.target = init->target;
bitmap_copy(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES);
/* Now we know what it is, we can reset it. */
ret = kvm_reset_vcpu(vcpu);
if (ret) {
vcpu->arch.target = -1;
bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
goto out_unlock;
}
bitmap_copy(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES);
set_bit(KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED, &kvm->arch.flags);
vcpu_set_flag(vcpu, VCPU_INITIALIZED);
out_unlock:
mutex_unlock(&kvm->arch.config_lock);
return ret;
@ -1260,14 +1264,15 @@ static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
{
int ret;
if (init->target != kvm_target_cpu())
if (init->target != KVM_ARM_TARGET_GENERIC_V8 &&
init->target != kvm_target_cpu())
return -EINVAL;
ret = kvm_vcpu_init_check_features(vcpu, init);
if (ret)
return ret;
if (vcpu->arch.target == -1)
if (!kvm_vcpu_initialized(vcpu))
return __kvm_vcpu_set_target(vcpu, init);
if (kvm_vcpu_init_changed(vcpu, init))
@ -1532,12 +1537,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
}
void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
const struct kvm_memory_slot *memslot)
{
kvm_flush_remote_tlbs(kvm);
}
static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
struct kvm_arm_device_addr *dev_addr)
{
@ -1595,9 +1594,9 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
}
case KVM_ARM_PREFERRED_TARGET: {
struct kvm_vcpu_init init;
kvm_vcpu_preferred_target(&init);
struct kvm_vcpu_init init = {
.target = KVM_ARM_TARGET_GENERIC_V8,
};
if (copy_to_user(argp, &init, sizeof(init)))
return -EFAULT;
@ -2276,30 +2275,8 @@ static int __init init_hyp_mode(void)
for_each_possible_cpu(cpu) {
struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
unsigned long hyp_addr;
/*
* Allocate a contiguous HYP private VA range for the stack
* and guard page. The allocation is also aligned based on
* the order of its size.
*/
err = hyp_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
if (err) {
kvm_err("Cannot allocate hyp stack guard page\n");
goto out_err;
}
/*
* Since the stack grows downwards, map the stack to the page
* at the higher address and leave the lower guard page
* unbacked.
*
* Any valid stack address now has the PAGE_SHIFT bit as 1
* and addresses corresponding to the guard page have the
* PAGE_SHIFT bit as 0 - this is used for overflow detection.
*/
err = __create_hyp_mappings(hyp_addr + PAGE_SIZE, PAGE_SIZE,
__pa(stack_page), PAGE_HYP);
err = create_hyp_stack(__pa(stack_page), &params->stack_hyp_va);
if (err) {
kvm_err("Cannot map hyp stack\n");
goto out_err;
@ -2312,8 +2289,6 @@ static int __init init_hyp_mode(void)
* has been mapped in the flexible private VA space.
*/
params->stack_pa = __pa(stack_page);
params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
}
for_each_possible_cpu(cpu) {

File diff suppressed because it is too large


@ -884,21 +884,6 @@ u32 __attribute_const__ kvm_target_cpu(void)
return KVM_ARM_TARGET_GENERIC_V8;
}
void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
{
u32 target = kvm_target_cpu();
memset(init, 0, sizeof(*init));
/*
* For now, we don't return any features.
* In future, we might use features to return target
* specific features available for the preferred
* target type.
*/
init->target = (__u32)target;
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
return -EINVAL;


@ -222,7 +222,33 @@ static int kvm_handle_eret(struct kvm_vcpu *vcpu)
if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_ERET_ISS_ERET)
return kvm_handle_ptrauth(vcpu);
kvm_emulate_nested_eret(vcpu);
/*
* If we got here, two possibilities:
*
* - the guest is in EL2, and we need to fully emulate ERET
*
* - the guest is in EL1, and we need to reinject the
* exception into the L1 hypervisor.
*
* If KVM ever traps ERET for its own use, we'll have to
* revisit this.
*/
if (is_hyp_ctxt(vcpu))
kvm_emulate_nested_eret(vcpu);
else
kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
return 1;
}
static int handle_svc(struct kvm_vcpu *vcpu)
{
/*
* So far, SVC traps only for NV via HFGITR_EL2. An SVC from a
* 32bit guest would be caught by vcpu_mode_is_bad_32bit(), so
* we should only have to deal with a 64 bit exception.
*/
kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
return 1;
}
@ -239,6 +265,7 @@ static exit_handle_fn arm_exit_handlers[] = {
[ESR_ELx_EC_SMC32] = handle_smc,
[ESR_ELx_EC_HVC64] = handle_hvc,
[ESR_ELx_EC_SMC64] = handle_smc,
[ESR_ELx_EC_SVC64] = handle_svc,
[ESR_ELx_EC_SYS64] = kvm_handle_sys_reg,
[ESR_ELx_EC_SVE] = handle_sve,
[ESR_ELx_EC_ERET] = kvm_handle_eret,


@ -70,20 +70,26 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
}
}
static inline bool __hfgxtr_traps_required(void)
{
if (cpus_have_final_cap(ARM64_SME))
return true;
if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
return true;
return false;
}
static inline void __activate_traps_hfgxtr(void)
#define compute_clr_set(vcpu, reg, clr, set) \
do { \
u64 hfg; \
hfg = __vcpu_sys_reg(vcpu, reg) & ~__ ## reg ## _RES0; \
set |= hfg & __ ## reg ## _MASK; \
clr |= ~hfg & __ ## reg ## _nMASK; \
} while(0)
static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp;
u64 r_val, w_val;
if (!cpus_have_final_cap(ARM64_HAS_FGT))
return;
ctxt_sys_reg(hctxt, HFGRTR_EL2) = read_sysreg_s(SYS_HFGRTR_EL2);
ctxt_sys_reg(hctxt, HFGWTR_EL2) = read_sysreg_s(SYS_HFGWTR_EL2);
if (cpus_have_final_cap(ARM64_SME)) {
tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK;
@ -98,26 +104,72 @@ static inline void __activate_traps_hfgxtr(void)
if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
w_set |= HFGxTR_EL2_TCR_EL1_MASK;
sysreg_clear_set_s(SYS_HFGRTR_EL2, r_clr, r_set);
sysreg_clear_set_s(SYS_HFGWTR_EL2, w_clr, w_set);
}
static inline void __deactivate_traps_hfgxtr(void)
{
u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp;
if (cpus_have_final_cap(ARM64_SME)) {
tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK;
r_set |= tmp;
w_set |= tmp;
if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
compute_clr_set(vcpu, HFGRTR_EL2, r_clr, r_set);
compute_clr_set(vcpu, HFGWTR_EL2, w_clr, w_set);
}
if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
w_clr |= HFGxTR_EL2_TCR_EL1_MASK;
/* The default is not to trap anything but ACCDATA_EL1 */
r_val = __HFGRTR_EL2_nMASK & ~HFGxTR_EL2_nACCDATA_EL1;
r_val |= r_set;
r_val &= ~r_clr;
sysreg_clear_set_s(SYS_HFGRTR_EL2, r_clr, r_set);
sysreg_clear_set_s(SYS_HFGWTR_EL2, w_clr, w_set);
w_val = __HFGWTR_EL2_nMASK & ~HFGxTR_EL2_nACCDATA_EL1;
w_val |= w_set;
w_val &= ~w_clr;
write_sysreg_s(r_val, SYS_HFGRTR_EL2);
write_sysreg_s(w_val, SYS_HFGWTR_EL2);
if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
return;
ctxt_sys_reg(hctxt, HFGITR_EL2) = read_sysreg_s(SYS_HFGITR_EL2);
r_set = r_clr = 0;
compute_clr_set(vcpu, HFGITR_EL2, r_clr, r_set);
r_val = __HFGITR_EL2_nMASK;
r_val |= r_set;
r_val &= ~r_clr;
write_sysreg_s(r_val, SYS_HFGITR_EL2);
ctxt_sys_reg(hctxt, HDFGRTR_EL2) = read_sysreg_s(SYS_HDFGRTR_EL2);
ctxt_sys_reg(hctxt, HDFGWTR_EL2) = read_sysreg_s(SYS_HDFGWTR_EL2);
r_clr = r_set = w_clr = w_set = 0;
compute_clr_set(vcpu, HDFGRTR_EL2, r_clr, r_set);
compute_clr_set(vcpu, HDFGWTR_EL2, w_clr, w_set);
r_val = __HDFGRTR_EL2_nMASK;
r_val |= r_set;
r_val &= ~r_clr;
w_val = __HDFGWTR_EL2_nMASK;
w_val |= w_set;
w_val &= ~w_clr;
write_sysreg_s(r_val, SYS_HDFGRTR_EL2);
write_sysreg_s(w_val, SYS_HDFGWTR_EL2);
}
static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
if (!cpus_have_final_cap(ARM64_HAS_FGT))
return;
write_sysreg_s(ctxt_sys_reg(hctxt, HFGRTR_EL2), SYS_HFGRTR_EL2);
write_sysreg_s(ctxt_sys_reg(hctxt, HFGWTR_EL2), SYS_HFGWTR_EL2);
if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
return;
write_sysreg_s(ctxt_sys_reg(hctxt, HFGITR_EL2), SYS_HFGITR_EL2);
write_sysreg_s(ctxt_sys_reg(hctxt, HDFGRTR_EL2), SYS_HDFGRTR_EL2);
write_sysreg_s(ctxt_sys_reg(hctxt, HDFGWTR_EL2), SYS_HDFGWTR_EL2);
}
static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
@ -145,8 +197,21 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2);
write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
if (__hfgxtr_traps_required())
__activate_traps_hfgxtr();
if (cpus_have_final_cap(ARM64_HAS_HCX)) {
u64 hcrx = HCRX_GUEST_FLAGS;
if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
u64 clr = 0, set = 0;
compute_clr_set(vcpu, HCRX_EL2, clr, set);
hcrx |= set;
hcrx &= ~clr;
}
write_sysreg_s(hcrx, SYS_HCRX_EL2);
}
__activate_traps_hfgxtr(vcpu);
}
static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
@ -162,8 +227,10 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU);
}
if (__hfgxtr_traps_required())
__deactivate_traps_hfgxtr();
if (cpus_have_final_cap(ARM64_HAS_HCX))
write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2);
__deactivate_traps_hfgxtr(vcpu);
}
static inline void ___activate_traps(struct kvm_vcpu *vcpu)
@ -177,9 +244,6 @@ static inline void ___activate_traps(struct kvm_vcpu *vcpu)
if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
if (cpus_have_final_cap(ARM64_HAS_HCX))
write_sysreg_s(HCRX_GUEST_FLAGS, SYS_HCRX_EL2);
}
static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
@ -194,9 +258,6 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
vcpu->arch.hcr_el2 &= ~HCR_VSE;
vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
}
if (cpus_have_final_cap(ARM64_HAS_HCX))
write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2);
}
static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)


@ -26,6 +26,7 @@ int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot
int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
enum kvm_pgtable_prot prot,
unsigned long *haddr);
int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr);
int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr);
#endif /* __KVM_HYP_MM_H */


@ -135,6 +135,16 @@ static void handle___kvm_tlb_flush_vmid_ipa_nsh(struct kvm_cpu_context *host_ctx
__kvm_tlb_flush_vmid_ipa_nsh(kern_hyp_va(mmu), ipa, level);
}
static void
handle___kvm_tlb_flush_vmid_range(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
DECLARE_REG(phys_addr_t, start, host_ctxt, 2);
DECLARE_REG(unsigned long, pages, host_ctxt, 3);
__kvm_tlb_flush_vmid_range(kern_hyp_va(mmu), start, pages);
}
static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
@ -327,6 +337,7 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa_nsh),
HANDLE_FUNC(__kvm_tlb_flush_vmid),
HANDLE_FUNC(__kvm_tlb_flush_vmid_range),
HANDLE_FUNC(__kvm_flush_cpu_context),
HANDLE_FUNC(__kvm_timer_set_cntvoff),
HANDLE_FUNC(__vgic_v3_read_vmcr),


@ -44,6 +44,27 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
return err;
}
static int __pkvm_alloc_private_va_range(unsigned long start, size_t size)
{
unsigned long cur;
hyp_assert_lock_held(&pkvm_pgd_lock);
if (!start || start < __io_map_base)
return -EINVAL;
/* The allocated size is always a multiple of PAGE_SIZE */
cur = start + PAGE_ALIGN(size);
/* Are we overflowing on the vmemmap ? */
if (cur > __hyp_vmemmap)
return -ENOMEM;
__io_map_base = cur;
return 0;
}
/**
* pkvm_alloc_private_va_range - Allocates a private VA range.
* @size: The size of the VA range to reserve.
@ -56,27 +77,16 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
*/
int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr)
{
unsigned long base, addr;
int ret = 0;
unsigned long addr;
int ret;
hyp_spin_lock(&pkvm_pgd_lock);
/* Align the allocation based on the order of its size */
addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size));
/* The allocated size is always a multiple of PAGE_SIZE */
base = addr + PAGE_ALIGN(size);
/* Are we overflowing on the vmemmap ? */
if (!addr || base > __hyp_vmemmap)
ret = -ENOMEM;
else {
__io_map_base = base;
*haddr = addr;
}
addr = __io_map_base;
ret = __pkvm_alloc_private_va_range(addr, size);
hyp_spin_unlock(&pkvm_pgd_lock);
*haddr = addr;
return ret;
}
@ -340,6 +350,45 @@ int hyp_create_idmap(u32 hyp_va_bits)
return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC);
}
int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr)
{
unsigned long addr, prev_base;
size_t size;
int ret;
hyp_spin_lock(&pkvm_pgd_lock);
prev_base = __io_map_base;
/*
* Efficient stack verification using the PAGE_SHIFT bit implies
* an alignment of our allocation on the order of the size.
*/
size = PAGE_SIZE * 2;
addr = ALIGN(__io_map_base, size);
ret = __pkvm_alloc_private_va_range(addr, size);
if (!ret) {
/*
* Since the stack grows downwards, map the stack to the page
* at the higher address and leave the lower guard page
* unbacked.
*
* Any valid stack address now has the PAGE_SHIFT bit as 1
* and addresses corresponding to the guard page have the
* PAGE_SHIFT bit as 0 - this is used for overflow detection.
*/
ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr + PAGE_SIZE,
PAGE_SIZE, phys, PAGE_HYP);
if (ret)
__io_map_base = prev_base;
}
hyp_spin_unlock(&pkvm_pgd_lock);
*haddr = addr + size;
return ret;
}
static void *admit_host_page(void *arg)
{
struct kvm_hyp_memcache *host_mc = arg;


@ -113,7 +113,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
for (i = 0; i < hyp_nr_cpus; i++) {
struct kvm_nvhe_init_params *params = per_cpu_ptr(&kvm_init_params, i);
unsigned long hyp_addr;
start = (void *)kern_hyp_va(per_cpu_base[i]);
end = start + PAGE_ALIGN(hyp_percpu_size);
@ -121,33 +120,9 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
if (ret)
return ret;
/*
* Allocate a contiguous HYP private VA range for the stack
* and guard page. The allocation is also aligned based on
* the order of its size.
*/
ret = pkvm_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
ret = pkvm_create_stack(params->stack_pa, &params->stack_hyp_va);
if (ret)
return ret;
/*
* Since the stack grows downwards, map the stack to the page
* at the higher address and leave the lower guard page
* unbacked.
*
* Any valid stack address now has the PAGE_SHIFT bit as 1
* and addresses corresponding to the guard page have the
* PAGE_SHIFT bit as 0 - this is used for overflow detection.
*/
hyp_spin_lock(&pkvm_pgd_lock);
ret = kvm_pgtable_hyp_map(&pkvm_pgtable, hyp_addr + PAGE_SIZE,
PAGE_SIZE, params->stack_pa, PAGE_HYP);
hyp_spin_unlock(&pkvm_pgd_lock);
if (ret)
return ret;
/* Update stack_hyp_va to end of the stack's private VA range */
params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
}
/*


@ -236,7 +236,7 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
* KVM_ARM_VCPU_INIT, however, this is likely not possible for
* protected VMs.
*/
vcpu->arch.target = -1;
vcpu_clear_flag(vcpu, VCPU_INITIALIZED);
*exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
*exit_code |= ARM_EXCEPTION_IL;
}


@ -182,6 +182,36 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
__tlb_switch_to_host(&cxt);
}
void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
phys_addr_t start, unsigned long pages)
{
struct tlb_inv_context cxt;
unsigned long stride;
/*
* Since the range of addresses may not be mapped at
* the same level, assume the worst-case stride of PAGE_SIZE
*/
stride = PAGE_SIZE;
start = round_down(start, stride);
/* Switch to requested VMID */
__tlb_switch_to_guest(mmu, &cxt, false);
__flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
dsb(ish);
__tlbi(vmalle1is);
dsb(ish);
isb();
/* See the comment in __kvm_tlb_flush_vmid_ipa() */
if (icache_is_vpipt())
icache_inval_all_pou();
__tlb_switch_to_host(&cxt);
}
void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
{
struct tlb_inv_context cxt;


@ -670,6 +670,26 @@ static bool stage2_has_fwb(struct kvm_pgtable *pgt)
return !(pgt->flags & KVM_PGTABLE_S2_NOFWB);
}
void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
phys_addr_t addr, size_t size)
{
unsigned long pages, inval_pages;
if (!system_supports_tlb_range()) {
kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
return;
}
pages = size >> PAGE_SHIFT;
while (pages > 0) {
inval_pages = min(pages, MAX_TLBI_RANGE_PAGES);
kvm_call_hyp(__kvm_tlb_flush_vmid_range, mmu, addr, inval_pages);
addr += inval_pages << PAGE_SHIFT;
pages -= inval_pages;
}
}
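To put the chunking above in perspective: each iteration is bounded by MAX_TLBI_RANGE_PAGES. Assuming its usual definition of __TLBI_RANGE_PAGES(31, 3), i.e. (31 + 1) << (5*3 + 1) = 0x200000 pages, one hypercall covers at most 8GiB worth of 4KiB pages, so unmapping, say, a 10GiB range results in two __kvm_tlb_flush_vmid_range hypercalls (8GiB + 2GiB) instead of a full VMID invalidation.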
#define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))
static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
@ -786,7 +806,8 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
* evicted pte value (if any).
*/
if (kvm_pte_table(ctx->old, ctx->level))
kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
kvm_tlb_flush_vmid_range(mmu, ctx->addr,
kvm_granule_size(ctx->level));
else if (kvm_pte_valid(ctx->old))
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
ctx->addr, ctx->level);
@ -810,16 +831,36 @@ static void stage2_make_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t n
smp_store_release(ctx->ptep, new);
}
static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu,
struct kvm_pgtable_mm_ops *mm_ops)
static bool stage2_unmap_defer_tlb_flush(struct kvm_pgtable *pgt)
{
/*
* Clear the existing PTE, and perform break-before-make with
* TLB maintenance if it was valid.
* If FEAT_TLBIRANGE is implemented, defer the individual
* TLB invalidations until the entire walk is finished, and
* then use the range-based TLBI instructions to do the
* invalidations. Condition deferred TLB invalidation on the
* system supporting FWB as the optimization is entirely
* pointless when the unmap walker needs to perform CMOs.
*/
return system_supports_tlb_range() && stage2_has_fwb(pgt);
}
static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
struct kvm_s2_mmu *mmu,
struct kvm_pgtable_mm_ops *mm_ops)
{
struct kvm_pgtable *pgt = ctx->arg;
/*
* Clear the existing PTE, and perform break-before-make if it was
* valid. Depending on the system support, defer the TLB maintenance
* for the same until the entire unmap walk is completed.
*/
if (kvm_pte_valid(ctx->old)) {
kvm_clear_pte(ctx->ptep);
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level);
if (!stage2_unmap_defer_tlb_flush(pgt))
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
ctx->addr, ctx->level);
}
mm_ops->put_page(ctx->ptep);
@ -1077,7 +1118,7 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
* block entry and rely on the remaining portions being faulted
* back lazily.
*/
stage2_put_pte(ctx, mmu, mm_ops);
stage2_unmap_put_pte(ctx, mmu, mm_ops);
if (need_flush && mm_ops->dcache_clean_inval_poc)
mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops),
@ -1091,13 +1132,19 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
int ret;
struct kvm_pgtable_walker walker = {
.cb = stage2_unmap_walker,
.arg = pgt,
.flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
};
return kvm_pgtable_walk(pgt, addr, size, &walker);
ret = kvm_pgtable_walk(pgt, addr, size, &walker);
if (stage2_unmap_defer_tlb_flush(pgt))
/* Perform the deferred TLB invalidations */
kvm_tlb_flush_vmid_range(pgt->mmu, addr, size);
return ret;
}
struct stage2_attr_data {


@ -143,6 +143,34 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
__tlb_switch_to_host(&cxt);
}
void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
phys_addr_t start, unsigned long pages)
{
struct tlb_inv_context cxt;
unsigned long stride;
/*
* Since the range of addresses may not be mapped at
* the same level, assume the worst-case stride of PAGE_SIZE
*/
stride = PAGE_SIZE;
start = round_down(start, stride);
dsb(ishst);
/* Switch to requested VMID */
__tlb_switch_to_guest(mmu, &cxt);
__flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
dsb(ish);
__tlbi(vmalle1is);
dsb(ish);
isb();
__tlb_switch_to_host(&cxt);
}
void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
{
struct tlb_inv_context cxt;


@ -161,15 +161,23 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot)
}
/**
* kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8
* kvm_arch_flush_remote_tlbs() - flush all VM TLB entries for v7/8
* @kvm: pointer to kvm structure.
*
* Interface to HYP function to flush all VM TLB entries
*/
void kvm_flush_remote_tlbs(struct kvm *kvm)
int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
{
++kvm->stat.generic.remote_tlb_flush_requests;
kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
return 0;
}
int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
gfn_t gfn, u64 nr_pages)
{
kvm_tlb_flush_vmid_range(&kvm->arch.mmu,
gfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
return 0;
}
static bool kvm_is_device_pfn(unsigned long pfn)
@ -592,6 +600,25 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
return 0;
}
static int __hyp_alloc_private_va_range(unsigned long base)
{
lockdep_assert_held(&kvm_hyp_pgd_mutex);
if (!PAGE_ALIGNED(base))
return -EINVAL;
/*
* Verify that BIT(VA_BITS - 1) hasn't been flipped by
* allocating the new area, as it would indicate we've
* overflowed the idmap/IO address range.
*/
if ((base ^ io_map_base) & BIT(VA_BITS - 1))
return -ENOMEM;
io_map_base = base;
return 0;
}
/**
* hyp_alloc_private_va_range - Allocates a private VA range.
@ -612,26 +639,16 @@ int hyp_alloc_private_va_range(size_t size, unsigned long *haddr)
/*
* This assumes that we have enough space below the idmap
* page to allocate our VAs. If not, the check below will
* kick. A potential alternative would be to detect that
* overflow and switch to an allocation above the idmap.
* page to allocate our VAs. If not, the check in
* __hyp_alloc_private_va_range() will kick. A potential
* alternative would be to detect that overflow and switch
* to an allocation above the idmap.
*
* The allocated size is always a multiple of PAGE_SIZE.
*/
base = io_map_base - PAGE_ALIGN(size);
/* Align the allocation based on the order of its size */
base = ALIGN_DOWN(base, PAGE_SIZE << get_order(size));
/*
* Verify that BIT(VA_BITS - 1) hasn't been flipped by
* allocating the new area, as it would indicate we've
* overflowed the idmap/IO address range.
*/
if ((base ^ io_map_base) & BIT(VA_BITS - 1))
ret = -ENOMEM;
else
*haddr = io_map_base = base;
size = PAGE_ALIGN(size);
base = io_map_base - size;
ret = __hyp_alloc_private_va_range(base);
mutex_unlock(&kvm_hyp_pgd_mutex);
@ -668,6 +685,48 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
return ret;
}
int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr)
{
unsigned long base;
size_t size;
int ret;
mutex_lock(&kvm_hyp_pgd_mutex);
/*
* Efficient stack verification using the PAGE_SHIFT bit implies
* an alignment of our allocation on the order of the size.
*/
size = PAGE_SIZE * 2;
base = ALIGN_DOWN(io_map_base - size, size);
ret = __hyp_alloc_private_va_range(base);
mutex_unlock(&kvm_hyp_pgd_mutex);
if (ret) {
kvm_err("Cannot allocate hyp stack guard page\n");
return ret;
}
/*
* Since the stack grows downwards, map the stack to the page
* at the higher address and leave the lower guard page
* unbacked.
*
* Any valid stack address now has the PAGE_SHIFT bit as 1
* and addresses corresponding to the guard page have the
* PAGE_SHIFT bit as 0 - this is used for overflow detection.
*/
ret = __create_hyp_mappings(base + PAGE_SIZE, PAGE_SIZE, phys_addr,
PAGE_HYP);
if (ret)
kvm_err("Cannot map hyp stack\n");
*haddr = base + size;
return ret;
}
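A minimal sketch of the overflow check this layout enables, using a hypothetical helper name; the real check lives in the EL2 entry assembly, which is not part of this diff. Because the two-page private VA range is aligned to its size, every address in the stack page has bit PAGE_SHIFT set and every address in the guard page has it clear.
static inline bool hyp_stack_overflowed(unsigned long sp)
{
	/* A cleared PAGE_SHIFT bit means SP has run into the guard page */
	return !(sp & BIT(PAGE_SHIFT));
}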
/**
* create_hyp_io_mappings - Map IO into both kernel and HYP
* @phys_addr: The physical start address which gets mapped
@ -1075,7 +1134,7 @@ static void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
write_lock(&kvm->mmu_lock);
stage2_wp_range(&kvm->arch.mmu, start, end);
write_unlock(&kvm->mmu_lock);
kvm_flush_remote_tlbs(kvm);
kvm_flush_remote_tlbs_memslot(kvm, memslot);
}
/**
@ -1541,7 +1600,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
out_unlock:
read_unlock(&kvm->mmu_lock);
kvm_set_pfn_accessed(pfn);
kvm_release_pfn_clean(pfn);
return ret != -EAGAIN ? ret : 0;
}

View file

@ -71,8 +71,9 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
break;
case SYS_ID_AA64MMFR0_EL1:
-/* Hide ECV, FGT, ExS, Secure Memory */
-val &= ~(GENMASK_ULL(63, 43) |
+/* Hide ECV, ExS, Secure Memory */
+val &= ~(NV_FTR(MMFR0, ECV) |
+NV_FTR(MMFR0, EXS) |
NV_FTR(MMFR0, TGRAN4_2) |
NV_FTR(MMFR0, TGRAN16_2) |
NV_FTR(MMFR0, TGRAN64_2) |
@ -116,7 +117,8 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
break;
case SYS_ID_AA64MMFR1_EL1:
-val &= (NV_FTR(MMFR1, PAN) |
+val &= (NV_FTR(MMFR1, HCX) |
+NV_FTR(MMFR1, PAN) |
NV_FTR(MMFR1, LO) |
NV_FTR(MMFR1, HPDS) |
NV_FTR(MMFR1, VH) |
@ -124,8 +126,7 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
break;
case SYS_ID_AA64MMFR2_EL1:
-val &= ~(NV_FTR(MMFR2, EVT) |
-NV_FTR(MMFR2, BBM) |
+val &= ~(NV_FTR(MMFR2, BBM) |
NV_FTR(MMFR2, TTL) |
GENMASK_ULL(47, 44) |
NV_FTR(MMFR2, ST) |

View file

@ -14,6 +14,7 @@
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>
#include <asm/arm_pmuv3.h>
#define PERF_ATTR_CFG1_COUNTER_64BIT BIT(0)
@ -35,12 +36,8 @@ static struct kvm_pmc *kvm_vcpu_idx_to_pmc(struct kvm_vcpu *vcpu, int cnt_idx)
return &vcpu->arch.pmu.pmc[cnt_idx];
}
-static u32 kvm_pmu_event_mask(struct kvm *kvm)
+static u32 __kvm_pmu_event_mask(unsigned int pmuver)
{
-unsigned int pmuver;
-pmuver = kvm->arch.arm_pmu->pmuver;
switch (pmuver) {
case ID_AA64DFR0_EL1_PMUVer_IMP:
return GENMASK(9, 0);
@ -55,6 +52,14 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm)
}
}
static u32 kvm_pmu_event_mask(struct kvm *kvm)
{
u64 dfr0 = IDREG(kvm, SYS_ID_AA64DFR0_EL1);
u8 pmuver = SYS_FIELD_GET(ID_AA64DFR0_EL1, PMUVer, dfr0);
return __kvm_pmu_event_mask(pmuver);
}
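/*
 * Illustrative usage sketch, not part of this series: the split lets
 * the mask be derived either from the VM's view of ID_AA64DFR0_EL1 (as
 * above) or from a raw PMUVer value such as the host limit, which the
 * event-filter change further down relies on:
 */
static u32 example_host_event_mask(void)
{
	return __kvm_pmu_event_mask(kvm_arm_pmu_get_pmuver_limit());
}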
/**
* kvm_pmc_is_64bit - determine if counter is 64bit
* @pmc: counter context
@ -672,8 +677,11 @@ void kvm_host_pmu_init(struct arm_pmu *pmu)
{
struct arm_pmu_entry *entry;
-if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
-pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
+/*
+* Check the sanitised PMU version for the system, as KVM does not
+* support implementations where PMUv3 exists on a subset of CPUs.
+*/
+if (!pmuv3_implemented(kvm_arm_pmu_get_pmuver_limit()))
return;
mutex_lock(&arm_pmus_lock);
@ -750,11 +758,12 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
} else {
val = read_sysreg(pmceid1_el0);
/*
-* Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
+* Don't advertise STALL_SLOT*, as PMMIR_EL0 is handled
* as RAZ
*/
-if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4)
-val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
+val &= ~(BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32) |
+BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND - 32) |
+BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND - 32));
base = 32;
}
@ -950,11 +959,17 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
return 0;
}
case KVM_ARM_VCPU_PMU_V3_FILTER: {
u8 pmuver = kvm_arm_pmu_get_pmuver_limit();
struct kvm_pmu_event_filter __user *uaddr;
struct kvm_pmu_event_filter filter;
int nr_events;
nr_events = kvm_pmu_event_mask(kvm) + 1;
/*
* Allow userspace to specify an event filter for the entire
* event range supported by the hardware's PMUVer, rather
* than the guest's PMUVer, to preserve KVM backward compatibility.
*/
nr_events = __kvm_pmu_event_mask(pmuver) + 1;
uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;

View file

@ -236,3 +236,21 @@ bool kvm_set_pmuserenr(u64 val)
ctxt_sys_reg(hctxt, PMUSERENR_EL0) = val;
return true;
}
/*
* If we interrupted the guest to update the host PMU context, make
* sure we re-apply the guest EL0 state.
*/
void kvm_vcpu_pmu_resync_el0(void)
{
struct kvm_vcpu *vcpu;
if (!has_vhe() || !in_interrupt())
return;
vcpu = kvm_get_running_vcpu();
if (!vcpu)
return;
kvm_make_request(KVM_REQ_RESYNC_PMU_EL0, vcpu);
}
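/*
 * Sketch of the assumed consumer, which is not part of this diff: the
 * request posted above is expected to be serviced on the next guest
 * entry, roughly along these lines:
 *
 *	if (kvm_check_request(KVM_REQ_RESYNC_PMU_EL0, vcpu))
 *		kvm_vcpu_pmu_restore_guest(vcpu);
 */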

View file

@ -248,21 +248,16 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
}
}
-switch (vcpu->arch.target) {
-default:
-if (vcpu_el1_is_32bit(vcpu)) {
-pstate = VCPU_RESET_PSTATE_SVC;
-} else if (vcpu_has_nv(vcpu)) {
-pstate = VCPU_RESET_PSTATE_EL2;
-} else {
-pstate = VCPU_RESET_PSTATE_EL1;
-}
+if (vcpu_el1_is_32bit(vcpu))
+pstate = VCPU_RESET_PSTATE_SVC;
+else if (vcpu_has_nv(vcpu))
+pstate = VCPU_RESET_PSTATE_EL2;
+else
+pstate = VCPU_RESET_PSTATE_EL1;
-if (kvm_vcpu_has_pmu(vcpu) && !kvm_arm_support_pmu_v3()) {
-ret = -EINVAL;
-goto out;
-}
-break;
+if (kvm_vcpu_has_pmu(vcpu) && !kvm_arm_support_pmu_v3()) {
+ret = -EINVAL;
+goto out;
+}
/* Reset core registers */

View file

@ -2151,6 +2151,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },
{ SYS_DESC(SYS_TPIDR_EL1), NULL, reset_unknown, TPIDR_EL1 },
{ SYS_DESC(SYS_ACCDATA_EL1), undef_access },
{ SYS_DESC(SYS_SCXTNUM_EL1), undef_access },
{ SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0},
@ -2365,8 +2367,13 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG(MDCR_EL2, access_rw, reset_val, 0),
EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1),
EL2_REG(HSTR_EL2, access_rw, reset_val, 0),
EL2_REG(HFGRTR_EL2, access_rw, reset_val, 0),
EL2_REG(HFGWTR_EL2, access_rw, reset_val, 0),
EL2_REG(HFGITR_EL2, access_rw, reset_val, 0),
EL2_REG(HACR_EL2, access_rw, reset_val, 0),
EL2_REG(HCRX_EL2, access_rw, reset_val, 0),
EL2_REG(TTBR0_EL2, access_rw, reset_val, 0),
EL2_REG(TTBR1_EL2, access_rw, reset_val, 0),
EL2_REG(TCR_EL2, access_rw, reset_val, TCR_EL2_RES1),
@ -2374,6 +2381,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG(VTCR_EL2, access_rw, reset_val, 0),
{ SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 },
EL2_REG(HDFGRTR_EL2, access_rw, reset_val, 0),
EL2_REG(HDFGWTR_EL2, access_rw, reset_val, 0),
EL2_REG(SPSR_EL2, access_rw, reset_val, 0),
EL2_REG(ELR_EL2, access_rw, reset_val, 0),
{ SYS_DESC(SYS_SP_EL1), access_sp_el1},
@ -3170,6 +3179,9 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu)
trace_kvm_handle_sys_reg(esr);
if (__check_nv_sr_forward(vcpu))
return 1;
params = esr_sys64_to_params(esr);
params.regval = vcpu_get_reg(vcpu, Rt);
@ -3587,5 +3599,8 @@ int __init kvm_sys_reg_table_init(void)
if (!first_idreg)
return -EINVAL;
if (kvm_get_mode() == KVM_MODE_NV)
return populate_nv_trap_config();
return 0;
}

View file

@ -364,6 +364,32 @@ TRACE_EVENT(kvm_inject_nested_exception,
__entry->hcr_el2)
);
TRACE_EVENT(kvm_forward_sysreg_trap,
TP_PROTO(struct kvm_vcpu *vcpu, u32 sysreg, bool is_read),
TP_ARGS(vcpu, sysreg, is_read),
TP_STRUCT__entry(
__field(u64, pc)
__field(u32, sysreg)
__field(bool, is_read)
),
TP_fast_assign(
__entry->pc = *vcpu_pc(vcpu);
__entry->sysreg = sysreg;
__entry->is_read = is_read;
),
TP_printk("%llx %c (%d,%d,%d,%d,%d)",
__entry->pc,
__entry->is_read ? 'R' : 'W',
sys_reg_Op0(__entry->sysreg),
sys_reg_Op1(__entry->sysreg),
sys_reg_CRn(__entry->sysreg),
sys_reg_CRm(__entry->sysreg),
sys_reg_Op2(__entry->sysreg))
);
#endif /* _TRACE_ARM_ARM64_KVM_H */
#undef TRACE_INCLUDE_PATH

View file

@ -199,7 +199,6 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr);
void vgic_v2_set_underflow(struct kvm_vcpu *vcpu);
-void vgic_v2_set_npie(struct kvm_vcpu *vcpu);
int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
int offset, u32 *val);
@ -233,7 +232,6 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr);
void vgic_v3_set_underflow(struct kvm_vcpu *vcpu);
-void vgic_v3_set_npie(struct kvm_vcpu *vcpu);
void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
void vgic_v3_enable(struct kvm_vcpu *vcpu);

View file

@ -26,6 +26,7 @@ HAS_ECV
HAS_ECV_CNTPOFF
HAS_EPAN
HAS_EVT
HAS_FGT
HAS_GENERIC_AUTH
HAS_GENERIC_AUTH_ARCH_QARMA3
HAS_GENERIC_AUTH_ARCH_QARMA5

View file

@ -2156,6 +2156,135 @@ Field 1 ICIALLU
Field 0 ICIALLUIS
EndSysreg
Sysreg HDFGRTR_EL2 3 4 3 1 4
Field 63 PMBIDR_EL1
Field 62 nPMSNEVFR_EL1
Field 61 nBRBDATA
Field 60 nBRBCTL
Field 59 nBRBIDR
Field 58 PMCEIDn_EL0
Field 57 PMUSERENR_EL0
Field 56 TRBTRG_EL1
Field 55 TRBSR_EL1
Field 54 TRBPTR_EL1
Field 53 TRBMAR_EL1
Field 52 TRBLIMITR_EL1
Field 51 TRBIDR_EL1
Field 50 TRBBASER_EL1
Res0 49
Field 48 TRCVICTLR
Field 47 TRCSTATR
Field 46 TRCSSCSRn
Field 45 TRCSEQSTR
Field 44 TRCPRGCTLR
Field 43 TRCOSLSR
Res0 42
Field 41 TRCIMSPECn
Field 40 TRCID
Res0 39:38
Field 37 TRCCNTVRn
Field 36 TRCCLAIM
Field 35 TRCAUXCTLR
Field 34 TRCAUTHSTATUS
Field 33 TRC
Field 32 PMSLATFR_EL1
Field 31 PMSIRR_EL1
Field 30 PMSIDR_EL1
Field 29 PMSICR_EL1
Field 28 PMSFCR_EL1
Field 27 PMSEVFR_EL1
Field 26 PMSCR_EL1
Field 25 PMBSR_EL1
Field 24 PMBPTR_EL1
Field 23 PMBLIMITR_EL1
Field 22 PMMIR_EL1
Res0 21:20
Field 19 PMSELR_EL0
Field 18 PMOVS
Field 17 PMINTEN
Field 16 PMCNTEN
Field 15 PMCCNTR_EL0
Field 14 PMCCFILTR_EL0
Field 13 PMEVTYPERn_EL0
Field 12 PMEVCNTRn_EL0
Field 11 OSDLR_EL1
Field 10 OSECCR_EL1
Field 9 OSLSR_EL1
Res0 8
Field 7 DBGPRCR_EL1
Field 6 DBGAUTHSTATUS_EL1
Field 5 DBGCLAIM
Field 4 MDSCR_EL1
Field 3 DBGWVRn_EL1
Field 2 DBGWCRn_EL1
Field 1 DBGBVRn_EL1
Field 0 DBGBCRn_EL1
EndSysreg
Sysreg HDFGWTR_EL2 3 4 3 1 5
Res0 63
Field 62 nPMSNEVFR_EL1
Field 61 nBRBDATA
Field 60 nBRBCTL
Res0 59:58
Field 57 PMUSERENR_EL0
Field 56 TRBTRG_EL1
Field 55 TRBSR_EL1
Field 54 TRBPTR_EL1
Field 53 TRBMAR_EL1
Field 52 TRBLIMITR_EL1
Res0 51
Field 50 TRBBASER_EL1
Field 49 TRFCR_EL1
Field 48 TRCVICTLR
Res0 47
Field 46 TRCSSCSRn
Field 45 TRCSEQSTR
Field 44 TRCPRGCTLR
Res0 43
Field 42 TRCOSLAR
Field 41 TRCIMSPECn
Res0 40:38
Field 37 TRCCNTVRn
Field 36 TRCCLAIM
Field 35 TRCAUXCTLR
Res0 34
Field 33 TRC
Field 32 PMSLATFR_EL1
Field 31 PMSIRR_EL1
Res0 30
Field 29 PMSICR_EL1
Field 28 PMSFCR_EL1
Field 27 PMSEVFR_EL1
Field 26 PMSCR_EL1
Field 25 PMBSR_EL1
Field 24 PMBPTR_EL1
Field 23 PMBLIMITR_EL1
Res0 22
Field 21 PMCR_EL0
Field 20 PMSWINC_EL0
Field 19 PMSELR_EL0
Field 18 PMOVS
Field 17 PMINTEN
Field 16 PMCNTEN
Field 15 PMCCNTR_EL0
Field 14 PMCCFILTR_EL0
Field 13 PMEVTYPERn_EL0
Field 12 PMEVCNTRn_EL0
Field 11 OSDLR_EL1
Field 10 OSECCR_EL1
Res0 9
Field 8 OSLAR_EL1
Field 7 DBGPRCR_EL1
Res0 6
Field 5 DBGCLAIM
Field 4 MDSCR_EL1
Field 3 DBGWVRn_EL1
Field 2 DBGWCRn_EL1
Field 1 DBGBVRn_EL1
Field 0 DBGBCRn_EL1
EndSysreg
Sysreg ZCR_EL2 3 4 1 2 0
Fields ZCR_ELx
EndSysreg

View file

@ -896,7 +896,6 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
-#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
-int kvm_arch_flush_remote_tlb(struct kvm *kvm);
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
#endif /* __MIPS_KVM_HOST_H__ */

View file

@ -199,7 +199,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
/* Flush slot from GPA */
kvm_mips_flush_gpa_pt(kvm, slot->base_gfn,
slot->base_gfn + slot->npages - 1);
-kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
+kvm_flush_remote_tlbs_memslot(kvm, slot);
spin_unlock(&kvm->mmu_lock);
}
@ -235,7 +235,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
needs_flush = kvm_mips_mkclean_gpa_pt(kvm, new->base_gfn,
new->base_gfn + new->npages - 1);
if (needs_flush)
-kvm_arch_flush_remote_tlbs_memslot(kvm, new);
+kvm_flush_remote_tlbs_memslot(kvm, new);
spin_unlock(&kvm->mmu_lock);
}
}
@ -981,18 +981,12 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
}
-int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
{
kvm_mips_callbacks->prepare_flush_shadow(kvm);
return 1;
}
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-const struct kvm_memory_slot *memslot)
-{
-kvm_flush_remote_tlbs(kvm);
-}
int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
int r;

View file

@ -406,12 +406,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
}
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-const struct kvm_memory_slot *memslot)
-{
-kvm_flush_remote_tlbs(kvm);
-}
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free)
{
}

View file

@ -1795,8 +1795,8 @@ static inline struct kvm *kvm_arch_alloc_vm(void)
#define __KVM_HAVE_ARCH_VM_FREE
void kvm_arch_free_vm(struct kvm *kvm);
-#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
-static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
+static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
{
if (kvm_x86_ops.flush_remote_tlbs &&
!static_call(kvm_x86_flush_remote_tlbs)(kvm))
@ -1805,6 +1805,8 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
return -ENOTSUPP;
}
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
#define kvm_arch_pmi_in_guest(vcpu) \
((vcpu) && (vcpu)->arch.handling_intr_from_guest)

View file

@ -278,16 +278,12 @@ static inline bool kvm_available_flush_remote_tlbs_range(void)
return kvm_x86_ops.flush_remote_tlbs_range;
}
-void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn,
-gfn_t nr_pages)
+int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages)
{
-int ret = -EOPNOTSUPP;
+if (!kvm_x86_ops.flush_remote_tlbs_range)
+return -EOPNOTSUPP;
-if (kvm_x86_ops.flush_remote_tlbs_range)
-ret = static_call(kvm_x86_flush_remote_tlbs_range)(kvm, start_gfn,
-nr_pages);
-if (ret)
-kvm_flush_remote_tlbs(kvm);
+return static_call(kvm_x86_flush_remote_tlbs_range)(kvm, gfn, nr_pages);
}
static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index);
@ -6670,7 +6666,7 @@ static void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm,
*/
if (walk_slot_rmaps(kvm, slot, kvm_mmu_zap_collapsible_spte,
PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL - 1, true))
-kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
+kvm_flush_remote_tlbs_memslot(kvm, slot);
}
void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
@ -6689,20 +6685,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
}
}
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-const struct kvm_memory_slot *memslot)
-{
-/*
-* All current use cases for flushing the TLBs for a specific memslot
-* related to dirty logging, and many do the TLB flush out of mmu_lock.
-* The interaction between the various operations on memslot must be
-* serialized by slots_locks to ensure the TLB flush from one operation
-* is observed by any other operation on the same memslot.
-*/
-lockdep_assert_held(&kvm->slots_lock);
-kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages);
-}
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
const struct kvm_memory_slot *memslot)
{

View file

@ -170,9 +170,6 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
struct kvm_memory_slot *slot, u64 gfn,
int min_level);
-void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn,
-gfn_t nr_pages);
/* Flush the given page (huge or not) of guest memory. */
static inline void kvm_flush_remote_tlbs_gfn(struct kvm *kvm, gfn_t gfn, int level)
{

View file

@ -12772,7 +12772,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
* See is_writable_pte() for more details (the case involving
* access-tracked SPTEs is particularly relevant).
*/
-kvm_arch_flush_remote_tlbs_memslot(kvm, new);
+kvm_flush_remote_tlbs_memslot(kvm, new);
}
}

View file

@ -772,6 +772,8 @@ static void armv8pmu_start(struct arm_pmu *cpu_pmu)
/* Enable all counters */
armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E);
kvm_vcpu_pmu_resync_el0();
}
static void armv8pmu_stop(struct arm_pmu *cpu_pmu)

View file

@ -12,7 +12,7 @@
#define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1)
-#ifdef CONFIG_HW_PERF_EVENTS
+#if IS_ENABLED(CONFIG_HW_PERF_EVENTS) && IS_ENABLED(CONFIG_KVM)
struct kvm_pmc {
u8 idx; /* index into the pmu->pmc array */
@ -74,6 +74,7 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu);
struct kvm_pmu_events *kvm_get_pmu_events(void);
void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu);
void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu);
void kvm_vcpu_pmu_resync_el0(void);
#define kvm_vcpu_has_pmu(vcpu) \
(test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
@ -171,6 +172,7 @@ static inline u8 kvm_arm_pmu_get_pmuver_limit(void)
{
return 0;
}
static inline void kvm_vcpu_pmu_resync_el0(void) {}
#endif

View file

@ -1359,6 +1359,9 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target);
void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode);
void kvm_flush_remote_tlbs(struct kvm *kvm);
void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages);
void kvm_flush_remote_tlbs_memslot(struct kvm *kvm,
const struct kvm_memory_slot *memslot);
#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min);
@ -1387,10 +1390,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
unsigned long mask);
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot);
-#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-const struct kvm_memory_slot *memslot);
-#else /* !CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */
+#ifndef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log);
int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log,
int *is_dirty, struct kvm_memory_slot **memslot);
@ -1479,11 +1479,23 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
}
#endif
-#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
-static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
+static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
{
return -ENOTSUPP;
}
#else
int kvm_arch_flush_remote_tlbs(struct kvm *kvm);
#endif
#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
gfn_t gfn, u64 nr_pages)
{
return -EOPNOTSUPP;
}
#else
int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages);
#endif
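/*
 * Illustrative sketch of the opt-in pattern, not part of this series:
 * an architecture with ranged invalidation defines the macro in its
 * asm/kvm_host.h and provides the function (mirroring the x86 and
 * arm64 hunks above); everyone else gets the -EOPNOTSUPP stub and the
 * full-flush fallback in kvm_main.c.
 */
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages);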
#ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA

View file

@ -62,9 +62,6 @@ config HAVE_KVM_CPU_RELAX_INTERCEPT
config KVM_VFIO
bool
-config HAVE_KVM_ARCH_TLB_FLUSH_ALL
-bool
config HAVE_KVM_INVALID_WAKEUPS
bool

View file

@ -345,7 +345,6 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
}
EXPORT_SYMBOL_GPL(kvm_make_all_cpus_request);
-#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
++kvm->stat.generic.remote_tlb_flush_requests;
@ -361,12 +360,38 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
* kvm_make_all_cpus_request() reads vcpu->mode. We reuse that
* barrier here.
*/
-if (!kvm_arch_flush_remote_tlb(kvm)
+if (!kvm_arch_flush_remote_tlbs(kvm)
|| kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
++kvm->stat.generic.remote_tlb_flush;
}
EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
-#endif
void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages)
{
if (!kvm_arch_flush_remote_tlbs_range(kvm, gfn, nr_pages))
return;
/*
* Fall back to flushing the entire TLB if the architecture's range-based
* TLB invalidation is unsupported or can't be performed for whatever
* reason.
*/
kvm_flush_remote_tlbs(kvm);
}
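/*
 * Illustrative caller sketch, not part of this series: a non-zero
 * return from the arch hook transparently degrades to a full flush, so
 * callers can request a ranged flush unconditionally, e.g. for one
 * hypothetical 2MiB block:
 */
static void example_flush_block(struct kvm *kvm, gfn_t gfn)
{
	kvm_flush_remote_tlbs_range(kvm, gfn, PMD_SIZE >> PAGE_SHIFT);
}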
void kvm_flush_remote_tlbs_memslot(struct kvm *kvm,
const struct kvm_memory_slot *memslot)
{
/*
* All current use cases for flushing the TLBs for a specific memslot
* are related to dirty logging, and many do the TLB flush out of
* mmu_lock. The interaction between the various operations on memslot
* must be serialized by slots_lock to ensure the TLB flush from one
* operation is observed by any other operation on the same memslot.
*/
lockdep_assert_held(&kvm->slots_lock);
kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages);
}
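/*
 * Illustrative sketch, not part of this series: the lockdep assertion
 * above means callers are expected to already hold slots_lock around
 * the flush, e.g.:
 */
static void example_flush_slot(struct kvm *kvm, const struct kvm_memory_slot *slot)
{
	mutex_lock(&kvm->slots_lock);
	kvm_flush_remote_tlbs_memslot(kvm, slot);
	mutex_unlock(&kvm->slots_lock);
}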
static void kvm_flush_shadow_all(struct kvm *kvm)
{
@ -2180,7 +2205,7 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
}
if (flush)
-kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
+kvm_flush_remote_tlbs_memslot(kvm, memslot);
if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
return -EFAULT;
@ -2297,7 +2322,7 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm,
KVM_MMU_UNLOCK(kvm);
if (flush)
-kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
+kvm_flush_remote_tlbs_memslot(kvm, memslot);
return 0;
}