KVM x86 APIC changes for 6.3:

- Remove a superfluous variables from apic_get_tmcct()
 
  - Fix various edge cases in x2APIC MSR emulation
 
  - Mark APIC timer as expired if its in one-shot mode and the count
    underflows while the vCPU task was being migrated
 
  - Reset xAPIC when userspace forces "impossible" x2APIC => xAPIC transition
 -----BEGIN PGP SIGNATURE-----
 
 iQJGBAABCgAwFiEEMHr+pfEFOIzK+KY1YJEiAU0MEvkFAmPsB58SHHNlYW5qY0Bn
 b29nbGUuY29tAAoJEGCRIgFNDBL5CK0P/1hhxUWokhNJX0skgf8uKhxTf8bLAq5F
 xr221M4Ac9YwjJaS0p4PJVSLVJxcVXHsyvanCOQh6AE8q1Ugz+iDLr2gAI+fHbJY
 lnczpAj1UhhttaLSOl13/31TaJdE2Ep0/q3+5vf1qQrOJYkElKpiDYbf3M8T5G72
 pguUFhKKKeZcCB99Jpr0u0HupiwCZoYWvdx7mvzRhi11bWaUyYIWc9CBETmAb4kN
 1UAmov16UrVOFAg/ssde6qPgUsAgB8XwJjta6oIQLeEm70L5ci6g/2Tw0IEwMybR
 yLCCST9eATl2U/hPV4KwBzSN1gHCAx4JDp4TKBR8ic+c+Z8CceIZln05fz6rQ8Sz
 ljyaRVFhaQZyZpjrZJ0h3kqMG1JT/Q4Hj9dq8RZJ0K73KVuCspxaJDHqp6a2p9D0
 dDacDkD3LFIPBdem3hHcpmV2XduaMfQwspObJORarkkQTZZS6erxmPvK/6Quvmbk
 UdD+6hvuSQA8rxNKXF+fOBsnK/1xYvzkVis0sxMwthkSDvENdcPbmlD6kHLz52cg
 Jt+yw/85oIg7zBgEkG2c8+5bB2hw0SRPQBlW4j29jYUhRwXwHxuovllFS2GU7iIc
 fVNtocw5Q9WATp752va4bVjv9XeYBmExn99fd3xvFenTa/ya4+5gNFK8vc9zL++J
 x3fDhAPXmQHJ
 =ieB+
 -----END PGP SIGNATURE-----

Merge tag 'kvm-x86-apic-6.3' of https://github.com/kvm-x86/linux into HEAD

KVM x86 APIC changes for 6.3:

 - Remove a superfluous variable from apic_get_tmcct()

 - Fix various edge cases in x2APIC MSR emulation

 - Mark APIC timer as expired if it's in one-shot mode and the count
   underflows while the vCPU task was being migrated

 - Reset xAPIC when userspace forces "impossible" x2APIC => xAPIC transition
This commit is contained in:
Paolo Bonzini 2023-02-21 20:00:44 -05:00
commit ddad47bfb9
4 changed files with 125 additions and 49 deletions

View file

@ -1487,7 +1487,6 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
{
ktime_t remaining, now;
s64 ns;
u32 tmcct;
ASSERT(apic != NULL);
@ -1502,10 +1501,7 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
remaining = 0;
ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
tmcct = div64_u64(ns,
(APIC_BUS_CYCLE_NS * apic->divide_count));
return tmcct;
return div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->divide_count));
}
static void __report_tpr_access(struct kvm_lapic *apic, bool write)
@ -1565,19 +1561,15 @@ static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
#define APIC_REGS_MASK(first, count) \
(APIC_REG_MASK(first) * ((1ull << (count)) - 1))
static int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
void *data)
u64 kvm_lapic_readable_reg_mask(struct kvm_lapic *apic)
{
unsigned char alignment = offset & 0xf;
u32 result;
/* this bitmask has a bit cleared for each reserved register */
/* Leave bits '0' for reserved and write-only registers. */
u64 valid_reg_mask =
APIC_REG_MASK(APIC_ID) |
APIC_REG_MASK(APIC_LVR) |
APIC_REG_MASK(APIC_TASKPRI) |
APIC_REG_MASK(APIC_PROCPRI) |
APIC_REG_MASK(APIC_LDR) |
APIC_REG_MASK(APIC_DFR) |
APIC_REG_MASK(APIC_SPIV) |
APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) |
APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) |
@ -1597,21 +1589,33 @@ static int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
if (kvm_lapic_lvt_supported(apic, LVT_CMCI))
valid_reg_mask |= APIC_REG_MASK(APIC_LVTCMCI);
/*
* ARBPRI and ICR2 are not valid in x2APIC mode. WARN if KVM reads ICR
* in x2APIC mode as it's an 8-byte register in x2APIC and needs to be
* manually handled by the caller.
*/
/* ARBPRI, DFR, and ICR2 are not valid in x2APIC mode. */
if (!apic_x2apic_mode(apic))
valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI) |
APIC_REG_MASK(APIC_DFR) |
APIC_REG_MASK(APIC_ICR2);
else
WARN_ON_ONCE(offset == APIC_ICR);
return valid_reg_mask;
}
EXPORT_SYMBOL_GPL(kvm_lapic_readable_reg_mask);
static int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
void *data)
{
unsigned char alignment = offset & 0xf;
u32 result;
/*
* WARN if KVM reads ICR in x2APIC mode, as it's an 8-byte register in
* x2APIC and needs to be manually handled by the caller.
*/
WARN_ON_ONCE(apic_x2apic_mode(apic) && offset == APIC_ICR);
if (alignment + len > 4)
return 1;
if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
if (offset > 0x3f0 ||
!(kvm_lapic_readable_reg_mask(apic) & APIC_REG_MASK(offset)))
return 1;
result = __apic_read(apic, offset & ~0xf);
@ -1964,8 +1968,12 @@ static bool set_target_expiration(struct kvm_lapic *apic, u32 count_reg)
if (unlikely(count_reg != APIC_TMICT)) {
deadline = tmict_to_ns(apic,
kvm_lapic_get_reg(apic, count_reg));
if (unlikely(deadline <= 0))
deadline = apic->lapic_timer.period;
if (unlikely(deadline <= 0)) {
if (apic_lvtt_period(apic))
deadline = apic->lapic_timer.period;
else
deadline = 0;
}
else if (unlikely(deadline > apic->lapic_timer.period)) {
pr_info_ratelimited(
"vcpu %i: requested lapic timer restore with "
@ -2328,10 +2336,14 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
break;
case APIC_SELF_IPI:
if (apic_x2apic_mode(apic))
kvm_apic_send_ipi(apic, APIC_DEST_SELF | (val & APIC_VECTOR_MASK), 0);
else
/*
* Self-IPI exists only when x2APIC is enabled. Bits 7:0 hold
* the vector, everything else is reserved.
*/
if (!apic_x2apic_mode(apic) || (val & ~APIC_VECTOR_MASK))
ret = 1;
else
kvm_apic_send_ipi(apic, APIC_DEST_SELF | val, 0);
break;
default:
ret = 1;
@ -2498,8 +2510,12 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
}
}
if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE))
kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
if ((old_value ^ value) & X2APIC_ENABLE) {
if (value & X2APIC_ENABLE)
kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
else if (value & MSR_IA32_APICBASE_ENABLE)
kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
}
if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) {
kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
@ -3114,13 +3130,17 @@ static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data)
/*
 * Handle a 64-bit, MSR-style write of @data to APIC register @reg.
 *
 * Returns the result of the underlying register write handler, or 1 if any
 * of bits 63:32 are set for a register other than ICR.
 */
static int kvm_lapic_msr_write(struct kvm_lapic *apic, u32 reg, u64 data)
{
	/*
	 * ICR is a 64-bit register in x2APIC mode (and Hyper-V PV vAPIC) and
	 * can be written as such, so the full value goes to the dedicated ICR
	 * handler.  All other registers remain accessible only through 32-bit
	 * reads/writes.
	 */
	if (reg == APIC_ICR)
		return kvm_x2apic_icr_write(apic, data);

	/* Bits 63:32 are reserved in all other registers. */
	return (data >> 32) ? 1 : kvm_lapic_reg_write(apic, reg, (u32)data);
}
@ -3143,9 +3163,6 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
return 1;
if (reg == APIC_DFR)
return 1;
return kvm_lapic_msr_read(apic, reg, data);
}

View file

@ -146,6 +146,8 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len);
void kvm_lapic_exit(void);
u64 kvm_lapic_readable_reg_mask(struct kvm_lapic *apic);
#define VEC_POS(v) ((v) & (32 - 1))
#define REG_POS(v) (((v) >> 5) << 4)

View file

@ -4018,29 +4018,20 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
vmx_set_msr_bitmap_write(msr_bitmap, msr);
}
/*
 * Reset the vmcs01 MSR bitmap words that cover MSRs 0x800 - 0x8ff.  Every
 * write-bitmap word is set to all ones.  Every read-bitmap word is cleared
 * when @mode has MSR_BITMAP_MODE_X2APIC_APICV set, and set to all ones
 * otherwise.
 */
static void vmx_reset_x2apic_msrs(struct kvm_vcpu *vcpu, u8 mode)
{
	/* The write words sit 0x800 bytes past the corresponding read words. */
	const unsigned int write_offset = 0x800 / sizeof(long);
	unsigned long *bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
	unsigned long read_bits = ~0ul;
	unsigned int word;

	if (mode & MSR_BITMAP_MODE_X2APIC_APICV)
		read_bits = 0;

	/* Walk the bitmap one long at a time, from MSR 0x800 through 0x8ff. */
	for (word = 0x800 / BITS_PER_LONG; word <= 0x8ff / BITS_PER_LONG; word++) {
		bitmap[word] = read_bits;
		bitmap[word + write_offset] = ~0ul;
	}
}
static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu)
{
/*
* x2APIC indices for 64-bit accesses into the RDMSR and WRMSR halves
* of the MSR bitmap. KVM emulates APIC registers up through 0x3f0,
* i.e. MSR 0x83f, and so only needs to dynamically manipulate 64 bits.
*/
const int read_idx = APIC_BASE_MSR / BITS_PER_LONG_LONG;
const int write_idx = read_idx + (0x800 / sizeof(u64));
struct vcpu_vmx *vmx = to_vmx(vcpu);
u64 *msr_bitmap = (u64 *)vmx->vmcs01.msr_bitmap;
u8 mode;
if (!cpu_has_vmx_msr_bitmap())
if (!cpu_has_vmx_msr_bitmap() || WARN_ON_ONCE(!lapic_in_kernel(vcpu)))
return;
if (cpu_has_secondary_exec_ctrls() &&
@ -4058,7 +4049,18 @@ static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu)
vmx->x2apic_msr_bitmap_mode = mode;
vmx_reset_x2apic_msrs(vcpu, mode);
/*
* Reset the bitmap for MSRs 0x800 - 0x83f. Leave AMD's uber-extended
* registers (0x840 and above) intercepted, KVM doesn't support them.
* Intercept all writes by default and poke holes as needed. Pass
* through reads for all valid registers by default in x2APIC+APICv
* mode, only the current timer count needs on-demand emulation by KVM.
*/
if (mode & MSR_BITMAP_MODE_X2APIC_APICV)
msr_bitmap[read_idx] = ~kvm_lapic_readable_reg_mask(vcpu->arch.apic);
else
msr_bitmap[read_idx] = ~0ull;
msr_bitmap[write_idx] = ~0ull;
/*
* TPR reads and writes can be virtualized even if virtual interrupt

View file

@ -132,6 +132,59 @@ static void test_icr(struct xapic_vcpu *x)
__test_icr(x, -1ull & ~APIC_DM_FIXED_MASK);
}
/*
 * Stuff @apic_base into MSR_IA32_APICBASE, read the local APIC state back via
 * KVM_GET_LAPIC, and assert that APIC_ID holds the vCPU's ID in the format of
 * the selected mode: the raw ID when X2APIC_ENABLE is set in @apic_base,
 * otherwise the ID shifted left by 24 bits.
 */
static void __test_apic_id(struct kvm_vcpu *vcpu, uint64_t apic_base)
{
	const int x2apic = !!(apic_base & X2APIC_ENABLE);
	struct kvm_lapic_state lapic;
	uint32_t expected, apic_id;

	vcpu_set_msr(vcpu, MSR_IA32_APICBASE, apic_base);
	vcpu_ioctl(vcpu, KVM_GET_LAPIC, &lapic);

	if (x2apic)
		expected = vcpu->id;
	else
		expected = vcpu->id << 24;

	apic_id = *((u32 *)&lapic.regs[APIC_ID]);

	TEST_ASSERT(apic_id == expected,
		    "APIC_ID not set back to %s format; wanted = %x, got = %x",
		    x2apic ? "x2APIC" : "xAPIC", expected, apic_id);
}
/*
 * Verify that KVM switches the APIC_ID between xAPIC and x2APIC formats when
 * userspace stuffs MSR_IA32_APICBASE.  Setting the APIC_ID when x2APIC is
 * enabled, and when the APIC transitions from DISABLED to ENABLED, is
 * architectural behavior (on Intel).  The x2APIC => xAPIC transition behavior
 * is KVM ABI, since attempting to transition from x2APIC to xAPIC without
 * disabling the APIC is architecturally disallowed.
 */
static void test_apic_id(void)
{
	const uint32_t NR_VCPUS = 3;
	struct kvm_vcpu *vcpus[NR_VCPUS];
	struct kvm_vm *vm;
	int i;

	vm = vm_create_with_vcpus(NR_VCPUS, NULL, vcpus);
	vm_enable_cap(vm, KVM_CAP_X2APIC_API, KVM_X2APIC_API_USE_32BIT_IDS);

	for (i = 0; i < NR_VCPUS; i++) {
		struct kvm_vcpu *vcpu = vcpus[i];
		uint64_t apic_base = vcpu_get_msr(vcpu, MSR_IA32_APICBASE);

		TEST_ASSERT(apic_base & MSR_IA32_APICBASE_ENABLE,
			    "APIC not in ENABLED state at vCPU RESET");
		TEST_ASSERT(!(apic_base & X2APIC_ENABLE),
			    "APIC not in xAPIC mode at vCPU RESET");

		/* Check the ID in xAPIC mode, after x2APIC is enabled, and back in xAPIC. */
		__test_apic_id(vcpu, apic_base);
		__test_apic_id(vcpu, apic_base | X2APIC_ENABLE);
		__test_apic_id(vcpu, apic_base);
	}

	kvm_vm_free(vm);
}
int main(int argc, char *argv[])
{
struct xapic_vcpu x = {
@ -157,4 +210,6 @@ int main(int argc, char *argv[])
virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
test_icr(&x);
kvm_vm_free(vm);
test_apic_id();
}