vmm: Use a cpuset_t for vCPUs waiting for STARTUP IPIs.

Retire the boot_state member of struct vlapic and instead use a cpuset
in the VM to track vCPUs waiting for STARTUP IPIs.  INIT IPIs add
vCPUs to this set, and STARTUP IPIs remove vCPUs from the set.
STARTUP IPIs are only reported to userland for vCPUs that were removed
from the set.

In particular, this permits a subsequent change to allocate vCPUs on
demand, in which case a vCPU may not be allocated until after a
STARTUP IPI is reported to userland.

Reviewed by:	corvink, markj
Differential Revision:	https://reviews.freebsd.org/D37173
This commit is contained in:
John Baldwin 2022-11-18 10:05:10 -08:00
parent 223de44c93
commit c0f35dbf19
4 changed files with 41 additions and 42 deletions

View file

@ -319,9 +319,12 @@ int vm_restore_time(struct vm *vm);
typedef void (*vm_rendezvous_func_t)(struct vcpu *vcpu, void *arg);
int vm_smp_rendezvous(struct vcpu *vcpu, cpuset_t dest,
vm_rendezvous_func_t func, void *arg);
cpuset_t vm_active_cpus(struct vm *vm);
cpuset_t vm_debug_cpus(struct vm *vm);
cpuset_t vm_suspended_cpus(struct vm *vm);
cpuset_t vm_start_cpus(struct vm *vm, const cpuset_t *tostart);
void vm_await_start(struct vm *vm, const cpuset_t *waiting);
#endif /* _SYS__CPUSET_H_ */
static __inline int

View file

@ -1039,7 +1039,6 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
cpuset_t dmask, ipimask;
uint64_t icrval;
uint32_t dest, vec, mode, shorthand;
struct vlapic *vlapic2;
struct vcpu *vcpu;
struct vm_exit *vmexit;
struct LAPIC *lapic;
@ -1128,14 +1127,9 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
i == vlapic->vcpuid)
break;
/*
* Userland which doesn't support the IPI exit
* requires that the boot state is set to SIPI
* here.
*/
vcpu = vm_vcpu(vlapic->vm, i);
vlapic2 = vm_lapic(vcpu);
vlapic2->boot_state = BS_SIPI;
/* vCPU i is waiting for SIPI. */
CPU_SETOF(i, &dmask);
vm_await_start(vlapic->vm, &dmask);
break;
}
@ -1158,11 +1152,10 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
/*
* Ignore SIPIs in any state other than wait-for-SIPI
*/
vcpu = vm_vcpu(vlapic->vm, i);
vlapic2 = vm_lapic(vcpu);
if (vlapic2->boot_state != BS_SIPI)
CPU_SETOF(i, &dmask);
dmask = vm_start_cpus(vlapic->vm, &dmask);
if (CPU_EMPTY(&dmask))
break;
vlapic2->boot_state = BS_RUNNING;
vmexit = vm_exitinfo(vlapic->vcpu);
vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
@ -1173,19 +1166,10 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
break;
}
CPU_FOREACH_ISSET(i, &dmask) {
vcpu = vm_vcpu(vlapic->vm, i);
vlapic2 = vm_lapic(vcpu);
/*
* Ignore SIPIs in any state other than wait-for-SIPI
*/
if (vlapic2->boot_state != BS_SIPI)
continue;
vlapic2->boot_state = BS_RUNNING;
CPU_SET(i, &ipimask);
}
/*
* Ignore SIPIs in any state other than wait-for-SIPI
*/
ipimask = vm_start_cpus(vlapic->vm, &dmask);
break;
default:
return (1);
@ -1210,9 +1194,6 @@ vlapic_handle_init(struct vcpu *vcpu, void *arg)
struct vlapic *vlapic = vm_lapic(vcpu);
vlapic_reset(vlapic);
/* vlapic_reset modifies the boot state. */
vlapic->boot_state = BS_SIPI;
}
int
@ -1223,6 +1204,7 @@ vm_handle_ipi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
case APIC_DELMODE_INIT:
vm_smp_rendezvous(vcpu, vme->u.ipi.dmask, vlapic_handle_init,
NULL);
vm_await_start(vcpu_vm(vcpu), &vme->u.ipi.dmask);
break;
case APIC_DELMODE_STARTUP:
break;
@ -1598,11 +1580,6 @@ vlapic_reset(struct vlapic *vlapic)
lapic->dcr_timer = 0;
vlapic_dcr_write_handler(vlapic);
if (vlapic->vcpuid == 0)
vlapic->boot_state = BS_RUNNING; /* BSP */
else
vlapic->boot_state = BS_INIT; /* AP */
vlapic->svr_last = lapic->svr;
}
@ -1900,7 +1877,6 @@ vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta)
sizeof(vlapic->isrvec_stk),
meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vlapic->isrvec_stk_top, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vlapic->boot_state, meta, ret, done);
SNAPSHOT_BUF_OR_LEAVE(vlapic->lvt_last,
sizeof(vlapic->lvt_last),

View file

@ -125,12 +125,6 @@ do { \
VLAPIC_CTR1((vlapic), msg " isr7 0x%08x", isrptr[7 << 2]); \
} while (0)
/*
 * Startup progress of a vCPU, stored in the boot_state member of its
 * struct vlapic.
 */
enum boot_state {
/* State vlapic_reset() assigns to an AP (vcpuid != 0). */
BS_INIT,
/* Wait-for-SIPI: the only state in which a STARTUP IPI is honored. */
BS_SIPI,
/* Set once a SIPI is accepted; vlapic_reset() puts the BSP (vcpuid 0) here. */
BS_RUNNING
};
/*
* 16 priority levels with at most one vector injected per level.
*/
@ -175,7 +169,6 @@ struct vlapic {
int isrvec_stk_top;
uint64_t msr_apicbase;
enum boot_state boot_state;
/*
* Copies of some registers in the virtual APIC page. We do this for

View file

@ -173,6 +173,7 @@ struct vm {
struct vrtc *vrtc; /* (o) virtual RTC */
volatile cpuset_t active_cpus; /* (i) active vcpus */
volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug */
cpuset_t startup_cpus; /* (i) [r] waiting for startup */
int suspend; /* (i) stop VM execution */
volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */
@ -486,6 +487,7 @@ vm_init(struct vm *vm, bool create)
CPU_ZERO(&vm->active_cpus);
CPU_ZERO(&vm->debug_cpus);
CPU_ZERO(&vm->startup_cpus);
vm->suspend = 0;
CPU_ZERO(&vm->suspended_cpus);
@ -2421,6 +2423,30 @@ vm_suspended_cpus(struct vm *vm)
return (vm->suspended_cpus);
}
/*
 * Claim the vCPUs named in 'tostart' that are currently awaiting startup.
 *
 * The intersection of 'tostart' and the VM's startup_cpus set is computed
 * and removed from startup_cpus while rendezvous_mtx is held.  The vCPUs
 * that were removed are returned to the caller; the result is empty when
 * none of the requested vCPUs were waiting.
 */
cpuset_t
vm_start_cpus(struct vm *vm, const cpuset_t *tostart)
{
	cpuset_t *pending = &vm->startup_cpus;
	cpuset_t started;

	mtx_lock(&vm->rendezvous_mtx);
	/* Only vCPUs that are both requested and waiting are started. */
	CPU_AND(&started, pending, tostart);
	/* Those vCPUs are no longer awaiting startup. */
	CPU_ANDNOT(pending, pending, &started);
	mtx_unlock(&vm->rendezvous_mtx);

	return (started);
}
/*
 * Mark the vCPUs in 'waiting' as awaiting startup by adding them to the
 * VM's startup_cpus set.  The update is made with rendezvous_mtx held.
 */
void
vm_await_start(struct vm *vm, const cpuset_t *waiting)
{
	cpuset_t *pending = &vm->startup_cpus;

	mtx_lock(&vm->rendezvous_mtx);
	CPU_OR(pending, pending, waiting);
	mtx_unlock(&vm->rendezvous_mtx);
}
void *
vcpu_stats(struct vcpu *vcpu)
{
@ -2769,6 +2795,7 @@ vm_snapshot_vm(struct vm *vm, struct vm_snapshot_meta *meta)
if (ret != 0)
goto done;
SNAPSHOT_VAR_OR_LEAVE(vm->startup_cpus, meta, ret, done);
done:
return (ret);
}