vmm: Allocate vCPUs on first use of a vCPU.

Convert the vcpu[] array in struct vm to an array of pointers and
allocate vCPUs on first use.  Rather than always allocating VM_MAXCPU
vCPUs for each VM, only the vCPUs actually in use are allocated.  A new
per-VM sx lock is added to serialize attempts to allocate vCPUs on
first use.  However, a given vCPU is never freed while the VM is
active, so the pointer is read via an unlocked read first to avoid the
need for the lock in the common case once the vCPU has been created.
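
In condensed form, the allocation path reads as below (the complete
function appears in the vmm.c hunk further down; the comments are
editorial annotations, not part of the commit):

struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
		return (NULL);

	/* Fast path: unlocked read, safe because vCPUs are never freed
	   while the VM is active. */
	vcpu = atomic_load_ptr(&vm->vcpu[vcpuid]);
	if (__predict_true(vcpu != NULL))
		return (vcpu);

	/* Slow path: serialize creation and re-check under the sx lock. */
	sx_xlock(&vm->vcpus_init_lock);
	vcpu = vm->vcpu[vcpuid];
	if (vcpu == NULL && !vm->dying) {
		vcpu = vcpu_alloc(vm, vcpuid);
		vcpu_init(vcpu);
		/* Publish the pointer only once the vCPU is fully
		   constructed, so the unlocked readers above are safe. */
		atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
		    (uintptr_t)vcpu);
	}
	sx_xunlock(&vm->vcpus_init_lock);
	return (vcpu);
}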

Some ioctls need to lock all vCPUs.  To prevent races with ioctls that
want to allocate a new vCPU, these ioctls also lock the sx lock that
protects vCPU creation.
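
A sketch in the spirit of the vcpu_lock_all() change in the vmm_dev.c
hunk below; the unwind here calls vcpu_set_state() directly and is
simplified relative to the committed code:

static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	int error = 0;
	uint16_t i, j, maxcpus;

	vm_slock_vcpus(sc->vm);		/* blocks vm_alloc_vcpu() callers */
	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)	/* never allocated; nothing to freeze */
			continue;
		error = vcpu_set_state(vcpu, VCPU_FROZEN, true);
		if (error)
			break;
	}
	if (error) {
		/* Unwind: return every vCPU frozen so far to idle. */
		for (j = 0; j < i; j++) {
			vcpu = vm_vcpu(sc->vm, j);
			if (vcpu != NULL)
				vcpu_set_state(vcpu, VCPU_IDLE, false);
		}
		vm_unlock_vcpus(sc->vm);
	}
	return (error);
}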

Reviewed by:	corvink, markj
Differential Revision:	https://reviews.freebsd.org/D37174
Author:	John Baldwin
Date:	2022-11-18 10:05:35 -08:00
Commit:	98568a005a
Parent:	c0f35dbf19

4 changed files with 149 additions and 79 deletions

sys/amd64/include/vmm.h

@ -219,6 +219,10 @@ extern const struct vmm_ops vmm_ops_intel;
extern const struct vmm_ops vmm_ops_amd;
int vm_create(const char *name, struct vm **retvm);
struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid);
void vm_disable_vcpu_creation(struct vm *vm);
void vm_slock_vcpus(struct vm *vm);
void vm_unlock_vcpus(struct vm *vm);
void vm_destroy(struct vm *vm);
int vm_reinit(struct vm *vm);
const char *vm_name(struct vm *vm);

sys/amd64/vmm/io/vlapic.c

@ -1840,6 +1840,7 @@ int
vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta)
{
int ret;
struct vcpu *vcpu;
struct vlapic *vlapic;
struct LAPIC *lapic;
uint32_t ccr;
@ -1851,7 +1852,10 @@ vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta)
maxcpus = vm_get_maxcpus(vm);
for (i = 0; i < maxcpus; i++) {
vlapic = vm_lapic(vm_vcpu(vm, i));
vcpu = vm_vcpu(vm, i);
if (vcpu == NULL)
continue;
vlapic = vm_lapic(vcpu);
/* snapshot the page first; timer period depends on icr_timer */
lapic = vlapic->apic_page;

sys/amd64/vmm/vmm.c

@ -127,7 +127,6 @@ struct vcpu {
uint64_t tsc_offset; /* (o) TSC offsetting */
};
#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx))
#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx))
@ -175,6 +174,7 @@ struct vm {
volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug */
cpuset_t startup_cpus; /* (i) [r] waiting for startup */
int suspend; /* (i) stop VM execution */
bool dying; /* (o) is dying */
volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */
cpuset_t rendezvous_req_cpus; /* (x) [r] rendezvous requested */
@ -186,13 +186,14 @@ struct vm {
struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) [m+v] guest memory regions */
struct vmspace *vmspace; /* (o) guest's address space */
char name[VM_MAX_NAMELEN+1]; /* (o) virtual machine name */
struct vcpu vcpu[VM_MAXCPU]; /* (i) guest vcpus */
struct vcpu *vcpu[VM_MAXCPU]; /* (x) guest vcpus */
/* The following describe the vm cpu topology */
uint16_t sockets; /* (o) num of sockets */
uint16_t cores; /* (o) num of cores/socket */
uint16_t threads; /* (o) num of threads/core */
uint16_t maxcpus; /* (o) max pluggable cpus */
struct sx mem_segs_lock; /* (o) */
struct sx vcpus_init_lock; /* (o) */
};
#define VMM_CTR0(vcpu, format) \
@ -319,10 +320,8 @@ vcpu_state2str(enum vcpu_state state)
#endif
static void
vcpu_cleanup(struct vm *vm, int i, bool destroy)
vcpu_cleanup(struct vcpu *vcpu, bool destroy)
{
struct vcpu *vcpu = &vm->vcpu[i];
vmmops_vlapic_cleanup(vcpu->vlapic);
vmmops_vcpu_cleanup(vcpu->cookie);
vcpu->cookie = NULL;
@ -333,30 +332,30 @@ vcpu_cleanup(struct vm *vm, int i, bool destroy)
}
}
static void
vcpu_init(struct vm *vm, int vcpu_id, bool create)
static struct vcpu *
vcpu_alloc(struct vm *vm, int vcpu_id)
{
struct vcpu *vcpu;
KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
("vcpu_init: invalid vcpu %d", vcpu_id));
vcpu = &vm->vcpu[vcpu_id];
if (create) {
KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already "
"initialized", vcpu_id));
vcpu_lock_init(vcpu);
vcpu->state = VCPU_IDLE;
vcpu->hostcpu = NOCPU;
vcpu->vcpuid = vcpu_id;
vcpu->vm = vm;
vcpu->guestfpu = fpu_save_area_alloc();
vcpu->stats = vmm_stat_alloc();
vcpu->tsc_offset = 0;
}
vcpu = malloc(sizeof(*vcpu), M_VM, M_WAITOK | M_ZERO);
vcpu_lock_init(vcpu);
vcpu->state = VCPU_IDLE;
vcpu->hostcpu = NOCPU;
vcpu->vcpuid = vcpu_id;
vcpu->vm = vm;
vcpu->guestfpu = fpu_save_area_alloc();
vcpu->stats = vmm_stat_alloc();
vcpu->tsc_offset = 0;
return (vcpu);
}
vcpu->cookie = vmmops_vcpu_init(vm->cookie, vcpu, vcpu_id);
static void
vcpu_init(struct vcpu *vcpu)
{
vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
vcpu->vlapic = vmmops_vlapic_init(vcpu->cookie);
vm_set_x2apic_state(vcpu, X2APIC_DISABLED);
vcpu->reqidle = 0;
@ -473,8 +472,6 @@ MODULE_VERSION(vmm, 1);
static void
vm_init(struct vm *vm, bool create)
{
int i;
vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
vm->iommu = NULL;
vm->vioapic = vioapic_init(vm);
@ -492,8 +489,61 @@ vm_init(struct vm *vm, bool create)
vm->suspend = 0;
CPU_ZERO(&vm->suspended_cpus);
for (i = 0; i < vm->maxcpus; i++)
vcpu_init(vm, i, create);
if (!create) {
for (int i = 0; i < vm->maxcpus; i++) {
if (vm->vcpu[i] != NULL)
vcpu_init(vm->vcpu[i]);
}
}
}
void
vm_disable_vcpu_creation(struct vm *vm)
{
sx_xlock(&vm->vcpus_init_lock);
vm->dying = true;
sx_xunlock(&vm->vcpus_init_lock);
}
struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
struct vcpu *vcpu;
if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
return (NULL);
vcpu = atomic_load_ptr(&vm->vcpu[vcpuid]);
if (__predict_true(vcpu != NULL))
return (vcpu);
sx_xlock(&vm->vcpus_init_lock);
vcpu = vm->vcpu[vcpuid];
if (vcpu == NULL && !vm->dying) {
vcpu = vcpu_alloc(vm, vcpuid);
vcpu_init(vcpu);
/*
* Ensure vCPU is fully created before updating pointer
* to permit unlocked reads above.
*/
atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
(uintptr_t)vcpu);
}
sx_xunlock(&vm->vcpus_init_lock);
return (vcpu);
}
void
vm_slock_vcpus(struct vm *vm)
{
sx_slock(&vm->vcpus_init_lock);
}
void
vm_unlock_vcpus(struct vm *vm)
{
sx_unlock(&vm->vcpus_init_lock);
}
/*
@ -528,6 +578,7 @@ vm_create(const char *name, struct vm **retvm)
vm->vmspace = vmspace;
mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
sx_init(&vm->mem_segs_lock, "vm mem_segs");
sx_init(&vm->vcpus_init_lock, "vm vcpus");
vm->sockets = 1;
vm->cores = cores_per_package; /* XXX backwards compatibility */
@ -558,17 +609,14 @@ vm_get_maxcpus(struct vm *vm)
int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
uint16_t threads, uint16_t maxcpus)
uint16_t threads, uint16_t maxcpus __unused)
{
if (maxcpus != 0)
return (EINVAL); /* XXX remove when supported */
/* Ignore maxcpus. */
if ((sockets * cores * threads) > vm->maxcpus)
return (EINVAL);
/* XXX need to check sockets * cores * threads == vCPU, how? */
vm->sockets = sockets;
vm->cores = cores;
vm->threads = threads;
vm->maxcpus = VM_MAXCPU; /* XXX temp to keep code working */
return(0);
}
@ -593,8 +641,10 @@ vm_cleanup(struct vm *vm, bool destroy)
vatpic_cleanup(vm->vatpic);
vioapic_cleanup(vm->vioapic);
for (i = 0; i < vm->maxcpus; i++)
vcpu_cleanup(vm, i, destroy);
for (i = 0; i < vm->maxcpus; i++) {
if (vm->vcpu[i] != NULL)
vcpu_cleanup(vm->vcpu[i], destroy);
}
vmmops_cleanup(vm->cookie);
@ -619,6 +669,7 @@ vm_cleanup(struct vm *vm, bool destroy)
vmmops_vmspace_free(vm->vmspace);
vm->vmspace = NULL;
sx_destroy(&vm->vcpus_init_lock);
sx_destroy(&vm->mem_segs_lock);
mtx_destroy(&vm->rendezvous_mtx);
}
@ -2250,7 +2301,7 @@ vcpu_vcpuid(struct vcpu *vcpu)
struct vcpu *
vm_vcpu(struct vm *vm, int vcpuid)
{
return (&vm->vcpu[vcpuid]);
return (vm->vcpu[vcpuid]);
}
struct vlapic *
@ -2761,7 +2812,9 @@ vm_snapshot_vcpus(struct vm *vm, struct vm_snapshot_meta *meta)
now = rdtsc();
maxcpus = vm_get_maxcpus(vm);
for (i = 0; i < maxcpus; i++) {
vcpu = &vm->vcpu[i];
vcpu = vm->vcpu[i];
if (vcpu == NULL)
continue;
SNAPSHOT_VAR_OR_LEAVE(vcpu->x2apic_state, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->exitintinfo, meta, ret, done);
@ -2811,7 +2864,9 @@ vm_snapshot_vcpu(struct vm *vm, struct vm_snapshot_meta *meta)
maxcpus = vm_get_maxcpus(vm);
for (i = 0; i < maxcpus; i++) {
vcpu = &vm->vcpu[i];
vcpu = vm->vcpu[i];
if (vcpu == NULL)
continue;
error = vmmops_vcpu_snapshot(vcpu->cookie, meta);
if (error != 0) {
@ -2894,7 +2949,9 @@ vm_restore_time(struct vm *vm)
maxcpus = vm_get_maxcpus(vm);
for (i = 0; i < maxcpus; i++) {
vcpu = &vm->vcpu[i];
vcpu = vm->vcpu[i];
if (vcpu == NULL)
continue;
error = vmmops_restore_tsc(vcpu->cookie,
vcpu->tsc_offset - now);

sys/amd64/vmm/vmm_dev.c

@ -125,24 +125,16 @@ vmm_priv_check(struct ucred *ucred)
}
static int
vcpu_lock_one(struct vmmdev_softc *sc, int vcpu)
vcpu_lock_one(struct vcpu *vcpu)
{
int error;
if (vcpu < 0 || vcpu >= vm_get_maxcpus(sc->vm))
return (EINVAL);
error = vcpu_set_state(vm_vcpu(sc->vm, vcpu), VCPU_FROZEN, true);
return (error);
return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
}
static void
vcpu_unlock_one(struct vmmdev_softc *sc, int vcpuid)
vcpu_unlock_one(struct vmmdev_softc *sc, int vcpuid, struct vcpu *vcpu)
{
struct vcpu *vcpu;
enum vcpu_state state;
vcpu = vm_vcpu(sc->vm, vcpuid);
state = vcpu_get_state(vcpu, NULL);
if (state != VCPU_FROZEN) {
panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm),
@ -155,19 +147,29 @@ vcpu_unlock_one(struct vmmdev_softc *sc, int vcpuid)
static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
int error, vcpu;
uint16_t maxcpus;
struct vcpu *vcpu;
int error;
uint16_t i, maxcpus;
vm_slock_vcpus(sc->vm);
maxcpus = vm_get_maxcpus(sc->vm);
for (vcpu = 0; vcpu < maxcpus; vcpu++) {
error = vcpu_lock_one(sc, vcpu);
for (i = 0; i < maxcpus; i++) {
vcpu = vm_vcpu(sc->vm, i);
if (vcpu == NULL)
continue;
error = vcpu_lock_one(vcpu);
if (error)
break;
}
if (error) {
while (--vcpu >= 0)
vcpu_unlock_one(sc, vcpu);
while (--i >= 0) {
vcpu = vm_vcpu(sc->vm, i);
if (vcpu == NULL)
continue;
vcpu_unlock_one(sc, i, vcpu);
}
vm_unlock_vcpus(sc->vm);
}
return (error);
@ -176,12 +178,17 @@ vcpu_lock_all(struct vmmdev_softc *sc)
static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
int vcpu;
uint16_t maxcpus;
struct vcpu *vcpu;
uint16_t i, maxcpus;
maxcpus = vm_get_maxcpus(sc->vm);
for (vcpu = 0; vcpu < maxcpus; vcpu++)
vcpu_unlock_one(sc, vcpu);
for (i = 0; i < maxcpus; i++) {
vcpu = vm_vcpu(sc->vm, i);
if (vcpu == NULL)
continue;
vcpu_unlock_one(sc, i, vcpu);
}
vm_unlock_vcpus(sc->vm);
}
static struct vmmdev_softc *
@ -363,20 +370,11 @@ vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
return (error);
}
static struct vcpu *
lookup_vcpu(struct vm *vm, int vcpuid)
{
if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
return (NULL);
return (vm_vcpu(vm, vcpuid));
}
static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
struct thread *td)
{
int error, vcpuid, state_changed, size;
int error, vcpuid, size;
cpuset_t *cpuset;
struct vmmdev_softc *sc;
struct vcpu *vcpu;
@ -414,6 +412,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
struct vm_readwrite_kernemu_device *kernemu;
uint64_t *regvals;
int *regnums;
enum { NONE, SINGLE, ALL } vcpus_locked;
bool memsegs_locked;
#ifdef BHYVE_SNAPSHOT
struct vm_snapshot_meta *snapshot_meta;
@ -429,7 +428,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
vcpuid = -1;
vcpu = NULL;
state_changed = 0;
vcpus_locked = NONE;
memsegs_locked = false;
/*
@ -465,11 +464,15 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
* ioctls that can operate only on vcpus that are not running.
*/
vcpuid = *(int *)data;
error = vcpu_lock_one(sc, vcpuid);
vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
if (vcpu == NULL) {
error = EINVAL;
goto done;
}
error = vcpu_lock_one(vcpu);
if (error)
goto done;
state_changed = 1;
vcpu = vm_vcpu(sc->vm, vcpuid);
vcpus_locked = SINGLE;
break;
#ifdef COMPAT_FREEBSD12
@ -501,7 +504,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
error = vcpu_lock_all(sc);
if (error)
goto done;
state_changed = 2;
vcpus_locked = ALL;
break;
#ifdef COMPAT_FREEBSD12
@ -528,7 +531,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
* a specific vCPU.
*/
vcpuid = *(int *)data;
vcpu = lookup_vcpu(sc->vm, vcpuid);
vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
if (vcpu == NULL) {
error = EINVAL;
goto done;
@ -545,7 +548,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
vcpuid = *(int *)data;
if (vcpuid == -1)
break;
vcpu = lookup_vcpu(sc->vm, vcpuid);
vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
if (vcpu == NULL) {
error = EINVAL;
goto done;
@ -957,9 +960,9 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
break;
}
if (state_changed == 1)
vcpu_unlock_one(sc, vcpuid);
else if (state_changed == 2)
if (vcpus_locked == SINGLE)
vcpu_unlock_one(sc, vcpuid, vcpu);
else if (vcpus_locked == ALL)
vcpu_unlock_all(sc);
if (memsegs_locked)
vm_unlock_memsegs(sc->vm);
@ -1041,8 +1044,10 @@ vmmdev_destroy(void *arg)
struct devmem_softc *dsc;
int error __diagused;
vm_disable_vcpu_creation(sc->vm);
error = vcpu_lock_all(sc);
KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
vm_unlock_vcpus(sc->vm);
while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));