diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index 23ab8427cb74..b4f3312794dd 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -231,6 +231,9 @@ int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
 /*
  * APIs that modify the guest memory map require all vcpus to be frozen.
  */
+void vm_slock_memsegs(struct vm *vm);
+void vm_xlock_memsegs(struct vm *vm);
+void vm_unlock_memsegs(struct vm *vm);
 int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
     size_t len, int prot, int flags);
 int vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len);
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index c8a45c7aa811..89e406efcdb0 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#include <sys/sx.h>
 #include
 #include
@@ -155,6 +156,11 @@ struct mem_map {
  * (o) initialized the first time the VM is created
  * (i) initialized when VM is created and when it is reinitialized
  * (x) initialized before use
+ *
+ * Locking:
+ * [m] mem_segs_lock
+ * [r] rendezvous_mtx
+ * [v] reads require one frozen vcpu, writes require freezing all vcpus
  */
 struct vm {
 	void *cookie;				/* (i) cpu-specific data */
@@ -170,13 +176,13 @@ struct vm {
 	int suspend;				/* (i) stop VM execution */
 	volatile cpuset_t suspended_cpus;	/* (i) suspended vcpus */
 	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
-	cpuset_t rendezvous_req_cpus;		/* (x) rendezvous requested */
-	cpuset_t rendezvous_done_cpus;		/* (x) rendezvous finished */
-	void *rendezvous_arg;			/* (x) rendezvous func/arg */
+	cpuset_t rendezvous_req_cpus;		/* (x) [r] rendezvous requested */
+	cpuset_t rendezvous_done_cpus;		/* (x) [r] rendezvous finished */
+	void *rendezvous_arg;			/* (x) [r] rendezvous func/arg */
 	vm_rendezvous_func_t rendezvous_func;
 	struct mtx rendezvous_mtx;		/* (o) rendezvous lock */
-	struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
-	struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
+	struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) [m+v] guest address space */
+	struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) [m+v] guest memory regions */
 	struct vmspace *vmspace;		/* (o) guest's address space */
 	char name[VM_MAX_NAMELEN+1];		/* (o) virtual machine name */
 	struct vcpu vcpu[VM_MAXCPU];		/* (i) guest vcpus */
@@ -185,6 +191,7 @@ struct vm {
 	uint16_t cores;				/* (o) num of cores/socket */
 	uint16_t threads;			/* (o) num of threads/core */
 	uint16_t maxcpus;			/* (o) max pluggable cpus */
+	struct sx mem_segs_lock;		/* (o) */
 };

 #define VMM_CTR0(vcpu, format)						\
@@ -518,6 +525,7 @@ vm_create(const char *name, struct vm **retvm)
 	strcpy(vm->name, name);
 	vm->vmspace = vmspace;
 	mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
+	sx_init(&vm->mem_segs_lock, "vm mem_segs");

 	vm->sockets = 1;
 	vm->cores = cores_per_package;	/* XXX backwards compatibility */
@@ -609,6 +617,7 @@ vm_cleanup(struct vm *vm, bool destroy)
 		vmmops_vmspace_free(vm->vmspace);
 		vm->vmspace = NULL;

+		sx_destroy(&vm->mem_segs_lock);
 		mtx_destroy(&vm->rendezvous_mtx);
 	}
 }
@@ -645,6 +654,24 @@ vm_name(struct vm *vm)
 	return (vm->name);
 }

+void
+vm_slock_memsegs(struct vm *vm)
+{
+	sx_slock(&vm->mem_segs_lock);
+}
+
+void
+vm_xlock_memsegs(struct vm *vm)
+{
+	sx_xlock(&vm->mem_segs_lock);
+}
+
+void
+vm_unlock_memsegs(struct vm *vm)
+{
+	sx_unlock(&vm->mem_segs_lock);
+}
+
 int
 vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
 {
@@ -702,6 +729,8 @@ vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
 	struct mem_seg *seg;
 	vm_object_t obj;

+	sx_assert(&vm->mem_segs_lock, SX_XLOCKED);
+
 	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
 		return (EINVAL);

@@ -732,6 +761,8 @@ vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
 {
 	struct mem_seg *seg;

+	sx_assert(&vm->mem_segs_lock, SX_LOCKED);
+
 	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
 		return (EINVAL);

@@ -1075,19 +1106,7 @@ void *
 vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
     void **cookie)
 {
-#ifdef INVARIANTS
-	/*
-	 * All vcpus are frozen by ioctls that modify the memory map
-	 * (e.g. VM_MMAP_MEMSEG). Therefore 'vm->memmap[]' stability is
-	 * guaranteed if at least one vcpu is in the VCPU_FROZEN state.
-	 */
-	int state;
-	for (int i = 0; i < vm->maxcpus; i++) {
-		state = vcpu_get_state(vm_vcpu(vm, i), NULL);
-		KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
-		    __func__, state));
-	}
-#endif
+	sx_assert(&vm->mem_segs_lock, SX_LOCKED);

 	return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie));
 }
diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c
index 249131b16464..1b8b1e6d388f 100644
--- a/sys/amd64/vmm/vmm_dev.c
+++ b/sys/amd64/vmm/vmm_dev.c
@@ -221,8 +221,6 @@ vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
 	vm_paddr_t gpa, maxaddr;
 	void *hpa, *cookie;
 	struct vmmdev_softc *sc;
-	struct vcpu *vcpu;
-	uint16_t lastcpu;

 	error = vmm_priv_check(curthread->td_ucred);
 	if (error)
@@ -233,13 +231,9 @@ vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
 		return (ENXIO);

 	/*
-	 * Get a read lock on the guest memory map by freezing any vcpu.
+	 * Get a read lock on the guest memory map.
 	 */
-	lastcpu = vm_get_maxcpus(sc->vm) - 1;
-	error = vcpu_lock_one(sc, lastcpu);
-	if (error)
-		return (error);
-	vcpu = vm_vcpu(sc->vm, lastcpu);
+	vm_slock_memsegs(sc->vm);

 	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
 	maxaddr = vmm_sysmem_maxaddr(sc->vm);
@@ -256,7 +250,7 @@ vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
 		 * Since this device does not support lseek(2), dd(1) will
 		 * read(2) blocks of data to simulate the lseek(2).
 		 */
-		hpa = vm_gpa_hold(vcpu, gpa, c, prot, &cookie);
+		hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
 		if (hpa == NULL) {
 			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
 				error = uiomove(__DECONST(void *, zero_region),
@@ -268,7 +262,7 @@ vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
 			vm_gpa_release(cookie);
 		}
 	}
-	vcpu_unlock_one(sc, lastcpu);
+	vm_unlock_memsegs(sc->vm);
 	return (error);
 }

@@ -420,6 +414,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
 	struct vm_readwrite_kernemu_device *kernemu;
 	uint64_t *regvals;
 	int *regnums;
+	bool memsegs_locked;
 #ifdef BHYVE_SNAPSHOT
 	struct vm_snapshot_meta *snapshot_meta;
 #endif
@@ -435,6 +430,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
 	vcpuid = -1;
 	vcpu = NULL;
 	state_changed = 0;
+	memsegs_locked = false;

 	/*
 	 * For VMM ioctls that operate on a single vCPU, lookup the
@@ -476,10 +472,6 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
 		vcpu = vm_vcpu(sc->vm, vcpuid);
 		break;

-	case VM_MAP_PPTDEV_MMIO:
-	case VM_UNMAP_PPTDEV_MMIO:
-	case VM_BIND_PPTDEV:
-	case VM_UNBIND_PPTDEV:
 #ifdef COMPAT_FREEBSD12
 	case VM_ALLOC_MEMSEG_FBSD12:
 #endif
@@ -487,8 +479,23 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
 	case VM_ALLOC_MEMSEG:
 	case VM_MMAP_MEMSEG:
 	case VM_MUNMAP_MEMSEG:
 	case VM_REINIT:
+		/*
+		 * ioctls that modify the memory map must lock memory
+		 * segments exclusively.
+		 */
+		vm_xlock_memsegs(sc->vm);
+		memsegs_locked = true;
+		/* FALLTHROUGH */
+	case VM_MAP_PPTDEV_MMIO:
+	case VM_UNMAP_PPTDEV_MMIO:
+	case VM_BIND_PPTDEV:
+	case VM_UNBIND_PPTDEV:
+#ifdef BHYVE_SNAPSHOT
+	case VM_SNAPSHOT_REQ:
+	case VM_RESTORE_TIME:
+#endif
 		/*
 		 * ioctls that operate on the entire virtual machine must
 		 * prevent all vcpus from running.
 		 */
@@ -503,14 +510,10 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
 	case VM_GET_MEMSEG:
 	case VM_MMAP_GETNEXT:
 		/*
-		 * Lock a vcpu to make sure that the memory map cannot be
-		 * modified while it is being inspected.
+		 * Lock the memory map while it is being inspected.
 		 */
-		vcpuid = vm_get_maxcpus(sc->vm) - 1;
-		error = vcpu_lock_one(sc, vcpuid);
-		if (error)
-			goto done;
-		state_changed = 1;
+		vm_slock_memsegs(sc->vm);
+		memsegs_locked = true;
 		break;

 #ifdef COMPAT_FREEBSD13
@@ -958,6 +961,8 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
 		vcpu_unlock_one(sc, vcpuid);
 	else if (state_changed == 2)
 		vcpu_unlock_all(sc);
+	if (memsegs_locked)
+		vm_unlock_memsegs(sc->vm);

 done:
 	/*
@@ -978,7 +983,6 @@ vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
 	size_t len;
 	vm_ooffset_t segoff, first, last;
 	int error, found, segid;
-	uint16_t lastcpu;
 	bool sysmem;

 	error = vmm_priv_check(curthread->td_ucred);
@@ -997,12 +1001,9 @@ vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
 	}

 	/*
-	 * Get a read lock on the guest memory map by freezing any vcpu.
+	 * Get a read lock on the guest memory map.
 	 */
-	lastcpu = vm_get_maxcpus(sc->vm) - 1;
-	error = vcpu_lock_one(sc, lastcpu);
-	if (error)
-		return (error);
+	vm_slock_memsegs(sc->vm);

 	gpa = 0;
 	found = 0;
@@ -1029,7 +1030,7 @@ vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
 			error = EINVAL;
 		}
 	}
-	vcpu_unlock_one(sc, lastcpu);
+	vm_unlock_memsegs(sc->vm);
 	return (error);
 }

@@ -1242,7 +1243,6 @@ devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
 	vm_ooffset_t first, last;
 	size_t seglen;
 	int error;
-	uint16_t lastcpu;
 	bool sysmem;

 	dsc = cdev->si_drv1;
@@ -1256,16 +1256,14 @@ devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
 	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
 		return (EINVAL);

-	lastcpu = vm_get_maxcpus(dsc->sc->vm) - 1;
-	error = vcpu_lock_one(dsc->sc, lastcpu);
-	if (error)
-		return (error);
+	vm_slock_memsegs(dsc->sc->vm);

 	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
 	KASSERT(error == 0 && !sysmem && *objp != NULL,
 	    ("%s: invalid devmem segment %d", __func__, dsc->segid));
-	vcpu_unlock_one(dsc->sc, lastcpu);
+
+	vm_unlock_memsegs(dsc->sc->vm);

 	if (seglen >= last) {
 		vm_object_reference(*objp);
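
Note: the sketch below is not part of the patch; it only illustrates the usage pattern the change introduces. Readers of the guest memory map take the new shared mem_segs lock instead of freezing a vcpu, hold the guest page with vm_gpa_hold_global(), and drop the lock when done, which is what vmmdev_rw() now does. The helper name vm_read_guest_bytes() is hypothetical.

/*
 * Illustrative sketch (hypothetical helper, not in the patch): read a few
 * bytes of guest memory while holding a stable view of the memory map.
 * The caller is assumed to keep [gpa, gpa + len) within a single page,
 * as vmmdev_rw() does when it clamps its chunk size to the page boundary.
 */
static int
vm_read_guest_bytes(struct vm *vm, vm_paddr_t gpa, void *buf, size_t len)
{
	void *cookie, *hpa;
	int error;

	error = 0;
	vm_slock_memsegs(vm);		/* shared lock: the map cannot change */
	hpa = vm_gpa_hold_global(vm, gpa, len, VM_PROT_READ, &cookie);
	if (hpa == NULL) {
		error = EFAULT;
	} else {
		memcpy(buf, hpa, len);	/* copy while the page is held */
		vm_gpa_release(cookie);
	}
	vm_unlock_memsegs(vm);		/* writers may now modify the map */
	return (error);
}

An ioctl that changes the map (VM_MMAP_MEMSEG, VM_ALLOC_MEMSEG, and so on) would instead call vm_xlock_memsegs(), which blocks until all such shared holders have dropped the lock.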