bhyve: support relocating fbuf and passthru data BARs

We want to allow the UEFI firmware to enumerate and assign
addresses to PCI devices so we can boot from NVMe[1]. Address
assignment of PCI BARs is properly handled by the PCI emulation
code in general, but a few specific cases need additional support.
fbuf and passthru map additional objects into the guest physical
address space and so need to handle address updates. Here we add a
callback to emulated PCI devices to inform them of a BAR
configuration change. fbuf and passthru then watch for these BAR
changes and relocate the frame buffer memory segment and passthru
device mmio area respectively.

We also add new VM_MUNMAP_MEMSEG and VM_UNMAP_PPTDEV_MMIO ioctls
to vmm(4) to facilitate the unmapping needed for addres updates.

[1]: https://github.com/freebsd/uefi-edk2/pull/9/

Originally by:	scottph
MFC After:	1 week
Sponsored by:	Intel Corporation
Reviewed by:	grehan
Approved by:	philip (mentor)
Differential Revision:	https://reviews.freebsd.org/D24066
This commit is contained in:
D Scott Phillips 2021-03-19 00:08:52 +08:00 committed by Ka Ho Ng
parent 621b509048
commit f8a6ec2d57
12 changed files with 232 additions and 38 deletions

View file

@ -251,6 +251,19 @@ vm_get_guestmem_from_ctx(struct vmctx *ctx, char **guest_baseaddr,
return (0);
}
int
vm_munmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, size_t len)
{
struct vm_munmap munmap;
int error;
munmap.gpa = gpa;
munmap.len = len;
error = ioctl(ctx->fd, VM_MUNMAP_MEMSEG, &munmap);
return (error);
}
int
vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
@ -980,6 +993,22 @@ vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio));
}
int
vm_unmap_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
vm_paddr_t gpa, size_t len)
{
struct vm_pptdev_mmio pptmmio;
bzero(&pptmmio, sizeof(pptmmio));
pptmmio.bus = bus;
pptmmio.slot = slot;
pptmmio.func = func;
pptmmio.gpa = gpa;
pptmmio.len = len;
return (ioctl(ctx->fd, VM_UNMAP_PPTDEV_MMIO, &pptmmio));
}
int
vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
uint64_t addr, uint64_t msg, int numvec)
@ -1644,7 +1673,7 @@ vm_get_ioctls(size_t *len)
/* keep in sync with machine/vmm_dev.h */
static const cap_ioctl_t vm_ioctl_cmds[] = { VM_RUN, VM_SUSPEND, VM_REINIT,
VM_ALLOC_MEMSEG, VM_GET_MEMSEG, VM_MMAP_MEMSEG, VM_MMAP_MEMSEG,
VM_MMAP_GETNEXT, VM_SET_REGISTER, VM_GET_REGISTER,
VM_MMAP_GETNEXT, VM_MUNMAP_MEMSEG, VM_SET_REGISTER, VM_GET_REGISTER,
VM_SET_SEGMENT_DESCRIPTOR, VM_GET_SEGMENT_DESCRIPTOR,
VM_SET_REGISTER_SET, VM_GET_REGISTER_SET,
VM_SET_KERNEMU_DEV, VM_GET_KERNEMU_DEV,
@ -1654,7 +1683,7 @@ vm_get_ioctls(size_t *len)
VM_ISA_DEASSERT_IRQ, VM_ISA_PULSE_IRQ, VM_ISA_SET_IRQ_TRIGGER,
VM_SET_CAPABILITY, VM_GET_CAPABILITY, VM_BIND_PPTDEV,
VM_UNBIND_PPTDEV, VM_MAP_PPTDEV_MMIO, VM_PPTDEV_MSI,
VM_PPTDEV_MSIX, VM_PPTDEV_DISABLE_MSIX,
VM_PPTDEV_MSIX, VM_UNMAP_PPTDEV_MMIO, VM_PPTDEV_DISABLE_MSIX,
VM_INJECT_NMI, VM_STATS, VM_STAT_DESC,
VM_SET_X2APIC_STATE, VM_GET_X2APIC_STATE,
VM_GET_HPET_CAPABILITIES, VM_GET_GPA_PMAP, VM_GLA2GPA,

View file

@ -111,6 +111,8 @@ void *vm_create_devmem(struct vmctx *ctx, int segid, const char *name,
int vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid,
vm_ooffset_t segoff, size_t len, int prot);
int vm_munmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, size_t len);
int vm_create(const char *name);
int vm_get_device_fd(struct vmctx *ctx);
struct vmctx *vm_open(const char *name);
@ -176,6 +178,8 @@ int vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func);
int vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func);
int vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
int vm_unmap_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
vm_paddr_t gpa, size_t len);
int vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot,
int func, uint64_t addr, uint64_t msg, int numvec);
int vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot,

View file

@ -231,6 +231,7 @@ int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
*/
int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
size_t len, int prot, int flags);
int vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len);
int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
void vm_free_memseg(struct vm *vm, int ident);
int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);

View file

@ -49,6 +49,11 @@ struct vm_memmap {
#define VM_MEMMAP_F_WIRED 0x01
#define VM_MEMMAP_F_IOMMU 0x02
struct vm_munmap {
vm_paddr_t gpa;
size_t len;
};
#define VM_MEMSEG_NAME(m) ((m)->name[0] != '\0' ? (m)->name : NULL)
struct vm_memseg {
int segid;
@ -270,6 +275,7 @@ enum {
IOCNUM_MMAP_MEMSEG = 16,
IOCNUM_MMAP_GETNEXT = 17,
IOCNUM_GLA2GPA_NOFAULT = 18,
IOCNUM_MUNMAP_MEMSEG = 19,
/* register/state accessors */
IOCNUM_SET_REGISTER = 20,
@ -302,6 +308,7 @@ enum {
IOCNUM_PPTDEV_MSI = 43,
IOCNUM_PPTDEV_MSIX = 44,
IOCNUM_PPTDEV_DISABLE_MSIX = 45,
IOCNUM_UNMAP_PPTDEV_MMIO = 46,
/* statistics */
IOCNUM_VM_STATS = 50,
@ -358,6 +365,8 @@ enum {
_IOW('v', IOCNUM_MMAP_MEMSEG, struct vm_memmap)
#define VM_MMAP_GETNEXT \
_IOWR('v', IOCNUM_MMAP_GETNEXT, struct vm_memmap)
#define VM_MUNMAP_MEMSEG \
_IOW('v', IOCNUM_MUNMAP_MEMSEG, struct vm_munmap)
#define VM_SET_REGISTER \
_IOW('v', IOCNUM_SET_REGISTER, struct vm_register)
#define VM_GET_REGISTER \
@ -416,6 +425,8 @@ enum {
_IOW('v', IOCNUM_PPTDEV_MSIX, struct vm_pptdev_msix)
#define VM_PPTDEV_DISABLE_MSIX \
_IOW('v', IOCNUM_PPTDEV_DISABLE_MSIX, struct vm_pptdev)
#define VM_UNMAP_PPTDEV_MMIO \
_IOW('v', IOCNUM_UNMAP_PPTDEV_MMIO, struct vm_pptdev_mmio)
#define VM_INJECT_NMI \
_IOW('v', IOCNUM_INJECT_NMI, struct vm_nmi)
#define VM_STATS \

View file

@ -224,7 +224,7 @@ ppt_find(struct vm *vm, int bus, int slot, int func, struct pptdev **pptp)
}
static void
ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
ppt_unmap_all_mmio(struct vm *vm, struct pptdev *ppt)
{
int i;
struct pptseg *seg;
@ -412,7 +412,7 @@ ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
pci_save_state(ppt->dev);
ppt_pci_reset(ppt->dev);
pci_restore_state(ppt->dev);
ppt_unmap_mmio(vm, ppt);
ppt_unmap_all_mmio(vm, ppt);
ppt_teardown_msi(ppt);
ppt_teardown_msix(ppt);
iommu_remove_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
@ -466,6 +466,32 @@ ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
return (ENOSPC);
}
int
ppt_unmap_mmio(struct vm *vm, int bus, int slot, int func,
vm_paddr_t gpa, size_t len)
{
int i, error;
struct pptseg *seg;
struct pptdev *ppt;
error = ppt_find(vm, bus, slot, func, &ppt);
if (error)
return (error);
for (i = 0; i < MAX_MMIOSEGS; i++) {
seg = &ppt->mmio[i];
if (seg->gpa == gpa && seg->len == len) {
error = vm_unmap_mmio(vm, seg->gpa, seg->len);
if (error == 0) {
seg->gpa = 0;
seg->len = 0;
}
return (error);
}
}
return (ENOENT);
}
static int
pptintr(void *arg)
{

View file

@ -34,6 +34,8 @@
int ppt_unassign_all(struct vm *vm);
int ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
int ppt_unmap_mmio(struct vm *vm, int bus, int slot, int func,
vm_paddr_t gpa, size_t len);
int ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
uint64_t addr, uint64_t msg, int numvec);
int ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,

View file

@ -797,6 +797,24 @@ vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
return (0);
}
int
vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len)
{
struct mem_map *m;
int i;
for (i = 0; i < VM_MAX_MEMMAPS; i++) {
m = &vm->mem_maps[i];
if (m->gpa == gpa && m->len == len &&
(m->flags & VM_MEMMAP_F_IOMMU) == 0) {
vm_free_memmap(vm, i);
return (0);
}
}
return (EINVAL);
}
int
vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)

View file

@ -381,6 +381,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
struct vm_rtc_time *rtctime;
struct vm_rtc_data *rtcdata;
struct vm_memmap *mm;
struct vm_munmap *mu;
struct vm_cpu_topology *topology;
struct vm_readwrite_kernemu_device *kernemu;
uint64_t *regvals;
@ -435,6 +436,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
break;
case VM_MAP_PPTDEV_MMIO:
case VM_UNMAP_PPTDEV_MMIO:
case VM_BIND_PPTDEV:
case VM_UNBIND_PPTDEV:
#ifdef COMPAT_FREEBSD12
@ -442,6 +444,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
#endif
case VM_ALLOC_MEMSEG:
case VM_MMAP_MEMSEG:
case VM_MUNMAP_MEMSEG:
case VM_REINIT:
/*
* ioctls that operate on the entire virtual machine must
@ -525,6 +528,11 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
pptmmio->func, pptmmio->gpa, pptmmio->len,
pptmmio->hpa);
break;
case VM_UNMAP_PPTDEV_MMIO:
pptmmio = (struct vm_pptdev_mmio *)data;
error = ppt_unmap_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
pptmmio->func, pptmmio->gpa, pptmmio->len);
break;
case VM_BIND_PPTDEV:
pptdev = (struct vm_pptdev *)data;
error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
@ -643,6 +651,10 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
mm->len, mm->prot, mm->flags);
break;
case VM_MUNMAP_MEMSEG:
mu = (struct vm_munmap *)data;
error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
break;
#ifdef COMPAT_FREEBSD12
case VM_ALLOC_MEMSEG_FBSD12:
error = alloc_memseg(sc, (struct vm_memseg *)data,

View file

@ -506,10 +506,12 @@ pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size,
static void
modify_bar_registration(struct pci_devinst *pi, int idx, int registration)
{
struct pci_devemu *pe;
int error;
struct inout_port iop;
struct mem_range mr;
pe = pi->pi_d;
switch (pi->pi_bar[idx].type) {
case PCIBAR_IO:
bzero(&iop, sizeof(struct inout_port));
@ -523,6 +525,9 @@ modify_bar_registration(struct pci_devinst *pi, int idx, int registration)
error = register_inout(&iop);
} else
error = unregister_inout(&iop);
if (pe->pe_baraddr != NULL)
(*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration,
pi->pi_bar[idx].addr);
break;
case PCIBAR_MEM32:
case PCIBAR_MEM64:
@ -538,6 +543,9 @@ modify_bar_registration(struct pci_devinst *pi, int idx, int registration)
error = register_mem(&mr);
} else
error = unregister_mem(&mr);
if (pe->pe_baraddr != NULL)
(*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration,
pi->pi_bar[idx].addr);
break;
default:
error = EINVAL;

View file

@ -76,6 +76,9 @@ struct pci_devemu {
struct pci_devinst *pi, int baridx,
uint64_t offset, int size);
void (*pe_baraddr)(struct vmctx *ctx, struct pci_devinst *pi,
int baridx, int enabled, uint64_t address);
/* Save/restore device state */
int (*pe_snapshot)(struct vm_snapshot_meta *meta);
int (*pe_pause)(struct vmctx *ctx, struct pci_devinst *pi);

View file

@ -216,6 +216,30 @@ pci_fbuf_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
return (value);
}
static void
pci_fbuf_baraddr(struct vmctx *ctx, struct pci_devinst *pi, int baridx,
int enabled, uint64_t address)
{
struct pci_fbuf_softc *sc;
int prot;
if (baridx != 1)
return;
sc = pi->pi_arg;
if (!enabled && sc->fbaddr != 0) {
if (vm_munmap_memseg(ctx, sc->fbaddr, FB_SIZE) != 0)
EPRINTLN("pci_fbuf: munmap_memseg failed");
sc->fbaddr = 0;
} else if (sc->fb_base != NULL && sc->fbaddr == 0) {
prot = PROT_READ | PROT_WRITE;
if (vm_mmap_memseg(ctx, address, VM_FRAMEBUFFER, 0, FB_SIZE, prot) != 0)
EPRINTLN("pci_fbuf: mmap_memseg failed");
sc->fbaddr = address;
}
}
static int
pci_fbuf_parse_config(struct pci_fbuf_softc *sc, nvlist_t *nvl)
{
@ -457,6 +481,7 @@ struct pci_devemu pci_fbuf = {
.pe_init = pci_fbuf_init,
.pe_barwrite = pci_fbuf_write,
.pe_barread = pci_fbuf_read,
.pe_baraddr = pci_fbuf_baraddr,
#ifdef BHYVE_SNAPSHOT
.pe_snapshot = pci_fbuf_snapshot,
#endif

View file

@ -441,8 +441,8 @@ static int
init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base)
{
int b, s, f;
int error, idx;
size_t len, remaining;
int idx;
size_t remaining;
uint32_t table_size, table_offset;
uint32_t pba_size, pba_offset;
vm_paddr_t start;
@ -504,31 +504,6 @@ init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base)
}
}
/* Map everything before the MSI-X table */
if (table_offset > 0) {
len = table_offset;
error = vm_map_pptdev_mmio(ctx, b, s, f, start, len, base);
if (error)
return (error);
base += len;
start += len;
remaining -= len;
}
/* Skip the MSI-X table */
base += table_size;
start += table_size;
remaining -= table_size;
/* Map everything beyond the end of the MSI-X table */
if (remaining > 0) {
len = remaining;
error = vm_map_pptdev_mmio(ctx, b, s, f, start, len, base);
if (error)
return (error);
}
return (0);
}
@ -595,13 +570,6 @@ cfginitbar(struct vmctx *ctx, struct passthru_softc *sc)
error = init_msix_table(ctx, sc, base);
if (error)
return (-1);
} else if (bartype != PCIBAR_IO) {
/* Map the physical BAR in the guest MMIO space */
error = vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
pi->pi_bar[i].addr, pi->pi_bar[i].size, base);
if (error)
return (-1);
}
/*
@ -988,6 +956,92 @@ passthru_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
return (val);
}
static void
passthru_msix_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx,
int enabled, uint64_t address)
{
struct passthru_softc *sc;
size_t remaining;
uint32_t table_size, table_offset;
sc = pi->pi_arg;
table_offset = rounddown2(pi->pi_msix.table_offset, 4096);
if (table_offset > 0) {
if (!enabled) {
if (vm_unmap_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
sc->psc_sel.pc_dev,
sc->psc_sel.pc_func, address,
table_offset) != 0)
warnx("pci_passthru: unmap_pptdev_mmio failed");
} else {
if (vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
sc->psc_sel.pc_dev,
sc->psc_sel.pc_func, address,
table_offset,
sc->psc_bar[baridx].addr) != 0)
warnx("pci_passthru: map_pptdev_mmio failed");
}
}
table_size = pi->pi_msix.table_offset - table_offset;
table_size += pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE;
table_size = roundup2(table_size, 4096);
remaining = pi->pi_bar[baridx].size - table_offset - table_size;
if (remaining > 0) {
address += table_offset + table_size;
if (!enabled) {
if (vm_unmap_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
sc->psc_sel.pc_dev,
sc->psc_sel.pc_func, address,
remaining) != 0)
warnx("pci_passthru: unmap_pptdev_mmio failed");
} else {
if (vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
sc->psc_sel.pc_dev,
sc->psc_sel.pc_func, address,
remaining,
sc->psc_bar[baridx].addr +
table_offset + table_size) != 0)
warnx("pci_passthru: map_pptdev_mmio failed");
}
}
}
static void
passthru_mmio_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx,
int enabled, uint64_t address)
{
struct passthru_softc *sc;
sc = pi->pi_arg;
if (!enabled) {
if (vm_unmap_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
sc->psc_sel.pc_dev,
sc->psc_sel.pc_func, address,
sc->psc_bar[baridx].size) != 0)
warnx("pci_passthru: unmap_pptdev_mmio failed");
} else {
if (vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
sc->psc_sel.pc_dev,
sc->psc_sel.pc_func, address,
sc->psc_bar[baridx].size,
sc->psc_bar[baridx].addr) != 0)
warnx("pci_passthru: map_pptdev_mmio failed");
}
}
static void
passthru_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx,
int enabled, uint64_t address)
{
if (pi->pi_bar[baridx].type == PCIBAR_IO)
return;
if (baridx == pci_msix_table_bar(pi))
passthru_msix_addr(ctx, pi, baridx, enabled, address);
else
passthru_mmio_addr(ctx, pi, baridx, enabled, address);
}
struct pci_devemu passthru = {
.pe_emu = "passthru",
.pe_init = passthru_init,
@ -996,5 +1050,6 @@ struct pci_devemu passthru = {
.pe_cfgread = passthru_cfgread,
.pe_barwrite = passthru_write,
.pe_barread = passthru_read,
.pe_baraddr = passthru_addr,
};
PCI_EMUL_SET(passthru);