bhyve: add ROM emulation

Some PCI devices, especially GPUs, require a ROM to work properly.
The ROM is executed by boot firmware to initialize the device.
To add a ROM to a device, use the new rom option of the passthru device
(e.g. -s passthru,0/2/0,rom=<path>/<to>/<rom>).

It's necessary that the ROM is executed by the boot firmware.
It won't be executed by any OS.
Additionally, the boot firmware should be configured to execute the
ROM file.
For that reason, it's only possible to use a ROM when using
OVMF with bus enumeration enabled.

Differential Revision:	https://reviews.freebsd.org/D33129
Sponsored by:   Beckhoff Automation GmbH & Co. KG
MFC after:      1 month
This commit is contained in:
Corvin Köhne 2022-03-10 11:28:06 +01:00 committed by Emmanuel Vadot
parent 563fd2240e
commit e47fe3183e
7 changed files with 236 additions and 20 deletions

View file

@ -73,6 +73,7 @@ enum {
VM_SYSMEM,
VM_BOOTROM,
VM_FRAMEBUFFER,
VM_PCIROM,
};
/*

View file

@ -134,7 +134,7 @@ struct mem_seg {
bool sysmem;
struct vm_object *object;
};
#define VM_MAX_MEMSEGS 3
#define VM_MAX_MEMSEGS 4
struct mem_map {
vm_paddr_t gpa;

View file

@ -528,6 +528,11 @@ Connect to a PCI device on the host at the selector described by
and
.Ar function
numbers.
.It Li rom= Ns Ar romfile
Add
.Ar romfile
as option ROM to the PCI device.
The ROM will be loaded by firmware and should be capable of initializing the device.
.El
.Pp
Guest memory must be wired using the

View file

@ -515,6 +515,8 @@ Host PCI bus address of device to pass through.
Host PCI slot address of device to pass through.
.It Va func Ta integer Ta Ta
Host PCI function address of device to pass through.
.It Va rom Ta path Ta Ta
ROM file of the device which will be executed by OVMF to init the device.
.El
.Ss VirtIO 9p Settings
Each VirtIO 9p device exposes a single filesystem from a host path.

View file

@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/linker_set.h>
#include <sys/mman.h>
#include <ctype.h>
#include <err.h>
@ -101,6 +102,9 @@ static struct businfo *pci_businfo[MAXBUSES];
SET_DECLARE(pci_devemu_set, struct pci_devemu);
static uint64_t pci_emul_iobase;
static uint8_t *pci_emul_rombase;
static uint64_t pci_emul_romoffset;
static uint8_t *pci_emul_romlim;
static uint64_t pci_emul_membase32;
static uint64_t pci_emul_membase64;
static uint64_t pci_emul_memlim64;
@ -118,6 +122,8 @@ TAILQ_HEAD(pci_bar_list, pci_bar_allocation) pci_bars = TAILQ_HEAD_INITIALIZER(
#define PCI_EMUL_IOBASE 0x2000
#define PCI_EMUL_IOLIMIT 0x10000
#define PCI_EMUL_ROMSIZE 0x10000000
#define PCI_EMUL_ECFG_BASE 0xE0000000 /* 3.5GB */
#define PCI_EMUL_ECFG_SIZE (MAXBUSES * 1024 * 1024) /* 1MB per bus */
SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE);
@ -562,6 +568,12 @@ modify_bar_registration(struct pci_devinst *pi, int idx, int registration)
(*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration,
pi->pi_bar[idx].addr);
break;
case PCIBAR_ROM:
error = 0;
if (pe->pe_baraddr != NULL)
(*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration,
pi->pi_bar[idx].addr);
break;
default:
error = EINVAL;
break;
@ -583,6 +595,14 @@ register_bar(struct pci_devinst *pi, int idx)
modify_bar_registration(pi, idx, 1);
}
/*
 * Is the ROM enabled for the emulated pci device?
 *
 * Returns 1 when the enable bit (PCIM_BIOS_ENABLE) is set in the lobits
 * stored for the ROM slot (PCI_ROM_IDX) of the device, 0 otherwise.
 */
static int
romen(struct pci_devinst *pi)
{
	if ((pi->pi_bar[PCI_ROM_IDX].lobits & PCIM_BIOS_ENABLE) !=
	    PCIM_BIOS_ENABLE)
		return (0);

	return (1);
}
/* Are we decoding i/o port accesses for the emulated pci device? */
static int
porten(struct pci_devinst *pi)
@ -649,7 +669,8 @@ int
pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type,
uint64_t size)
{
assert(idx >= 0 && idx <= PCI_BARMAX);
assert((type == PCIBAR_ROM) || (idx >= 0 && idx <= PCI_BARMAX));
assert((type != PCIBAR_ROM) || (idx == PCI_ROM_IDX));
if ((size & (size - 1)) != 0)
size = 1UL << flsl(size); /* round up to a power of 2 */
@ -658,6 +679,9 @@ pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type,
if (type == PCIBAR_IO) {
if (size < 4)
size = 4;
} else if (type == PCIBAR_ROM) {
if (size < ~PCIM_BIOS_ADDR_MASK + 1)
size = ~PCIM_BIOS_ADDR_MASK + 1;
} else {
if (size < 16)
size = 16;
@ -773,6 +797,13 @@ pci_emul_assign_bar(struct pci_devinst *const pdi, const int idx,
mask = PCIM_BAR_MEM_BASE;
lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
break;
case PCIBAR_ROM:
/* do not claim memory for ROM. OVMF will do it for us. */
baseptr = NULL;
limit = 0;
mask = PCIM_BIOS_ADDR_MASK;
lobits = 0;
break;
default:
printf("pci_emul_alloc_base: invalid bar type %d\n", type);
assert(0);
@ -807,7 +838,57 @@ pci_emul_assign_bar(struct pci_devinst *const pdi, const int idx,
pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32);
}
register_bar(pdi, idx);
if (type != PCIBAR_ROM) {
register_bar(pdi, idx);
}
return (0);
}
/*
 * Reserve room for an option ROM of `size` bytes inside the shared ROM
 * segment and allocate the ROM BAR of `pdi` for it.
 *
 * The ROM segment (PCI_EMUL_ROMSIZE bytes, created via vm_create_devmem)
 * is set up lazily on the first call and then handed out linearly: each
 * ROM is placed at the current pci_emul_romoffset, which is advanced by
 * the rounded ROM size afterwards.
 *
 * pdi  - device that gets the ROM; its pi_romoffset is set to the offset
 *        of the ROM within the segment.
 * size - size of the ROM image in bytes.  It is rounded up to a power of
 *        two and to at least ~PCIM_BIOS_ADDR_MASK + 1 bytes.
 * addr - out parameter; on success receives the host address at which
 *        the caller can store the ROM image.
 *
 * Returns 0 on success, -1 if the segment can't be created or has no
 * space left, or the error returned by pci_emul_alloc_bar().
 */
int
pci_emul_alloc_rom(struct pci_devinst *const pdi, const uint64_t size,
    void **const addr)
{
	/* allocate ROM space once on first call */
	if (pci_emul_rombase == NULL) {
		pci_emul_rombase = vm_create_devmem(pdi->pi_vmctx, VM_PCIROM,
		    "pcirom", PCI_EMUL_ROMSIZE);
		if (pci_emul_rombase == MAP_FAILED) {
			/*
			 * Forget the failed mapping. Otherwise a later call
			 * would treat MAP_FAILED as a valid segment base.
			 */
			pci_emul_rombase = NULL;
			warnx("%s: failed to create rom segment", __func__);
			return (-1);
		}
		pci_emul_romlim = pci_emul_rombase + PCI_EMUL_ROMSIZE;
		pci_emul_romoffset = 0;
	}

	/*
	 * ROM size should be a power of 2 and at least the minimum ROM BAR
	 * size. Only round up when size is not already a power of 2, like
	 * pci_emul_alloc_bar() does, so that such sizes are not doubled.
	 */
	uint64_t rom_size = size;
	if ((rom_size & (rom_size - 1)) != 0)
		rom_size = 1UL << flsl(rom_size);
	rom_size = MAX(rom_size, ~PCIM_BIOS_ADDR_MASK + 1);

	/* check if ROM fits into ROM space */
	if (pci_emul_romoffset + rom_size > PCI_EMUL_ROMSIZE) {
		warnx("%s: no space left in rom segment:", __func__);
		warnx("%16lu bytes left",
		    PCI_EMUL_ROMSIZE - pci_emul_romoffset);
		warnx("%16lu bytes required by %d/%d/%d", rom_size, pdi->pi_bus,
		    pdi->pi_slot, pdi->pi_func);
		return (-1);
	}

	/* allocate ROM BAR */
	const int error = pci_emul_alloc_bar(pdi, PCI_ROM_IDX, PCIBAR_ROM,
	    rom_size);
	if (error)
		return (error);

	/* return address */
	*addr = pci_emul_rombase + pci_emul_romoffset;

	/* save offset into ROM Space */
	pdi->pi_romoffset = pci_emul_romoffset;

	/* increase offset for next ROM */
	pci_emul_romoffset += rom_size;

	return (0);
}
@ -1885,7 +1966,7 @@ pci_emul_cmd_changed(struct pci_devinst *pi, uint16_t old)
* If the MMIO or I/O address space decoding has changed then
* register/unregister all BARs that decode that address space.
*/
for (i = 0; i <= PCI_BARMAX; i++) {
for (i = 0; i <= PCI_BARMAX_WITH_ROM; i++) {
switch (pi->pi_bar[i].type) {
case PCIBAR_NONE:
case PCIBAR_MEMHI64:
@ -1899,6 +1980,11 @@ pci_emul_cmd_changed(struct pci_devinst *pi, uint16_t old)
unregister_bar(pi, i);
}
break;
case PCIBAR_ROM:
/* skip (un-)register of ROM if it is disabled */
if (!romen(pi))
break;
/* fallthrough */
case PCIBAR_MEM32:
case PCIBAR_MEM64:
/* MMIO address space decoding changed? */
@ -2019,16 +2105,21 @@ pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func,
return;
/*
* Special handling for write to BAR registers
* Special handling for write to BAR and ROM registers
*/
if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) {
if ((coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) ||
(coff >= PCIR_BIOS && coff < PCIR_BIOS + 4)) {
/*
* Ignore writes to BAR registers that are not
* 4-byte aligned.
*/
if (bytes != 4 || (coff & 0x3) != 0)
return;
idx = (coff - PCIR_BAR(0)) / 4;
if (coff != PCIR_BIOS) {
idx = (coff - PCIR_BAR(0)) / 4;
} else {
idx = PCI_ROM_IDX;
}
mask = ~(pi->pi_bar[idx].size - 1);
switch (pi->pi_bar[idx].type) {
case PCIBAR_NONE:
@ -2071,6 +2162,20 @@ pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func,
PCIBAR_MEMHI64);
}
break;
case PCIBAR_ROM:
addr = bar = *eax & mask;
if (memen(pi) && romen(pi)) {
unregister_bar(pi, idx);
}
pi->pi_bar[idx].addr = addr;
pi->pi_bar[idx].lobits = *eax &
PCIM_BIOS_ENABLE;
/* romen could have changed its value */
if (memen(pi) && romen(pi)) {
register_bar(pi, idx);
}
bar |= pi->pi_bar[idx].lobits;
break;
default:
assert(0);
}

View file

@ -42,6 +42,8 @@
#include <assert.h>
#define PCI_BARMAX PCIR_MAX_BAR_0 /* BAR registers in a Type 0 header */
#define PCI_BARMAX_WITH_ROM (PCI_BARMAX + 1)
#define PCI_ROM_IDX (PCI_BARMAX + 1)
struct vmctx;
struct pci_devinst;
@ -92,7 +94,8 @@ enum pcibar_type {
PCIBAR_IO,
PCIBAR_MEM32,
PCIBAR_MEM64,
PCIBAR_MEMHI64
PCIBAR_MEMHI64,
PCIBAR_ROM,
};
struct pcibar {
@ -165,7 +168,9 @@ struct pci_devinst {
void *pi_arg; /* devemu-private data */
u_char pi_cfgdata[PCI_REGMAX + 1];
struct pcibar pi_bar[PCI_BARMAX + 1];
/* ROM is handled like a BAR */
struct pcibar pi_bar[PCI_BARMAX_WITH_ROM + 1];
uint64_t pi_romoffset;
};
struct msicap {
@ -229,6 +234,8 @@ int init_pci(struct vmctx *ctx);
void pci_callback(void);
int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx,
enum pcibar_type type, uint64_t size);
int pci_emul_alloc_rom(struct pci_devinst *const pdi, const uint64_t size,
void **const addr);
int pci_emul_add_msicap(struct pci_devinst *pi, int msgnum);
int pci_emul_add_pciecap(struct pci_devinst *pi, int pcie_device_type);
void pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes,

View file

@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mman.h>
#include <sys/pciio.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <dev/io/iodev.h>
#include <dev/pci/pcireg.h>
@ -80,7 +81,8 @@ static int pcifd = -1;
struct passthru_softc {
struct pci_devinst *psc_pi;
struct pcibar psc_bar[PCI_BARMAX + 1];
/* ROM is handled like a BAR */
struct pcibar psc_bar[PCI_BARMAX_WITH_ROM + 1];
struct {
int capoff;
int msgctrl;
@ -659,6 +661,58 @@ passthru_legacy_config(nvlist_t *nvl, const char *opts)
set_config_value_node(nvl, "slot", value);
snprintf(value, sizeof(value), "%d", func);
set_config_value_node(nvl, "func", value);
return (pci_parse_legacy_config(nvl, strchr(opts, ',')));
}
/*
 * Load the option ROM image `romfile` for the passthru device `sc`.
 *
 * The file is mapped read-only, space for it is requested through
 * pci_emul_alloc_rom(), and the image is copied there.  The ROM slot
 * (PCI_ROM_IDX) of sc->psc_bar is filled in with the type, the address
 * returned by pci_emul_alloc_rom() and the size of the file.
 *
 * A NULL romfile means no ROM was configured and is not an error.
 *
 * Returns 0 on success (or when romfile is NULL), -1 if the file can't
 * be opened, stat'ed or mapped, or the error from pci_emul_alloc_rom().
 */
static int
passthru_init_rom(struct vmctx *const ctx, struct passthru_softc *const sc,
    const char *const romfile)
{
	struct stat st;
	void *image, *dest;
	uint64_t image_len;
	int romfd, rc;

	/* nothing to do if no ROM was requested */
	if (romfile == NULL)
		return (0);

	romfd = open(romfile, O_RDONLY);
	if (romfd < 0) {
		warnx("%s: can't open romfile \"%s\"", __func__, romfile);
		return (-1);
	}

	/* every failure up to the ROM allocation reports -1 */
	rc = -1;

	if (fstat(romfd, &st) < 0) {
		warnx("%s: can't fstat romfile \"%s\"", __func__, romfile);
		goto out_close;
	}
	image_len = st.st_size;

	image = mmap(NULL, image_len, PROT_READ, MAP_SHARED, romfd, 0);
	if (image == MAP_FAILED) {
		warnx("%s: unable to mmap romfile \"%s\" (%d)", __func__,
		    romfile, errno);
		goto out_close;
	}

	rc = pci_emul_alloc_rom(sc->psc_pi, image_len, &dest);
	if (rc != 0) {
		warnx("%s: failed to alloc rom segment", __func__);
		goto out_unmap;
	}

	/* copy the image into the ROM segment and remember where it lives */
	memcpy(dest, image, image_len);
	sc->psc_bar[PCI_ROM_IDX].type = PCIBAR_ROM;
	sc->psc_bar[PCI_ROM_IDX].addr = (uint64_t)dest;
	sc->psc_bar[PCI_ROM_IDX].size = image_len;

out_unmap:
	/* the file mapping is only needed for the copy */
	munmap(image, image_len);
out_close:
	close(romfd);
	return (rc);
}
@ -707,7 +761,15 @@ passthru_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl)
sc->psc_pi = pi;
/* initialize config space */
error = cfginit(ctx, pi, bus, slot, func);
if ((error = cfginit(ctx, pi, bus, slot, func)) != 0)
goto done;
/* initialize ROM */
if ((error = passthru_init_rom(ctx, sc,
get_config_value_node(nvl, "rom"))) != 0)
goto done;
error = 0; /* success */
done:
if (error) {
free(sc);
@ -719,7 +781,8 @@ passthru_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl)
static int
bar_access(int coff)
{
if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1))
if ((coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) ||
coff == PCIR_BIOS)
return (1);
else
return (0);
@ -1011,16 +1074,49 @@ passthru_mmio_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx,
}
static void
passthru_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx,
int enabled, uint64_t address)
passthru_addr_rom(struct pci_devinst *const pi, const int idx,
const int enabled)
{
const uint64_t addr = pi->pi_bar[idx].addr;
const uint64_t size = pi->pi_bar[idx].size;
if (pi->pi_bar[baridx].type == PCIBAR_IO)
return;
if (baridx == pci_msix_table_bar(pi))
passthru_msix_addr(ctx, pi, baridx, enabled, address);
else
passthru_mmio_addr(ctx, pi, baridx, enabled, address);
if (!enabled) {
if (vm_munmap_memseg(pi->pi_vmctx, addr, size) != 0) {
errx(4, "%s: munmap_memseg @ [%016lx - %016lx] failed",
__func__, addr, addr + size);
}
} else {
if (vm_mmap_memseg(pi->pi_vmctx, addr, VM_PCIROM,
pi->pi_romoffset, size, PROT_READ | PROT_EXEC) != 0) {
errx(4, "%s: mnmap_memseg @ [%016lx - %016lx] failed",
__func__, addr, addr + size);
}
}
}
/*
 * BAR address callback of the passthru device: dispatch a change of the
 * BAR at `baridx` to the handler matching the BAR type.
 *
 * I/O BARs need no action here, the ROM goes to passthru_addr_rom(), and
 * 32/64 bit memory BARs go to passthru_msix_addr() when they hold the
 * MSI-X table (per pci_msix_table_bar()) or to passthru_mmio_addr()
 * otherwise.  Any other BAR type terminates the process via errx().
 */
static void
passthru_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx,
    int enabled, uint64_t address)
{
	if (pi->pi_bar[baridx].type == PCIBAR_IO) {
		/* IO BARs are emulated */
		return;
	}

	if (pi->pi_bar[baridx].type == PCIBAR_ROM) {
		passthru_addr_rom(pi, baridx, enabled);
		return;
	}

	/* only memory BARs are left as valid types */
	if (pi->pi_bar[baridx].type != PCIBAR_MEM32 &&
	    pi->pi_bar[baridx].type != PCIBAR_MEM64) {
		errx(4, "%s: invalid BAR type %d", __func__,
		    pi->pi_bar[baridx].type);
	}

	if (baridx == pci_msix_table_bar(pi)) {
		passthru_msix_addr(ctx, pi, baridx, enabled, address);
	} else {
		passthru_mmio_addr(ctx, pi, baridx, enabled, address);
	}
}
struct pci_devemu passthru = {