From eeb6b13a5a45d16a7b348891921d496bc5d6df2c Mon Sep 17 00:00:00 2001 From: Don Slutz Date: Thu, 30 Apr 2015 14:27:09 -0400 Subject: [PATCH 01/29] xen-hvm: Add trace to ioreq Signed-off-by: Stefano Stabellini Signed-off-by: Don Slutz --- trace-events | 7 +++++++ xen-hvm.c | 21 +++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/trace-events b/trace-events index 0a82f0c1f2..985b041873 100644 --- a/trace-events +++ b/trace-events @@ -936,6 +936,13 @@ xen_map_portio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: % xen_unmap_portio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: %#"PRIx64" end: %#"PRIx64 xen_map_pcidev(uint32_t id, uint8_t bus, uint8_t dev, uint8_t func) "id: %u bdf: %02x.%02x.%02x" xen_unmap_pcidev(uint32_t id, uint8_t bus, uint8_t dev, uint8_t func) "id: %u bdf: %02x.%02x.%02x" +handle_ioreq(void *req, uint32_t type, uint32_t dir, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p type=%d dir=%d df=%d ptr=%d port=%#"PRIx64" data=%#"PRIx64" count=%d size=%d" +handle_ioreq_read(void *req, uint32_t type, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p read type=%d df=%d ptr=%d port=%#"PRIx64" data=%#"PRIx64" count=%d size=%d" +handle_ioreq_write(void *req, uint32_t type, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p write type=%d df=%d ptr=%d port=%#"PRIx64" data=%#"PRIx64" count=%d size=%d" +cpu_ioreq_pio(void *req, uint32_t dir, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p pio dir=%d df=%d ptr=%d port=%#"PRIx64" data=%#"PRIx64" count=%d size=%d" +cpu_ioreq_pio_read_reg(void *req, uint64_t data, uint64_t addr, uint32_t size) "I/O=%p pio read reg data=%#"PRIx64" port=%#"PRIx64" size=%d" +cpu_ioreq_pio_write_reg(void *req, uint64_t data, uint64_t addr, uint32_t size) "I/O=%p pio write reg data=%#"PRIx64" port=%#"PRIx64" size=%d" +cpu_ioreq_move(void *req, uint32_t dir, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p copy dir=%d df=%d ptr=%d port=%#"PRIx64" data=%#"PRIx64" count=%d size=%d" # xen-mapcache.c xen_map_cache(uint64_t phys_addr) "want %#"PRIx64 diff --git a/xen-hvm.c b/xen-hvm.c index 040846236c..d17d3f33bc 100644 --- a/xen-hvm.c +++ b/xen-hvm.c @@ -814,9 +814,14 @@ static void cpu_ioreq_pio(ioreq_t *req) { uint32_t i; + trace_cpu_ioreq_pio(req, req->dir, req->df, req->data_is_ptr, req->addr, + req->data, req->count, req->size); + if (req->dir == IOREQ_READ) { if (!req->data_is_ptr) { req->data = do_inp(req->addr, req->size); + trace_cpu_ioreq_pio_read_reg(req, req->data, req->addr, + req->size); } else { uint32_t tmp; @@ -827,6 +832,8 @@ static void cpu_ioreq_pio(ioreq_t *req) } } else if (req->dir == IOREQ_WRITE) { if (!req->data_is_ptr) { + trace_cpu_ioreq_pio_write_reg(req, req->data, req->addr, + req->size); do_outp(req->addr, req->size, req->data); } else { for (i = 0; i < req->count; i++) { @@ -843,6 +850,9 @@ static void cpu_ioreq_move(ioreq_t *req) { uint32_t i; + trace_cpu_ioreq_move(req, req->dir, req->df, req->data_is_ptr, req->addr, + req->data, req->count, req->size); + if (!req->data_is_ptr) { if (req->dir == IOREQ_READ) { for (i = 0; i < req->count; i++) { @@ -915,11 +925,18 @@ static void handle_vmport_ioreq(XenIOState *state, ioreq_t *req) static void handle_ioreq(XenIOState *state, ioreq_t *req) { + trace_handle_ioreq(req, req->type, req->dir, req->df, req->data_is_ptr, + req->addr, req->data, req->count, req->size); + if (!req->data_is_ptr && (req->dir == IOREQ_WRITE) && (req->size < sizeof (target_ulong))) { req->data &= ((target_ulong) 1 << (8 * req->size)) - 1; } + if (req->dir == IOREQ_WRITE) + trace_handle_ioreq_write(req, req->type, req->df, req->data_is_ptr, + req->addr, req->data, req->count, req->size); + switch (req->type) { case IOREQ_TYPE_PIO: cpu_ioreq_pio(req); @@ -959,6 +976,10 @@ static void handle_ioreq(XenIOState *state, ioreq_t *req) default: hw_error("Invalid ioreq type 0x%x\n", req->type); } + if (req->dir == IOREQ_READ) { + trace_handle_ioreq_read(req, req->type, req->df, req->data_is_ptr, + req->addr, req->data, req->count, req->size); + } } static int handle_buffered_iopage(XenIOState *state) From 7bb836e4a27b7e364f3c8c4ebe41172fc8c70f75 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 15 Jul 2015 13:37:41 +0800 Subject: [PATCH 02/29] i440fx: make types configurable at run-time IGD passthrough wants to supply a different pci and host devices, inheriting i440fx devices. Make types configurable. Signed-off-by: Michael S. Tsirkin Signed-off-by: Tiejun Chen Signed-off-by: Stefano Stabellini --- hw/i386/pc_piix.c | 4 +++- hw/pci-host/piix.c | 9 ++++----- include/hw/i386/pc.h | 6 +++++- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index b82921d367..aaf7cfb051 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -196,7 +196,9 @@ static void pc_init1(MachineState *machine) } if (pci_enabled) { - pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, &isa_bus, gsi, + pci_bus = i440fx_init(TYPE_I440FX_PCI_HOST_BRIDGE, + TYPE_I440FX_PCI_DEVICE, + &i440fx_state, &piix3_devfn, &isa_bus, gsi, system_memory, system_io, machine->ram_size, pcms->below_4g_mem_size, pcms->above_4g_mem_size, diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c index 1cb25f3fa6..e19badb23c 100644 --- a/hw/pci-host/piix.c +++ b/hw/pci-host/piix.c @@ -40,7 +40,6 @@ * http://download.intel.com/design/chipsets/datashts/29054901.pdf */ -#define TYPE_I440FX_PCI_HOST_BRIDGE "i440FX-pcihost" #define I440FX_PCI_HOST_BRIDGE(obj) \ OBJECT_CHECK(I440FXState, (obj), TYPE_I440FX_PCI_HOST_BRIDGE) @@ -95,7 +94,6 @@ typedef struct PIIX3State { #define PIIX3_PCI_DEVICE(obj) \ OBJECT_CHECK(PIIX3State, (obj), TYPE_PIIX3_PCI_DEVICE) -#define TYPE_I440FX_PCI_DEVICE "i440FX" #define I440FX_PCI_DEVICE(obj) \ OBJECT_CHECK(PCII440FXState, (obj), TYPE_I440FX_PCI_DEVICE) @@ -305,7 +303,8 @@ static void i440fx_realize(PCIDevice *dev, Error **errp) dev->config[I440FX_SMRAM] = 0x02; } -PCIBus *i440fx_init(PCII440FXState **pi440fx_state, +PCIBus *i440fx_init(const char *host_type, const char *pci_type, + PCII440FXState **pi440fx_state, int *piix3_devfn, ISABus **isa_bus, qemu_irq *pic, MemoryRegion *address_space_mem, @@ -325,7 +324,7 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state, unsigned i; I440FXState *i440fx; - dev = qdev_create(NULL, TYPE_I440FX_PCI_HOST_BRIDGE); + dev = qdev_create(NULL, host_type); s = PCI_HOST_BRIDGE(dev); b = pci_bus_new(dev, NULL, pci_address_space, address_space_io, 0, TYPE_PCI_BUS); @@ -333,7 +332,7 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state, object_property_add_child(qdev_get_machine(), "i440fx", OBJECT(dev), NULL); qdev_init_nofail(dev); - d = pci_create_simple(b, 0, TYPE_I440FX_PCI_DEVICE); + d = pci_create_simple(b, 0, pci_type); *pi440fx_state = I440FX_PCI_DEVICE(d); f = *pi440fx_state; f->system_memory = address_space_mem; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index d0cad87d21..6edacbd8fc 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -219,7 +219,11 @@ extern int no_hpet; struct PCII440FXState; typedef struct PCII440FXState PCII440FXState; -PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix_devfn, +#define TYPE_I440FX_PCI_HOST_BRIDGE "i440FX-pcihost" +#define TYPE_I440FX_PCI_DEVICE "i440FX" + +PCIBus *i440fx_init(const char *host_type, const char *pci_type, + PCII440FXState **pi440fx_state, int *piix_devfn, ISABus **isa_bus, qemu_irq *pic, MemoryRegion *address_space_mem, MemoryRegion *address_space_io, From 76d39ab49ec68608a3c0bafec7bed70f21302b0e Mon Sep 17 00:00:00 2001 From: Tiejun Chen Date: Wed, 15 Jul 2015 13:37:42 +0800 Subject: [PATCH 03/29] pc_init1: pass parameters just with types Pass types to configure pc_init1(). Signed-off-by: Tiejun Chen Signed-off-by: Stefano Stabellini Acked-by: Michael S. Tsirkin --- hw/i386/pc_piix.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index aaf7cfb051..626a19fb06 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -76,7 +76,8 @@ static bool has_reserved_memory = true; static bool kvmclock_enabled = true; /* PC hardware initialisation */ -static void pc_init1(MachineState *machine) +static void pc_init1(MachineState *machine, + const char *host_type, const char *pci_type) { PCMachineState *pcms = PC_MACHINE(machine); MemoryRegion *system_memory = get_system_memory(); @@ -196,8 +197,8 @@ static void pc_init1(MachineState *machine) } if (pci_enabled) { - pci_bus = i440fx_init(TYPE_I440FX_PCI_HOST_BRIDGE, - TYPE_I440FX_PCI_DEVICE, + pci_bus = i440fx_init(host_type, + pci_type, &i440fx_state, &piix3_devfn, &isa_bus, gsi, system_memory, system_io, machine->ram_size, pcms->below_4g_mem_size, @@ -416,7 +417,7 @@ static void pc_init_isa(MachineState *machine) } x86_cpu_compat_kvm_no_autoenable(FEAT_KVM, 1 << KVM_FEATURE_PV_EOI); enable_compat_apic_id_mode(); - pc_init1(machine); + pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, TYPE_I440FX_PCI_DEVICE); } #ifdef CONFIG_XEN @@ -424,7 +425,7 @@ static void pc_xen_hvm_init(MachineState *machine) { PCIBus *bus; - pc_init1(machine); + pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, TYPE_I440FX_PCI_DEVICE); bus = pci_find_primary_bus(); if (bus != NULL) { @@ -440,7 +441,8 @@ static void pc_xen_hvm_init(MachineState *machine) if (compat) { \ compat(machine); \ } \ - pc_init1(machine); \ + pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ + TYPE_I440FX_PCI_DEVICE); \ } \ DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) From 595a4f07d6bd18ba11c78b95d6a78089fee26eca Mon Sep 17 00:00:00 2001 From: Tiejun Chen Date: Wed, 15 Jul 2015 13:37:43 +0800 Subject: [PATCH 04/29] piix: create host bridge to passthrough Implement a pci host bridge specific to passthrough. Actually this just inherits the standard one. And we also just expose a minimal real host bridge pci configuration subset. [Replace pread with lseek and read to fix Windows build] Signed-off-by: Tiejun Chen Signed-off-by: Stefano Stabellini Acked-by: Michael S. Tsirkin --- hw/pci-host/piix.c | 85 ++++++++++++++++++++++++++++++++++++++++++++ include/hw/i386/pc.h | 2 ++ 2 files changed, 87 insertions(+) diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c index e19badb23c..1fb71c8081 100644 --- a/hw/pci-host/piix.c +++ b/hw/pci-host/piix.c @@ -739,6 +739,90 @@ static const TypeInfo i440fx_info = { .class_init = i440fx_class_init, }; +/* IGD Passthrough Host Bridge. */ +typedef struct { + uint8_t offset; + uint8_t len; +} IGDHostInfo; + +/* Here we just expose minimal host bridge offset subset. */ +static const IGDHostInfo igd_host_bridge_infos[] = { + {0x08, 2}, /* revision id */ + {0x2c, 2}, /* sybsystem vendor id */ + {0x2e, 2}, /* sybsystem id */ + {0x50, 2}, /* SNB: processor graphics control register */ + {0x52, 2}, /* processor graphics control register */ + {0xa4, 4}, /* SNB: graphics base of stolen memory */ + {0xa8, 4}, /* SNB: base of GTT stolen memory */ +}; + +static int host_pci_config_read(int pos, int len, uint32_t val) +{ + char path[PATH_MAX]; + int config_fd; + ssize_t size = sizeof(path); + /* Access real host bridge. */ + int rc = snprintf(path, size, "/sys/bus/pci/devices/%04x:%02x:%02x.%d/%s", + 0, 0, 0, 0, "config"); + + if (rc >= size || rc < 0) { + return -ENODEV; + } + + config_fd = open(path, O_RDWR); + if (config_fd < 0) { + return -ENODEV; + } + + if (lseek(config_fd, pos, SEEK_SET) != pos) { + return -errno; + } + do { + rc = read(config_fd, (uint8_t *)&val, len); + } while (rc < 0 && (errno == EINTR || errno == EAGAIN)); + if (rc != len) { + return -errno; + } + + return 0; +} + +static int igd_pt_i440fx_initfn(struct PCIDevice *pci_dev) +{ + uint32_t val = 0; + int rc, i, num; + int pos, len; + + num = ARRAY_SIZE(igd_host_bridge_infos); + for (i = 0; i < num; i++) { + pos = igd_host_bridge_infos[i].offset; + len = igd_host_bridge_infos[i].len; + rc = host_pci_config_read(pos, len, val); + if (rc) { + return -ENODEV; + } + pci_default_write_config(pci_dev, pos, val, len); + } + + return 0; +} + +static void igd_passthrough_i440fx_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->init = igd_pt_i440fx_initfn; + dc->desc = "IGD Passthrough Host bridge"; +} + +static const TypeInfo igd_passthrough_i440fx_info = { + .name = TYPE_IGD_PASSTHROUGH_I440FX_PCI_DEVICE, + .parent = TYPE_I440FX_PCI_DEVICE, + .instance_size = sizeof(PCII440FXState), + .class_init = igd_passthrough_i440fx_class_init, +}; + static const char *i440fx_pcihost_root_bus_path(PCIHostState *host_bridge, PCIBus *rootbus) { @@ -780,6 +864,7 @@ static const TypeInfo i440fx_pcihost_info = { static void i440fx_register_types(void) { type_register_static(&i440fx_info); + type_register_static(&igd_passthrough_i440fx_info); type_register_static(&piix3_pci_type_info); type_register_static(&piix3_info); type_register_static(&piix3_xen_info); diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 6edacbd8fc..5cda2a3ff6 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -222,6 +222,8 @@ typedef struct PCII440FXState PCII440FXState; #define TYPE_I440FX_PCI_HOST_BRIDGE "i440FX-pcihost" #define TYPE_I440FX_PCI_DEVICE "i440FX" +#define TYPE_IGD_PASSTHROUGH_I440FX_PCI_DEVICE "igd-passthrough-i440FX" + PCIBus *i440fx_init(const char *host_type, const char *pci_type, PCII440FXState **pi440fx_state, int *piix_devfn, ISABus **isa_bus, qemu_irq *pic, From bcd7461e7e84a65f1dfaa6af6e00aa498978d29d Mon Sep 17 00:00:00 2001 From: Tiejun Chen Date: Wed, 15 Jul 2015 13:37:44 +0800 Subject: [PATCH 05/29] hw/pci-assign: split pci-assign.c We will try to reuse assign_dev_load_option_rom in xen side, and especially its a good beginning to unify pci assign codes both on kvm and xen in the future. [Fix build for Windows] Signed-off-by: Tiejun Chen Signed-off-by: Stefano Stabellini Acked-by: Michael S. Tsirkin --- hw/i386/Makefile.objs | 1 + hw/i386/kvm/pci-assign.c | 84 +++----------------------------- hw/i386/pci-assign-load-rom.c | 91 +++++++++++++++++++++++++++++++++++ include/hw/pci/pci-assign.h | 27 +++++++++++ 4 files changed, 127 insertions(+), 76 deletions(-) create mode 100644 hw/i386/pci-assign-load-rom.c create mode 100644 include/hw/pci/pci-assign.h diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs index ebd1015a08..c250deb848 100644 --- a/hw/i386/Makefile.objs +++ b/hw/i386/Makefile.objs @@ -7,6 +7,7 @@ obj-$(CONFIG_XEN) += ../xenpv/ xen/ obj-y += kvmvapic.o obj-y += acpi-build.o +obj-y += pci-assign-load-rom.o gen-hex-y += hw/i386/acpi-dsdt.hex gen-hex-y += hw/i386/q35-acpi-dsdt.hex diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c index 74d22f4fd2..b1beaa66b2 100644 --- a/hw/i386/kvm/pci-assign.c +++ b/hw/i386/kvm/pci-assign.c @@ -37,6 +37,7 @@ #include "hw/pci/pci.h" #include "hw/pci/msi.h" #include "kvm_i386.h" +#include "hw/pci/pci-assign.h" #define MSIX_PAGE_SIZE 0x1000 @@ -48,17 +49,6 @@ #define IORESOURCE_PREFETCH 0x00002000 /* No side effects */ #define IORESOURCE_MEM_64 0x00100000 -//#define DEVICE_ASSIGNMENT_DEBUG - -#ifdef DEVICE_ASSIGNMENT_DEBUG -#define DEBUG(fmt, ...) \ - do { \ - fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__); \ - } while (0) -#else -#define DEBUG(fmt, ...) -#endif - typedef struct PCIRegion { int type; /* Memory or port I/O */ int valid; @@ -1896,73 +1886,15 @@ static void assign_register_types(void) type_init(assign_register_types) -/* - * Scan the assigned devices for the devices that have an option ROM, and then - * load the corresponding ROM data to RAM. If an error occurs while loading an - * option ROM, we just ignore that option ROM and continue with the next one. - */ static void assigned_dev_load_option_rom(AssignedDevice *dev) { - char name[32], rom_file[64]; - FILE *fp; - uint8_t val; - struct stat st; - void *ptr; + int size = 0; - /* If loading ROM from file, pci handles it */ - if (dev->dev.romfile || !dev->dev.rom_bar) { - return; + pci_assign_dev_load_option_rom(&dev->dev, OBJECT(dev), &size, + dev->host.domain, dev->host.bus, + dev->host.slot, dev->host.function); + + if (!size) { + error_report("pci-assign: Invalid ROM."); } - - snprintf(rom_file, sizeof(rom_file), - "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/rom", - dev->host.domain, dev->host.bus, dev->host.slot, - dev->host.function); - - if (stat(rom_file, &st)) { - return; - } - - if (access(rom_file, F_OK)) { - error_report("pci-assign: Insufficient privileges for %s", rom_file); - return; - } - - /* Write "1" to the ROM file to enable it */ - fp = fopen(rom_file, "r+"); - if (fp == NULL) { - return; - } - val = 1; - if (fwrite(&val, 1, 1, fp) != 1) { - goto close_rom; - } - fseek(fp, 0, SEEK_SET); - - snprintf(name, sizeof(name), "%s.rom", - object_get_typename(OBJECT(dev))); - memory_region_init_ram(&dev->dev.rom, OBJECT(dev), name, st.st_size, - &error_abort); - vmstate_register_ram(&dev->dev.rom, &dev->dev.qdev); - ptr = memory_region_get_ram_ptr(&dev->dev.rom); - memset(ptr, 0xff, st.st_size); - - if (!fread(ptr, 1, st.st_size, fp)) { - error_report("pci-assign: Cannot read from host %s", rom_file); - error_printf("Device option ROM contents are probably invalid " - "(check dmesg).\nSkip option ROM probe with rombar=0, " - "or load from file with romfile=\n"); - goto close_rom; - } - - pci_register_bar(&dev->dev, PCI_ROM_SLOT, 0, &dev->dev.rom); - dev->dev.has_rom = true; -close_rom: - /* Write "0" to disable ROM */ - fseek(fp, 0, SEEK_SET); - val = 0; - if (!fwrite(&val, 1, 1, fp)) { - DEBUG("%s\n", "Failed to disable pci-sysfs rom file"); - } - fclose(fp); } diff --git a/hw/i386/pci-assign-load-rom.c b/hw/i386/pci-assign-load-rom.c new file mode 100644 index 0000000000..34a3a7ed7f --- /dev/null +++ b/hw/i386/pci-assign-load-rom.c @@ -0,0 +1,91 @@ +/* + * This is splited from hw/i386/kvm/pci-assign.c + */ +#include +#include +#include +#include +#include "hw/hw.h" +#include "hw/i386/pc.h" +#include "qemu/error-report.h" +#include "ui/console.h" +#include "hw/loader.h" +#include "monitor/monitor.h" +#include "qemu/range.h" +#include "sysemu/sysemu.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci-assign.h" + +/* + * Scan the assigned devices for the devices that have an option ROM, and then + * load the corresponding ROM data to RAM. If an error occurs while loading an + * option ROM, we just ignore that option ROM and continue with the next one. + */ +void *pci_assign_dev_load_option_rom(PCIDevice *dev, struct Object *owner, + int *size, unsigned int domain, + unsigned int bus, unsigned int slot, + unsigned int function) +{ + char name[32], rom_file[64]; + FILE *fp; + uint8_t val; + struct stat st; + void *ptr = NULL; + + /* If loading ROM from file, pci handles it */ + if (dev->romfile || !dev->rom_bar) { + return NULL; + } + + snprintf(rom_file, sizeof(rom_file), + "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/rom", + domain, bus, slot, function); + + if (stat(rom_file, &st)) { + return NULL; + } + + if (access(rom_file, F_OK)) { + error_report("pci-assign: Insufficient privileges for %s", rom_file); + return NULL; + } + + /* Write "1" to the ROM file to enable it */ + fp = fopen(rom_file, "r+"); + if (fp == NULL) { + return NULL; + } + val = 1; + if (fwrite(&val, 1, 1, fp) != 1) { + goto close_rom; + } + fseek(fp, 0, SEEK_SET); + + snprintf(name, sizeof(name), "%s.rom", object_get_typename(owner)); + memory_region_init_ram(&dev->rom, owner, name, st.st_size, &error_abort); + vmstate_register_ram(&dev->rom, &dev->qdev); + ptr = memory_region_get_ram_ptr(&dev->rom); + memset(ptr, 0xff, st.st_size); + + if (!fread(ptr, 1, st.st_size, fp)) { + error_report("pci-assign: Cannot read from host %s", rom_file); + error_printf("Device option ROM contents are probably invalid " + "(check dmesg).\nSkip option ROM probe with rombar=0, " + "or load from file with romfile=\n"); + goto close_rom; + } + + pci_register_bar(dev, PCI_ROM_SLOT, 0, &dev->rom); + dev->has_rom = true; + *size = st.st_size; +close_rom: + /* Write "0" to disable ROM */ + fseek(fp, 0, SEEK_SET); + val = 0; + if (!fwrite(&val, 1, 1, fp)) { + DEBUG("%s\n", "Failed to disable pci-sysfs rom file"); + } + fclose(fp); + + return ptr; +} diff --git a/include/hw/pci/pci-assign.h b/include/hw/pci/pci-assign.h new file mode 100644 index 0000000000..55f42c56fa --- /dev/null +++ b/include/hw/pci/pci-assign.h @@ -0,0 +1,27 @@ +/* + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Just split from hw/i386/kvm/pci-assign.c. + */ +#ifndef PCI_ASSIGN_H +#define PCI_ASSIGN_H + +#include "hw/pci/pci.h" + +//#define DEVICE_ASSIGNMENT_DEBUG + +#ifdef DEVICE_ASSIGNMENT_DEBUG +#define DEBUG(fmt, ...) \ + do { \ + fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__); \ + } while (0) +#else +#define DEBUG(fmt, ...) +#endif + +void *pci_assign_dev_load_option_rom(PCIDevice *dev, struct Object *owner, + int *size, unsigned int domain, + unsigned int bus, unsigned int slot, + unsigned int function); +#endif /* PCI_ASSIGN_H */ From 798141799ccd5235a928b8fc0411d7d74e706489 Mon Sep 17 00:00:00 2001 From: Tiejun Chen Date: Wed, 15 Jul 2015 13:37:45 +0800 Subject: [PATCH 06/29] xen, gfx passthrough: basic graphics passthrough support basic gfx passthrough support: - add a vga type for gfx passthrough - register/unregister legacy VGA I/O ports and MMIOs for passthrough GFX Signed-off-by: Tiejun Chen Signed-off-by: Yang Zhang Acked-by: Stefano Stabellini Signed-off-by: Stefano Stabellini --- hw/core/machine.c | 20 +++++++ hw/xen/Makefile.objs | 1 + hw/xen/xen-host-pci-device.c | 5 ++ hw/xen/xen-host-pci-device.h | 1 + hw/xen/xen_pt.c | 4 ++ hw/xen/xen_pt.h | 10 +++- hw/xen/xen_pt_graphics.c | 111 +++++++++++++++++++++++++++++++++++ include/hw/boards.h | 1 + qemu-options.hx | 3 + vl.c | 10 ++++ 10 files changed, 165 insertions(+), 1 deletion(-) create mode 100644 hw/xen/xen_pt_graphics.c diff --git a/hw/core/machine.c b/hw/core/machine.c index ac4654e9dd..51ed6b2e05 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -226,6 +226,20 @@ static void machine_set_usb(Object *obj, bool value, Error **errp) ms->usb_disabled = !value; } +static bool machine_get_igd_gfx_passthru(Object *obj, Error **errp) +{ + MachineState *ms = MACHINE(obj); + + return ms->igd_gfx_passthru; +} + +static void machine_set_igd_gfx_passthru(Object *obj, bool value, Error **errp) +{ + MachineState *ms = MACHINE(obj); + + ms->igd_gfx_passthru = value; +} + static char *machine_get_firmware(Object *obj, Error **errp) { MachineState *ms = MACHINE(obj); @@ -388,6 +402,12 @@ static void machine_initfn(Object *obj) object_property_set_description(obj, "usb", "Set on/off to enable/disable usb", NULL); + object_property_add_bool(obj, "igd-passthru", + machine_get_igd_gfx_passthru, + machine_set_igd_gfx_passthru, NULL); + object_property_set_description(obj, "igd-passthru", + "Set on/off to enable/disable igd passthrou", + NULL); object_property_add_str(obj, "firmware", machine_get_firmware, machine_set_firmware, NULL); diff --git a/hw/xen/Makefile.objs b/hw/xen/Makefile.objs index a0ca0aa3df..a9ad7e70f7 100644 --- a/hw/xen/Makefile.objs +++ b/hw/xen/Makefile.objs @@ -3,3 +3,4 @@ common-obj-$(CONFIG_XEN_BACKEND) += xen_backend.o xen_devconfig.o obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen-host-pci-device.o obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen_pt.o xen_pt_config_init.o xen_pt_msi.o +obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen_pt.o xen_pt_config_init.o xen_pt_msi.o xen_pt_graphics.o diff --git a/hw/xen/xen-host-pci-device.c b/hw/xen/xen-host-pci-device.c index 743b37b991..a54b7ded9b 100644 --- a/hw/xen/xen-host-pci-device.c +++ b/hw/xen/xen-host-pci-device.c @@ -376,6 +376,11 @@ int xen_host_pci_device_get(XenHostPCIDevice *d, uint16_t domain, goto error; } d->irq = v; + rc = xen_host_pci_get_hex_value(d, "class", &v); + if (rc) { + goto error; + } + d->class_code = v; d->is_virtfn = xen_host_pci_dev_is_virtfn(d); return 0; diff --git a/hw/xen/xen-host-pci-device.h b/hw/xen/xen-host-pci-device.h index c2486f0c19..f1e1c308f6 100644 --- a/hw/xen/xen-host-pci-device.h +++ b/hw/xen/xen-host-pci-device.h @@ -25,6 +25,7 @@ typedef struct XenHostPCIDevice { uint16_t vendor_id; uint16_t device_id; + uint32_t class_code; int irq; XenHostPCIIORegion io_regions[PCI_NUM_REGIONS - 1]; diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index ed5fcaec0d..42380c3976 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -502,6 +502,7 @@ static int xen_pt_register_regions(XenPCIPassthroughState *s, uint16_t *cmd) d->rom.size, d->rom.base_addr); } + xen_pt_register_vga_regions(d); return 0; } @@ -801,6 +802,7 @@ out: static void xen_pt_unregister_device(PCIDevice *d) { XenPCIPassthroughState *s = XEN_PT_DEVICE(d); + XenHostPCIDevice *host_dev = &s->real_device; uint8_t machine_irq = s->machine_irq; uint8_t intx = xen_pt_pci_intx(s); int rc; @@ -844,6 +846,8 @@ static void xen_pt_unregister_device(PCIDevice *d) /* delete all emulated config registers */ xen_pt_config_delete(s); + xen_pt_unregister_vga_regions(host_dev); + memory_listener_unregister(&s->memory_listener); memory_listener_unregister(&s->io_listener); diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h index 393f36ccbf..5eb3c52546 100644 --- a/hw/xen/xen_pt.h +++ b/hw/xen/xen_pt.h @@ -305,5 +305,13 @@ static inline bool xen_pt_has_msix_mapping(XenPCIPassthroughState *s, int bar) return s->msix && s->msix->bar_index == bar; } - +extern bool has_igd_gfx_passthru; +static inline bool is_igd_vga_passthrough(XenHostPCIDevice *dev) +{ + return (has_igd_gfx_passthru + && ((dev->class_code >> 0x8) == PCI_CLASS_DISPLAY_VGA)); +} +int xen_pt_register_vga_regions(XenHostPCIDevice *dev); +int xen_pt_unregister_vga_regions(XenHostPCIDevice *dev); +int xen_pt_setup_vga(XenPCIPassthroughState *s, XenHostPCIDevice *dev); #endif /* !XEN_PT_H */ diff --git a/hw/xen/xen_pt_graphics.c b/hw/xen/xen_pt_graphics.c new file mode 100644 index 0000000000..9b3df811b1 --- /dev/null +++ b/hw/xen/xen_pt_graphics.c @@ -0,0 +1,111 @@ +/* + * graphics passthrough + */ +#include "xen_pt.h" +#include "xen-host-pci-device.h" +#include "hw/xen/xen_backend.h" + +typedef struct VGARegion { + int type; /* Memory or port I/O */ + uint64_t guest_base_addr; + uint64_t machine_base_addr; + uint64_t size; /* size of the region */ + int rc; +} VGARegion; + +#define IORESOURCE_IO 0x00000100 +#define IORESOURCE_MEM 0x00000200 + +static struct VGARegion vga_args[] = { + { + .type = IORESOURCE_IO, + .guest_base_addr = 0x3B0, + .machine_base_addr = 0x3B0, + .size = 0xC, + .rc = -1, + }, + { + .type = IORESOURCE_IO, + .guest_base_addr = 0x3C0, + .machine_base_addr = 0x3C0, + .size = 0x20, + .rc = -1, + }, + { + .type = IORESOURCE_MEM, + .guest_base_addr = 0xa0000 >> XC_PAGE_SHIFT, + .machine_base_addr = 0xa0000 >> XC_PAGE_SHIFT, + .size = 0x20, + .rc = -1, + }, +}; + +/* + * register VGA resources for the domain with assigned gfx + */ +int xen_pt_register_vga_regions(XenHostPCIDevice *dev) +{ + int i = 0; + + if (!is_igd_vga_passthrough(dev)) { + return 0; + } + + for (i = 0 ; i < ARRAY_SIZE(vga_args); i++) { + if (vga_args[i].type == IORESOURCE_IO) { + vga_args[i].rc = xc_domain_ioport_mapping(xen_xc, xen_domid, + vga_args[i].guest_base_addr, + vga_args[i].machine_base_addr, + vga_args[i].size, DPCI_ADD_MAPPING); + } else { + vga_args[i].rc = xc_domain_memory_mapping(xen_xc, xen_domid, + vga_args[i].guest_base_addr, + vga_args[i].machine_base_addr, + vga_args[i].size, DPCI_ADD_MAPPING); + } + + if (vga_args[i].rc) { + XEN_PT_ERR(NULL, "VGA %s mapping failed! (rc: %i)\n", + vga_args[i].type == IORESOURCE_IO ? "ioport" : "memory", + vga_args[i].rc); + return vga_args[i].rc; + } + } + + return 0; +} + +/* + * unregister VGA resources for the domain with assigned gfx + */ +int xen_pt_unregister_vga_regions(XenHostPCIDevice *dev) +{ + int i = 0; + + if (!is_igd_vga_passthrough(dev)) { + return 0; + } + + for (i = 0 ; i < ARRAY_SIZE(vga_args); i++) { + if (vga_args[i].type == IORESOURCE_IO) { + vga_args[i].rc = xc_domain_ioport_mapping(xen_xc, xen_domid, + vga_args[i].guest_base_addr, + vga_args[i].machine_base_addr, + vga_args[i].size, DPCI_REMOVE_MAPPING); + } else { + vga_args[i].rc = xc_domain_memory_mapping(xen_xc, xen_domid, + vga_args[i].guest_base_addr, + vga_args[i].machine_base_addr, + vga_args[i].size, DPCI_REMOVE_MAPPING); + } + + if (vga_args[i].rc) { + XEN_PT_ERR(NULL, "VGA %s unmapping failed! (rc: %i)\n", + vga_args[i].type == IORESOURCE_IO ? "ioport" : "memory", + vga_args[i].rc); + return vga_args[i].rc; + } + } + + return 0; +} diff --git a/include/hw/boards.h b/include/hw/boards.h index 3f84afdf1c..566a5cad13 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -137,6 +137,7 @@ struct MachineState { bool mem_merge; bool usb; bool usb_disabled; + bool igd_gfx_passthru; char *firmware; bool iommu; bool suppress_vmdesc; diff --git a/qemu-options.hx b/qemu-options.hx index efce775f66..b2f9dce0d5 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -38,6 +38,7 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \ " dump-guest-core=on|off include guest memory in a core dump (default=on)\n" " mem-merge=on|off controls memory merge support (default: on)\n" " iommu=on|off controls emulated Intel IOMMU (VT-d) support (default=off)\n" + " igd-passthru=on|off controls IGD GFX passthrough support (default=off)\n" " aes-key-wrap=on|off controls support for AES key wrapping (default=on)\n" " dea-key-wrap=on|off controls support for DEA key wrapping (default=on)\n" " suppress-vmdesc=on|off disables self-describing migration (default=off)\n", @@ -55,6 +56,8 @@ than one accelerator specified, the next one is used if the previous one fails to initialize. @item kernel_irqchip=on|off Enables in-kernel irqchip support for the chosen accelerator when available. +@item gfx_passthru=on|off +Enables IGD GFX passthrough support for the chosen machine when available. @item vmport=on|off|auto Enables emulation of VMWare IO port, for vmmouse etc. auto says to select the value based on accel. For accel=xen the default is off otherwise the default diff --git a/vl.c b/vl.c index 584ca88dda..76e0bb2a89 100644 --- a/vl.c +++ b/vl.c @@ -1338,6 +1338,13 @@ static inline void semihosting_arg_fallback(const char *file, const char *cmd) } } +/* Now we still need this for compatibility with XEN. */ +bool has_igd_gfx_passthru; +static void igd_gfx_passthru(void) +{ + has_igd_gfx_passthru = current_machine->igd_gfx_passthru; +} + /***********************************************************/ /* USB devices */ @@ -4528,6 +4535,9 @@ int main(int argc, char **argv, char **envp) exit(1); } + /* Check if IGD GFX passthrough. */ + igd_gfx_passthru(); + /* init generic devices */ if (qemu_opts_foreach(qemu_find_opts("device"), device_init_func, NULL, NULL)) { From 881213f1b9c5a8f4101405a5802d548cb62a4274 Mon Sep 17 00:00:00 2001 From: Tiejun Chen Date: Wed, 15 Jul 2015 13:37:46 +0800 Subject: [PATCH 07/29] xen, gfx passthrough: retrieve VGA BIOS to work Now we retrieve VGA bios like kvm stuff in qemu but we need to fix Device Identification in case if its not matched with the real IGD device since Seabios is always trying to compare this ID to work out VGA BIOS. Signed-off-by: Tiejun Chen Acked-by: Stefano Stabellini Signed-off-by: Stefano Stabellini --- hw/xen/xen_pt.c | 10 +++++ hw/xen/xen_pt.h | 5 +++ hw/xen/xen_pt_graphics.c | 79 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+) diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index 42380c3976..15b02cbd98 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -725,6 +725,16 @@ static int xen_pt_initfn(PCIDevice *d) s->memory_listener = xen_pt_memory_listener; s->io_listener = xen_pt_io_listener; + /* Setup VGA bios for passthrough GFX */ + if ((s->real_device.domain == 0) && (s->real_device.bus == 0) && + (s->real_device.dev == 2) && (s->real_device.func == 0)) { + if (xen_pt_setup_vga(s, &s->real_device) < 0) { + XEN_PT_ERR(d, "Setup VGA BIOS of passthrough GFX failed!\n"); + xen_host_pci_device_put(&s->real_device); + return -1; + } + } + /* Handle real device's MMIO/PIO BARs */ xen_pt_register_regions(s, &cmd); diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h index 5eb3c52546..a33e95ceb2 100644 --- a/hw/xen/xen_pt.h +++ b/hw/xen/xen_pt.h @@ -305,6 +305,11 @@ static inline bool xen_pt_has_msix_mapping(XenPCIPassthroughState *s, int bar) return s->msix && s->msix->bar_index == bar; } +extern void *pci_assign_dev_load_option_rom(PCIDevice *dev, + struct Object *owner, int *size, + unsigned int domain, + unsigned int bus, unsigned int slot, + unsigned int function); extern bool has_igd_gfx_passthru; static inline bool is_igd_vga_passthrough(XenHostPCIDevice *dev) { diff --git a/hw/xen/xen_pt_graphics.c b/hw/xen/xen_pt_graphics.c index 9b3df811b1..3232296bf8 100644 --- a/hw/xen/xen_pt_graphics.c +++ b/hw/xen/xen_pt_graphics.c @@ -109,3 +109,82 @@ int xen_pt_unregister_vga_regions(XenHostPCIDevice *dev) return 0; } + +static void *get_vgabios(XenPCIPassthroughState *s, int *size, + XenHostPCIDevice *dev) +{ + return pci_assign_dev_load_option_rom(&s->dev, OBJECT(&s->dev), size, + dev->domain, dev->bus, + dev->dev, dev->func); +} + +/* Refer to Seabios. */ +struct rom_header { + uint16_t signature; + uint8_t size; + uint8_t initVector[4]; + uint8_t reserved[17]; + uint16_t pcioffset; + uint16_t pnpoffset; +} __attribute__((packed)); + +struct pci_data { + uint32_t signature; + uint16_t vendor; + uint16_t device; + uint16_t vitaldata; + uint16_t dlen; + uint8_t drevision; + uint8_t class_lo; + uint16_t class_hi; + uint16_t ilen; + uint16_t irevision; + uint8_t type; + uint8_t indicator; + uint16_t reserved; +} __attribute__((packed)); + +int xen_pt_setup_vga(XenPCIPassthroughState *s, XenHostPCIDevice *dev) +{ + unsigned char *bios = NULL; + struct rom_header *rom; + int bios_size; + char *c = NULL; + char checksum = 0; + uint32_t len = 0; + struct pci_data *pd = NULL; + + if (!is_igd_vga_passthrough(dev)) { + return -1; + } + + bios = get_vgabios(s, &bios_size, dev); + if (!bios) { + XEN_PT_ERR(&s->dev, "VGA: Can't getting VBIOS!\n"); + return -1; + } + + /* Currently we fixed this address as a primary. */ + rom = (struct rom_header *)bios; + pd = (void *)(bios + (unsigned char)rom->pcioffset); + + /* We may need to fixup Device Identification. */ + if (pd->device != s->real_device.device_id) { + pd->device = s->real_device.device_id; + + len = rom->size * 512; + /* Then adjust the bios checksum */ + for (c = (char *)bios; c < ((char *)bios + len); c++) { + checksum += *c; + } + if (checksum) { + bios[len - 1] -= checksum; + XEN_PT_LOG(&s->dev, "vga bios checksum is adjusted %x!\n", + checksum); + } + } + + /* Currently we fixed this address as a primary for legacy BIOS. */ + cpu_physical_memory_rw(0xc0000, bios, bios_size, 1); + return 0; +} From bd8107d7301d3fa44f04aa435e06efb2226ca58c Mon Sep 17 00:00:00 2001 From: Tiejun Chen Date: Wed, 15 Jul 2015 13:37:47 +0800 Subject: [PATCH 08/29] igd gfx passthrough: create a isa bridge Currently IGD drivers always need to access PCH by 1f.0. But we don't want to poke that directly to get ID, and although in real world different GPU should have different PCH. But actually the different PCH DIDs likely map to different PCH SKUs. We do the same thing for the GPU. For PCH, the different SKUs are going to be all the same silicon design and implementation, just different features turn on and off with fuses. The SW interfaces should be consistent across all SKUs in a given family (eg LPT). But just same features may not be supported. Most of these different PCH features probably don't matter to the Gfx driver, but obviously any difference in display port connections will so it should be fine with any PCH in case of passthrough. So currently use one PCH version, 0x8c4e, to cover all HSW(Haswell) scenarios, 0x9cc3 for BDW(Broadwell). Signed-off-by: Tiejun Chen Signed-off-by: Stefano Stabellini Acked-by: Michael S. Tsirkin --- hw/i386/pc_piix.c | 112 +++++++++++++++++++++++++++++++++++++++++++ include/hw/i386/pc.h | 1 + 2 files changed, 113 insertions(+) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 626a19fb06..301a6753ea 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -882,6 +882,118 @@ static void pc_i440fx_0_10_machine_options(MachineClass *m) DEFINE_I440FX_MACHINE(v0_10, "pc-0.10", pc_compat_0_13, pc_i440fx_0_10_machine_options); +typedef struct { + uint16_t gpu_device_id; + uint16_t pch_device_id; + uint8_t pch_revision_id; +} IGDDeviceIDInfo; + +/* In real world different GPU should have different PCH. But actually + * the different PCH DIDs likely map to different PCH SKUs. We do the + * same thing for the GPU. For PCH, the different SKUs are going to be + * all the same silicon design and implementation, just different + * features turn on and off with fuses. The SW interfaces should be + * consistent across all SKUs in a given family (eg LPT). But just same + * features may not be supported. + * + * Most of these different PCH features probably don't matter to the + * Gfx driver, but obviously any difference in display port connections + * will so it should be fine with any PCH in case of passthrough. + * + * So currently use one PCH version, 0x8c4e, to cover all HSW(Haswell) + * scenarios, 0x9cc3 for BDW(Broadwell). + */ +static const IGDDeviceIDInfo igd_combo_id_infos[] = { + /* HSW Classic */ + {0x0402, 0x8c4e, 0x04}, /* HSWGT1D, HSWD_w7 */ + {0x0406, 0x8c4e, 0x04}, /* HSWGT1M, HSWM_w7 */ + {0x0412, 0x8c4e, 0x04}, /* HSWGT2D, HSWD_w7 */ + {0x0416, 0x8c4e, 0x04}, /* HSWGT2M, HSWM_w7 */ + {0x041E, 0x8c4e, 0x04}, /* HSWGT15D, HSWD_w7 */ + /* HSW ULT */ + {0x0A06, 0x8c4e, 0x04}, /* HSWGT1UT, HSWM_w7 */ + {0x0A16, 0x8c4e, 0x04}, /* HSWGT2UT, HSWM_w7 */ + {0x0A26, 0x8c4e, 0x06}, /* HSWGT3UT, HSWM_w7 */ + {0x0A2E, 0x8c4e, 0x04}, /* HSWGT3UT28W, HSWM_w7 */ + {0x0A1E, 0x8c4e, 0x04}, /* HSWGT2UX, HSWM_w7 */ + {0x0A0E, 0x8c4e, 0x04}, /* HSWGT1ULX, HSWM_w7 */ + /* HSW CRW */ + {0x0D26, 0x8c4e, 0x04}, /* HSWGT3CW, HSWM_w7 */ + {0x0D22, 0x8c4e, 0x04}, /* HSWGT3CWDT, HSWD_w7 */ + /* HSW Server */ + {0x041A, 0x8c4e, 0x04}, /* HSWSVGT2, HSWD_w7 */ + /* HSW SRVR */ + {0x040A, 0x8c4e, 0x04}, /* HSWSVGT1, HSWD_w7 */ + /* BSW */ + {0x1606, 0x9cc3, 0x03}, /* BDWULTGT1, BDWM_w7 */ + {0x1616, 0x9cc3, 0x03}, /* BDWULTGT2, BDWM_w7 */ + {0x1626, 0x9cc3, 0x03}, /* BDWULTGT3, BDWM_w7 */ + {0x160E, 0x9cc3, 0x03}, /* BDWULXGT1, BDWM_w7 */ + {0x161E, 0x9cc3, 0x03}, /* BDWULXGT2, BDWM_w7 */ + {0x1602, 0x9cc3, 0x03}, /* BDWHALOGT1, BDWM_w7 */ + {0x1612, 0x9cc3, 0x03}, /* BDWHALOGT2, BDWM_w7 */ + {0x1622, 0x9cc3, 0x03}, /* BDWHALOGT3, BDWM_w7 */ + {0x162B, 0x9cc3, 0x03}, /* BDWHALO28W, BDWM_w7 */ + {0x162A, 0x9cc3, 0x03}, /* BDWGT3WRKS, BDWM_w7 */ + {0x162D, 0x9cc3, 0x03}, /* BDWGT3SRVR, BDWM_w7 */ +}; + +static void isa_bridge_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + dc->desc = "ISA bridge faked to support IGD PT"; + k->vendor_id = PCI_VENDOR_ID_INTEL; + k->class_id = PCI_CLASS_BRIDGE_ISA; +}; + +static TypeInfo isa_bridge_info = { + .name = "igd-passthrough-isa-bridge", + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(PCIDevice), + .class_init = isa_bridge_class_init, +}; + +static void pt_graphics_register_types(void) +{ + type_register_static(&isa_bridge_info); +} +type_init(pt_graphics_register_types) + +void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id) +{ + struct PCIDevice *bridge_dev; + int i, num; + uint16_t pch_dev_id = 0xffff; + uint8_t pch_rev_id; + + num = ARRAY_SIZE(igd_combo_id_infos); + for (i = 0; i < num; i++) { + if (gpu_dev_id == igd_combo_id_infos[i].gpu_device_id) { + pch_dev_id = igd_combo_id_infos[i].pch_device_id; + pch_rev_id = igd_combo_id_infos[i].pch_revision_id; + } + } + + if (pch_dev_id == 0xffff) { + return; + } + + /* Currently IGD drivers always need to access PCH by 1f.0. */ + bridge_dev = pci_create_simple(bus, PCI_DEVFN(0x1f, 0), + "igd-passthrough-isa-bridge"); + + /* + * Note that vendor id is always PCI_VENDOR_ID_INTEL. + */ + if (!bridge_dev) { + fprintf(stderr, "set igd-passthrough-isa-bridge failed!\n"); + return; + } + pci_config_set_device_id(bridge_dev->config, pch_dev_id); + pci_config_set_revision(bridge_dev->config, pch_rev_id); +} static void isapc_machine_options(MachineClass *m) { diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 5cda2a3ff6..0639e46b2a 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -726,4 +726,5 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); (m)->compat_props = props; \ } while (0) +extern void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id); #endif From f37d630a69992822f2eb4a47a5a5fc6a54a6dd08 Mon Sep 17 00:00:00 2001 From: Tiejun Chen Date: Wed, 15 Jul 2015 13:37:48 +0800 Subject: [PATCH 09/29] xen, gfx passthrough: register a isa bridge Currently we just register this isa bridge when we use IGD passthrough in Xen side. Signed-off-by: Tiejun Chen Acked-by: Stefano Stabellini Signed-off-by: Stefano Stabellini --- hw/xen/xen_pt.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index 15b02cbd98..d67bccff3a 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -56,6 +56,7 @@ #include "hw/pci/pci.h" #include "hw/xen/xen.h" +#include "hw/i386/pc.h" #include "hw/xen/xen_backend.h" #include "xen_pt.h" #include "qemu/range.h" @@ -684,6 +685,17 @@ static const MemoryListener xen_pt_io_listener = { .priority = 10, }; +static void +xen_igd_passthrough_isa_bridge_create(XenPCIPassthroughState *s, + XenHostPCIDevice *dev) +{ + uint16_t gpu_dev_id; + PCIDevice *d = &s->dev; + + gpu_dev_id = dev->device_id; + igd_passthrough_isa_bridge_create(d->bus, gpu_dev_id); +} + /* init */ static int xen_pt_initfn(PCIDevice *d) @@ -728,11 +740,21 @@ static int xen_pt_initfn(PCIDevice *d) /* Setup VGA bios for passthrough GFX */ if ((s->real_device.domain == 0) && (s->real_device.bus == 0) && (s->real_device.dev == 2) && (s->real_device.func == 0)) { + if (!is_igd_vga_passthrough(&s->real_device)) { + XEN_PT_ERR(d, "Need to enable igd-passthru if you're trying" + " to passthrough IGD GFX.\n"); + xen_host_pci_device_put(&s->real_device); + return -1; + } + if (xen_pt_setup_vga(s, &s->real_device) < 0) { XEN_PT_ERR(d, "Setup VGA BIOS of passthrough GFX failed!\n"); xen_host_pci_device_put(&s->real_device); return -1; } + + /* Register ISA bridge for passthrough GFX. */ + xen_igd_passthrough_isa_bridge_create(s, &s->real_device); } /* Handle real device's MMIO/PIO BARs */ From 998250e976613decf9e0da68b3922df330eac3f6 Mon Sep 17 00:00:00 2001 From: Tiejun Chen Date: Wed, 15 Jul 2015 13:37:49 +0800 Subject: [PATCH 10/29] xen, gfx passthrough: register host bridge specific to passthrough Just register that pci host bridge specific to passthrough. Signed-off-by: Tiejun Chen Acked-by: Stefano Stabellini Signed-off-by: Stefano Stabellini --- hw/i386/pc_piix.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 301a6753ea..edef0cc038 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -50,7 +50,8 @@ #include "cpu.h" #include "qemu/error-report.h" #ifdef CONFIG_XEN -# include +#include +#include "hw/xen/xen_pt.h" #endif #include "migration/migration.h" @@ -421,11 +422,21 @@ static void pc_init_isa(MachineState *machine) } #ifdef CONFIG_XEN +static void pc_xen_hvm_init_pci(MachineState *machine) +{ + const char *pci_type = has_igd_gfx_passthru ? + TYPE_IGD_PASSTHROUGH_I440FX_PCI_DEVICE : TYPE_I440FX_PCI_DEVICE; + + pc_init1(machine, + TYPE_I440FX_PCI_HOST_BRIDGE, + pci_type); +} + static void pc_xen_hvm_init(MachineState *machine) { PCIBus *bus; - pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, TYPE_I440FX_PCI_DEVICE); + pc_xen_hvm_init_pci(machine); bus = pci_find_primary_bus(); if (bus != NULL) { From 5cec8aa38cc3b7c8cf9ce66abdda28b3598e2d88 Mon Sep 17 00:00:00 2001 From: Tiejun Chen Date: Wed, 15 Jul 2015 13:37:50 +0800 Subject: [PATCH 11/29] xen, gfx passthrough: add opregion mapping The OpRegion shouldn't be mapped 1:1 because the address in the host can't be used in the guest directly. This patch traps read and write access to the opregion of the Intel GPU config space (offset 0xfc). The original patch is from Jean Guyader Signed-off-by: Tiejun Chen Signed-off-by: Yang Zhang Acked-by: Stefano Stabellini Signed-off-by: Stefano Stabellini --- hw/xen/xen_pt.h | 6 ++- hw/xen/xen_pt_config_init.c | 51 ++++++++++++++++++++++- hw/xen/xen_pt_graphics.c | 82 +++++++++++++++++++++++++++++++++++++ 3 files changed, 136 insertions(+), 3 deletions(-) diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h index a33e95ceb2..e89d231128 100644 --- a/hw/xen/xen_pt.h +++ b/hw/xen/xen_pt.h @@ -40,6 +40,9 @@ typedef struct XenPCIPassthroughState XenPCIPassthroughState; #define XEN_PT_DEVICE(obj) \ OBJECT_CHECK(XenPCIPassthroughState, (obj), TYPE_XEN_PT_DEVICE) +uint32_t igd_read_opregion(XenPCIPassthroughState *s); +void igd_write_opregion(XenPCIPassthroughState *s, uint32_t val); + /* function type for config reg */ typedef int (*xen_pt_conf_reg_init) (XenPCIPassthroughState *, XenPTRegInfo *, uint32_t real_offset, @@ -66,8 +69,9 @@ typedef int (*xen_pt_conf_byte_read) #define XEN_PT_BAR_ALLF 0xFFFFFFFF #define XEN_PT_BAR_UNMAPPED (-1) -#define PCI_CAP_MAX 48 +#define XEN_PCI_CAP_MAX 48 +#define XEN_PCI_INTEL_OPREGION 0xfc typedef enum { XEN_PT_GRP_TYPE_HARDWIRED = 0, /* 0 Hardwired reg group */ diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c index dd37be38a4..9fb36704f8 100644 --- a/hw/xen/xen_pt_config_init.c +++ b/hw/xen/xen_pt_config_init.c @@ -552,6 +552,22 @@ static int xen_pt_exp_rom_bar_reg_write(XenPCIPassthroughState *s, return 0; } +static int xen_pt_intel_opregion_read(XenPCIPassthroughState *s, + XenPTReg *cfg_entry, + uint32_t *value, uint32_t valid_mask) +{ + *value = igd_read_opregion(s); + return 0; +} + +static int xen_pt_intel_opregion_write(XenPCIPassthroughState *s, + XenPTReg *cfg_entry, uint32_t *value, + uint32_t dev_value, uint32_t valid_mask) +{ + igd_write_opregion(s, *value); + return 0; +} + /* Header Type0 reg static information table */ static XenPTRegInfo xen_pt_emu_reg_header0[] = { /* Vendor ID reg */ @@ -1492,6 +1508,19 @@ static XenPTRegInfo xen_pt_emu_reg_msix[] = { }, }; +static XenPTRegInfo xen_pt_emu_reg_igd_opregion[] = { + /* Intel IGFX OpRegion reg */ + { + .offset = 0x0, + .size = 4, + .init_val = 0, + .u.dw.read = xen_pt_intel_opregion_read, + .u.dw.write = xen_pt_intel_opregion_write, + }, + { + .size = 0, + }, +}; /**************************** * Capabilities @@ -1729,6 +1758,14 @@ static const XenPTRegGroupInfo xen_pt_emu_reg_grps[] = { .size_init = xen_pt_msix_size_init, .emu_regs = xen_pt_emu_reg_msix, }, + /* Intel IGD Opregion group */ + { + .grp_id = XEN_PCI_INTEL_OPREGION, + .grp_type = XEN_PT_GRP_TYPE_EMU, + .grp_size = 0x4, + .size_init = xen_pt_reg_grp_size_init, + .emu_regs = xen_pt_emu_reg_igd_opregion, + }, { .grp_size = 0, }, @@ -1779,7 +1816,7 @@ out: static uint8_t find_cap_offset(XenPCIPassthroughState *s, uint8_t cap) { uint8_t id; - unsigned max_cap = PCI_CAP_MAX; + unsigned max_cap = XEN_PCI_CAP_MAX; uint8_t pos = PCI_CAPABILITY_LIST; uint8_t status = 0; @@ -1858,7 +1895,8 @@ int xen_pt_config_init(XenPCIPassthroughState *s) uint32_t reg_grp_offset = 0; XenPTRegGroup *reg_grp_entry = NULL; - if (xen_pt_emu_reg_grps[i].grp_id != 0xFF) { + if (xen_pt_emu_reg_grps[i].grp_id != 0xFF + && xen_pt_emu_reg_grps[i].grp_id != XEN_PCI_INTEL_OPREGION) { if (xen_pt_hide_dev_cap(&s->real_device, xen_pt_emu_reg_grps[i].grp_id)) { continue; @@ -1871,6 +1909,15 @@ int xen_pt_config_init(XenPCIPassthroughState *s) } } + /* + * By default we will trap up to 0x40 in the cfg space. + * If an intel device is pass through we need to trap 0xfc, + * therefore the size should be 0xff. + */ + if (xen_pt_emu_reg_grps[i].grp_id == XEN_PCI_INTEL_OPREGION) { + reg_grp_offset = XEN_PCI_INTEL_OPREGION; + } + reg_grp_entry = g_new0(XenPTRegGroup, 1); QLIST_INIT(®_grp_entry->reg_tbl_list); QLIST_INSERT_HEAD(&s->reg_grps, reg_grp_entry, entries); diff --git a/hw/xen/xen_pt_graphics.c b/hw/xen/xen_pt_graphics.c index 3232296bf8..df6069bf63 100644 --- a/hw/xen/xen_pt_graphics.c +++ b/hw/xen/xen_pt_graphics.c @@ -5,6 +5,11 @@ #include "xen-host-pci-device.h" #include "hw/xen/xen_backend.h" +static unsigned long igd_guest_opregion; +static unsigned long igd_host_opregion; + +#define XEN_PCI_INTEL_OPREGION_MASK 0xfff + typedef struct VGARegion { int type; /* Memory or port I/O */ uint64_t guest_base_addr; @@ -81,6 +86,7 @@ int xen_pt_register_vga_regions(XenHostPCIDevice *dev) int xen_pt_unregister_vga_regions(XenHostPCIDevice *dev) { int i = 0; + int ret = 0; if (!is_igd_vga_passthrough(dev)) { return 0; @@ -107,6 +113,17 @@ int xen_pt_unregister_vga_regions(XenHostPCIDevice *dev) } } + if (igd_guest_opregion) { + ret = xc_domain_memory_mapping(xen_xc, xen_domid, + (unsigned long)(igd_guest_opregion >> XC_PAGE_SHIFT), + (unsigned long)(igd_host_opregion >> XC_PAGE_SHIFT), + 3, + DPCI_REMOVE_MAPPING); + if (ret) { + return ret; + } + } + return 0; } @@ -188,3 +205,68 @@ int xen_pt_setup_vga(XenPCIPassthroughState *s, XenHostPCIDevice *dev) cpu_physical_memory_rw(0xc0000, bios, bios_size, 1); return 0; } + +uint32_t igd_read_opregion(XenPCIPassthroughState *s) +{ + uint32_t val = 0; + + if (!igd_guest_opregion) { + return val; + } + + val = igd_guest_opregion; + + XEN_PT_LOG(&s->dev, "Read opregion val=%x\n", val); + return val; +} + +#define XEN_PCI_INTEL_OPREGION_PAGES 0x3 +#define XEN_PCI_INTEL_OPREGION_ENABLE_ACCESSED 0x1 +void igd_write_opregion(XenPCIPassthroughState *s, uint32_t val) +{ + int ret; + + if (igd_guest_opregion) { + XEN_PT_LOG(&s->dev, "opregion register already been set, ignoring %x\n", + val); + return; + } + + /* We just work with LE. */ + xen_host_pci_get_block(&s->real_device, XEN_PCI_INTEL_OPREGION, + (uint8_t *)&igd_host_opregion, 4); + igd_guest_opregion = (unsigned long)(val & ~XEN_PCI_INTEL_OPREGION_MASK) + | (igd_host_opregion & XEN_PCI_INTEL_OPREGION_MASK); + + ret = xc_domain_iomem_permission(xen_xc, xen_domid, + (unsigned long)(igd_host_opregion >> XC_PAGE_SHIFT), + XEN_PCI_INTEL_OPREGION_PAGES, + XEN_PCI_INTEL_OPREGION_ENABLE_ACCESSED); + + if (ret) { + XEN_PT_ERR(&s->dev, "[%d]:Can't enable to access IGD host opregion:" + " 0x%lx.\n", ret, + (unsigned long)(igd_host_opregion >> XC_PAGE_SHIFT)), + igd_guest_opregion = 0; + return; + } + + ret = xc_domain_memory_mapping(xen_xc, xen_domid, + (unsigned long)(igd_guest_opregion >> XC_PAGE_SHIFT), + (unsigned long)(igd_host_opregion >> XC_PAGE_SHIFT), + XEN_PCI_INTEL_OPREGION_PAGES, + DPCI_ADD_MAPPING); + + if (ret) { + XEN_PT_ERR(&s->dev, "[%d]:Can't map IGD host opregion:0x%lx to" + " guest opregion:0x%lx.\n", ret, + (unsigned long)(igd_host_opregion >> XC_PAGE_SHIFT), + (unsigned long)(igd_guest_opregion >> XC_PAGE_SHIFT)); + igd_guest_opregion = 0; + return; + } + + XEN_PT_LOG(&s->dev, "Map OpRegion: 0x%lx -> 0x%lx\n", + (unsigned long)(igd_host_opregion >> XC_PAGE_SHIFT), + (unsigned long)(igd_guest_opregion >> XC_PAGE_SHIFT)); +} From e763addd19e59dbd1986d4b0faae63dcb9a0f6aa Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 13 Mar 2015 15:36:58 -0400 Subject: [PATCH 12/29] xen-hvm: When using xc_domain_add_to_physmap also include errno when reporting .errors - as it will most likely have the proper error value. Signed-off-by: Konrad Rzeszutek Wilk Acked-by: Stefano Stabellini Signed-off-by: Stefano Stabellini --- xen-hvm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xen-hvm.c b/xen-hvm.c index d17d3f33bc..0f81f7fd9c 100644 --- a/xen-hvm.c +++ b/xen-hvm.c @@ -348,7 +348,7 @@ go_physmap: rc = xc_domain_add_to_physmap(xen_xc, xen_domid, XENMAPSPACE_gmfn, idx, gpfn); if (rc) { DPRINTF("add_to_physmap MFN %"PRI_xen_pfn" to PFN %" - PRI_xen_pfn" failed: %d\n", idx, gpfn, rc); + PRI_xen_pfn" failed: %d (errno: %d)\n", idx, gpfn, rc, errno); return -rc; } } @@ -425,7 +425,7 @@ static int xen_remove_from_physmap(XenIOState *state, rc = xc_domain_add_to_physmap(xen_xc, xen_domid, XENMAPSPACE_gmfn, idx, gpfn); if (rc) { fprintf(stderr, "add_to_physmap MFN %"PRI_xen_pfn" to PFN %" - PRI_xen_pfn" failed: %d\n", idx, gpfn, rc); + PRI_xen_pfn" failed: %d (errno: %d)\n", idx, gpfn, rc, errno); return -rc; } } From d8b441a3fbfd075c48ab2a519d779d926624ed79 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 24 Jul 2015 03:38:28 -0600 Subject: [PATCH 13/29] xen/HVM: atomically access pointers in bufioreq handling The number of slots per page being 511 (i.e. not a power of two) means that the (32-bit) read and write indexes going beyond 2^32 will likely disturb operation. The hypervisor side gets I/O req server creation extended so we can indicate that we're using suitable atomic accesses where needed, allowing it to atomically canonicalize both pointers when both have gone through at least one cycle. The Xen side counterpart (which is not a functional prereq to this change, albeit a build one) went in already (commit b7007bc6f9). Signed-off-by: Jan Beulich Signed-off-by: Stefano Stabellini --- configure | 27 +++++++++++++++++++++++++++ include/hw/xen/xen_common.h | 12 +++++++++++- xen-hvm.c | 26 ++++++++++++++++++-------- 3 files changed, 56 insertions(+), 9 deletions(-) diff --git a/configure b/configure index d854936682..5618bf1bc0 100755 --- a/configure +++ b/configure @@ -1881,6 +1881,33 @@ EOF #if !defined(HVM_MAX_VCPUS) # error HVM_MAX_VCPUS not defined #endif +int main(void) { + xc_interface *xc; + xs_daemon_open(); + xc = xc_interface_open(0, 0, 0); + xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0); + xc_gnttab_open(NULL, 0); + xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0); + xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000); + xc_hvm_create_ioreq_server(xc, 0, HVM_IOREQSRV_BUFIOREQ_ATOMIC, NULL); + return 0; +} +EOF + compile_prog "" "$xen_libs" + then + xen_ctrl_version=460 + xen=yes + + # Xen 4.5 + elif + cat > $TMPC < +#include +#include +#include +#if !defined(HVM_MAX_VCPUS) +# error HVM_MAX_VCPUS not defined +#endif int main(void) { xc_interface *xc; xs_daemon_open(); diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h index ed5fd3e1a2..19d0bca14e 100644 --- a/include/hw/xen/xen_common.h +++ b/include/hw/xen/xen_common.h @@ -186,6 +186,15 @@ static inline int xen_get_vmport_regs_pfn(XenXC xc, domid_t dom, } #endif +/* Xen before 4.6 */ +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 460 + +#ifndef HVM_IOREQSRV_BUFIOREQ_ATOMIC +#define HVM_IOREQSRV_BUFIOREQ_ATOMIC 2 +#endif + +#endif + /* Xen before 4.5 */ #if CONFIG_XEN_CTRL_INTERFACE_VERSION < 450 @@ -370,7 +379,8 @@ static inline void xen_unmap_pcidev(XenXC xc, domid_t dom, static inline int xen_create_ioreq_server(XenXC xc, domid_t dom, ioservid_t *ioservid) { - int rc = xc_hvm_create_ioreq_server(xc, dom, 1, ioservid); + int rc = xc_hvm_create_ioreq_server(xc, dom, HVM_IOREQSRV_BUFIOREQ_ATOMIC, + ioservid); if (rc == 0) { trace_xen_ioreq_server_create(*ioservid); diff --git a/xen-hvm.c b/xen-hvm.c index 0f81f7fd9c..cbd0a79afd 100644 --- a/xen-hvm.c +++ b/xen-hvm.c @@ -984,19 +984,30 @@ static void handle_ioreq(XenIOState *state, ioreq_t *req) static int handle_buffered_iopage(XenIOState *state) { + buffered_iopage_t *buf_page = state->buffered_io_page; buf_ioreq_t *buf_req = NULL; ioreq_t req; int qw; - if (!state->buffered_io_page) { + if (!buf_page) { return 0; } memset(&req, 0x00, sizeof(req)); - while (state->buffered_io_page->read_pointer != state->buffered_io_page->write_pointer) { - buf_req = &state->buffered_io_page->buf_ioreq[ - state->buffered_io_page->read_pointer % IOREQ_BUFFER_SLOT_NUM]; + for (;;) { + uint32_t rdptr = buf_page->read_pointer, wrptr; + + xen_rmb(); + wrptr = buf_page->write_pointer; + xen_rmb(); + if (rdptr != buf_page->read_pointer) { + continue; + } + if (rdptr == wrptr) { + break; + } + buf_req = &buf_page->buf_ioreq[rdptr % IOREQ_BUFFER_SLOT_NUM]; req.size = 1UL << buf_req->size; req.count = 1; req.addr = buf_req->addr; @@ -1008,15 +1019,14 @@ static int handle_buffered_iopage(XenIOState *state) req.data_is_ptr = 0; qw = (req.size == 8); if (qw) { - buf_req = &state->buffered_io_page->buf_ioreq[ - (state->buffered_io_page->read_pointer + 1) % IOREQ_BUFFER_SLOT_NUM]; + buf_req = &buf_page->buf_ioreq[(rdptr + 1) % + IOREQ_BUFFER_SLOT_NUM]; req.data |= ((uint64_t)buf_req->data) << 32; } handle_ioreq(state, &req); - xen_mb(); - state->buffered_io_page->read_pointer += qw ? 2 : 1; + atomic_add(&buf_page->read_pointer, qw + 1); } return req.count; From d3b9facba71ed45c9c1c09ca28eb5568a194b4de Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 24 Jun 2015 17:16:01 -0400 Subject: [PATCH 14/29] xen/pt: Update comments with proper function name. It has changed but the comments still refer to the old names. Reviewed-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Stefano Stabellini --- hw/xen/xen_pt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index d67bccff3a..ba043e64af 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -379,7 +379,7 @@ static void xen_pt_pci_write_config(PCIDevice *d, uint32_t addr, } } - /* need to shift back before passing them to xen_host_pci_device */ + /* need to shift back before passing them to xen_host_pci_set_block. */ val >>= (addr & 3) << 3; memory_region_transaction_commit(); @@ -407,7 +407,7 @@ out: (uint8_t *)&val + index, len); if (rc < 0) { - XEN_PT_ERR(d, "pci_write_block failed. return value: %d.\n", rc); + XEN_PT_ERR(d, "xen_host_pci_set_block failed. return value: %d.\n", rc); } } } From cf8124f0078a7625fdf9836589210267817ae0ae Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 24 Jun 2015 17:26:43 -0400 Subject: [PATCH 15/29] xen/pt: Make xen_pt_msi_set_enable static As we do not use it outside our code. Reviewed-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Stefano Stabellini --- hw/xen/xen_pt.h | 1 - hw/xen/xen_pt_msi.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h index e89d231128..6763abc31c 100644 --- a/hw/xen/xen_pt.h +++ b/hw/xen/xen_pt.h @@ -293,7 +293,6 @@ static inline uint8_t xen_pt_pci_intx(XenPCIPassthroughState *s) } /* MSI/MSI-X */ -int xen_pt_msi_set_enable(XenPCIPassthroughState *s, bool en); int xen_pt_msi_setup(XenPCIPassthroughState *s); int xen_pt_msi_update(XenPCIPassthroughState *d); void xen_pt_msi_disable(XenPCIPassthroughState *s); diff --git a/hw/xen/xen_pt_msi.c b/hw/xen/xen_pt_msi.c index 263e0514a2..5822df5f60 100644 --- a/hw/xen/xen_pt_msi.c +++ b/hw/xen/xen_pt_msi.c @@ -220,7 +220,7 @@ static int msi_msix_disable(XenPCIPassthroughState *s, * MSI virtualization functions */ -int xen_pt_msi_set_enable(XenPCIPassthroughState *s, bool enable) +static int xen_pt_msi_set_enable(XenPCIPassthroughState *s, bool enable) { XEN_PT_LOG(&s->dev, "%s MSI.\n", enable ? "enabling" : "disabling"); From 52c7265f602701751b55d437b347bd73debf9d91 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 29 Jun 2015 13:58:17 -0400 Subject: [PATCH 16/29] xen/pt: xen_host_pci_config_read returns -errno, not -1 on failure However the init routines assume that on errors the return code is -1 (as the libxc API is) - while those xen_host_* routines follow another paradigm - negative errno on return, 0 on success. Reviewed-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Stefano Stabellini --- hw/xen/xen_pt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index ba043e64af..3b1544c422 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -729,7 +729,7 @@ static int xen_pt_initfn(PCIDevice *d) /* Initialize virtualized PCI configuration (Extended 256 Bytes) */ if (xen_host_pci_get_block(&s->real_device, 0, d->config, - PCI_CONFIG_SPACE_SIZE) == -1) { + PCI_CONFIG_SPACE_SIZE) < 0) { xen_host_pci_device_put(&s->real_device); return -1; } From 20a544c7dc2428e8816ed4a87af732884e885f2d Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 29 Jun 2015 12:51:05 -0400 Subject: [PATCH 17/29] xen: use errno instead of rc for xc_domain_add_to_physmap In Xen 4.6 commit cd2f100f0f61b3f333d52d1737dd73f02daee592 "libxc: Fix do_memory_op to return negative value on errors" made the libxc API less odd-ball: On errors, return value is -1 and error code is in errno. On success the return value is either 0 or an positive value. Since we could be running with an old toolstack in which the Exx value is in rc or the newer, we add an wrapper around the xc_domain_add_to_physmap (called xen_xc_domain_add_to_physmap) which will always return the EXX. Xen 4.6 did not change the libxc functions mentioned (same parameters) so we piggyback on the fact that Xen 4.6 has a new function: commit 504ed2053362381ac01b98db9313454488b7db40 "tools/libxc: Expose new hypercall xc_reserved_device_memory_map" and check for that. Reviewed-by: Stefano Stabellini Suggested-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Stefano Stabellini --- configure | 1 + include/hw/xen/xen_common.h | 22 ++++++++++++++++++++++ xen-hvm.c | 4 ++-- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/configure b/configure index 5618bf1bc0..5c06663f3c 100755 --- a/configure +++ b/configure @@ -1890,6 +1890,7 @@ int main(void) { xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0); xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000); xc_hvm_create_ioreq_server(xc, 0, HVM_IOREQSRV_BUFIOREQ_ATOMIC, NULL); + xc_reserved_device_memory_map(xc, 0, 0, 0, 0, NULL, 0); return 0; } EOF diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h index 19d0bca14e..d7fa6a4d01 100644 --- a/include/hw/xen/xen_common.h +++ b/include/hw/xen/xen_common.h @@ -417,4 +417,26 @@ static inline int xen_set_ioreq_server_state(XenXC xc, domid_t dom, #endif +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 460 +static inline int xen_xc_domain_add_to_physmap(XenXC xch, uint32_t domid, + unsigned int space, + unsigned long idx, + xen_pfn_t gpfn) +{ + return xc_domain_add_to_physmap(xch, domid, space, idx, gpfn); +} +#else +static inline int xen_xc_domain_add_to_physmap(XenXC xch, uint32_t domid, + unsigned int space, + unsigned long idx, + xen_pfn_t gpfn) +{ + /* In Xen 4.6 rc is -1 and errno contains the error value. */ + int rc = xc_domain_add_to_physmap(xch, domid, space, idx, gpfn); + if (rc == -1) + return errno; + return rc; +} +#endif + #endif /* QEMU_HW_XEN_COMMON_H */ diff --git a/xen-hvm.c b/xen-hvm.c index cbd0a79afd..3371c4ea31 100644 --- a/xen-hvm.c +++ b/xen-hvm.c @@ -345,7 +345,7 @@ go_physmap: unsigned long idx = pfn + i; xen_pfn_t gpfn = start_gpfn + i; - rc = xc_domain_add_to_physmap(xen_xc, xen_domid, XENMAPSPACE_gmfn, idx, gpfn); + rc = xen_xc_domain_add_to_physmap(xen_xc, xen_domid, XENMAPSPACE_gmfn, idx, gpfn); if (rc) { DPRINTF("add_to_physmap MFN %"PRI_xen_pfn" to PFN %" PRI_xen_pfn" failed: %d (errno: %d)\n", idx, gpfn, rc, errno); @@ -422,7 +422,7 @@ static int xen_remove_from_physmap(XenIOState *state, xen_pfn_t idx = start_addr + i; xen_pfn_t gpfn = phys_offset + i; - rc = xc_domain_add_to_physmap(xen_xc, xen_domid, XENMAPSPACE_gmfn, idx, gpfn); + rc = xen_xc_domain_add_to_physmap(xen_xc, xen_domid, XENMAPSPACE_gmfn, idx, gpfn); if (rc) { fprintf(stderr, "add_to_physmap MFN %"PRI_xen_pfn" to PFN %" PRI_xen_pfn" failed: %d (errno: %d)\n", idx, gpfn, rc, errno); From faf5f56bf9f45ffc24a41fc8fc5ab76677aef688 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 29 Jun 2015 12:30:37 -0400 Subject: [PATCH 18/29] xen/pt/msi: Add the register value when printing logging and error messages We would like to know what the MSI register value is to help in troubleshooting in the field. As such modify the logging logic to include such details in xen_pt_msgctrl_reg_write. Reviewed-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Stefano Stabellini --- hw/xen/xen_pt_config_init.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c index 9fb36704f8..56c84e115f 100644 --- a/hw/xen/xen_pt_config_init.c +++ b/hw/xen/xen_pt_config_init.c @@ -1102,7 +1102,7 @@ static int xen_pt_msgctrl_reg_write(XenPCIPassthroughState *s, /* setup MSI pirq for the first time */ if (!msi->initialized) { /* Init physical one */ - XEN_PT_LOG(&s->dev, "setup MSI\n"); + XEN_PT_LOG(&s->dev, "setup MSI (register: %x).\n", *val); if (xen_pt_msi_setup(s)) { /* We do not broadcast the error to the framework code, so * that MSI errors are contained in MSI emulation code and @@ -1110,12 +1110,12 @@ static int xen_pt_msgctrl_reg_write(XenPCIPassthroughState *s, * Guest MSI would be actually not working. */ *val &= ~PCI_MSI_FLAGS_ENABLE; - XEN_PT_WARN(&s->dev, "Can not map MSI.\n"); + XEN_PT_WARN(&s->dev, "Can not map MSI (register: %x)!\n", *val); return 0; } if (xen_pt_msi_update(s)) { *val &= ~PCI_MSI_FLAGS_ENABLE; - XEN_PT_WARN(&s->dev, "Can not bind MSI\n"); + XEN_PT_WARN(&s->dev, "Can not bind MSI (register: %x)!\n", *val); return 0; } msi->initialized = true; From 54fd08136e4ac8b88b88b15c397010e3b0de379f Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 29 Jun 2015 16:06:19 -0400 Subject: [PATCH 19/29] xen/pt: Use XEN_PT_LOG properly to guard against compiler warnings. If XEN_PT_LOGGING_ENABLED is enabled the XEN_PT_LOG macros start using the first argument. Which means if within the function there is only one user of the argument ('d') and XEN_PT_LOGGING_ENABLED is not set, we get compiler warnings. This is not the case now but with the "xen/pt: Use xen_host_pci_get_[byte|word] instead of dev.config" we will hit - so this sync up the function to the rest of them. Reviewed-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Stefano Stabellini --- hw/xen/xen_pt_config_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c index 56c84e115f..7b5e65fc18 100644 --- a/hw/xen/xen_pt_config_init.c +++ b/hw/xen/xen_pt_config_init.c @@ -1434,7 +1434,7 @@ static int xen_pt_msixctrl_reg_init(XenPCIPassthroughState *s, reg_field = pci_get_word(d->config + real_offset); if (reg_field & PCI_MSIX_FLAGS_ENABLE) { - XEN_PT_LOG(d, "MSIX already enabled, disabling it first\n"); + XEN_PT_LOG(&s->dev, "MSIX already enabled, disabling it first\n"); xen_host_pci_set_word(&s->real_device, real_offset, reg_field & ~PCI_MSIX_FLAGS_ENABLE); } From 6aa07b1494d2725f24af097ca19c750ac25a7c11 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 29 Jun 2015 14:01:13 -0400 Subject: [PATCH 20/29] xen/pt: Use xen_host_pci_get_[byte|word] instead of dev.config During init time we treat the dev.config area as a cache of the host view. However during execution time we treat it as guest view (by the generic PCI API). We need to sync Xen's code to the generic PCI API view. This is the first step by replacing all of the code that uses dev.config or pci_get_[byte|word] to get host value to actually use the xen_host_pci_get_[byte|word] functions. Interestingly in 'xen_pt_ptr_reg_init' we also needed to swap reg_field from uint32_t to uint8_t - since the access is only for one byte not four bytes. We can split this as a seperate patch however we would have to use a cast to thwart compiler warnings in the meantime. We also truncated 'flags' to 'flag' to make the code fit within the 80 characters. Reviewed-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Stefano Stabellini --- hw/xen/xen_pt.c | 24 ++++++++++-- hw/xen/xen_pt_config_init.c | 77 +++++++++++++++++++++++++------------ 2 files changed, 73 insertions(+), 28 deletions(-) diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index 3b1544c422..1c84d3d22a 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -702,7 +702,7 @@ static int xen_pt_initfn(PCIDevice *d) { XenPCIPassthroughState *s = XEN_PT_DEVICE(d); int rc = 0; - uint8_t machine_irq = 0; + uint8_t machine_irq = 0, scratch; uint16_t cmd = 0; int pirq = XEN_PT_UNASSIGNED_PIRQ; @@ -768,7 +768,12 @@ static int xen_pt_initfn(PCIDevice *d) } /* Bind interrupt */ - if (!s->dev.config[PCI_INTERRUPT_PIN]) { + rc = xen_host_pci_get_byte(&s->real_device, PCI_INTERRUPT_PIN, &scratch); + if (rc) { + XEN_PT_ERR(d, "Failed to read PCI_INTERRUPT_PIN! (rc:%d)\n", rc); + scratch = 0; + } + if (!scratch) { XEN_PT_LOG(d, "no pin interrupt\n"); goto out; } @@ -818,8 +823,19 @@ static int xen_pt_initfn(PCIDevice *d) out: if (cmd) { - xen_host_pci_set_word(&s->real_device, PCI_COMMAND, - pci_get_word(d->config + PCI_COMMAND) | cmd); + uint16_t val; + + rc = xen_host_pci_get_word(&s->real_device, PCI_COMMAND, &val); + if (rc) { + XEN_PT_ERR(d, "Failed to read PCI_COMMAND! (rc: %d)\n", rc); + } else { + val |= cmd; + rc = xen_host_pci_set_word(&s->real_device, PCI_COMMAND, val); + if (rc) { + XEN_PT_ERR(d, "Failed to write PCI_COMMAND val=0x%x!(rc: %d)\n", + val, rc); + } + } } memory_listener_register(&s->memory_listener, &s->dev.bus_master_as); diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c index 7b5e65fc18..a75baea870 100644 --- a/hw/xen/xen_pt_config_init.c +++ b/hw/xen/xen_pt_config_init.c @@ -816,15 +816,21 @@ static XenPTRegInfo xen_pt_emu_reg_vendor[] = { static inline uint8_t get_capability_version(XenPCIPassthroughState *s, uint32_t offset) { - uint8_t flags = pci_get_byte(s->dev.config + offset + PCI_EXP_FLAGS); - return flags & PCI_EXP_FLAGS_VERS; + uint8_t flag; + if (xen_host_pci_get_byte(&s->real_device, offset + PCI_EXP_FLAGS, &flag)) { + return 0; + } + return flag & PCI_EXP_FLAGS_VERS; } static inline uint8_t get_device_type(XenPCIPassthroughState *s, uint32_t offset) { - uint8_t flags = pci_get_byte(s->dev.config + offset + PCI_EXP_FLAGS); - return (flags & PCI_EXP_FLAGS_TYPE) >> 4; + uint8_t flag; + if (xen_host_pci_get_byte(&s->real_device, offset + PCI_EXP_FLAGS, &flag)) { + return 0; + } + return (flag & PCI_EXP_FLAGS_TYPE) >> 4; } /* initialize Link Control register */ @@ -873,8 +879,14 @@ static int xen_pt_linkctrl2_reg_init(XenPCIPassthroughState *s, reg_field = XEN_PT_INVALID_REG; } else { /* set Supported Link Speed */ - uint8_t lnkcap = pci_get_byte(s->dev.config + real_offset - reg->offset - + PCI_EXP_LNKCAP); + uint8_t lnkcap; + int rc; + rc = xen_host_pci_get_byte(&s->real_device, + real_offset - reg->offset + PCI_EXP_LNKCAP, + &lnkcap); + if (rc) { + return rc; + } reg_field |= PCI_EXP_LNKCAP_SLS & lnkcap; } @@ -1055,13 +1067,15 @@ static int xen_pt_msgctrl_reg_init(XenPCIPassthroughState *s, XenPTRegInfo *reg, uint32_t real_offset, uint32_t *data) { - PCIDevice *d = &s->dev; XenPTMSI *msi = s->msi; - uint16_t reg_field = 0; + uint16_t reg_field; + int rc; /* use I/O device register's value as initial value */ - reg_field = pci_get_word(d->config + real_offset); - + rc = xen_host_pci_get_word(&s->real_device, real_offset, ®_field); + if (rc) { + return rc; + } if (reg_field & PCI_MSI_FLAGS_ENABLE) { XEN_PT_LOG(&s->dev, "MSI already enabled, disabling it first\n"); xen_host_pci_set_word(&s->real_device, real_offset, @@ -1427,12 +1441,14 @@ static int xen_pt_msixctrl_reg_init(XenPCIPassthroughState *s, XenPTRegInfo *reg, uint32_t real_offset, uint32_t *data) { - PCIDevice *d = &s->dev; - uint16_t reg_field = 0; + uint16_t reg_field; + int rc; /* use I/O device register's value as initial value */ - reg_field = pci_get_word(d->config + real_offset); - + rc = xen_host_pci_get_word(&s->real_device, real_offset, ®_field); + if (rc) { + return rc; + } if (reg_field & PCI_MSIX_FLAGS_ENABLE) { XEN_PT_LOG(&s->dev, "MSIX already enabled, disabling it first\n"); xen_host_pci_set_word(&s->real_device, real_offset, @@ -1540,8 +1556,7 @@ static int xen_pt_vendor_size_init(XenPCIPassthroughState *s, const XenPTRegGroupInfo *grp_reg, uint32_t base_offset, uint8_t *size) { - *size = pci_get_byte(s->dev.config + base_offset + 0x02); - return 0; + return xen_host_pci_get_byte(&s->real_device, base_offset + 0x02, size); } /* get PCI Express Capability Structure register group size */ static int xen_pt_pcie_size_init(XenPCIPassthroughState *s, @@ -1620,12 +1635,15 @@ static int xen_pt_msi_size_init(XenPCIPassthroughState *s, const XenPTRegGroupInfo *grp_reg, uint32_t base_offset, uint8_t *size) { - PCIDevice *d = &s->dev; uint16_t msg_ctrl = 0; uint8_t msi_size = 0xa; + int rc; - msg_ctrl = pci_get_word(d->config + (base_offset + PCI_MSI_FLAGS)); - + rc = xen_host_pci_get_word(&s->real_device, base_offset + PCI_MSI_FLAGS, + &msg_ctrl); + if (rc) { + return rc; + } /* check if 64-bit address is capable of per-vector masking */ if (msg_ctrl & PCI_MSI_FLAGS_64BIT) { msi_size += 4; @@ -1776,11 +1794,14 @@ static int xen_pt_ptr_reg_init(XenPCIPassthroughState *s, XenPTRegInfo *reg, uint32_t real_offset, uint32_t *data) { - int i; - uint8_t *config = s->dev.config; - uint32_t reg_field = pci_get_byte(config + real_offset); + int i, rc; + uint8_t reg_field; uint8_t cap_id = 0; + rc = xen_host_pci_get_byte(&s->real_device, real_offset, ®_field); + if (rc) { + return rc; + } /* find capability offset */ while (reg_field) { for (i = 0; xen_pt_emu_reg_grps[i].grp_size != 0; i++) { @@ -1789,7 +1810,11 @@ static int xen_pt_ptr_reg_init(XenPCIPassthroughState *s, continue; } - cap_id = pci_get_byte(config + reg_field + PCI_CAP_LIST_ID); + rc = xen_host_pci_get_byte(&s->real_device, + reg_field + PCI_CAP_LIST_ID, &cap_id); + if (rc) { + return rc; + } if (xen_pt_emu_reg_grps[i].grp_id == cap_id) { if (xen_pt_emu_reg_grps[i].grp_type == XEN_PT_GRP_TYPE_EMU) { goto out; @@ -1800,7 +1825,11 @@ static int xen_pt_ptr_reg_init(XenPCIPassthroughState *s, } /* next capability */ - reg_field = pci_get_byte(config + reg_field + PCI_CAP_LIST_NEXT); + rc = xen_host_pci_get_byte(&s->real_device, + reg_field + PCI_CAP_LIST_NEXT, ®_field); + if (rc) { + return rc; + } } out: From 2e87512eccf3c5e40f3142ff5a763f4f850839f4 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 29 Jun 2015 16:24:40 -0400 Subject: [PATCH 21/29] xen/pt: Sync up the dev.config and data values. For a passthrough device we maintain a state of emulated registers value contained within d->config. We also consult the host registers (and apply ro and write masks) whenever the guest access the registers. This is done in xen_pt_pci_write_config and xen_pt_pci_read_config. Also in this picture we call pci_default_write_config which updates the d->config and if the d->config[PCI_COMMAND] register has PCI_COMMAND_MEMORY (or PCI_COMMAND_IO) acts on those changes. On startup the d->config[PCI_COMMAND] are the host values, not what the guest initial values should be, which is exactly what we do _not_ want to do for 64-bit BARs when the guest just wants to read the size of the BAR. Huh you say? To get the size of 64-bit memory space BARs, the guest has to calculate ((BAR[x] & 0xFFFFFFF0) + ((BAR[x+1] & 0xFFFFFFFF) << 32)) which means it has to do two writes of ~0 to BARx and BARx+1. prior to this patch and with XSA120-addendum patch (Linux kernel) the PCI_COMMAND register is copied from the host it can have PCI_COMMAND_MEMORY bit set which means that QEMU will try to update the hypervisor's P2M with BARx+1 value to ~0 (0xffffffff) (to sync the guest state to host) instead of just having xen_pt_pci_write_config and xen_pt_bar_reg_write apply the proper masks and return the size to the guest. To thwart this, this patch syncs up the host values with the guest values taking into account the emu_mask (bit set means we emulate, PCI_COMMAND_MEMORY and PCI_COMMAND_IO are set). That is we copy the host values - masking out any bits which we will emulate. Then merge it with the initial emulation register values. Lastly this value is then copied both in dev.config _and_ XenPTReg->data field. There is also reg->size accounting taken into consideration that ends up being used in patch. xen/pt: Check if reg->init function sets the 'data' past the reg->size This fixes errors such as these: (XEN) memory_map:add: dom2 gfn=fffe0 mfn=fbce0 nr=20 (DEBUG) 189 pci dev 04:0 BAR16 wrote ~0. (DEBUG) 200 pci dev 04:0 BAR16 read 0x0fffe0004. (XEN) memory_map:remove: dom2 gfn=fffe0 mfn=fbce0 nr=20 (DEBUG) 204 pci dev 04:0 BAR16 wrote 0x0fffe0004. (DEBUG) 217 pci dev 04:0 BAR16 read upper 0x000000000. (XEN) memory_map:add: dom2 gfn=ffffffff00000 mfn=fbce0 nr=20 (XEN) p2m.c:883:d0v0 p2m_set_entry failed! mfn=ffffffffffffffff rc:-22 (XEN) memory_map:fail: dom2 gfn=ffffffff00000 mfn=fbce0 nr=20 ret:-22 (XEN) memory_map:remove: dom2 gfn=ffffffff00000 mfn=fbce0 nr=20 (XEN) p2m.c:920:d0v0 gfn_to_mfn failed! gfn=ffffffff00000 type:4 (XEN) p2m.c:920:d0v0 gfn_to_mfn failed! gfn=ffffffff00001 type:4 .. (XEN) memory_map: error -22 removing dom2 access to [fbce0,fbcff] (DEBUG) 222 pci dev 04:0 BAR16 read upper 0x0ffffffff. (XEN) memory_map:remove: dom2 gfn=ffffffff00000 mfn=fbce0 nr=20 (XEN) memory_map: error -22 removing dom2 access to [fbce0,fbcff] [The DEBUG is to illustate what the hvmloader was doing] Also we swap from xen_host_pci_long to using xen_host_pci_get_[byte,word,long]. Otherwise we get: xen_pt_config_reg_init: Offset 0x0004 mismatch! Emulated=0x0000, host=0x2300017, syncing to 0x2300014. xen_pt_config_reg_init: Error: Offset 0x0004:0x2300014 expands past register size(2)! which is not surprising. We read the value as an 32-bit (from host), then operate it as a 16-bit - and the remainder is left unchanged. We end up writing the value as 16-bit (so 0014) to dev.config (as we use proper xen_set_host_[byte,word,long] so we don't spill to other registers) but in XenPTReg->data it is as 32-bit (0x2300014)! It is harmless as the read/write functions end up using an size mask and never modify the bits past 16-bit (reg->size is 2). This patch fixes the warnings by reading the value using the proper size. Note that the check for size is still left in-case the developer sets bits past the reg->size in the ->init routines. The author tried to fiddle with QEMU_BUILD_BUG to make this work but failed. Reviewed-by: Stefano Stabellini Reported-by: Sander Eikelenboom Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Stefano Stabellini --- hw/xen/xen_pt_config_init.c | 59 ++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c index a75baea870..b1ad743754 100644 --- a/hw/xen/xen_pt_config_init.c +++ b/hw/xen/xen_pt_config_init.c @@ -1893,6 +1893,10 @@ static int xen_pt_config_reg_init(XenPCIPassthroughState *s, reg_entry->reg = reg; if (reg->init) { + uint32_t host_mask, size_mask; + unsigned int offset; + uint32_t val; + /* initialize emulate register */ rc = reg->init(s, reg_entry->reg, reg_grp->base_offset + reg->offset, &data); @@ -1905,8 +1909,61 @@ static int xen_pt_config_reg_init(XenPCIPassthroughState *s, g_free(reg_entry); return 0; } + /* Sync up the data to dev.config */ + offset = reg_grp->base_offset + reg->offset; + size_mask = 0xFFFFFFFF >> ((4 - reg->size) << 3); + + switch (reg->size) { + case 1: rc = xen_host_pci_get_byte(&s->real_device, offset, (uint8_t *)&val); + break; + case 2: rc = xen_host_pci_get_word(&s->real_device, offset, (uint16_t *)&val); + break; + case 4: rc = xen_host_pci_get_long(&s->real_device, offset, &val); + break; + default: assert(1); + } + if (rc) { + /* Serious issues when we cannot read the host values! */ + g_free(reg_entry); + return rc; + } + /* Set bits in emu_mask are the ones we emulate. The dev.config shall + * contain the emulated view of the guest - therefore we flip the mask + * to mask out the host values (which dev.config initially has) . */ + host_mask = size_mask & ~reg->emu_mask; + + if ((data & host_mask) != (val & host_mask)) { + uint32_t new_val; + + /* Mask out host (including past size). */ + new_val = val & host_mask; + /* Merge emulated ones (excluding the non-emulated ones). */ + new_val |= data & host_mask; + /* Leave intact host and emulated values past the size - even though + * we do not care as we write per reg->size granularity, but for the + * logging below lets have the proper value. */ + new_val |= ((val | data)) & ~size_mask; + XEN_PT_LOG(&s->dev,"Offset 0x%04x mismatch! Emulated=0x%04x, host=0x%04x, syncing to 0x%04x.\n", + offset, data, val, new_val); + val = new_val; + } else + val = data; + + /* This could be just pci_set_long as we don't modify the bits + * past reg->size, but in case this routine is run in parallel + * we do not want to over-write other registers. */ + switch (reg->size) { + case 1: pci_set_byte(s->dev.config + offset, (uint8_t)val); + break; + case 2: pci_set_word(s->dev.config + offset, (uint16_t)val); + break; + case 4: pci_set_long(s->dev.config + offset, val); + break; + default: assert(1); + } /* set register value */ - reg_entry->data = data; + reg_entry->data = val; + } /* list add register entry */ QLIST_INSERT_HEAD(®_grp->reg_tbl_list, reg_entry, entries); From 5b4dd0f55ed3027557ed9a6fd89d5aa379122feb Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 29 Jun 2015 16:41:14 -0400 Subject: [PATCH 22/29] xen/pt: Check if reg->init function sets the 'data' past the reg->size It should never happen, but in case it does (an developer adds a new register and the 'init_val' expands past the register size) we want to report. The code will only write up to reg->size so there is no runtime danger of the register spilling across other ones - however to catch this sort of thing we still return an error. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Stefano Stabellini --- hw/xen/xen_pt_config_init.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c index b1ad743754..b58bf3983c 100644 --- a/hw/xen/xen_pt_config_init.c +++ b/hw/xen/xen_pt_config_init.c @@ -1949,9 +1949,15 @@ static int xen_pt_config_reg_init(XenPCIPassthroughState *s, } else val = data; + if (val & ~size_mask) { + XEN_PT_ERR(&s->dev,"Offset 0x%04x:0x%04x expands past register size(%d)!\n", + offset, val, reg->size); + g_free(reg_entry); + return -ENXIO; + } /* This could be just pci_set_long as we don't modify the bits - * past reg->size, but in case this routine is run in parallel - * we do not want to over-write other registers. */ + * past reg->size, but in case this routine is run in parallel or the + * init value is larger, we do not want to over-write registers. */ switch (reg->size) { case 1: pci_set_byte(s->dev.config + offset, (uint8_t)val); break; From e2779de053b64f023de382fd87b3596613d47d1e Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 1 Jul 2015 15:41:33 -0400 Subject: [PATCH 23/29] xen/pt: Remove XenPTReg->data field. We do not want to have two entries to cache the guest configuration registers: XenPTReg->data and dev.config. Instead we want to use only the dev.config. To do without much complications we rip out the ->data field and replace it with an pointer to the dev.config. This way we have the type-checking (uint8_t, uint16_t, etc) and as well and pre-computed location. Alternatively we could compute the offset in dev.config by using the XenPTRRegInfo and XenPTRegGroup every time but this way we have the pre-computed values. This change also exposes some mis-use: - In 'xen_pt_status_reg_init' we used u32 for the Capabilities Pointer register, but said register is an an u16. - In 'xen_pt_msgdata_reg_write' we used u32 but should have only use u16. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Stefano Stabellini --- hw/xen/xen_pt.h | 6 ++- hw/xen/xen_pt_config_init.c | 73 ++++++++++++++++++++++--------------- 2 files changed, 49 insertions(+), 30 deletions(-) diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h index 6763abc31c..76551efb26 100644 --- a/hw/xen/xen_pt.h +++ b/hw/xen/xen_pt.h @@ -138,7 +138,11 @@ struct XenPTRegInfo { struct XenPTReg { QLIST_ENTRY(XenPTReg) entries; XenPTRegInfo *reg; - uint32_t data; /* emulated value */ + union { + uint8_t *byte; + uint16_t *half_word; + uint32_t *word; + } ptr; /* pointer to dev.config. */ }; typedef const struct XenPTRegGroupInfo XenPTRegGroupInfo; diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c index b58bf3983c..e0829b1673 100644 --- a/hw/xen/xen_pt_config_init.c +++ b/hw/xen/xen_pt_config_init.c @@ -128,10 +128,11 @@ static int xen_pt_byte_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry, { XenPTRegInfo *reg = cfg_entry->reg; uint8_t valid_emu_mask = 0; + uint8_t *data = cfg_entry->ptr.byte; /* emulate byte register */ valid_emu_mask = reg->emu_mask & valid_mask; - *value = XEN_PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask); + *value = XEN_PT_MERGE_VALUE(*value, *data, ~valid_emu_mask); return 0; } @@ -140,10 +141,11 @@ static int xen_pt_word_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry, { XenPTRegInfo *reg = cfg_entry->reg; uint16_t valid_emu_mask = 0; + uint16_t *data = cfg_entry->ptr.half_word; /* emulate word register */ valid_emu_mask = reg->emu_mask & valid_mask; - *value = XEN_PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask); + *value = XEN_PT_MERGE_VALUE(*value, *data, ~valid_emu_mask); return 0; } @@ -152,10 +154,11 @@ static int xen_pt_long_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry, { XenPTRegInfo *reg = cfg_entry->reg; uint32_t valid_emu_mask = 0; + uint32_t *data = cfg_entry->ptr.word; /* emulate long register */ valid_emu_mask = reg->emu_mask & valid_mask; - *value = XEN_PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask); + *value = XEN_PT_MERGE_VALUE(*value, *data, ~valid_emu_mask); return 0; } @@ -169,10 +172,11 @@ static int xen_pt_byte_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, XenPTRegInfo *reg = cfg_entry->reg; uint8_t writable_mask = 0; uint8_t throughable_mask = get_throughable_mask(s, reg, valid_mask); + uint8_t *data = cfg_entry->ptr.byte; /* modify emulate register */ writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; - cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask); + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); /* create value for writing to I/O device register */ *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask); @@ -186,10 +190,11 @@ static int xen_pt_word_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, XenPTRegInfo *reg = cfg_entry->reg; uint16_t writable_mask = 0; uint16_t throughable_mask = get_throughable_mask(s, reg, valid_mask); + uint16_t *data = cfg_entry->ptr.half_word; /* modify emulate register */ writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; - cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask); + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); /* create value for writing to I/O device register */ *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask); @@ -203,10 +208,11 @@ static int xen_pt_long_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, XenPTRegInfo *reg = cfg_entry->reg; uint32_t writable_mask = 0; uint32_t throughable_mask = get_throughable_mask(s, reg, valid_mask); + uint32_t *data = cfg_entry->ptr.word; /* modify emulate register */ writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; - cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask); + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); /* create value for writing to I/O device register */ *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask); @@ -255,7 +261,7 @@ static int xen_pt_status_reg_init(XenPCIPassthroughState *s, reg_entry = xen_pt_find_reg(reg_grp_entry, PCI_CAPABILITY_LIST); if (reg_entry) { /* check Capabilities Pointer register */ - if (reg_entry->data) { + if (*reg_entry->ptr.half_word) { reg_field |= PCI_STATUS_CAP_LIST; } else { reg_field &= ~PCI_STATUS_CAP_LIST; @@ -301,10 +307,11 @@ static int xen_pt_cmd_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, XenPTRegInfo *reg = cfg_entry->reg; uint16_t writable_mask = 0; uint16_t throughable_mask = get_throughable_mask(s, reg, valid_mask); + uint16_t *data = cfg_entry->ptr.half_word; /* modify emulate register */ writable_mask = ~reg->ro_mask & valid_mask; - cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask); + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); /* create value for writing to I/O device register */ if (*val & PCI_COMMAND_INTX_DISABLE) { @@ -447,7 +454,7 @@ static int xen_pt_bar_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry, /* emulate BAR */ valid_emu_mask = bar_emu_mask & valid_mask; - *value = XEN_PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask); + *value = XEN_PT_MERGE_VALUE(*value, *cfg_entry->ptr.word, ~valid_emu_mask); return 0; } @@ -464,6 +471,7 @@ static int xen_pt_bar_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, uint32_t bar_ro_mask = 0; uint32_t r_size = 0; int index = 0; + uint32_t *data = cfg_entry->ptr.word; index = xen_pt_bar_offset_to_index(reg->offset); if (index < 0 || index >= PCI_NUM_REGIONS) { @@ -500,7 +508,7 @@ static int xen_pt_bar_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, /* modify emulate register */ writable_mask = bar_emu_mask & ~bar_ro_mask & valid_mask; - cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask); + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); /* check whether we need to update the virtual region address or not */ switch (s->bases[index].bar_flag) { @@ -533,6 +541,7 @@ static int xen_pt_exp_rom_bar_reg_write(XenPCIPassthroughState *s, uint32_t throughable_mask = get_throughable_mask(s, reg, valid_mask); pcibus_t r_size = 0; uint32_t bar_ro_mask = 0; + uint32_t *data = cfg_entry->ptr.word; r_size = d->io_regions[PCI_ROM_SLOT].size; base = &s->bases[PCI_ROM_SLOT]; @@ -544,7 +553,7 @@ static int xen_pt_exp_rom_bar_reg_write(XenPCIPassthroughState *s, /* modify emulate register */ writable_mask = ~bar_ro_mask & valid_mask; - cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask); + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); /* create value for writing to I/O device register */ *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask); @@ -999,10 +1008,11 @@ static int xen_pt_pmcsr_reg_write(XenPCIPassthroughState *s, XenPTRegInfo *reg = cfg_entry->reg; uint16_t writable_mask = 0; uint16_t throughable_mask = get_throughable_mask(s, reg, valid_mask); + uint16_t *data = cfg_entry->ptr.half_word; /* modify emulate register */ writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; - cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask); + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); /* create value for writing to I/O device register */ *val = XEN_PT_MERGE_VALUE(*val, dev_value & ~PCI_PM_CTRL_PME_STATUS, @@ -1097,6 +1107,7 @@ static int xen_pt_msgctrl_reg_write(XenPCIPassthroughState *s, XenPTMSI *msi = s->msi; uint16_t writable_mask = 0; uint16_t throughable_mask = get_throughable_mask(s, reg, valid_mask); + uint16_t *data = cfg_entry->ptr.half_word; /* Currently no support for multi-vector */ if (*val & PCI_MSI_FLAGS_QSIZE) { @@ -1105,8 +1116,8 @@ static int xen_pt_msgctrl_reg_write(XenPCIPassthroughState *s, /* modify emulate register */ writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; - cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask); - msi->flags |= cfg_entry->data & ~PCI_MSI_FLAGS_ENABLE; + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); + msi->flags |= *data & ~PCI_MSI_FLAGS_ENABLE; /* create value for writing to I/O device register */ *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask); @@ -1220,18 +1231,19 @@ static int xen_pt_msgaddr32_reg_write(XenPCIPassthroughState *s, { XenPTRegInfo *reg = cfg_entry->reg; uint32_t writable_mask = 0; - uint32_t old_addr = cfg_entry->data; + uint32_t old_addr = *cfg_entry->ptr.word; + uint32_t *data = cfg_entry->ptr.word; /* modify emulate register */ writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; - cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask); - s->msi->addr_lo = cfg_entry->data; + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); + s->msi->addr_lo = *data; /* create value for writing to I/O device register */ *val = XEN_PT_MERGE_VALUE(*val, dev_value, 0); /* update MSI */ - if (cfg_entry->data != old_addr) { + if (*data != old_addr) { if (s->msi->mapped) { xen_pt_msi_update(s); } @@ -1246,7 +1258,8 @@ static int xen_pt_msgaddr64_reg_write(XenPCIPassthroughState *s, { XenPTRegInfo *reg = cfg_entry->reg; uint32_t writable_mask = 0; - uint32_t old_addr = cfg_entry->data; + uint32_t old_addr = *cfg_entry->ptr.word; + uint32_t *data = cfg_entry->ptr.word; /* check whether the type is 64 bit or not */ if (!(s->msi->flags & PCI_MSI_FLAGS_64BIT)) { @@ -1257,15 +1270,15 @@ static int xen_pt_msgaddr64_reg_write(XenPCIPassthroughState *s, /* modify emulate register */ writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; - cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask); + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); /* update the msi_info too */ - s->msi->addr_hi = cfg_entry->data; + s->msi->addr_hi = *data; /* create value for writing to I/O device register */ *val = XEN_PT_MERGE_VALUE(*val, dev_value, 0); /* update MSI */ - if (cfg_entry->data != old_addr) { + if (*data != old_addr) { if (s->msi->mapped) { xen_pt_msi_update(s); } @@ -1284,8 +1297,9 @@ static int xen_pt_msgdata_reg_write(XenPCIPassthroughState *s, XenPTRegInfo *reg = cfg_entry->reg; XenPTMSI *msi = s->msi; uint16_t writable_mask = 0; - uint16_t old_data = cfg_entry->data; + uint16_t old_data = *cfg_entry->ptr.half_word; uint32_t offset = reg->offset; + uint16_t *data = cfg_entry->ptr.half_word; /* check the offset whether matches the type or not */ if (!xen_pt_msi_check_type(offset, msi->flags, DATA)) { @@ -1296,15 +1310,15 @@ static int xen_pt_msgdata_reg_write(XenPCIPassthroughState *s, /* modify emulate register */ writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; - cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask); + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); /* update the msi_info too */ - msi->data = cfg_entry->data; + msi->data = *data; /* create value for writing to I/O device register */ *val = XEN_PT_MERGE_VALUE(*val, dev_value, 0); /* update MSI */ - if (cfg_entry->data != old_data) { + if (*data != old_data) { if (msi->mapped) { xen_pt_msi_update(s); } @@ -1468,10 +1482,11 @@ static int xen_pt_msixctrl_reg_write(XenPCIPassthroughState *s, uint16_t writable_mask = 0; uint16_t throughable_mask = get_throughable_mask(s, reg, valid_mask); int debug_msix_enabled_old; + uint16_t *data = cfg_entry->ptr.half_word; /* modify emulate register */ writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; - cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask); + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); /* create value for writing to I/O device register */ *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask); @@ -1967,8 +1982,8 @@ static int xen_pt_config_reg_init(XenPCIPassthroughState *s, break; default: assert(1); } - /* set register value */ - reg_entry->data = val; + /* set register value pointer to the data. */ + reg_entry->ptr.byte = s->dev.config + offset; } /* list add register entry */ From ea6c50f99de4b36a2c3e90dedabac46aef7992ac Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 24 Jun 2015 17:18:26 -0400 Subject: [PATCH 24/29] xen/pt: Log xen_host_pci_get in two init functions To help with troubleshooting in the field. Acked-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Stefano Stabellini --- hw/xen/xen_pt_config_init.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c index e0829b1673..aca3790681 100644 --- a/hw/xen/xen_pt_config_init.c +++ b/hw/xen/xen_pt_config_init.c @@ -1828,6 +1828,8 @@ static int xen_pt_ptr_reg_init(XenPCIPassthroughState *s, rc = xen_host_pci_get_byte(&s->real_device, reg_field + PCI_CAP_LIST_ID, &cap_id); if (rc) { + XEN_PT_ERR(&s->dev, "Failed to read capability @0x%x (rc:%d)\n", + reg_field + PCI_CAP_LIST_ID, rc); return rc; } if (xen_pt_emu_reg_grps[i].grp_id == cap_id) { @@ -2037,6 +2039,9 @@ int xen_pt_config_init(XenPCIPassthroughState *s) reg_grp_offset, ®_grp_entry->size); if (rc < 0) { + XEN_PT_LOG(&s->dev, "Failed to initialize %d/%ld, type=0x%x, rc:%d\n", + i, ARRAY_SIZE(xen_pt_emu_reg_grps), + xen_pt_emu_reg_grps[i].grp_type, rc); xen_pt_config_delete(s); return rc; } @@ -2051,6 +2056,10 @@ int xen_pt_config_init(XenPCIPassthroughState *s) /* initialize capability register */ rc = xen_pt_config_reg_init(s, reg_grp_entry, regs); if (rc < 0) { + XEN_PT_LOG(&s->dev, "Failed to initialize %d/%ld reg 0x%x in grp_type=0x%x (%d/%ld), rc=%d\n", + j, ARRAY_SIZE(xen_pt_emu_reg_grps[i].emu_regs), + regs->offset, xen_pt_emu_reg_grps[i].grp_type, + i, ARRAY_SIZE(xen_pt_emu_reg_grps), rc); xen_pt_config_delete(s); return rc; } From fe2da64c5ab882124aaf14c27d62409d02ff1786 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 24 Jun 2015 17:27:40 -0400 Subject: [PATCH 25/29] xen/pt: Log xen_host_pci_get/set errors in MSI code. We seem to only use these functions when de-activating the MSI - so just log errors. Reviewed-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Stefano Stabellini --- hw/xen/xen_pt_msi.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/hw/xen/xen_pt_msi.c b/hw/xen/xen_pt_msi.c index 5822df5f60..e3d71945cd 100644 --- a/hw/xen/xen_pt_msi.c +++ b/hw/xen/xen_pt_msi.c @@ -75,19 +75,29 @@ static int msi_msix_enable(XenPCIPassthroughState *s, bool enable) { uint16_t val = 0; + int rc; if (!address) { return -1; } - xen_host_pci_get_word(&s->real_device, address, &val); + rc = xen_host_pci_get_word(&s->real_device, address, &val); + if (rc) { + XEN_PT_ERR(&s->dev, "Failed to read MSI/MSI-X register (0x%x), rc:%d\n", + address, rc); + return rc; + } if (enable) { val |= flag; } else { val &= ~flag; } - xen_host_pci_set_word(&s->real_device, address, val); - return 0; + rc = xen_host_pci_set_word(&s->real_device, address, val); + if (rc) { + XEN_PT_ERR(&s->dev, "Failed to write MSI/MSI-X register (0x%x), rc:%d\n", + address, rc); + } + return rc; } static int msi_msix_setup(XenPCIPassthroughState *s, @@ -276,7 +286,7 @@ void xen_pt_msi_disable(XenPCIPassthroughState *s) return; } - xen_pt_msi_set_enable(s, false); + (void)xen_pt_msi_set_enable(s, false); msi_msix_disable(s, msi_addr64(msi), msi->data, msi->pirq, false, msi->initialized); From bce33948178e338eac2629284b199c0c1f05f8a3 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 8 Sep 2015 16:21:29 -0400 Subject: [PATCH 26/29] xen/pt: Make xen_pt_unregister_device idempotent To deal with xen_host_pci_[set|get]_ functions returning error values and clearing ourselves in the init function we should make the .exit (xen_pt_unregister_device) function be idempotent in case the generic code starts calling .exit (or for fun does it before calling .init!). Reviewed-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Stefano Stabellini --- hw/xen/xen-host-pci-device.c | 5 +++++ hw/xen/xen-host-pci-device.h | 1 + hw/xen/xen_pt.c | 20 ++++++++++++++------ hw/xen/xen_pt.h | 2 ++ 4 files changed, 22 insertions(+), 6 deletions(-) diff --git a/hw/xen/xen-host-pci-device.c b/hw/xen/xen-host-pci-device.c index a54b7ded9b..be28ca2ce7 100644 --- a/hw/xen/xen-host-pci-device.c +++ b/hw/xen/xen-host-pci-device.c @@ -392,6 +392,11 @@ error: return rc; } +bool xen_host_pci_device_closed(XenHostPCIDevice *d) +{ + return d->config_fd == -1; +} + void xen_host_pci_device_put(XenHostPCIDevice *d) { if (d->config_fd >= 0) { diff --git a/hw/xen/xen-host-pci-device.h b/hw/xen/xen-host-pci-device.h index f1e1c308f6..3d44e044ff 100644 --- a/hw/xen/xen-host-pci-device.h +++ b/hw/xen/xen-host-pci-device.h @@ -39,6 +39,7 @@ typedef struct XenHostPCIDevice { int xen_host_pci_device_get(XenHostPCIDevice *d, uint16_t domain, uint8_t bus, uint8_t dev, uint8_t func); void xen_host_pci_device_put(XenHostPCIDevice *pci_dev); +bool xen_host_pci_device_closed(XenHostPCIDevice *d); int xen_host_pci_get_byte(XenHostPCIDevice *d, int pos, uint8_t *p); int xen_host_pci_get_word(XenHostPCIDevice *d, int pos, uint16_t *p); diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index 1c84d3d22a..d0199683be 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -840,6 +840,7 @@ out: memory_listener_register(&s->memory_listener, &s->dev.bus_master_as); memory_listener_register(&s->io_listener, &address_space_io); + s->listener_set = true; XEN_PT_LOG(d, "Real physical device %02x:%02x.%d registered successfully!\n", s->hostaddr.bus, s->hostaddr.slot, s->hostaddr.function); @@ -852,10 +853,11 @@ static void xen_pt_unregister_device(PCIDevice *d) XenPCIPassthroughState *s = XEN_PT_DEVICE(d); XenHostPCIDevice *host_dev = &s->real_device; uint8_t machine_irq = s->machine_irq; - uint8_t intx = xen_pt_pci_intx(s); + uint8_t intx; int rc; - if (machine_irq) { + if (machine_irq && !xen_host_pci_device_closed(&s->real_device)) { + intx = xen_pt_pci_intx(s); rc = xc_domain_unbind_pt_irq(xen_xc, xen_domid, machine_irq, PT_IRQ_TYPE_PCI, pci_bus_num(d->bus), @@ -870,6 +872,7 @@ static void xen_pt_unregister_device(PCIDevice *d) } } + /* N.B. xen_pt_config_delete takes care of freeing them. */ if (s->msi) { xen_pt_msi_disable(s); } @@ -889,6 +892,7 @@ static void xen_pt_unregister_device(PCIDevice *d) machine_irq, errno); } } + s->machine_irq = 0; } /* delete all emulated config registers */ @@ -896,10 +900,14 @@ static void xen_pt_unregister_device(PCIDevice *d) xen_pt_unregister_vga_regions(host_dev); - memory_listener_unregister(&s->memory_listener); - memory_listener_unregister(&s->io_listener); - - xen_host_pci_device_put(&s->real_device); + if (s->listener_set) { + memory_listener_unregister(&s->memory_listener); + memory_listener_unregister(&s->io_listener); + s->listener_set = false; + } + if (!xen_host_pci_device_closed(&s->real_device)) { + xen_host_pci_device_put(&s->real_device); + } } static Property xen_pci_passthrough_properties[] = { diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h index 76551efb26..3bc22eb1d1 100644 --- a/hw/xen/xen_pt.h +++ b/hw/xen/xen_pt.h @@ -225,6 +225,7 @@ struct XenPCIPassthroughState { MemoryListener memory_listener; MemoryListener io_listener; + bool listener_set; }; int xen_pt_config_init(XenPCIPassthroughState *s); @@ -290,6 +291,7 @@ static inline uint8_t xen_pt_pci_intx(XenPCIPassthroughState *s) " value=%i, acceptable range is 1 - 4\n", r_val); r_val = 0; } else { + /* Note that if s.real_device.config_fd is closed we make 0xff. */ r_val -= 1; } From df6aa45752bf8145adcba9b077c346e9b758ebd1 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 8 Sep 2015 16:21:59 -0400 Subject: [PATCH 27/29] xen/pt: Move bulk of xen_pt_unregister_device in its own routine. This way we can call it if we fail during init. This code movement introduces no changes. Acked-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Stefano Stabellini --- hw/xen/xen_pt.c | 121 +++++++++++++++++++++++++----------------------- 1 file changed, 63 insertions(+), 58 deletions(-) diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index d0199683be..1334ef10bb 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -696,6 +696,68 @@ xen_igd_passthrough_isa_bridge_create(XenPCIPassthroughState *s, igd_passthrough_isa_bridge_create(d->bus, gpu_dev_id); } +/* destroy. */ +static void xen_pt_destroy(PCIDevice *d) { + + XenPCIPassthroughState *s = XEN_PT_DEVICE(d); + XenHostPCIDevice *host_dev = &s->real_device; + uint8_t machine_irq = s->machine_irq; + uint8_t intx; + int rc; + + if (machine_irq && !xen_host_pci_device_closed(&s->real_device)) { + intx = xen_pt_pci_intx(s); + rc = xc_domain_unbind_pt_irq(xen_xc, xen_domid, machine_irq, + PT_IRQ_TYPE_PCI, + pci_bus_num(d->bus), + PCI_SLOT(s->dev.devfn), + intx, + 0 /* isa_irq */); + if (rc < 0) { + XEN_PT_ERR(d, "unbinding of interrupt INT%c failed." + " (machine irq: %i, err: %d)" + " But bravely continuing on..\n", + 'a' + intx, machine_irq, errno); + } + } + + /* N.B. xen_pt_config_delete takes care of freeing them. */ + if (s->msi) { + xen_pt_msi_disable(s); + } + if (s->msix) { + xen_pt_msix_disable(s); + } + + if (machine_irq) { + xen_pt_mapped_machine_irq[machine_irq]--; + + if (xen_pt_mapped_machine_irq[machine_irq] == 0) { + rc = xc_physdev_unmap_pirq(xen_xc, xen_domid, machine_irq); + + if (rc < 0) { + XEN_PT_ERR(d, "unmapping of interrupt %i failed. (err: %d)" + " But bravely continuing on..\n", + machine_irq, errno); + } + } + s->machine_irq = 0; + } + + /* delete all emulated config registers */ + xen_pt_config_delete(s); + + xen_pt_unregister_vga_regions(host_dev); + + if (s->listener_set) { + memory_listener_unregister(&s->memory_listener); + memory_listener_unregister(&s->io_listener); + s->listener_set = false; + } + if (!xen_host_pci_device_closed(&s->real_device)) { + xen_host_pci_device_put(&s->real_device); + } +} /* init */ static int xen_pt_initfn(PCIDevice *d) @@ -850,64 +912,7 @@ out: static void xen_pt_unregister_device(PCIDevice *d) { - XenPCIPassthroughState *s = XEN_PT_DEVICE(d); - XenHostPCIDevice *host_dev = &s->real_device; - uint8_t machine_irq = s->machine_irq; - uint8_t intx; - int rc; - - if (machine_irq && !xen_host_pci_device_closed(&s->real_device)) { - intx = xen_pt_pci_intx(s); - rc = xc_domain_unbind_pt_irq(xen_xc, xen_domid, machine_irq, - PT_IRQ_TYPE_PCI, - pci_bus_num(d->bus), - PCI_SLOT(s->dev.devfn), - intx, - 0 /* isa_irq */); - if (rc < 0) { - XEN_PT_ERR(d, "unbinding of interrupt INT%c failed." - " (machine irq: %i, err: %d)" - " But bravely continuing on..\n", - 'a' + intx, machine_irq, errno); - } - } - - /* N.B. xen_pt_config_delete takes care of freeing them. */ - if (s->msi) { - xen_pt_msi_disable(s); - } - if (s->msix) { - xen_pt_msix_disable(s); - } - - if (machine_irq) { - xen_pt_mapped_machine_irq[machine_irq]--; - - if (xen_pt_mapped_machine_irq[machine_irq] == 0) { - rc = xc_physdev_unmap_pirq(xen_xc, xen_domid, machine_irq); - - if (rc < 0) { - XEN_PT_ERR(d, "unmapping of interrupt %i failed. (err: %d)" - " But bravely continuing on..\n", - machine_irq, errno); - } - } - s->machine_irq = 0; - } - - /* delete all emulated config registers */ - xen_pt_config_delete(s); - - xen_pt_unregister_vga_regions(host_dev); - - if (s->listener_set) { - memory_listener_unregister(&s->memory_listener); - memory_listener_unregister(&s->io_listener); - s->listener_set = false; - } - if (!xen_host_pci_device_closed(&s->real_device)) { - xen_host_pci_device_put(&s->real_device); - } + xen_pt_destroy(d); } static Property xen_pci_passthrough_properties[] = { From 3d3697f2576424a2c0830a7b2e7c94c79dea9b50 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 2 Jul 2015 14:33:44 -0400 Subject: [PATCH 28/29] xen/pt: Check for return values for xen_host_pci_[get|set] in init and if we have failures we call xen_pt_destroy introduced in 'xen/pt: Move bulk of xen_pt_unregister_device in its own routine.' and free all of the allocated structures. Acked-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Stefano Stabellini --- hw/xen/xen_pt.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index 1334ef10bb..c07609c04c 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -790,10 +790,11 @@ static int xen_pt_initfn(PCIDevice *d) } /* Initialize virtualized PCI configuration (Extended 256 Bytes) */ - if (xen_host_pci_get_block(&s->real_device, 0, d->config, - PCI_CONFIG_SPACE_SIZE) < 0) { - xen_host_pci_device_put(&s->real_device); - return -1; + rc = xen_host_pci_get_block(&s->real_device, 0, d->config, + PCI_CONFIG_SPACE_SIZE); + if (rc < 0) { + XEN_PT_ERR(d,"Could not read PCI_CONFIG space! (rc:%d)\n", rc); + goto err_out; } s->memory_listener = xen_pt_memory_listener; @@ -823,17 +824,17 @@ static int xen_pt_initfn(PCIDevice *d) xen_pt_register_regions(s, &cmd); /* reinitialize each config register to be emulated */ - if (xen_pt_config_init(s)) { + rc = xen_pt_config_init(s); + if (rc) { XEN_PT_ERR(d, "PCI Config space initialisation failed.\n"); - xen_host_pci_device_put(&s->real_device); - return -1; + goto err_out; } /* Bind interrupt */ rc = xen_host_pci_get_byte(&s->real_device, PCI_INTERRUPT_PIN, &scratch); if (rc) { XEN_PT_ERR(d, "Failed to read PCI_INTERRUPT_PIN! (rc:%d)\n", rc); - scratch = 0; + goto err_out; } if (!scratch) { XEN_PT_LOG(d, "no pin interrupt\n"); @@ -890,12 +891,14 @@ out: rc = xen_host_pci_get_word(&s->real_device, PCI_COMMAND, &val); if (rc) { XEN_PT_ERR(d, "Failed to read PCI_COMMAND! (rc: %d)\n", rc); + goto err_out; } else { val |= cmd; rc = xen_host_pci_set_word(&s->real_device, PCI_COMMAND, val); if (rc) { XEN_PT_ERR(d, "Failed to write PCI_COMMAND val=0x%x!(rc: %d)\n", val, rc); + goto err_out; } } } @@ -908,6 +911,11 @@ out: s->hostaddr.bus, s->hostaddr.slot, s->hostaddr.function); return 0; + +err_out: + xen_pt_destroy(d); + assert(rc); + return rc; } static void xen_pt_unregister_device(PCIDevice *d) From cae99f1d779a6e2c8721ce2dd80bafdbf0abe7a0 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 8 Jul 2015 15:58:41 -0400 Subject: [PATCH 29/29] xen/pt: Don't slurp wholesale the PCI configuration registers Instead we have the emulation registers ->init functions which consult the host values to see what the initial value should be and they are responsible for populating the dev.config. Reviewed-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Stefano Stabellini --- hw/xen/xen_pt.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index c07609c04c..2b54f52707 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -790,12 +790,7 @@ static int xen_pt_initfn(PCIDevice *d) } /* Initialize virtualized PCI configuration (Extended 256 Bytes) */ - rc = xen_host_pci_get_block(&s->real_device, 0, d->config, - PCI_CONFIG_SPACE_SIZE); - if (rc < 0) { - XEN_PT_ERR(d,"Could not read PCI_CONFIG space! (rc:%d)\n", rc); - goto err_out; - } + memset(d->config, 0, PCI_CONFIG_SPACE_SIZE); s->memory_listener = xen_pt_memory_listener; s->io_listener = xen_pt_io_listener;