vfio-pci: Reset workaround for AMD Bonaire and Hawaii GPUs

Somehow these GPUs manage not to respond to a PCI bus reset, removing
our primary mechanism for resetting graphics cards.  The result is
that these devices typically work well for a single VM boot.  If the
VM is rebooted or restarted, the guest driver is not able to init the
card from the dirty state, resulting in a blue screen for Windows
guests.

The workaround is to use a device specific reset.  This is not 100%
reliable though since it depends on the incoming state of the device,
but it substantially improves the usability of these devices in a VM.

Credit to Alex Deucher <alexander.deucher@amd.com> for his guidance.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
This commit is contained in:
Alex Williamson 2015-04-28 11:14:02 -06:00
parent c6d231e2fd
commit 5655f931ab

View file

@ -154,6 +154,7 @@ typedef struct VFIOPCIDevice {
PCIHostDeviceAddress host;
EventNotifier err_notifier;
EventNotifier req_notifier;
int (*resetfn)(struct VFIOPCIDevice *);
uint32_t features;
#define VFIO_FEATURE_ENABLE_VGA_BIT 0
#define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
@ -3325,6 +3326,162 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
vdev->req_enabled = false;
}
/*
* AMD Radeon PCI config reset, based on Linux:
* drivers/gpu/drm/radeon/ci_smc.c:ci_is_smc_running()
* drivers/gpu/drm/radeon/radeon_device.c:radeon_pci_config_reset
* drivers/gpu/drm/radeon/ci_smc.c:ci_reset_smc()
* drivers/gpu/drm/radeon/ci_smc.c:ci_stop_smc_clock()
* IDs: include/drm/drm_pciids.h
* Registers: http://cgit.freedesktop.org/~agd5f/linux/commit/?id=4e2aa447f6f0
*
* Bonaire and Hawaii GPUs do not respond to a bus reset. This is a bug in the
* hardware that should be fixed on future ASICs. The symptom of this is that
* once the accerlated driver loads, Windows guests will bsod on subsequent
* attmpts to load the driver, such as after VM reset or shutdown/restart. To
* work around this, we do an AMD specific PCI config reset, followed by an SMC
* reset. The PCI config reset only works if SMC firmware is running, so we
* have a dependency on the state of the device as to whether this reset will
* be effective. There are still cases where we won't be able to kick the
* device into working, but this greatly improves the usability overall. The
* config reset magic is relatively common on AMD GPUs, but the setup and SMC
* poking is largely ASIC specific.
*/
static bool vfio_radeon_smc_is_running(VFIOPCIDevice *vdev)
{
uint32_t clk, pc_c;
/*
* Registers 200h and 204h are index and data registers for acessing
* indirect configuration registers within the device.
*/
vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
clk = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000370, 4);
pc_c = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
return (!(clk & 1) && (0x20100 <= pc_c));
}
/*
* The scope of a config reset is controlled by a mode bit in the misc register
* and a fuse, exposed as a bit in another register. The fuse is the default
* (0 = GFX, 1 = whole GPU), the misc bit is a toggle, with the forumula
* scope = !(misc ^ fuse), where the resulting scope is defined the same as
* the fuse. A truth table therefore tells us that if misc == fuse, we need
* to flip the value of the bit in the misc register.
*/
static void vfio_radeon_set_gfx_only_reset(VFIOPCIDevice *vdev)
{
uint32_t misc, fuse;
bool a, b;
vfio_region_write(&vdev->bars[5].region, 0x200, 0xc00c0000, 4);
fuse = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
b = fuse & 64;
vfio_region_write(&vdev->bars[5].region, 0x200, 0xc0000010, 4);
misc = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
a = misc & 2;
if (a == b) {
vfio_region_write(&vdev->bars[5].region, 0x204, misc ^ 2, 4);
vfio_region_read(&vdev->bars[5].region, 0x204, 4); /* flush */
}
}
static int vfio_radeon_reset(VFIOPCIDevice *vdev)
{
PCIDevice *pdev = &vdev->pdev;
int i, ret = 0;
uint32_t data;
/* Defer to a kernel implemented reset */
if (vdev->vbasedev.reset_works) {
return -ENODEV;
}
/* Enable only memory BAR access */
vfio_pci_write_config(pdev, PCI_COMMAND, PCI_COMMAND_MEMORY, 2);
/* Reset only works if SMC firmware is loaded and running */
if (!vfio_radeon_smc_is_running(vdev)) {
ret = -EINVAL;
goto out;
}
/* Make sure only the GFX function is reset */
vfio_radeon_set_gfx_only_reset(vdev);
/* AMD PCI config reset */
vfio_pci_write_config(pdev, 0x7c, 0x39d5e86b, 4);
usleep(100);
/* Read back the memory size to make sure we're out of reset */
for (i = 0; i < 100000; i++) {
if (vfio_region_read(&vdev->bars[5].region, 0x5428, 4) != 0xffffffff) {
break;
}
usleep(1);
}
/* Reset SMC */
vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000000, 4);
data = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
data |= 1;
vfio_region_write(&vdev->bars[5].region, 0x204, data, 4);
/* Disable SMC clock */
vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
data = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
data |= 1;
vfio_region_write(&vdev->bars[5].region, 0x204, data, 4);
out:
/* Restore PCI command register */
vfio_pci_write_config(pdev, PCI_COMMAND, 0, 2);
return ret;
}
static void vfio_setup_resetfn(VFIOPCIDevice *vdev)
{
PCIDevice *pdev = &vdev->pdev;
uint16_t vendor, device;
vendor = pci_get_word(pdev->config + PCI_VENDOR_ID);
device = pci_get_word(pdev->config + PCI_DEVICE_ID);
switch (vendor) {
case 0x1002:
switch (device) {
/* Bonaire */
case 0x6649: /* Bonaire [FirePro W5100] */
case 0x6650:
case 0x6651:
case 0x6658: /* Bonaire XTX [Radeon R7 260X] */
case 0x665c: /* Bonaire XT [Radeon HD 7790/8770 / R9 260 OEM] */
case 0x665d: /* Bonaire [Radeon R7 200 Series] */
/* Hawaii */
case 0x67A0: /* Hawaii XT GL [FirePro W9100] */
case 0x67A1: /* Hawaii PRO GL [FirePro W8100] */
case 0x67A2:
case 0x67A8:
case 0x67A9:
case 0x67AA:
case 0x67B0: /* Hawaii XT [Radeon R9 290X] */
case 0x67B1: /* Hawaii PRO [Radeon R9 290] */
case 0x67B8:
case 0x67B9:
case 0x67BA:
case 0x67BE:
vdev->resetfn = vfio_radeon_reset;
break;
}
break;
}
}
static int vfio_initfn(PCIDevice *pdev)
{
VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
@ -3473,6 +3630,7 @@ static int vfio_initfn(PCIDevice *pdev)
vfio_register_err_notifier(vdev);
vfio_register_req_notifier(vdev);
vfio_setup_resetfn(vdev);
return 0;
@ -3520,6 +3678,10 @@ static void vfio_pci_reset(DeviceState *dev)
vfio_pci_pre_reset(vdev);
if (vdev->resetfn && !vdev->resetfn(vdev)) {
goto post_reset;
}
if (vdev->vbasedev.reset_works &&
(vdev->has_flr || !vdev->has_pm_reset) &&
!ioctl(vdev->vbasedev.fd, VFIO_DEVICE_RESET)) {