From 76dc52684d0f72971d9f6cc7d5ae198061b715bd Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Fri, 14 Apr 2017 13:38:02 -0700 Subject: [PATCH 1/8] PCI: Make PCI_ROM_ADDRESS_MASK a 32-bit constant A 64-bit value is not needed since a PCI ROM address consists in 32 bits. This fixes a clang warning about "implicit conversion from 'unsigned long' to 'u32'". Also remove now unnecessary casts to u32 from __pci_read_base() and pci_std_update_resource(). Signed-off-by: Matthias Kaehlcke Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 2 +- drivers/pci/setup-res.c | 2 +- include/uapi/linux/pci_regs.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index dfc9a2794141..7d5d4a56a186 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -231,7 +231,7 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, res->flags |= IORESOURCE_ROM_ENABLE; l64 = l & PCI_ROM_ADDRESS_MASK; sz64 = sz & PCI_ROM_ADDRESS_MASK; - mask64 = (u32)PCI_ROM_ADDRESS_MASK; + mask64 = PCI_ROM_ADDRESS_MASK; } if (res->flags & IORESOURCE_MEM_64) { diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 4bc589ee78d0..85774b7a316a 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -63,7 +63,7 @@ static void pci_std_update_resource(struct pci_dev *dev, int resno) mask = (u32)PCI_BASE_ADDRESS_IO_MASK; new |= res->flags & ~PCI_BASE_ADDRESS_IO_MASK; } else if (resno == PCI_ROM_RESOURCE) { - mask = (u32)PCI_ROM_ADDRESS_MASK; + mask = PCI_ROM_ADDRESS_MASK; } else { mask = (u32)PCI_BASE_ADDRESS_MEM_MASK; new |= res->flags & ~PCI_BASE_ADDRESS_MEM_MASK; diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 634c9c44ed6c..fff521c9458c 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -114,7 +114,7 @@ #define PCI_SUBSYSTEM_ID 0x2e #define PCI_ROM_ADDRESS 0x30 /* Bits 31..11 are address, 10..1 reserved */ #define PCI_ROM_ADDRESS_ENABLE 0x01 -#define PCI_ROM_ADDRESS_MASK (~0x7ffUL) +#define PCI_ROM_ADDRESS_MASK (~0x7ffU) #define PCI_CAPABILITY_LIST 0x34 /* Offset of first capability list entry */ From ea629d873f3e140fb2e3181c30413e485ee9002b Mon Sep 17 00:00:00 2001 From: Yongji Xie Date: Wed, 15 Feb 2017 14:50:22 +0800 Subject: [PATCH 2/8] PCI: Ignore requested alignment for IOV BARs We would call pci_reassigndev_resource_alignment() before pci_init_capabilities(). So the requested alignment would never work for IOV BARs. Furthermore, it's meaningless to request additional alignment for IOV BARs, the IOV BAR alignment is only determined by the VF BAR size. Signed-off-by: Yongji Xie Signed-off-by: Bjorn Helgaas Reviewed-by: Gavin Shan --- drivers/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7904d02ffdb9..679af2a253ad 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5084,7 +5084,7 @@ void pci_reassigndev_resource_alignment(struct pci_dev *dev) command &= ~PCI_COMMAND_MEMORY; pci_write_config_word(dev, PCI_COMMAND, command); - for (i = 0; i < PCI_BRIDGE_RESOURCES; i++) { + for (i = 0; i <= PCI_ROM_RESOURCE; i++) { r = &dev->resource[i]; if (!(r->flags & IORESOURCE_MEM)) continue; From c9c75143a5962c4c26d2f2c99b7a6e06f421f5e1 Mon Sep 17 00:00:00 2001 From: Yongji Xie Date: Mon, 10 Apr 2017 19:58:11 +0800 Subject: [PATCH 3/8] PCI: Fix calculation of bridge window's size and alignment In case that one device's alignment is greater than its size, we may get an incorrect size and alignment for its bus's memory window in pbus_size_mem(). Fix this case. Signed-off-by: Yongji Xie Signed-off-by: Bjorn Helgaas --- drivers/pci/setup-bus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index cb389277df41..958da7db9033 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -1066,10 +1066,10 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, r->flags = 0; continue; } - size += r_size; + size += max(r_size, align); /* Exclude ranges with size > align from calculation of the alignment. */ - if (r_size == align) + if (r_size <= align) aligns[order] += align; if (order > max_order) max_order = order; From 0a701aa6378496ea54fb065c68b41d918e372e94 Mon Sep 17 00:00:00 2001 From: Yongji Xie Date: Mon, 10 Apr 2017 19:58:12 +0800 Subject: [PATCH 4/8] PCI: Add pcibios_default_alignment() for arch-specific alignment control When VFIO passes through a PCI device to a guest, it does not allow the guest to mmap BARs that are smaller than PAGE_SIZE unless it can reserve the rest of the page (see vfio_pci_probe_mmaps()). This is because a page might contain several small BARs for unrelated devices and a guest should not be able to access all of them. VFIO emulates guest accesses to non-mappable BARs, which is functional but slow. On systems with large page sizes, e.g., PowerNV with 64K pages, BARs are more likely to share a page and performance is more likely to be a problem. Add a weak function to set default alignment for all PCI devices. An arch can override it to force the PCI core to place memory BARs on their own pages. Signed-off-by: Yongji Xie Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 679af2a253ad..d22fb35be26e 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4947,6 +4947,11 @@ void pci_ignore_hotplug(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_ignore_hotplug); +resource_size_t __weak pcibios_default_alignment(void) +{ + return 0; +} + #define RESOURCE_ALIGNMENT_PARAM_SIZE COMMAND_LINE_SIZE static char resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0}; static DEFINE_SPINLOCK(resource_alignment_lock); @@ -4962,14 +4967,15 @@ static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev) { int seg, bus, slot, func, align_order, count; unsigned short vendor, device, subsystem_vendor, subsystem_device; - resource_size_t align = 0; + resource_size_t align = pcibios_default_alignment(); char *p; spin_lock(&resource_alignment_lock); p = resource_alignment_param; - if (!*p) + if (!*p && !align) goto out; if (pci_has_flag(PCI_PROBE_ONLY)) { + align = 0; pr_info_once("PCI: Ignoring requested alignments (PCI_PROBE_ONLY)\n"); goto out; } From 382746376993cfa6d6c4e546c67384201c0f3a82 Mon Sep 17 00:00:00 2001 From: Yongji Xie Date: Mon, 10 Apr 2017 19:58:13 +0800 Subject: [PATCH 5/8] powerpc/powernv: Override pcibios_default_alignment() to force PCI devices to be page aligned Override pcibios_default_alignment() to set default alignment to PAGE_SIZE for all PCI devices on PowerNV platform. Thus sub-page BARs would not share a page and could be mapped into guest when VFIO passthrough them. Signed-off-by: Yongji Xie Signed-off-by: Bjorn Helgaas --- arch/powerpc/include/asm/machdep.h | 2 ++ arch/powerpc/kernel/pci-common.c | 8 ++++++++ arch/powerpc/platforms/powernv/pci-ioda.c | 7 +++++++ 3 files changed, 17 insertions(+) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 5011b69107a7..f90b22c722e1 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -173,6 +173,8 @@ struct machdep_calls { /* Called after scan and before resource survey */ void (*pcibios_fixup_phb)(struct pci_controller *hose); + resource_size_t (*pcibios_default_alignment)(void); + #ifdef CONFIG_PCI_IOV void (*pcibios_fixup_sriov)(struct pci_dev *pdev); resource_size_t (*pcibios_iov_resource_alignment)(struct pci_dev *, int resno); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index ffda24a38dda..03a7c9f58126 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -233,6 +233,14 @@ void pcibios_reset_secondary_bus(struct pci_dev *dev) pci_reset_secondary_bus(dev); } +resource_size_t pcibios_default_alignment(void) +{ + if (ppc_md.pcibios_default_alignment) + return ppc_md.pcibios_default_alignment(); + + return 0; +} + #ifdef CONFIG_PCI_IOV resource_size_t pcibios_iov_resource_alignment(struct pci_dev *pdev, int resno) { diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 6901a06da2f9..0529d5630afd 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3287,6 +3287,11 @@ static void pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type) } } +static resource_size_t pnv_pci_default_alignment(void) +{ + return PAGE_SIZE; +} + #ifdef CONFIG_PCI_IOV static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, int resno) @@ -3820,6 +3825,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, hose->controller_ops = pnv_pci_ioda_controller_ops; } + ppc_md.pcibios_default_alignment = pnv_pci_default_alignment; + #ifdef CONFIG_PCI_IOV ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources; ppc_md.pcibios_iov_resource_alignment = pnv_pci_iov_resource_alignment; From 81a5e70e0de55707c3184d186b2103b5bb9377ec Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 14 Apr 2017 14:12:06 -0500 Subject: [PATCH 6/8] PCI: Factor pci_reassigndev_resource_alignment() Pull the BAR size adjustment out into a new function, pci_request_resource_alignment(), and add a comment about how and why we increase the resource size and alignment. Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 69 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 22 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index d22fb35be26e..1d8bb090a40b 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5049,6 +5049,48 @@ static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev) return align; } +static void pci_request_resource_alignment(struct pci_dev *dev, int bar, + resource_size_t align) +{ + struct resource *r = &dev->resource[bar]; + resource_size_t size; + + if (!(r->flags & IORESOURCE_MEM)) + return; + + if (r->flags & IORESOURCE_PCI_FIXED) { + dev_info(&dev->dev, "BAR%d %pR: ignoring requested alignment %#llx\n", + bar, r, (unsigned long long)align); + return; + } + + size = resource_size(r); + if (size < align) { + + /* + * Increase the size of the resource. BARs are aligned on + * their size, so when we reallocate space for this + * resource, we'll allocate it with the larger alignment. + * It also prevents assignment of any other BARs inside the + * size. If we're requesting page alignment, this means no + * other BARs will share the page. + * + * This makes the resource larger than the hardware BAR, + * which may break drivers that compute things based on the + * resource size, e.g., to find registers at a fixed offset + * before the end of the BAR. We hope users don't request + * alignment for such devices. + */ + size = align; + dev_info(&dev->dev, "BAR%d %pR: requesting alignment to %#llx\n", + bar, r, (unsigned long long)align); + + } + r->flags |= IORESOURCE_UNSET; + r->end = size - 1; + r->start = 0; +} + /* * This function disables memory decoding and releases memory resources * of the device specified by kernel's boot parameter 'pci=resource_alignment='. @@ -5060,7 +5102,7 @@ void pci_reassigndev_resource_alignment(struct pci_dev *dev) { int i; struct resource *r; - resource_size_t align, size; + resource_size_t align; u16 command; /* @@ -5090,28 +5132,11 @@ void pci_reassigndev_resource_alignment(struct pci_dev *dev) command &= ~PCI_COMMAND_MEMORY; pci_write_config_word(dev, PCI_COMMAND, command); - for (i = 0; i <= PCI_ROM_RESOURCE; i++) { - r = &dev->resource[i]; - if (!(r->flags & IORESOURCE_MEM)) - continue; - if (r->flags & IORESOURCE_PCI_FIXED) { - dev_info(&dev->dev, "Ignoring requested alignment for BAR%d: %pR\n", - i, r); - continue; - } + for (i = 0; i <= PCI_ROM_RESOURCE; i++) + pci_request_resource_alignment(dev, i, align); - size = resource_size(r); - if (size < align) { - size = align; - dev_info(&dev->dev, - "Rounding up size of resource #%d to %#llx.\n", - i, (unsigned long long)size); - } - r->flags |= IORESOURCE_UNSET; - r->end = size - 1; - r->start = 0; - } - /* Need to disable bridge's resource window, + /* + * Need to disable bridge's resource window, * to enable the kernel to reassign new resource * window later on. */ From 0dde1c08d1b9dea01cefb327dba8a6e3ae795214 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 17 Apr 2017 15:20:58 -0500 Subject: [PATCH 7/8] PCI: Don't reassign resources that are already aligned The "pci=resource_alignment=" kernel argument designates devices for which we want alignment greater than is required by the PCI specs. Previously we set IORESOURCE_UNSET for every MEM resource of those devices, even if the resource was *already* sufficiently aligned. If a resource is already sufficiently aligned, leave it alone and don't try to reassign it. Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 1d8bb090a40b..7eb6eb384e36 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5065,30 +5065,28 @@ static void pci_request_resource_alignment(struct pci_dev *dev, int bar, } size = resource_size(r); - if (size < align) { + if (size >= align) + return; - /* - * Increase the size of the resource. BARs are aligned on - * their size, so when we reallocate space for this - * resource, we'll allocate it with the larger alignment. - * It also prevents assignment of any other BARs inside the - * size. If we're requesting page alignment, this means no - * other BARs will share the page. - * - * This makes the resource larger than the hardware BAR, - * which may break drivers that compute things based on the - * resource size, e.g., to find registers at a fixed offset - * before the end of the BAR. We hope users don't request - * alignment for such devices. - */ - size = align; - dev_info(&dev->dev, "BAR%d %pR: requesting alignment to %#llx\n", - bar, r, (unsigned long long)align); + /* + * Increase the size of the resource. BARs are aligned on their + * size, so when we reallocate space for this resource, we'll + * allocate it with the larger alignment. It also prevents + * assignment of any other BARs inside the size. If we're + * requesting page alignment, this means no other BARs will share + * the page. + * + * This makes the resource larger than the hardware BAR, which may + * break drivers that compute things based on the resource size, + * e.g., to find registers at a fixed offset before the end of the + * BAR. We hope users don't request alignment for such devices. + */ + dev_info(&dev->dev, "BAR%d %pR: requesting alignment to %#llx\n", + bar, r, (unsigned long long)align); - } - r->flags |= IORESOURCE_UNSET; - r->end = size - 1; r->start = 0; + r->end = align - 1; + r->flags |= IORESOURCE_UNSET; } /* From e3adec72a3c50b733eb277a9f93c044f59ff55eb Mon Sep 17 00:00:00 2001 From: Yongji Xie Date: Mon, 10 Apr 2017 19:58:14 +0800 Subject: [PATCH 8/8] PCI: Don't resize resources when realigning all devices in system The "pci=resource_alignment" argument aligns BARs of designated devices by artificially increasing their size. Increasing the size increases the alignment and prevents other resources from being assigned in the same alignment region, e.g., in the same page, but it can break drivers that use the BAR size to locate things, e.g., ilo_map_device() does this: off = pci_resource_len(pdev, bar) - 0x2000; The new pcibios_default_alignment() interface allows an arch to request that *all* BARs in the system be aligned to a larger size. In this case, we don't need to artificially increase the resource size because we know every BAR of every device will be realigned, so nothing will share the same alignment region. Use IORESOURCE_STARTALIGN to request realignment of PCI BARs when we know we're realigning all BARs in the system. [bhelgaas: comment, changelog] Signed-off-by: Yongji Xie Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 59 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 16 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7eb6eb384e36..c140cb29ee7e 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4959,11 +4959,13 @@ static DEFINE_SPINLOCK(resource_alignment_lock); /** * pci_specified_resource_alignment - get resource alignment specified by user. * @dev: the PCI device to get + * @resize: whether or not to change resources' size when reassigning alignment * * RETURNS: Resource alignment if it is specified. * Zero if it is not specified. */ -static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev) +static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev, + bool *resize) { int seg, bus, slot, func, align_order, count; unsigned short vendor, device, subsystem_vendor, subsystem_device; @@ -5005,6 +5007,7 @@ static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev) (!device || (device == dev->device)) && (!subsystem_vendor || (subsystem_vendor == dev->subsystem_vendor)) && (!subsystem_device || (subsystem_device == dev->subsystem_device))) { + *resize = true; if (align_order == -1) align = PAGE_SIZE; else @@ -5030,6 +5033,7 @@ static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev) bus == dev->bus->number && slot == PCI_SLOT(dev->devfn) && func == PCI_FUNC(dev->devfn)) { + *resize = true; if (align_order == -1) align = PAGE_SIZE; else @@ -5050,7 +5054,7 @@ static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev) } static void pci_request_resource_alignment(struct pci_dev *dev, int bar, - resource_size_t align) + resource_size_t align, bool resize) { struct resource *r = &dev->resource[bar]; resource_size_t size; @@ -5069,23 +5073,45 @@ static void pci_request_resource_alignment(struct pci_dev *dev, int bar, return; /* - * Increase the size of the resource. BARs are aligned on their - * size, so when we reallocate space for this resource, we'll - * allocate it with the larger alignment. It also prevents - * assignment of any other BARs inside the size. If we're - * requesting page alignment, this means no other BARs will share - * the page. + * Increase the alignment of the resource. There are two ways we + * can do this: * - * This makes the resource larger than the hardware BAR, which may - * break drivers that compute things based on the resource size, - * e.g., to find registers at a fixed offset before the end of the - * BAR. We hope users don't request alignment for such devices. + * 1) Increase the size of the resource. BARs are aligned on their + * size, so when we reallocate space for this resource, we'll + * allocate it with the larger alignment. This also prevents + * assignment of any other BARs inside the alignment region, so + * if we're requesting page alignment, this means no other BARs + * will share the page. + * + * The disadvantage is that this makes the resource larger than + * the hardware BAR, which may break drivers that compute things + * based on the resource size, e.g., to find registers at a + * fixed offset before the end of the BAR. + * + * 2) Retain the resource size, but use IORESOURCE_STARTALIGN and + * set r->start to the desired alignment. By itself this + * doesn't prevent other BARs being put inside the alignment + * region, but if we realign *every* resource of every device in + * the system, none of them will share an alignment region. + * + * When the user has requested alignment for only some devices via + * the "pci=resource_alignment" argument, "resize" is true and we + * use the first method. Otherwise we assume we're aligning all + * devices and we use the second. */ + dev_info(&dev->dev, "BAR%d %pR: requesting alignment to %#llx\n", bar, r, (unsigned long long)align); - r->start = 0; - r->end = align - 1; + if (resize) { + r->start = 0; + r->end = align - 1; + } else { + r->flags &= ~IORESOURCE_SIZEALIGN; + r->flags |= IORESOURCE_STARTALIGN; + r->start = align; + r->end = r->start + size - 1; + } r->flags |= IORESOURCE_UNSET; } @@ -5102,6 +5128,7 @@ void pci_reassigndev_resource_alignment(struct pci_dev *dev) struct resource *r; resource_size_t align; u16 command; + bool resize = false; /* * VF BARs are read-only zero according to SR-IOV spec r1.1, sec @@ -5113,7 +5140,7 @@ void pci_reassigndev_resource_alignment(struct pci_dev *dev) return; /* check if specified PCI is target device to reassign */ - align = pci_specified_resource_alignment(dev); + align = pci_specified_resource_alignment(dev, &resize); if (!align) return; @@ -5131,7 +5158,7 @@ void pci_reassigndev_resource_alignment(struct pci_dev *dev) pci_write_config_word(dev, PCI_COMMAND, command); for (i = 0; i <= PCI_ROM_RESOURCE; i++) - pci_request_resource_alignment(dev, i, align); + pci_request_resource_alignment(dev, i, align, resize); /* * Need to disable bridge's resource window,