linux/kernel/irq/affinity.c
Linus Torvalds f263fbb8d6 pci-v4.13-changes
-----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJZYAFUAAoJEFmIoMA60/r8cFQP/A4fpdjhd42WRNQXGTpZieop
 i40lBQtGdBn/UY97U6BoutcS1ygDi9OiSzg+IR6I90iMgidqyUHFhe4hGWgVHD2g
 Tg0KLzd+lKKfQ6Gqt1P6t4dLGLvyEj5NUbCeFE4XYODAUkkiBaOndax6DK1GvU54
 Vjuj63rHtMKFR/tG/4iFTigObqyI8QE6O9JVxwuvIyEX6RXKbJe+wkulv5taSnWt
 Ne94950i10MrELtNreVdi8UbCbXiqjg0r5sKI/WTJ7Bc7WsC7X5PhWlhcNrbHyBT
 Ivhoypkui3Ky8gvwWqL0KBG+cRp8prBXAdabrD9wRbz0TKnfGI6pQzseCGRnkE6T
 mhlSJpsSNIHaejoCjk93yPn5oRiTNtPMdVhMpEQL9V/crVRGRRmbd7v2TYvpMHVR
 JaPZ8bv+C2aBTY8uL3/v/rgrjsMKOYFeaxeNklpErxrknsbgb6BgubmeZXDvTBVv
 YUIbAkvveonUKisv+kbD8L7tp1+jdbRUT0AikS0NVgAJQhfArOmBcDpTL9YC51vE
 feFhkVx4A32vvOm7Zcg9A7IMXNjeSfccKGw3dJOAvzgDODuJiaCG6S0o7B5Yngze
 axMi87ixGT4QM98z/I4MC8E9rDrJdIitlpvb6ZBgiLzoO3kmvsIZZKt8UxWqf5r8
 w3U2HoyKH13Qbkn1xkum
 =mkyb
 -----END PGP SIGNATURE-----

Merge tag 'pci-v4.13-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci

Pull PCI updates from Bjorn Helgaas:

  - add sysfs max_link_speed/width, current_link_speed/width (Wong Vee
    Khee)

  - make host bridge IRQ mapping much more generic (Matthew Minter,
    Lorenzo Pieralisi)

  - convert most drivers to pci_scan_root_bus_bridge() (Lorenzo
    Pieralisi)

  - mutex sriov_configure() (Jakub Kicinski)

  - mutex pci_error_handlers callbacks (Christoph Hellwig)

  - split ->reset_notify() into ->reset_prepare()/reset_done()
    (Christoph Hellwig)

  - support multiple PCIe portdrv interrupts for MSI as well as MSI-X
    (Gabriele Paoloni)

  - allocate MSI/MSI-X vector for Downstream Port Containment (Gabriele
    Paoloni)

  - fix MSI IRQ affinity pre/post/min_vecs issue (Michael Hernandez)

  - test INTx masking during enumeration, not at run-time (Piotr Gregor)

  - avoid using device_may_wakeup() for runtime PM (Rafael J. Wysocki)

  - restore the status of PCI devices across hibernation (Chen Yu)

  - keep parent resources that start at 0x0 (Ard Biesheuvel)

  - enable ECRC only if device supports it (Bjorn Helgaas)

  - restore PRI and PASID state after Function-Level Reset (CQ Tang)

  - skip DPC event if device is not present (Keith Busch)

  - check domain when matching SMBIOS info (Sujith Pandel)

  - mark Intel XXV710 NIC INTx masking as broken (Alex Williamson)

  - avoid AMD SB7xx EHCI USB wakeup defect (Kai-Heng Feng)

  - work around long-standing Macbook Pro poweroff issue (Bjorn Helgaas)

  - add Switchtec "running" status flag (Logan Gunthorpe)

  - fix dra7xx incorrect RW1C IRQ register usage (Arvind Yadav)

  - modify xilinx-nwl IRQ chip for legacy interrupts (Bharat Kumar
    Gogada)

  - move VMD SRCU cleanup after bus, child device removal (Jon Derrick)

  - add Faraday clock handling (Linus Walleij)

  - configure Rockchip MPS and reorganize (Shawn Lin)

  - limit Qualcomm TLP size to 2K (hardware issue) (Srinivas Kandagatla)

  - support Tegra MSI 64-bit addressing (Thierry Reding)

  - use Rockchip normal (not privileged) register bank (Shawn Lin)

  - add HiSilicon Kirin SoC PCIe controller driver (Xiaowei Song)

  - add Sigma Designs Tango SMP8759 PCIe controller driver (Marc
    Gonzalez)

  - add MediaTek PCIe host controller support (Ryder Lee)

  - add Qualcomm IPQ4019 support (John Crispin)

  - add HyperV vPCI protocol v1.2 support (Jork Loeser)

  - add i.MX6 regulator support (Quentin Schulz)

* tag 'pci-v4.13-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci: (113 commits)
  PCI: tango: Add Sigma Designs Tango SMP8759 PCIe host bridge support
  PCI: Add DT binding for Sigma Designs Tango PCIe controller
  PCI: rockchip: Use normal register bank for config accessors
  dt-bindings: PCI: Add documentation for MediaTek PCIe
  PCI: Remove __pci_dev_reset() and pci_dev_reset()
  PCI: Split ->reset_notify() method into ->reset_prepare() and ->reset_done()
  PCI: xilinx: Make of_device_ids const
  PCI: xilinx-nwl: Modify IRQ chip for legacy interrupts
  PCI: vmd: Move SRCU cleanup after bus, child device removal
  PCI: vmd: Correct comment: VMD domains start at 0x10000, not 0x1000
  PCI: versatile: Add local struct device pointers
  PCI: tegra: Do not allocate MSI target memory
  PCI: tegra: Support MSI 64-bit addressing
  PCI: rockchip: Use local struct device pointer consistently
  PCI: rockchip: Check for clk_prepare_enable() errors during resume
  MAINTAINERS: Remove Wenrui Li as Rockchip PCIe driver maintainer
  PCI: rockchip: Configure RC's MPS setting
  PCI: rockchip: Reconfigure configuration space header type
  PCI: rockchip: Split out rockchip_pcie_cfg_configuration_accesses()
  PCI: rockchip: Move configuration accesses into rockchip_pcie_cfg_atu()
  ...
2017-07-08 15:51:57 -07:00

220 lines
5.4 KiB
C

/*
* Copyright (C) 2016 Thomas Gleixner.
* Copyright (C) 2016-2017 Christoph Hellwig.
*/
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/cpu.h>
static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
int cpus_per_vec)
{
const struct cpumask *siblmsk;
int cpu, sibl;
for ( ; cpus_per_vec > 0; ) {
cpu = cpumask_first(nmsk);
/* Should not happen, but I'm too lazy to think about it */
if (cpu >= nr_cpu_ids)
return;
cpumask_clear_cpu(cpu, nmsk);
cpumask_set_cpu(cpu, irqmsk);
cpus_per_vec--;
/* If the cpu has siblings, use them first */
siblmsk = topology_sibling_cpumask(cpu);
for (sibl = -1; cpus_per_vec > 0; ) {
sibl = cpumask_next(sibl, siblmsk);
if (sibl >= nr_cpu_ids)
break;
if (!cpumask_test_and_clear_cpu(sibl, nmsk))
continue;
cpumask_set_cpu(sibl, irqmsk);
cpus_per_vec--;
}
}
}
static cpumask_var_t *alloc_node_to_present_cpumask(void)
{
cpumask_var_t *masks;
int node;
masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL);
if (!masks)
return NULL;
for (node = 0; node < nr_node_ids; node++) {
if (!zalloc_cpumask_var(&masks[node], GFP_KERNEL))
goto out_unwind;
}
return masks;
out_unwind:
while (--node >= 0)
free_cpumask_var(masks[node]);
kfree(masks);
return NULL;
}
static void free_node_to_present_cpumask(cpumask_var_t *masks)
{
int node;
for (node = 0; node < nr_node_ids; node++)
free_cpumask_var(masks[node]);
kfree(masks);
}
static void build_node_to_present_cpumask(cpumask_var_t *masks)
{
int cpu;
for_each_present_cpu(cpu)
cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]);
}
static int get_nodes_in_cpumask(cpumask_var_t *node_to_present_cpumask,
const struct cpumask *mask, nodemask_t *nodemsk)
{
int n, nodes = 0;
/* Calculate the number of nodes in the supplied affinity mask */
for_each_node(n) {
if (cpumask_intersects(mask, node_to_present_cpumask[n])) {
node_set(n, *nodemsk);
nodes++;
}
}
return nodes;
}
/**
* irq_create_affinity_masks - Create affinity masks for multiqueue spreading
* @nvecs: The total number of vectors
* @affd: Description of the affinity requirements
*
* Returns the masks pointer or NULL if allocation failed.
*/
struct cpumask *
irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
{
int n, nodes, cpus_per_vec, extra_vecs, curvec;
int affv = nvecs - affd->pre_vectors - affd->post_vectors;
int last_affv = affv + affd->pre_vectors;
nodemask_t nodemsk = NODE_MASK_NONE;
struct cpumask *masks;
cpumask_var_t nmsk, *node_to_present_cpumask;
/*
* If there aren't any vectors left after applying the pre/post
* vectors don't bother with assigning affinity.
*/
if (!affv)
return NULL;
if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
return NULL;
masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
if (!masks)
goto out;
node_to_present_cpumask = alloc_node_to_present_cpumask();
if (!node_to_present_cpumask)
goto out;
/* Fill out vectors at the beginning that don't need affinity */
for (curvec = 0; curvec < affd->pre_vectors; curvec++)
cpumask_copy(masks + curvec, irq_default_affinity);
/* Stabilize the cpumasks */
get_online_cpus();
build_node_to_present_cpumask(node_to_present_cpumask);
nodes = get_nodes_in_cpumask(node_to_present_cpumask, cpu_present_mask,
&nodemsk);
/*
* If the number of nodes in the mask is greater than or equal the
* number of vectors we just spread the vectors across the nodes.
*/
if (affv <= nodes) {
for_each_node_mask(n, nodemsk) {
cpumask_copy(masks + curvec,
node_to_present_cpumask[n]);
if (++curvec == last_affv)
break;
}
goto done;
}
for_each_node_mask(n, nodemsk) {
int ncpus, v, vecs_to_assign, vecs_per_node;
/* Spread the vectors per node */
vecs_per_node = (affv - (curvec - affd->pre_vectors)) / nodes;
/* Get the cpus on this node which are in the mask */
cpumask_and(nmsk, cpu_present_mask, node_to_present_cpumask[n]);
/* Calculate the number of cpus per vector */
ncpus = cpumask_weight(nmsk);
vecs_to_assign = min(vecs_per_node, ncpus);
/* Account for rounding errors */
extra_vecs = ncpus - vecs_to_assign * (ncpus / vecs_to_assign);
for (v = 0; curvec < last_affv && v < vecs_to_assign;
curvec++, v++) {
cpus_per_vec = ncpus / vecs_to_assign;
/* Account for extra vectors to compensate rounding errors */
if (extra_vecs) {
cpus_per_vec++;
--extra_vecs;
}
irq_spread_init_one(masks + curvec, nmsk, cpus_per_vec);
}
if (curvec >= last_affv)
break;
--nodes;
}
done:
put_online_cpus();
/* Fill out vectors at the end that don't need affinity */
for (; curvec < nvecs; curvec++)
cpumask_copy(masks + curvec, irq_default_affinity);
free_node_to_present_cpumask(node_to_present_cpumask);
out:
free_cpumask_var(nmsk);
return masks;
}
/**
* irq_calc_affinity_vectors - Calculate the optimal number of vectors
* @minvec: The minimum number of vectors available
* @maxvec: The maximum number of vectors available
* @affd: Description of the affinity requirements
*/
int irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity *affd)
{
int resv = affd->pre_vectors + affd->post_vectors;
int vecs = maxvec - resv;
int ret;
if (resv > minvec)
return 0;
get_online_cpus();
ret = min_t(int, cpumask_weight(cpu_present_mask), vecs) + resv;
put_online_cpus();
return ret;
}