A treewide cleanup of interrupt descriptor (ab)use with all sorts of racy

accesses, inefficient and disfunctional code. The goal is to remove the
 export of irq_to_desc() to prevent these things from creeping up again.
 -----BEGIN PGP SIGNATURE-----
 
 iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAl/ifgsTHHRnbHhAbGlu
 dXRyb25peC5kZQAKCRCmGPVMDXSYoYm6EACAo8sObkuY3oWLagtGj1KHxon53oGZ
 VfDw2LYKM+rgJjDWdiyocyxQU5gtm6loWCrIHjH2adRQ4EisB5r8hfI8NZHxNMyq
 8khUi822NRBfFN6SCpO8eW9o95euscNQwCzqi7gV9/U/BAKoDoSEYzS4y0YmJlup
 mhoikkrFiBuFXplWI0gbP4ihb8S/to2+kTL6o7eBoJY9+fSXIFR3erZ6f3fLjYZG
 CQUUysTywdDhLeDkC9vaesXwgdl2XnaPRwcQqmK8Ez0QYNYpawyILUHLD75cIHDu
 bHdK2ZoDv/wtad/3BoGTK3+wChz20a/4/IAnBIUVgmnSLsPtW8zNEOPWNNc0aGg+
 rtafi5bvJ1lMoSZhkjLWQDOGU6vFaXl9NkC2fpF+dg1skFMT2CyLC8LD/ekmocon
 zHAPBva9j3m2A80hI3dUH9azo/IOl1GHG8ccM6SCxY3S/9vWSQChNhQDLe25xBEO
 VtKZS7DYFCRiL8mIy9GgwZWof8Vy2iMua2ML+W9a3mC9u3CqSLbCFmLMT/dDoXl1
 oHnMdAHk1DRatA8pJAz83C75RxbAS2riGEqtqLEQ6OaNXn6h0oXCanJX9jdKYDBh
 z6ijWayPSRMVktN6FDINsVNFe95N4GwYcGPfagIMqyMMhmJDic6apEzEo7iA76lk
 cko28MDqTIK4UQ==
 =BXv+
 -----END PGP SIGNATURE-----

Merge tag 'irq-core-2020-12-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull irq updates from Thomas Gleixner:
 "This is the second attempt after the first one failed miserably and
  got zapped to unblock the rest of the interrupt related patches.

  A treewide cleanup of interrupt descriptor (ab)use with all sorts of
  racy accesses, inefficient and disfunctional code. The goal is to
  remove the export of irq_to_desc() to prevent these things from
  creeping up again"

* tag 'irq-core-2020-12-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (30 commits)
  genirq: Restrict export of irq_to_desc()
  xen/events: Implement irq distribution
  xen/events: Reduce irq_info:: Spurious_cnt storage size
  xen/events: Only force affinity mask for percpu interrupts
  xen/events: Use immediate affinity setting
  xen/events: Remove disfunct affinity spreading
  xen/events: Remove unused bind_evtchn_to_irq_lateeoi()
  net/mlx5: Use effective interrupt affinity
  net/mlx5: Replace irq_to_desc() abuse
  net/mlx4: Use effective interrupt affinity
  net/mlx4: Replace irq_to_desc() abuse
  PCI: mobiveil: Use irq_data_get_irq_chip_data()
  PCI: xilinx-nwl: Use irq_data_get_irq_chip_data()
  NTB/msi: Use irq_has_action()
  mfd: ab8500-debugfs: Remove the racy fiddling with irq_desc
  pinctrl: nomadik: Use irq_has_action()
  drm/i915/pmu: Replace open coded kstat_irqs() copy
  drm/i915/lpe_audio: Remove pointless irq_to_desc() usage
  s390/irq: Use irq_desc_kstat_cpu() in show_msi_interrupt()
  parisc/irq: Use irq_desc_kstat_cpu() in show_interrupts()
  ...
This commit is contained in:
Linus Torvalds 2020-12-24 13:50:23 -08:00
commit 3913d00ac5
32 changed files with 281 additions and 228 deletions

View file

@ -7,7 +7,7 @@
*
* Code supporting the Jensen.
*/
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/mm.h>

View file

@ -549,7 +549,7 @@ void show_ipi_list(struct seq_file *p, int prec)
seq_printf(p, "%*s%u: ", prec - 1, "IPI", i);
for_each_online_cpu(cpu)
seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu));
seq_printf(p, " %s\n", ipi_types[i]);
}

View file

@ -811,7 +811,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
prec >= 4 ? " " : "");
for_each_online_cpu(cpu)
seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu));
seq_printf(p, " %s\n", ipi_types[i]);
}

View file

@ -216,12 +216,9 @@ int show_interrupts(struct seq_file *p, void *v)
if (!action)
goto skip;
seq_printf(p, "%3d: ", i);
#ifdef CONFIG_SMP
for_each_online_cpu(j)
seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
#else
seq_printf(p, "%10u ", kstat_irqs(i));
#endif
seq_printf(p, "%10u ", irq_desc_kstat_cpu(desc, j));
seq_printf(p, " %14s", irq_desc_get_chip(desc)->name);
#ifndef PARISC_IRQ_CR16_COUNTS

View file

@ -124,7 +124,7 @@ static void show_msi_interrupt(struct seq_file *p, int irq)
raw_spin_lock_irqsave(&desc->lock, flags);
seq_printf(p, "%3d: ", irq);
for_each_online_cpu(cpu)
seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
seq_printf(p, "%10u ", irq_desc_kstat_cpu(desc, cpu));
if (desc->irq_data.chip)
seq_printf(p, " %8s", desc->irq_data.chip->name);

View file

@ -25,6 +25,7 @@
*
* Send feedback to <colpatch@us.ibm.com>
*/
#include <linux/interrupt.h>
#include <linux/nodemask.h>
#include <linux/export.h>
#include <linux/mmzone.h>

View file

@ -297,13 +297,9 @@ int intel_lpe_audio_init(struct drm_i915_private *dev_priv)
*/
void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv)
{
struct irq_desc *desc;
if (!HAS_LPE_AUDIO(dev_priv))
return;
desc = irq_to_desc(dev_priv->lpe_audio.irq);
lpe_audio_platdev_destroy(dev_priv);
irq_free_desc(dev_priv->lpe_audio.irq);

View file

@ -60,6 +60,24 @@
* and related files, but that will be described in separate chapters.
*/
/*
* Interrupt statistic for PMU. Increments the counter only if the
* interrupt originated from the the GPU so interrupts from a device which
* shares the interrupt line are not accounted.
*/
static inline void pmu_irq_stats(struct drm_i915_private *i915,
irqreturn_t res)
{
if (unlikely(res != IRQ_HANDLED))
return;
/*
* A clever compiler translates that into INC. A not so clever one
* should at least prevent store tearing.
*/
WRITE_ONCE(i915->pmu.irq_count, i915->pmu.irq_count + 1);
}
typedef bool (*long_pulse_detect_func)(enum hpd_pin pin, u32 val);
typedef u32 (*hotplug_enables_func)(struct drm_i915_private *i915,
enum hpd_pin pin);
@ -1668,6 +1686,8 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg)
valleyview_pipestat_irq_handler(dev_priv, pipe_stats);
} while (0);
pmu_irq_stats(dev_priv, ret);
enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
return ret;
@ -1745,6 +1765,8 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg)
valleyview_pipestat_irq_handler(dev_priv, pipe_stats);
} while (0);
pmu_irq_stats(dev_priv, ret);
enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
return ret;
@ -2155,6 +2177,8 @@ static irqreturn_t ilk_irq_handler(int irq, void *arg)
if (sde_ier)
raw_reg_write(regs, SDEIER, sde_ier);
pmu_irq_stats(i915, ret);
/* IRQs are synced during runtime_suspend, we don't require a wakeref */
enable_rpm_wakeref_asserts(&i915->runtime_pm);
@ -2541,6 +2565,8 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
gen8_master_intr_enable(regs);
pmu_irq_stats(dev_priv, IRQ_HANDLED);
return IRQ_HANDLED;
}
@ -2636,6 +2662,8 @@ __gen11_irq_handler(struct drm_i915_private * const i915,
gen11_gu_misc_irq_handler(gt, gu_misc_iir);
pmu_irq_stats(i915, IRQ_HANDLED);
return IRQ_HANDLED;
}
@ -3934,6 +3962,8 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)
i8xx_pipestat_irq_handler(dev_priv, iir, pipe_stats);
} while (0);
pmu_irq_stats(dev_priv, ret);
enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
return ret;
@ -4043,6 +4073,8 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)
i915_pipestat_irq_handler(dev_priv, iir, pipe_stats);
} while (0);
pmu_irq_stats(dev_priv, ret);
enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
return ret;
@ -4189,6 +4221,8 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
i965_pipestat_irq_handler(dev_priv, iir, pipe_stats);
} while (0);
pmu_irq_stats(dev_priv, IRQ_HANDLED);
enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
return ret;

View file

@ -4,7 +4,6 @@
* Copyright © 2017-2018 Intel Corporation
*/
#include <linux/irq.h>
#include <linux/pm_runtime.h>
#include "gt/intel_engine.h"
@ -424,22 +423,6 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
return HRTIMER_RESTART;
}
static u64 count_interrupts(struct drm_i915_private *i915)
{
/* open-coded kstat_irqs() */
struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
u64 sum = 0;
int cpu;
if (!desc || !desc->kstat_irqs)
return 0;
for_each_possible_cpu(cpu)
sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
return sum;
}
static void i915_pmu_event_destroy(struct perf_event *event)
{
struct drm_i915_private *i915 =
@ -590,7 +573,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
USEC_PER_SEC /* to MHz */);
break;
case I915_PMU_INTERRUPTS:
val = count_interrupts(i915);
val = READ_ONCE(pmu->irq_count);
break;
case I915_PMU_RC6_RESIDENCY:
val = get_rc6(&i915->gt);

View file

@ -111,6 +111,14 @@ struct i915_pmu {
* @sleep_last: Last time GT parked for RC6 estimation.
*/
ktime_t sleep_last;
/**
* @irq_count: Number of interrupts
*
* Intentionally unsigned long to avoid atomics or heuristics on 32bit.
* 4e9 interrupts are a lot and postprocessing can really deal with an
* occasional wraparound easily. It's 32bit after all.
*/
unsigned long irq_count;
/**
* @events_attr_group: Device events attribute group.
*/

View file

@ -1513,24 +1513,14 @@ static int ab8500_interrupts_show(struct seq_file *s, void *p)
{
int line;
seq_puts(s, "name: number: number of: wake:\n");
seq_puts(s, "name: number: irq: number of: wake:\n");
for (line = 0; line < num_interrupt_lines; line++) {
struct irq_desc *desc = irq_to_desc(line + irq_first);
seq_printf(s, "%3i: %6i %4i",
seq_printf(s, "%3i: %4i %6i %4i\n",
line,
line + irq_first,
num_interrupts[line],
num_wake_interrupts[line]);
if (desc && desc->name)
seq_printf(s, "-%-8s", desc->name);
if (desc && desc->action) {
struct irqaction *action = desc->action;
seq_printf(s, " %s", action->name);
while ((action = action->next) != NULL)
seq_printf(s, ", %s", action->name);
}
seq_putc(s, '\n');
}

View file

@ -90,7 +90,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
int cq_idx)
{
struct mlx4_en_dev *mdev = priv->mdev;
int err = 0;
int irq, err = 0;
int timestamp_en = 0;
bool assigned_eq = false;
@ -116,10 +116,8 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
assigned_eq = true;
}
cq->irq_desc =
irq_to_desc(mlx4_eq_get_irq(mdev->dev,
cq->vector));
irq = mlx4_eq_get_irq(mdev->dev, cq->vector);
cq->aff_mask = irq_get_effective_affinity_mask(irq);
} else {
/* For TX we use the same irq per
ring we assigned for the RX */

View file

@ -958,18 +958,14 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
/* If we used up all the quota - we're probably not done yet... */
if (done == budget || !clean_complete) {
const struct cpumask *aff;
struct irq_data *idata;
int cpu_curr;
/* in case we got here because of !clean_complete */
done = budget;
cpu_curr = smp_processor_id();
idata = irq_desc_get_irq_data(cq->irq_desc);
aff = irq_data_get_affinity_mask(idata);
if (likely(cpumask_test_cpu(cpu_curr, aff)))
if (likely(cpumask_test_cpu(cpu_curr, cq->aff_mask)))
return budget;
/* Current cpu is not according to smp_irq_affinity -

View file

@ -47,6 +47,7 @@
#endif
#include <linux/cpu_rmap.h>
#include <linux/ptp_clock_kernel.h>
#include <linux/irq.h>
#include <net/xdp.h>
#include <linux/mlx4/device.h>
@ -365,7 +366,7 @@ struct mlx4_en_cq {
struct mlx4_cqe *buf;
#define MLX4_EN_OPCODE_ERROR 0x1e
struct irq_desc *irq_desc;
const struct cpumask *aff_mask;
};
struct mlx4_en_port_profile {

View file

@ -684,7 +684,7 @@ struct mlx5e_channel {
spinlock_t async_icosq_lock;
/* data path - accessed per napi poll */
struct irq_desc *irq_desc;
const struct cpumask *aff_mask;
struct mlx5e_ch_stats *stats;
/* control */

View file

@ -479,7 +479,6 @@ int mlx5e_port_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
c->num_tc = params->num_tc;
c->stats = &priv->port_ptp_stats.ch;
c->irq_desc = irq_to_desc(irq);
c->lag_port = lag_port;
netif_napi_add(netdev, &c->napi, mlx5e_ptp_napi_poll, 64);

View file

@ -28,7 +28,6 @@ struct mlx5e_port_ptp {
u8 lag_port;
/* data path - accessed per napi poll */
struct irq_desc *irq_desc;
struct mlx5e_ch_stats *stats;
/* control */

View file

@ -1987,7 +1987,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
c->num_tc = params->num_tc;
c->xdp = !!params->xdp_prog;
c->stats = &priv->channel_stats[ix].ch;
c->irq_desc = irq_to_desc(irq);
c->aff_mask = irq_get_effective_affinity_mask(irq);
c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix);
netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64);

View file

@ -40,12 +40,8 @@
static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c)
{
int current_cpu = smp_processor_id();
const struct cpumask *aff;
struct irq_data *idata;
idata = irq_desc_get_irq_data(c->irq_desc);
aff = irq_data_get_affinity_mask(idata);
return cpumask_test_cpu(current_cpu, aff);
return cpumask_test_cpu(current_cpu, c->aff_mask);
}
static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq)

View file

@ -282,15 +282,13 @@ int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler,
struct ntb_msi_desc *msi_desc)
{
struct msi_desc *entry;
struct irq_desc *desc;
int ret;
if (!ntb->msi)
return -EINVAL;
for_each_pci_msi_entry(entry, ntb->pdev) {
desc = irq_to_desc(entry->irq);
if (desc->action)
if (irq_has_action(entry->irq))
continue;
ret = devm_request_threaded_irq(&ntb->dev, entry->irq, handler,

View file

@ -306,13 +306,11 @@ int mobiveil_host_init(struct mobiveil_pcie *pcie, bool reinit)
static void mobiveil_mask_intx_irq(struct irq_data *data)
{
struct irq_desc *desc = irq_to_desc(data->irq);
struct mobiveil_pcie *pcie;
struct mobiveil_pcie *pcie = irq_data_get_irq_chip_data(data);
struct mobiveil_root_port *rp;
unsigned long flags;
u32 mask, shifted_val;
pcie = irq_desc_get_chip_data(desc);
rp = &pcie->rp;
mask = 1 << ((data->hwirq + PAB_INTX_START) - 1);
raw_spin_lock_irqsave(&rp->intx_mask_lock, flags);
@ -324,13 +322,11 @@ static void mobiveil_mask_intx_irq(struct irq_data *data)
static void mobiveil_unmask_intx_irq(struct irq_data *data)
{
struct irq_desc *desc = irq_to_desc(data->irq);
struct mobiveil_pcie *pcie;
struct mobiveil_pcie *pcie = irq_data_get_irq_chip_data(data);
struct mobiveil_root_port *rp;
unsigned long flags;
u32 shifted_val, mask;
pcie = irq_desc_get_chip_data(desc);
rp = &pcie->rp;
mask = 1 << ((data->hwirq + PAB_INTX_START) - 1);
raw_spin_lock_irqsave(&rp->intx_mask_lock, flags);

View file

@ -374,13 +374,11 @@ static void nwl_pcie_msi_handler_low(struct irq_desc *desc)
static void nwl_mask_leg_irq(struct irq_data *data)
{
struct irq_desc *desc = irq_to_desc(data->irq);
struct nwl_pcie *pcie;
struct nwl_pcie *pcie = irq_data_get_irq_chip_data(data);
unsigned long flags;
u32 mask;
u32 val;
pcie = irq_desc_get_chip_data(desc);
mask = 1 << (data->hwirq - 1);
raw_spin_lock_irqsave(&pcie->leg_mask_lock, flags);
val = nwl_bridge_readl(pcie, MSGF_LEG_MASK);
@ -390,13 +388,11 @@ static void nwl_mask_leg_irq(struct irq_data *data)
static void nwl_unmask_leg_irq(struct irq_data *data)
{
struct irq_desc *desc = irq_to_desc(data->irq);
struct nwl_pcie *pcie;
struct nwl_pcie *pcie = irq_data_get_irq_chip_data(data);
unsigned long flags;
u32 mask;
u32 val;
pcie = irq_desc_get_chip_data(desc);
mask = 1 << (data->hwirq - 1);
raw_spin_lock_irqsave(&pcie->leg_mask_lock, flags);
val = nwl_bridge_readl(pcie, MSGF_LEG_MASK);

View file

@ -948,8 +948,8 @@ static void nmk_gpio_dbg_show_one(struct seq_file *s,
(mode < 0) ? "unknown" : modes[mode]);
} else {
int irq = chip->to_irq(chip, offset);
struct irq_desc *desc = irq_to_desc(irq);
const int pullidx = pull ? 1 : 0;
bool wake;
int val;
static const char * const pulls[] = {
"none ",
@ -969,8 +969,9 @@ static void nmk_gpio_dbg_show_one(struct seq_file *s,
* This races with request_irq(), set_irq_type(),
* and set_irq_wake() ... but those are "rare".
*/
if (irq > 0 && desc && desc->action) {
if (irq > 0 && irq_has_action(irq)) {
char *trigger;
bool wake;
if (nmk_chip->edge_rising & BIT(offset))
trigger = "edge-rising";
@ -979,10 +980,10 @@ static void nmk_gpio_dbg_show_one(struct seq_file *s,
else
trigger = "edge-undefined";
wake = !!(nmk_chip->real_wake & BIT(offset));
seq_printf(s, " irq-%d %s%s",
irq, trigger,
irqd_is_wakeup_set(&desc->irq_data)
? " wakeup" : "");
irq, trigger, wake ? " wakeup" : "");
}
}
clk_disable(nmk_chip->clk);

View file

@ -95,7 +95,8 @@ struct irq_info {
struct list_head list;
struct list_head eoi_list;
short refcnt;
short spurious_cnt;
u8 spurious_cnt;
u8 is_accounted;
enum xen_irq_type type; /* type */
unsigned irq;
evtchn_port_t evtchn; /* event channel */
@ -161,6 +162,9 @@ static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
/* IRQ <-> IPI mapping */
static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
/* Event channel distribution data */
static atomic_t channels_on_cpu[NR_CPUS];
static int **evtchn_to_irq;
#ifdef CONFIG_X86
static unsigned long *pirq_eoi_map;
@ -257,6 +261,32 @@ static void set_info_for_irq(unsigned int irq, struct irq_info *info)
irq_set_chip_data(irq, info);
}
/* Per CPU channel accounting */
static void channels_on_cpu_dec(struct irq_info *info)
{
if (!info->is_accounted)
return;
info->is_accounted = 0;
if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
return;
WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], -1 , 0));
}
static void channels_on_cpu_inc(struct irq_info *info)
{
if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
return;
if (WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], 1,
INT_MAX)))
return;
info->is_accounted = 1;
}
/* Constructors for packed IRQ information. */
static int xen_irq_info_common_setup(struct irq_info *info,
unsigned irq,
@ -339,6 +369,7 @@ static void xen_irq_info_cleanup(struct irq_info *info)
{
set_evtchn_to_irq(info->evtchn, -1);
info->evtchn = 0;
channels_on_cpu_dec(info);
}
/*
@ -433,18 +464,25 @@ static bool pirq_needs_eoi_flag(unsigned irq)
return info->u.pirq.flags & PIRQ_NEEDS_EOI;
}
static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu)
static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
bool force_affinity)
{
int irq = get_evtchn_to_irq(evtchn);
struct irq_info *info = info_for_irq(irq);
BUG_ON(irq == -1);
#ifdef CONFIG_SMP
cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
#endif
if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
cpumask_copy(irq_get_effective_affinity_mask(irq),
cpumask_of(cpu));
}
xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);
channels_on_cpu_dec(info);
info->cpu = cpu;
channels_on_cpu_inc(info);
}
/**
@ -523,8 +561,10 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
return;
if (spurious) {
if ((1 << info->spurious_cnt) < (HZ << 2))
info->spurious_cnt++;
if ((1 << info->spurious_cnt) < (HZ << 2)) {
if (info->spurious_cnt != 0xFF)
info->spurious_cnt++;
}
if (info->spurious_cnt > 1) {
delay = 1 << (info->spurious_cnt - 2);
if (delay > HZ)
@ -615,11 +655,6 @@ static void xen_irq_init(unsigned irq)
{
struct irq_info *info;
#ifdef CONFIG_SMP
/* By default all event channels notify CPU#0. */
cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0));
#endif
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (info == NULL)
panic("Unable to allocate metadata for IRQ%d\n", irq);
@ -628,6 +663,11 @@ static void xen_irq_init(unsigned irq)
info->refcnt = -1;
set_info_for_irq(irq, info);
/*
* Interrupt affinity setting can be immediate. No point
* in delaying it until an interrupt is handled.
*/
irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
INIT_LIST_HEAD(&info->eoi_list);
list_add_tail(&info->list, &xen_irq_list_head);
@ -739,18 +779,7 @@ static void eoi_pirq(struct irq_data *data)
if (!VALID_EVTCHN(evtchn))
return;
if (unlikely(irqd_is_setaffinity_pending(data)) &&
likely(!irqd_irq_disabled(data))) {
int masked = test_and_set_mask(evtchn);
clear_evtchn(evtchn);
irq_move_masked_irq(data);
if (!masked)
unmask_evtchn(evtchn);
} else
clear_evtchn(evtchn);
clear_evtchn(evtchn);
if (pirq_needs_eoi(data->irq)) {
rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
@ -794,7 +823,7 @@ static unsigned int __startup_pirq(unsigned int irq)
goto err;
info->evtchn = evtchn;
bind_evtchn_to_cpu(evtchn, 0);
bind_evtchn_to_cpu(evtchn, 0, false);
rc = xen_evtchn_port_setup(evtchn);
if (rc)
@ -1113,8 +1142,14 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip)
irq = ret;
goto out;
}
/* New interdomain events are bound to VCPU 0. */
bind_evtchn_to_cpu(evtchn, 0);
/*
* New interdomain events are initially bound to vCPU0 This
* is required to setup the event channel in the first
* place and also important for UP guests because the
* affinity setting is not invoked on them so nothing would
* bind the channel.
*/
bind_evtchn_to_cpu(evtchn, 0, false);
} else {
struct irq_info *info = info_for_irq(irq);
WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
@ -1132,12 +1167,6 @@ int bind_evtchn_to_irq(evtchn_port_t evtchn)
}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn)
{
return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip);
}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi);
static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
{
struct evtchn_bind_ipi bind_ipi;
@ -1168,7 +1197,11 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
irq = ret;
goto out;
}
bind_evtchn_to_cpu(evtchn, cpu);
/*
* Force the affinity mask to the target CPU so proc shows
* the correct target.
*/
bind_evtchn_to_cpu(evtchn, cpu, true);
} else {
struct irq_info *info = info_for_irq(irq);
WARN_ON(info == NULL || info->type != IRQT_IPI);
@ -1281,7 +1314,11 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
goto out;
}
bind_evtchn_to_cpu(evtchn, cpu);
/*
* Force the affinity mask for percpu interrupts so proc
* shows the correct target.
*/
bind_evtchn_to_cpu(evtchn, cpu, percpu);
} else {
struct irq_info *info = info_for_irq(irq);
WARN_ON(info == NULL || info->type != IRQT_VIRQ);
@ -1646,9 +1683,7 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
mutex_unlock(&irq_mapping_update_lock);
bind_evtchn_to_cpu(evtchn, info->cpu);
/* This will be deferred until interrupt is processed */
irq_set_affinity(irq, cpumask_of(info->cpu));
bind_evtchn_to_cpu(evtchn, info->cpu, false);
/* Unmask the event channel. */
enable_irq(irq);
@ -1682,7 +1717,7 @@ static int xen_rebind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int tcpu)
* it, but don't do the xenlinux-level rebind in that case.
*/
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
bind_evtchn_to_cpu(evtchn, tcpu);
bind_evtchn_to_cpu(evtchn, tcpu, false);
if (!masked)
unmask_evtchn(evtchn);
@ -1690,27 +1725,47 @@ static int xen_rebind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int tcpu)
return 0;
}
/*
* Find the CPU within @dest mask which has the least number of channels
* assigned. This is not precise as the per cpu counts can be modified
* concurrently.
*/
static unsigned int select_target_cpu(const struct cpumask *dest)
{
unsigned int cpu, best_cpu = UINT_MAX, minch = UINT_MAX;
for_each_cpu_and(cpu, dest, cpu_online_mask) {
unsigned int curch = atomic_read(&channels_on_cpu[cpu]);
if (curch < minch) {
minch = curch;
best_cpu = cpu;
}
}
/*
* Catch the unlikely case that dest contains no online CPUs. Can't
* recurse.
*/
if (best_cpu == UINT_MAX)
return select_target_cpu(cpu_online_mask);
return best_cpu;
}
static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
bool force)
{
unsigned tcpu = cpumask_first_and(dest, cpu_online_mask);
int ret = xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu);
unsigned int tcpu = select_target_cpu(dest);
int ret;
ret = xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu);
if (!ret)
irq_data_update_effective_affinity(data, cpumask_of(tcpu));
return ret;
}
/* To be called with desc->lock held. */
int xen_set_affinity_evtchn(struct irq_desc *desc, unsigned int tcpu)
{
struct irq_data *d = irq_desc_get_irq_data(desc);
return set_affinity_irq(d, cpumask_of(tcpu), false);
}
EXPORT_SYMBOL_GPL(xen_set_affinity_evtchn);
static void enable_dynirq(struct irq_data *data)
{
evtchn_port_t evtchn = evtchn_from_irq(data->irq);
@ -1734,18 +1789,7 @@ static void ack_dynirq(struct irq_data *data)
if (!VALID_EVTCHN(evtchn))
return;
if (unlikely(irqd_is_setaffinity_pending(data)) &&
likely(!irqd_irq_disabled(data))) {
int masked = test_and_set_mask(evtchn);
clear_evtchn(evtchn);
irq_move_masked_irq(data);
if (!masked)
unmask_evtchn(evtchn);
} else
clear_evtchn(evtchn);
clear_evtchn(evtchn);
}
static void mask_ack_dynirq(struct irq_data *data)
@ -1830,7 +1874,8 @@ static void restore_cpu_virqs(unsigned int cpu)
/* Record the new mapping. */
(void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
bind_evtchn_to_cpu(evtchn, cpu);
/* The affinity mask is still valid */
bind_evtchn_to_cpu(evtchn, cpu, false);
}
}
@ -1855,7 +1900,8 @@ static void restore_cpu_ipis(unsigned int cpu)
/* Record the new mapping. */
(void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
bind_evtchn_to_cpu(evtchn, cpu);
/* The affinity mask is still valid */
bind_evtchn_to_cpu(evtchn, cpu, false);
}
}
@ -1938,8 +1984,12 @@ void xen_irq_resume(void)
xen_evtchn_resume();
/* No IRQ <-> event-channel mappings. */
list_for_each_entry(info, &xen_irq_list_head, list)
info->evtchn = 0; /* zap event-channel binding */
list_for_each_entry(info, &xen_irq_list_head, list) {
/* Zap event-channel binding */
info->evtchn = 0;
/* Adjust accounting */
channels_on_cpu_dec(info);
}
clear_evtchn_to_irq_all();

View file

@ -421,36 +421,6 @@ static void evtchn_unbind_from_user(struct per_user_data *u,
del_evtchn(u, evtchn);
}
static DEFINE_PER_CPU(int, bind_last_selected_cpu);
static void evtchn_bind_interdom_next_vcpu(evtchn_port_t evtchn)
{
unsigned int selected_cpu, irq;
struct irq_desc *desc;
unsigned long flags;
irq = irq_from_evtchn(evtchn);
desc = irq_to_desc(irq);
if (!desc)
return;
raw_spin_lock_irqsave(&desc->lock, flags);
selected_cpu = this_cpu_read(bind_last_selected_cpu);
selected_cpu = cpumask_next_and(selected_cpu,
desc->irq_common_data.affinity, cpu_online_mask);
if (unlikely(selected_cpu >= nr_cpu_ids))
selected_cpu = cpumask_first_and(desc->irq_common_data.affinity,
cpu_online_mask);
this_cpu_write(bind_last_selected_cpu, selected_cpu);
/* unmask expects irqs to be disabled */
xen_set_affinity_evtchn(desc, selected_cpu);
raw_spin_unlock_irqrestore(&desc->lock, flags);
}
static long evtchn_ioctl(struct file *file,
unsigned int cmd, unsigned long arg)
{
@ -508,10 +478,8 @@ static long evtchn_ioctl(struct file *file,
break;
rc = evtchn_bind_to_user(u, bind_interdomain.local_port);
if (rc == 0) {
if (rc == 0)
rc = bind_interdomain.local_port;
evtchn_bind_interdom_next_vcpu(rc);
}
break;
}

View file

@ -232,6 +232,7 @@ extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id);
# define local_irq_enable_in_hardirq() local_irq_enable()
#endif
bool irq_has_action(unsigned int irq);
extern void disable_irq_nosync(unsigned int irq);
extern bool disable_hardirq(unsigned int irq);
extern void disable_irq(unsigned int irq);

View file

@ -906,6 +906,13 @@ struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d)
}
#endif
static inline struct cpumask *irq_get_effective_affinity_mask(unsigned int irq)
{
struct irq_data *d = irq_get_irq_data(irq);
return d ? irq_data_get_effective_affinity_mask(d) : NULL;
}
unsigned int arch_dynirq_lower_bound(unsigned int from);
int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,

View file

@ -113,6 +113,12 @@ static inline void irq_unlock_sparse(void) { }
extern struct irq_desc irq_desc[NR_IRQS];
#endif
static inline unsigned int irq_desc_kstat_cpu(struct irq_desc *desc,
unsigned int cpu)
{
return desc->kstat_irqs ? *per_cpu_ptr(desc->kstat_irqs, cpu) : 0;
}
static inline struct irq_desc *irq_data_to_desc(struct irq_data *data)
{
return container_of(data->common, struct irq_desc, irq_common_data);
@ -179,12 +185,7 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
/* Test to see if a driver has successfully requested an irq */
static inline int irq_desc_has_action(struct irq_desc *desc)
{
return desc->action != NULL;
}
static inline int irq_has_action(unsigned int irq)
{
return irq_desc_has_action(irq_to_desc(irq));
return desc && desc->action != NULL;
}
/**
@ -228,40 +229,31 @@ irq_set_chip_handler_name_locked(struct irq_data *data, struct irq_chip *chip,
data->chip = chip;
}
bool irq_check_status_bit(unsigned int irq, unsigned int bitmask);
static inline bool irq_balancing_disabled(unsigned int irq)
{
struct irq_desc *desc;
desc = irq_to_desc(irq);
return desc->status_use_accessors & IRQ_NO_BALANCING_MASK;
return irq_check_status_bit(irq, IRQ_NO_BALANCING_MASK);
}
static inline bool irq_is_percpu(unsigned int irq)
{
struct irq_desc *desc;
desc = irq_to_desc(irq);
return desc->status_use_accessors & IRQ_PER_CPU;
return irq_check_status_bit(irq, IRQ_PER_CPU);
}
static inline bool irq_is_percpu_devid(unsigned int irq)
{
struct irq_desc *desc;
desc = irq_to_desc(irq);
return desc->status_use_accessors & IRQ_PER_CPU_DEVID;
return irq_check_status_bit(irq, IRQ_PER_CPU_DEVID);
}
void __irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class,
struct lock_class_key *request_class);
static inline void
irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class,
struct lock_class_key *request_class)
{
struct irq_desc *desc = irq_to_desc(irq);
if (desc) {
lockdep_set_class(&desc->lock, lock_class);
lockdep_set_class(&desc->request_mutex, request_class);
}
if (IS_ENABLED(CONFIG_LOCKDEP))
__irq_set_lockdep_class(irq, lock_class, request_class);
}
#endif

View file

@ -67,7 +67,6 @@ static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu)
/*
* Number of interrupts per specific IRQ source, since bootup
*/
extern unsigned int kstat_irqs(unsigned int irq);
extern unsigned int kstat_irqs_usr(unsigned int irq);
/*

View file

@ -147,12 +147,12 @@ static ssize_t per_cpu_count_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
int cpu, irq = desc->irq_data.irq;
ssize_t ret = 0;
char *p = "";
int cpu;
for_each_possible_cpu(cpu) {
unsigned int c = kstat_irqs_cpu(irq, cpu);
unsigned int c = irq_desc_kstat_cpu(desc, cpu);
ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%u", p, c);
p = ",";
@ -352,7 +352,9 @@ struct irq_desc *irq_to_desc(unsigned int irq)
{
return radix_tree_lookup(&irq_desc_tree, irq);
}
EXPORT_SYMBOL(irq_to_desc);
#ifdef CONFIG_KVM_BOOK3S_64_HV
EXPORT_SYMBOL_GPL(irq_to_desc);
#endif
static void delete_irq_desc(unsigned int irq)
{
@ -924,15 +926,7 @@ static bool irq_is_nmi(struct irq_desc *desc)
return desc->istate & IRQS_NMI;
}
/**
* kstat_irqs - Get the statistics for an interrupt
* @irq: The interrupt number
*
* Returns the sum of interrupt counts on all cpus since boot for
* @irq. The caller must ensure that the interrupt is not removed
* concurrently.
*/
unsigned int kstat_irqs(unsigned int irq)
static unsigned int kstat_irqs(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned int sum = 0;
@ -943,21 +937,22 @@ unsigned int kstat_irqs(unsigned int irq)
if (!irq_settings_is_per_cpu_devid(desc) &&
!irq_settings_is_per_cpu(desc) &&
!irq_is_nmi(desc))
return desc->tot_count;
return data_race(desc->tot_count);
for_each_possible_cpu(cpu)
sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
sum += data_race(*per_cpu_ptr(desc->kstat_irqs, cpu));
return sum;
}
/**
* kstat_irqs_usr - Get the statistics for an interrupt
* kstat_irqs_usr - Get the statistics for an interrupt from thread context
* @irq: The interrupt number
*
* Returns the sum of interrupt counts on all cpus since boot for @irq.
* Contrary to kstat_irqs() this can be called from any context.
* It uses rcu since a concurrent removal of an interrupt descriptor is
* observing an rcu grace period before delayed_free_desc()/irq_kobj_release().
*
* It uses rcu to protect the access since a concurrent removal of an
* interrupt descriptor is observing an rcu grace period before
* delayed_free_desc()/irq_kobj_release().
*/
unsigned int kstat_irqs_usr(unsigned int irq)
{
@ -968,3 +963,17 @@ unsigned int kstat_irqs_usr(unsigned int irq)
rcu_read_unlock();
return sum;
}
#ifdef CONFIG_LOCKDEP
void __irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class,
struct lock_class_key *request_class)
{
struct irq_desc *desc = irq_to_desc(irq);
if (desc) {
lockdep_set_class(&desc->lock, lock_class);
lockdep_set_class(&desc->request_mutex, request_class);
}
}
EXPORT_SYMBOL_GPL(__irq_set_lockdep_class);
#endif

View file

@ -2822,3 +2822,40 @@ int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
return err;
}
EXPORT_SYMBOL_GPL(irq_set_irqchip_state);
/**
* irq_has_action - Check whether an interrupt is requested
* @irq: The linux irq number
*
* Returns: A snapshot of the current state
*/
bool irq_has_action(unsigned int irq)
{
bool res;
rcu_read_lock();
res = irq_desc_has_action(irq_to_desc(irq));
rcu_read_unlock();
return res;
}
EXPORT_SYMBOL_GPL(irq_has_action);
/**
* irq_check_status_bit - Check whether bits in the irq descriptor status are set
* @irq: The linux irq number
* @bitmask: The bitmask to evaluate
*
* Returns: True if one of the bits in @bitmask is set
*/
bool irq_check_status_bit(unsigned int irq, unsigned int bitmask)
{
struct irq_desc *desc;
bool res = false;
rcu_read_lock();
desc = irq_to_desc(irq);
if (desc)
res = !!(desc->status_use_accessors & bitmask);
rcu_read_unlock();
return res;
}

View file

@ -488,9 +488,10 @@ int show_interrupts(struct seq_file *p, void *v)
if (!desc || irq_settings_is_hidden(desc))
goto outsparse;
if (desc->kstat_irqs)
if (desc->kstat_irqs) {
for_each_online_cpu(j)
any_count |= *per_cpu_ptr(desc->kstat_irqs, j);
any_count |= data_race(*per_cpu_ptr(desc->kstat_irqs, j));
}
if ((!desc->action || irq_desc_is_chained(desc)) && !any_count)
goto outsparse;