From 229b969b3d38bc28bcd55841ee7ca9a9afb922f3 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 14 Jul 2019 08:23:14 -0700 Subject: [PATCH 01/28] x86/apic: Initialize TPR to block interrupts 16-31 The APIC, per spec, is fundamentally confused and thinks that interrupt vectors 16-31 are valid. This makes no sense -- the CPU reserves vectors 0-31 for exceptions (faults, traps, etc). Obviously, no device should actually produce an interrupt with vector 16-31, but robustness can be improved by setting the APIC TPR class to 1, which will prevent delivery of an interrupt with a vector below 32. Note: This is *not* intended as a security measure against attackers who control malicious hardware. Any PCI or similar hardware that can be controlled by an attacker MUST be behind a functional IOMMU that remaps interrupts. The purpose of this change is to reduce the chance that a certain class of device malfunctions crashes the kernel in hard-to-debug ways. Suggested-by: Andrew Cooper Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/dc04a9f8b234d7b0956a8d2560b8945bcd9c4bf7.1563117760.git.luto@kernel.org --- arch/x86/kernel/apic/apic.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index f5291362da1a..84032bf81476 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1561,11 +1561,14 @@ static void setup_local_APIC(void) #endif /* - * Set Task Priority to 'accept all'. We never change this - * later on. + * Set Task Priority to 'accept all except vectors 0-31'. An APIC + * vector in the 16-31 range could be delivered if TPR == 0, but we + * would think it's an exception and terrible things will happen. We + * never change this later on. */ value = apic_read(APIC_TASKPRI); value &= ~APIC_TPRI_MASK; + value |= 0x10; apic_write(APIC_TASKPRI, value); apic_pending_intr_clear(); From 83b584d9c6a1494170abd3a8b24f41939b23d625 Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Mon, 15 Jul 2019 16:16:41 +0100 Subject: [PATCH 02/28] x86/paravirt: Drop {read,write}_cr8() hooks There is a lot of infrastructure for functionality which is used exclusively in __{save,restore}_processor_state() on the suspend/resume path. cr8 is an alias of APIC_TASKPRI, and APIC_TASKPRI is saved/restored by lapic_{suspend,resume}(). Saving and restoring cr8 independently of the rest of the Local APIC state isn't a clever thing to be doing. Delete the suspend/resume cr8 handling, which shrinks the size of struct saved_context, and allows for the removal of both PVOPS. 
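[ Note: illustration only, not part of the patch. Architecturally CR8 is just a window onto the upper nibble of the local APIC TPR (CR8 bits 3:0 map to TPR bits 7:4, TPR bits 3:0 read as zero), which is why restoring APIC_TASKPRI in lapic_resume() already covers CR8. A hypothetical helper sketching that mapping, assuming the SDM-described aliasing:

	/* Sketch only: the TPR value implied by a given CR8 value */
	static inline unsigned long tpr_from_cr8(unsigned long cr8)
	{
		return (cr8 & 0xf) << 4;	/* TPR[7:4] = CR8[3:0], TPR[3:0] = 0 */
	}
]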
Signed-off-by: Andrew Cooper Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross Link: https://lkml.kernel.org/r/20190715151641.29210-1-andrew.cooper3@citrix.com --- arch/x86/include/asm/paravirt.h | 12 ------------ arch/x86/include/asm/paravirt_types.h | 5 ----- arch/x86/include/asm/special_insns.h | 24 ------------------------ arch/x86/include/asm/suspend_64.h | 2 +- arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/paravirt.c | 4 ---- arch/x86/power/cpu.c | 4 ---- arch/x86/xen/enlighten_pv.c | 15 --------------- 8 files changed, 1 insertion(+), 66 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index dce26f1d13e1..69089d46f128 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -139,18 +139,6 @@ static inline void __write_cr4(unsigned long x) PVOP_VCALL1(cpu.write_cr4, x); } -#ifdef CONFIG_X86_64 -static inline unsigned long read_cr8(void) -{ - return PVOP_CALL0(unsigned long, cpu.read_cr8); -} - -static inline void write_cr8(unsigned long x) -{ - PVOP_VCALL1(cpu.write_cr8, x); -} -#endif - static inline void arch_safe_halt(void) { PVOP_VCALL0(irq.safe_halt); diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 639b2df445ee..70b654f3ffe5 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -119,11 +119,6 @@ struct pv_cpu_ops { void (*write_cr4)(unsigned long); -#ifdef CONFIG_X86_64 - unsigned long (*read_cr8)(void); - void (*write_cr8)(unsigned long); -#endif - /* Segment descriptor handling */ void (*load_tr_desc)(void); void (*load_gdt)(const struct desc_ptr *); diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 219be88a59d2..6d37b8fcfc77 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -73,20 +73,6 @@ static inline unsigned long native_read_cr4(void) void native_write_cr4(unsigned long val); -#ifdef CONFIG_X86_64 -static inline unsigned long native_read_cr8(void) -{ - unsigned long cr8; - asm volatile("movq %%cr8,%0" : "=r" (cr8)); - return cr8; -} - -static inline void native_write_cr8(unsigned long val) -{ - asm volatile("movq %0,%%cr8" :: "r" (val) : "memory"); -} -#endif - #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS static inline u32 rdpkru(void) { @@ -200,16 +186,6 @@ static inline void wbinvd(void) #ifdef CONFIG_X86_64 -static inline unsigned long read_cr8(void) -{ - return native_read_cr8(); -} - -static inline void write_cr8(unsigned long x) -{ - native_write_cr8(x); -} - static inline void load_gs_index(unsigned selector) { native_load_gs_index(selector); diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h index a7af9f53c0cb..35bb35d28733 100644 --- a/arch/x86/include/asm/suspend_64.h +++ b/arch/x86/include/asm/suspend_64.h @@ -34,7 +34,7 @@ struct saved_context { */ unsigned long kernelmode_gs_base, usermode_gs_base, fs_base; - unsigned long cr0, cr2, cr3, cr4, cr8; + unsigned long cr0, cr2, cr3, cr4; u64 misc_enable; bool misc_enable_saved; struct saved_msrs saved_msrs; diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index d3d075226c0a..8b54d8e3a561 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -62,7 +62,6 @@ int main(void) ENTRY(cr2); ENTRY(cr3); ENTRY(cr4); - ENTRY(cr8); ENTRY(gdt_desc); BLANK(); #undef ENTRY diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 
0aa6256eedd8..59d3d2763a9e 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -311,10 +311,6 @@ struct paravirt_patch_template pv_ops = { .cpu.read_cr0 = native_read_cr0, .cpu.write_cr0 = native_write_cr0, .cpu.write_cr4 = native_write_cr4, -#ifdef CONFIG_X86_64 - .cpu.read_cr8 = native_read_cr8, - .cpu.write_cr8 = native_write_cr8, -#endif .cpu.wbinvd = native_wbinvd, .cpu.read_msr = native_read_msr, .cpu.write_msr = native_write_msr, diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 24b079e94bc2..1c58d8982728 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -122,9 +122,6 @@ static void __save_processor_state(struct saved_context *ctxt) ctxt->cr2 = read_cr2(); ctxt->cr3 = __read_cr3(); ctxt->cr4 = __read_cr4(); -#ifdef CONFIG_X86_64 - ctxt->cr8 = read_cr8(); -#endif ctxt->misc_enable_saved = !rdmsrl_safe(MSR_IA32_MISC_ENABLE, &ctxt->misc_enable); msr_save_context(ctxt); @@ -207,7 +204,6 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) #else /* CONFIG X86_64 */ wrmsrl(MSR_EFER, ctxt->efer); - write_cr8(ctxt->cr8); __write_cr4(ctxt->cr4); #endif write_cr3(ctxt->cr3); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 7ceb32821093..58f79ab32358 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -877,16 +877,6 @@ static void xen_write_cr4(unsigned long cr4) native_write_cr4(cr4); } -#ifdef CONFIG_X86_64 -static inline unsigned long xen_read_cr8(void) -{ - return 0; -} -static inline void xen_write_cr8(unsigned long val) -{ - BUG_ON(val); -} -#endif static u64 xen_read_msr_safe(unsigned int msr, int *err) { @@ -1023,11 +1013,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .write_cr4 = xen_write_cr4, -#ifdef CONFIG_X86_64 - .read_cr8 = xen_read_cr8, - .write_cr8 = xen_write_cr8, -#endif - .wbinvd = native_wbinvd, .read_msr = xen_read_msr, From 747d5a1bf293dcb33af755a6d285d41b8c1ea010 Mon Sep 17 00:00:00 2001 From: Grzegorz Halat Date: Fri, 28 Jun 2019 14:28:13 +0200 Subject: [PATCH 03/28] x86/reboot: Always use NMI fallback when shutdown via reboot vector IPI fails A reboot request sends an IPI via the reboot vector and waits for all other CPUs to stop. If one or more CPUs are in critical regions with interrupts disabled then the IPI is not handled on those CPUs and the shutdown hangs if native_stop_other_cpus() is called with the wait argument set. Such a situation can happen when one CPU was stopped within a lock held section and another CPU is trying to acquire that lock with interrupts disabled. There are other scenarios which can cause such a lockup as well. In theory the shutdown should be attempted by an NMI IPI after the timeout period elapsed. Though the wait loop after sending the reboot vector IPI prevents this. It checks the wait request argument and the timeout. If wait is set, which is true for sys_reboot() then it won't fall through to the NMI shutdown method after the timeout period has finished. This was an oversight when the NMI shutdown mechanism was added to handle the 'reboot IPI is not working' situation. The mechanism was added to deal with stuck panic shutdowns, which do not have the wait request set, so the 'wait request' case was probably not considered. Remove the wait check from the post reboot vector IPI wait loop and enforce that the wait loop in the NMI fallback path is invoked even if NMI IPIs are disabled or the registration of the NMI handler fails. 
That second wait loop will then hang if not all CPUs shutdown and the wait argument is set. [ tglx: Avoid the hard to parse line break in the NMI fallback path, add comments and massage the changelog ] Fixes: 7d007d21e539 ("x86/reboot: Use NMI to assist in shutting down if IRQ fails") Signed-off-by: Grzegorz Halat Signed-off-by: Thomas Gleixner Cc: Don Zickus Link: https://lkml.kernel.org/r/20190628122813.15500-1-ghalat@redhat.com --- arch/x86/kernel/smp.c | 50 +++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 96421f97e75c..231fa230ebc7 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -179,6 +179,12 @@ asmlinkage __visible void smp_reboot_interrupt(void) irq_exit(); } +static int register_stop_handler(void) +{ + return register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback, + NMI_FLAG_FIRST, "smp_stop"); +} + static void native_stop_other_cpus(int wait) { unsigned long flags; @@ -212,39 +218,41 @@ static void native_stop_other_cpus(int wait) apic->send_IPI_allbutself(REBOOT_VECTOR); /* - * Don't wait longer than a second if the caller - * didn't ask us to wait. + * Don't wait longer than a second for IPI completion. The + * wait request is not checked here because that would + * prevent an NMI shutdown attempt in case that not all + * CPUs reach shutdown state. */ timeout = USEC_PER_SEC; - while (num_online_cpus() > 1 && (wait || timeout--)) + while (num_online_cpus() > 1 && timeout--) udelay(1); } - + /* if the REBOOT_VECTOR didn't work, try with the NMI */ - if ((num_online_cpus() > 1) && (!smp_no_nmi_ipi)) { - if (register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback, - NMI_FLAG_FIRST, "smp_stop")) - /* Note: we ignore failures here */ - /* Hope the REBOOT_IRQ is good enough */ - goto finish; - - /* sync above data before sending IRQ */ - wmb(); - - pr_emerg("Shutting down cpus with NMI\n"); - - apic->send_IPI_allbutself(NMI_VECTOR); - + if (num_online_cpus() > 1) { /* - * Don't wait longer than a 10 ms if the caller - * didn't ask us to wait. + * If NMI IPI is enabled, try to register the stop handler + * and send the IPI. In any case try to wait for the other + * CPUs to stop. + */ + if (!smp_no_nmi_ipi && !register_stop_handler()) { + /* Sync above data before sending IRQ */ + wmb(); + + pr_emerg("Shutting down cpus with NMI\n"); + + apic->send_IPI_allbutself(NMI_VECTOR); + } + /* + * Don't wait longer than 10 ms if the caller didn't + * reqeust it. If wait is true, the machine hangs here if + * one or more CPUs do not reach shutdown state. */ timeout = USEC_PER_MSEC * 10; while (num_online_cpus() > 1 && (wait || timeout--)) udelay(1); } -finish: local_irq_save(flags); disable_local_APIC(); mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); From 2591bc4e8d70b4e1330d327fb7e3921f4e070a51 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Jul 2019 20:47:06 +0200 Subject: [PATCH 04/28] x86/kgbd: Use NMI_VECTOR not APIC_DM_NMI apic->send_IPI_allbutself() takes a vector number as argument. APIC_DM_NMI is clearly not a vector number. It's defined to 0x400 which is outside the vector space. Use NMI_VECTOR instead as that's what it is intended to be. 
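[ Note: illustration only. The kernel's own definitions (as found in arch/x86/include/asm/irq_vectors.h and apicdef.h around this series) show the mismatch:

	#define NMI_VECTOR	0x02	/* a real vector number in the 0-255 range */
	#define APIC_DM_NMI	0x00400	/* an ICR delivery mode bit, i.e. 1024 */

  send_IPI_allbutself() expects a vector number; passing APIC_DM_NMI presumably only worked because OR-ing 0x400 into the ICR happens to set the NMI delivery mode bit (see __prepare_ICR() later in this series). ]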
Fixes: 82da3ff89dc2 ("x86: kgdb support") Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190722105218.855189979@linutronix.de --- arch/x86/kernel/kgdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 23297ea64f5f..a53dfb09880f 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -416,7 +416,7 @@ static void kgdb_disable_hw_debug(struct pt_regs *regs) */ void kgdb_roundup_cpus(void) { - apic->send_IPI_allbutself(APIC_DM_NMI); + apic->send_IPI_allbutself(NMI_VECTOR); } #endif From 39c89dff9c366ad98d2e5598db41ff9b1bdb9e88 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Jul 2019 20:47:07 +0200 Subject: [PATCH 05/28] x86/apic: Invoke perf_events_lapic_init() after enabling APIC If the APIC is soft disabled then unmasking an LVT entry does not work and the write is ignored. perf_events_lapic_init() tries to do so. Move the invocation after the point where the APIC has been enabled. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190722105218.962517234@linutronix.de --- arch/x86/kernel/apic/apic.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 84032bf81476..fa0846d4e000 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1517,7 +1517,6 @@ static void setup_local_APIC(void) int logical_apicid, ldr_apicid; #endif - if (disable_apic) { disable_ioapic_support(); return; @@ -1532,8 +1531,6 @@ static void setup_local_APIC(void) apic_write(APIC_ESR, 0); } #endif - perf_events_lapic_init(); - /* * Double-check whether this APIC is really registered. * This is meaningless in clustered apic mode, so we skip it. @@ -1617,6 +1614,8 @@ static void setup_local_APIC(void) value |= SPURIOUS_APIC_VECTOR; apic_write(APIC_SPIV, value); + perf_events_lapic_init(); + /* * Set up LVT0, LVT1: * From 2640da4cccf5cc613bf26f0998b9e340f4b5f69c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Jul 2019 20:47:08 +0200 Subject: [PATCH 06/28] x86/apic: Soft disable APIC before initializing it If the APIC was already enabled on entry of setup_local_APIC() then disabling it soft via the SPIV register makes a lot of sense. That masks all LVT entries and brings it into a well defined state. Otherwise previously enabled LVTs which are not touched in the setup function stay unmasked and might surprise the just booting kernel. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190722105219.068290579@linutronix.de --- arch/x86/kernel/apic/apic.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index fa0846d4e000..621992de49ee 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1522,6 +1522,14 @@ static void setup_local_APIC(void) return; } + /* + * If this comes from kexec/kcrash the APIC might be enabled in + * SPIV. Soft disable it before doing further initialization. 
+ */ + value = apic_read(APIC_SPIV); + value &= ~APIC_SPIV_APIC_ENABLED; + apic_write(APIC_SPIV, value); + #ifdef CONFIG_X86_32 /* Pound the ESR really hard over the head with a big hammer - mbligh */ if (lapic_is_integrated() && apic->disable_esr) { From cc8bf191378c1da8ad2b99cf470ee70193ace84e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Jul 2019 20:47:09 +0200 Subject: [PATCH 07/28] x86/apic: Make apic_pending_intr_clear() more robust In the course of developing shorthand based IPI support, issues were observed with the function which tries to clear eventually pending ISR bits in the local APIC. 1) 0-day testing triggered the WARN_ON() in apic_pending_intr_clear(). This warning is emitted when the function fails to clear pending ISR bits or observes pending IRR bits which are not delivered to the CPU after the stale ISR bit(s) are ACK'ed. Unfortunately the function only emits a WARN_ON() and fails to dump the IRR/ISR content. That's useless for debugging. Feng added spot-on debug printk's which revealed that the stale IRR bit belonged to the APIC timer interrupt vector, but adding ad hoc debug code does not help with sporadic failures in the field. Rework the loop so the full IRR/ISR contents are saved and on failure dumped. 2) The loop termination logic is interesting at best. If the machine has no TSC or cpu_khz is not known yet it tries 1 million times to ack stale IRR/ISR bits. What? With TSC it uses the TSC to calculate the loop termination. It takes a timestamp at entry and terminates the loop when: (rdtsc() - start_timestamp) >= (cpu_khz << 10) That's roughly one second. Both methods are problematic. The APIC has 256 vectors, which means that in theory max. 256 IRR/ISR bits can be set. In practice this is impossible and the chance that more than a few bits are set is close to zero. With the pure loop based approach the 1 million retries are complete overkill. With TSC this can terminate too early in a guest which is running on a heavily loaded host even with only a couple of IRR/ISR bits set. The reason is that after acknowledging the highest priority ISR bit, pending IRRs must get serviced first before the next round of acknowledge can take place as the APIC (real and virtualized) does not honour EOI without a preceding interrupt on the CPU. And every APIC read/write takes a VMEXIT if the APIC is virtualized. While trying to reproduce the issue 0-day reported it was observed that the guest was scheduled out long enough under heavy load that it terminated after 8 iterations. Make the loop terminate after 512 iterations. That's plenty enough in any case and does not take endless time to complete. 
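[ Note: a worked check of the old TSC bound, for clarity. cpu_khz is the TSC frequency in kHz, so one second corresponds to cpu_khz * 1000 cycles, and the old code used

	max_loops = (long long)cpu_khz << 10;	/* cpu_khz * 1024 cycles, ~1.024 s */

  i.e. "roughly one second" of wall clock. In a guest that budget is consumed while the vCPU is scheduled out, which is how the loop could give up after only 8 iterations. ]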
Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190722105219.158847694@linutronix.de --- arch/x86/kernel/apic/apic.c | 109 +++++++++++++++++++++--------------- 1 file changed, 64 insertions(+), 45 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 621992de49ee..fe30d1854a4e 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1453,54 +1453,72 @@ static void lapic_setup_esr(void) oldvalue, value); } +#define APIC_IR_REGS APIC_ISR_NR +#define APIC_IR_BITS (APIC_IR_REGS * 32) +#define APIC_IR_MAPSIZE (APIC_IR_BITS / BITS_PER_LONG) + +union apic_ir { + unsigned long map[APIC_IR_MAPSIZE]; + u32 regs[APIC_IR_REGS]; +}; + +static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr) +{ + int i, bit; + + /* Read the IRRs */ + for (i = 0; i < APIC_IR_REGS; i++) + irr->regs[i] = apic_read(APIC_IRR + i * 0x10); + + /* Read the ISRs */ + for (i = 0; i < APIC_IR_REGS; i++) + isr->regs[i] = apic_read(APIC_ISR + i * 0x10); + + /* + * If the ISR map is not empty. ACK the APIC and run another round + * to verify whether a pending IRR has been unblocked and turned + * into a ISR. + */ + if (!bitmap_empty(isr->map, APIC_IR_BITS)) { + /* + * There can be multiple ISR bits set when a high priority + * interrupt preempted a lower priority one. Issue an ACK + * per set bit. + */ + for_each_set_bit(bit, isr->map, APIC_IR_BITS) + ack_APIC_irq(); + return true; + } + + return !bitmap_empty(irr->map, APIC_IR_BITS); +} + +/* + * After a crash, we no longer service the interrupts and a pending + * interrupt from previous kernel might still have ISR bit set. + * + * Most probably by now the CPU has serviced that pending interrupt and it + * might not have done the ack_APIC_irq() because it thought, interrupt + * came from i8259 as ExtInt. LAPIC did not get EOI so it does not clear + * the ISR bit and cpu thinks it has already serivced the interrupt. Hence + * a vector might get locked. It was noticed for timer irq (vector + * 0x31). Issue an extra EOI to clear ISR. + * + * If there are pending IRR bits they turn into ISR bits after a higher + * priority ISR bit has been acked. + */ static void apic_pending_intr_clear(void) { - long long max_loops = cpu_khz ? cpu_khz : 1000000; - unsigned long long tsc = 0, ntsc; - unsigned int queued; - unsigned long value; - int i, j, acked = 0; + union apic_ir irr, isr; + unsigned int i; - if (boot_cpu_has(X86_FEATURE_TSC)) - tsc = rdtsc(); - /* - * After a crash, we no longer service the interrupts and a pending - * interrupt from previous kernel might still have ISR bit set. - * - * Most probably by now CPU has serviced that pending interrupt and - * it might not have done the ack_APIC_irq() because it thought, - * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it - * does not clear the ISR bit and cpu thinks it has already serivced - * the interrupt. Hence a vector might get locked. It was noticed - * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. 
- */ - do { - queued = 0; - for (i = APIC_ISR_NR - 1; i >= 0; i--) - queued |= apic_read(APIC_IRR + i*0x10); - - for (i = APIC_ISR_NR - 1; i >= 0; i--) { - value = apic_read(APIC_ISR + i*0x10); - for_each_set_bit(j, &value, 32) { - ack_APIC_irq(); - acked++; - } - } - if (acked > 256) { - pr_err("LAPIC pending interrupts after %d EOI\n", acked); - break; - } - if (queued) { - if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) { - ntsc = rdtsc(); - max_loops = (long long)cpu_khz << 10; - max_loops -= ntsc - tsc; - } else { - max_loops--; - } - } - } while (queued && max_loops > 0); - WARN_ON(max_loops <= 0); + /* 512 loops are way oversized and give the APIC a chance to obey. */ + for (i = 0; i < 512; i++) { + if (!apic_check_and_ack(&irr, &isr)) + return; + } + /* Dump the IRR/ISR content if that failed */ + pr_warn("APIC: Stale IRR: %256pb ISR: %256pb\n", irr.map, isr.map); } /** @@ -1576,6 +1594,7 @@ static void setup_local_APIC(void) value |= 0x10; apic_write(APIC_TASKPRI, value); + /* Clear eventually stale ISR/IRR bits */ apic_pending_intr_clear(); /* From cdc86c9d1f825d13cef85d9ebd3e73572602fb48 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Jul 2019 20:47:10 +0200 Subject: [PATCH 08/28] x86/apic: Move IPI inlines into ipi.c No point in having them in an header file. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190722105219.252225936@linutronix.de --- arch/x86/include/asm/ipi.h | 19 ------------------- arch/x86/kernel/apic/ipi.c | 16 +++++++++++++--- 2 files changed, 13 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h index f73076be546a..8d4911b122f3 100644 --- a/arch/x86/include/asm/ipi.h +++ b/arch/x86/include/asm/ipi.h @@ -71,27 +71,8 @@ extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, int vector); -/* Avoid include hell */ -#define NMI_VECTOR 0x02 - extern int no_broadcast; -static inline void __default_local_send_IPI_allbutself(int vector) -{ - if (no_broadcast || vector == NMI_VECTOR) - apic->send_IPI_mask_allbutself(cpu_online_mask, vector); - else - __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->dest_logical); -} - -static inline void __default_local_send_IPI_all(int vector) -{ - if (no_broadcast || vector == NMI_VECTOR) - apic->send_IPI_mask(cpu_online_mask, vector); - else - __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->dest_logical); -} - #ifdef CONFIG_X86_32 extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int vector); diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 82f9244fe61f..de9764605d31 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -198,15 +198,25 @@ void default_send_IPI_allbutself(int vector) * if there are no other CPUs in the system then we get an APIC send * error if we try to broadcast, thus avoid sending IPIs in this case. 
*/ - if (!(num_online_cpus() > 1)) + if (num_online_cpus() < 2) return; - __default_local_send_IPI_allbutself(vector); + if (no_broadcast || vector == NMI_VECTOR) { + apic->send_IPI_mask_allbutself(cpu_online_mask, vector); + } else { + __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, + apic->dest_logical); + } } void default_send_IPI_all(int vector) { - __default_local_send_IPI_all(vector); + if (no_broadcast || vector == NMI_VECTOR) { + apic->send_IPI_mask(cpu_online_mask, vector); + } else { + __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector, + apic->dest_logical); + } } void default_send_IPI_self(int vector) From 521b82fee98c1e334ba3a2459ba3739d459e9e4e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Jul 2019 20:47:11 +0200 Subject: [PATCH 09/28] x86/apic: Cleanup the include maze All of these APIC files include the world and some more. Remove the unneeded cruft. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190722105219.342631201@linutronix.de --- arch/x86/kernel/apic/apic_flat_64.c | 15 ++++--------- arch/x86/kernel/apic/apic_noop.c | 18 +-------------- arch/x86/kernel/apic/apic_numachip.c | 6 ++--- arch/x86/kernel/apic/ipi.c | 15 +------------ arch/x86/kernel/apic/probe_32.c | 18 ++------------- arch/x86/kernel/apic/probe_64.c | 11 --------- arch/x86/kernel/apic/x2apic_cluster.c | 16 ++++++-------- arch/x86/kernel/apic/x2apic_phys.c | 11 ++++----- arch/x86/kernel/apic/x2apic_uv_x.c | 32 +++++---------------------- 9 files changed, 28 insertions(+), 114 deletions(-) diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index bbdca603f94a..8d7242df1fd6 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -8,21 +8,14 @@ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and * James Cleverdon. */ -#include -#include -#include #include -#include -#include -#include -#include #include +#include -#include -#include -#include -#include #include +#include +#include +#include static struct apic apic_physflat; static struct apic apic_flat; diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 5078b5ce63a7..98c9bb75d185 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -9,25 +9,9 @@ * to not uglify the caller's code and allow to call (some) apic routines * like self-ipi, etc... 
*/ - -#include #include -#include -#include -#include -#include -#include -#include -#include + #include -#include - -#include -#include - -#include -#include -#include static void noop_init_apic_ldr(void) { } static void noop_send_IPI(int cpu, int vector) { } diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index a5464b8b6c46..e071e8dcb097 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -10,15 +10,15 @@ * Send feedback to * */ - +#include #include #include #include -#include + #include #include -#include +#include u8 numachip_system __read_mostly; static const struct apic apic_numachip1; diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index de9764605d31..dad523bbe701 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -1,21 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 + #include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include #include -#include #include void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest) diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 1492799b8f43..8f3c7f50b0a9 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -6,26 +6,12 @@ * * Generic x86 APIC driver probe layer. */ -#include -#include #include -#include -#include -#include -#include #include -#include -#include -#include + #include -#include - -#include -#include - -#include #include -#include +#include #ifdef CONFIG_HOTPLUG_CPU #define DEFAULT_SEND_IPI (1) diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index e6560a02eb46..f7bd3f48deb2 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -8,19 +8,8 @@ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and * James Cleverdon. */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include #include #include -#include /* * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 609e499387a1..ebde731dc4cf 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -1,14 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include + +#include + #include "x2apic.h" struct cluster_mask { diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index b5cf9e7b3830..e5289a0c595b 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -1,13 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include +#include +#include + #include + #include "x2apic.h" int x2apic_phys; diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 1e225528f0d7..73a652093820 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -7,40 +7,20 @@ * * Copyright (C) 2007-2014 Silicon Graphics, Inc. All rights reserved. 
*/ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include +#include +#include +#include #include -#include +#include +#include +#include #include #include -#include -#include #include #include #include -#include -#include -#include -#include -#include DEFINE_PER_CPU(int, x2apic_extra_bits); From 8b542da372875373db9688477671151df3418acb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Jul 2019 20:47:12 +0200 Subject: [PATCH 10/28] x86/apic: Move ipi header into apic directory Only used locally. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190722105219.434738036@linutronix.de --- arch/x86/kernel/apic/apic_flat_64.c | 3 ++- arch/x86/kernel/apic/apic_numachip.c | 3 ++- arch/x86/kernel/apic/bigsmp_32.c | 9 ++------- arch/x86/kernel/apic/ipi.c | 3 ++- arch/x86/{include/asm => kernel/apic}/ipi.h | 0 arch/x86/kernel/apic/probe_32.c | 3 ++- arch/x86/kernel/apic/probe_64.c | 3 ++- arch/x86/kernel/apic/x2apic_phys.c | 3 +-- 8 files changed, 13 insertions(+), 14 deletions(-) rename arch/x86/{include/asm => kernel/apic}/ipi.h (100%) diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 8d7242df1fd6..a38b1ecc018d 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -15,7 +15,8 @@ #include #include #include -#include + +#include "ipi.h" static struct apic apic_physflat; static struct apic apic_flat; diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index e071e8dcb097..7d4c00f4e984 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -18,7 +18,8 @@ #include #include -#include + +#include "ipi.h" u8 numachip_system __read_mostly; static const struct apic apic_numachip1; diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index afee386ff711..2c031b75dfce 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -4,18 +4,13 @@ * * Drives the local APIC in "clustered mode". */ -#include #include -#include -#include #include #include -#include -#include -#include #include -#include + +#include "ipi.h" static unsigned bigsmp_get_apic_id(unsigned long x) { diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index dad523bbe701..0f26141d479c 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -3,7 +3,8 @@ #include #include -#include + +#include "ipi.h" void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest) { diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/kernel/apic/ipi.h similarity index 100% rename from arch/x86/include/asm/ipi.h rename to arch/x86/kernel/apic/ipi.h diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 8f3c7f50b0a9..40b786e3427a 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -11,7 +11,8 @@ #include #include -#include + +#include "ipi.h" #ifdef CONFIG_HOTPLUG_CPU #define DEFAULT_SEND_IPI (1) diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index f7bd3f48deb2..6268c487f963 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -9,7 +9,8 @@ * James Cleverdon. */ #include -#include + +#include "ipi.h" /* * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. 
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index e5289a0c595b..3bde4724c1c7 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -3,9 +3,8 @@ #include #include -#include - #include "x2apic.h" +#include "ipi.h" int x2apic_phys; From ba77b2a02e0099ab0021bc3169b8f674c6be19f0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Jul 2019 20:47:13 +0200 Subject: [PATCH 11/28] x86/apic: Move apic_flat_64 header into apic directory Only used locally. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190722105219.526508168@linutronix.de --- arch/x86/kernel/apic/apic_flat_64.c | 2 +- arch/x86/{include/asm => kernel/apic}/apic_flat_64.h | 0 arch/x86/kernel/apic/apic_numachip.c | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename arch/x86/{include/asm => kernel/apic}/apic_flat_64.h (100%) diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index a38b1ecc018d..cfee2e546531 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -13,9 +13,9 @@ #include #include -#include #include +#include "apic_flat_64.h" #include "ipi.h" static struct apic apic_physflat; diff --git a/arch/x86/include/asm/apic_flat_64.h b/arch/x86/kernel/apic/apic_flat_64.h similarity index 100% rename from arch/x86/include/asm/apic_flat_64.h rename to arch/x86/kernel/apic/apic_flat_64.h diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 7d4c00f4e984..09ec9ffb268e 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -16,9 +16,9 @@ #include #include -#include #include +#include "apic_flat_64.h" #include "ipi.h" u8 numachip_system __read_mostly; From c94f0718fb1c171d6dfdd69cb6001fa0d8206710 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Jul 2019 20:47:14 +0200 Subject: [PATCH 12/28] x86/apic: Consolidate the apic local headers Now there are three small local headers. Some contain functions which are only used in one source file. Move all the inlines and declarations into a single local header and the inlines which are only used in one source file into that. 
Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190722105219.618612624@linutronix.de --- arch/x86/kernel/apic/apic_flat_64.c | 3 +- arch/x86/kernel/apic/apic_flat_64.h | 8 --- arch/x86/kernel/apic/apic_numachip.c | 3 +- arch/x86/kernel/apic/bigsmp_32.c | 2 +- arch/x86/kernel/apic/ipi.c | 14 ++++- arch/x86/kernel/apic/ipi.h | 90 --------------------------- arch/x86/kernel/apic/local.h | 63 +++++++++++++++++++ arch/x86/kernel/apic/probe_32.c | 3 +- arch/x86/kernel/apic/probe_64.c | 2 +- arch/x86/kernel/apic/x2apic.h | 9 --- arch/x86/kernel/apic/x2apic_cluster.c | 2 +- arch/x86/kernel/apic/x2apic_phys.c | 3 +- 12 files changed, 83 insertions(+), 119 deletions(-) delete mode 100644 arch/x86/kernel/apic/apic_flat_64.h delete mode 100644 arch/x86/kernel/apic/ipi.h create mode 100644 arch/x86/kernel/apic/local.h delete mode 100644 arch/x86/kernel/apic/x2apic.h diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index cfee2e546531..f8594b844637 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -15,8 +15,7 @@ #include #include -#include "apic_flat_64.h" -#include "ipi.h" +#include "local.h" static struct apic apic_physflat; static struct apic apic_flat; diff --git a/arch/x86/kernel/apic/apic_flat_64.h b/arch/x86/kernel/apic/apic_flat_64.h deleted file mode 100644 index d3a2b3876ce6..000000000000 --- a/arch/x86/kernel/apic/apic_flat_64.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_APIC_FLAT_64_H -#define _ASM_X86_APIC_FLAT_64_H - -extern void flat_init_apic_ldr(void); - -#endif - diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 09ec9ffb268e..cdf45b4700f2 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -18,8 +18,7 @@ #include -#include "apic_flat_64.h" -#include "ipi.h" +#include "local.h" u8 numachip_system __read_mostly; static const struct apic apic_numachip1; diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 2c031b75dfce..9703b552f25a 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -10,7 +10,7 @@ #include -#include "ipi.h" +#include "local.h" static unsigned bigsmp_get_apic_id(unsigned long x) { diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 0f26141d479c..6fa9f6ca7eef 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -1,10 +1,20 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include -#include +#include "local.h" -#include "ipi.h" +static inline int __prepare_ICR2(unsigned int mask) +{ + return SET_APIC_DEST_FIELD(mask); +} + +static inline void __xapic_wait_icr_idle(void) +{ + while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY) + cpu_relax(); +} void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest) { diff --git a/arch/x86/kernel/apic/ipi.h b/arch/x86/kernel/apic/ipi.h deleted file mode 100644 index 8d4911b122f3..000000000000 --- a/arch/x86/kernel/apic/ipi.h +++ /dev/null @@ -1,90 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef _ASM_X86_IPI_H -#define _ASM_X86_IPI_H - -#ifdef CONFIG_X86_LOCAL_APIC - -/* - * Copyright 2004 James Cleverdon, IBM. - * - * Generic APIC InterProcessor Interrupt code. 
- * - * Moved to include file by James Cleverdon from - * arch/x86-64/kernel/smp.c - * - * Copyrights from kernel/smp.c: - * - * (c) 1995 Alan Cox, Building #3 - * (c) 1998-99, 2000 Ingo Molnar - * (c) 2002,2003 Andi Kleen, SuSE Labs. - */ - -#include -#include -#include - -/* - * the following functions deal with sending IPIs between CPUs. - * - * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. - */ - -static inline unsigned int __prepare_ICR(unsigned int shortcut, int vector, - unsigned int dest) -{ - unsigned int icr = shortcut | dest; - - switch (vector) { - default: - icr |= APIC_DM_FIXED | vector; - break; - case NMI_VECTOR: - icr |= APIC_DM_NMI; - break; - } - return icr; -} - -static inline int __prepare_ICR2(unsigned int mask) -{ - return SET_APIC_DEST_FIELD(mask); -} - -static inline void __xapic_wait_icr_idle(void) -{ - while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY) - cpu_relax(); -} - -void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest); - -/* - * This is used to send an IPI with no shorthand notation (the destination is - * specified in bits 56 to 63 of the ICR). - */ -void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest); - -extern void default_send_IPI_single(int cpu, int vector); -extern void default_send_IPI_single_phys(int cpu, int vector); -extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, - int vector); -extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, - int vector); - -extern int no_broadcast; - -#ifdef CONFIG_X86_32 -extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, - int vector); -extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, - int vector); -extern void default_send_IPI_mask_logical(const struct cpumask *mask, - int vector); -extern void default_send_IPI_allbutself(int vector); -extern void default_send_IPI_all(int vector); -extern void default_send_IPI_self(int vector); -#endif - -#endif - -#endif /* _ASM_X86_IPI_H */ diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h new file mode 100644 index 000000000000..95adac0e785b --- /dev/null +++ b/arch/x86/kernel/apic/local.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Historical copyright notices: + * + * Copyright 2004 James Cleverdon, IBM. + * (c) 1995 Alan Cox, Building #3 + * (c) 1998-99, 2000 Ingo Molnar + * (c) 2002,2003 Andi Kleen, SuSE Labs. + */ +#include + +/* APIC flat 64 */ +void flat_init_apic_ldr(void); + +/* X2APIC */ +int x2apic_apic_id_valid(u32 apicid); +int x2apic_apic_id_registered(void); +void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest); +unsigned int x2apic_get_apic_id(unsigned long id); +u32 x2apic_set_apic_id(unsigned int id); +int x2apic_phys_pkg_id(int initial_apicid, int index_msb); +void x2apic_send_IPI_self(int vector); + +/* IPI */ +static inline unsigned int __prepare_ICR(unsigned int shortcut, int vector, + unsigned int dest) +{ + unsigned int icr = shortcut | dest; + + switch (vector) { + default: + icr |= APIC_DM_FIXED | vector; + break; + case NMI_VECTOR: + icr |= APIC_DM_NMI; + break; + } + return icr; +} + +void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest); + +/* + * This is used to send an IPI with no shorthand notation (the destination is + * specified in bits 56 to 63 of the ICR). 
+ */ +void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest); + +void default_send_IPI_single(int cpu, int vector); +void default_send_IPI_single_phys(int cpu, int vector); +void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector); +void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, int vector); + +extern int no_broadcast; + +#ifdef CONFIG_X86_32 +void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int vector); +void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, int vector); +void default_send_IPI_mask_logical(const struct cpumask *mask, int vector); +void default_send_IPI_allbutself(int vector); +void default_send_IPI_all(int vector); +void default_send_IPI_self(int vector); +#endif diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 40b786e3427a..7cc961d4f51f 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -8,11 +8,12 @@ */ #include #include +#include #include #include -#include "ipi.h" +#include "local.h" #ifdef CONFIG_HOTPLUG_CPU #define DEFAULT_SEND_IPI (1) diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 6268c487f963..e0e1e3380ea2 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -10,7 +10,7 @@ */ #include -#include "ipi.h" +#include "local.h" /* * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. diff --git a/arch/x86/kernel/apic/x2apic.h b/arch/x86/kernel/apic/x2apic.h deleted file mode 100644 index a49b3604027f..000000000000 --- a/arch/x86/kernel/apic/x2apic.h +++ /dev/null @@ -1,9 +0,0 @@ -/* Common bits for X2APIC cluster/physical modes. */ - -int x2apic_apic_id_valid(u32 apicid); -int x2apic_apic_id_registered(void); -void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest); -unsigned int x2apic_get_apic_id(unsigned long id); -u32 x2apic_set_apic_id(unsigned int id); -int x2apic_phys_pkg_id(int initial_apicid, int index_msb); -void x2apic_send_IPI_self(int vector); diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index ebde731dc4cf..d0a13c88f777 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -7,7 +7,7 @@ #include -#include "x2apic.h" +#include "local.h" struct cluster_mask { unsigned int clusterid; diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 3bde4724c1c7..5d50e1f9d4bf 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -3,8 +3,7 @@ #include #include -#include "x2apic.h" -#include "ipi.h" +#include "local.h" int x2apic_phys; From 82e574782345aa634e1544e80da85d71a9dbde19 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Jul 2019 20:47:15 +0200 Subject: [PATCH 13/28] x86/apic/uv: Make x2apic_extra_bits static Not used outside of the UV apic source. 
Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190722105219.725264153@linutronix.de --- arch/x86/include/asm/apic.h | 2 -- arch/x86/kernel/apic/x2apic_uv_x.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index e647aa095867..f53eda2c986b 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -467,8 +467,6 @@ static inline unsigned default_get_apic_id(unsigned long x) #ifdef CONFIG_X86_64 extern void apic_send_IPI_self(int vector); - -DECLARE_PER_CPU(int, x2apic_extra_bits); #endif extern void generic_bigsmp_probe(void); diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 73a652093820..e6230af19864 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -22,7 +22,7 @@ #include #include -DEFINE_PER_CPU(int, x2apic_extra_bits); +static DEFINE_PER_CPU(int, x2apic_extra_bits); static enum uv_system_type uv_system_type; static bool uv_hubless_system; From 9c92374b631d233abf5bd355cb4253d3d83d5578 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Jul 2019 20:47:17 +0200 Subject: [PATCH 14/28] x86/cpu: Move arch_smt_update() to a neutral place arch_smt_update() will be used to control IPI/NMI broadcasting via the shorthand mechanism. Keeping it in the bugs file and calling the apic function from there is possible, but not really intuitive. Move it to a neutral place and invoke the bugs function from there. No functional change. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190722105219.910317273@linutronix.de --- arch/x86/include/asm/bugs.h | 2 ++ arch/x86/kernel/cpu/bugs.c | 2 +- arch/x86/kernel/cpu/common.c | 9 +++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/bugs.h b/arch/x86/include/asm/bugs.h index 542509b53e0f..794eb2129bc6 100644 --- a/arch/x86/include/asm/bugs.h +++ b/arch/x86/include/asm/bugs.h @@ -18,4 +18,6 @@ int ppro_with_ram_bug(void); static inline int ppro_with_ram_bug(void) { return 0; } #endif +extern void cpu_bugs_smt_update(void); + #endif /* _ASM_X86_BUGS_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 66ca906aa790..6d9636c2ca51 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -700,7 +700,7 @@ static void update_mds_branch_idle(void) #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" -void arch_smt_update(void) +void cpu_bugs_smt_update(void) { /* Enhanced IBRS implies STIBP. No update required. 
*/ if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 11472178e17f..1ee6598c5d83 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1945,3 +1945,12 @@ void microcode_check(void) pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n"); pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n"); } + +/* + * Invoked from core CPU hotplug code after hotplug operations + */ +void arch_smt_update(void) +{ + /* Handle the speculative execution misfeatures */ + cpu_bugs_smt_update(); +} From 60dcaad5736faff5a6b1abba5a292499f57197fe Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 24 Jul 2019 17:25:52 +0200 Subject: [PATCH 15/28] x86/hotplug: Silence APIC and NMI when CPU is dead In order to support IPI/NMI broadcasting via the shorthand mechanism side effects of shorthands need to be mitigated: Shorthand IPIs and NMIs hit all CPUs including unplugged CPUs Neither of those can be handled on unplugged CPUs for obvious reasons. It would be trivial to just fully disable the APIC via the enable bit in MSR_APICBASE. But that's not possible because clearing that bit on systems based on the 3 wire APIC bus would require a hardware reset to bring it back as the APIC would lose track of bus arbitration. On systems with FSB delivery APICBASE could be disabled, but it has to be guaranteed that no interrupt is sent to the APIC while in that state and it's not clear from the SDM whether it still responds to INIT/SIPI messages. Therefore stay on the safe side and switch the APIC into soft disabled mode so it won't deliver any regular vector to the CPU. NMIs are still propagated to the 'dead' CPUs. To mitigate that add a check for the CPU being offline on early nmi entry and if so bail. Note, this cannot use the stop/restart_nmi() magic which is used in the alternatives code. A dead CPU cannot invoke nmi_enter() or anything else due to RCU and other reasons. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1907241723290.1791@nanos.tec.linutronix.de --- arch/x86/include/asm/apic.h | 1 + arch/x86/kernel/apic/apic.c | 35 ++++++++++++++++++++++++----------- arch/x86/kernel/nmi.c | 3 +++ arch/x86/kernel/smpboot.c | 7 ++++++- 4 files changed, 34 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index f53eda2c986b..cae7e0d02476 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -136,6 +136,7 @@ extern int lapic_get_maxlvt(void); extern void clear_local_APIC(void); extern void disconnect_bsp_APIC(int virt_wire_setup); extern void disable_local_APIC(void); +extern void apic_soft_disable(void); extern void lapic_shutdown(void); extern void sync_Arb_IDs(void); extern void init_bsp_APIC(void); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index fe30d1854a4e..831274e3c09f 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1181,26 +1181,39 @@ void clear_local_APIC(void) } } +/** + * apic_soft_disable - Clears and software disables the local APIC on hotplug + * + * Contrary to disable_local_APIC() this does not touch the enable bit in + * MSR_IA32_APICBASE. Clearing that bit on systems based on the 3 wire APIC + * bus would require a hardware reset as the APIC would lose track of bus + * arbitration. 
On systems with FSB delivery APICBASE could be disabled, + * but it has to be guaranteed that no interrupt is sent to the APIC while + * in that state and it's not clear from the SDM whether it still responds + * to INIT/SIPI messages. Stay on the safe side and use software disable. + */ +void apic_soft_disable(void) +{ + u32 value; + + clear_local_APIC(); + + /* Soft disable APIC (implies clearing of registers for 82489DX!). */ + value = apic_read(APIC_SPIV); + value &= ~APIC_SPIV_APIC_ENABLED; + apic_write(APIC_SPIV, value); +} + /** * disable_local_APIC - clear and disable the local APIC */ void disable_local_APIC(void) { - unsigned int value; - /* APIC hasn't been mapped yet */ if (!x2apic_mode && !apic_phys) return; - clear_local_APIC(); - - /* - * Disable APIC (implies clearing of registers - * for 82489DX!). - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_SPIV_APIC_ENABLED; - apic_write(APIC_SPIV, value); + apic_soft_disable(); #ifdef CONFIG_X86_32 /* diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 4df7705022b9..e676a9916c49 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -512,6 +512,9 @@ NOKPROBE_SYMBOL(is_debug_stack); dotraplinkage notrace void do_nmi(struct pt_regs *regs, long error_code) { + if (IS_ENABLED(CONFIG_SMP) && cpu_is_offline(smp_processor_id())) + return; + if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { this_cpu_write(nmi_state, NMI_LATCHED); return; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fdbd47ceb84d..c19f8e21b748 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1596,7 +1596,12 @@ int native_cpu_disable(void) if (ret) return ret; - clear_local_APIC(); + /* + * Disable the local APIC. Otherwise IPI broadcasts will reach + * it. It still responds normally to INIT, NMI, SMI, and SIPI + * messages. + */ + apic_soft_disable(); cpu_disable_common(); return 0; From 3994ff90acc3b115734fe532720c37a499c502ce Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Jul 2019 20:47:19 +0200 Subject: [PATCH 16/28] x86/apic: Remove dest argument from __default_send_IPI_shortcut() The SDM states: "The destination shorthand field of the ICR allows the delivery mode to be by-passed in favor of broadcasting the IPI to all the processors on the system bus and/or back to itself (see Section 10.6.1, Interrupt Command Register (ICR)). Three destination shorthands are supported: self, all excluding self, and all including self. The destination mode is ignored when a destination shorthand is used." So there is no point to supply the destination mode to the shorthand delivery function. 
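[ Note: illustration only, assuming the standard xAPIC ICR layout (vector in bits 7:0, delivery mode in bits 10:8, destination mode in bit 11, destination shorthand in bits 19:18). Once the shorthand field is non-zero the destination mode bit is ignored, so the call site below simply passes 0:

	/* destination mode is meaningless for shorthand delivery */
	cfg = __prepare_ICR(shortcut, vector, 0);
]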
Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190722105220.094613426@linutronix.de --- arch/x86/kernel/apic/apic_flat_64.c | 6 ++---- arch/x86/kernel/apic/ipi.c | 15 +++++++-------- arch/x86/kernel/apic/local.h | 2 +- arch/x86/kernel/apic/probe_64.c | 2 +- 4 files changed, 11 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index f8594b844637..004611a44962 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -90,8 +90,7 @@ static void flat_send_IPI_allbutself(int vector) _flat_send_IPI_mask(mask, vector); } } else if (num_online_cpus() > 1) { - __default_send_IPI_shortcut(APIC_DEST_ALLBUT, - vector, apic->dest_logical); + __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector); } } @@ -100,8 +99,7 @@ static void flat_send_IPI_all(int vector) if (vector == NMI_VECTOR) { flat_send_IPI_mask(cpu_online_mask, vector); } else { - __default_send_IPI_shortcut(APIC_DEST_ALLINC, - vector, apic->dest_logical); + __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector); } } diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 6fa9f6ca7eef..50c9dcc6f60e 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -16,7 +16,7 @@ static inline void __xapic_wait_icr_idle(void) cpu_relax(); } -void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest) +void __default_send_IPI_shortcut(unsigned int shortcut, int vector) { /* * Subtle. In the case of the 'never do double writes' workaround @@ -33,9 +33,10 @@ void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int __xapic_wait_icr_idle(); /* - * No need to touch the target chip field + * No need to touch the target chip field. Also the destination + * mode is ignored when a shorthand is used. */ - cfg = __prepare_ICR(shortcut, vector, dest); + cfg = __prepare_ICR(shortcut, vector, 0); /* * Send the IPI. The write to APIC_ICR fires this off. 
@@ -202,8 +203,7 @@ void default_send_IPI_allbutself(int vector) if (no_broadcast || vector == NMI_VECTOR) { apic->send_IPI_mask_allbutself(cpu_online_mask, vector); } else { - __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, - apic->dest_logical); + __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector); } } @@ -212,14 +212,13 @@ void default_send_IPI_all(int vector) if (no_broadcast || vector == NMI_VECTOR) { apic->send_IPI_mask(cpu_online_mask, vector); } else { - __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector, - apic->dest_logical); + __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector); } } void default_send_IPI_self(int vector) { - __default_send_IPI_shortcut(APIC_DEST_SELF, vector, apic->dest_logical); + __default_send_IPI_shortcut(APIC_DEST_SELF, vector); } /* must come after the send_IPI functions above for inlining */ diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h index 95adac0e785b..47c43381b444 100644 --- a/arch/x86/kernel/apic/local.h +++ b/arch/x86/kernel/apic/local.h @@ -38,7 +38,7 @@ static inline unsigned int __prepare_ICR(unsigned int shortcut, int vector, return icr; } -void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest); +void __default_send_IPI_shortcut(unsigned int shortcut, int vector); /* * This is used to send an IPI with no shorthand notation (the destination is diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index e0e1e3380ea2..fb457b540e78 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -40,7 +40,7 @@ void __init default_setup_apic_routing(void) void apic_send_IPI_self(int vector) { - __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); + __default_send_IPI_shortcut(APIC_DEST_SELF, vector); } int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) From bd82dba2fa6ae91061e5d31399d61fe65028f714 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Jul 2019 20:47:20 +0200 Subject: [PATCH 17/28] x86/apic: Add NMI_VECTOR wait to IPI shorthand To support NMI shorthand broadcasts add the safe wait for ICR idle for NMI vector delivery. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190722105220.185838026@linutronix.de --- arch/x86/kernel/apic/ipi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 50c9dcc6f60e..7236fefde396 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -30,7 +30,10 @@ void __default_send_IPI_shortcut(unsigned int shortcut, int vector) /* * Wait for idle. */ - __xapic_wait_icr_idle(); + if (unlikely(vector == NMI_VECTOR)) + safe_apic_wait_icr_idle(); + else + __xapic_wait_icr_idle(); /* * No need to touch the target chip field. Also the destination From bdda3b93e66085abf0b2c16bcdf471176e3c816a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Jul 2019 20:47:21 +0200 Subject: [PATCH 18/28] x86/apic: Move no_ipi_broadcast() out of 32bit For the upcoming shorthand support for all APIC incarnations the command line option needs to be available for 64 bit as well. While at it, rename the control variable, make it static and mark it __ro_after_init. 
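As a rough user-space model (not the kernel implementation) of the command line semantics after the move, parse_no_ipi_broadcast() below stands in for the __setup("no_ipi_broadcast=", ...) handler plus get_option(), and hotplug_enabled models CONFIG_HOTPLUG_CPU:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int apic_ipi_shorthand_off;      /* 0: shorthands allowed, 1: disabled */

/* Stand-in for the __setup("no_ipi_broadcast=", ...) handler + get_option(). */
static void parse_no_ipi_broadcast(const char *arg)
{
        apic_ipi_shorthand_off = atoi(arg);
}

int main(int argc, char **argv)
{
        int hotplug_enabled = 1;        /* models CONFIG_HOTPLUG_CPU=y */

        /* Default mirrors DEFAULT_SEND_IPI: shorthands off when hotplug is on. */
        apic_ipi_shorthand_off = hotplug_enabled ? 1 : 0;

        if (argc > 1 && !strncmp(argv[1], "no_ipi_broadcast=", 17))
                parse_no_ipi_broadcast(argv[1] + 17);

        printf("IPI shorthand broadcast: %s\n",
               apic_ipi_shorthand_off ? "disabled" : "enabled");
        return 0;
}

Booting with no_ipi_broadcast=1 keeps the mask-based IPI path; the slightly confusing DEFAULT_SEND_IPI value of 1 in the CONFIG_HOTPLUG_CPU case means "send IPIs individually", i.e. shorthands stay disabled by default on hotplug-capable configurations.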
Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190722105220.278327940@linutronix.de --- arch/x86/kernel/apic/ipi.c | 29 +++++++++++++++++++++++++++-- arch/x86/kernel/apic/local.h | 2 -- arch/x86/kernel/apic/probe_32.c | 25 ------------------------- 3 files changed, 27 insertions(+), 29 deletions(-) diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 7236fefde396..ca3bcdb7c4a8 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -5,6 +5,31 @@ #include "local.h" +#ifdef CONFIG_SMP +#ifdef CONFIG_HOTPLUG_CPU +#define DEFAULT_SEND_IPI (1) +#else +#define DEFAULT_SEND_IPI (0) +#endif + +static int apic_ipi_shorthand_off __ro_after_init = DEFAULT_SEND_IPI; + +static __init int apic_ipi_shorthand(char *str) +{ + get_option(&str, &apic_ipi_shorthand_off); + return 1; +} +__setup("no_ipi_broadcast=", apic_ipi_shorthand); + +static int __init print_ipi_mode(void) +{ + pr_info("IPI shorthand broadcast: %s\n", + apic_ipi_shorthand_off ? "disabled" : "enabled"); + return 0; +} +late_initcall(print_ipi_mode); +#endif + static inline int __prepare_ICR2(unsigned int mask) { return SET_APIC_DEST_FIELD(mask); @@ -203,7 +228,7 @@ void default_send_IPI_allbutself(int vector) if (num_online_cpus() < 2) return; - if (no_broadcast || vector == NMI_VECTOR) { + if (apic_ipi_shorthand_off || vector == NMI_VECTOR) { apic->send_IPI_mask_allbutself(cpu_online_mask, vector); } else { __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector); @@ -212,7 +237,7 @@ void default_send_IPI_allbutself(int vector) void default_send_IPI_all(int vector) { - if (no_broadcast || vector == NMI_VECTOR) { + if (apic_ipi_shorthand_off || vector == NMI_VECTOR) { apic->send_IPI_mask(cpu_online_mask, vector); } else { __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector); diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h index 47c43381b444..bd074e5997b0 100644 --- a/arch/x86/kernel/apic/local.h +++ b/arch/x86/kernel/apic/local.h @@ -51,8 +51,6 @@ void default_send_IPI_single_phys(int cpu, int vector); void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector); void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, int vector); -extern int no_broadcast; - #ifdef CONFIG_X86_32 void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int vector); void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, int vector); diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 7cc961d4f51f..0ac9fd667c99 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -15,31 +15,6 @@ #include "local.h" -#ifdef CONFIG_HOTPLUG_CPU -#define DEFAULT_SEND_IPI (1) -#else -#define DEFAULT_SEND_IPI (0) -#endif - -int no_broadcast = DEFAULT_SEND_IPI; - -static __init int no_ipi_broadcast(char *str) -{ - get_option(&str, &no_broadcast); - pr_info("Using %s mode\n", - no_broadcast ? "No IPI Broadcast" : "IPI Broadcast"); - return 1; -} -__setup("no_ipi_broadcast=", no_ipi_broadcast); - -static int __init print_ipi_mode(void) -{ - pr_info("Using IPI %s mode\n", - no_broadcast ? 
"No-Shortcut" : "Shortcut"); - return 0; -} -late_initcall(print_ipi_mode); - static int default_x86_32_early_logical_apicid(int cpu) { return 1 << cpu; From 6a1cb5f5c6413222b8532722562dd1edb5fdfd38 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Jul 2019 20:47:22 +0200 Subject: [PATCH 19/28] x86/apic: Add static key to Control IPI shorthands The IPI shorthand functionality delivers IPI/NMI broadcasts to all CPUs in the system. This can have similar side effects as the MCE broadcasting when CPUs are waiting in the BIOS or are offlined. The kernel tracks already the state of offlined CPUs whether they have been brought up at least once so that the CR4 MCE bit is set to make sure that MCE broadcasts can't brick the machine. Utilize that information and compare it to the cpu_present_mask. If all present CPUs have been brought up at least once then the broadcast side effect is mitigated by disabling regular interrupt/IPI delivery in the APIC itself and by the cpu offline check at the begin of the NMI handler. Use a static key to switch between broadcasting via shorthands or sending the IPI/NMI one by one. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190722105220.386410643@linutronix.de --- arch/x86/include/asm/apic.h | 2 ++ arch/x86/kernel/apic/ipi.c | 24 +++++++++++++++++++++++- arch/x86/kernel/apic/local.h | 6 ++++++ arch/x86/kernel/cpu/common.c | 2 ++ 4 files changed, 33 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index cae7e0d02476..4a0d349ab44d 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -505,8 +505,10 @@ extern int default_check_phys_apicid_present(int phys_apicid); #ifdef CONFIG_SMP bool apic_id_is_primary_thread(unsigned int id); +void apic_smt_update(void); #else static inline bool apic_id_is_primary_thread(unsigned int id) { return false; } +static inline void apic_smt_update(void) { } #endif extern void irq_enter(void); diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index ca3bcdb7c4a8..5bd8a001a887 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -5,6 +5,8 @@ #include "local.h" +DEFINE_STATIC_KEY_FALSE(apic_use_ipi_shorthand); + #ifdef CONFIG_SMP #ifdef CONFIG_HOTPLUG_CPU #define DEFAULT_SEND_IPI (1) @@ -28,7 +30,27 @@ static int __init print_ipi_mode(void) return 0; } late_initcall(print_ipi_mode); -#endif + +void apic_smt_update(void) +{ + /* + * Do not switch to broadcast mode if: + * - Disabled on the command line + * - Only a single CPU is online + * - Not all present CPUs have been at least booted once + * + * The latter is important as the local APIC might be in some + * random state and a broadcast might cause havoc. That's + * especially true for NMI broadcasting. + */ + if (apic_ipi_shorthand_off || num_online_cpus() == 1 || + !cpumask_equal(cpu_present_mask, &cpus_booted_once_mask)) { + static_branch_disable(&apic_use_ipi_shorthand); + } else { + static_branch_enable(&apic_use_ipi_shorthand); + } +} +#endif /* CONFIG_SMP */ static inline int __prepare_ICR2(unsigned int mask) { diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h index bd074e5997b0..391594cd5ca9 100644 --- a/arch/x86/kernel/apic/local.h +++ b/arch/x86/kernel/apic/local.h @@ -7,6 +7,9 @@ * (c) 1998-99, 2000 Ingo Molnar * (c) 2002,2003 Andi Kleen, SuSE Labs. 
*/ + +#include + #include /* APIC flat 64 */ @@ -22,6 +25,9 @@ int x2apic_phys_pkg_id(int initial_apicid, int index_msb); void x2apic_send_IPI_self(int vector); /* IPI */ + +DECLARE_STATIC_KEY_FALSE(apic_use_ipi_shorthand); + static inline unsigned int __prepare_ICR(unsigned int shortcut, int vector, unsigned int dest) { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 1ee6598c5d83..e0489d2860d3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1953,4 +1953,6 @@ void arch_smt_update(void) { /* Handle the speculative execution misfeatures */ cpu_bugs_smt_update(); + /* Check whether IPI broadcasting can be enabled */ + apic_smt_update(); } From 22ca7ee933a39f542ff6f81fc64f8036eff56519 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Jul 2019 20:47:23 +0200 Subject: [PATCH 20/28] x86/apic: Provide and use helper for send_IPI_allbutself() To support IPI shorthands wrap invocations of apic->send_IPI_allbutself() in a helper function, so the static key controlling the shorthand mode is only in one place. Fixup all callers. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190722105220.492691679@linutronix.de --- arch/x86/include/asm/apic.h | 2 ++ arch/x86/kernel/apic/ipi.c | 12 ++++++++++++ arch/x86/kernel/kgdb.c | 2 +- arch/x86/kernel/reboot.c | 7 +------ arch/x86/kernel/smp.c | 4 ++-- 5 files changed, 18 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 4a0d349ab44d..de86c6c15228 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -177,6 +177,8 @@ extern void lapic_online(void); extern void lapic_offline(void); extern bool apic_needs_pit(void); +extern void apic_send_IPI_allbutself(unsigned int vector); + #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } #define local_apic_timer_c2_ok 1 diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 5bd8a001a887..f53de3e0145e 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -50,6 +50,18 @@ void apic_smt_update(void) static_branch_enable(&apic_use_ipi_shorthand); } } + +void apic_send_IPI_allbutself(unsigned int vector) +{ + if (num_online_cpus() < 2) + return; + + if (static_branch_likely(&apic_use_ipi_shorthand)) + apic->send_IPI_allbutself(vector); + else + apic->send_IPI_mask_allbutself(cpu_online_mask, vector); +} + #endif /* CONFIG_SMP */ static inline int __prepare_ICR2(unsigned int mask) diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index a53dfb09880f..c44fe7d8d9a4 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -416,7 +416,7 @@ static void kgdb_disable_hw_debug(struct pt_regs *regs) */ void kgdb_roundup_cpus(void) { - apic->send_IPI_allbutself(NMI_VECTOR); + apic_send_IPI_allbutself(NMI_VECTOR); } #endif diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 09d6bded3c1e..0cc7c0b106bb 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -828,11 +828,6 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) return NMI_HANDLED; } -static void smp_send_nmi_allbutself(void) -{ - apic->send_IPI_allbutself(NMI_VECTOR); -} - /* * Halt all other CPUs, calling the specified function on each of them * @@ -861,7 +856,7 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) */ wmb(); - smp_send_nmi_allbutself(); + apic_send_IPI_allbutself(NMI_VECTOR); /* Kick CPUs looping in NMI context. 
*/ WRITE_ONCE(crash_ipi_issued, 1); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 231fa230ebc7..b8ad1876a081 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -215,7 +215,7 @@ static void native_stop_other_cpus(int wait) /* sync above data before sending IRQ */ wmb(); - apic->send_IPI_allbutself(REBOOT_VECTOR); + apic_send_IPI_allbutself(REBOOT_VECTOR); /* * Don't wait longer than a second for IPI completion. The @@ -241,7 +241,7 @@ static void native_stop_other_cpus(int wait) pr_emerg("Shutting down cpus with NMI\n"); - apic->send_IPI_allbutself(NMI_VECTOR); + apic_send_IPI_allbutself(NMI_VECTOR); } /* * Don't wait longer than 10 ms if the caller didn't From d0a7166bc7ac4feac5c482ebe8b2417aa3302ef4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Jul 2019 20:47:25 +0200 Subject: [PATCH 21/28] x86/smp: Move smp_function_call implementations into IPI code Move it where it belongs. That allows to keep all the shorthand logic in one place. No functional change. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190722105220.677835995@linutronix.de --- arch/x86/include/asm/smp.h | 1 + arch/x86/kernel/apic/ipi.c | 40 ++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/smp.c | 40 -------------------------------------- 3 files changed, 41 insertions(+), 40 deletions(-) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index e1356a3b8223..e15f364efbcc 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -143,6 +143,7 @@ void play_dead_common(void); void wbinvd_on_cpu(int cpu); int wbinvd_on_all_cpus(void); +void native_smp_send_reschedule(int cpu); void native_send_call_func_ipi(const struct cpumask *mask); void native_send_call_func_single_ipi(int cpu); void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle); diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index f53de3e0145e..eaac65bf58f0 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -62,6 +62,46 @@ void apic_send_IPI_allbutself(unsigned int vector) apic->send_IPI_mask_allbutself(cpu_online_mask, vector); } +/* + * Send a 'reschedule' IPI to another CPU. It goes straight through and + * wastes no time serializing anything. Worst case is that we lose a + * reschedule ... 
+ */ +void native_smp_send_reschedule(int cpu) +{ + if (unlikely(cpu_is_offline(cpu))) { + WARN(1, "sched: Unexpected reschedule of offline CPU#%d!\n", cpu); + return; + } + apic->send_IPI(cpu, RESCHEDULE_VECTOR); +} + +void native_send_call_func_single_ipi(int cpu) +{ + apic->send_IPI(cpu, CALL_FUNCTION_SINGLE_VECTOR); +} + +void native_send_call_func_ipi(const struct cpumask *mask) +{ + cpumask_var_t allbutself; + + if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) { + apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR); + return; + } + + cpumask_copy(allbutself, cpu_online_mask); + __cpumask_clear_cpu(smp_processor_id(), allbutself); + + if (cpumask_equal(mask, allbutself) && + cpumask_equal(cpu_online_mask, cpu_callout_mask)) + apic->send_IPI_allbutself(CALL_FUNCTION_VECTOR); + else + apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR); + + free_cpumask_var(allbutself); +} + #endif /* CONFIG_SMP */ static inline int __prepare_ICR2(unsigned int mask) diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index b8ad1876a081..b8d4e9c3c070 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -115,46 +115,6 @@ static atomic_t stopping_cpu = ATOMIC_INIT(-1); static bool smp_no_nmi_ipi = false; -/* - * this function sends a 'reschedule' IPI to another CPU. - * it goes straight through and wastes no time serializing - * anything. Worst case is that we lose a reschedule ... - */ -static void native_smp_send_reschedule(int cpu) -{ - if (unlikely(cpu_is_offline(cpu))) { - WARN(1, "sched: Unexpected reschedule of offline CPU#%d!\n", cpu); - return; - } - apic->send_IPI(cpu, RESCHEDULE_VECTOR); -} - -void native_send_call_func_single_ipi(int cpu) -{ - apic->send_IPI(cpu, CALL_FUNCTION_SINGLE_VECTOR); -} - -void native_send_call_func_ipi(const struct cpumask *mask) -{ - cpumask_var_t allbutself; - - if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) { - apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR); - return; - } - - cpumask_copy(allbutself, cpu_online_mask); - __cpumask_clear_cpu(smp_processor_id(), allbutself); - - if (cpumask_equal(mask, allbutself) && - cpumask_equal(cpu_online_mask, cpu_callout_mask)) - apic->send_IPI_allbutself(CALL_FUNCTION_VECTOR); - else - apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR); - - free_cpumask_var(allbutself); -} - static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) { /* We are registered on stopping cpu too, avoid spurious NMI */ From 832df3d47badcbc860aef617105b6bc1c9459304 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Jul 2019 20:47:26 +0200 Subject: [PATCH 22/28] x86/smp: Enhance native_send_call_func_ipi() Nadav noticed that the cpumask allocations in native_send_call_func_ipi() are noticeable in microbenchmarks. Use the new cpumask_or_equal() function to simplify the decision whether the supplied target CPU mask is either equal to cpu_online_mask or equal to cpu_online_mask except for the CPU on which the function is invoked. cpumask_or_equal() or's the target mask and the cpumask of the current CPU together and compares it to cpu_online_mask. If the result is false, use the mask based IPI function, otherwise check whether the current CPU is set in the target mask and invoke either the send_IPI_all() or the send_IPI_allbutselt() APIC callback. Make the shorthand decision also depend on the static key which enables shorthand mode. That allows to remove the extra cpumask comparison with cpu_callout_mask. 
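The resulting decision can be sketched in user space with plain bitmasks standing in for struct cpumask (cpu numbers are bit positions, masks are assumed to fit in one unsigned long; pick_ipi() is a hypothetical helper, and the static key check plus the single-online-CPU guard are omitted):

#include <stdio.h>

enum ipi_kind { IPI_MASK, IPI_ALL, IPI_ALLBUTSELF };

/*
 * Decision logic of native_send_call_func_ipi() with plain bitmasks in
 * place of struct cpumask.
 */
static enum ipi_kind pick_ipi(unsigned long target, unsigned long online,
                              unsigned int self)
{
        unsigned long self_bit = 1UL << self;

        /* !cpumask_or_equal(mask, cpumask_of(cpu), cpu_online_mask) */
        if ((target | self_bit) != online)
                return IPI_MASK;                /* not a (near) broadcast */

        /* Broadcast: does the target mask include the sending CPU? */
        return (target & self_bit) ? IPI_ALL : IPI_ALLBUTSELF;
}

int main(void)
{
        static const char *const names[] = { "mask", "all", "allbutself" };
        unsigned long online = 0xfUL;           /* CPUs 0-3 online */

        printf("%s\n", names[pick_ipi(0xfUL, online, 0)]);  /* all        */
        printf("%s\n", names[pick_ipi(0xeUL, online, 0)]);  /* allbutself */
        printf("%s\n", names[pick_ipi(0x6UL, online, 0)]);  /* mask       */
        return 0;
}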
Reported-by: Nadav Amit Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190722105220.768238809@linutronix.de --- arch/x86/kernel/apic/ipi.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index eaac65bf58f0..117ee2323f59 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -83,23 +83,21 @@ void native_send_call_func_single_ipi(int cpu) void native_send_call_func_ipi(const struct cpumask *mask) { - cpumask_var_t allbutself; + if (static_branch_likely(&apic_use_ipi_shorthand)) { + unsigned int cpu = smp_processor_id(); - if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) { - apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR); + if (!cpumask_or_equal(mask, cpumask_of(cpu), cpu_online_mask)) + goto sendmask; + + if (cpumask_test_cpu(cpu, mask)) + apic->send_IPI_all(CALL_FUNCTION_VECTOR); + else if (num_online_cpus() > 1) + apic->send_IPI_allbutself(CALL_FUNCTION_VECTOR); return; } - cpumask_copy(allbutself, cpu_online_mask); - __cpumask_clear_cpu(smp_processor_id(), allbutself); - - if (cpumask_equal(mask, allbutself) && - cpumask_equal(cpu_online_mask, cpu_callout_mask)) - apic->send_IPI_allbutself(CALL_FUNCTION_VECTOR); - else - apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR); - - free_cpumask_var(allbutself); +sendmask: + apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR); } #endif /* CONFIG_SMP */ From 1f0ad660488b8eb2450d1834af6a156104281194 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Jul 2019 20:47:27 +0200 Subject: [PATCH 23/28] x86/apic: Remove the shorthand decision logic All callers of apic->send_IPI_all() and apic->send_IPI_allbutself() contain the decision logic for shorthand invocation already and invoke send_IPI_mask() if the prereqisites are not satisfied. Remove the now redundant decision logic in the 32bit implementation. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190722105220.860244707@linutronix.de --- arch/x86/kernel/apic/ipi.c | 27 +++------------------------ 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 117ee2323f59..71363b0d4a67 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -8,13 +8,7 @@ DEFINE_STATIC_KEY_FALSE(apic_use_ipi_shorthand); #ifdef CONFIG_SMP -#ifdef CONFIG_HOTPLUG_CPU -#define DEFAULT_SEND_IPI (1) -#else -#define DEFAULT_SEND_IPI (0) -#endif - -static int apic_ipi_shorthand_off __ro_after_init = DEFAULT_SEND_IPI; +static int apic_ipi_shorthand_off __ro_after_init; static __init int apic_ipi_shorthand(char *str) { @@ -293,27 +287,12 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) void default_send_IPI_allbutself(int vector) { - /* - * if there are no other CPUs in the system then we get an APIC send - * error if we try to broadcast, thus avoid sending IPIs in this case. 
- */ - if (num_online_cpus() < 2) - return; - - if (apic_ipi_shorthand_off || vector == NMI_VECTOR) { - apic->send_IPI_mask_allbutself(cpu_online_mask, vector); - } else { - __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector); - } + __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector); } void default_send_IPI_all(int vector) { - if (apic_ipi_shorthand_off || vector == NMI_VECTOR) { - apic->send_IPI_mask(cpu_online_mask, vector); - } else { - __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector); - } + __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector); } void default_send_IPI_self(int vector) From dea978632e8400b84888bad20df0cd91c18f0aec Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Jul 2019 20:47:28 +0200 Subject: [PATCH 24/28] x86/apic: Share common IPI helpers The 64bit implementations need the same wrappers around __default_send_IPI_shortcut() as 32bit. Move them out of the 32bit section. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190722105220.951534451@linutronix.de --- arch/x86/kernel/apic/ipi.c | 30 +++++++++++++++--------------- arch/x86/kernel/apic/local.h | 6 +++--- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 71363b0d4a67..6ca0f91372fd 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -226,6 +226,21 @@ void default_send_IPI_single(int cpu, int vector) apic->send_IPI_mask(cpumask_of(cpu), vector); } +void default_send_IPI_allbutself(int vector) +{ + __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector); +} + +void default_send_IPI_all(int vector) +{ + __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector); +} + +void default_send_IPI_self(int vector) +{ + __default_send_IPI_shortcut(APIC_DEST_SELF, vector); +} + #ifdef CONFIG_X86_32 void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, @@ -285,21 +300,6 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) local_irq_restore(flags); } -void default_send_IPI_allbutself(int vector) -{ - __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector); -} - -void default_send_IPI_all(int vector) -{ - __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector); -} - -void default_send_IPI_self(int vector) -{ - __default_send_IPI_shortcut(APIC_DEST_SELF, vector); -} - /* must come after the send_IPI functions above for inlining */ static int convert_apicid_to_cpu(int apic_id) { diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h index 391594cd5ca9..69ba777cef98 100644 --- a/arch/x86/kernel/apic/local.h +++ b/arch/x86/kernel/apic/local.h @@ -56,12 +56,12 @@ void default_send_IPI_single(int cpu, int vector); void default_send_IPI_single_phys(int cpu, int vector); void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector); void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, int vector); +void default_send_IPI_allbutself(int vector); +void default_send_IPI_all(int vector); +void default_send_IPI_self(int vector); #ifdef CONFIG_X86_32 void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int vector); void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, int vector); void default_send_IPI_mask_logical(const struct cpumask *mask, int vector); -void default_send_IPI_allbutself(int vector); -void default_send_IPI_all(int vector); -void default_send_IPI_self(int vector); #endif From 2510d09e9dabc265341f164e0b45b2dfdcb7ef36 Mon Sep 17 
00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Jul 2019 20:47:29 +0200 Subject: [PATCH 25/28] x86/apic/flat64: Remove the IPI shorthand decision logic All callers of apic->send_IPI_all() and apic->send_IPI_allbutself() contain the decision logic for shorthand invocation already and invoke send_IPI_mask() if the prereqisites are not satisfied. Remove the now redundant decision logic in the APIC code and the duplicate helper in probe_64.c. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190722105221.042964120@linutronix.de --- arch/x86/include/asm/apic.h | 4 --- arch/x86/kernel/apic/apic_flat_64.c | 49 ++++------------------------- arch/x86/kernel/apic/probe_64.c | 7 ----- 3 files changed, 6 insertions(+), 54 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index de86c6c15228..2ebc17d9c72c 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -468,10 +468,6 @@ static inline unsigned default_get_apic_id(unsigned long x) #define TRAMPOLINE_PHYS_LOW 0x467 #define TRAMPOLINE_PHYS_HIGH 0x469 -#ifdef CONFIG_X86_64 -extern void apic_send_IPI_self(int vector); -#endif - extern void generic_bigsmp_probe(void); #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 004611a44962..7862b152a052 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -76,33 +76,6 @@ flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) _flat_send_IPI_mask(mask, vector); } -static void flat_send_IPI_allbutself(int vector) -{ - int cpu = smp_processor_id(); - - if (IS_ENABLED(CONFIG_HOTPLUG_CPU) || vector == NMI_VECTOR) { - if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) { - unsigned long mask = cpumask_bits(cpu_online_mask)[0]; - - if (cpu < BITS_PER_LONG) - __clear_bit(cpu, &mask); - - _flat_send_IPI_mask(mask, vector); - } - } else if (num_online_cpus() > 1) { - __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector); - } -} - -static void flat_send_IPI_all(int vector) -{ - if (vector == NMI_VECTOR) { - flat_send_IPI_mask(cpu_online_mask, vector); - } else { - __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector); - } -} - static unsigned int flat_get_apic_id(unsigned long x) { return (x >> 24) & 0xFF; @@ -164,9 +137,9 @@ static struct apic apic_flat __ro_after_init = { .send_IPI = default_send_IPI_single, .send_IPI_mask = flat_send_IPI_mask, .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, - .send_IPI_allbutself = flat_send_IPI_allbutself, - .send_IPI_all = flat_send_IPI_all, - .send_IPI_self = apic_send_IPI_self, + .send_IPI_allbutself = default_send_IPI_allbutself, + .send_IPI_all = default_send_IPI_all, + .send_IPI_self = default_send_IPI_self, .inquire_remote_apic = default_inquire_remote_apic, @@ -216,16 +189,6 @@ static void physflat_init_apic_ldr(void) */ } -static void physflat_send_IPI_allbutself(int vector) -{ - default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); -} - -static void physflat_send_IPI_all(int vector) -{ - default_send_IPI_mask_sequence_phys(cpu_online_mask, vector); -} - static int physflat_probe(void) { if (apic == &apic_physflat || num_possible_cpus() > 8 || @@ -267,9 +230,9 @@ static struct apic apic_physflat __ro_after_init = { .send_IPI = default_send_IPI_single_phys, .send_IPI_mask = default_send_IPI_mask_sequence_phys, .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_phys, - .send_IPI_allbutself = 
physflat_send_IPI_allbutself, - .send_IPI_all = physflat_send_IPI_all, - .send_IPI_self = apic_send_IPI_self, + .send_IPI_allbutself = default_send_IPI_allbutself, + .send_IPI_all = default_send_IPI_all, + .send_IPI_self = default_send_IPI_self, .inquire_remote_apic = default_inquire_remote_apic, diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index fb457b540e78..29f0e0984557 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -36,13 +36,6 @@ void __init default_setup_apic_routing(void) x86_platform.apic_post_init(); } -/* Same for both flat and physical. */ - -void apic_send_IPI_self(int vector) -{ - __default_send_IPI_shortcut(APIC_DEST_SELF, vector); -} - int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { struct apic **drv; From 43931d350f30c6cd8c2f498d54ef7d65750abc92 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Jul 2019 20:47:30 +0200 Subject: [PATCH 26/28] x86/apic/x2apic: Implement IPI shorthands support All callers of apic->send_IPI_all() and apic->send_IPI_allbutself() contain the decision logic for shorthand invocation already and invoke send_IPI_mask() if the prereqisites are not satisfied. Implement shorthand support for x2apic. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190722105221.134696837@linutronix.de --- arch/x86/kernel/apic/local.h | 1 + arch/x86/kernel/apic/x2apic_cluster.c | 4 ++-- arch/x86/kernel/apic/x2apic_phys.c | 12 ++++++++++-- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h index 69ba777cef98..04797f05ce94 100644 --- a/arch/x86/kernel/apic/local.h +++ b/arch/x86/kernel/apic/local.h @@ -23,6 +23,7 @@ unsigned int x2apic_get_apic_id(unsigned long id); u32 x2apic_set_apic_id(unsigned int id); int x2apic_phys_pkg_id(int initial_apicid, int index_msb); void x2apic_send_IPI_self(int vector); +void __x2apic_send_IPI_shorthand(int vector, u32 which); /* IPI */ diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index d0a13c88f777..45e92cba92f5 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -82,12 +82,12 @@ x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) static void x2apic_send_IPI_allbutself(int vector) { - __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLBUT); + __x2apic_send_IPI_shorthand(vector, APIC_DEST_ALLBUT); } static void x2apic_send_IPI_all(int vector) { - __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); + __x2apic_send_IPI_shorthand(vector, APIC_DEST_ALLINC); } static u32 x2apic_calc_apicid(unsigned int cpu) diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 5d50e1f9d4bf..bc9693841353 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -75,12 +75,12 @@ static void static void x2apic_send_IPI_allbutself(int vector) { - __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLBUT); + __x2apic_send_IPI_shorthand(vector, APIC_DEST_ALLBUT); } static void x2apic_send_IPI_all(int vector) { - __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); + __x2apic_send_IPI_shorthand(vector, APIC_DEST_ALLINC); } static void init_x2apic_ldr(void) @@ -112,6 +112,14 @@ void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) native_x2apic_icr_write(cfg, apicid); } +void 
__x2apic_send_IPI_shorthand(int vector, u32 which) +{ + unsigned long cfg = __prepare_ICR(which, vector, 0); + + x2apic_wrmsr_fence(); + native_x2apic_icr_write(cfg, 0); +} + unsigned int x2apic_get_apic_id(unsigned long id) { return id; From 6444b40eeda4f78f57b255dd7ecb8d3e5936eea2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 5 Aug 2019 14:21:34 -0700 Subject: [PATCH 27/28] x86/apic: Annotate global config variables as "read-only after init" Mark the APIC's global config variables that are constant after boot as __ro_after_init to help document that the majority of the APIC config is not changed at runtime, and to harden the kernel a smidge. Signed-off-by: Sean Christopherson Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20190805212134.12001-1-sean.j.christopherson@intel.com --- arch/x86/kernel/apic/apic.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 831274e3c09f..3a31875bd0a3 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -65,10 +65,10 @@ unsigned int num_processors; unsigned disabled_cpus; /* Processor that is doing the boot up */ -unsigned int boot_cpu_physical_apicid = -1U; +unsigned int boot_cpu_physical_apicid __ro_after_init = -1U; EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid); -u8 boot_cpu_apic_version; +u8 boot_cpu_apic_version __ro_after_init; /* * The highest APIC ID seen during enumeration. @@ -85,13 +85,13 @@ physid_mask_t phys_cpu_present_map; * disable_cpu_apicid=, mostly used for the kdump 2nd kernel to * avoid undefined behaviour caused by sending INIT from AP to BSP. */ -static unsigned int disabled_cpu_apicid __read_mostly = BAD_APICID; +static unsigned int disabled_cpu_apicid __ro_after_init = BAD_APICID; /* * This variable controls which CPUs receive external NMIs. By default, * external NMIs are delivered only to the BSP. */ -static int apic_extnmi = APIC_EXTNMI_BSP; +static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP; /* * Map cpu index to physical APIC ID @@ -114,7 +114,7 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid); DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID); /* Local APIC was disabled by the BIOS and enabled by the kernel */ -static int enabled_via_apicbase; +static int enabled_via_apicbase __ro_after_init; /* * Handle interrupt mode configuration register (IMCR). 
@@ -172,23 +172,23 @@ static __init int setup_apicpmtimer(char *s) __setup("apicpmtimer", setup_apicpmtimer); #endif -unsigned long mp_lapic_addr; -int disable_apic; +unsigned long mp_lapic_addr __ro_after_init; +int disable_apic __ro_after_init; /* Disable local APIC timer from the kernel commandline or via dmi quirk */ static int disable_apic_timer __initdata; /* Local APIC timer works in C2 */ -int local_apic_timer_c2_ok; +int local_apic_timer_c2_ok __ro_after_init; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); /* * Debug level, exported for io_apic.c */ -int apic_verbosity; +int apic_verbosity __ro_after_init; -int pic_mode; +int pic_mode __ro_after_init; /* Have we found an MP table */ -int smp_found_config; +int smp_found_config __ro_after_init; static struct resource lapic_resource = { .name = "Local APIC", @@ -199,7 +199,7 @@ unsigned int lapic_timer_period = 0; static void apic_pm_activate(void); -static unsigned long apic_phys; +static unsigned long apic_phys __ro_after_init; /* * Get the LAPIC version @@ -1278,7 +1278,7 @@ void __init sync_Arb_IDs(void) APIC_INT_LEVELTRIG | APIC_DM_INIT); } -enum apic_intr_mode_id apic_intr_mode; +enum apic_intr_mode_id apic_intr_mode __ro_after_init; static int __init apic_intr_mode_select(void) { From 743dac494d61d991967ebcfab92e4f80dc7583b3 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Thu, 22 Aug 2019 10:34:21 -0400 Subject: [PATCH 28/28] x86/apic/vector: Warn when vector space exhaustion breaks affinity On x86, CPUs are limited in the number of interrupts they can have affined to them as they only support 256 interrupt vectors per CPU. 32 vectors are reserved for the CPU and the kernel reserves another 22 for internal purposes. That leaves 202 vectors for assignment to devices. When an interrupt is set up or the affinity is changed by the kernel or the administrator, the vector assignment code attempts to honor the requested affinity mask. If the vector space on the CPUs in that affinity mask is exhausted the code falls back to a wider set of CPUs and assigns a vector on a CPU outside of the requested affinity mask silently. While the effective affinity is reflected in the corresponding /proc/irq/$N/effective_affinity* files the silent breakage of the requested affinity can lead to unexpected behaviour for administrators. Add a pr_warn() when this happens so that administrators are at least informed about it in the syslog. [ tglx: Massaged changelog and made the pr_warn() more informative ] Reported-by: djuran@redhat.com Signed-off-by: Neil Horman Signed-off-by: Thomas Gleixner Tested-by: djuran@redhat.com Link: https://lkml.kernel.org/r/20190822143421.9535-1-nhorman@tuxdriver.com --- arch/x86/kernel/apic/vector.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index fdacb864c3dd..2c5676b0a6e7 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -398,6 +398,17 @@ static int activate_reserved(struct irq_data *irqd) if (!irqd_can_reserve(irqd)) apicd->can_reserve = false; } + + /* + * Check to ensure that the effective affinity mask is a subset of + * the user supplied affinity mask, and warn the user if it is not + */ + if (!cpumask_subset(irq_data_get_effective_affinity_mask(irqd), + irq_data_get_affinity_mask(irqd))) { + pr_warn("irq %u: Affinity broken due to vector space exhaustion.\n", + irqd->irq); + } + return ret; }
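For illustration, the added subset check reduces to the following user-space sketch with plain bitmasks in place of struct cpumask; is_subset() is a hypothetical stand-in for cpumask_subset() and the irq number is arbitrary:

#include <stdio.h>

/* Hypothetical stand-in for cpumask_subset() on plain bitmasks. */
static int is_subset(unsigned long sub, unsigned long super)
{
        return (sub & ~super) == 0;
}

int main(void)
{
        unsigned long requested = 0x0c;    /* administrator asked for CPUs 2-3 */
        unsigned long effective = 0x10;    /* vector ended up on CPU 4 */

        /* Warn when the effective affinity escaped the requested mask. */
        if (!is_subset(effective, requested))
                printf("irq %u: Affinity broken due to vector space exhaustion.\n", 42u);
        return 0;
}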