From f30a4a1cede79eb1923262c85a6268c3e50b3408 Mon Sep 17 00:00:00 2001 From: Stephan Uphoff Date: Tue, 7 Dec 2004 20:15:01 +0000 Subject: [PATCH] Avoid more than two pending IPI interrupt vectors per local APIC as this may cause deadlocks. This should fix kern/72123. Discussed with: jhb Tested by: Nik Azim Azam, Andy Farkas, Flack Man, Aykut KARA Izzet BESKARDES, Jens Binnewies, Karl Keusgen Approved by: sam (mentor) --- sys/i386/i386/apic_vector.s | 56 ++------------------ sys/i386/i386/mp_machdep.c | 103 +++++++++++++++++++++++------------- sys/i386/include/apicvar.h | 53 +++++++++++++++---- sys/i386/include/smp.h | 7 +-- 4 files changed, 115 insertions(+), 104 deletions(-) diff --git a/sys/i386/i386/apic_vector.s b/sys/i386/i386/apic_vector.s index da5b2ebdbf7a..88b54aac348c 100644 --- a/sys/i386/i386/apic_vector.s +++ b/sys/i386/i386/apic_vector.s @@ -227,7 +227,8 @@ IDTVEC(invlrng) */ .text SUPERALIGN_TEXT -IDTVEC(hardclock) +IDTVEC(ipi_intr_bitmap_handler) + PUSH_FRAME movl $KDSEL, %eax /* reload with kernel's data segment */ movl %eax, %ds @@ -237,64 +238,15 @@ IDTVEC(hardclock) movl lapic, %edx movl $0, LA_EOI(%edx) /* End Of Interrupt to APIC */ - - pushl $0 /* XXX convert trapframe to clockframe */ - call forwarded_hardclock - addl $4, %esp /* XXX convert clockframe to trapframe */ - MEXITCOUNT - jmp doreti - -/* - * Forward statclock to another CPU. Pushes a clockframe and calls - * forwarded_statclock(). - */ - .text - SUPERALIGN_TEXT -IDTVEC(statclock) - PUSH_FRAME - movl $KDSEL, %eax /* reload with kernel's data segment */ - movl %eax, %ds - movl %eax, %es - movl $KPSEL, %eax - movl %eax, %fs - - movl lapic, %edx - movl $0, LA_EOI(%edx) /* End Of Interrupt to APIC */ - + FAKE_MCOUNT(TF_EIP(%esp)) pushl $0 /* XXX convert trapframe to clockframe */ - call forwarded_statclock + call ipi_bitmap_handler addl $4, %esp /* XXX convert clockframe to trapframe */ MEXITCOUNT jmp doreti -/* - * Executed by a CPU when it receives an Xcpuast IPI from another CPU, - * - * The other CPU has already executed aston() or need_resched() on our - * current process, so we simply need to ack the interrupt and return - * via doreti to run ast(). - */ - - .text - SUPERALIGN_TEXT -IDTVEC(cpuast) - PUSH_FRAME - movl $KDSEL, %eax - movl %eax, %ds /* use KERNEL data segment */ - movl %eax, %es - movl $KPSEL, %eax - movl %eax, %fs - - movl lapic, %edx - movl $0, LA_EOI(%edx) /* End Of Interrupt to APIC */ - - FAKE_MCOUNT(TF_EIP(%esp)) - - MEXITCOUNT - jmp doreti - /* * Executed by a CPU when it receives an Xcpustop IPI from another CPU, * diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c index a79484fa5981..814d7c46a049 100644 --- a/sys/i386/i386/mp_machdep.c +++ b/sys/i386/i386/mp_machdep.c @@ -198,6 +198,9 @@ struct cpu_info { } static cpu_info[MAXCPU]; static int cpu_apic_ids[MAXCPU]; +/* Holds pending bitmap based IPIs per CPU */ +static volatile u_int cpu_ipi_pending[MAXCPU]; + static u_int boot_address; static void set_logical_apic_ids(void); @@ -347,8 +350,10 @@ cpu_mp_start(void) POSTCODE(MP_START_POST); /* Initialize the logical ID to APIC ID table. */ - for (i = 0; i < MAXCPU; i++) + for (i = 0; i < MAXCPU; i++) { cpu_apic_ids[i] = -1; + cpu_ipi_pending[i] = 0; + } /* Install an inter-CPU IPI for TLB invalidation */ setidt(IPI_INVLTLB, IDTVEC(invltlb), @@ -357,14 +362,6 @@ cpu_mp_start(void) SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - - /* Install an inter-CPU IPI for forwarding hardclock() */ - setidt(IPI_HARDCLOCK, IDTVEC(hardclock), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - - /* Install an inter-CPU IPI for forwarding statclock() */ - setidt(IPI_STATCLOCK, IDTVEC(statclock), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install an inter-CPU IPI for lazy pmap release */ setidt(IPI_LAZYPMAP, IDTVEC(lazypmap), @@ -374,8 +371,8 @@ cpu_mp_start(void) setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - /* Install an inter-CPU IPI for forcing an additional software trap */ - setidt(IPI_AST, IDTVEC(cpuast), + /* Install generic inter-CPU IPI handler */ + setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install an inter-CPU IPI for CPU stop/restart */ @@ -1095,20 +1092,6 @@ smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2) * For statclock, we send an IPI to all CPU's to have them call this * function. */ -void -forwarded_statclock(struct clockframe frame) -{ - struct thread *td; - - CTR0(KTR_SMP, "forwarded_statclock"); - td = curthread; - td->td_intr_nesting_level++; - if (profprocs != 0) - profclock(&frame); - if (pscnt == psdiv) - statclock(&frame); - td->td_intr_nesting_level--; -} void forward_statclock(void) @@ -1132,17 +1115,6 @@ forward_statclock(void) * state and call hardclock_process() on the CPU receiving the clock interrupt * and then just use a simple IPI to handle any ast's if needed. */ -void -forwarded_hardclock(struct clockframe frame) -{ - struct thread *td; - - CTR0(KTR_SMP, "forwarded_hardclock"); - td = curthread; - td->td_intr_nesting_level++; - hardclock_process(&frame); - td->td_intr_nesting_level--; -} void forward_hardclock(void) @@ -1159,6 +1131,42 @@ forward_hardclock(void) ipi_selected(map, IPI_HARDCLOCK); } + +void ipi_bitmap_handler(struct clockframe frame) +{ + int cpu = PCPU_GET(cpuid); + u_int ipi_bitmap; + struct thread *td; + + ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]); + + critical_enter(); + + /* Nothing to do for AST */ + + if (ipi_bitmap & (1 << IPI_HARDCLOCK)) { + td = curthread; + td->td_intr_nesting_level++; + hardclock_process(&frame); + td->td_intr_nesting_level--; + } + + if (ipi_bitmap & (1 << IPI_STATCLOCK)) { + CTR0(KTR_SMP, "forwarded_statclock"); + + td = curthread; + td->td_intr_nesting_level++; + if (profprocs != 0) + profclock(&frame); + if (pscnt == psdiv) + statclock(&frame); + td->td_intr_nesting_level--; + } + + critical_exit(); +} + + /* * send an IPI to a set of cpus. */ @@ -1166,15 +1174,36 @@ void ipi_selected(u_int32_t cpus, u_int ipi) { int cpu; + u_int bitmap = 0; + u_int old_pending; + u_int new_pending; + + if (IPI_IS_BITMAPED(ipi)) { + bitmap = 1 << ipi; + ipi = IPI_BITMAP_VECTOR; + } CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi); while ((cpu = ffs(cpus)) != 0) { cpu--; + cpus &= ~(1 << cpu); + KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); + + if (bitmap) { + do { + old_pending = cpu_ipi_pending[cpu]; + new_pending = old_pending | bitmap; + } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],old_pending, new_pending)); + + if (old_pending) + continue; + } + lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); - cpus &= ~(1 << cpu); } + } /* diff --git a/sys/i386/include/apicvar.h b/sys/i386/include/apicvar.h index cc46fdac365d..a680b0de1098 100644 --- a/sys/i386/include/apicvar.h +++ b/sys/i386/include/apicvar.h @@ -81,22 +81,55 @@ #define APIC_IO_INTS (IDT_IO_INTS + 16) #define APIC_NUM_IOINTS 192 -#define APIC_LOCAL_INTS 240 -#define APIC_TIMER_INT APIC_LOCAL_INTS -#define APIC_ERROR_INT (APIC_LOCAL_INTS + 1) -#define APIC_THERMAL_INT (APIC_LOCAL_INTS + 2) +/* + ********************* !!! WARNING !!! ****************************** + * Each local apic has an interrupt receive fifo that is two entries deep + * for each interrupt priority class (higher 4 bits of interrupt vector). + * Once the fifo is full the APIC can no longer receive interrupts for this + * class and sending IPIs from other CPUs will be blocked. + * To avoid deadlocks there should be no more than two IPI interrupts + * pending at the same time. + * Currently this is guaranteed by dividing the IPIs in two groups that have + * each at most one IPI interrupt pending. The first group is protected by the + * smp_ipi_mtx and waits for the completion of the IPI (Only one IPI user + * at a time) The second group uses a single interrupt and a bitmap to avoid + * redundant IPI interrupts. + * + * Right now IPI_STOP used by kdb shares the interrupt priority class with + * the two IPI groups mentioned above. As such IPI_STOP may cause a deadlock. + * Eventually IPI_STOP should use NMI IPIs - this would eliminate this and + * other deadlocks caused by IPI_STOP. + */ -#define APIC_IPI_INTS (APIC_LOCAL_INTS + 3) -#define IPI_AST APIC_IPI_INTS /* Generate software trap. */ +#define APIC_LOCAL_INTS 240 + +#if 0 +#define APIC_TIMER_INT (APIC_LOCAL_INTS + X) +#define APIC_ERROR_INT (APIC_LOCAL_INTS + X) +#define APIC_THERMAL_INT (APIC_LOCAL_INTS + X) +#endif + +#define APIC_IPI_INTS (APIC_LOCAL_INTS + 0) +#define IPI_RENDEZVOUS (APIC_IPI_INTS) /* Inter-CPU rendezvous. */ #define IPI_INVLTLB (APIC_IPI_INTS + 1) /* TLB Shootdown IPIs */ #define IPI_INVLPG (APIC_IPI_INTS + 2) #define IPI_INVLRNG (APIC_IPI_INTS + 3) #define IPI_LAZYPMAP (APIC_IPI_INTS + 4) /* Lazy pmap release. */ -#define IPI_HARDCLOCK (APIC_IPI_INTS + 8) /* Inter-CPU clock handling. */ -#define IPI_STATCLOCK (APIC_IPI_INTS + 9) -#define IPI_RENDEZVOUS (APIC_IPI_INTS + 10) /* Inter-CPU rendezvous. */ -#define IPI_STOP (APIC_IPI_INTS + 11) /* Stop CPU until restarted. */ +/* Vector to handle bitmap based IPIs */ +#define IPI_BITMAP_VECTOR (APIC_IPI_INTS + 5) +/* IPIs handled by IPI_BITMAPED_VECTOR (XXX ups is there a better place?) */ +#define IPI_AST 0 /* Generate software trap. */ +#define IPI_HARDCLOCK 1 /* Inter-CPU clock handling. */ +#define IPI_STATCLOCK 2 +#define IPI_BITMAP_LAST IPI_STATCLOCK +#define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST) + +#define IPI_STOP (APIC_IPI_INTS + 6) /* Stop CPU until restarted. */ + +/* The spurious interrupt can share the priority class with the IPIs since + * it is not a normal interrupt. (Does not use the APIC's interrupt fifo) + */ #define APIC_SPURIOUS_INT 255 #define LVT_LINT0 0 diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h index fe87fdfd0740..eb6a635b443a 100644 --- a/sys/i386/include/smp.h +++ b/sys/i386/include/smp.h @@ -55,9 +55,7 @@ inthand_t IDTVEC(invltlb), /* TLB shootdowns - global */ IDTVEC(invlpg), /* TLB shootdowns - 1 page */ IDTVEC(invlrng), /* TLB shootdowns - page range */ - IDTVEC(hardclock), /* Forward hardclock() */ - IDTVEC(statclock), /* Forward statclock() */ - IDTVEC(cpuast), /* Additional software trap on other cpu */ + IDTVEC(ipi_intr_bitmap_handler), /* Bitmap based IPIs */ IDTVEC(cpustop), /* CPU stops & waits to be restarted */ IDTVEC(rendezvous), /* handle CPU rendezvous */ IDTVEC(lazypmap); /* handle lazy pmap release */ @@ -70,9 +68,8 @@ void ipi_all(u_int ipi); void ipi_all_but_self(u_int ipi); void ipi_self(u_int ipi); void forward_statclock(void); -void forwarded_statclock(struct clockframe frame); void forward_hardclock(void); -void forwarded_hardclock(struct clockframe frame); +void ipi_bitmap_handler(struct clockframe frame); u_int mp_bootaddress(u_int); int mp_grab_cpu_hlt(void); void mp_topology(void);