Avoid more than two pending IPI interrupt vectors per local APIC,
as this may cause deadlocks.

This should fix kern/72123.

Discussed with: jhb
Tested by: Nik Azim Azam, Andy Farkas, Flack Man, Aykut KARA
           Izzet BESKARDES, Jens Binnewies, Karl Keusgen
Approved by:    sam (mentor)
This commit is contained in:
Stephan Uphoff 2004-12-07 20:15:01 +00:00
parent 5656474145
commit f30a4a1ced
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=138528
4 changed files with 115 additions and 104 deletions

View file

@ -227,7 +227,8 @@ IDTVEC(invlrng)
*/
.text
SUPERALIGN_TEXT
IDTVEC(hardclock)
IDTVEC(ipi_intr_bitmap_handler)
PUSH_FRAME
movl $KDSEL, %eax /* reload with kernel's data segment */
movl %eax, %ds
@ -237,64 +238,15 @@ IDTVEC(hardclock)
movl lapic, %edx
movl $0, LA_EOI(%edx) /* End Of Interrupt to APIC */
pushl $0 /* XXX convert trapframe to clockframe */
call forwarded_hardclock
addl $4, %esp /* XXX convert clockframe to trapframe */
MEXITCOUNT
jmp doreti
/*
* Forward statclock to another CPU. Pushes a clockframe and calls
* forwarded_statclock().
*/
.text
SUPERALIGN_TEXT
/*
 * IPI entry point: forwarded statclock (removed by this change in favor
 * of the bitmap-based IPI handler).  Reloads the kernel segment
 * registers, acks the local APIC, and calls the C handler with a fake
 * clockframe built from the trapframe.
 */
IDTVEC(statclock)
PUSH_FRAME
movl $KDSEL, %eax /* reload with kernel's data segment */
movl %eax, %ds
movl %eax, %es
movl $KPSEL, %eax
movl %eax, %fs
movl lapic, %edx
movl $0, LA_EOI(%edx) /* End Of Interrupt to APIC */
FAKE_MCOUNT(TF_EIP(%esp))
pushl $0 /* XXX convert trapframe to clockframe */
/* NOTE(review): diff residue -- the old call (forwarded_statclock) and
 * its replacement (ipi_bitmap_handler) both appear in this flattened view. */
call forwarded_statclock
call ipi_bitmap_handler
addl $4, %esp /* XXX convert clockframe to trapframe */
MEXITCOUNT
jmp doreti
/*
* Executed by a CPU when it receives an Xcpuast IPI from another CPU,
*
* The other CPU has already executed aston() or need_resched() on our
* current process, so we simply need to ack the interrupt and return
* via doreti to run ast().
*/
.text
SUPERALIGN_TEXT
/*
 * IPI entry point: Xcpuast (removed by this change).  The sender has
 * already flagged the AST on our current thread, so we only ack the
 * local APIC and return through doreti, which runs ast() for us.
 */
IDTVEC(cpuast)
PUSH_FRAME
movl $KDSEL, %eax
movl %eax, %ds /* use KERNEL data segment */
movl %eax, %es
movl $KPSEL, %eax
movl %eax, %fs
movl lapic, %edx
movl $0, LA_EOI(%edx) /* End Of Interrupt to APIC */
FAKE_MCOUNT(TF_EIP(%esp))
MEXITCOUNT
jmp doreti
/*
* Executed by a CPU when it receives an Xcpustop IPI from another CPU,
*

View file

@ -198,6 +198,9 @@ struct cpu_info {
} static cpu_info[MAXCPU];
static int cpu_apic_ids[MAXCPU];
/* Holds pending bitmap based IPIs per CPU */
static volatile u_int cpu_ipi_pending[MAXCPU];
static u_int boot_address;
static void set_logical_apic_ids(void);
@ -347,8 +350,10 @@ cpu_mp_start(void)
POSTCODE(MP_START_POST);
/* Initialize the logical ID to APIC ID table. */
for (i = 0; i < MAXCPU; i++)
for (i = 0; i < MAXCPU; i++) {
cpu_apic_ids[i] = -1;
cpu_ipi_pending[i] = 0;
}
/* Install an inter-CPU IPI for TLB invalidation */
setidt(IPI_INVLTLB, IDTVEC(invltlb),
@ -357,14 +362,6 @@ cpu_mp_start(void)
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(IPI_INVLRNG, IDTVEC(invlrng),
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
/* Install an inter-CPU IPI for forwarding hardclock() */
setidt(IPI_HARDCLOCK, IDTVEC(hardclock),
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
/* Install an inter-CPU IPI for forwarding statclock() */
setidt(IPI_STATCLOCK, IDTVEC(statclock),
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
/* Install an inter-CPU IPI for lazy pmap release */
setidt(IPI_LAZYPMAP, IDTVEC(lazypmap),
@ -374,8 +371,8 @@ cpu_mp_start(void)
setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous),
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
/* Install an inter-CPU IPI for forcing an additional software trap */
setidt(IPI_AST, IDTVEC(cpuast),
/* Install generic inter-CPU IPI handler */
setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler),
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
/* Install an inter-CPU IPI for CPU stop/restart */
@ -1095,20 +1092,6 @@ smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
* For statclock, we send an IPI to all CPU's to have them call this
* function.
*/
/*
 * forwarded_statclock() -- removed by this change.
 * Ran on a CPU that received an IPI_STATCLOCK IPI: with the interrupt
 * nesting level bumped, calls profclock() when profiling is active and
 * statclock() on the pscnt == psdiv tick, against the clockframe pushed
 * by the IPI stub.  This logic now lives in ipi_bitmap_handler().
 */
void
forwarded_statclock(struct clockframe frame)
{
struct thread *td;
CTR0(KTR_SMP, "forwarded_statclock");
td = curthread;
/* Mark interrupt context for the clock routines. */
td->td_intr_nesting_level++;
if (profprocs != 0)
profclock(&frame);
if (pscnt == psdiv)
statclock(&frame);
td->td_intr_nesting_level--;
}
void
forward_statclock(void)
@ -1132,17 +1115,6 @@ forward_statclock(void)
* state and call hardclock_process() on the CPU receiving the clock interrupt
* and then just use a simple IPI to handle any ast's if needed.
*/
/*
 * forwarded_hardclock() -- removed by this change.
 * Ran on a CPU that received an IPI_HARDCLOCK IPI: calls
 * hardclock_process() on the pushed clockframe with the interrupt
 * nesting level bumped.  This logic now lives in ipi_bitmap_handler().
 */
void
forwarded_hardclock(struct clockframe frame)
{
struct thread *td;
CTR0(KTR_SMP, "forwarded_hardclock");
td = curthread;
/* Mark interrupt context for hardclock_process(). */
td->td_intr_nesting_level++;
hardclock_process(&frame);
td->td_intr_nesting_level--;
}
void
forward_hardclock(void)
@ -1159,6 +1131,42 @@ forward_hardclock(void)
ipi_selected(map, IPI_HARDCLOCK);
}
/*
 * C handler for the single bitmap-based IPI vector (IPI_BITMAP_VECTOR).
 * Atomically fetches and clears this CPU's pending-IPI bitmap, then runs
 * the work for each bit found: IPI_HARDCLOCK -> hardclock_process(),
 * IPI_STATCLOCK -> profclock()/statclock().  Coalescing all three bitmap
 * IPIs onto one vector keeps at most one such interrupt pending per
 * local APIC, avoiding the two-entries-per-priority-class APIC fifo
 * deadlock described in apic.h.
 */
void ipi_bitmap_handler(struct clockframe frame)
{
int cpu = PCPU_GET(cpuid);
u_int ipi_bitmap;
struct thread *td;
/* Grab and clear all pending bits in one atomic operation. */
ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
critical_enter();
/* Nothing to do for AST */
if (ipi_bitmap & (1 << IPI_HARDCLOCK)) {
td = curthread;
td->td_intr_nesting_level++;
hardclock_process(&frame);
td->td_intr_nesting_level--;
}
if (ipi_bitmap & (1 << IPI_STATCLOCK)) {
CTR0(KTR_SMP, "forwarded_statclock");
td = curthread;
td->td_intr_nesting_level++;
if (profprocs != 0)
profclock(&frame);
/* statclock only fires on every psdiv'th profclock tick. */
if (pscnt == psdiv)
statclock(&frame);
td->td_intr_nesting_level--;
}
critical_exit();
}
/*
* send an IPI to a set of cpus.
*/
@ -1166,15 +1174,36 @@ void
/*
 * Send an IPI to the set of CPUs named by the `cpus' mask.
 *
 * Bitmap-class IPIs (IPI_AST, IPI_HARDCLOCK, IPI_STATCLOCK) are
 * coalesced onto the single IPI_BITMAP_VECTOR interrupt: the request is
 * merged into the target CPU's cpu_ipi_pending bitmap and a vector is
 * sent only when no bits were already pending.  This bounds the number
 * of outstanding IPI vectors per local APIC (see apic.h warning).
 *
 * Fix: the flattened diff left a second, dead `cpus &= ~(1 << cpu);'
 * after lapic_ipi_vectored() -- the bit is already cleared at the top
 * of the loop, so the trailing clear is removed.
 */
ipi_selected(u_int32_t cpus, u_int ipi)
{
	int cpu;
	u_int bitmap = 0;
	u_int old_pending;
	u_int new_pending;

	if (IPI_IS_BITMAPED(ipi)) {
		bitmap = 1 << ipi;
		ipi = IPI_BITMAP_VECTOR;
	}

	CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
	while ((cpu = ffs(cpus)) != 0) {
		cpu--;
		cpus &= ~(1 << cpu);	/* clear the bit once, up front */

		KASSERT(cpu_apic_ids[cpu] != -1,
		    ("IPI to non-existent CPU %d", cpu));

		if (bitmap) {
			/*
			 * Atomically merge the request into the target's
			 * pending bitmap.  If bits were already pending, an
			 * IPI_BITMAP_VECTOR interrupt is already in flight
			 * for this CPU, so do not send another one.
			 */
			do {
				old_pending = cpu_ipi_pending[cpu];
				new_pending = old_pending | bitmap;
			} while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
			    old_pending, new_pending));
			if (old_pending)
				continue;
		}

		lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
	}
}
/*

View file

@ -81,22 +81,55 @@
#define APIC_IO_INTS (IDT_IO_INTS + 16)
#define APIC_NUM_IOINTS 192
#define APIC_LOCAL_INTS 240
#define APIC_TIMER_INT APIC_LOCAL_INTS
#define APIC_ERROR_INT (APIC_LOCAL_INTS + 1)
#define APIC_THERMAL_INT (APIC_LOCAL_INTS + 2)
/*
********************* !!! WARNING !!! ******************************
* Each local apic has an interrupt receive fifo that is two entries deep
* for each interrupt priority class (higher 4 bits of interrupt vector).
* Once the fifo is full the APIC can no longer receive interrupts for this
* class and sending IPIs from other CPUs will be blocked.
* To avoid deadlocks there should be no more than two IPI interrupts
* pending at the same time.
* Currently this is guaranteed by dividing the IPIs in two groups that have
* each at most one IPI interrupt pending. The first group is protected by the
* smp_ipi_mtx and waits for the completion of the IPI (Only one IPI user
at a time.) The second group uses a single interrupt and a bitmap to avoid
* redundant IPI interrupts.
*
* Right now IPI_STOP, used by kdb, shares the interrupt priority class with
* the two IPI groups mentioned above. As such IPI_STOP may cause a deadlock.
* Eventually IPI_STOP should use NMI IPIs - this would eliminate this and
* other deadlocks caused by IPI_STOP.
*/
#define APIC_IPI_INTS (APIC_LOCAL_INTS + 3)
#define IPI_AST APIC_IPI_INTS /* Generate software trap. */
#define APIC_LOCAL_INTS 240
#if 0
#define APIC_TIMER_INT (APIC_LOCAL_INTS + X)
#define APIC_ERROR_INT (APIC_LOCAL_INTS + X)
#define APIC_THERMAL_INT (APIC_LOCAL_INTS + X)
#endif
#define APIC_IPI_INTS (APIC_LOCAL_INTS + 0)
#define IPI_RENDEZVOUS (APIC_IPI_INTS) /* Inter-CPU rendezvous. */
#define IPI_INVLTLB (APIC_IPI_INTS + 1) /* TLB Shootdown IPIs */
#define IPI_INVLPG (APIC_IPI_INTS + 2)
#define IPI_INVLRNG (APIC_IPI_INTS + 3)
#define IPI_LAZYPMAP (APIC_IPI_INTS + 4) /* Lazy pmap release. */
#define IPI_HARDCLOCK (APIC_IPI_INTS + 8) /* Inter-CPU clock handling. */
#define IPI_STATCLOCK (APIC_IPI_INTS + 9)
#define IPI_RENDEZVOUS (APIC_IPI_INTS + 10) /* Inter-CPU rendezvous. */
#define IPI_STOP (APIC_IPI_INTS + 11) /* Stop CPU until restarted. */
/* Vector to handle bitmap based IPIs */
#define IPI_BITMAP_VECTOR (APIC_IPI_INTS + 5)
/* IPIs handled by IPI_BITMAP_VECTOR (XXX ups is there a better place?) */
#define IPI_AST 0 /* Generate software trap. */
#define IPI_HARDCLOCK 1 /* Inter-CPU clock handling. */
#define IPI_STATCLOCK 2
#define IPI_BITMAP_LAST IPI_STATCLOCK
#define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST)
#define IPI_STOP (APIC_IPI_INTS + 6) /* Stop CPU until restarted. */
/* The spurious interrupt can share the priority class with the IPIs since
* it is not a normal interrupt. (Does not use the APIC's interrupt fifo)
*/
#define APIC_SPURIOUS_INT 255
#define LVT_LINT0 0

View file

@ -55,9 +55,7 @@ inthand_t
IDTVEC(invltlb), /* TLB shootdowns - global */
IDTVEC(invlpg), /* TLB shootdowns - 1 page */
IDTVEC(invlrng), /* TLB shootdowns - page range */
IDTVEC(hardclock), /* Forward hardclock() */
IDTVEC(statclock), /* Forward statclock() */
IDTVEC(cpuast), /* Additional software trap on other cpu */
IDTVEC(ipi_intr_bitmap_handler), /* Bitmap based IPIs */
IDTVEC(cpustop), /* CPU stops & waits to be restarted */
IDTVEC(rendezvous), /* handle CPU rendezvous */
IDTVEC(lazypmap); /* handle lazy pmap release */
@ -70,9 +68,8 @@ void ipi_all(u_int ipi);
void ipi_all_but_self(u_int ipi);
void ipi_self(u_int ipi);
void forward_statclock(void);
void forwarded_statclock(struct clockframe frame);
void forward_hardclock(void);
void forwarded_hardclock(struct clockframe frame);
void ipi_bitmap_handler(struct clockframe frame);
u_int mp_bootaddress(u_int);
int mp_grab_cpu_hlt(void);
void mp_topology(void);