Give the 4bsd scheduler the ability to wake up idle processors
when there is new work to be done.

MFC after:	5 days
Julian Elischer 2004-09-01 06:42:02 +00:00
parent 037fc73d96
commit 6804a3ab6d
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=134591
8 changed files with 251 additions and 27 deletions


@@ -113,7 +113,6 @@ extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
*/
static u_int logical_cpus;
static u_int logical_cpus_mask;
/* used to hold the APs until we are ready to release them */
static struct mtx ap_boot_mtx;
@@ -138,7 +137,6 @@ static int start_all_aps(void);
static int start_ap(int apic_id);
static void release_aps(void *dummy);
static int hlt_cpus_mask;
static int hlt_logical_cpus;
static struct sysctl_ctx_list logical_cpu_clist;
static u_int bootMP_size;


@@ -183,7 +183,6 @@ volatile int smp_tlb_wait;
*/
static u_int logical_cpus;
static u_int logical_cpus_mask;
/* used to hold the APs until we are ready to release them */
static struct mtx ap_boot_mtx;
@@ -209,7 +208,6 @@ static void install_ap_tramp(void);
static int start_ap(int apic_id);
static void release_aps(void *dummy);
static int hlt_cpus_mask;
static int hlt_logical_cpus;
static struct sysctl_ctx_list logical_cpu_clist;


@@ -98,9 +98,7 @@
#define PDRMASK (NBPDR-1)
/* PREEMPTION exposes scheduler bugs that need to be fixed. */
#if 0
#define PREEMPTION
#endif
#define IOPAGES 2 /* pages of i/o permission bitmap */


@@ -36,6 +36,9 @@ __FBSDID("$FreeBSD$");
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/unistd.h>
#ifdef SMP
#include <sys/smp.h>
#endif
static void idle_setup(void *dummy);
SYSINIT(idle_setup, SI_SUB_SCHED_IDLE, SI_ORDER_FIRST, idle_setup, NULL)
@@ -96,9 +99,18 @@ idle_proc(void *dummy)
{
struct proc *p;
struct thread *td;
#ifdef SMP
cpumask_t mycpu;
#endif
td = curthread;
p = td->td_proc;
#ifdef SMP
mycpu = PCPU_GET(cpumask);
mtx_lock_spin(&sched_lock);
idle_cpus_mask |= mycpu;
mtx_unlock_spin(&sched_lock);
#endif
for (;;) {
mtx_assert(&Giant, MA_NOTOWNED);
@@ -106,7 +118,13 @@ idle_proc(void *dummy)
cpu_idle();
mtx_lock_spin(&sched_lock);
#ifdef SMP
idle_cpus_mask &= ~mycpu;
#endif
mi_switch(SW_VOL, NULL);
#ifdef SMP
idle_cpus_mask |= mycpu;
#endif
mtx_unlock_spin(&sched_lock);
}
}


@@ -89,6 +89,7 @@ reassigned to keep this true.
__FBSDID("$FreeBSD$");
#include "opt_full_preemption.h"
#include "opt_sched.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -104,6 +105,10 @@ __FBSDID("$FreeBSD$");
#include <sys/smp.h>
#endif
#include <machine/critical.h>
#if defined(SMP) && defined(SCHED_4BSD)
#include <sys/sysctl.h>
#endif
CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
@@ -686,6 +691,12 @@ runq_check(struct runq *rq)
return (0);
}
#if defined(SMP) && defined(SCHED_4BSD)
int runq_fuzz = 1;
SYSCTL_DECL(_kern_sched);
SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, "");
#endif
/*
* Find the highest priority process on the run queue.
*/
@@ -699,7 +710,28 @@ runq_choose(struct runq *rq)
mtx_assert(&sched_lock, MA_OWNED);
while ((pri = runq_findbit(rq)) != -1) {
rqh = &rq->rq_queues[pri];
ke = TAILQ_FIRST(rqh);
#if defined(SMP) && defined(SCHED_4BSD)
/* fuzz == 1 is normal; 0 or less is ignored. */
if (runq_fuzz > 1) {
/*
* In the first couple of entries, check if
* there is one for our CPU as a preference.
*/
int count = runq_fuzz;
int cpu = PCPU_GET(cpuid);
struct kse *ke2;
ke2 = ke = TAILQ_FIRST(rqh);
while (count-- && ke2) {
if (ke2->ke_thread->td_lastcpu == cpu) {
ke = ke2;
break;
}
ke2 = TAILQ_NEXT(ke2, ke_procq);
}
} else
#endif
ke = TAILQ_FIRST(rqh);
KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
CTR3(KTR_RUNQ,
"runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);

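A minimal user-space sketch of the runq_fuzz idea above, for illustration only: the structure and field names here are hypothetical, not the kernel's struct kse. It scans at most 'fuzz' entries from the head of the queue and prefers one that last ran on the current CPU, falling back to the plain FIFO choice.

#include <stddef.h>

struct entry {
	int		last_cpu;	/* CPU this entry last ran on */
	struct entry	*next;		/* next entry in the run queue */
};

/*
 * Scan at most 'fuzz' entries from the head of the queue and prefer one
 * whose last CPU matches ours; otherwise fall back to the head (FIFO).
 */
struct entry *
choose_with_fuzz(struct entry *head, int my_cpu, int fuzz)
{
	struct entry *pick = head;
	struct entry *e = head;

	while (fuzz-- > 0 && e != NULL) {
		if (e->last_cpu == my_cpu) {
			pick = e;
			break;
		}
		e = e->next;
	}
	return (pick);
}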

@@ -698,6 +698,10 @@ void
sched_add(struct thread *td, int flags)
{
struct kse *ke;
#ifdef SMP
int forwarded = 0;
int cpu;
#endif
ke = td->td_kse;
mtx_assert(&sched_lock, MA_OWNED);
@@ -710,34 +714,71 @@ sched_add(struct thread *td, int flags)
KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
("sched_add: process swapped out"));
#ifdef SMP
/*
* Only try to preempt if the thread is unpinned or pinned to the
* current CPU.
*/
if (KSE_CAN_MIGRATE(ke) || ke->ke_runq == &runq_pcpu[PCPU_GET(cpuid)])
#endif
/*
* Don't try to preempt if we are already switching;
* all hell might break loose.
*/
if ((flags & SRQ_YIELDING) == 0)
if (maybe_preempt(td))
return;
#ifdef SMP
if (KSE_CAN_MIGRATE(ke)) {
CTR2(KTR_RUNQ, "sched_add: adding kse:%p (td:%p) to gbl runq", ke, td);
CTR2(KTR_RUNQ,
"sched_add: adding kse:%p (td:%p) to gbl runq", ke, td);
cpu = NOCPU;
ke->ke_runq = &runq;
} else {
CTR2(KTR_RUNQ, "sched_add: adding kse:%p (td:%p)to pcpu runq", ke, td);
if (!SKE_RUNQ_PCPU(ke))
ke->ke_runq = &runq_pcpu[PCPU_GET(cpuid)];
ke->ke_runq = &runq_pcpu[(cpu = PCPU_GET(cpuid))];
else
cpu = td->td_lastcpu;
CTR3(KTR_RUNQ,
"sched_add: Put kse:%p(td:%p) on cpu%d runq", ke, td, cpu);
}
#else
CTR2(KTR_RUNQ, "sched_add: adding kse:%p (td:%p) to runq", ke, td);
ke->ke_runq = &runq;
#endif
/*
* If we are yielding (on the way out anyhow) or the thread
* being saved is US, then don't try to be smart about preemption
* or kicking off another CPU, as it won't help and may hinder.
* In the YIELDING case, we are about to run whoever is being put
* in the queue anyhow, and in the OURSELF case, we are putting
* ourselves on the run queue, which also only happens when we
* are about to yield.
*/
if ((flags & SRQ_YIELDING) == 0) {
#ifdef SMP
cpumask_t me = PCPU_GET(cpumask);
int idle = idle_cpus_mask & me;
/*
* Only try to kick off another CPU if the thread is unpinned
* or pinned to another CPU, and there are other available,
* idle CPUs.  If we are idle ourselves, skip straight to
* preemption.
*/
if ((!idle) &&
(idle_cpus_mask & ~(hlt_cpus_mask | me)) &&
(KSE_CAN_MIGRATE(ke) ||
ke->ke_runq != &runq_pcpu[PCPU_GET(cpuid)])) {
forwarded = forward_wakeup(cpu);
}
/*
* If we failed to kick off another CPU, then look to see
* if we should preempt this CPU.  Only allow this if the
* thread is not pinned, or IS pinned to this CPU.  If we
* are the idle thread, we also try to preempt, as it will
* be quicker and, being idle, we won't lose by doing so.
*/
if ((!forwarded) &&
(ke->ke_runq == &runq ||
ke->ke_runq == &runq_pcpu[PCPU_GET(cpuid)]))
#endif
{
if (maybe_preempt(td))
return;
}
}
if ((td->td_proc->p_flag & P_NOLOAD) == 0)
sched_tdcnt++;
runq_add(ke->ke_runq, ke);

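A rough user-space model (assumed names, simplified cpumask_t; it omits the fallback to local preemption when forward_wakeup() finds no target) of the order of checks sched_add() now performs: yielding threads are simply enqueued, a busy CPU first tries to kick an idle, non-halted CPU the thread could run on, and only then is local preemption considered.

typedef unsigned int cpumask_t;

enum action { ENQUEUE_ONLY, FORWARD_IPI, PREEMPT_LOCAL };

enum action
wakeup_decision(int yielding, cpumask_t me, cpumask_t idle_mask,
    cpumask_t hlt_mask, int can_migrate, int pinned_here)
{
	int i_am_idle = (idle_mask & me) != 0;

	/* When yielding we are about to switch anyway; just enqueue. */
	if (yielding)
		return (ENQUEUE_ONLY);

	/*
	 * If we are busy, another CPU is idle and not halted, and the
	 * thread could run there, try waking that CPU with an IPI first.
	 */
	if (!i_am_idle && (idle_mask & ~(hlt_mask | me)) != 0 &&
	    (can_migrate || !pinned_here))
		return (FORWARD_IPI);

	/* Otherwise consider preempting whatever runs on this CPU. */
	return (PREEMPT_LOCAL);
}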

@@ -49,9 +49,15 @@ __FBSDID("$FreeBSD$");
#include <machine/smp.h>
#include "opt_sched.h"
#ifdef SMP
volatile cpumask_t stopped_cpus;
volatile cpumask_t started_cpus;
cpumask_t all_cpus;
cpumask_t idle_cpus_mask;
cpumask_t hlt_cpus_mask;
cpumask_t logical_cpus_mask;
void (*cpustop_restartfunc)(void);
#endif
@@ -62,7 +68,6 @@ int mp_maxcpus = MAXCPU;
struct cpu_top *smp_topology;
volatile int smp_started;
cpumask_t all_cpus;
u_int mp_maxid;
SYSCTL_NODE(_kern, OID_AUTO, smp, CTLFLAG_RD, NULL, "Kernel SMP");
@@ -96,6 +101,46 @@ SYSCTL_INT(_kern_smp, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
&forward_roundrobin_enabled, 0,
"Forwarding of roundrobin to all other CPUs");
#ifdef SCHED_4BSD
/* Enable forwarding of wakeups to all other cpus */
SYSCTL_NODE(_kern_smp, OID_AUTO, ipiwakeup, CTLFLAG_RD, NULL, "Kernel SMP");
static int forward_wakeup_enabled = 0;
SYSCTL_INT(_kern_smp_ipiwakeup, OID_AUTO, enabled, CTLFLAG_RW,
&forward_wakeup_enabled, 0,
"Forwarding of wakeup to idle CPUs");
static int forward_wakeups_requested = 0;
SYSCTL_INT(_kern_smp_ipiwakeup, OID_AUTO, requested, CTLFLAG_RD,
&forward_wakeups_requested, 0,
"Requests for Forwarding of wakeup to idle CPUs");
static int forward_wakeups_delivered = 0;
SYSCTL_INT(_kern_smp_ipiwakeup, OID_AUTO, delivered, CTLFLAG_RD,
&forward_wakeups_delivered, 0,
"Completed Forwarding of wakeup to idle CPUs");
static int forward_wakeup_use_mask = 0;
SYSCTL_INT(_kern_smp_ipiwakeup, OID_AUTO, usemask, CTLFLAG_RW,
&forward_wakeup_use_mask, 0,
"Use the mask of idle cpus");
static int forward_wakeup_use_loop = 0;
SYSCTL_INT(_kern_smp_ipiwakeup, OID_AUTO, useloop, CTLFLAG_RW,
&forward_wakeup_use_loop, 0,
"Use a loop to find idle cpus");
static int forward_wakeup_use_single = 0;
SYSCTL_INT(_kern_smp_ipiwakeup, OID_AUTO, onecpu, CTLFLAG_RW,
&forward_wakeup_use_single, 0,
"Only signal one idle cpu");
static int forward_wakeup_use_htt = 0;
SYSCTL_INT(_kern_smp_ipiwakeup, OID_AUTO, htt2, CTLFLAG_RW,
&forward_wakeup_use_htt, 0,
"account for htt");
#endif /* SCHED_4BSD */
/* Variables needed for SMP rendezvous. */
static void (*smp_rv_setup_func)(void *arg);
static void (*smp_rv_action_func)(void *arg);
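The knobs above surface as kern.smp.ipiwakeup.* sysctls. A small userland sketch (illustrative only, not part of this commit) that reads and then enables the main switch via sysctlbyname(3); writing the value requires root:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int enabled, one = 1;
	size_t len = sizeof(enabled);

	/* Read kern.smp.ipiwakeup.enabled. */
	if (sysctlbyname("kern.smp.ipiwakeup.enabled", &enabled, &len,
	    NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("wakeup forwarding enabled: %d\n", enabled);

	/* Turn it on (requires root). */
	if (sysctlbyname("kern.smp.ipiwakeup.enabled", NULL, NULL,
	    &one, sizeof(one)) == -1)
		perror("sysctlbyname(set)");
	return (0);
}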
@@ -203,6 +248,95 @@ forward_roundrobin(void)
ipi_selected(map, IPI_AST);
}
#ifdef SCHED_4BSD
/* Enable HTT_2 if you have a 2-way HTT CPU. */
int
forward_wakeup(int cpunum)
{
cpumask_t map, me, dontuse;
cpumask_t map2;
struct pcpu *pc;
cpumask_t id, map3;
mtx_assert(&sched_lock, MA_OWNED);
CTR0(KTR_SMP, "forward_wakeup()");
if ((!forward_wakeup_enabled) ||
(forward_wakeup_use_mask == 0 && forward_wakeup_use_loop == 0))
return (0);
if (!smp_started || cold || panicstr)
return (0);
forward_wakeups_requested++;
/*
* check the idle mask we received against what we calculated before
* in the old version.
*/
me = PCPU_GET(cpumask);
/*
* Don't bother if we should be doing it ourselves.
*/
if ((me & idle_cpus_mask) && (cpunum == NOCPU || me == (1 << cpunum)))
return (0);
dontuse = me | stopped_cpus | hlt_cpus_mask;
map3 = 0;
if (forward_wakeup_use_loop) {
SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
id = pc->pc_cpumask;
if ((id & dontuse) == 0 &&
pc->pc_curthread == pc->pc_idlethread) {
map3 |= id;
}
}
}
if (forward_wakeup_use_mask) {
map = 0;
map = idle_cpus_mask & ~dontuse;
/* If both are on, compare and use the loop result if they differ. */
if (forward_wakeup_use_loop) {
if (map != map3) {
printf("map (%02X) != map3 (%02X)\n",
map, map3);
map = map3;
}
}
} else {
map = map3;
}
/* If we only allow a specific CPU, then mask off all the others */
if (cpunum != NOCPU) {
KASSERT((cpunum <= mp_maxcpus),("forward_wakeup: bad cpunum."));
map &= (1 << cpunum);
} else {
/* Try to choose an idle die. */
if (forward_wakeup_use_htt) {
map2 = (map & (map >> 1)) & 0x5555;
if (map2) {
map = map2;
}
}
/* set only one bit */
if (forward_wakeup_use_single) {
map = map & ((~map) + 1);
}
}
if (map) {
forward_wakeups_delivered++;
ipi_selected(map, IPI_AST);
return (1);
}
if (cpunum == NOCPU)
printf("forward_wakeup: Idle processor not found\n");
return (0);
}
#endif /* SCHED_4BSD */
/*
* When called the executing CPU will send an IPI to all other CPUs
* requesting that they halt execution.

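The two mask manipulations in forward_wakeup() above are terse; the standalone sketch below (plain C, with an example mask chosen purely for illustration) shows what they compute: pairing adjacent bits finds 2-way HTT packages whose logical CPUs are both idle, and isolating the lowest set bit leaves exactly one CPU to signal.

#include <stdio.h>

int
main(void)
{
	unsigned int map = 0x002c;	/* example: CPUs 2, 3 and 5 idle */

	/* Keep pairs (2n, 2n+1) where both logical CPUs are idle. */
	unsigned int htt_pairs = (map & (map >> 1)) & 0x5555;

	/* Isolate the lowest set bit: signal exactly one idle CPU. */
	unsigned int one_cpu = map & (~map + 1);

	printf("idle map    = 0x%04x\n", map);		/* 0x002c */
	printf("htt pairs   = 0x%04x\n", htt_pairs);	/* 0x0004: CPUs 2 and 3 */
	printf("single pick = 0x%04x\n", one_cpu);	/* 0x0004: CPU 2 */
	return (0);
}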

@@ -51,11 +51,15 @@ extern volatile cpumask_t started_cpus;
extern volatile cpumask_t stopped_cpus;
#endif /* SMP */
extern cpumask_t all_cpus;
extern u_int mp_maxid;
extern int mp_ncpus;
extern volatile int smp_started;
extern cpumask_t all_cpus;
extern cpumask_t idle_cpus_mask;
extern cpumask_t hlt_cpus_mask;
extern cpumask_t logical_cpus_mask;
/*
* Macro allowing us to determine whether a CPU is absent at any given
* time, thus permitting us to configure sparse maps of cpuid-dependent
@@ -92,6 +96,7 @@ void cpu_mp_start(void);
void forward_signal(struct thread *);
void forward_roundrobin(void);
int forward_wakeup(int cpunum);
int restart_cpus(cpumask_t);
int stop_cpus(cpumask_t);
void smp_rendezvous_action(void);