Introduce the new kernel thread called "deadlock resolver".

While the name is pretentious, a good explanation of its goals is
given in this 17-month-old introductory e-mail:
http://lists.freebsd.org/pipermail/freebsd-arch/2008-August/008452.html

In order to implement it, the sq_type field in sleepqueues is now always
compiled in, rather than only with the INVARIANTS option. Additionally, a
new sleepqueue function, sleepq_type(), is added, returning the type of the
sleepqueue linked to a wchan.
Three new sysctls are added in order to configure the thread:
debug.deadlkres.slptime_threshold
debug.deadlkres.blktime_threshold
debug.deadlkres.sleepfreq

representing the thresholds for sleep and block time that will lead to
a deadlock match (when exceeded), while sleepfreq represents the
number of seconds between two consecutive runs of the thread.
In order to enable the deadlock resolver thread, recompile your kernel
with the DEADLKRES option.

Reviewed by:	jeff
Tested by:	pho, Giovanni Trematerra
Sponsored by:	Nokia Incorporated, Sandvine Incorporated
MFC after:	2 weeks
This commit is contained in:
Attilio Rao 2010-01-09 01:46:38 +00:00
parent ff451e0c50
commit f7829d0d5c
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=201879
9 changed files with 171 additions and 5 deletions

View file

@ -22,6 +22,11 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 9.x IS SLOW:
machines to maximize performance. (To disable malloc debugging, run
ln -s aj /etc/malloc.conf.)
20100108:
Introduce the kernel thread "deadlock resolver" (which can be enabled
via the DEADLKRES option, see NOTES for more details) and the
sleepq_type() function for sleepqueues.
20091202:
The rc.firewall and rc.firewall6 were unified, and
rc.firewall6 and rc.d/ip6fw were removed.

View file

@ -23,7 +23,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd December 12, 2009
.Dd January 8, 2010
.Dt SLEEPQUEUE 9
.Os
.Sh NAME
@ -44,6 +44,7 @@
.Nm sleepq_sleepcnt ,
.Nm sleepq_timedwait ,
.Nm sleepq_timedwait_sig ,
.Nm sleepq_type ,
.Nm sleepq_wait ,
.Nm sleepq_wait_sig
.Nd manage the queues of sleeping threads
@ -84,6 +85,8 @@
.Fn sleepq_timedwait "void *wchan"
.Ft int
.Fn sleepq_timedwait_sig "void *wchan" "int signal_caught"
.Ft int
.Fn sleepq_type "void *wchan"
.Ft void
.Fn sleepq_wait "void *wchan"
.Ft int
@ -366,6 +369,12 @@ given a
.Fa wchan .
.Pp
The
.Fn sleepq_type
function returns the type of the sleepqueue associated with
.Fa wchan .
.Pp
The
.Fn sleepq_abort ,
.Fn sleepq_broadcast ,
and

View file

@ -2530,6 +2530,11 @@ options BOOTP_BLOCKSIZE=8192 # Override NFS block size
#
options SW_WATCHDOG
#
# Add the software deadlock resolver thread.
#
options DEADLKRES
#
# Disable swapping of stack pages. This option removes all
# code which actually performs swapping, so it's not possible to turn

View file

@ -72,6 +72,7 @@ COMPAT_FREEBSD6 opt_compat.h
COMPAT_FREEBSD7 opt_compat.h
COMPILING_LINT opt_global.h
CY_PCI_FASTINTR
DEADLKRES opt_watchdog.h
DIRECTIO
FULL_PREEMPTION opt_sched.h
IPI_PREEMPTION opt_sched.h

View file

@ -48,14 +48,16 @@ __FBSDID("$FreeBSD$");
#include <sys/callout.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/kthread.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/sleepqueue.h>
#include <sys/smp.h>
#include <vm/vm.h>
#include <vm/pmap.h>
@ -159,6 +161,124 @@ sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
0,0, sysctl_kern_cp_times, "LU", "per-CPU time statistics");
#ifdef DEADLKRES
/*
 * Deadlock resolver tunables, all expressed in seconds (they are multiplied
 * by hz before use) and exported read-write under debug.deadlkres.
 */
/* Longest time a thread may sleep on an sx/lockmgr sleepqueue. */
static int slptime_threshold = 1800;
/* Longest time a thread may stay blocked on a turnstile. */
static int blktime_threshold = 900;
/* Interval between two scans performed by the resolver thread. */
static int sleepfreq = 3;
/*
 * Body of the "deadlock resolver" kernel thread.
 *
 * Loops forever; on each pass it walks every thread of every process and
 * panics if a thread has been
 *   - blocked on a turnstile for more than blktime_threshold seconds, or
 *   - sleeping on an SLEEPQ_SX or SLEEPQ_LK sleepqueue for more than
 *     slptime_threshold seconds.
 * Between passes the thread pauses for sleepfreq seconds.  The function
 * never returns.
 *
 * NOTE(review): sleepfreq is writable at runtime; a value of 0 gives a
 * zero timeout to pause() -- confirm that pause() tolerates it.
 */
static void
deadlkres(void)
{
	struct proc *p;
	struct thread *td;
	void *wchan;
	int blkticks, slpticks, slptype, tryl, tticks;

	tryl = 0;
	for (;;) {
		/* Convert the (tunable) thresholds from seconds to ticks. */
		blkticks = blktime_threshold * hz;
		slpticks = slptime_threshold * hz;

		/*
		 * Do not sleep on allproc_lock, in order to avoid a possible
		 * priority inversion problem leading to starvation.
		 * If the lock still cannot be acquired after more than 100
		 * consecutive tries, panic.
		 */
		if (!sx_try_slock(&allproc_lock)) {
			if (tryl > 100)
				panic("%s: possible deadlock detected on allproc_lock\n",
				    __func__);
			tryl++;
			pause("allproc_lock deadlkres", sleepfreq * hz);
			continue;
		}
		tryl = 0;
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			FOREACH_THREAD_IN_PROC(p, td) {
				thread_lock(td);
				if (TD_ON_LOCK(td)) {
					/*
					 * The thread should be blocked on a
					 * turnstile, simply check if the
					 * turnstile channel is in good state.
					 */
					MPASS(td->td_blocked != NULL);
					tticks = ticks - td->td_blktick;
					thread_unlock(td);
					if (tticks > blkticks) {
						/*
						 * According to the provided
						 * thresholds, this thread has
						 * been stuck for too long on
						 * a turnstile.
						 */
						PROC_UNLOCK(p);
						sx_sunlock(&allproc_lock);
						panic("%s: possible deadlock detected for %p, blocked for %d ticks\n",
						    __func__, td, tticks);
					}
				} else if (TD_IS_SLEEPING(td) &&
				    td->td_wchan != NULL) {
					/*
					 * Check if the thread is sleeping on a
					 * lock, otherwise skip the check.
					 * A sleeping thread without a wait
					 * channel is skipped as well, because
					 * sleepq_type() requires a non-NULL
					 * wchan.
					 * Sample the state under the thread
					 * lock, then drop it in order to
					 * avoid a LOR with the sleepqueue
					 * spinlock.
					 */
					wchan = td->td_wchan;
					tticks = ticks - td->td_slptick;
					thread_unlock(td);
					slptype = sleepq_type(wchan);
					if ((slptype == SLEEPQ_SX ||
					    slptype == SLEEPQ_LK) &&
					    tticks > slpticks) {
						/*
						 * According to the provided
						 * thresholds, this thread has
						 * been stuck for too long on
						 * a sleepqueue.
						 */
						PROC_UNLOCK(p);
						sx_sunlock(&allproc_lock);
						panic("%s: possible deadlock detected for %p, blocked for %d ticks\n",
						    __func__, td, tticks);
					}
				} else
					thread_unlock(td);
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);

		/* Sleep for sleepfreq seconds. */
		pause("deadlkres", sleepfreq * hz);
	}
}
/*
 * Kernel thread descriptor for the deadlock resolver: thread name, entry
 * point, and a NULL third member (presumably the slot where the created
 * thread pointer would be stored -- confirm against struct kthread_desc).
 */
static struct kthread_desc deadlkres_kd = {
"deadlkres",
deadlkres,
(struct thread **)NULL
};
/* Pass the descriptor to kthread_start at the SI_SUB_CLOCKS SYSINIT stage. */
SYSINIT(deadlkres, SI_SUB_CLOCKS, SI_ORDER_ANY, kthread_start, &deadlkres_kd);
/* debug.deadlkres node and its three read-write integer knobs (seconds). */
SYSCTL_NODE(_debug, OID_AUTO, deadlkres, CTLFLAG_RW, 0, "Deadlock resolver");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, slptime_threshold, CTLFLAG_RW,
&slptime_threshold, 0,
"Number of seconds within is valid to sleep on a sleepqueue");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RW,
&blktime_threshold, 0,
"Number of seconds within is valid to block on a turnstile");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RW, &sleepfreq, 0,
"Number of seconds between any deadlock resolver thread run");
#endif /* DEADLKRES */
void
read_cpu_time(long *cp_time)
{

View file

@ -122,8 +122,8 @@ struct sleepqueue {
LIST_ENTRY(sleepqueue) sq_hash; /* (c) Chain and free list. */
LIST_HEAD(, sleepqueue) sq_free; /* (c) Free queues. */
void *sq_wchan; /* (c) Wait channel. */
#ifdef INVARIANTS
int sq_type; /* (c) Queue type. */
#ifdef INVARIANTS
struct lock_object *sq_lock; /* (c) Associated lock. */
#endif
};
@ -317,7 +317,6 @@ sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags,
("thread's sleep queue has a non-empty free list"));
KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
sq->sq_lock = lock;
sq->sq_type = flags & SLEEPQ_TYPE;
#endif
#ifdef SLEEPQUEUE_PROFILING
sc->sc_depth++;
@ -330,6 +329,7 @@ sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags,
sq = td->td_sleepqueue;
LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash);
sq->sq_wchan = wchan;
sq->sq_type = flags & SLEEPQ_TYPE;
} else {
MPASS(wchan == sq->sq_wchan);
MPASS(lock == sq->sq_lock);
@ -668,6 +668,28 @@ sleepq_timedwait_sig(void *wchan, int pri)
return (rvalt);
}
/*
 * Look up the sleepqueue associated with the wait channel 'wchan' and
 * report its queue type (sq_type).  Returns -1 when no sleepqueue is
 * currently associated with the channel.  'wchan' must not be NULL.
 * The sleepqueue chain lock is taken for the lookup and dropped again
 * before returning.
 */
int
sleepq_type(void *wchan)
{
	struct sleepqueue *queue;
	int qtype;

	MPASS(wchan != NULL);

	sleepq_lock(wchan);
	queue = sleepq_lookup(wchan);
	qtype = (queue != NULL) ? queue->sq_type : -1;
	sleepq_release(wchan);
	return (qtype);
}
/*
* Removes a thread from a sleep queue and makes it
* runnable.
@ -1176,8 +1198,8 @@ DB_SHOW_COMMAND(sleepq, db_show_sleepqueue)
return;
found:
db_printf("Wait channel: %p\n", sq->sq_wchan);
#ifdef INVARIANTS
db_printf("Queue type: %d\n", sq->sq_type);
#ifdef INVARIANTS
if (sq->sq_lock) {
lock = sq->sq_lock;
db_printf("Associated Interlock: %p - (%s) %s\n", lock,

View file

@ -733,6 +733,7 @@ turnstile_wait(struct turnstile *ts, struct thread *owner, int queue)
td->td_tsqueue = queue;
td->td_blocked = ts;
td->td_lockname = lock->lo_name;
td->td_blktick = ticks;
TD_SET_LOCK(td);
mtx_unlock_spin(&tc->tc_lock);
propagate_priority(td);
@ -925,6 +926,7 @@ turnstile_unpend(struct turnstile *ts, int owner_type)
MPASS(TD_CAN_RUN(td));
td->td_blocked = NULL;
td->td_lockname = NULL;
td->td_blktick = 0;
#ifdef INVARIANTS
td->td_tsqueue = 0xff;
#endif

View file

@ -218,6 +218,7 @@ struct thread {
struct ucred *td_ucred; /* (k) Reference to credentials. */
u_int td_estcpu; /* (t) estimated cpu utilization */
int td_slptick; /* (t) Time at sleep. */
int td_blktick; /* (t) Time spent blocked. */
struct rusage td_ru; /* (t) rusage information */
uint64_t td_incruntime; /* (t) Cpu ticks to transfer to proc. */
uint64_t td_runtime; /* (t) How many cpu ticks we've run. */

View file

@ -112,6 +112,7 @@ void sleepq_set_timeout(void *wchan, int timo);
u_int sleepq_sleepcnt(void *wchan, int queue);
int sleepq_timedwait(void *wchan, int pri);
int sleepq_timedwait_sig(void *wchan, int pri);
int sleepq_type(void *wchan);
void sleepq_wait(void *wchan, int pri);
int sleepq_wait_sig(void *wchan, int pri);