Add my recent work on adaptive spin mutexes. Two environment variables can be
used to tune pthread mutex performance:
1. LIBPTHREAD_SPINLOOPS
	If a pthread mutex is being locked by another thread, this environment
	variable sets the total number of spin loops the current thread performs
	before it sleeps in the kernel. This saves the overhead of a syscall when
	the mutex will be unlocked very soon (as in well-written application code).
2. LIBPTHREAD_YIELDLOOPS
	If a pthread mutex is being locked by other threads, this environment
	variable sets the total number of sched_yield() loops the current thread
	performs before it sleeps in the kernel. While yielding, the current thread
	gives up the CPU but does not sleep in the kernel, so it does not set the
	contention bit in the mutex; instead it lets the lock owner run again if the
	owner is on the kernel's run queue. When the owner later unlocks the mutex,
	it does not need to enter the kernel and do extra work to wake up mutex
	waiters. In some cases this saves a lot of syscall overhead for the mutex
	owner.

In my experience, LIBPTHREAD_YIELDLOOPS can sometimes improve performance far
more than LIBPTHREAD_SPINLOOPS does; this depends on the application. The two
environment variables are global to all pthread mutexes; there is no interface
to set them per mutex. Both default to zero, which means spinning is turned
off by default.
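
As a rough illustration (not part of this commit), the toy program below creates
a few threads that hammer one mutex with very short critical sections, which is
the pattern the spin and yield loops are meant to help. The program name, thread
count, iteration count and the environment variable values in the comments are
arbitrary examples, not values taken from the commit.

/*
 * spin_demo.c -- toy contended-mutex benchmark for trying the tuning knobs:
 *   cc -O2 -o spin_demo spin_demo.c -lpthread
 *   time env LIBPTHREAD_SPINLOOPS=2000 LIBPTHREAD_YIELDLOOPS=2 ./spin_demo
 */
#include <pthread.h>
#include <stdio.h>

#define	NTHREADS	4
#define	ITERATIONS	1000000

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static long counter;

static void *
worker(void *arg)
{
	int i;

	/*
	 * Short critical sections: the case where spinning or yielding
	 * instead of sleeping in the kernel is expected to pay off.
	 */
	for (i = 0; i < ITERATIONS; i++) {
		pthread_mutex_lock(&lock);
		counter++;
		pthread_mutex_unlock(&lock);
	}
	return (NULL);
}

int
main(void)
{
	pthread_t threads[NTHREADS];
	int i;

	for (i = 0; i < NTHREADS; i++)
		pthread_create(&threads[i], NULL, worker, NULL);
	for (i = 0; i < NTHREADS; i++)
		pthread_join(threads[i], NULL);
	printf("counter = %ld\n", counter);
	return (0);
}
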
David Xu 2007-10-30 05:57:37 +00:00
parent 327433d2a2
commit 7416cdabcd
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=173173
3 changed files with 52 additions and 49 deletions


@@ -103,6 +103,8 @@ size_t _thr_guard_default;
size_t _thr_stack_default = THR_STACK_DEFAULT;
size_t _thr_stack_initial = THR_STACK_INITIAL;
int _thr_page_size;
int _thr_spinloops;
int _thr_yieldloops;
int _gc_count;
struct umutex _mutex_static_lock = DEFAULT_UMUTEX;
struct umutex _cond_static_lock = DEFAULT_UMUTEX;
@@ -423,6 +425,7 @@ init_private(void)
{
size_t len;
int mib[2];
char *env;
_thr_umutex_init(&_mutex_static_lock);
_thr_umutex_init(&_cond_static_lock);
@@ -452,7 +455,12 @@ init_private(void)
_thr_guard_default = _thr_page_size;
_pthread_attr_default.guardsize_attr = _thr_guard_default;
_pthread_attr_default.stacksize_attr = _thr_stack_default;
env = getenv("LIBPTHREAD_SPINLOOPS");
if (env)
_thr_spinloops = atoi(env);
env = getenv("LIBPTHREAD_YIELDLOOPS");
if (env)
_thr_yieldloops = atoi(env);
TAILQ_INIT(&_thr_atfork_list);
}
init_once = 1;


@@ -66,12 +66,6 @@
#define MUTEX_ASSERT_NOT_OWNED(m)
#endif
/*
* For adaptive mutexes, how many times to spin doing trylock2
* before entering the kernel to block
*/
#define MUTEX_ADAPTIVE_SPINS 200
/*
* Prototypes
*/
@@ -279,6 +273,16 @@ _pthread_mutex_destroy(pthread_mutex_t *mutex)
return (ret);
}
#define ENQUEUE_MUTEX(curthread, m) \
m->m_owner = curthread; \
/* Add to the list of owned mutexes: */ \
MUTEX_ASSERT_NOT_OWNED(m); \
if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0) \
TAILQ_INSERT_TAIL(&curthread->mutexq, m, m_qe); \
else \
TAILQ_INSERT_TAIL(&curthread->pp_mutexq, m, m_qe)
static int
mutex_trylock_common(struct pthread *curthread, pthread_mutex_t *mutex)
{
@@ -290,13 +294,7 @@ mutex_trylock_common(struct pthread *curthread, pthread_mutex_t *mutex)
m = *mutex;
ret = _thr_umutex_trylock(&m->m_lock, id);
if (ret == 0) {
m->m_owner = curthread;
/* Add to the list of owned mutexes. */
MUTEX_ASSERT_NOT_OWNED(m);
if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
TAILQ_INSERT_TAIL(&curthread->mutexq, m, m_qe);
else
TAILQ_INSERT_TAIL(&curthread->pp_mutexq, m, m_qe);
ENQUEUE_MUTEX(curthread, m);
} else if (m->m_owner == curthread) {
ret = mutex_self_trylock(m);
} /* else {} */
@@ -348,39 +346,43 @@ mutex_lock_common(struct pthread *curthread, pthread_mutex_t *mutex,
struct pthread_mutex *m;
uint32_t id;
int ret;
int count;
id = TID(curthread);
m = *mutex;
ret = _thr_umutex_trylock2(&m->m_lock, id);
if (ret == 0) {
m->m_owner = curthread;
/* Add to the list of owned mutexes: */
MUTEX_ASSERT_NOT_OWNED(m);
if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
TAILQ_INSERT_TAIL(&curthread->mutexq, m, m_qe);
else
TAILQ_INSERT_TAIL(&curthread->pp_mutexq, m, m_qe);
ENQUEUE_MUTEX(curthread, m);
} else if (m->m_owner == curthread) {
ret = mutex_self_lock(m, abstime);
} else {
/*
* For adaptive mutexes, spin for a bit in the expectation
* that if the application requests this mutex type then
* the lock is likely to be released quickly and it is
* faster than entering the kernel
*/
if (m->m_type == PTHREAD_MUTEX_ADAPTIVE_NP) {
int count = MUTEX_ADAPTIVE_SPINS;
while (count--) {
if (_thr_spinloops != 0 && _thr_is_smp &&
!(m->m_lock.m_flags & UMUTEX_PRIO_PROTECT)) {
count = _thr_spinloops;
while (count && m->m_lock.m_owner != UMUTEX_UNOWNED) {
count--;
CPU_SPINWAIT;
}
if (count) {
ret = _thr_umutex_trylock2(&m->m_lock, id);
if (ret == 0)
break;
cpu_spinwait();
if (ret == 0) {
ENQUEUE_MUTEX(curthread, m);
return (ret);
}
}
}
if (_thr_yieldloops != 0) {
count = _thr_yieldloops;
while (count--) {
_sched_yield();
ret = _thr_umutex_trylock2(&m->m_lock, id);
if (ret == 0) {
ENQUEUE_MUTEX(curthread, m);
return (ret);
}
}
}
if (ret == 0)
goto done;
if (abstime == NULL) {
ret = __thr_umutex_lock(&m->m_lock);
@@ -399,17 +401,8 @@ mutex_lock_common(struct pthread *curthread, pthread_mutex_t *mutex,
if (ret == EINTR)
ret = ETIMEDOUT;
}
done:
if (ret == 0) {
m->m_owner = curthread;
/* Add to the list of owned mutexes: */
MUTEX_ASSERT_NOT_OWNED(m);
if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
TAILQ_INSERT_TAIL(&curthread->mutexq, m, m_qe);
else
TAILQ_INSERT_TAIL(&curthread->pp_mutexq, m,
m_qe);
}
if (ret == 0)
ENQUEUE_MUTEX(curthread, m);
}
return (ret);
}
@@ -529,7 +522,6 @@ mutex_self_trylock(pthread_mutex_t m)
switch (m->m_type) {
case PTHREAD_MUTEX_ERRORCHECK:
case PTHREAD_MUTEX_NORMAL:
case PTHREAD_MUTEX_ADAPTIVE_NP:
ret = EBUSY;
break;


@@ -583,7 +583,9 @@ extern size_t _thr_guard_default __hidden;
extern size_t _thr_stack_default __hidden;
extern size_t _thr_stack_initial __hidden;
extern int _thr_page_size __hidden;
extern int _thr_adaptive_spin __hidden;
extern int _thr_spinloops __hidden;
extern int _thr_yieldloops __hidden;
/* Garbage thread count. */
extern int _gc_count __hidden;
@@ -653,6 +655,7 @@ int _schedparam_to_rtp(int policy, const struct sched_param *param,
struct rtprio *rtp) __hidden;
void _thread_bp_create(void);
void _thread_bp_death(void);
int _sched_yield(void);
/* #include <fcntl.h> */
#ifdef _SYS_FCNTL_H_