Implement robust futexes. Most of the code is modelled after

what Linux does. This is because robust futexes are mostly a
userspace thing which we cannot alter. Two syscalls maintain a
pointer to a userspace list, and when a process exits a routine
walks this list, waking up processes sleeping on futexes
from that list.

Reviewed by:	kib (mentor)
MFC after:	1 month
This commit is contained in:
Roman Divacky 2008-05-13 20:01:27 +00:00
parent 1e4ef54501
commit 4732e446fb
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=178976
9 changed files with 200 additions and 15 deletions

View file

@ -98,8 +98,6 @@ DUMMY(migrate_pages);
DUMMY(pselect6);
DUMMY(ppoll);
DUMMY(unshare);
DUMMY(set_robust_list);
DUMMY(get_robust_list);
DUMMY(splice);
DUMMY(sync_file_range);
DUMMY(tee);

View file

@ -493,8 +493,10 @@
308 AUE_NULL STD { int linux_pselect6(void); }
309 AUE_NULL STD { int linux_ppoll(void); }
310 AUE_NULL STD { int linux_unshare(void); }
311 AUE_NULL STD { int linux_set_robust_list(void); }
312 AUE_NULL STD { int linux_get_robust_list(void); }
311 AUE_NULL STD { int linux_set_robust_list(struct linux_robust_list_head *head, \
l_size_t len); }
; head is an out-parameter: it points at the user slot that receives the
; registered list head pointer, matching the i386 table and the Linux ABI.
312 AUE_NULL STD { int linux_get_robust_list(l_int pid, struct linux_robust_list_head **head, \
l_size_t *len); }
313 AUE_NULL STD { int linux_splice(void); }
314 AUE_NULL STD { int linux_sync_file_range(void); }
315 AUE_NULL STD { int linux_tee(void); }

View file

@ -44,9 +44,6 @@ __FBSDID("$FreeBSD$");
#include <sys/sysproto.h>
#include <sys/unistd.h>
#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_futex.h>
#ifdef COMPAT_LINUX32
#include <machine/../linux32/linux.h>
#include <machine/../linux32/linux32_proto.h>
@ -55,6 +52,9 @@ __FBSDID("$FreeBSD$");
#include <machine/../linux/linux_proto.h>
#endif
#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_futex.h>
struct sx emul_shared_lock;
struct mtx emul_lock;
@ -86,6 +86,7 @@ linux_proc_init(struct thread *td, pid_t child, int flags)
em = malloc(sizeof *em, M_LINUX, M_WAITOK | M_ZERO);
em->pid = child;
em->pdeath_signal = 0;
em->robust_futexes = NULL;
if (flags & LINUX_CLONE_THREAD) {
/* handled later in the code */
} else {
@ -161,6 +162,8 @@ linux_proc_exit(void *arg __unused, struct proc *p)
if (__predict_true(p->p_sysent != &elf_linux_sysvec))
return;
release_futexes(p);
/* find the emuldata */
em = em_find(p, EMUL_DOLOCK);

View file

@ -31,6 +31,8 @@
#ifndef _LINUX_EMUL_H_
#define _LINUX_EMUL_H_
#include <compat/linux/linux_futex.h>
struct linux_emuldata_shared {
int refs;
pid_t group_pid;
@ -52,6 +54,8 @@ struct linux_emuldata {
int pdeath_signal; /* parent death signal */
struct linux_robust_list_head *robust_futexes;
LIST_ENTRY(linux_emuldata) threads; /* list of linux threads */
};

View file

@ -45,8 +45,11 @@ __KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $")
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/imgact.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/sched.h>
#include <sys/sx.h>
#include <sys/malloc.h>
@ -57,6 +60,7 @@ __KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $")
#include <machine/../linux/linux.h>
#include <machine/../linux/linux_proto.h>
#endif
#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_futex.h>
struct futex;
@ -533,3 +537,160 @@ futex_atomic_op(struct thread *td, int encoded_op, caddr_t uaddr)
return (-ENOSYS);
}
}
/*
 * Record the robust futex list head registered by the calling process.
 *
 * args->head is a user-space pointer; it is only stored here, never
 * dereferenced.  args->len must equal the size of
 * struct linux_robust_list_head, otherwise EINVAL is returned.
 * Returns 0 on success.
 */
int
linux_set_robust_list(struct thread *td, struct linux_set_robust_list_args *args)
{
	struct linux_emuldata *emuldata;

#ifdef DEBUG
	if (ldebug(set_robust_list))
		printf(ARGS(set_robust_list, ""));
#endif

	if (args->len == sizeof(struct linux_robust_list_head)) {
		/*
		 * The EMUL_DOLOCK lookup is paired with the explicit
		 * EMUL_UNLOCK once the pointer has been stored.
		 */
		emuldata = em_find(td->td_proc, EMUL_DOLOCK);
		emuldata->robust_futexes = args->head;
		EMUL_UNLOCK(&emul_lock);
		return (0);
	}

	return (EINVAL);
}
int
linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args)
{
struct linux_emuldata *em;
struct linux_robust_list_head *head;
l_size_t len = sizeof(struct linux_robust_list_head);
int error = 0;
#ifdef DEBUG
if (ldebug(get_robust_list))
printf(ARGS(get_robust_list, ""));
#endif
if (!args->pid) {
em = em_find(td->td_proc, EMUL_DONTLOCK);
head = em->robust_futexes;
} else {
struct proc *p;
p = pfind(args->pid);
if (p == NULL)
return (ESRCH);
em = em_find(p, EMUL_DONTLOCK);
/* XXX: ptrace? */
if (priv_check(td, PRIV_CRED_SETUID) ||
priv_check(td, PRIV_CRED_SETEUID) ||
p_candebug(td, p))
return (EPERM);
head = em->robust_futexes;
PROC_UNLOCK(p);
}
error = copyout(&len, args->len, sizeof(l_size_t));
if (error)
return (EFAULT);
error = copyout(head, args->head, sizeof(struct linux_robust_list_head));
return (error);
}
/*
 * Mark one robust futex as abandoned by an exiting owner.
 *
 * uaddr is the user address of the 32-bit futex word, pid is the id of
 * the exiting owner, and pi is the flag extracted from the list entry.
 * If the TID bits of the word match pid, the word is atomically replaced
 * by FUTEX_OWNER_DIED with the FUTEX_WAITERS bit preserved; the update is
 * retried whenever the word changed underneath us.  For a non-pi futex
 * that had waiters, one waiter is woken.
 *
 * Returns 0 on success (including "not owned by pid") or EFAULT when the
 * user word cannot be accessed.
 */
static int
handle_futex_death(void *uaddr, pid_t pid, int pi)
{
	int cur, seen, want;
	struct futex *f;

	for (;;) {
		if (copyin(uaddr, &cur, 4))
			return (EFAULT);

		/* Not owned by the exiting process: nothing to do. */
		if ((cur & FUTEX_TID_MASK) != pid)
			return (0);

		want = (cur & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
		seen = casuword32(uaddr, cur, want);
		if (seen == -1)
			return (EFAULT);

		/* The swap took effect only if the old value was unchanged. */
		if (seen == cur)
			break;
	}

	if (!pi && (cur & FUTEX_WAITERS)) {
		/*
		 * NOTE(review): confirm whether the futex obtained here
		 * holds a reference that has to be dropped after the wake.
		 */
		f = futex_get(uaddr, FUTEX_UNLOCKED);
		futex_wake(f, 1, NULL, 0);
	}

	return (0);
}
/*
 * Read one robust-list link from user memory.
 *
 * head is the user address of the link word.  The word is copied in and
 * split in two: bit 0 is stored in *pi and the remaining bits, taken as
 * a pointer, are stored in *entry.
 *
 * Returns 0 on success or EFAULT if the word cannot be copied in.
 */
static int
fetch_robust_entry(struct linux_robust_list **entry,
    struct linux_robust_list **head, int *pi)
{
	l_ulong raw;

	if (copyin((const void *)head, &raw, sizeof(raw)) != 0)
		return (EFAULT);

	*pi = raw & 1;
	*entry = (void *)(raw & ~1UL);

	return (0);
}
/*
 * Walk the robust futex list registered by process p and release every
 * futex on it via handle_futex_death().
 *
 * The list lives in user memory, so every link is fetched with
 * fetch_robust_entry()/copyin() and the walk is abandoned silently on the
 * first access failure.  Each futex word is located at the entry address
 * plus the futex_offset read from the list head.  The entry named by the
 * head's pending_list is skipped during the walk and handled once at the
 * end.
 */
void
release_futexes(struct proc *p)
{
	struct linux_robust_list_head *head;
	struct linux_robust_list *entry, *next_entry, *pending;
	unsigned int limit = 2048;
	/* int, to match fetch_robust_entry()'s "int *pi" out-parameter. */
	int pi, next_pi, pip;
	struct linux_emuldata *em;
	l_ulong futex_offset;
	int rc;

	em = em_find(p, EMUL_DONTLOCK);
	head = em->robust_futexes;

	/* No list was ever registered. */
	if (head == NULL)
		return;

	if (fetch_robust_entry(&entry, &head->list.next, &pi))
		return;

	if (copyin(&head->futex_offset, &futex_offset, sizeof(l_ulong)))
		return;

	if (fetch_robust_entry(&pending, &head->pending_list, &pip))
		return;

	/* The list is circular: it ends when a link points back at the head. */
	while (entry != &head->list) {
		/*
		 * Fetch the next link before touching the current entry's
		 * futex; presumably the entry may be reused by user space
		 * once its futex has been released.
		 */
		rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);

		if (entry != pending) {
			if (handle_futex_death((char *)entry + futex_offset,
			    p->p_pid, pi))
				return;
		}

		if (rc)
			return;

		entry = next_entry;
		pi = next_pi;

		/* Cap the number of entries processed in one walk. */
		if (!--limit)
			break;

		sched_relinquish(curthread);
	}

	if (pending)
		handle_futex_death((char *) pending + futex_offset,
		    p->p_pid, pip);
}

View file

@ -63,4 +63,22 @@
#define FUTEX_OP_CMP_GT 4 /* if (oldval > CMPARG) wake */
#define FUTEX_OP_CMP_GE 5 /* if (oldval >= CMPARG) wake */
/*
 * One link of the robust futex list.  This is defined by Linux user-space;
 * the kernel only ever reads it from user memory.
 */
struct linux_robust_list {
/*
 * User-space link to the next entry.  fetch_robust_entry() treats bit 0
 * of the stored word as a flag and the remaining bits as the pointer.
 */
struct linux_robust_list *next;
};
/*
 * Head of the robust futex list, as registered through
 * linux_set_robust_list() and walked by release_futexes().
 */
struct linux_robust_list_head {
/* First link; the walk ends when a link points back at this member. */
struct linux_robust_list list;
/* Added to an entry's address to locate its futex word. */
l_ulong futex_offset;
/*
 * Entry that release_futexes() skips during the walk and handles once
 * at the end, if non-NULL.
 */
struct linux_robust_list *pending_list;
};
/*
 * Bit layout of the 32-bit user-space futex word as interpreted by
 * handle_futex_death().
 */
/* Preserved when the owner dies; decides whether a waiter is woken. */
#define FUTEX_WAITERS 0x80000000
/* Written into the word when its owner exits while holding it. */
#define FUTEX_OWNER_DIED 0x40000000
/* Mask for the owner id that is compared against the exiting pid. */
#define FUTEX_TID_MASK 0x3fffffff
void release_futexes(struct proc *);
#endif /* !_LINUX_FUTEX_H */

View file

@ -75,10 +75,6 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_object.h>
#include <vm/swap_pager.h>
#include <compat/linux/linux_sysproto.h>
#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_misc.h>
#ifdef COMPAT_LINUX32
#include <machine/../linux32/linux.h>
#include <machine/../linux32/linux32_proto.h>
@ -91,6 +87,9 @@ __FBSDID("$FreeBSD$");
#include <compat/linux/linux_mib.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_util.h>
#include <compat/linux/linux_sysproto.h>
#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_misc.h>
#ifdef __i386__
#include <machine/cputypes.h>

View file

@ -89,8 +89,6 @@ DUMMY(migrate_pages);
DUMMY(pselect6);
DUMMY(ppoll);
DUMMY(unshare);
DUMMY(set_robust_list);
DUMMY(get_robust_list);
DUMMY(splice);
DUMMY(sync_file_range);
DUMMY(tee);

View file

@ -503,8 +503,10 @@
308 AUE_NULL STD { int linux_pselect6(void); }
309 AUE_NULL STD { int linux_ppoll(void); }
310 AUE_NULL STD { int linux_unshare(void); }
311 AUE_NULL STD { int linux_set_robust_list(void); }
312 AUE_NULL STD { int linux_get_robust_list(void); }
311 AUE_NULL STD { int linux_set_robust_list(struct linux_robust_list_head *head, \
l_size_t len); }
312 AUE_NULL STD { int linux_get_robust_list(l_int pid, struct linux_robust_list_head **head, \
l_size_t *len); }
313 AUE_NULL STD { int linux_splice(void); }
314 AUE_NULL STD { int linux_sync_file_range(void); }
315 AUE_NULL STD { int linux_tee(void); }