mirror of
https://github.com/freebsd/freebsd-src
synced 2024-10-06 16:40:47 +00:00
Add membarrier(2)
This is an attempt at a clean-room implementation of the Linux membarrier(2) syscall. For documentation, you need to read the Linux membarrier(2) man page and the comments in the Linux kernel/sched/membarrier.c implementation, and possibly look at actual uses. Sponsored by: The FreeBSD Foundation MFC after: 1 week Differential revision: https://reviews.freebsd.org/D32360
This commit is contained in:
parent
74ccb8ecf6
commit
4a69fc16a5
|
@ -419,6 +419,7 @@ FBSD_1.7 {
|
|||
_Fork;
|
||||
fspacectl;
|
||||
kqueuex;
|
||||
membarrier;
|
||||
swapoff;
|
||||
};
|
||||
|
||||
|
|
|
@ -3780,6 +3780,7 @@ kern/kern_lockstat.c optional kdtrace_hooks
|
|||
kern/kern_loginclass.c standard
|
||||
kern/kern_malloc.c standard
|
||||
kern/kern_mbuf.c standard
|
||||
kern/kern_membarrier.c standard
|
||||
kern/kern_mib.c standard
|
||||
kern/kern_module.c standard
|
||||
kern/kern_mtxpool.c standard
|
||||
|
|
|
@ -805,6 +805,8 @@ do_execve(struct thread *td, struct image_args *args, struct mac *mac_p,
|
|||
p->p_flag2 &= ~P2_NOTRACE;
|
||||
if ((p->p_flag2 & P2_STKGAP_DISABLE_EXEC) == 0)
|
||||
p->p_flag2 &= ~P2_STKGAP_DISABLE;
|
||||
p->p_flag2 &= ~(P2_MEMBAR_PRIVE | P2_MEMBAR_PRIVE_SYNCORE |
|
||||
P2_MEMBAR_GLOBE);
|
||||
if (p->p_flag & P_PPWAIT) {
|
||||
p->p_flag &= ~(P_PPWAIT | P_PPTRACE);
|
||||
cv_broadcast(&p->p_pwait);
|
||||
|
|
239
sys/kern/kern_membarrier.c
Normal file
239
sys/kern/kern_membarrier.c
Normal file
|
@ -0,0 +1,239 @@
|
|||
/*-
|
||||
* Copyright (c) 2021 The FreeBSD Foundation
|
||||
*
|
||||
* This software were developed by Konstantin Belousov <kib@FreeBSD.org>
|
||||
* under sponsorship from the FreeBSD Foundation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/cpuset.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/membarrier.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/sched.h>
|
||||
#include <sys/smp.h>
|
||||
#include <sys/syscallsubr.h>
|
||||
#include <sys/sysproto.h>
|
||||
|
||||
#include <vm/vm_param.h>
|
||||
#include <vm/vm.h>
|
||||
#include <vm/pmap.h>
|
||||
#include <vm/vm_map.h>
|
||||
|
||||
/*
 * Mask of every membarrier command bit that kern_membarrier() accepts.
 * It is both the validity filter applied to the cmd argument and the
 * value reported for MEMBARRIER_CMD_QUERY.  The barrier commands are
 * listed first, followed by the matching registration commands.
 */
#define	MEMBARRIER_SUPPORTED_CMDS	(				\
    MEMBARRIER_CMD_GLOBAL |						\
    MEMBARRIER_CMD_GLOBAL_EXPEDITED |					\
    MEMBARRIER_CMD_PRIVATE_EXPEDITED |					\
    MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE |			\
    MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED |				\
    MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED |				\
    MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE)
|
||||
|
||||
static void
|
||||
membarrier_action_seqcst(void *arg __unused)
|
||||
{
|
||||
atomic_thread_fence_seq_cst();
|
||||
}
|
||||
|
||||
static void
|
||||
membarrier_action_seqcst_sync_core(void *arg __unused)
|
||||
{
|
||||
atomic_thread_fence_seq_cst();
|
||||
cpu_sync_core();
|
||||
}
|
||||
|
||||
static void
|
||||
do_membarrier_ipi(cpuset_t *csp, void (*func)(void *))
|
||||
{
|
||||
atomic_thread_fence_seq_cst();
|
||||
smp_rendezvous_cpus(*csp, smp_no_rendezvous_barrier, func,
|
||||
smp_no_rendezvous_barrier, NULL);
|
||||
atomic_thread_fence_seq_cst();
|
||||
}
|
||||
|
||||
static void
|
||||
check_cpu_switched(int c, cpuset_t *csp, uint64_t *swt, bool init)
|
||||
{
|
||||
struct pcpu *pc;
|
||||
uint64_t sw;
|
||||
|
||||
if (CPU_ISSET(c, csp))
|
||||
return;
|
||||
|
||||
pc = cpuid_to_pcpu[c];
|
||||
if (pc->pc_curthread == pc->pc_idlethread) {
|
||||
CPU_SET(c, csp);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Sync with context switch to ensure that override of
|
||||
* pc_curthread with non-idle thread pointer is visible before
|
||||
* reading of pc_switchtime.
|
||||
*/
|
||||
atomic_thread_fence_acq();
|
||||
|
||||
sw = pc->pc_switchtime;
|
||||
if (init)
|
||||
swt[c] = sw;
|
||||
else if (sw != swt[c])
|
||||
CPU_SET(c, csp);
|
||||
}
|
||||
|
||||
/*
|
||||
*
|
||||
* XXXKIB: We execute the requested action (seq_cst and possibly
|
||||
* sync_core) on current CPU as well. There is no guarantee that
|
||||
* current thread executes anything with the full fence semantics
|
||||
* during syscall execution. Similarly, cpu_core_sync() semantics
|
||||
* might be not provided by the syscall return. E.g. on amd64 we
|
||||
* typically return without IRET.
|
||||
*/
|
||||
int
|
||||
kern_membarrier(struct thread *td, int cmd, unsigned flags, int cpu_id)
|
||||
{
|
||||
struct proc *p, *p1;
|
||||
struct thread *td1;
|
||||
cpuset_t cs;
|
||||
uint64_t *swt;
|
||||
int c, error;
|
||||
bool first;
|
||||
|
||||
if (flags != 0 || (cmd & ~MEMBARRIER_SUPPORTED_CMDS) != 0)
|
||||
return (EINVAL);
|
||||
|
||||
if (cmd == MEMBARRIER_CMD_QUERY) {
|
||||
td->td_retval[0] = MEMBARRIER_SUPPORTED_CMDS;
|
||||
return (0);
|
||||
}
|
||||
|
||||
p = td->td_proc;
|
||||
error = 0;
|
||||
|
||||
switch (cmd) {
|
||||
case MEMBARRIER_CMD_GLOBAL:
|
||||
swt = malloc((mp_maxid + 1) * sizeof(*swt), M_TEMP, M_WAITOK);
|
||||
CPU_ZERO(&cs);
|
||||
sched_pin();
|
||||
CPU_SET(PCPU_GET(cpuid), &cs);
|
||||
for (first = true; error == 0; first = false) {
|
||||
CPU_FOREACH(c)
|
||||
check_cpu_switched(c, &cs, swt, first);
|
||||
if (CPU_CMP(&cs, &all_cpus) == 0)
|
||||
break;
|
||||
error = pause_sig("mmbr", 1);
|
||||
if (error == EWOULDBLOCK)
|
||||
error = 0;
|
||||
}
|
||||
sched_unpin();
|
||||
free(swt, M_TEMP);
|
||||
atomic_thread_fence_seq_cst();
|
||||
break;
|
||||
|
||||
case MEMBARRIER_CMD_GLOBAL_EXPEDITED:
|
||||
if ((td->td_proc->p_flag2 & P2_MEMBAR_GLOBE) == 0) {
|
||||
error = EPERM;
|
||||
} else {
|
||||
CPU_ZERO(&cs);
|
||||
CPU_FOREACH(c) {
|
||||
td1 = cpuid_to_pcpu[c]->pc_curthread;
|
||||
p1 = td1->td_proc;
|
||||
if (p1 != NULL &&
|
||||
(p1->p_flag2 & P2_MEMBAR_GLOBE) != 0)
|
||||
CPU_SET(c, &cs);
|
||||
}
|
||||
do_membarrier_ipi(&cs, membarrier_action_seqcst);
|
||||
}
|
||||
break;
|
||||
|
||||
case MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED:
|
||||
if ((p->p_flag2 & P2_MEMBAR_GLOBE) == 0) {
|
||||
PROC_LOCK(p);
|
||||
p->p_flag2 |= P2_MEMBAR_GLOBE;
|
||||
PROC_UNLOCK(p);
|
||||
}
|
||||
break;
|
||||
|
||||
case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
|
||||
if ((td->td_proc->p_flag2 & P2_MEMBAR_PRIVE) == 0) {
|
||||
error = EPERM;
|
||||
} else {
|
||||
pmap_active_cpus(vmspace_pmap(p->p_vmspace), &cs);
|
||||
do_membarrier_ipi(&cs, membarrier_action_seqcst);
|
||||
}
|
||||
break;
|
||||
|
||||
case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
|
||||
if ((p->p_flag2 & P2_MEMBAR_PRIVE) == 0) {
|
||||
PROC_LOCK(p);
|
||||
p->p_flag2 |= P2_MEMBAR_PRIVE;
|
||||
PROC_UNLOCK(p);
|
||||
}
|
||||
break;
|
||||
|
||||
case MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE:
|
||||
if ((td->td_proc->p_flag2 & P2_MEMBAR_PRIVE_SYNCORE) == 0) {
|
||||
error = EPERM;
|
||||
} else {
|
||||
/*
|
||||
* Calculating the IPI multicast mask from
|
||||
* pmap active mask means that we do not call
|
||||
* cpu_sync_core() on CPUs that were missed
|
||||
* from pmap active mask but could be switched
|
||||
* from or to meantime. This is fine at least
|
||||
* on amd64 because threads always use slow
|
||||
* (IRETQ) path to return from syscall after
|
||||
* context switch.
|
||||
*/
|
||||
pmap_active_cpus(vmspace_pmap(p->p_vmspace), &cs);
|
||||
|
||||
do_membarrier_ipi(&cs,
|
||||
membarrier_action_seqcst_sync_core);
|
||||
}
|
||||
break;
|
||||
|
||||
case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE:
|
||||
if ((p->p_flag2 & P2_MEMBAR_PRIVE_SYNCORE) == 0) {
|
||||
PROC_LOCK(p);
|
||||
p->p_flag2 |= P2_MEMBAR_PRIVE_SYNCORE;
|
||||
PROC_UNLOCK(p);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
error = EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
sys_membarrier(struct thread *td, struct membarrier_args *uap)
|
||||
{
|
||||
return (kern_membarrier(td, uap->cmd, uap->flags, uap->cpu_id));
|
||||
}
|
|
@ -3300,7 +3300,6 @@
|
|||
581 AUE_NULL STD|CAPENABLED {
|
||||
int sched_getcpu(void);
|
||||
}
|
||||
|
||||
582 AUE_SWAPOFF STD {
|
||||
int swapoff(
|
||||
_In_z_ const char *name,
|
||||
|
@ -3312,5 +3311,13 @@
|
|||
u_int flags
|
||||
);
|
||||
}
|
||||
584 AUE_NULL STD|CAPENABLED {
|
||||
int membarrier(
|
||||
int cmd,
|
||||
unsigned flags,
|
||||
int cpu_id
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
; vim: syntax=off
|
||||
|
|
70
sys/sys/membarrier.h
Normal file
70
sys/sys/membarrier.h
Normal file
|
@ -0,0 +1,70 @@
|
|||
/*-
|
||||
* Copyright (c) 2021 The FreeBSD Foundation
|
||||
*
|
||||
* This software were developed by Konstantin Belousov <kib@FreeBSD.org>
|
||||
* under sponsorship from the FreeBSD Foundation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __SYS_MEMBARRIER_H__
|
||||
#define __SYS_MEMBARRIER_H__
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
|
||||
/*
|
||||
* The enum membarrier_cmd values are bits. The MEMBARRIER_CMD_QUERY
|
||||
* command returns a bitset indicating which commands are supported.
|
||||
* Also the value of MEMBARRIER_CMD_QUERY is zero, so it is
|
||||
* effectively not returned by the query.
|
||||
*/
|
||||
enum membarrier_cmd {
	/* Query: not a bit, asks which command bits are supported. */
	MEMBARRIER_CMD_QUERY = 0,

	/* One bit per command. */
	MEMBARRIER_CMD_GLOBAL = 1 << 0,
	MEMBARRIER_CMD_SHARED = MEMBARRIER_CMD_GLOBAL,	/* alias of GLOBAL */
	MEMBARRIER_CMD_GLOBAL_EXPEDITED = 1 << 1,
	MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED = 1 << 2,
	MEMBARRIER_CMD_PRIVATE_EXPEDITED = 1 << 3,
	MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED = 1 << 4,
	MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE = 1 << 5,
	MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE = 1 << 6,

	/*
	 * RSEQ constants are defined for source compatibility but are
	 * not yet supported; MEMBARRIER_CMD_QUERY does not return
	 * them in the mask.
	 */
	MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ = 1 << 7,
	MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ = 1 << 8,
};
|
||||
|
||||
/*
 * Bits for the flags argument of membarrier().
 * NOTE(review): the kernel implementation in this change rejects any
 * non-zero flags value, so this bit appears to be defined for source
 * compatibility only -- confirm.
 */
enum membarrier_cmd_flag {
	MEMBARRIER_CMD_FLAG_CPU = 1 << 0,
};
|
||||
|
||||
/*
 * Userland prototype, hidden from kernel builds.  The unnamed
 * parameters are, in order: cmd, flags, cpu_id.
 */
#ifndef _KERNEL
|
||||
__BEGIN_DECLS
|
||||
int membarrier(int, unsigned, int);
|
||||
__END_DECLS
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#endif /* __SYS_MEMBARRIER_H__ */
|
|
@ -883,6 +883,12 @@ struct proc {
|
|||
external thread_single() is
|
||||
permitted */
|
||||
#define P2_REAPKILLED 0x00080000
|
||||
#define P2_MEMBAR_PRIVE 0x00100000 /* membar private expedited
|
||||
registered */
|
||||
#define P2_MEMBAR_PRIVE_SYNCORE 0x00200000 /* membar private expedited
|
||||
sync core registered */
|
||||
#define P2_MEMBAR_GLOBE 0x00400000 /* membar global expedited
|
||||
registered */
|
||||
|
||||
/* Flags protected by proctree_lock, kept in p_treeflags. */
|
||||
#define P_TREE_ORPHANED 0x00000001 /* Reparented, on orphan list */
|
||||
|
|
|
@ -204,6 +204,8 @@ int kern_minherit(struct thread *td, uintptr_t addr, size_t len,
|
|||
int inherit);
|
||||
int kern_mkdirat(struct thread *td, int fd, const char *path,
|
||||
enum uio_seg segflg, int mode);
|
||||
int kern_membarrier(struct thread *td, int cmd, unsigned flags,
|
||||
int cpu_id);
|
||||
int kern_mkfifoat(struct thread *td, int fd, const char *path,
|
||||
enum uio_seg pathseg, int mode);
|
||||
int kern_mknodat(struct thread *td, int fd, const char *path,
|
||||
|
|
Loading…
Reference in a new issue