seccomp updates for v6.6-rc1

- Provide USER_NOTIFY flag for synchronous mode (Andrei Vagin, Peter
   Oskolkov). This touches the scheduler and perf but has been Acked by
   Peter Zijlstra.
 
 - Fix regression in syscall skipping and restart tracing on arm32.
   This touches arch/arm/ but has been Acked by Arnd Bergmann.
 -----BEGIN PGP SIGNATURE-----
 
 iQJKBAABCgA0FiEEpcP2jyKd1g9yPm4TiXL039xtwCYFAmTs418WHGtlZXNjb29r
 QGNocm9taXVtLm9yZwAKCRCJcvTf3G3AJohpD/4tEfRdnb/KDgwQ7uvqBonUJXcx
 wqw17LZCGTpBV3/Tp3+aEseD1NezOxiMJL88VyUHSy7nfDJShbL6QtyoenwEOeXJ
 HmBUfcIH3cqRutHEJ3drYBzBetpeeK2G+gTYVj+JoEfPWyPf+Egj+1JE2n1xLi92
 WC1miBAyBZ59kN+D1hcDzJu24CkAwbcUYlEzGejN5lBOwxYV3/fjARBVRvefOO5m
 jljSCIVJOFgCiybKhJ7Zw1+lkFc3cIlcOgr4/ZegSc8PxFVebnuImTHHp/gvoo6F
 7d1xe5Hk+PSfNvVq41MAeRB2vK2tY5efwjXRarThUaydPTO43KiQm0dzP0EYWK9a
 LcOg8zAXZnpvuWU5O2SqUKADcxe2TjS1WuQ/Q4ixxgKz2kJKDwrNU8Frf327eLSR
 acfZgMMiUfEXyXDV9B3LzNAtwdvwyxYrzEzxgKywhThIhZmQDat0rI2IaTV5QIc5
 pkxiFEe0TPwpzyUVO9dSzE+ughTmNQOKk5uAM9e2NwRwVdhEmlZAxo0kStJ1NoaA
 yDjYIKfaNBElchL4v2931KJFJseI+uRaWdW10JEV+1M69+gEAEs6wbmAxtcYS776
 xWsYp3slXzlmeVyvQp/ah8p0y55r+qTbcnhkvIdiwLYei4Bh3KOoJUlVmW0V5dKq
 b+7qspIvBA0kKRAqPw==
 =DI8R
 -----END PGP SIGNATURE-----

Merge tag 'seccomp-v6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux

Pull seccomp updates from Kees Cook:

 - Provide USER_NOTIFY flag for synchronous mode (Andrei Vagin, Peter
   Oskolkov). This touches the scheduler and perf but has been Acked by
   Peter Zijlstra.

 - Fix regression in syscall skipping and restart tracing on arm32. This
   touches arch/arm/ but has been Acked by Arnd Bergmann.

* tag 'seccomp-v6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
  seccomp: Add missing kerndoc notations
  ARM: ptrace: Restore syscall skipping for tracers
  ARM: ptrace: Restore syscall restart tracing
  selftests/seccomp: Handle arm32 corner cases better
  perf/benchmark: add a new benchmark for seccom_unotify
  selftest/seccomp: add a new test for the sync mode of seccomp_user_notify
  seccomp: add the synchronous mode for seccomp_unotify
  sched: add a few helpers to wake up tasks on the current cpu
  sched: add WF_CURRENT_CPU and externise ttwu
  seccomp: don't use semaphore and wait_queue together
This commit is contained in:
Linus Torvalds 2023-08-28 12:38:26 -07:00
commit b03a434214
21 changed files with 384 additions and 34 deletions

View file

@ -25,6 +25,9 @@ static inline int syscall_get_nr(struct task_struct *task,
if (IS_ENABLED(CONFIG_AEABI) && !IS_ENABLED(CONFIG_OABI_COMPAT))
return task_thread_info(task)->abi_syscall;
if (task_thread_info(task)->abi_syscall == -1)
return -1;
return task_thread_info(task)->abi_syscall & __NR_SYSCALL_MASK;
}

View file

@ -90,6 +90,7 @@ slow_work_pending:
cmp r0, #0
beq no_work_pending
movlt scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE)
str scno, [tsk, #TI_ABI_SYSCALL] @ make sure tracers see update
ldmia sp, {r0 - r6} @ have to reload r0 - r6
b local_restart @ ... and off we go
ENDPROC(ret_fast_syscall)

View file

@ -783,8 +783,9 @@ long arch_ptrace(struct task_struct *child, long request,
break;
case PTRACE_SET_SYSCALL:
task_thread_info(child)->abi_syscall = data &
__NR_SYSCALL_MASK;
if (data != -1)
data &= __NR_SYSCALL_MASK;
task_thread_info(child)->abi_syscall = data;
ret = 0;
break;

View file

@ -116,6 +116,7 @@ extern bool try_wait_for_completion(struct completion *x);
extern bool completion_done(struct completion *x);
extern void complete(struct completion *);
extern void complete_on_current_cpu(struct completion *x);
extern void complete_all(struct completion *);
#endif

View file

@ -146,7 +146,7 @@ static inline bool swq_has_sleeper(struct swait_queue_head *wq)
extern void swake_up_one(struct swait_queue_head *q);
extern void swake_up_all(struct swait_queue_head *q);
extern void swake_up_locked(struct swait_queue_head *q);
extern void swake_up_locked(struct swait_queue_head *q, int wake_flags);
extern void prepare_to_swait_exclusive(struct swait_queue_head *q, struct swait_queue *wait, int state);
extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state);

View file

@ -210,6 +210,7 @@ __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq
}
int __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned int mode, void *key);
void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
unsigned int mode, void *key, wait_queue_entry_t *bookmark);
@ -237,6 +238,8 @@ void __wake_up_pollfree(struct wait_queue_head *wq_head);
#define key_to_poll(m) ((__force __poll_t)(uintptr_t)(void *)(m))
#define wake_up_poll(x, m) \
__wake_up(x, TASK_NORMAL, 1, poll_to_key(m))
#define wake_up_poll_on_current_cpu(x, m) \
__wake_up_on_current_cpu(x, TASK_NORMAL, poll_to_key(m))
#define wake_up_locked_poll(x, m) \
__wake_up_locked_key((x), TASK_NORMAL, poll_to_key(m))
#define wake_up_interruptible_poll(x, m) \

View file

@ -115,6 +115,8 @@ struct seccomp_notif_resp {
__u32 flags;
};
#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
/* valid flags for seccomp_notif_addfd */
#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */
#define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */
@ -150,4 +152,6 @@ struct seccomp_notif_addfd {
#define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \
struct seccomp_notif_addfd)
#define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64)
#endif /* _UAPI_LINUX_SECCOMP_H */

View file

@ -13,6 +13,23 @@
* Waiting for completion is typically a sync point, but not an exclusion point.
*/
/*
 * complete_with_flags - record one completion and wake a single waiter
 * @x: completion being signalled
 * @wake_flags: wake flags handed unchanged to swake_up_locked()
 *
 * Everything happens under x->wait.lock with interrupts saved/restored.
 */
static void complete_with_flags(struct completion *x, int wake_flags)
{
	unsigned long irq_state;

	raw_spin_lock_irqsave(&x->wait.lock, irq_state);

	/* The counter is left untouched once it has reached UINT_MAX. */
	if (x->done != UINT_MAX) {
		x->done += 1;
	}

	swake_up_locked(&x->wait, wake_flags);

	raw_spin_unlock_irqrestore(&x->wait.lock, irq_state);
}
/*
 * complete_on_current_cpu - signal a single waiter using WF_CURRENT_CPU
 * @x: completion being signalled
 *
 * Same as complete_with_flags() with the WF_CURRENT_CPU wake flag, i.e. the
 * wakeup prefers to move the wakee to the current CPU.
 */
void complete_on_current_cpu(struct completion *x)
{
	/* A void function must not "return" an expression (ISO C 6.8.6.4). */
	complete_with_flags(x, WF_CURRENT_CPU);
}
/**
* complete: - signals a single thread waiting on this completion
* @x: holds the state of this particular completion
@ -27,14 +44,7 @@
*/
void complete(struct completion *x)
{
unsigned long flags;
raw_spin_lock_irqsave(&x->wait.lock, flags);
if (x->done != UINT_MAX)
x->done++;
swake_up_locked(&x->wait);
raw_spin_unlock_irqrestore(&x->wait.lock, flags);
complete_with_flags(x, 0);
}
EXPORT_SYMBOL(complete);

View file

@ -4193,8 +4193,7 @@ bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success)
* Return: %true if @p->state changes (an actual wakeup was done),
* %false otherwise.
*/
static int
try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
{
unsigned long flags;
int cpu, success = 0;
@ -7030,7 +7029,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void)
int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags,
void *key)
{
WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~WF_SYNC);
WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~(WF_SYNC|WF_CURRENT_CPU));
return try_to_wake_up(curr->private, mode, wake_flags);
}
EXPORT_SYMBOL(default_wake_function);

View file

@ -7741,6 +7741,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
if (wake_flags & WF_TTWU) {
record_wakee(p);
if ((wake_flags & WF_CURRENT_CPU) &&
cpumask_test_cpu(cpu, p->cpus_ptr))
return cpu;
if (sched_energy_enabled()) {
new_cpu = find_energy_efficient_cpu(p, prev_cpu);
if (new_cpu >= 0)

View file

@ -2131,12 +2131,13 @@ static inline int task_on_rq_migrating(struct task_struct *p)
}
/* Wake flags. The first three directly map to some SD flag value */
#define WF_EXEC 0x02 /* Wakeup after exec; maps to SD_BALANCE_EXEC */
#define WF_FORK 0x04 /* Wakeup after fork; maps to SD_BALANCE_FORK */
#define WF_TTWU 0x08 /* Wakeup; maps to SD_BALANCE_WAKE */
#define WF_EXEC 0x02 /* Wakeup after exec; maps to SD_BALANCE_EXEC */
#define WF_FORK 0x04 /* Wakeup after fork; maps to SD_BALANCE_FORK */
#define WF_TTWU 0x08 /* Wakeup; maps to SD_BALANCE_WAKE */
#define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */
#define WF_MIGRATED 0x20 /* Internal use, task got migrated */
#define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */
#define WF_MIGRATED 0x20 /* Internal use, task got migrated */
#define WF_CURRENT_CPU 0x40 /* Prefer to move the wakee to the current CPU. */
#ifdef CONFIG_SMP
static_assert(WF_EXEC == SD_BALANCE_EXEC);
@ -3229,6 +3230,8 @@ static inline bool is_per_cpu_kthread(struct task_struct *p)
extern void swake_up_all_locked(struct swait_queue_head *q);
extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
extern int try_to_wake_up(struct task_struct *tsk, unsigned int state, int wake_flags);
#ifdef CONFIG_PREEMPT_DYNAMIC
extern int preempt_dynamic_mode;
extern int sched_dynamic_mode(const char *str);

View file

@ -18,7 +18,7 @@ EXPORT_SYMBOL(__init_swait_queue_head);
* If for some reason it would return 0, that means the previously waiting
* task is already running, so it will observe condition true (or has already).
*/
void swake_up_locked(struct swait_queue_head *q)
void swake_up_locked(struct swait_queue_head *q, int wake_flags)
{
struct swait_queue *curr;
@ -26,7 +26,7 @@ void swake_up_locked(struct swait_queue_head *q)
return;
curr = list_first_entry(&q->task_list, typeof(*curr), task_list);
wake_up_process(curr->task);
try_to_wake_up(curr->task, TASK_NORMAL, wake_flags);
list_del_init(&curr->task_list);
}
EXPORT_SYMBOL(swake_up_locked);
@ -41,7 +41,7 @@ EXPORT_SYMBOL(swake_up_locked);
void swake_up_all_locked(struct swait_queue_head *q)
{
while (!list_empty(&q->task_list))
swake_up_locked(q);
swake_up_locked(q, 0);
}
void swake_up_one(struct swait_queue_head *q)
@ -49,7 +49,7 @@ void swake_up_one(struct swait_queue_head *q)
unsigned long flags;
raw_spin_lock_irqsave(&q->lock, flags);
swake_up_locked(q);
swake_up_locked(q, 0);
raw_spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(swake_up_one);

View file

@ -161,6 +161,11 @@ int __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
}
EXPORT_SYMBOL(__wake_up);
/*
 * __wake_up_on_current_cpu - wake up waiters on @wq_head with WF_CURRENT_CPU
 * @wq_head: the wait queue to wake
 * @mode: passed through to __wake_up_common_lock()
 * @key: passed through to __wake_up_common_lock()
 *
 * Calls __wake_up_common_lock() with a count of 1 and the WF_CURRENT_CPU
 * wake flag.
 * NOTE(review): the third argument is presumably the number of exclusive
 * waiters to wake, as with __wake_up()'s nr - confirm against
 * __wake_up_common_lock().
 */
void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned int mode, void *key)
{
__wake_up_common_lock(wq_head, mode, 1, WF_CURRENT_CPU, key);
}
/*
* Same as __wake_up but called with the spinlock in wait_queue_head_t held.
*/

View file

@ -110,11 +110,13 @@ struct seccomp_knotif {
* @flags: The flags for the new file descriptor. At the moment, only O_CLOEXEC
* is allowed.
* @ioctl_flags: The flags used for the seccomp_addfd ioctl.
* @setfd: whether or not SECCOMP_ADDFD_FLAG_SETFD was set during notify_addfd
* @ret: The return value of the installing process. It is set to the fd num
* upon success (>= 0).
* @completion: Indicates that the installing process has completed fd
* installation, or gone away (either due to successful
* reply, or signal)
* @list: list_head for chaining seccomp_kaddfd together.
*
*/
struct seccomp_kaddfd {
@ -138,14 +140,17 @@ struct seccomp_kaddfd {
* structure is fairly large, we store the notification-specific stuff in a
* separate structure.
*
* @request: A semaphore that users of this notification can wait on for
* changes. Actual reads and writes are still controlled with
* filter->notify_lock.
* @requests: A counter of queued notifications that have not been received
* yet; receivers wait for it to become positive. Actual reads and
* writes are still controlled with filter->notify_lock.
* @flags: A set of SECCOMP_USER_NOTIF_FD_* flags.
* @next_id: The id of the next request.
* @notifications: A list of struct seccomp_knotif elements.
*/
struct notification {
struct semaphore request;
atomic_t requests;
u32 flags;
u64 next_id;
struct list_head notifications;
};
@ -555,6 +560,8 @@ static void __seccomp_filter_release(struct seccomp_filter *orig)
* drop its reference count, and notify
* about unused filters
*
* @tsk: task the filter should be released from.
*
* This function should only be called when the task is exiting as
* it detaches it from its filter tree. As such, READ_ONCE() and
* barriers are not needed here, as would normally be needed.
@ -574,6 +581,8 @@ void seccomp_filter_release(struct task_struct *tsk)
/**
* seccomp_sync_threads: sets all threads to use current's filter
*
* @flags: SECCOMP_FILTER_FLAG_* flags to set during sync.
*
* Expects sighand and cred_guard_mutex locks to be held, and for
* seccomp_can_sync_threads() to have returned success already
* without dropping the locks.
@ -1116,8 +1125,11 @@ static int seccomp_do_user_notification(int this_syscall,
list_add_tail(&n.list, &match->notif->notifications);
INIT_LIST_HEAD(&n.addfd);
up(&match->notif->request);
wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);
atomic_inc(&match->notif->requests);
if (match->notif->flags & SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP)
wake_up_poll_on_current_cpu(&match->wqh, EPOLLIN | EPOLLRDNORM);
else
wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);
/*
* This is where we wait for a reply from userspace.
@ -1450,6 +1462,37 @@ find_notification(struct seccomp_filter *filter, u64 id)
return NULL;
}
/*
 * Wake callback used by recv_wait_event().
 *
 * A wakeup without a key, or one whose key carries EPOLLIN or EPOLLERR, is
 * forwarded to autoremove_wake_function(); any other keyed wakeup is ignored
 * by returning 0.
 */
static int recv_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync,
			      void *key)
{
	if (!key || (key_to_poll(key) & (EPOLLIN | EPOLLERR)))
		return autoremove_wake_function(wait, mode, sync, key);

	/* Keyed event we are not interested in: skip the wakeup. */
	return 0;
}
/*
 * recv_wait_event - take one pending request, waiting until one is available
 * @filter: filter whose notif->requests counter and wqh wait queue are used
 *
 * Returns 0 once the requests counter has been decremented, or the non-zero
 * value returned by prepare_to_wait_event().
 */
static int recv_wait_event(struct seccomp_filter *filter)
{
DEFINE_WAIT_FUNC(wait, recv_wake_function);
int ret;
/* Fast path: the counter is already positive, take a request without waiting. */
if (atomic_dec_if_positive(&filter->notif->requests) >= 0)
return 0;
for (;;) {
ret = prepare_to_wait_event(&filter->wqh, &wait, TASK_INTERRUPTIBLE);
/* Re-check the counter after prepare_to_wait_event() and before sleeping. */
if (atomic_dec_if_positive(&filter->notif->requests) >= 0)
break;
/* NOTE(review): presumably non-zero means a signal is pending - confirm. */
if (ret)
return ret;
schedule();
}
/* Only reached via break, i.e. after a request was successfully taken. */
finish_wait(&filter->wqh, &wait);
return 0;
}
static long seccomp_notify_recv(struct seccomp_filter *filter,
void __user *buf)
@ -1467,7 +1510,7 @@ static long seccomp_notify_recv(struct seccomp_filter *filter,
memset(&unotif, 0, sizeof(unotif));
ret = down_interruptible(&filter->notif->request);
ret = recv_wait_event(filter);
if (ret < 0)
return ret;
@ -1515,7 +1558,8 @@ static long seccomp_notify_recv(struct seccomp_filter *filter,
if (should_sleep_killable(filter, knotif))
complete(&knotif->ready);
knotif->state = SECCOMP_NOTIFY_INIT;
up(&filter->notif->request);
atomic_inc(&filter->notif->requests);
wake_up_poll(&filter->wqh, EPOLLIN | EPOLLRDNORM);
}
mutex_unlock(&filter->notify_lock);
}
@ -1561,7 +1605,10 @@ static long seccomp_notify_send(struct seccomp_filter *filter,
knotif->error = resp.error;
knotif->val = resp.val;
knotif->flags = resp.flags;
complete(&knotif->ready);
if (filter->notif->flags & SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP)
complete_on_current_cpu(&knotif->ready);
else
complete(&knotif->ready);
out:
mutex_unlock(&filter->notify_lock);
return ret;
@ -1591,6 +1638,22 @@ static long seccomp_notify_id_valid(struct seccomp_filter *filter,
return ret;
}
/*
 * seccomp_notify_set_flags - replace the flags of a notification listener
 * @filter: filter owning the notification state
 * @flags: new flag set; only SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP is accepted
 *
 * Returns 0 on success, -EINVAL if any other bit is set, or the negative
 * value returned by mutex_lock_interruptible().
 */
static long seccomp_notify_set_flags(struct seccomp_filter *filter,
				     unsigned long flags)
{
	const unsigned long allowed = SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP;
	long err;

	/* Reject any bit outside the supported set. */
	if ((flags & allowed) != flags)
		return -EINVAL;

	err = mutex_lock_interruptible(&filter->notify_lock);
	if (err < 0)
		return err;

	filter->notif->flags = flags;
	mutex_unlock(&filter->notify_lock);

	return 0;
}
static long seccomp_notify_addfd(struct seccomp_filter *filter,
struct seccomp_notif_addfd __user *uaddfd,
unsigned int size)
@ -1720,6 +1783,8 @@ static long seccomp_notify_ioctl(struct file *file, unsigned int cmd,
case SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR:
case SECCOMP_IOCTL_NOTIF_ID_VALID:
return seccomp_notify_id_valid(filter, buf);
case SECCOMP_IOCTL_NOTIF_SET_FLAGS:
return seccomp_notify_set_flags(filter, arg);
}
/* Extensible Argument ioctls */
@ -1777,7 +1842,6 @@ static struct file *init_listener(struct seccomp_filter *filter)
if (!filter->notif)
goto out;
sema_init(&filter->notif->request, 0);
filter->notif->next_id = get_random_u64();
INIT_LIST_HEAD(&filter->notif->notifications);

View file

@ -26,3 +26,6 @@
#ifndef __NR_setns
#define __NR_setns 346
#endif
/*
 * Fallback definition, used only when the headers have not already provided
 * __NR_seccomp (same #ifndef pattern as the __NR_setns guard above).
 */
#ifndef __NR_seccomp
#define __NR_seccomp 354
#endif

View file

@ -26,3 +26,6 @@
#ifndef __NR_getcpu
#define __NR_getcpu 309
#endif
#ifndef __NR_seccomp
#define __NR_seccomp 317
#endif

View file

@ -1,5 +1,6 @@
perf-y += sched-messaging.o
perf-y += sched-pipe.o
perf-y += sched-seccomp-notify.o
perf-y += syscall.o
perf-y += mem-functions.o
perf-y += futex-hash.o

View file

@ -21,6 +21,7 @@ extern struct timeval bench__start, bench__end, bench__runtime;
int bench_numa(int argc, const char **argv);
int bench_sched_messaging(int argc, const char **argv);
int bench_sched_pipe(int argc, const char **argv);
int bench_sched_seccomp_notify(int argc, const char **argv);
int bench_syscall_basic(int argc, const char **argv);
int bench_syscall_getpgid(int argc, const char **argv);
int bench_syscall_fork(int argc, const char **argv);

View file

@ -0,0 +1,178 @@
// SPDX-License-Identifier: GPL-2.0
#include <subcmd/parse-options.h>
#include "bench.h"
#include <uapi/linux/filter.h>
#include <sys/types.h>
#include <sys/time.h>
#include <linux/unistd.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <linux/time64.h>
#include <linux/seccomp.h>
#include <sys/prctl.h>
#include <unistd.h>
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <sys/wait.h>
#include <string.h>
#include <errno.h>
#include <err.h>
#include <inttypes.h>
#define LOOPS_DEFAULT 1000000UL
static uint64_t loops = LOOPS_DEFAULT;
static bool sync_mode;
/* Command-line options accepted by the seccomp-notify benchmark. */
static const struct option options[] = {
	/* -l/--loop: number of notifications to process (stored in loops). */
	OPT_U64('l', "loop", &loops, "Specify number of loops"),
	/* -s/--sync-mode: sets sync_mode, which requests SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP. */
	OPT_BOOLEAN('s', "sync-mode", &sync_mode,
		    "Enable the synchronous mode for seccomp notifications"),
	OPT_END()
};
/* Usage text passed to parse_options(); must name the benchmark as registered. */
static const char * const bench_seccomp_usage[] = {
	"perf bench sched seccomp-notify <options>",
	NULL
};
/*
 * Thin wrapper that issues the seccomp system call through syscall() using
 * __NR_seccomp, forwarding @op, @flags and @args unchanged. The value
 * returned by syscall() is returned converted to int.
 */
static int seccomp(unsigned int op, unsigned int flags, void *args)
{
return syscall(__NR_seccomp, op, flags, args);
}
static int user_notif_syscall(int nr, unsigned int flags)
{
struct sock_filter filter[] = {
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
offsetof(struct seccomp_data, nr)),
BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1),
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF),
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
};
struct sock_fprog prog = {
.len = (unsigned short)ARRAY_SIZE(filter),
.filter = filter,
};
return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
}
#define USER_NOTIF_MAGIC INT_MAX
static void user_notification_sync_loop(int listener)
{
struct seccomp_notif_resp resp;
struct seccomp_notif req;
uint64_t nr;
for (nr = 0; nr < loops; nr++) {
memset(&req, 0, sizeof(req));
if (ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req))
err(EXIT_FAILURE, "SECCOMP_IOCTL_NOTIF_RECV failed");
if (req.data.nr != __NR_gettid)
errx(EXIT_FAILURE, "unexpected syscall: %d", req.data.nr);
resp.id = req.id;
resp.error = 0;
resp.val = USER_NOTIF_MAGIC;
resp.flags = 0;
if (ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp))
err(EXIT_FAILURE, "SECCOMP_IOCTL_NOTIF_SEND failed");
}
}
#ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP
#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
#define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64)
#endif
/*
 * Entry point of the "seccomp-notify" benchmark.
 *
 * Installs a user-notification filter on gettid, forks a child that calls
 * gettid for as long as it gets USER_NOTIF_MAGIC back, and serves `loops`
 * notifications from the parent. The child is then killed and the elapsed
 * wall-clock time (measured from just after option parsing) is printed in the
 * format selected by bench_format. Returns 0; failures exit the process.
 */
int bench_sched_seccomp_notify(int argc, const char **argv)
{
	struct timeval t_begin, t_end, elapsed;
	unsigned long long total_usec = 0;
	int wstatus, notify_fd;
	pid_t child;

	argc = parse_options(argc, argv, options, bench_seccomp_usage, 0);

	gettimeofday(&t_begin, NULL);

	prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	notify_fd = user_notif_syscall(__NR_gettid,
				       SECCOMP_FILTER_FLAG_NEW_LISTENER);
	if (notify_fd < 0)
		err(EXIT_FAILURE, "can't create a notification descriptor");

	child = fork();
	if (child < 0)
		err(EXIT_FAILURE, "fork");

	if (child == 0) {
		if (prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0))
			err(EXIT_FAILURE, "can't set the parent death signal");

		/* Keep issuing gettid while the listener answers with the magic value. */
		while (syscall(__NR_gettid) == USER_NOTIF_MAGIC)
			;

		_exit(1);
	}

	if (sync_mode &&
	    ioctl(listener_fd_placeholder_unused(notify_fd), SECCOMP_IOCTL_NOTIF_SET_FLAGS,
		  SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, 0))
		err(EXIT_FAILURE,
		    "can't set SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP");

	user_notification_sync_loop(notify_fd);

	/* The child never stops on its own once served; terminate and reap it. */
	kill(child, SIGKILL);
	if (waitpid(child, &wstatus, 0) != child)
		err(EXIT_FAILURE, "waitpid(%d) failed", child);
	if (!WIFSIGNALED(wstatus) || WTERMSIG(wstatus) != SIGKILL)
		errx(EXIT_FAILURE, "unexpected exit code: %d", wstatus);

	gettimeofday(&t_end, NULL);
	timersub(&t_end, &t_begin, &elapsed);

	if (bench_format == BENCH_FORMAT_DEFAULT) {
		printf("# Executed %" PRIu64 " system calls\n\n",
		       loops);

		total_usec = elapsed.tv_sec * USEC_PER_SEC;
		total_usec += elapsed.tv_usec;

		printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
		       (unsigned long) elapsed.tv_sec,
		       (unsigned long) (elapsed.tv_usec / USEC_PER_MSEC));

		printf(" %14lf usecs/op\n",
		       (double)total_usec / (double)loops);
		printf(" %14d ops/sec\n",
		       (int)((double)loops /
			     ((double)total_usec / (double)USEC_PER_SEC)));
	} else if (bench_format == BENCH_FORMAT_SIMPLE) {
		printf("%lu.%03lu\n",
		       (unsigned long) elapsed.tv_sec,
		       (unsigned long) (elapsed.tv_usec / USEC_PER_MSEC));
	} else {
		/* An unknown output format is a fatal error. */
		fprintf(stderr, "Unknown format:%d\n", bench_format);
		exit(1);
	}

	return 0;
}

View file

@ -47,6 +47,7 @@ static struct bench numa_benchmarks[] = {
static struct bench sched_benchmarks[] = {
{ "messaging", "Benchmark for scheduling and IPC", bench_sched_messaging },
{ "pipe", "Benchmark for pipe() between two processes", bench_sched_pipe },
{ "seccomp-notify", "Benchmark for seccomp user notify", bench_sched_seccomp_notify},
{ "all", "Run all scheduler benchmarks", NULL },
{ NULL, NULL, NULL }
};

View file

@ -2184,6 +2184,9 @@ FIXTURE_TEARDOWN(TRACE_syscall)
TEST(negative_ENOSYS)
{
#if defined(__arm__)
SKIP(return, "arm32 does not support calling syscall -1");
#endif
/*
* There should be no difference between an "internal" skip
* and userspace asking for syscall "-1".
@ -3072,7 +3075,8 @@ TEST(syscall_restart)
timeout.tv_sec = 1;
errno = 0;
EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
TH_LOG("Call to nanosleep() failed (errno %d)", errno);
TH_LOG("Call to nanosleep() failed (errno %d: %s)",
errno, strerror(errno));
}
/* Read final sync from parent. */
@ -3908,6 +3912,9 @@ TEST(user_notification_filter_empty)
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
if (__NR_clone3 < 0)
SKIP(return, "Test not built with clone3 support");
pid = sys_clone3(&args, sizeof(args));
ASSERT_GE(pid, 0);
@ -3962,6 +3969,9 @@ TEST(user_notification_filter_empty_threaded)
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
if (__NR_clone3 < 0)
SKIP(return, "Test not built with clone3 support");
pid = sys_clone3(&args, sizeof(args));
ASSERT_GE(pid, 0);
@ -4255,6 +4265,61 @@ TEST(user_notification_addfd_rlimit)
close(memfd);
}
#ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP
#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
#define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64)
#endif
/*
 * Exercise SECCOMP_IOCTL_NOTIF_SET_FLAGS: an invalid flag value must be
 * rejected with EINVAL, SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP must be accepted,
 * and a getppid notification must still be received and answered afterwards.
 */
TEST(user_notification_sync)
{
struct seccomp_notif req = {};
struct seccomp_notif_resp resp = {};
int status, listener;
pid_t pid;
long ret;
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret) {
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
/* Install a user-notification filter for getppid and get its listener fd. */
listener = user_notif_syscall(__NR_getppid,
SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
/* Try to set invalid flags. */
EXPECT_SYSCALL_RETURN(-EINVAL,
ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, 0xffffffff, 0));
/* The only flag defined here must be accepted. */
ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS,
SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, 0), 0);
pid = fork();
ASSERT_GE(pid, 0);
if (pid == 0) {
/* Child: getppid must return the value supplied by the listener. */
ret = syscall(__NR_getppid);
ASSERT_EQ(ret, USER_NOTIF_MAGIC) {
_exit(1);
}
_exit(0);
}
/* Parent: receive the child's notification and reply with the magic value. */
req.pid = 0;
ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
ASSERT_EQ(req.data.nr, __NR_getppid);
resp.id = req.id;
resp.error = 0;
resp.val = USER_NOTIF_MAGIC;
resp.flags = 0;
ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
/* A zero wait status means the child took the _exit(0) path. */
ASSERT_EQ(waitpid(pid, &status, 0), pid);
ASSERT_EQ(status, 0);
}
/* Make sure PTRACE_O_SUSPEND_SECCOMP requires CAP_SYS_ADMIN. */
FIXTURE(O_SUSPEND_SECCOMP) {
pid_t pid;