freebsd-src/sys/kern/sys_timerfd.c
Baptiste Daroussin 0ecf0b26a7 timerfd_create: fix after cf742faa39
Remove the include that crept in by accident
Clang complains about CLOCK_BOOTTIME being the same for now as
CLOCK_UPTIME, so remove CLOCK_BOOTTIME and leave a comment for
what to do when CLOCK_BOOTTIME will be different for real.
2024-03-06 18:28:02 +01:00

610 lines
15 KiB
C

/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org>
* Copyright (c) 2023 Jake Freeland <jfree@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/selinfo.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/timerfd.h>
#include <sys/timespec.h>
#include <sys/uio.h>
#include <sys/user.h>
#include <security/audit/audit.h>
/* Malloc type used for every struct timerfd allocation in this file. */
static MALLOC_DEFINE(M_TIMERFD, "timerfd", "timerfd structures");
/* Mutex serializing access to timerfd_list. */
static struct mtx timerfd_list_lock;
/* All live timerfds; walked by timerfd_jumped(). */
static LIST_HEAD(, timerfd) timerfd_list;
MTX_SYSINIT(timerfd, &timerfd_list_lock, "timerfd_list_lock", MTX_DEF);
/* Source of the inode numbers reported through stat(2). */
static struct unrhdr64 tfdino_unr;
/*
* Values stored in tfd_jumped.  TFD_ZREAD and TFD_CANCELED describe a
* jump that timerfd_read() has not reported yet; TFD_JUMPED is the mask
* that matches either of them.
*/
#define TFD_NOJUMP 0 /* Realtime clock has not jumped. */
#define TFD_READ 1 /* Jumped, tfd has been read since. */
#define TFD_ZREAD 2 /* Jumped backwards, CANCEL_ON_SET=false. */
#define TFD_CANCELED 4 /* Jumped, CANCEL_ON_SET=true. */
#define TFD_JUMPED (TFD_ZREAD | TFD_CANCELED)
/*
* One structure allocated per timerfd descriptor.
*
* Locking semantics:
* (t) locked by tfd_lock mtx
* (l) locked by timerfd_list_lock mtx
* (c) const until freeing
*/
struct timerfd {
/* User specified. */
struct itimerspec tfd_time; /* (t) tfd timer */
clockid_t tfd_clockid; /* (c) timing base */
int tfd_flags; /* (c) creation flags */
int tfd_timflags; /* (t) timer flags */
/* Used internally. */
timerfd_t tfd_count; /* (t) expiration count since read */
bool tfd_expired; /* (t) true upon initial expiration */
struct mtx tfd_lock; /* mtx protecting the (t) fields */
struct callout tfd_callout; /* (t) expiration notification */
struct selinfo tfd_sel; /* (t) I/O alerts */
struct timespec tfd_boottim; /* (t) cached boottime */
int tfd_jumped; /* (t) timer jump status (TFD_* above) */
LIST_ENTRY(timerfd) entry; /* (l) entry in list */
/* For stat(2). */
ino_t tfd_ino; /* (c) inode number */
struct timespec tfd_atim; /* (t) time of last read */
struct timespec tfd_mtim; /* (t) time of last settime */
struct timespec tfd_birthtim; /* (c) creation time */
};
/*
 * SYSINIT hook: prepare the 64-bit unit allocator that hands out the
 * inode numbers assigned to timerfds at creation time.
 */
static void
timerfd_init(void *data)
{

	new_unrhdr64(&tfdino_unr, 1);
}
SYSINIT(timerfd, SI_SUB_VFS, SI_ORDER_ANY, timerfd_init, NULL);
/*
 * Fetch the system boot time and store it in *ts with nanosecond
 * granularity (getboottime() reports microseconds).
 */
static inline void
timerfd_getboottime(struct timespec *ts)
{
	struct timeval boot_tv;

	getboottime(&boot_tv);
	ts->tv_sec = boot_tv.tv_sec;
	ts->tv_nsec = boot_tv.tv_usec * 1000;
}
/*
* Call when a discontinuous jump has occurred in CLOCK_REALTIME and
* update timerfd's cached boottime. A jump can be triggered using
* functions like clock_settime(2) or settimeofday(2).
*
* Timer is marked TFD_CANCELED if TFD_TIMER_CANCEL_ON_SET is set
* and the realtime clock jumps.
* Timer is marked TFD_ZREAD if TFD_TIMER_CANCEL_ON_SET is not set,
* but the realtime clock jumps backwards.
*
* Only timers on CLOCK_REALTIME that were armed with TFD_TIMER_ABSTIME
* and whose cached boottime differs from the current one are touched.
* The list lock is taken first, then each timerfd's own lock.
*/
void
timerfd_jumped(void)
{
struct timerfd *tfd;
struct timespec boottime, diff;
/* Unlocked peek: nothing to do when no timerfd exists. */
if (LIST_EMPTY(&timerfd_list))
return;
timerfd_getboottime(&boottime);
mtx_lock(&timerfd_list_lock);
LIST_FOREACH(tfd, &timerfd_list, entry) {
mtx_lock(&tfd->tfd_lock);
/* Skip timers that the jump cannot affect. */
if (tfd->tfd_clockid != CLOCK_REALTIME ||
(tfd->tfd_timflags & TFD_TIMER_ABSTIME) == 0 ||
timespeccmp(&boottime, &tfd->tfd_boottim, ==)) {
mtx_unlock(&tfd->tfd_lock);
continue;
}
if (callout_active(&tfd->tfd_callout)) {
/* Record how the jump must be reported to the next reader. */
if ((tfd->tfd_timflags & TFD_TIMER_CANCEL_ON_SET) != 0)
tfd->tfd_jumped = TFD_CANCELED;
else if (timespeccmp(&boottime, &tfd->tfd_boottim, <))
tfd->tfd_jumped = TFD_ZREAD;
/*
* Do not reschedule callout when
* inside interval time loop.
*/
if (!tfd->tfd_expired) {
/*
* Shift the stored expiration by the change in boottime and
* re-arm the callout, but only when callout_stop() reports
* that it stopped the callout (return value 1).
*/
timespecsub(&boottime,
&tfd->tfd_boottim, &diff);
timespecsub(&tfd->tfd_time.it_value,
&diff, &tfd->tfd_time.it_value);
if (callout_stop(&tfd->tfd_callout) == 1) {
callout_schedule_sbt(&tfd->tfd_callout,
tstosbt(tfd->tfd_time.it_value),
0, C_ABSOLUTE);
}
}
}
/* Remember the boottime this timerfd has now been adjusted for. */
tfd->tfd_boottim = boottime;
mtx_unlock(&tfd->tfd_lock);
}
mtx_unlock(&timerfd_list_lock);
}
static int
timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
int flags, struct thread *td)
{
struct timerfd *tfd = fp->f_data;
timerfd_t count;
int error = 0;
if (uio->uio_resid < sizeof(timerfd_t))
return (EINVAL);
mtx_lock(&tfd->tfd_lock);
retry:
getnanotime(&tfd->tfd_atim);
if ((tfd->tfd_jumped & TFD_JUMPED) != 0) {
if (tfd->tfd_jumped == TFD_CANCELED)
error = ECANCELED;
tfd->tfd_jumped = TFD_READ;
tfd->tfd_count = 0;
mtx_unlock(&tfd->tfd_lock);
return (error);
} else {
tfd->tfd_jumped = TFD_NOJUMP;
}
if (tfd->tfd_count == 0) {
if ((fp->f_flag & FNONBLOCK) != 0) {
mtx_unlock(&tfd->tfd_lock);
return (EAGAIN);
}
td->td_rtcgen = atomic_load_acq_int(&rtc_generation);
error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock,
PCATCH, "tfdrd", 0);
if (error == 0) {
goto retry;
} else {
mtx_unlock(&tfd->tfd_lock);
return (error);
}
}
count = tfd->tfd_count;
tfd->tfd_count = 0;
mtx_unlock(&tfd->tfd_lock);
error = uiomove(&count, sizeof(timerfd_t), uio);
return (error);
}
/*
 * ioctl(2) handler.  FIOASYNC and FIONBIO toggle FASYNC and FNONBLOCK
 * in the file flags according to the int pointed to by data; every
 * other command is rejected with ENOTTY.
 */
static int
timerfd_ioctl(struct file *fp, u_long cmd, void *data,
    struct ucred *active_cred, struct thread *td)
{
	int flag;

	/* Map the command to the file flag it controls. */
	switch (cmd) {
	case FIOASYNC:
		flag = FASYNC;
		break;
	case FIONBIO:
		flag = FNONBLOCK;
		break;
	default:
		return (ENOTTY);
	}

	if (*(int *)data != 0)
		atomic_set_int(&fp->f_flag, flag);
	else
		atomic_clear_int(&fp->f_flag, flag);
	return (0);
}
/*
 * poll(2)/select(2) handler.  The descriptor is readable when the
 * expiration counter is non-zero and the jump state is not TFD_READ.
 * If nothing can be reported, the thread is recorded on the selinfo so
 * that a later selwakeup() notifies it.
 */
static int
timerfd_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{
	struct timerfd *tfd;
	int wanted, revents;

	tfd = fp->f_data;
	wanted = events & (POLLIN | POLLRDNORM);
	revents = 0;

	mtx_lock(&tfd->tfd_lock);
	if (wanted != 0 && tfd->tfd_count > 0 &&
	    tfd->tfd_jumped != TFD_READ)
		revents = wanted;
	else
		selrecord(td, &tfd->tfd_sel);
	mtx_unlock(&tfd->tfd_lock);

	return (revents);
}
/*
 * kqueue detach hook: unlink the knote from the timerfd's note list.
 * The timerfd lock is taken here and the removal is told that the list
 * is already locked.
 */
static void
filt_timerfddetach(struct knote *kn)
{
	struct timerfd *tfd;

	tfd = kn->kn_hook;

	mtx_lock(&tfd->tfd_lock);
	knlist_remove(&tfd->tfd_sel.si_note, kn, 1);
	mtx_unlock(&tfd->tfd_lock);
}
static int
filt_timerfdread(struct knote *kn, long hint)
{
struct timerfd *tfd = kn->kn_hook;
mtx_assert(&tfd->tfd_lock, MA_OWNED);
kn->kn_data = (int64_t)tfd->tfd_count;
return (tfd->tfd_count > 0);
}
/*
* Filter operations for EVFILT_READ knotes on a timerfd; installed on
* the knote by timerfd_kqfilter().
*/
static struct filterops timerfd_rfiltops = {
.f_isfd = 1,
.f_detach = filt_timerfddetach,
.f_event = filt_timerfdread,
};
/*
 * kqueue attach handler.  Only EVFILT_READ is supported; any other
 * filter yields EINVAL.  On success the knote is bound to this timerfd
 * and added to its note list.
 */
static int
timerfd_kqfilter(struct file *fp, struct knote *kn)
{
	struct timerfd *tfd;

	if (kn->kn_filter != EVFILT_READ)
		return (EINVAL);

	tfd = fp->f_data;
	kn->kn_fop = &timerfd_rfiltops;
	kn->kn_hook = tfd;
	/* Third argument 0: the list lock is not held by the caller. */
	knlist_add(&tfd->tfd_sel.si_note, kn, 0);

	return (0);
}
/*
 * stat(2) handler.  Zeroes *sb and then fills in the link count, owner,
 * block size, inode number and the four timestamps.  The change time
 * mirrors the modification time (time of the last settime).
 */
static int
timerfd_stat(struct file *fp, struct stat *sb, struct ucred *active_cred)
{
	struct timerfd *tfd;

	tfd = fp->f_data;
	bzero(sb, sizeof(*sb));

	/* Values taken from the file itself. */
	sb->st_nlink = fp->f_count - 1;
	sb->st_uid = fp->f_cred->cr_uid;
	sb->st_gid = fp->f_cred->cr_gid;
	sb->st_blksize = PAGE_SIZE;

	/* Fixed at creation, so no lock is needed. */
	sb->st_ino = tfd->tfd_ino;
	sb->st_birthtim = tfd->tfd_birthtim;

	/* Access and modification times change under tfd_lock. */
	mtx_lock(&tfd->tfd_lock);
	sb->st_atim = tfd->tfd_atim;
	sb->st_mtim = tfd->tfd_mtim;
	mtx_unlock(&tfd->tfd_lock);
	sb->st_ctim = sb->st_mtim;

	return (0);
}
/*
* Close handler: tear down and free the timerfd behind fp.
* Always returns 0.
*/
static int
timerfd_close(struct file *fp, struct thread *td)
{
struct timerfd *tfd = fp->f_data;
/* Unlink first so timerfd_jumped() can no longer find this timerfd. */
mtx_lock(&timerfd_list_lock);
LIST_REMOVE(tfd, entry);
mtx_unlock(&timerfd_list_lock);
/* Quiesce the callout before releasing anything it uses. */
callout_drain(&tfd->tfd_callout);
/* Release select/poll and kqueue state tied to this timerfd. */
seldrain(&tfd->tfd_sel);
knlist_destroy(&tfd->tfd_sel.si_note);
mtx_destroy(&tfd->tfd_lock);
free(tfd, M_TIMERFD);
/* f_data is stale now; point the file at badfileops. */
fp->f_ops = &badfileops;
return (0);
}
/*
 * Fill in the timerfd-specific part of a kinfo_file record: the file
 * type, the clock id, the creation flags and the kernel address of the
 * timerfd structure.  Always returns 0.
 */
static int
timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif,
    struct filedesc *fdp)
{
	struct timerfd *tfd;

	tfd = fp->f_data;

	kif->kf_type = KF_TYPE_TIMERFD;
	kif->kf_un.kf_timerfd.kf_timerfd_addr = (uintptr_t)tfd;
	kif->kf_un.kf_timerfd.kf_timerfd_clockid = tfd->tfd_clockid;
	kif->kf_un.kf_timerfd.kf_timerfd_flags = tfd->tfd_flags;

	return (0);
}
/*
* File operations vector attached to every timerfd by finit() in
* kern_timerfd_create().  Write, truncate, chmod, chown and sendfile
* are routed to the generic invfo_* handlers; the remaining entries are
* implemented in this file, except fo_cmp which uses file_kcmp_generic.
*/
static struct fileops timerfdops = {
.fo_read = timerfd_read,
.fo_write = invfo_rdwr,
.fo_truncate = invfo_truncate,
.fo_ioctl = timerfd_ioctl,
.fo_poll = timerfd_poll,
.fo_kqfilter = timerfd_kqfilter,
.fo_stat = timerfd_stat,
.fo_close = timerfd_close,
.fo_chmod = invfo_chmod,
.fo_chown = invfo_chown,
.fo_sendfile = invfo_sendfile,
.fo_fill_kinfo = timerfd_fill_kinfo,
.fo_cmp = file_kcmp_generic,
.fo_flags = DFLAG_PASSABLE,
};
static void
timerfd_curval(struct timerfd *tfd, struct itimerspec *old_value)
{
struct timespec curr_value;
mtx_assert(&tfd->tfd_lock, MA_OWNED);
*old_value = tfd->tfd_time;
if (timespecisset(&tfd->tfd_time.it_value)) {
nanouptime(&curr_value);
timespecsub(&tfd->tfd_time.it_value, &curr_value,
&old_value->it_value);
}
}
static void
timerfd_expire(void *arg)
{
struct timerfd *tfd = (struct timerfd *)arg;
struct timespec uptime;
++tfd->tfd_count;
tfd->tfd_expired = true;
if (timespecisset(&tfd->tfd_time.it_interval)) {
/* Count missed events. */
nanouptime(&uptime);
if (timespeccmp(&uptime, &tfd->tfd_time.it_value, >)) {
timespecsub(&uptime, &tfd->tfd_time.it_value, &uptime);
tfd->tfd_count += tstosbt(uptime) /
tstosbt(tfd->tfd_time.it_interval);
}
timespecadd(&tfd->tfd_time.it_value,
&tfd->tfd_time.it_interval, &tfd->tfd_time.it_value);
callout_schedule_sbt(&tfd->tfd_callout,
tstosbt(tfd->tfd_time.it_value),
0, C_ABSOLUTE);
} else {
/* Single shot timer. */
callout_deactivate(&tfd->tfd_callout);
timespecclear(&tfd->tfd_time.it_value);
}
wakeup(&tfd->tfd_count);
selwakeup(&tfd->tfd_sel);
KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0);
}
/*
 * Create a timerfd and install it in the calling thread's descriptor
 * table.
 *
 * clockid must be CLOCK_REALTIME, CLOCK_MONOTONIC or CLOCK_UPTIME and
 * flags may contain only TFD_CLOEXEC and TFD_NONBLOCK; anything else
 * returns EINVAL.  A failure from falloc() is returned unchanged.  On
 * success the new descriptor number is stored in td->td_retval[0] and 0
 * is returned.
 */
int
kern_timerfd_create(struct thread *td, int clockid, int flags)
{
	struct timerfd *tfd;
	struct file *fp;
	int error, fd, fflags;

	AUDIT_ARG_VALUE(clockid);
	AUDIT_ARG_FFLAGS(flags);

	/*
	 * CLOCK_BOOTTIME should be added once different from
	 * CLOCK_UPTIME
	 */
	if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC &&
	    clockid != CLOCK_UPTIME)
		return (EINVAL);
	if ((flags & ~(TFD_CLOEXEC | TFD_NONBLOCK)) != 0)
		return (EINVAL);

	/* Translate the TFD_* creation flags into file flags. */
	fflags = FREAD |
	    ((flags & TFD_CLOEXEC) != 0 ? O_CLOEXEC : 0) |
	    ((flags & TFD_NONBLOCK) != 0 ? FNONBLOCK : 0);

	error = falloc(td, &fp, &fd, fflags);
	if (error != 0)
		return (error);

	/* Build the per-descriptor state. */
	tfd = malloc(sizeof(*tfd), M_TIMERFD, M_WAITOK | M_ZERO);
	tfd->tfd_clockid = (clockid_t)clockid;
	tfd->tfd_flags = flags;
	tfd->tfd_ino = alloc_unr64(&tfdino_unr);
	mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF);
	callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0);
	knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock);
	timerfd_getboottime(&tfd->tfd_boottim);
	getnanotime(&tfd->tfd_birthtim);

	/* Make it visible to timerfd_jumped(). */
	mtx_lock(&timerfd_list_lock);
	LIST_INSERT_HEAD(&timerfd_list, tfd, entry);
	mtx_unlock(&timerfd_list_lock);

	/* Attach the state to the file and drop our extra reference. */
	finit(fp, fflags, DTYPE_TIMERFD, tfd, &timerfdops);
	fdrop(fp, td);

	td->td_retval[0] = fd;
	return (0);
}
int
kern_timerfd_gettime(struct thread *td, int fd, struct itimerspec *curr_value)
{
struct file *fp;
struct timerfd *tfd;
int error;
error = fget(td, fd, &cap_write_rights, &fp);
if (error != 0)
return (error);
if (fp->f_type != DTYPE_TIMERFD) {
fdrop(fp, td);
return (EINVAL);
}
tfd = fp->f_data;
mtx_lock(&tfd->tfd_lock);
timerfd_curval(tfd, curr_value);
mtx_unlock(&tfd->tfd_lock);
fdrop(fp, td);
return (0);
}
/*
* Arm or disarm the timerfd referred to by fd.
*
* flags may contain only TFD_TIMER_ABSTIME and TFD_TIMER_CANCEL_ON_SET.
* new_value supplies the initial expiration (it_value) and the period
* (it_interval); a zero it_value disarms the timer.  If old_value is not
* NULL it receives the previous setting, as produced by timerfd_curval().
*
* Returns EINVAL for unknown flags, for a timespec rejected by
* timespecvalid_interval(), or when fd is not a timerfd; the fget()
* error if the descriptor lookup fails; ECANCELED when an absolute
* CLOCK_REALTIME timer is set while a TFD_CANCELED jump is still
* unreported (the new setting is nevertheless applied); 0 otherwise.
*/
int
kern_timerfd_settime(struct thread *td, int fd, int flags,
const struct itimerspec *new_value, struct itimerspec *old_value)
{
struct file *fp;
struct timerfd *tfd;
struct timespec ts;
int error = 0;
if ((flags & ~(TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)) != 0)
return (EINVAL);
if (!timespecvalid_interval(&new_value->it_value) ||
!timespecvalid_interval(&new_value->it_interval))
return (EINVAL);
error = fget(td, fd, &cap_write_rights, &fp);
if (error != 0)
return (error);
if (fp->f_type != DTYPE_TIMERFD) {
fdrop(fp, td);
return (EINVAL);
}
tfd = fp->f_data;
mtx_lock(&tfd->tfd_lock);
/* Record the time of this settime (reported as st_mtim by stat). */
getnanotime(&tfd->tfd_mtim);
tfd->tfd_timflags = flags;
/* Store old itimerspec, if applicable. */
if (old_value != NULL)
timerfd_curval(tfd, old_value);
/* Set new expiration. */
tfd->tfd_time = *new_value;
if (timespecisset(&tfd->tfd_time.it_value)) {
if ((flags & TFD_TIMER_ABSTIME) == 0) {
/* Relative timer: make it_value an absolute uptime. */
nanouptime(&ts);
timespecadd(&tfd->tfd_time.it_value, &ts,
&tfd->tfd_time.it_value);
} else if (tfd->tfd_clockid == CLOCK_REALTIME) {
/* ECANCELED if unread jump is pending. */
if (tfd->tfd_jumped == TFD_CANCELED)
error = ECANCELED;
/* Convert from CLOCK_REALTIME to CLOCK_BOOTTIME. */
timespecsub(&tfd->tfd_time.it_value, &tfd->tfd_boottim,
&tfd->tfd_time.it_value);
}
/* Absolute timers on the other clocks use it_value unchanged. */
callout_reset_sbt(&tfd->tfd_callout,
tstosbt(tfd->tfd_time.it_value),
0, timerfd_expire, tfd, C_ABSOLUTE);
} else {
/* Zero it_value: disarm. */
callout_stop(&tfd->tfd_callout);
}
/* A new setting discards pending expirations and jump state. */
tfd->tfd_count = 0;
tfd->tfd_expired = false;
tfd->tfd_jumped = TFD_NOJUMP;
mtx_unlock(&tfd->tfd_lock);
fdrop(fp, td);
return (error);
}
int
sys_timerfd_create(struct thread *td, struct timerfd_create_args *uap)
{
return (kern_timerfd_create(td, uap->clockid, uap->flags));
}
int
sys_timerfd_gettime(struct thread *td, struct timerfd_gettime_args *uap)
{
struct itimerspec curr_value;
int error;
error = kern_timerfd_gettime(td, uap->fd, &curr_value);
if (error == 0)
error = copyout(&curr_value, uap->curr_value,
sizeof(curr_value));
return (error);
}
/*
 * timerfd_settime(2) system call entry point.  Copies the new setting
 * in from userspace, applies it, and copies the previous setting out
 * only when the caller supplied old_value and the set succeeded.
 * Returns the first error seen.
 */
int
sys_timerfd_settime(struct thread *td, struct timerfd_settime_args *uap)
{
	struct itimerspec new_value, old_value;
	struct itimerspec *oldp;
	int error;

	error = copyin(uap->new_value, &new_value, sizeof(new_value));
	if (error != 0)
		return (error);

	/* Request the old setting only if userspace asked for it. */
	oldp = (uap->old_value != NULL) ? &old_value : NULL;

	error = kern_timerfd_settime(td, uap->fd, uap->flags,
	    &new_value, oldp);
	if (error == 0 && oldp != NULL)
		error = copyout(&old_value, uap->old_value,
		    sizeof(old_value));

	return (error);
}