Add the posix_fallocate(2) syscall. The default implementation in
vop_stdallocate() is filesystem agnostic and will run as slow as a
read/write loop in userspace; however, it serves to correctly
implement the functionality for filesystems that do not implement a
VOP_ALLOCATE.

Note that __FreeBSD_version was already bumped today to 900036 for any
ports which would like to use this function.

Also reserve space in the syscall table for posix_fadvise(2).

Reviewed by:	-arch (previous version)
This commit is contained in:
Matthew D Fleming 2011-04-18 16:32:22 +00:00
parent fe51d6c1d1
commit d91f88f7f3
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=220791
11 changed files with 393 additions and 2 deletions

View file

@ -96,7 +96,7 @@ MAN+= abort2.2 accept.2 access.2 acct.2 adjtime.2 \
mq_setattr.2 \
msgctl.2 msgget.2 msgrcv.2 msgsnd.2 \
msync.2 munmap.2 nanosleep.2 nfssvc.2 ntp_adjtime.2 open.2 \
pathconf.2 pipe.2 poll.2 posix_openpt.2 profil.2 \
pathconf.2 pipe.2 poll.2 posix_fallocate.2 posix_openpt.2 profil.2 \
pselect.2 ptrace.2 quotactl.2 \
read.2 readlink.2 reboot.2 recv.2 rename.2 revoke.2 rfork.2 rmdir.2 \
rtprio.2

View file

@ -364,6 +364,7 @@ FBSD_1.2 {
cap_enter;
cap_getmode;
getloginclass;
posix_fallocate;
rctl_get_racct;
rctl_get_rules;
rctl_get_limits;

View file

@ -0,0 +1,146 @@
.\" Copyright (c) 1980, 1991, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 4. Neither the name of the University nor the names of its contributors
.\" may be used to endorse or promote products derived from this software
.\" without specific prior written permission.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" @(#)open.2 8.2 (Berkeley) 11/16/93
.\" $FreeBSD$
.\"
.Dd April 13, 2011
.Dt POSIX_FALLOCATE 2
.Os
.Sh NAME
.Nm posix_fallocate
.Nd pre-allocate storage for a range in a file
.Sh LIBRARY
.Lb libc
.Sh SYNOPSIS
.In fcntl.h
.Ft int
.Fn posix_fallocate "int fd" "off_t offset" "off_t len"
.Sh DESCRIPTION
Required storage for the range
.Fa offset
to
.Fa offset +
.Fa len
in the file referenced by
.Fa fd
is guaranteed to be allocated upon successful return.
That is, if
.Fn posix_fallocate
returns successfully, subsequent writes to the specified file data
will not fail due to lack of free space on the file system storage
media.
Any existing file data in the specified range is unmodified.
If
.Fa offset +
.Fa len
is beyond the current file size, then
.Fn posix_fallocate
will adjust the file size to
.Fa offset +
.Fa len .
Otherwise, the file size will not be changed.
.Pp
Space allocated by
.Fn posix_fallocate
will be freed by a successful call to
.Xr creat 2
or
.Xr open 2
that truncates the size of the file.
Space allocated via
.Fn posix_fallocate
may be freed by a successful call to
.Xr ftruncate 2
that reduces the file size to a size smaller than
.Fa offset +
.Fa len .
.Pp
.Sh RETURN VALUES
If successful,
.Fn posix_fallocate
returns zero.
It returns -1 on failure, and sets
.Va errno
to indicate the error.
.Sh ERRORS
Possible failure conditions:
.Bl -tag -width Er
.It Bq Er EBADF
The
.Fa fd
argument is not a valid file descriptor.
.It Bq Er EBADF
The
.Fa fd
argument references a file that was opened without write permission.
.It Bq Er EFBIG
The value of
.Fa offset +
.Fa len
is greater than the maximum file size.
.It Bq Er EINTR
A signal was caught during execution.
.It Bq Er EINVAL
The
.Fa len
argument was less than or equal to zero, or the
.Fa offset
argument was less than zero.
.It Bq Er EIO
An I/O error occurred while reading from or writing to a file system.
.It Bq Er ENODEV
The
.Fa fd
argument does not refer to a regular file.
.It Bq Er ENOSPC
There is insufficient free space remaining on the file system storage
media.
.It Bq Er ESPIPE
The
.Fa fd
argument is associated with a pipe or FIFO.
.El
.Sh SEE ALSO
.Xr creat 2 ,
.Xr ftruncate 2 ,
.Xr open 2 ,
.Xr unlink 2
.Sh STANDARDS
The
.Fn posix_fallocate
system call conforms to
.St -p1003.1-2004 .
.Sh HISTORY
The
.Fn posix_fallocate
function appeared in
.Fx 9.0 .
.Sh AUTHORS
.Fn posix_fallocate
and this manual page were initially written by
.An Matthew Fleming Aq mdf@FreeBSD.org .

View file

@ -2790,3 +2790,15 @@ freebsd32_kldstat(struct thread *td, struct freebsd32_kldstat_args *uap)
bcopy(&stat.pathname[0], &stat32.pathname[0], sizeof(stat.pathname));
return (copyout(&stat32, uap->stat, version));
}
/*
 * 32-bit compatibility entry point for posix_fallocate(2).
 *
 * A 32-bit caller supplies each 64-bit off_t argument as two 32-bit words
 * (offsetlo/offsethi and lenlo/lenhi).  Reassemble them into a native
 * struct posix_fallocate_args and hand the request to posix_fallocate().
 *
 * Returns whatever posix_fallocate() returns.
 */
int
freebsd32_posix_fallocate(struct thread *td,
    struct freebsd32_posix_fallocate_args *uap)
{
	struct posix_fallocate_args ap;

	ap.fd = uap->fd;
	/*
	 * Join the halves as unsigned 64-bit quantities and convert once.
	 * Shifting a signed off_t left by 32 when bit 31 of the high word
	 * is set would shift into the sign bit, which is undefined in C.
	 * A resulting negative value is still passed on unchanged.
	 */
	ap.offset = (off_t)(((uint64_t)uap->offsethi << 32) | uap->offsetlo);
	ap.len = (off_t)(((uint64_t)uap->lenhi << 32) | uap->lenlo);
	return (posix_fallocate(td, &ap));
}

View file

@ -986,3 +986,7 @@
529 AUE_NULL NOPROTO { int rctl_remove_rule(const void *inbufp, \
size_t inbuflen, void *outbufp, \
size_t outbuflen); }
; posix_fallocate(2) for 32-bit processes: each 64-bit off_t argument
; (offset, len) is passed as a low/high pair of 32-bit words.
530 AUE_NULL STD { int freebsd32_posix_fallocate(int fd,\
uint32_t offsetlo, uint32_t offsethi,\
uint32_t lenlo, uint32_t lenhi); }
; Slot reserved for posix_fadvise(2); no implementation yet.
531 AUE_NULL UNIMPL posix_fadvise

View file

@ -944,5 +944,8 @@
529 AUE_NULL STD { int rctl_remove_rule(const void *inbufp, \
size_t inbuflen, void *outbufp, \
size_t outbuflen); }
; posix_fallocate(2): pre-allocate storage for the range [offset, offset + len)
; of the file referenced by fd.
530 AUE_NULL STD { int posix_fallocate(int fd, \
off_t offset, off_t len); }
; Slot reserved for posix_fadvise(2); no implementation yet.
531 AUE_NULL UNIMPL posix_fadvise
; Please copy any additions and changes to the following compatibility tables:
; sys/compat/freebsd32/syscalls.master

View file

@ -99,6 +99,7 @@ struct vop_vector default_vnodeops = {
.vop_advlock = vop_stdadvlock,
.vop_advlockasync = vop_stdadvlockasync,
.vop_advlockpurge = vop_stdadvlockpurge,
.vop_allocate = vop_stdallocate,
.vop_bmap = vop_stdbmap,
.vop_close = VOP_NULL,
.vop_fsync = VOP_NULL,
@ -855,6 +856,136 @@ vop_stdvptocnp(struct vop_vptocnp_args *ap)
return (error);
}
/*
 * Default, filesystem-agnostic implementation of VOP_ALLOCATE.
 *
 * If the range [a_offset, a_offset + a_len) ends past the current file size,
 * the file is first extended with VOP_SETATTR.  The range is then walked one
 * chunk at a time: each chunk is read with VOP_READ (or zero-filled when it
 * starts at or beyond the recorded file size) and written straight back with
 * VOP_WRITE, so existing data is preserved while every byte of the range is
 * written through the filesystem.
 *
 * Locking: the function starts out treating the vnode as locked (locked = 1).
 * It may drop and re-take the lock (LK_EXCLUSIVE) around kern_yield() inside
 * the loop.  On success the vnode is still locked; on any error return the
 * vnode is unlocked.
 *
 * Returns 0 on success, otherwise the error reported by VOP_GETATTR,
 * VOP_SETATTR, vn_lock, VOP_READ or VOP_WRITE (plus VFS_STATFS/EFBIG when
 * the __notyet__ section is compiled in).
 */
int
vop_stdallocate(struct vop_allocate_args *ap)
{
#ifdef __notyet__
struct statfs sfs;
#endif
struct iovec aiov;
struct vattr vattr, *vap;
struct uio auio;
off_t len, cur, offset;
uint8_t *buf;
struct thread *td;
struct vnode *vp;
size_t iosize;
int error, locked;
/* buf stays NULL until allocated so the common exit path can free it. */
buf = NULL;
error = 0;
/* Tracks whether we currently hold the vnode lock. */
locked = 1;
td = curthread;
vap = &vattr;
vp = ap->a_vp;
len = ap->a_len;
offset = ap->a_offset;
error = VOP_GETATTR(vp, vap, td->td_ucred);
if (error != 0)
goto out;
/*
 * Chunk size: the block size reported for the file, falling back to
 * BLKDEV_IOSIZE when it is zero and capped at MAXPHYS.
 */
iosize = vap->va_blocksize;
if (iosize == 0)
iosize = BLKDEV_IOSIZE;
if (iosize > MAXPHYS)
iosize = MAXPHYS;
/* Scratch buffer holding one chunk at a time. */
buf = malloc(iosize, M_TEMP, M_WAITOK);
#ifdef __notyet__
/*
 * Check if the filesystem sets f_maxfilesize; if not use
 * VOP_SETATTR to perform the check.
 */
error = VFS_STATFS(vp->v_mount, &sfs, td);
if (error != 0)
goto out;
if (sfs.f_maxfilesize) {
if (offset > sfs.f_maxfilesize || len > sfs.f_maxfilesize ||
offset + len > sfs.f_maxfilesize) {
error = EFBIG;
goto out;
}
} else
#endif
if (offset + len > vap->va_size) {
/*
 * The range ends past the current size: grow the file.  vap is
 * reused as the SETATTR request, so afterwards vap->va_size holds
 * the new size (offset + len) rather than the old one.
 */
VATTR_NULL(vap);
vap->va_size = offset + len;
error = VOP_SETATTR(vp, vap, td->td_ucred);
if (error != 0)
goto out;
}
while (len > 0) {
if (should_yield()) {
/*
 * Release the vnode lock while yielding the CPU, then re-lock
 * and re-fetch the attributes (presumably because the file may
 * have changed while it was unlocked).  If vn_lock fails we
 * leave with locked == 0 so the exit path does not unlock again.
 */
VOP_UNLOCK(vp, 0);
locked = 0;
kern_yield(-1);
error = vn_lock(vp, LK_EXCLUSIVE);
if (error != 0)
break;
locked = 1;
error = VOP_GETATTR(vp, vap, td->td_ucred);
if (error != 0)
break;
}
/*
 * Read and write back anything below the nominal file
 * size. There's currently no way outside the filesystem
 * to know whether this area is sparse or not.
 */
/*
 * Size this chunk so that it ends on an iosize boundary, and never
 * exceeds what is left of the request.
 */
cur = iosize;
if ((offset % iosize) != 0)
cur -= (offset % iosize);
if (cur > len)
cur = len;
if (offset < vap->va_size) {
aiov.iov_base = buf;
aiov.iov_len = cur;
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
auio.uio_offset = offset;
auio.uio_resid = cur;
auio.uio_segflg = UIO_SYSSPACE;
auio.uio_rw = UIO_READ;
auio.uio_td = td;
error = VOP_READ(vp, &auio, 0, td->td_ucred);
if (error != 0)
break;
/* Short read: zero the tail of the chunk that was not filled. */
if (auio.uio_resid > 0) {
bzero(buf + cur - auio.uio_resid,
auio.uio_resid);
}
} else {
/* Chunk starts at or past the recorded size: write zeroes. */
bzero(buf, cur);
}
/* Write the chunk back at the same offset. */
aiov.iov_base = buf;
aiov.iov_len = cur;
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
auio.uio_offset = offset;
auio.uio_resid = cur;
auio.uio_segflg = UIO_SYSSPACE;
auio.uio_rw = UIO_WRITE;
auio.uio_td = td;
error = VOP_WRITE(vp, &auio, 0, td->td_ucred);
if (error != 0)
break;
len -= cur;
offset += cur;
}
out:
/*
 * Exit protocol: success leaves the vnode locked; an error return
 * leaves it unlocked, so unlock here only if we still hold the lock.
 */
KASSERT(locked || error != 0, ("How'd I get unlocked with no error?"));
if (locked && error != 0)
VOP_UNLOCK(vp, 0);
free(buf, M_TEMP);
return (error);
}
/*
* vfs default ops
* used to fill the vfs function table to get reasonable default return values.

View file

@ -4671,3 +4671,83 @@ kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
VFS_UNLOCK_GIANT(vfslocked);
return (error);
}
/*
 * Back end of posix_fallocate(2): validate the descriptor and the requested
 * range, then ask the filesystem to allocate it through VOP_ALLOCATE().
 *
 * Errors generated here:
 *	ESPIPE	fd refers to a pipe or FIFO.
 *	ENODEV	fd is not vnode-backed, or the vnode is not a regular file.
 *	EBADF	fd was not opened for writing.
 *	EINVAL	offset is negative or len is not positive.
 *	EFBIG	offset + len would exceed OFF_MAX.
 * Failures from fget(), vn_start_write(), vn_lock(), the MAC write check
 * and VOP_ALLOCATE() are returned unchanged.
 */
static int
kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
{
	struct file *fp = NULL;
	struct mount *mp = NULL;
	struct vnode *vp;
	int error, vfslocked = 0, vp_locked = 0;

	error = fget(td, fd, &fp);
	if (error != 0)
		goto done;

	/* Only vnode-backed descriptors can have storage allocated. */
	if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_FIFO) {
		error = ESPIPE;
		goto done;
	}
	if (fp->f_type != DTYPE_VNODE) {
		error = ENODEV;
		goto done;
	}
	if (!(fp->f_flag & FWRITE)) {
		error = EBADF;
		goto done;
	}
	vp = fp->f_vnode;
	if (vp->v_type != VREG) {
		error = ENODEV;
		goto done;
	}
	if (len <= 0 || offset < 0) {
		error = EINVAL;
		goto done;
	}
	/* Reject ranges whose end would wrap past the largest off_t. */
	if (len > OFF_MAX - offset) {
		error = EFBIG;
		goto done;
	}

	bwillwrite();
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
	if (error != 0)
		goto done;
	error = vn_lock(vp, LK_EXCLUSIVE);
	if (error != 0)
		goto done;
	vp_locked = 1;
#ifdef MAC
	error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
	if (error != 0)
		goto done;
#endif
	error = VOP_ALLOCATE(vp, offset, len);
	/* A failing VOP_ALLOCATE() returns with the vnode already unlocked. */
	vp_locked = (error == 0);

done:
	/* Release whatever was acquired, in reverse order of acquisition. */
	if (vp_locked)
		VOP_UNLOCK(vp, 0);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	if (fp != NULL)
		fdrop(fp, td);
	return (error);
}
int
posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
{
return (kern_posix_fallocate(td, uap->fd, uap->offset, uap->len));
}

View file

@ -608,6 +608,7 @@ vop_vptofh {
IN struct fid *fhp;
};
%% vptocnp vp L L L
%% vptocnp vpp - U -
@ -618,3 +619,12 @@ vop_vptocnp {
INOUT char *buf;
INOUT int *buflen;
};
#
# vop_allocate: allocate storage for len bytes starting at offset in vp.
# Lock state of vp (the three columns on the %% line): exclusively locked
# on entry, exclusively locked on successful return, unlocked on error.
#
%% allocate vp E E U
vop_allocate {
IN struct vnode *vp;
IN off_t offset;
IN off_t len;
};

View file

@ -278,7 +278,7 @@ struct oflock {
#endif
/*
* XXX missing posix_fadvise() and posix_fallocate(), and POSIX_FADV_* macros.
* XXX missing posix_fadvise() and POSIX_FADV_* macros.
*/
#ifndef _KERNEL
@ -289,6 +289,9 @@ int fcntl(int, int, ...);
#if __BSD_VISIBLE || __POSIX_VISIBLE >= 200809
int openat(int, const char *, int, ...);
#endif
#if __BSD_VISIBLE || __POSIX_VISIBLE >= 200112
/* Pre-allocate storage for a range of a file: (fd, offset, len). */
int posix_fallocate(int, off_t, off_t);
#endif
#if __BSD_VISIBLE
int flock(int, int);
#endif

View file

@ -689,6 +689,7 @@ int vop_stdaccessx(struct vop_accessx_args *ap);
int vop_stdadvlock(struct vop_advlock_args *ap);
int vop_stdadvlockasync(struct vop_advlockasync_args *ap);
int vop_stdadvlockpurge(struct vop_advlockpurge_args *ap);
int vop_stdallocate(struct vop_allocate_args *ap);
int vop_stdpathconf(struct vop_pathconf_args *);
int vop_stdpoll(struct vop_poll_args *);
int vop_stdvptocnp(struct vop_vptocnp_args *ap);