FIOSEEKHOLE/FIOSEEKDATA: correct consistency for bmap-based implementation

Writes on UFS through a mapped region do not allocate disk blocks in
holes immediately. The blocks are allocated the first time the pages
are paged out.

This breaks the algorithm in vn_bmap_seekhole() and ufs_bmap_seekdata(),
because VOP_BMAP() reports a hole for a place that already contains
valid data.

Clean the pages before doing VOP_BMAP() in the affected functions.  In
principle, we could clean fewer pages by only requesting cleaning
starting from the given offset, but it is probably not very important.

PR:	269261
Reported by:	asomers
Reviewed by:	asomers, markj
Sponsored by:	The FreeBSD Foundation
MFC after:	1 week
Differential revision:	https://reviews.freebsd.org/D38379
This commit is contained in:
Konstantin Belousov 2023-02-04 03:20:19 +02:00
parent d9d5f2c042
commit 3b6056204d
3 changed files with 31 additions and 3 deletions

View file

@ -2556,6 +2556,7 @@ int
vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off,
struct ucred *cred)
{
vm_object_t obj;
off_t size;
daddr_t bn, bnp;
uint64_t bsize;
@ -2564,7 +2565,7 @@ vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off,
KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA,
("%s: Wrong command %lu", __func__, cmd));
ASSERT_VOP_LOCKED(vp, "vn_bmap_seekhole_locked");
ASSERT_VOP_ELOCKED(vp, "vn_bmap_seekhole_locked");
if (vp->v_type != VREG) {
error = ENOTTY;
@ -2578,6 +2579,15 @@ vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off,
error = ENXIO;
goto out;
}
/* See the comment in ufs_bmap_seekdata(). */
obj = vp->v_object;
if (obj != NULL) {
VM_OBJECT_WLOCK(obj);
vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
VM_OBJECT_WUNLOCK(obj);
}
bsize = vp->v_mount->mnt_stat.f_iosize;
for (bn = noff / bsize; noff < size; bn++, noff += bsize -
noff % bsize) {
@ -2613,7 +2623,7 @@ vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred)
KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA,
("%s: Wrong command %lu", __func__, cmd));
if (vn_lock(vp, LK_SHARED) != 0)
if (vn_lock(vp, LK_EXCLUSIVE) != 0)
return (EBADF);
error = vn_bmap_seekhole_locked(vp, cmd, off, cred);
VOP_UNLOCK(vp);

View file

@ -44,12 +44,16 @@ __FBSDID("$FreeBSD$");
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/stat.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
@ -348,6 +352,7 @@ ufs_bmap_seekdata(struct vnode *vp, off_t *offp)
struct inode *ip;
struct mount *mp;
struct ufsmount *ump;
vm_object_t obj;
ufs2_daddr_t bn, daddr, nextbn;
uint64_t bsize;
off_t numblks;
@ -364,6 +369,19 @@ ufs_bmap_seekdata(struct vnode *vp, off_t *offp)
if (*offp < 0 || *offp >= ip->i_size)
return (ENXIO);
/*
* We could have pages on the vnode' object queue which still
* do not have the data blocks allocated. Convert all dirty
* pages into buffer writes to ensure that we see all
* allocated data.
*/
obj = vp->v_object;
if (obj != NULL) {
VM_OBJECT_WLOCK(obj);
vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
VM_OBJECT_WUNLOCK(obj);
}
bsize = mp->mnt_stat.f_iosize;
for (bn = *offp / bsize, numblks = howmany(ip->i_size, bsize);
bn < numblks; bn = nextbn) {

View file

@ -2944,7 +2944,7 @@ ufs_ioctl(struct vop_ioctl_args *ap)
vp = ap->a_vp;
switch (ap->a_command) {
case FIOSEEKDATA:
error = vn_lock(vp, LK_SHARED);
error = vn_lock(vp, LK_EXCLUSIVE);
if (error == 0) {
error = ufs_bmap_seekdata(vp, (off_t *)ap->a_data);
VOP_UNLOCK(vp);