fusefs: enable write clustering

Enable write clustering in fusefs whenever cache mode is set to writeback
and the "async" mount option is used.  With default values for MAXPHYS,
DFLTPHYS, and the fuse max_write mount parameter, that means sequential
writes will now be written 128KB at a time instead of 64KB.

Also, add a regression test for PR 238565, a panic during unmount that
probably affects UFS, ext2, and msdosfs as well as fusefs.

PR:		238565
Sponsored by:	The FreeBSD Foundation
This commit is contained in:
Alan Somers 2019-06-14 18:14:51 +00:00
parent dff3a6b410
commit 8eecd9ce05
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/projects/fuse2/; revision=349036
9 changed files with 150 additions and 25 deletions

View file

@ -29,7 +29,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd May 19, 2019
.Dd June 14, 2019
.Dt MOUNT_FUSEFS 8
.Os
.Sh NAME
@ -136,21 +136,24 @@ The following options are available (and also their negated versions,
by prefixing them with
.Dq no ) :
.Bl -tag -width indent
.It Cm default_permissions
Enable traditional (file mode based) permission checking in kernel
.It Cm allow_other
Do not apply
.Sx STRICT ACCESS POLICY .
Only root can use this option
.It Cm async
I/O to the file system may be done asynchronously.
Writes may be delayed and/or reordered.
.It Cm default_permissions
Enable traditional (file mode based) permission checking in kernel
.It Cm max_read Ns = Ns Ar n
Limit size of read requests to
.Ar n
.It Cm neglect_shares
Do not refuse unmounting if there are secondary mounts
.It Cm private
Refuse shared mounting of the daemon.
This is the default behaviour; to allow sharing, explicitly use
.Fl o Cm noprivate
.It Cm neglect_shares
Do not refuse unmounting if there are secondary mounts
.It Cm push_symlinks_in
Prefix absolute symlinks with the mountpoint
.It Cm subtype Ns = Ns Ar fsname

View file

@ -88,6 +88,8 @@ static struct mntopt mopts[] = {
{ "large_read", 0, 0x00, 1 },
/* "nonempty", just the first two chars are stripped off during parsing */
{ "nempty", 0, 0x00, 1 },
{ "async", 0, MNT_ASYNC, 0},
{ "noasync", 1, MNT_ASYNC, 0},
MOPT_STDOPTS,
MOPT_END
};

View file

@ -565,11 +565,13 @@ fuse_write_biobackend(struct vnode *vp, struct uio *uio,
daddr_t lbn;
off_t filesize;
int bcount;
int n, on, err = 0;
int n, on, seqcount, err = 0;
bool last_page;
const int biosize = fuse_iosize(vp);
seqcount = ioflag >> IO_SEQSHIFT;
KASSERT(uio->uio_rw == UIO_WRITE, ("fuse_write_biobackend mode"));
if (vp->v_type != VREG)
return (EIO);
@ -644,6 +646,7 @@ fuse_write_biobackend(struct vnode *vp, struct uio *uio,
* with other readers
*/
err = fuse_vnode_setsize(vp, uio->uio_offset + n);
filesize = uio->uio_offset + n;
fvdat->flag |= FN_SIZECHANGE;
if (err) {
brelse(bp);
@ -787,20 +790,22 @@ fuse_write_biobackend(struct vnode *vp, struct uio *uio,
} else if (vm_page_count_severe() ||
buf_dirty_count_severe() ||
(ioflag & IO_ASYNC)) {
/* TODO: enable write clustering later */
bp->b_flags |= B_CLUSTEROK;
SDT_PROBE2(fusefs, , io, write_biobackend_issue, 3, bp);
bawrite(bp);
} else if (on == 0 && n == bcount) {
if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
bp->b_flags |= B_CLUSTEROK;
SDT_PROBE2(fusefs, , io, write_biobackend_issue,
4, bp);
bdwrite(bp);
cluster_write(vp, bp, filesize, seqcount, 0);
} else {
SDT_PROBE2(fusefs, , io, write_biobackend_issue,
5, bp);
bawrite(bp);
}
} else if (ioflag & IO_DIRECT) {
bp->b_flags |= B_CLUSTEROK;
SDT_PROBE2(fusefs, , io, write_biobackend_issue, 6, bp);
bawrite(bp);
} else {

View file

@ -433,6 +433,7 @@ fuse_vfsop_mount(struct mount *mp)
}
copystr(fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &len);
bzero(mp->mnt_stat.f_mntfromname + len, MNAMELEN - len);
mp->mnt_iosize_max = MAXPHYS;
/* Now handshaking with daemon */
fuse_internal_send_init(data, td);

View file

@ -336,7 +336,7 @@ void MockFS::debug_response(const mockfs_buf_out &out) {
MockFS::MockFS(int max_readahead, bool allow_other, bool default_permissions,
bool push_symlinks_in, bool ro, enum poll_method pm, uint32_t flags,
uint32_t kernel_minor_version)
uint32_t kernel_minor_version, uint32_t max_write, bool async)
{
struct sigaction sa;
struct iovec *iov = NULL;
@ -347,6 +347,7 @@ MockFS::MockFS(int max_readahead, bool allow_other, bool default_permissions,
m_daemon_id = NULL;
m_kernel_minor_version = kernel_minor_version;
m_maxreadahead = max_readahead;
m_maxwrite = max_write;
m_nready = -1;
m_pm = pm;
m_quit = false;
@ -404,6 +405,10 @@ MockFS::MockFS(int max_readahead, bool allow_other, bool default_permissions,
build_iovec(&iov, &iovlen, "ro",
__DECONST(void*, &trueval), sizeof(bool));
}
if (async) {
build_iovec(&iov, &iovlen, "async", __DECONST(void*, &trueval),
sizeof(bool));
}
if (nmount(iov, iovlen, 0))
throw(std::system_error(errno, std::system_category(),
"Couldn't mount filesystem"));
@ -449,15 +454,7 @@ void MockFS::init(uint32_t flags) {
out->body.init.minor = m_kernel_minor_version;;
out->body.init.flags = in->body.init.flags & flags;
/*
* The default max_write is set to this formula in libfuse, though
* individual filesystems can lower it. The "- 4096" was added in
* commit 154ffe2, with the commit message "fix".
*/
uint32_t default_max_write = 32 * getpagesize() + 0x1000 - 4096;
/* For testing purposes, it should be distinct from MAXPHYS */
m_max_write = MIN(default_max_write, MAXPHYS / 2);
out->body.init.max_write = m_max_write;
out->body.init.max_write = m_maxwrite;
out->body.init.max_readahead = m_maxreadahead;
SET_OUT_HEADER_LEN(*out, init);

View file

@ -252,7 +252,7 @@ class MockFS {
int m_kq;
/* The max_readahead filesystem option */
/* The max_readahead file system option */
uint32_t m_maxreadahead;
/* pid of the test process */
@ -288,7 +288,7 @@ class MockFS {
pid_t m_child_pid;
/* Maximum size of a FUSE_WRITE write */
uint32_t m_max_write;
uint32_t m_maxwrite;
/*
* Number of events that were available from /dev/fuse after the last
@ -303,7 +303,7 @@ class MockFS {
MockFS(int max_readahead, bool allow_other,
bool default_permissions, bool push_symlinks_in, bool ro,
enum poll_method pm, uint32_t flags,
uint32_t kernel_minor_version);
uint32_t kernel_minor_version, uint32_t max_write, bool async);
virtual ~MockFS();

View file

@ -50,6 +50,20 @@ extern "C" {
using namespace testing;
/*
 * The default max_write is set to this formula in libfuse, though
 * individual filesystems can lower it. The "- 4096" was added in
 * commit 154ffe2, with the commit message "fix".
 *
 * Note that the "+ 0x1000" and "- 4096" terms cancel each other, so the
 * value works out to exactly 32 pages.  The expression is kept in this form
 * to mirror the formula described above.  Because getpagesize() is a
 * run-time call, this constant is initialized dynamically at startup.
 */
const uint32_t libfuse_max_write = 32 * getpagesize() + 0x1000 - 4096;
/*
 * Set the default max_write to a distinct value from MAXPHYS to catch bugs
 * that confuse the two.  The result is libfuse_max_write, capped at half of
 * MAXPHYS.
 */
const uint32_t default_max_write = MIN(libfuse_max_write, MAXPHYS / 2);
/* Check that fusefs(4) is accessible and the current user can mount(2) */
void check_environment()
{
@ -98,7 +112,8 @@ void FuseTest::SetUp() {
try {
m_mock = new MockFS(m_maxreadahead, m_allow_other,
m_default_permissions, m_push_symlinks_in, m_ro,
m_pm, m_init_flags, m_kernel_minor_version);
m_pm, m_init_flags, m_kernel_minor_version,
m_maxwrite, m_async);
/*
* FUSE_ACCESS is called almost universally. Expecting it in
* each test case would be super-annoying. Instead, set a

View file

@ -40,9 +40,12 @@ inline void nap()
usleep(NAP_NS / 1000);
}
extern const uint32_t libfuse_max_write;
extern const uint32_t default_max_write;
class FuseTest : public ::testing::Test {
protected:
uint32_t m_maxreadahead;
uint32_t m_maxwrite;
uint32_t m_init_flags;
bool m_allow_other;
bool m_default_permissions;
@ -50,6 +53,7 @@ class FuseTest : public ::testing::Test {
enum poll_method m_pm;
bool m_push_symlinks_in;
bool m_ro;
bool m_async;
MockFS *m_mock = NULL;
const static uint64_t FH = 0xdeadbeef1a7ebabe;
@ -62,13 +66,15 @@ class FuseTest : public ::testing::Test {
* be lowered
*/
m_maxreadahead(UINT_MAX),
m_maxwrite(default_max_write),
m_init_flags(0),
m_allow_other(false),
m_default_permissions(false),
m_kernel_minor_version(FUSE_KERNEL_MINOR_VERSION),
m_pm(BLOCKING),
m_push_symlinks_in(false),
m_ro(false)
m_ro(false),
m_async(false)
{}
virtual void SetUp();

View file

@ -29,7 +29,7 @@
*/
extern "C" {
#include <sys/types.h>
#include <sys/param.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/stat.h>
@ -179,6 +179,19 @@ void expect_write(uint64_t ino, uint64_t offset, uint64_t isize,
}
};
/* Tests for clustered writes with WriteBack caching */
class WriteCluster: public WriteBack {
public:
	/*
	 * Configure the fixture before delegating to WriteBack::SetUp():
	 *  - m_async requests the "async" mount option.
	 *  - m_maxwrite raises the max_write value advertised to the kernel
	 *    to MAXPHYS.
	 *
	 * The tests are skipped when MAXPHYS is less than twice DFLTPHYS.
	 * GTEST_SKIP() returns from SetUp(), so nothing is mounted in that
	 * case.
	 */
	virtual void SetUp() {
		if (MAXPHYS < 2 * DFLTPHYS)
			GTEST_SKIP() << "MAXPHYS must be at least twice DFLTPHYS"
				<< " for this test";
		m_async = true;
		m_maxwrite = MAXPHYS;
		WriteBack::SetUp();
	}
};
/*
 * Signal handler that records delivery of a signal by setting
 * Write::s_sigxfsz to 1.  The signal number argument is ignored.
 * NOTE(review): presumably installed for SIGXFSZ, judging by the name;
 * confirm at the place where the handler is registered.
 */
void sigxfsz_handler(int __unused sig) {
Write::s_sigxfsz = 1;
}
@ -617,7 +630,7 @@ TEST_F(Write, write_large)
int fd;
ssize_t halfbufsize, bufsize;
halfbufsize = m_mock->m_max_write;
halfbufsize = m_mock->m_maxwrite;
bufsize = halfbufsize * 2;
contents = (int*)malloc(bufsize);
ASSERT_NE(NULL, contents);
@ -711,6 +724,89 @@ TEST_F(WriteBack, close)
close(fd);
}
/*
 * In writeback mode, adjacent writes will be clustered together.
 *
 * The test issues four sequential bufsize-byte writes and expects the server
 * to see exactly two FUSE_WRITE requests of 2 * bufsize bytes each.
 */
TEST_F(WriteCluster, clustering)
{
	const char FULLPATH[] = "mountpoint/some_file.txt";
	const char RELPATH[] = "some_file.txt";
	uint64_t ino = 42;
	int i, fd;
	void *wbuf, *wbuf2x;
	ssize_t bufsize = 65536;
	off_t filesize = 327680;

	/* Payload for each individual write(2) call */
	wbuf = malloc(bufsize);
	ASSERT_NE(NULL, wbuf) << strerror(errno);
	memset(wbuf, 'X', bufsize);
	/* Expected payload of each clustered FUSE_WRITE */
	wbuf2x = malloc(2 * bufsize);
	ASSERT_NE(NULL, wbuf2x) << strerror(errno);
	memset(wbuf2x, 'X', 2 * bufsize);

	expect_lookup(RELPATH, ino, filesize);
	expect_open(ino, 0, 1);
	/*
	 * Writes of bufsize-bytes each should be clustered into greater sizes.
	 * The amount of clustering is adaptive, so the first write actually
	 * issued will be 2x bufsize and subsequent writes may be larger
	 */
	expect_write(ino, 0, 2 * bufsize, 2 * bufsize, wbuf2x);
	expect_write(ino, 2 * bufsize, 2 * bufsize, 2 * bufsize, wbuf2x);
	expect_flush(ino, 1, ReturnErrno(0));
	expect_release(ino, ReturnErrno(0));

	fd = open(FULLPATH, O_RDWR);
	ASSERT_LE(0, fd) << strerror(errno);

	for (i = 0; i < 4; i++) {
		ASSERT_EQ(bufsize, write(fd, wbuf, bufsize))
			<< strerror(errno);
	}
	close(fd);

	/*
	 * Release the buffers only after close().  wbuf2x is referenced by
	 * the write expectations above, so it must outlive the writes.
	 */
	free(wbuf2x);
	free(wbuf);
}
/*
 * When clustering writes, an I/O error to any of the cluster's children should
 * not panic the system on unmount
 *
 * Disabled because it panics.
 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=238565
 */
TEST_F(WriteCluster, DISABLED_cluster_write_err)
{
	const char FULLPATH[] = "mountpoint/some_file.txt";
	const char RELPATH[] = "some_file.txt";
	uint64_t ino = 42;
	int i, fd;
	void *wbuf;
	ssize_t bufsize = 65536;
	off_t filesize = 262144;

	wbuf = malloc(bufsize);
	ASSERT_NE(NULL, wbuf) << strerror(errno);
	memset(wbuf, 'X', bufsize);

	expect_lookup(RELPATH, ino, filesize);
	expect_open(ino, 0, 1);
	/* Fail every FUSE_WRITE, whatever its offset or size, with EIO */
	EXPECT_CALL(*m_mock, process(
		ResultOf([=](auto in) {
			return (in.header.opcode == FUSE_WRITE);
		}, Eq(true)),
		_)
	).WillRepeatedly(Invoke(ReturnErrno(EIO)));
	expect_flush(ino, 1, ReturnErrno(0));
	expect_release(ino, ReturnErrno(0));

	fd = open(FULLPATH, O_RDWR);
	ASSERT_LE(0, fd) << strerror(errno);

	for (i = 0; i < 3; i++) {
		ASSERT_EQ(bufsize, write(fd, wbuf, bufsize))
			<< strerror(errno);
	}
	close(fd);

	/* The expectations never reference wbuf, so it can be released now */
	free(wbuf);
}
/*
* In writeback mode, writes to an O_WRONLY file could trigger reads from the
* server. The FUSE protocol explicitly allows that.