From e97ad33a89a78f55280b0485b3249ee9b907a718 Mon Sep 17 00:00:00 2001 From: Doug Rabson Date: Tue, 6 Dec 2022 13:07:46 +0000 Subject: [PATCH] Add an implementation of the 9P filesystem This is derived from swills@ fork of the Juniper virtfs with many changes by me including bug fixes, style improvements, clearer layering and more consistent logging. The filesystem is renamed to p9fs to better reflect its function and to prevent possible future confusion with virtio-fs. Several updates and fixes from Juniper have been integrated into this version by Val Packett and these contributions along with the original Juniper authors are credited below. To use this with bhyve, add 'virtio_p9fs_load=YES' to loader.conf. The bhyve virtio-9p device allows access from the guest to files on the host by mapping a 'sharename' to a host path. It is possible to use p9fs as a root filesystem by adding this to /boot/loader.conf: vfs.root.mountfrom="p9fs:sharename" for non-root filesystems add something like this to /etc/fstab: sharename /mnt p9fs rw 0 0 In both examples, substitute the share name used on the bhyve command line. The 9P filesystem protocol relies on stateful file opens which map protocol-level FIDs to host file descriptors. The FreeBSD vnode interface doesn't really support this and we use heuristics to guess the right FID to use for file operations. This can be confused by privilege lowering and does not guarantee that the FID created for a given file open is always used for file operations, even if the calling process is using the file descriptor from the original open call. Improving this would involve changes to the vnode interface which is out-of-scope for this import. 
Differential Revision: https://reviews.freebsd.org/D41844 Reviewed by: kib, emaste, dch MFC after: 3 months Co-authored-by: Val Packett Co-authored-by: Ka Ho Ng Co-authored-by: joyu Co-authored-by: Kumara Babu Narayanaswamy --- share/man/man5/Makefile | 1 + share/man/man5/p9fs.5 | 127 ++ sys/conf/files | 7 + sys/conf/options | 1 + sys/dev/virtio/p9fs/virtio_p9fs.c | 511 +++++++ sys/dev/virtio/p9fs/virtio_p9fs.h | 39 + sys/fs/p9fs/p9_client.c | 1311 ++++++++++++++++++ sys/fs/p9fs/p9_client.h | 168 +++ sys/fs/p9fs/p9_debug.h | 45 + sys/fs/p9fs/p9_protocol.c | 632 +++++++++ sys/fs/p9fs/p9_protocol.h | 280 ++++ sys/fs/p9fs/p9_transport.c | 70 + sys/fs/p9fs/p9_transport.h | 53 + sys/fs/p9fs/p9fs.h | 203 +++ sys/fs/p9fs/p9fs_proto.h | 42 + sys/fs/p9fs/p9fs_subr.c | 411 ++++++ sys/fs/p9fs/p9fs_vfsops.c | 602 ++++++++ sys/fs/p9fs/p9fs_vnops.c | 2148 +++++++++++++++++++++++++++++ sys/kern/vfs_mountroot.c | 1 + sys/modules/Makefile | 1 + sys/modules/p9fs/Makefile | 8 + sys/modules/virtio/Makefile | 2 +- sys/modules/virtio/p9fs/Makefile | 32 + 23 files changed, 6694 insertions(+), 1 deletion(-) create mode 100644 share/man/man5/p9fs.5 create mode 100644 sys/dev/virtio/p9fs/virtio_p9fs.c create mode 100644 sys/dev/virtio/p9fs/virtio_p9fs.h create mode 100644 sys/fs/p9fs/p9_client.c create mode 100644 sys/fs/p9fs/p9_client.h create mode 100644 sys/fs/p9fs/p9_debug.h create mode 100644 sys/fs/p9fs/p9_protocol.c create mode 100644 sys/fs/p9fs/p9_protocol.h create mode 100644 sys/fs/p9fs/p9_transport.c create mode 100644 sys/fs/p9fs/p9_transport.h create mode 100644 sys/fs/p9fs/p9fs.h create mode 100644 sys/fs/p9fs/p9fs_proto.h create mode 100644 sys/fs/p9fs/p9fs_subr.c create mode 100644 sys/fs/p9fs/p9fs_vfsops.c create mode 100644 sys/fs/p9fs/p9fs_vnops.c create mode 100644 sys/modules/p9fs/Makefile create mode 100644 sys/modules/virtio/p9fs/Makefile diff --git a/share/man/man5/Makefile b/share/man/man5/Makefile index bc345b42717c..465cc85a3feb 100644 --- 
a/share/man/man5/Makefile +++ b/share/man/man5/Makefile @@ -35,6 +35,7 @@ MAN= acct.5 \ nsmb.conf.5 \ nsswitch.conf.5 \ os-release.5 \ + p9fs.5 \ passwd.5 \ pbm.5 \ periodic.conf.5 \ diff --git a/share/man/man5/p9fs.5 b/share/man/man5/p9fs.5 new file mode 100644 index 000000000000..5c110e3dc963 --- /dev/null +++ b/share/man/man5/p9fs.5 @@ -0,0 +1,127 @@ +.\" +.\" Copyright (c) 2022-present Doug Rabson +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS DOCUMENTATION IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+.\" +.Dd December 7, 2022 +.Dt P9FS 5 +.Os +.Sh NAME +.Nm p9fs +.Nd "9P file system" +.Sh SYNOPSIS +To use this filesystem, +either add the following to the kernel config: +.Bd -ragged -offset indent +.Cd "options P9FS" +.Cd "device virtio_p9fs" +.Ed +.Pp +Alternatively, load the driver as a kernel module, +either at boot time by adding the following to +.Xr loader.conf 5 : +.Bd -literal -offset indent +virtio_p9fs_load="YES" +.Ed +.Pp +or on system startup using the command: +.Pp +.Dl "# sysrc kld_list+=virtio_p9fs" +.Sh DESCRIPTION +The +.Nm +filesystem uses the 9P protocol to mount a host file system directory +into a +.Xr bhyve 8 +guest. +Multiple host directories can be accessed using the +.Xr bhyve 8 +virtio-9p virtual PCI device. +Each device is configured with a share name and a host directory path. +The share name can be used with +.Xr mount 8 +to mount the host directory in the guest: +.Pp +.Dl "# mount -t p9fs mysharename /mnt" +.Pp +Host directories can be mounted on system startup using +.Xr fstab 5 +like this: +.Pp +.Bd -literal -offset indent +mysharename /mnt p9fs rw 0 0 +.Ed +.Pp +Using +.Nm +as a root file system is supported by adding the following to +.Xr loader.conf 5 : +.Bd -literal -offset indent +vfs.root.mountfrom="p9fs:mysharename" +.Ed +.Sh LIMITATIONS +The 9P protocol relies on stateful file opens +which map protocol-level FIDs to host file descriptors. +The FreeBSD vnode interface doesn't support this and +.Nm +uses heuristics to guess the right FID to use for file operations. +.Pp +This can be confused by privilege lowering and +does not guarantee that the FID created for a +given file open is always used, +even if the calling process is using the file descriptor from +the original open call. +.Pp +In particular, accessing unlinked files using open file descriptor +may not work correctly. 
+If +.Nm +is the root filesystem, +it is recommended to use it together with +.Xr tmpfs 5 +to ensure that temporary files created in +.Pa /tmp +or +.Pa /var/tmp +have the expected semantics. +.Sh SEE ALSO +.Xr fstab 5 +.Sh HISTORY +The 9P protocol first appeared in the Plan 9 operating system. +More recently, the protocol has been widely used with virtual machines +to allow the use of host file resources inside a guest VM. +.Sh AUTHORS +This is derived from software released by Juniper Networks, Inc. +with many improvements and fixes from +.An Steve Wills . +.Pp +This manual page was written by +.An -nosplit +.An Doug Rabson Aq Mt dfr@FreeBSD.org . +.Sh BUGS +A better name for this filesystem would be +.Ar 9pfs +but for technical reasons, +the names of filesystems must be valid C identifiers. +As a compromise, +the filesystem is named +.Nm . diff --git a/sys/conf/files b/sys/conf/files index 875021aaa357..968894ea948b 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -3444,6 +3444,7 @@ dev/virtio/balloon/virtio_balloon.c optional virtio_balloon dev/virtio/block/virtio_blk.c optional virtio_blk dev/virtio/console/virtio_console.c optional virtio_console dev/virtio/gpu/virtio_gpu.c optional virtio_gpu +dev/virtio/p9fs/virtio_p9fs.c optional virtio_p9fs dev/virtio/random/virtio_random.c optional virtio_random dev/virtio/scmi/virtio_scmi.c optional virtio_scmi dev/virtio/scsi/virtio_scsi.c optional virtio_scsi @@ -3593,6 +3594,12 @@ fs/nfsserver/nfs_nfsdcache.c optional nfsd inet fs/nullfs/null_subr.c optional nullfs fs/nullfs/null_vfsops.c optional nullfs fs/nullfs/null_vnops.c optional nullfs +fs/p9fs/p9_client.c optional p9fs +fs/p9fs/p9_protocol.c optional p9fs +fs/p9fs/p9_transport.c optional p9fs +fs/p9fs/p9fs_subr.c optional p9fs +fs/p9fs/p9fs_vfsops.c optional p9fs +fs/p9fs/p9fs_vnops.c optional p9fs fs/procfs/procfs.c optional procfs fs/procfs/procfs_dbregs.c optional procfs fs/procfs/procfs_fpregs.c optional procfs diff --git a/sys/conf/options b/sys/conf/options index
d9bc981232c4..52fafffabd99 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -284,6 +284,7 @@ TMPFS opt_dontuse.h UDF opt_dontuse.h UNIONFS opt_dontuse.h ZFS opt_dontuse.h +P9FS opt_dontuse.h # Pseudofs debugging PSEUDOFS_TRACE opt_pseudofs.h diff --git a/sys/dev/virtio/p9fs/virtio_p9fs.c b/sys/dev/virtio/p9fs/virtio_p9fs.c new file mode 100644 index 000000000000..48430b4f6b67 --- /dev/null +++ b/sys/dev/virtio/p9fs/virtio_p9fs.c @@ -0,0 +1,511 @@ +/*- + * Copyright (c) 2017 Juniper Networks, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +/* + * The Virtio 9P transport driver. This file contains all functions related to + * the virtqueue infrastructure which include creating the virtqueue, host + * interactions, interrupts etc. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#define VT9P_MTX(_sc) (&(_sc)->vt9p_mtx) +#define VT9P_LOCK(_sc) mtx_lock(VT9P_MTX(_sc)) +#define VT9P_UNLOCK(_sc) mtx_unlock(VT9P_MTX(_sc)) +#define VT9P_LOCK_INIT(_sc) mtx_init(VT9P_MTX(_sc), \ + "VIRTIO 9P CHAN lock", NULL, MTX_DEF) +#define VT9P_LOCK_DESTROY(_sc) mtx_destroy(VT9P_MTX(_sc)) +#define MAX_SUPPORTED_SGS 20 +static MALLOC_DEFINE(M_P9FS_MNTTAG, "p9fs_mount_tag", "P9fs Mounttag"); + +struct vt9p_softc { + device_t vt9p_dev; + struct mtx vt9p_mtx; + struct sglist *vt9p_sglist; + struct cv submit_cv; + bool busy; + struct virtqueue *vt9p_vq; + int max_nsegs; + uint16_t mount_tag_len; + char *mount_tag; + STAILQ_ENTRY(vt9p_softc) chan_next; +}; + +/* Global channel list, Each channel will correspond to a mount point */ +static STAILQ_HEAD( ,vt9p_softc) global_chan_list; +struct mtx global_chan_list_mtx; + +static struct virtio_feature_desc virtio_9p_feature_desc[] = { + { VIRTIO_9PNET_F_MOUNT_TAG, "9PMountTag" }, + { 0, NULL } +}; + +static void +global_chan_list_init(void) +{ + + mtx_init(&global_chan_list_mtx, "9pglobal", + NULL, MTX_DEF); + STAILQ_INIT(&global_chan_list); +} +SYSINIT(global_chan_list_init, SI_SUB_KLD, SI_ORDER_FIRST, + global_chan_list_init, NULL); + +/* We don't currently allow canceling of virtio requests */ +static int +vt9p_cancel(void *handle, struct p9_req_t *req) +{ + + return (1); +} + +SYSCTL_NODE(_vfs, OID_AUTO, 9p, CTLFLAG_RW, 0, "9P File System Protocol"); + +/* + * Maximum number of seconds vt9p_request thread sleep waiting for an + * ack from the host, before exiting + */ +static unsigned int vt9p_ackmaxidle = 120; + +SYSCTL_UINT(_vfs_9p, OID_AUTO, ackmaxidle, CTLFLAG_RW, &vt9p_ackmaxidle, 0, + "Maximum time request thread waits for ack from host"); + +/* + * Wait for completion of a p9 request. 
+ *
+ * This routine will sleep and release the chan mtx during the period.
+ * chan mtx will be acquired again upon return.
+ *
+ * Returns 0 once the reply tag equals the request tag, or EIO when
+ * msleep() returns non-zero (normally the vt9p_ackmaxidle timeout).
+ */
+static int
+vt9p_req_wait(struct vt9p_softc *chan, struct p9_req_t *req)
+{
+	int error;
+
+	/* The completion handler copies tc->tag into rc->tag when done. */
+	if (req->tc->tag == req->rc->tag)
+		return (0);
+
+	error = msleep(req, VT9P_MTX(chan), 0, "chan lock",
+	    vt9p_ackmaxidle * hz);
+	if (error != 0) {
+		/*
+		 * No response from the host within vt9p_ackmaxidle seconds.
+		 * Can't wait for ever..
+		 */
+		P9_DEBUG(ERROR, "Timeout after waiting %u seconds "
+		    "for an ack from host\n", vt9p_ackmaxidle);
+		return (EIO);
+	}
+	KASSERT(req->tc->tag == req->rc->tag,
+	    ("Spurious event on p9 req"));
+	return (0);
+}
+
+/*
+ * Request handler. This is called for every request submitted to the host
+ * It basically maps the tc/rc buffers to sg lists and submits the requests
+ * into the virtqueue. Since we have implemented a synchronous version, the
+ * submission thread sleeps until the ack in the interrupt wakes it up. Once
+ * it wakes up, it returns back to the P9fs layer. The rc buffer is then
+ * processed and completed to its upper layers.
+ */
+static int
+vt9p_request(void *handle, struct p9_req_t *req)
+{
+	struct vt9p_softc *chan;
+	struct virtqueue *vq;
+	struct sglist *sg;
+	int error, readable, writable;
+
+	chan = handle;
+	sg = chan->vt9p_sglist;
+	vq = chan->vt9p_vq;
+
+	P9_DEBUG(TRANS, "%s: req=%p\n", __func__, req);
+
+	/* Grab the channel lock*/
+	VT9P_LOCK(chan);
+	for (;;) {
+		/*
+		 * The sglist is shared by every submitter on this channel.
+		 * It must be rebuilt on each attempt: cv_wait() below drops
+		 * the channel lock, so another thread may have reset and
+		 * refilled the list with its own buffers in the meantime.
+		 */
+		sglist_reset(sg);
+
+		/* Device-readable part: the marshalled request. */
+		error = sglist_append(sg, req->tc->sdata, req->tc->size);
+		if (error != 0) {
+			P9_DEBUG(ERROR, "%s: sglist append failed\n", __func__);
+			goto out;
+		}
+		readable = sg->sg_nseg;
+
+		/* Device-writable part: room for the reply. */
+		error = sglist_append(sg, req->rc->sdata, req->rc->capacity);
+		if (error != 0) {
+			P9_DEBUG(ERROR, "%s: sglist append failed\n", __func__);
+			goto out;
+		}
+		writable = sg->sg_nseg - readable;
+
+		error = virtqueue_enqueue(vq, req, sg, readable, writable);
+		if (error != ENOSPC)
+			break;
+
+		/*
+		 * Ring is full. Sleep until the completion handler signals
+		 * submit_cv, then start over.
+		 */
+		cv_wait(&chan->submit_cv, VT9P_MTX(chan));
+		P9_DEBUG(TRANS, "%s: retry virtio request\n", __func__);
+	}
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: virtio enqueue failed\n", __func__);
+		error = EIO;
+		goto out;
+	}
+
+	/* We have to notify */
+	virtqueue_notify(vq);
+
+	/* Synchronous transport: block until the reply has arrived. */
+	error = vt9p_req_wait(chan, req);
+out:
+	VT9P_UNLOCK(chan);
+
+	if (error == 0)
+		P9_DEBUG(TRANS, "%s: virtio request kicked\n", __func__);
+
+	return (error);
+}
+
+/*
+ * Completion of the request from the virtqueue. This interrupt handler is
+ * setup at initialization and is called for every completing request. It
+ * just wakes up the sleeping submission requests.
+ */
+static void
+vt9p_intr_complete(void *xsc)
+{
+	struct vt9p_softc *chan;
+	struct virtqueue *vq;
+	struct p9_req_t *curreq;
+
+	chan = (struct vt9p_softc *)xsc;
+	vq = chan->vt9p_vq;
+
+	P9_DEBUG(TRANS, "%s: completing\n", __func__);
+
+	VT9P_LOCK(chan);
+again:
+	while ((curreq = virtqueue_dequeue(vq, NULL)) != NULL) {
+		/* Equal tags are what vt9p_req_wait() treats as "done". */
+		curreq->rc->tag = curreq->tc->tag;
+		wakeup_one(curreq);
+	}
+	/*
+	 * A non-zero return from virtqueue_enable_intr() means more
+	 * requests completed after the loop above finished. Drain them
+	 * now, otherwise their submitters sleep until the timeout.
+	 */
+	if (virtqueue_enable_intr(vq) != 0) {
+		virtqueue_disable_intr(vq);
+		goto again;
+	}
+	/* Ring slots were freed; let a submitter blocked on ENOSPC retry. */
+	cv_signal(&chan->submit_cv);
+	VT9P_UNLOCK(chan);
+}
+
+/*
+ * Allocation of the virtqueue with interrupt complete routines.
+ * Returns the result of virtio_alloc_virtqueues().
+ */
+static int
+vt9p_alloc_virtqueue(struct vt9p_softc *sc)
+{
+	struct vq_alloc_info vq_info;
+	device_t dev;
+
+	dev = sc->vt9p_dev;
+
+	VQ_ALLOC_INFO_INIT(&vq_info, sc->max_nsegs,
+	    vt9p_intr_complete, sc, &sc->vt9p_vq,
+	    "%s request", device_get_nameunit(dev));
+
+	return (virtio_alloc_virtqueues(dev, 1, &vq_info));
+}
+
+/* Probe for existence of 9P virtio channels */
+static int
+vt9p_probe(device_t dev)
+{
+
+	/* If the virtio device type is a 9P device, then we claim and attach it */
+	if (virtio_get_device_type(dev) != VIRTIO_ID_9P)
+		return (ENXIO);
+	device_set_desc(dev, "VirtIO 9P Transport");
+
+	return (BUS_PROBE_DEFAULT);
+}
+
+/* Quiesce the device. Safe to call before the virtqueue exists. */
+static void
+vt9p_stop(struct vt9p_softc *sc)
+{
+
+	/* Device specific stops .*/
+	if (sc->vt9p_vq != NULL)
+		virtqueue_disable_intr(sc->vt9p_vq);
+	virtio_stop(sc->vt9p_dev);
+}
+
+/*
+ * Detach the 9P virtio PCI device.
+ *
+ * Also used by vt9p_attach() to unwind a partially initialised softc, so
+ * every step tolerates state that was never set up. This presumes the bus
+ * hands out a zeroed softc - TODO confirm.
+ */
+static int
+vt9p_detach(device_t dev)
+{
+	struct vt9p_softc *sc, *it;
+
+	sc = device_get_softc(dev);
+
+	/*
+	 * Unpublish the channel first so vt9p_create() cannot look at a
+	 * mount tag that is about to be freed. Only unlink it if it is
+	 * really on the list: attach may have failed before inserting it.
+	 */
+	mtx_lock(&global_chan_list_mtx);
+	STAILQ_FOREACH(it, &global_chan_list, chan_next) {
+		if (it == sc) {
+			STAILQ_REMOVE(&global_chan_list, sc, vt9p_softc,
+			    chan_next);
+			break;
+		}
+	}
+	mtx_unlock(&global_chan_list_mtx);
+
+	VT9P_LOCK(sc);
+	vt9p_stop(sc);
+	VT9P_UNLOCK(sc);
+
+	if (sc->vt9p_sglist) {
+		sglist_free(sc->vt9p_sglist);
+		sc->vt9p_sglist = NULL;
+	}
+	if (sc->mount_tag) {
+		free(sc->mount_tag, M_P9FS_MNTTAG);
+		sc->mount_tag = NULL;
+	}
+
+	VT9P_LOCK_DESTROY(sc);
+	cv_destroy(&sc->submit_cv);
+
+	return (0);
+}
+
+/*
+ * Attach the 9P virtio PCI device.
+ *
+ * Reads the mount tag from the device config, sets up the single request
+ * virtqueue and its interrupt, and publishes the channel on the global
+ * list. On failure the softc is unwound through vt9p_detach() and an
+ * errno is returned.
+ */
+static int
+vt9p_attach(device_t dev)
+{
+	struct sysctl_ctx_list *ctx;
+	struct sysctl_oid *tree;
+	struct vt9p_softc *chan;
+	char *mount_tag;
+	int error;
+	uint16_t mount_tag_len;
+
+	chan = device_get_softc(dev);
+	chan->vt9p_dev = dev;
+
+	/* Init the channel lock. */
+	VT9P_LOCK_INIT(chan);
+	/* Initialize the condition variable */
+	cv_init(&chan->submit_cv, "Conditional variable for submit queue" );
+	chan->max_nsegs = MAX_SUPPORTED_SGS;
+	chan->vt9p_sglist = sglist_alloc(chan->max_nsegs, M_NOWAIT);
+
+	/* Negotiate the features from the host */
+	virtio_set_feature_desc(dev, virtio_9p_feature_desc);
+	virtio_negotiate_features(dev, VIRTIO_9PNET_F_MOUNT_TAG);
+
+	/*
+	 * If mount tag feature is supported read the mount tag
+	 * from device config
+	 */
+	if (virtio_with_feature(dev, VIRTIO_9PNET_F_MOUNT_TAG))
+		mount_tag_len = virtio_read_dev_config_2(dev,
+		    offsetof(struct virtio_9pnet_config, mount_tag_len));
+	else {
+		error = EINVAL;
+		P9_DEBUG(ERROR, "%s: Mount tag feature not supported by host\n", __func__);
+		goto out;
+	}
+	/* One extra zeroed byte: the tag in device config is not terminated. */
+	mount_tag = malloc(mount_tag_len + 1, M_P9FS_MNTTAG,
+	    M_WAITOK | M_ZERO);
+
+	virtio_read_device_config(dev,
+	    offsetof(struct virtio_9pnet_config, mount_tag),
+	    mount_tag, mount_tag_len);
+
+	device_printf(dev, "Mount tag: %s\n", mount_tag);
+
+	/* The stored length includes the terminating NUL. */
+	mount_tag_len++;
+	chan->mount_tag_len = mount_tag_len;
+	chan->mount_tag = mount_tag;
+
+	ctx = device_get_sysctl_ctx(dev);
+	tree = device_get_sysctl_tree(dev);
+	SYSCTL_ADD_STRING(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "p9fs_mount_tag",
+	    CTLFLAG_RD, chan->mount_tag, 0, "Mount tag");
+
+	if (chan->vt9p_sglist == NULL) {
+		error = ENOMEM;
+		P9_DEBUG(ERROR, "%s: Cannot allocate sglist\n", __func__);
+		goto out;
+	}
+
+	/* We expect one virtqueue, for requests. */
+	error = vt9p_alloc_virtqueue(chan);
+
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: Allocating the virtqueue failed \n", __func__);
+		goto out;
+	}
+
+	error = virtio_setup_intr(dev, INTR_TYPE_MISC|INTR_MPSAFE);
+
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: Cannot setup virtqueue interrupt\n", __func__);
+		goto out;
+	}
+
+	/*
+	 * The return value of virtqueue_enable_intr() is a "completions
+	 * already pending" indication, not an errno, so it must not fail
+	 * the attach. Nothing has been submitted yet at this point.
+	 */
+	(void)virtqueue_enable_intr(chan->vt9p_vq);
+
+	mtx_lock(&global_chan_list_mtx);
+	/* Insert the channel in global channel list */
+	STAILQ_INSERT_HEAD(&global_chan_list, chan, chan_next);
+	mtx_unlock(&global_chan_list_mtx);
+
+	return (0);
+out:
+	/* Something went wrong, detach the device */
+	vt9p_detach(dev);
+	return (error);
+}
+
+/*
+ * Allocate a new virtio channel. This sets up a transport channel
+ * for 9P communication
+ *
+ * Returns 0 and stores the channel in *handlep, EINVAL when no channel
+ * carries mount_tag, or EBUSY when the channel already has a client.
+ */
+static int
+vt9p_create(const char *mount_tag, void **handlep)
+{
+	struct vt9p_softc *sc, *chan;
+	int error;
+
+	chan = NULL;
+
+	/*
+	 * Find out the corresponding channel for a client from global list
+	 * of channels based on mount tag and attach it to client
+	 */
+	mtx_lock(&global_chan_list_mtx);
+	STAILQ_FOREACH(sc, &global_chan_list, chan_next) {
+		if (!strcmp(sc->mount_tag, mount_tag)) {
+			chan = sc;
+			break;
+		}
+	}
+
+	/*
+	 * If chan is already attached to a client then it cannot be used for
+	 * another client. The test and the claim happen under the list lock
+	 * so two concurrent mounts cannot both take the same channel.
+	 */
+	if (chan == NULL) {
+		/* If we dont have one, for now bail out.*/
+		error = EINVAL;
+	} else if (chan->busy) {
+		error = EBUSY;
+	} else {
+		chan->busy = TRUE;
+		*handlep = (void *)chan;
+		error = 0;
+	}
+	mtx_unlock(&global_chan_list_mtx);
+
+	if (error == EINVAL)
+		P9_DEBUG(TRANS, "%s: No Global channel with mount_tag=%s\n",
+		    __func__, mount_tag);
+
+	return (error);
+}
+
+/* Release a channel claimed by vt9p_create() so it can be mounted again. */
+static void
+vt9p_close(void *handle)
+{
+	struct vt9p_softc *chan = handle;
+
+	/* Same lock as the claim in vt9p_create(). */
+	mtx_lock(&global_chan_list_mtx);
+	chan->busy = FALSE;
+	mtx_unlock(&global_chan_list_mtx);
+}
+
+/* Transport operations handed to the 9P client layer. */
+static struct p9_trans_module vt9p_trans = {
+	.name = "virtio",
+	.create = vt9p_create,
+	.close = vt9p_close,
+	.request = vt9p_request,
+	.cancel = vt9p_cancel,
+};
+
+static device_method_t vt9p_mthds[] = {
+	/* Device methods. */
+	DEVMETHOD(device_probe, vt9p_probe),
+	DEVMETHOD(device_attach, vt9p_attach),
+	DEVMETHOD(device_detach, vt9p_detach),
+	DEVMETHOD_END
+};
+
+static driver_t vt9p_drv = {
+	"virtio_p9fs",
+	vt9p_mthds,
+	sizeof(struct vt9p_softc)
+};
+
+/*
+ * Module event handler: creates the client zones and registers the
+ * transport on load, destroys the zones on unload.
+ */
+static int
+vt9p_modevent(module_t mod, int type, void *unused)
+{
+	int error;
+
+	error = 0;
+
+	switch (type) {
+	case MOD_LOAD:
+		p9_init_zones();
+		p9_register_trans(&vt9p_trans);
+		break;
+	case MOD_UNLOAD:
+		/*
+		 * NOTE(review): vt9p_trans stays registered after unload;
+		 * confirm whether the transport layer needs a matching
+		 * unregister call here.
+		 */
+		p9_destroy_zones();
+		break;
+	case MOD_SHUTDOWN:
+		break;
+	default:
+		error = EOPNOTSUPP;
+		break;
+	}
+	return (error);
+}
+
+DRIVER_MODULE(virtio_p9fs, virtio_pci, vt9p_drv, vt9p_modevent, 0);
+MODULE_VERSION(virtio_p9fs, 1);
+MODULE_DEPEND(virtio_p9fs, virtio, 1, 1, 1);
+MODULE_DEPEND(virtio_p9fs, p9fs, 1, 1, 1);
diff --git a/sys/dev/virtio/p9fs/virtio_p9fs.h b/sys/dev/virtio/p9fs/virtio_p9fs.h
new file mode 100644
index 000000000000..924b413d29a5
--- /dev/null
+++ b/sys/dev/virtio/p9fs/virtio_p9fs.h
@@ -0,0 +1,39 @@
+/*-
+ * Copyright (c) 2017 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef __VIRTIO_9P_CONFIG__ +#define __VIRTIO_9P_CONFIG__ + +/* Mount point feature specified in config variable */ +#define VIRTIO_9PNET_F_MOUNT_TAG 1 + +struct virtio_9pnet_config { + /* Mount tag length */ + uint16_t mount_tag_len; + /* non NULL terminated tag name */ + uint8_t mount_tag[0]; +}; +#endif /* __VIRTIO_9P_CONFIG__ */ diff --git a/sys/fs/p9fs/p9_client.c b/sys/fs/p9fs/p9_client.c new file mode 100644 index 000000000000..8f36cc4e775a --- /dev/null +++ b/sys/fs/p9fs/p9_client.c @@ -0,0 +1,1311 @@ +/*- + * Copyright (c) 2017 Juniper Networks, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * This file contains 9P client functions which prepares message to be sent to + * the server. Every fileop typically has a function defined here to interact + * with the host. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#define QEMU_HEADER 7
+#define P9FS_MAX_FID_CNT (1024 * 1024 * 1024)
+#define P9FS_ROOT_FID_NO 2
+#define P9FS_MIN_TAG 1
+#define P9FS_MAX_TAG 65535
+#define WSTAT_SIZE 47
+#define WSTAT_EXTENSION_SIZE 14
+
+static MALLOC_DEFINE(M_P9CLNT, "p9_client", "p9fs client structure");
+static uma_zone_t p9fs_fid_zone;
+static uma_zone_t p9fs_req_zone;
+static uma_zone_t p9fs_buf_zone;
+
+SYSCTL_DECL(_vfs_p9fs);
+int p9_debug_level = 0;
+SYSCTL_INT(_vfs_p9fs, OID_AUTO, debug_level, CTLFLAG_RW,
+    &p9_debug_level, 0, "p9fs debug logging");
+
+static struct p9_req_t *p9_get_request(struct p9_client *c, int *error);
+static struct p9_req_t *p9_client_request(
+    struct p9_client *c, int8_t type, int *error, const char *fmt, ...);
+
+/* Non-zero when the client speaks 9P2000.L. */
+inline int
+p9_is_proto_dotl(struct p9_client *clnt)
+{
+
+	return (clnt->proto_version == p9_proto_2000L);
+}
+
+/* Non-zero when the client speaks 9P2000.u. */
+inline int
+p9_is_proto_dotu(struct p9_client *clnt)
+{
+
+	return (clnt->proto_version == p9_proto_2000u);
+}
+
+/*
+ * Parse mount options into client structure.
+ *
+ * Returns 0 on success or the vfs_getopt() error when the "trans" option
+ * is present but cannot be read. clnt->ops is left as returned by
+ * p9_get_trans_by_name(); the caller must check it.
+ */
+static int
+p9_parse_opts(struct mount *mp, struct p9_client *clnt)
+{
+	int error, len;
+	char *trans;
+
+	/*
+	 * Default to virtio since thats the only transport we have for now.
+	 */
+	error = vfs_getopt(mp->mnt_optnew, "trans", (void **)&trans, &len);
+	if (error == ENOENT)
+		trans = "virtio";
+	else if (error != 0)
+		return (error);	/* trans was not set; do not use it */
+
+	/* These are defaults for now */
+	clnt->proto_version = p9_proto_2000L;
+	clnt->msize = 8192;
+
+	/* Get the default trans callback */
+	clnt->ops = p9_get_trans_by_name(trans);
+
+	return (0);
+}
+
+/*
+ * Allocate buffer for sending request and getting responses.
+ * The payload (sdata) lives in the same zone item, directly behind the
+ * struct p9_buffer header.
+ */
+static struct p9_buffer *
+p9_buffer_alloc(int alloc_msize)
+{
+	struct p9_buffer *fc;
+
+	fc = uma_zalloc(p9fs_buf_zone, M_WAITOK | M_ZERO);
+	fc->capacity = alloc_msize;
+	fc->offset = 0;
+	fc->size = 0;
+	fc->sdata = (char *)fc + sizeof(struct p9_buffer);
+
+	return (fc);
+}
+
+/* Free memory used by request and response buffers; clears *buf. */
+static void
+p9_buffer_free(struct p9_buffer **buf)
+{
+
+	/* Header and payload are one zone item, so one free releases both. */
+	uma_zfree(p9fs_buf_zone, *buf);
+	*buf = NULL;
+}
+
+/* Free the request, its buffers and (if one was assigned) its tag. */
+static void
+p9_free_req(struct p9_client *clnt, struct p9_req_t *req)
+{
+
+	if (req->tc != NULL) {
+		if (req->tc->tag != P9_NOTAG)
+			p9_tag_destroy(clnt, req->tc->tag);
+		p9_buffer_free(&req->tc);
+	}
+
+	if (req->rc != NULL)
+		p9_buffer_free(&req->rc);
+
+	uma_zfree(p9fs_req_zone, req);
+}
+
+/*
+ * Allocate a request by tag.
+ *
+ * Returns the request with *error set to 0, or NULL with *error set to
+ * EAGAIN when no tag is available.
+ */
+static struct p9_req_t *
+p9_get_request(struct p9_client *clnt, int *error)
+{
+	struct p9_req_t *req;
+	int alloc_msize;
+	uint16_t tag;
+
+	alloc_msize = P9FS_MTU;
+
+	req = uma_zalloc(p9fs_req_zone, M_WAITOK | M_ZERO);
+	req->tc = p9_buffer_alloc(alloc_msize);
+	req->rc = p9_buffer_alloc(alloc_msize);
+
+	tag = p9_tag_create(clnt);
+	if (tag == P9_NOTAG) {
+		*error = EAGAIN;
+		/* Tell p9_free_req() there is no tag to release. */
+		req->tc->tag = P9_NOTAG;
+		p9_free_req(clnt, req);
+		return (NULL);
+	}
+	req->tc->tag = tag;
+	/* Do not leave a stale caller value behind on success. */
+	*error = 0;
+	return (req);
+}
+
+/*
+ * Parse header arguments of the response buffer.
+ *
+ * Fills buf->size, buf->id and buf->tag from the reply header. Returns 0,
+ * the p9_buf_readf() error, or EIO when the advertised size cannot fit
+ * the buffer.
+ */
+static int
+p9_parse_receive(struct p9_buffer *buf, struct p9_client *clnt)
+{
+	int8_t type;
+	int16_t tag;
+	int32_t size;
+	int error;
+
+	buf->offset = 0;
+
+	/* This value is set by QEMU for the header.*/
+	if (buf->size == 0)
+		buf->size = QEMU_HEADER;
+
+	/* This is the initial header. Parse size, type, and tag .*/
+	error = p9_buf_readf(buf, 0, "dbw", &size, &type, &tag);
+	if (error != 0)
+		goto out;
+
+	/*
+	 * The size comes from the host. Reject values smaller than the
+	 * header or larger than the buffer that was handed to the device,
+	 * so later reads stay inside sdata.
+	 */
+	if (size < QEMU_HEADER || (uint32_t)size > (uint32_t)buf->capacity) {
+		error = EIO;
+		goto out;
+	}
+
+	buf->size = size;
+	buf->id = type;
+	buf->tag = tag;
+	P9_DEBUG(TRANS, "%s: size=%d type: %d tag: %d\n",
+	    __func__, buf->size, buf->id, buf->tag);
+out:
+	return (error);
+}
+
+/*
+ * Check 9P response for any errors returned and process it.
+ *
+ * Returns 0 for a normal reply, the error code carried by an
+ * RERROR/RLERROR reply, or the parse error if the reply is malformed.
+ */
+static int
+p9_client_check_return(struct p9_client *c, struct p9_req_t *req)
+{
+	int error;
+	int ecode;
+	char *ename;
+
+	/*
+	 * Defaults in case the reply does not fill these in: the "?d" in
+	 * the RERROR format is presumably version-dependent - TODO confirm
+	 * against p9_buf_readf().
+	 */
+	ecode = EIO;
+	ename = NULL;
+
+	/* Check what we have in the receive bufer .*/
+	error = p9_parse_receive(req->rc, c);
+	if (error != 0)
+		goto out;
+
+	/*
+	 * No error, We are done with the preprocessing. Return to the caller
+	 * and process the actual data.
+	 */
+	if (req->rc->id != P9PROTO_RERROR && req->rc->id != P9PROTO_RLERROR)
+		return (0);
+
+	/*
+	 * Interpreting the error is done in different ways for Linux and
+	 * Unix version. Make sure you interpret it right.
+	 */
+	if (req->rc->id == P9PROTO_RERROR)
+		error = p9_buf_readf(req->rc, c->proto_version, "s?d",
+		    &ename, &ecode);
+	else
+		error = p9_buf_readf(req->rc, c->proto_version, "d", &ecode);
+	if (error != 0)
+		goto out;
+
+	/*
+	 * if there was an ecode error make this the err now. An error reply
+	 * must never be reported as success, so a zero code becomes EIO.
+	 */
+	error = (ecode != 0) ? ecode : EIO;
+
+	/*
+	 * Note this is still not completely an error, as lookups for files
+	 * not present can hit this and return. Hence it is made a debug print.
+	 */
+	if (req->rc->id == P9PROTO_RERROR) {
+		P9_DEBUG(PROTO, "RERROR error %d ename %s\n",
+		    error, ename != NULL ? ename : "");
+		free(ename, M_TEMP);
+	} else {
+		P9_DEBUG(PROTO, "RLERROR error %d\n", error);
+	}
+	return (error);
+
+out:
+	P9_DEBUG(ERROR, "couldn't parse receive buffer error %d\n", error);
+	return (error);
+}
+
+/* State machine changing helpers */
+void p9_client_disconnect(struct p9_client *clnt)
+{
+
+	P9_DEBUG(TRANS, "%s: clnt %p\n", __func__, clnt);
+	clnt->trans_status = P9FS_DISCONNECT;
+}
+
+void p9_client_begin_disconnect(struct p9_client *clnt)
+{
+
+	P9_DEBUG(TRANS, "%s: clnt %p\n", __func__, clnt);
+	clnt->trans_status = P9FS_BEGIN_DISCONNECT;
+}
+
+/*
+ * Allocate a request and marshal a message of the given type into its
+ * tc buffer.
+ *
+ * Returns the request with *error == 0, or NULL with *error set.
+ * req_size is currently unused.
+ */
+static struct p9_req_t *
+p9_client_prepare_req(struct p9_client *c, int8_t type,
+    int req_size, int *error, const char *fmt, __va_list ap)
+{
+	struct p9_req_t *req;
+
+	P9_DEBUG(TRANS, "%s: client %p op %d\n", __func__, c, type);
+
+	/*
+	 * Before we start with the request, check if its possible to finish
+	 * this request. We are allowed to submit the request only if there
+	 * are no close sessions happening or else there can be race. If the
+	 * status is Disconnected, we stop any requests coming in after that.
+	 */
+	if (c->trans_status == P9FS_DISCONNECT) {
+		*error = EIO;
+		return (NULL);
+	}
+
+	/* Allow only cleanup clunk messages once teardown has started. */
+	if ((c->trans_status == P9FS_BEGIN_DISCONNECT) &&
+	    (type != P9PROTO_TCLUNK)) {
+		*error = EIO;
+		return (NULL);
+	}
+
+	/*
+	 * Allocate buffer for transferring and receiving data from host.
+	 * Test the pointer, not *error, which may hold a stale caller value.
+	 */
+	req = p9_get_request(c, error);
+	if (req == NULL) {
+		P9_DEBUG(ERROR, "%s: request allocation failed.\n", __func__);
+		return (NULL);
+	}
+
+	/* Marshall the data according to QEMU standards */
+	*error = p9_buf_prepare(req->tc, type);
+	if (*error != 0) {
+		P9_DEBUG(ERROR, "%s: p9_buf_prepare failed: %d\n",
+		    __func__, *error);
+		goto out;
+	}
+
+	*error = p9_buf_vwritef(req->tc, c->proto_version, fmt, ap);
+	if (*error != 0) {
+		P9_DEBUG(ERROR, "%s: p9_buf_vwrite failed: %d\n",
+		    __func__, *error);
+		goto out;
+	}
+
+	*error = p9_buf_finalize(c, req->tc);
+	if (*error != 0) {
+		P9_DEBUG(ERROR, "%s: p9_buf_finalize failed: %d \n",
+		    __func__, *error);
+		goto out;
+	}
+
+	return (req);
+out:
+	p9_free_req(c, req);
+	return (NULL);
+}
+
+/*
+ * Issue a request and wait for response. The routine takes care of preparing
+ * the 9P request header to be sent, parsing and checking for error conditions
+ * in the received buffer. It returns the request structure, or NULL with
+ * *error set; in the NULL case the request has already been freed.
+ */
+static struct p9_req_t *
+p9_client_request(struct p9_client *c, int8_t type, int *error,
+    const char *fmt, ...)
+{
+	va_list ap;
+	struct p9_req_t *req;
+
+	va_start(ap, fmt);
+	req = p9_client_prepare_req(c, type, c->msize, error, fmt, ap);
+	va_end(ap);
+
+	/* Issue with allocation of request buffer */
+	if (req == NULL)
+		return (NULL);
+
+	/* Call into the transport for submission. */
+	*error = c->ops->request(c->handle, req);
+	if (*error != 0) {
+		P9_DEBUG(ERROR, "%s: failed: %d\n", __func__, *error);
+		goto out;
+	}
+
+	/*
+	 * Before we return, pre process the header and the rc buffer before
+	 * calling into the protocol infra to analyze the data in rc.
+	 */
+	*error = p9_client_check_return(c, req);
+	if (*error != 0)
+		goto out;
+
+	return (req);
+out:
+	p9_free_req(c, req);
+	return (NULL);
+}
+
+/* Setup tag contents and structure; returns P9_NOTAG when none is free. */
+uint16_t
+p9_tag_create(struct p9_client *clnt)
+{
+	int tag;
+
+	tag = alloc_unr(&clnt->tagpool);
+	P9_DEBUG(LPROTO, "%s: clnt %p: tag %d\n", __func__, clnt, tag);
+
+	/* Alloc_unr returning -1 is an error for no units left */
+	if (tag == -1) {
+		return (P9_NOTAG);
+	}
+	return (tag);
+}
+
+/* Clean up tag structures */
+void
+p9_tag_destroy(struct p9_client *clnt, uint16_t tag)
+{
+
+	P9_DEBUG(LPROTO, "%s: clnt %p: tag %d\n", __func__, clnt, tag);
+
+	/* Release to the pool */
+	free_unr(&clnt->tagpool, tag);
+}
+
+/*
+ * Allocate a new fid from the fidpool.
+ * Returns NULL when the pool has no free fid number.
+ */
+struct p9_fid *
+p9_fid_create(struct p9_client *clnt)
+{
+	struct p9_fid *fid;
+
+
+	fid = uma_zalloc(p9fs_fid_zone, M_WAITOK | M_ZERO);
+	fid->fid = alloc_unr(&clnt->fidpool);
+	P9_DEBUG(LPROTO, "%s: fid %d\n", __func__, fid->fid);
+
+	/* Alloc_unr returning -1 is an error for no units left */
+	if (fid->fid == -1) {
+		uma_zfree(p9fs_fid_zone, fid);
+		return (NULL);
+	}
+	fid->mode = -1;
+	fid->uid = -1;
+	fid->clnt = clnt;
+
+	return (fid);
+}
+
+/* Free the fid by releasing it to fidpool */
+void
+p9_fid_destroy(struct p9_fid *fid)
+{
+	struct p9_client *clnt;
+
+	P9_DEBUG(LPROTO, "%s: fid %d\n", __func__, fid->fid);
+	clnt = fid->clnt;
+	/* Release to the pool */
+	free_unr(&clnt->fidpool, fid->fid);
+	uma_zfree(p9fs_fid_zone, fid);
+}
+
+/* Request the version of 9P protocol */
+int
+p9_client_version(struct p9_client *c)
+{
+	int error;
+	struct p9_req_t *req;
+	char *version;
+	int msize;
+
+	error = 0;
+
+	P9_DEBUG(PROTO, "TVERSION msize %d protocol %d\n",
+	    c->msize, c->proto_version);
+
+	switch (c->proto_version) {
+	case p9_proto_2000L:
+		req = p9_client_request(c, P9PROTO_TVERSION, &error, "ds",
+		    c->msize, "9P2000.L");
+		break;
+	case p9_proto_2000u:
+		req = p9_client_request(c, P9PROTO_TVERSION, &error,
"ds", + c->msize, "9P2000.u"); + break; + case p9_proto_legacy: + req = p9_client_request(c, P9PROTO_TVERSION, &error, "ds", + c->msize, "9P2000"); + break; + default: + return (EINVAL); + } + + /* Always return the relevant error code */ + if (error != 0) + return (error); + + error = p9_buf_readf(req->rc, c->proto_version, "ds", &msize, &version); + if (error != 0) { + P9_DEBUG(ERROR, "%s: version error: %d\n", __func__, error); + goto out; + } + + P9_DEBUG(PROTO, "RVERSION msize %d %s\n", msize, version); + + if (!strncmp(version, "9P2000.L", 8)) + c->proto_version = p9_proto_2000L; + else if (!strncmp(version, "9P2000.u", 8)) + c->proto_version = p9_proto_2000u; + else if (!strncmp(version, "9P2000", 6)) + c->proto_version = p9_proto_legacy; + else { + error = ENOMEM; + goto out; + } + + /* limit the msize .*/ + if (msize < c->msize) + c->msize = msize; +out: + p9_free_req(c, req); + return (error); +} + +/* + * Initialize zones for different things. This is called from Init module + * so that we just have them initalized once. 
+ */ +void +p9_init_zones(void) +{ + + /* Create the request and the fid zones */ + p9fs_fid_zone = uma_zcreate("p9fs fid zone", + sizeof(struct p9_fid), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + + /* Create the request and the fid zones */ + p9fs_req_zone = uma_zcreate("p9fs req zone", + sizeof(struct p9_req_t), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + + /* Create the buffer zone */ + p9fs_buf_zone = uma_zcreate("p9fs buf zone", + sizeof(struct p9_buffer) + P9FS_MTU, NULL, NULL, + NULL, NULL, UMA_ALIGN_PTR, 0); +} + +void +p9_destroy_zones(void) +{ + + uma_zdestroy(p9fs_fid_zone); + uma_zdestroy(p9fs_req_zone); + uma_zdestroy(p9fs_buf_zone); +} + +/* Return the client to the session in the FS to hold it */ +struct p9_client * +p9_client_create(struct mount *mp, int *error, const char *mount_tag) +{ + struct p9_client *clnt; + + clnt = malloc(sizeof(struct p9_client), M_P9CLNT, M_WAITOK | M_ZERO); + mtx_init(&clnt->clnt_mtx, "p9clnt", NULL, MTX_DEF); + + /* Parse should have set trans_mod */ + *error = p9_parse_opts(mp, clnt); + if (*error != 0) + goto out; + + if (clnt->ops == NULL) { + *error = EINVAL; + P9_DEBUG(ERROR, "%s: no transport\n", __func__); + goto out; + } + + /* All the structures from here are protected by the lock clnt_mtx */ + init_unrhdr(&clnt->fidpool, P9FS_ROOT_FID_NO, P9FS_MAX_FID_CNT, + &clnt->clnt_mtx); + init_unrhdr(&clnt->tagpool, P9FS_MIN_TAG, P9FS_MAX_TAG, + &clnt->clnt_mtx); + + P9_DEBUG(TRANS, "%s: clnt %p trans %p msize %d protocol %d\n", + __func__, clnt, clnt->ops, clnt->msize, clnt->proto_version); + + *error = clnt->ops->create(mount_tag, &clnt->handle); + if (*error != 0) { + P9_DEBUG(ERROR, "%s: transport create failed .%d \n", + __func__, *error); + goto out; + } + clnt->trans_status = P9FS_CONNECT; + + *error = p9_client_version(clnt); + if (*error != 0) + goto out; + + P9_DEBUG(TRANS, "%s: client creation succeeded.\n", __func__); + return (clnt); +out: + free(clnt, M_P9CLNT); + return (NULL); +} + +/* Destroy the 
client by destroying associated fidpool and tagpool */ +void +p9_client_destroy(struct p9_client *clnt) +{ + + P9_DEBUG(TRANS, "%s: client %p\n", __func__, clnt); + clnt->ops->close(clnt->handle); + + P9_DEBUG(TRANS, "%s : Destroying fidpool\n", __func__); + clear_unrhdr(&clnt->fidpool); + + P9_DEBUG(TRANS, "%s : Destroying tagpool\n", __func__); + clear_unrhdr(&clnt->tagpool); + + free(clnt, M_P9CLNT); +} + +/* + * Attach a user to the filesystem. Create a fid for that user to access + * the root of the filesystem. + */ +struct p9_fid * +p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, + const char *uname, uid_t n_uname, const char *aname, int *error) +{ + struct p9_req_t *req; + struct p9_fid *fid; + struct p9_qid qid; + + P9_DEBUG(PROTO, "TATTACH uname=%s aname=%s, n_uname=%d\n", + uname, aname, n_uname); + fid = p9_fid_create(clnt); + if (fid == NULL) { + *error = ENOMEM; + return (NULL); + } + fid->uid = n_uname; + + req = p9_client_request(clnt, P9PROTO_TATTACH, error, "ddssd", fid->fid, + P9PROTO_NOFID, uname, aname, n_uname); + if (*error != 0) + goto out; + + *error = p9_buf_readf(req->rc, clnt->proto_version, "Q", &qid); + if (*error != 0) { + P9_DEBUG(ERROR, "%s: p9_buf_readf failed: %d \n", + __func__, *error); + goto out; + } + + P9_DEBUG(PROTO, "RATTACH qid %x.%llx.%x\n", + qid.type, (unsigned long long)qid.path, qid.version); + + memmove(&fid->qid, &qid, sizeof(struct p9_qid)); + p9_free_req(clnt, req); + + return (fid); +out: + if (req != NULL) + p9_free_req(clnt, req); + if (fid != NULL) + p9_fid_destroy(fid); + + return (NULL); +} + +/* Delete a file/directory. 
Corresponding fid will be cluncked too */ +int +p9_client_remove(struct p9_fid *fid) +{ + int error; + struct p9_client *clnt; + struct p9_req_t *req; + + P9_DEBUG(PROTO, "TREMOVE fid %d\n", fid->fid); + + error = 0; + clnt = fid->clnt; + + req = p9_client_request(clnt, P9PROTO_TREMOVE, &error, "d", fid->fid); + if (error != 0) { + P9_DEBUG(PROTO, "RREMOVE fid %d\n", fid->fid); + return (error); + } + + p9_free_req(clnt, req); + return (error); +} + +/* Inform the file server that the current file represented by fid is no longer + * needed by the client. Any allocated fid on the server needs a clunk to be + * destroyed. + */ +int +p9_client_clunk(struct p9_fid *fid) +{ + int error; + struct p9_client *clnt; + struct p9_req_t *req; + + error = 0; + + if (fid == NULL) { + P9_DEBUG(ERROR, "%s: clunk with NULL fid is bad\n", __func__); + return (0); + } + + P9_DEBUG(PROTO, "TCLUNK fid %d \n", fid->fid); + + clnt = fid->clnt; + req = p9_client_request(clnt, P9PROTO_TCLUNK, &error, "d", fid->fid); + if (req != NULL) { + P9_DEBUG(PROTO, "RCLUNK fid %d\n", fid->fid); + p9_free_req(clnt, req); + } + + p9_fid_destroy(fid); + return (error); +} + +/* + * Client_walk is for searching any component name in a directory. + * This is usually called on lookups. Also when we need a new open fid + * as 9p needs to have an open fid for every file to fileops, we call this + * validate the component of the file and return the newfid(openfid) created. + */ +struct p9_fid * +p9_client_walk(struct p9_fid *oldfid, uint16_t nwnames, char **wnames, + int clone, int *error) +{ + struct p9_client *clnt; + struct p9_fid *fid; + struct p9_qid *wqids; + struct p9_req_t *req; + uint16_t nwqids, count; + + clnt = oldfid->clnt; + wqids = NULL; + nwqids = 0; + + /* + * Before, we go and create fid, make sure we are not tearing + * down. Only then we create. + * Allow only cleanup clunk messages once we are starting to teardown. 
+ */ + if (clnt->trans_status != P9FS_CONNECT) { + *error = EIO; + return (NULL); + } + + if (clone) { + fid = p9_fid_create(clnt); + if (fid == NULL) { + *error = ENOMEM; + return (NULL); + } + fid->uid = oldfid->uid; + } else + fid = oldfid; + + P9_DEBUG(PROTO, "TWALK fids %d,%d nwnames %u wname %s\n", + oldfid->fid, fid->fid, nwnames, + wnames != NULL ? wnames[nwnames-1] : NULL); + + /* + * The newfid is for the component in search. We are preallocating as + * qemu on other side allocates or returns a fid if it sees a match + */ + req = p9_client_request(clnt, P9PROTO_TWALK, error, "ddT", oldfid->fid, + fid->fid, wnames, nwnames); + if (*error != 0) { + if (fid != oldfid) + p9_fid_destroy(fid); + return (NULL); + } + + *error = p9_buf_readf(req->rc, clnt->proto_version, "R", &nwqids, + &wqids); + if (*error != 0) + goto out; + + P9_DEBUG(PROTO, "RWALK nwqid %d:\n", nwqids); + + if (nwqids != nwnames) { + *error = ENOENT; + goto out; + } + + for (count = 0; count < nwqids; count++) + P9_DEBUG(TRANS, "%s: [%d] %x.%llx.%x\n", + __func__, count, wqids[count].type, + (unsigned long long)wqids[count].path, + wqids[count].version); + + if (nwnames) + memmove(&fid->qid, &wqids[nwqids - 1], sizeof(struct p9_qid)); + else + fid->qid = oldfid->qid; + + p9_free_req(clnt, req); + free(wqids, M_TEMP); + return (fid); + +out: + p9_free_req(clnt, req); + if (wqids) + free(wqids, M_TEMP); + if (fid && fid != oldfid) + p9_client_clunk(fid); + return (NULL); +} + +/* Open a file with given fid and mode */ +int +p9_client_open(struct p9_fid *fid, int mode) +{ + int error, mtu; + struct p9_client *clnt; + struct p9_req_t *req; + + error = 0; + clnt = fid->clnt; + mtu = 0; + + P9_DEBUG(PROTO, "%s fid %d mode %d\n", + p9_is_proto_dotl(clnt) ? 
"TLOPEN" : "TOPEN", + fid->fid, mode); + + if (fid->mode != -1) + return (EINVAL); + + if (p9_is_proto_dotl(clnt)) + req = p9_client_request(clnt, P9PROTO_TLOPEN, &error, "dd", + fid->fid, mode); + else + req = p9_client_request(clnt, P9PROTO_TOPEN, &error, "db", + fid->fid, mode); + + if (error != 0) + return (error); + + error = p9_buf_readf(req->rc, clnt->proto_version, "Qd", &fid->qid, + &mtu); + if (error != 0) + goto out; + + P9_DEBUG(PROTO, "%s qid %x.%llx.%x mtu %x\n", + p9_is_proto_dotl(clnt) ? "RLOPEN" : "ROPEN", + (fid->qid).type, (unsigned long long)(fid->qid).path, + (fid->qid).version, mtu); + + fid->mode = mode; + fid->mtu = mtu; +out: + p9_free_req(clnt, req); + return (error); +} + +/* Request to get directory entries */ +int +p9_client_readdir(struct p9_fid *fid, char *data, uint64_t offset, + uint32_t count) +{ + int error; + uint32_t rsize; + struct p9_client *clnt; + struct p9_req_t *req; + char *dataptr; + + P9_DEBUG(PROTO, "TREADDIR fid %d offset %llu count %d\n", + fid->fid, (unsigned long long) offset, count); + + error = 0; + rsize = fid->mtu; + clnt = fid->clnt; + + if (rsize == 0 || rsize > clnt->msize) + rsize = clnt->msize; + + if (count < rsize) + rsize = count; + + req = p9_client_request(clnt, P9PROTO_TREADDIR, &error, "dqd", + fid->fid, offset, rsize); + + if (error != 0) { + P9_DEBUG(ERROR, "%s: couldn't allocate req in client_readdir\n", + __func__); + return (-error); + } + + error = p9_buf_readf(req->rc, clnt->proto_version, "D", &count, + &dataptr); + if (error != 0) { + P9_DEBUG(ERROR, "%s: p0_buf_readf failed: %d\n", + __func__, error); + p9_free_req(clnt, req); + return (-error); + } + + P9_DEBUG(PROTO, "RREADDIR count %u\n", count); + + /* Copy back the data into the input buffer. */ + memmove(data, dataptr, count); + p9_free_req(clnt, req); + return (count); +} + +/* + * Read count bytes from offset for the file fid into the character + * buffer data. This buffer is handed over to p9fs to process into user + * buffers. 
Note that this function typically returns the number of bytes read
+ * so in case of an error we return -error so that we can distinguish between
+ * error codes and bytes. A reply that carries no data is reported as -EIO.
+ */
+int
+p9_client_read(struct p9_fid *fid, uint64_t offset, uint32_t count, char *data)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	char *dataptr;
+	int error, rsize;
+
+	clnt = fid->clnt;
+	rsize = fid->mtu;
+	error = 0;
+
+	P9_DEBUG(PROTO, "TREAD fid %d offset %llu %u\n",
+	    fid->fid, (unsigned long long) offset, count);
+
+	/* Ask for no more than the fid, the session and the caller allow. */
+	if (!rsize || rsize > clnt->msize)
+		rsize = clnt->msize;
+
+	if (count < rsize)
+		rsize = count;
+
+	/* At this stage, we only have 8K buffers so only transfer */
+	req = p9_client_request(clnt, P9PROTO_TREAD, &error, "dqd", fid->fid,
+	    offset, rsize);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: failed allocate request\n", __func__);
+		return (-error);
+	}
+
+	/* From here on count is the byte count reported by the server. */
+	error = p9_buf_readf(req->rc, clnt->proto_version, "D", &count,
+	    &dataptr);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: p9_buf_readf failed: %d\n",
+		    __func__, error);
+		goto out;
+	}
+
+	/* Do not trust the server to stay within what was requested. */
+	if (rsize < count) {
+		P9_DEBUG(PROTO, "RREAD count (%d > %d)\n", count, rsize);
+		count = rsize;
+	}
+
+	P9_DEBUG(PROTO, "RREAD count %d\n", count);
+
+	if (count == 0) {
+		/*
+		 * Keep error positive here: the exit path below negates it.
+		 * Storing -EIO would make this function return +EIO, which
+		 * the caller would mistake for a byte count.
+		 */
+		error = EIO;
+		P9_DEBUG(ERROR, "%s: EIO error in client_read \n", __func__);
+		goto out;
+	}
+
+	/* Copy back the data into the input buffer. */
+	memmove(data, dataptr, count);
+	p9_free_req(clnt, req);
+	return (count);
+out:
+	p9_free_req(clnt, req);
+	return (-error);
+}
+
+/*
+ * Write count bytes from buffer to the offset for the file fid
+ * Note that this function typically returns the number of bytes written
+ * so in case of an error we return -error so that we can distinguish between
+ * error codes and bytes.
+ */
+
+int
+p9_client_write(struct p9_fid *fid, uint64_t offset, uint32_t count, char *data)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	int ret, error, rsize;
+
+	clnt = fid->clnt;
+	rsize = fid->mtu;
+	ret = 0;
+	error = 0;
+
+	P9_DEBUG(PROTO, "TWRITE fid %d offset %llu %u\n",
+	    fid->fid, (unsigned long long) offset, count);
+
+	if (!rsize || rsize > clnt->msize)
+		rsize = clnt->msize;
+
+	/* Limit set by Qemu ,8168 */
+	if (count > rsize) {
+		count = rsize;
+	}
+
+	/*
+	 * Doing the Data blob instead. If at all we add the zerocopy, we can
+	 * change it to uio direct copy
+	 */
+	req = p9_client_request(clnt, P9PROTO_TWRITE, &error, "dqD", fid->fid,
+	    offset, count, data);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: failed allocate request: %d\n",
+		    __func__, error);
+		return (-error);
+	}
+
+	/* ret is the number of bytes the server says it has written. */
+	error = p9_buf_readf(req->rc, clnt->proto_version, "d", &ret);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: p9_buf_readf error: %d\n",
+		    __func__, error);
+		goto out;
+	}
+
+	/* The server cannot have written more than it was sent. */
+	if (count < ret) {
+		P9_DEBUG(PROTO, "RWRITE count (%d > %d)\n", count, ret);
+		ret = count;
+	}
+	P9_DEBUG(PROTO, "RWRITE count %d\n", ret);
+
+	/*
+	 * Test the acknowledged byte count rather than the requested one, as
+	 * p9_client_read does with its reply: a write that makes no progress
+	 * is an I/O error and must not be handed back as a successful
+	 * zero-byte write. A zero-length request still ends up here because
+	 * ret was clamped to count above.
+	 */
+	if (ret == 0) {
+		error = EIO;
+		P9_DEBUG(ERROR, "%s: EIO error\n", __func__);
+		goto out;
+	}
+
+	p9_free_req(clnt, req);
+	return (ret);
+out:
+	p9_free_req(clnt, req);
+	return (-error);
+}
+
+
+/* Create file under directory fid, with name, permissions, mode.
*/ +int +p9_client_file_create(struct p9_fid *fid, char *name, uint32_t perm, int mode, + char *extension) +{ + int error; + struct p9_client *clnt; + struct p9_req_t *req; + struct p9_qid qid; + int mtu; + + P9_DEBUG(PROTO, "TCREATE fid %d name %s perm %d mode %d\n", + fid->fid, name, perm, mode); + + clnt = fid->clnt; + error = 0; + + if (fid->mode != -1) + return (EINVAL); + + req = p9_client_request(clnt, P9PROTO_TCREATE, &error, "dsdb?s", + fid->fid, name, perm, mode, extension); + if (error != 0) + return (error); + + error = p9_buf_readf(req->rc, clnt->proto_version, "Qd", &qid, &mtu); + if (error != 0) + goto out; + + P9_DEBUG(PROTO, "RCREATE qid %x.%jx.%x mtu %x\n", + qid.type, (uintmax_t)qid.path, qid.version, mtu); + fid->mode = mode; + fid->mtu = mtu; + +out: + p9_free_req(clnt, req); + return (error); +} + +/* Request file system information of the file system */ +int +p9_client_statfs(struct p9_fid *fid, struct p9_statfs *stat) +{ + int error; + struct p9_req_t *req; + struct p9_client *clnt; + + error = 0; + clnt = fid->clnt; + + P9_DEBUG(PROTO, "TSTATFS fid %d\n", fid->fid); + + req = p9_client_request(clnt, P9PROTO_TSTATFS, &error, "d", fid->fid); + if (error != 0) { + return (error); + } + + error = p9_buf_readf(req->rc, clnt->proto_version, "ddqqqqqqd", + &stat->type, &stat->bsize, &stat->blocks, &stat->bfree, + &stat->bavail, &stat->files, &stat->ffree, &stat->fsid, + &stat->namelen); + + if (error != 0) + goto out; + + P9_DEBUG(PROTO, "RSTATFS fid %d type 0x%jx bsize %ju " + "blocks %ju bfree %ju bavail %ju files %ju ffree %ju " + "fsid %ju namelen %ju\n", + fid->fid, (uintmax_t)stat->type, + (uintmax_t)stat->bsize, (uintmax_t)stat->blocks, + (uintmax_t)stat->bfree, (uintmax_t)stat->bavail, + (uintmax_t)stat->files, (uintmax_t)stat->ffree, + (uintmax_t)stat->fsid, (uintmax_t)stat->namelen); + +out: + p9_free_req(clnt, req); + return (error); +} + +/* Rename file referenced by the fid */ +int +p9_client_renameat(struct p9_fid *oldfid, char 
*oldname, struct p9_fid *newfid, + char *newname) +{ + int error; + struct p9_client *clnt; + struct p9_req_t *req; + + P9_DEBUG(PROTO, "TRENAMEAT oldfid %d oldname %s newfid %d newfid %s", + oldfid->fid, oldname, newfid->fid, newname); + + error = 0; + clnt = oldfid->clnt; + + /* + * we are calling the request with TRENAMEAT tag and not TRENAME with + * the 9p protocol version 9p2000.u as the QEMU version supports this + * version of renaming + */ + req = p9_client_request(clnt, P9PROTO_TRENAMEAT, &error, "dsds", + oldfid->fid, oldname, newfid->fid, newname); + + if (error != 0) + return (error); + + p9_free_req(clnt, req); + return (error); +} + +/* Request to create symbolic link */ +int +p9_create_symlink(struct p9_fid *fid, char *name, char *symtgt, gid_t gid) +{ + int error; + struct p9_req_t *req; + struct p9_client *clnt; + struct p9_qid qid; + + error = 0; + clnt = fid->clnt; + + P9_DEBUG(PROTO, "TSYMLINK fid %d name %s\n", fid->fid, name); + + req = p9_client_request(clnt, P9PROTO_TSYMLINK, &error, "dssd", + fid->fid, name, symtgt, gid); + + if (error != 0) + return (error); + + error = p9_buf_readf(req->rc, clnt->proto_version, "Q", &qid); + if (error != 0) { + P9_DEBUG(ERROR, "%s: buf_readf failed %d\n", __func__, error); + return (error); + } + + P9_DEBUG(PROTO, "RSYMLINK qid %x.%jx.%x\n", + qid.type, (uintmax_t)qid.path, qid.version); + + p9_free_req(clnt, req); + return (0); +} + +/* Request to create hard link */ +int +p9_create_hardlink(struct p9_fid *dfid, struct p9_fid *oldfid, char *name) +{ + int error; + struct p9_req_t *req; + struct p9_client *clnt; + + error = 0; + clnt = dfid->clnt; + + P9_DEBUG(PROTO, "TLINK dfid %d oldfid %d name %s\n", + dfid->fid, oldfid->fid, name); + + req = p9_client_request(clnt, P9PROTO_TLINK, &error, "dds", dfid->fid, + oldfid->fid, name); + if (error != 0) + return (error); + + p9_free_req(clnt, req); + return (0); +} + +/* Request to read contents of symbolic link */ +int +p9_readlink(struct p9_fid *fid, char 
**target) +{ + int error; + struct p9_client *clnt; + struct p9_req_t *req; + + error = 0; + clnt = fid->clnt; + + P9_DEBUG(PROTO, "TREADLINK fid %d\n", fid->fid); + + req = p9_client_request(clnt, P9PROTO_TREADLINK, &error, "d", fid->fid); + if (error != 0) + return (error); + + error = p9_buf_readf(req->rc, clnt->proto_version, "s", target); + if (error != 0) { + P9_DEBUG(ERROR, "%s: buf_readf failed %d\n", __func__, error); + return (error); + } + + P9_DEBUG(PROTO, "RREADLINK target %s \n", *target); + + p9_free_req(clnt, req); + return (0); +} + +/* Get file attributes of the file referenced by the fid */ +int +p9_client_getattr(struct p9_fid *fid, struct p9_stat_dotl *stat_dotl, + uint64_t request_mask) +{ + int err; + struct p9_client *clnt; + struct p9_req_t *req; + + err = 0; + + P9_DEBUG(PROTO, "TGETATTR fid %d mask %ju\n", + fid->fid, (uintmax_t)request_mask); + + clnt = fid->clnt; + req = p9_client_request(clnt, P9PROTO_TGETATTR, &err, "dq", fid->fid, + request_mask); + if (req == NULL) { + P9_DEBUG(ERROR, "%s: allocation failed %d", __func__, err); + goto error; + } + + err = p9_buf_readf(req->rc, clnt->proto_version, "A", stat_dotl); + if (err != 0) { + P9_DEBUG(ERROR, "%s: buf_readf failed %d\n", __func__, err); + goto error; + } + + p9_free_req(clnt, req); + P9_DEBUG(PROTO, "RGETATTR fid %d qid %x.%jx.%x st_mode %8.8x " + "uid %d gid %d nlink %ju rdev %jx st_size %jx blksize %ju " + "blocks %ju st_atime_sec %ju, st_atime_nsec %ju " + "st_mtime_sec %ju, st_mtime_nsec %ju st_ctime_sec %ju " + "st_ctime_nsec %ju st_btime_sec %ju, st_btime_nsec %ju " + "st_stat %ju, st_data_version %ju \n", fid->fid, + stat_dotl->qid.type, (uintmax_t)stat_dotl->qid.path, + stat_dotl->qid.version, stat_dotl->st_mode, stat_dotl->st_uid, + stat_dotl->st_gid, (uintmax_t)stat_dotl->st_nlink, + (uintmax_t)stat_dotl->st_rdev, (uintmax_t)stat_dotl->st_size, + (uintmax_t)stat_dotl->st_blksize, + (uintmax_t)stat_dotl->st_blocks, (uintmax_t)stat_dotl->st_atime_sec, + 
(uintmax_t)stat_dotl->st_atime_nsec, (uintmax_t)stat_dotl->st_mtime_sec, + (uintmax_t)stat_dotl->st_mtime_nsec, (uintmax_t)stat_dotl->st_ctime_sec, + (uintmax_t)stat_dotl->st_ctime_nsec, (uintmax_t)stat_dotl->st_btime_sec, + (uintmax_t)stat_dotl->st_btime_nsec, (uintmax_t)stat_dotl->st_gen, + (uintmax_t)stat_dotl->st_data_version); + + return (err); + +error: + if (req != NULL) + p9_free_req(clnt, req); + + return (err); +} + +/* Set file attributes of the file referenced by the fid */ +int +p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr) +{ + int err; + struct p9_req_t *req; + struct p9_client *clnt; + + err = 0; + + P9_DEBUG(PROTO, "TSETATTR fid %d" + " valid %x mode %x uid %d gid %d size %ju" + " atime_sec %ju atime_nsec %ju" + " mtime_sec %ju mtime_nsec %ju\n", + fid->fid, + p9attr->valid, p9attr->mode, p9attr->uid, p9attr->gid, + (uintmax_t)p9attr->size, (uintmax_t)p9attr->atime_sec, + (uintmax_t)p9attr->atime_nsec, (uintmax_t)p9attr->mtime_sec, + (uintmax_t)p9attr->mtime_nsec); + + clnt = fid->clnt; + + /* Any client_request error is converted to req == NULL error*/ + req = p9_client_request(clnt, P9PROTO_TSETATTR, &err, "dA", fid->fid, + p9attr); + + if (req == NULL) { + P9_DEBUG(ERROR, "%s: allocation failed %d\n", __func__, err); + goto error; + } + + p9_free_req(clnt, req); +error: + return (err); +} + diff --git a/sys/fs/p9fs/p9_client.h b/sys/fs/p9fs/p9_client.h new file mode 100644 index 000000000000..8597c0732ba3 --- /dev/null +++ b/sys/fs/p9fs/p9_client.h @@ -0,0 +1,168 @@ +/*- + * Copyright (c) 2017 Juniper Networks, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* 9P client definitions */ + +#ifndef FS_P9FS_P9_CLIENT_H +#define FS_P9FS_P9_CLIENT_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* 9P protocol versions */ +enum p9_proto_versions { + p9_proto_legacy, /* legacy version */ + p9_proto_2000u, /* Unix version */ + p9_proto_2000L, /* Linux version */ +}; + +/* P9 Request exchanged between Host and Guest */ +struct p9_req_t { + struct p9_buffer *tc; /* request buffer */ + struct p9_buffer *rc; /* response buffer */ +}; + +/* 9P transport status */ +enum transport_status { + P9FS_CONNECT, /* transport is connected */ + P9FS_BEGIN_DISCONNECT,/* transport has begun to disconnect */ + P9FS_DISCONNECT, /* transport has been disconnected */ +}; + +/* This is set by QEMU so we will oblige */ +#define P9FS_MTU 8192 + +/* + * Even though we have a 8k buffer, Qemu is typically doing 8168 + * because of a HDR of 24.
Use that amount for transfers so that we don't + * drop anything. + */ +#define P9FS_IOUNIT (P9FS_MTU - 24) +#define P9FS_DIRENT_LEN 256 +#define P9_NOTAG 0 + +/* Client state information */ +struct p9_client { + struct p9_trans_module *ops; /* module API instantiated with this client */ + void *handle; /* module-specific client handle */ + struct mtx clnt_mtx; /* mutex to lock the client */ + struct mtx req_mtx; /* mutex to lock the request buffer */ + struct cv req_cv; /* condition variable on which to wake up thread */ + unsigned int msize; /* maximum data size */ + unsigned char proto_version; /* 9P version to use */ + struct unrhdr fidpool; /* fid handle accounting for session */ + struct unrhdr tagpool; /* transaction id accounting for session */ + enum transport_status trans_status; /* transport instance state */ +}; + +/* The main fid structure which keeps track of the file.*/ +struct p9_fid { + struct p9_client *clnt; /* the instantiating 9P client */ + uint32_t fid; /* numeric identifier */ + int mode; /* current mode of this fid */ + struct p9_qid qid; /* server identifier */ + uint32_t mtu; /* max transferable unit at a time */ + uid_t uid; /* numeric uid of the local user who owns this handle */ + int v_opens; /* keep count on the number of opens called with this file handle */ + STAILQ_ENTRY(p9_fid) fid_next; /* points to next fid in the list */ +}; + +/* Directory entry structure */ +struct p9_dirent { + struct p9_qid qid; /* 9P server qid for this dirent */ + uint64_t d_off; /* offset to the next dirent */ + unsigned char d_type; /* file type */ + char d_name[P9FS_DIRENT_LEN]; /* file name */ + int len; +}; + +void p9_init_zones(void); +void p9_destroy_zones(void); + +/* Session and client Init Ops */ +struct p9_client *p9_client_create(struct mount *mp, int *error, + const char *mount_tag); +void p9_client_destroy(struct p9_client *clnt); +struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *fid, + const char *uname, uid_t
n_uname, const char *aname, int *error); + +/* FILE OPS - These are individually called from the specific vop function */ + +int p9_client_open(struct p9_fid *fid, int mode); +int p9_client_close(struct p9_fid *fid); +struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwnames, + char **wnames, int clone, int *error); +struct p9_fid *p9_fid_create(struct p9_client *clnt); +void p9_fid_destroy(struct p9_fid *fid); +uint16_t p9_tag_create(struct p9_client *clnt); +void p9_tag_destroy(struct p9_client *clnt, uint16_t tag); +int p9_client_clunk(struct p9_fid *fid); +int p9_client_version(struct p9_client *clnt); +int p9_client_readdir(struct p9_fid *fid, char *data, uint64_t offset, uint32_t count); +int p9_client_read(struct p9_fid *fid, uint64_t offset, uint32_t count, char *data); +int p9_client_write(struct p9_fid *fid, uint64_t offset, uint32_t count, char *data); +int p9_client_file_create(struct p9_fid *fid, char *name, uint32_t perm, int mode, + char *extension); +int p9_client_remove(struct p9_fid *fid); +int p9_dirent_read(struct p9_client *clnt, char *buf, int start, int len, + struct p9_dirent *dirent); +int p9_client_statfs(struct p9_fid *fid, struct p9_statfs *stat); +int p9_client_statread(struct p9_client *clnt, char *data, size_t len, struct p9_wstat *st); +int p9_is_proto_dotu(struct p9_client *clnt); +int p9_is_proto_dotl(struct p9_client *clnt); +void p9_client_cb(struct p9_client *c, struct p9_req_t *req); +int p9stat_read(struct p9_client *clnt, char *data, size_t len, struct p9_wstat *st); +void p9_client_disconnect(struct p9_client *clnt); +void p9_client_begin_disconnect(struct p9_client *clnt); +int p9_create_symlink(struct p9_fid *fid, char *name, char *symtgt, gid_t gid); +int p9_create_hardlink(struct p9_fid *dfid, struct p9_fid *oldfid, char *name); +int p9_readlink(struct p9_fid *fid, char **target); +int p9_client_renameat(struct p9_fid *oldfid, char *oldname, struct p9_fid *newfid, char *newname); +int p9_client_getattr(struct 
p9_fid *fid, struct p9_stat_dotl *stat_dotl, + uint64_t request_mask); +int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr); + +int p9_buf_vwritef(struct p9_buffer *buf, int proto_version, const char *fmt, + va_list ap); +int p9_buf_readf(struct p9_buffer *buf, int proto_version, const char *fmt, ...); +int p9_buf_prepare(struct p9_buffer *buf, int8_t type); +int p9_buf_finalize(struct p9_client *clnt, struct p9_buffer *buf); +void p9_buf_reset(struct p9_buffer *buf); + +#endif /* FS_P9FS_P9_CLIENT_H */ diff --git a/sys/fs/p9fs/p9_debug.h b/sys/fs/p9fs/p9_debug.h new file mode 100644 index 000000000000..463b009d00ad --- /dev/null +++ b/sys/fs/p9fs/p9_debug.h @@ -0,0 +1,45 @@ +/*- + * Copyright (c) 2017 Juniper Networks, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef FS_P9FS_P9_DEBUG_H +#define FS_P9FS_P9_DEBUG_H + +extern int p9_debug_level; /* All debugs on now */ + +/* 9P debug flags */ +#define P9_DEBUG_TRANS 0x0001 /* Trace transport */ +#define P9_DEBUG_SUBR 0x0002 /* Trace driver submissions */ +#define P9_DEBUG_LPROTO 0x0004 /* Low level protocol tracing */ +#define P9_DEBUG_PROTO 0x0008 /* High level protocol tracing */ +#define P9_DEBUG_VOPS 0x0010 /* VOPs tracing */ +#define P9_DEBUG_ERROR 0x0020 /* verbose error messages */ + +#define P9_DEBUG(category, fmt, ...) do { \ + if ((p9_debug_level & P9_DEBUG_##category) != 0) \ + printf(fmt, ##__VA_ARGS__); \ +} while (0) + +#endif /* FS_P9FS_P9_DEBUG_H */ diff --git a/sys/fs/p9fs/p9_protocol.c b/sys/fs/p9fs/p9_protocol.c new file mode 100644 index 000000000000..e0045f67993d --- /dev/null +++ b/sys/fs/p9fs/p9_protocol.c @@ -0,0 +1,632 @@ +/*- + * Copyright (c) 2017 Juniper Networks, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * 9P Protocol Support Code + * This file provides the standard for the FS interactions with the server + * interface as it can understand only this protocol. The details of the + * protocol can be found here + * XXX (link to protocol details page on FreeBSD wiki) + */ + +#include +#include +#include +#include + +#define P9FS_MAXLEN 255 + +static int p9_buf_writef(struct p9_buffer *buf, int proto_version, + const char *fmt, ...); +static void stat_free(struct p9_wstat *sbuf); + +static void +stat_free(struct p9_wstat *stbuf) +{ + + free(stbuf->name, M_TEMP); + free(stbuf->uid, M_TEMP); + free(stbuf->gid, M_TEMP); + free(stbuf->muid, M_TEMP); + free(stbuf->extension, M_TEMP); +} + +static size_t +buf_read(struct p9_buffer *buf, void *data, size_t size) +{ + size_t len; + + len = min(buf->size - buf->offset, size); + + memcpy(data, &buf->sdata[buf->offset], len); + buf->offset += len; + + return (size - len); +} + +static size_t +buf_write(struct p9_buffer *buf, const void *data, size_t size) +{ + size_t len; + + len = min(buf->capacity - buf->size, size); + + memcpy(&buf->sdata[buf->size], data, len); + buf->size += len; + + return (size - len); +} + +/* + * Main buf_read routine. 
This copies the data from the buffer into the + * respective values based on the data type. + * Here + * b - int8_t + * w - int16_t + * d - int32_t + * q - int64_t + * s - string + * u - uid + * g - gid + * Q - qid + * S - stat + * A - getattr (9P2000.L) + * D - data blob (int32_t size followed by void *, results are not freed) + * T - array of strings (int16_t count, followed by strings) + * R - array of qids (int16_t count, followed by qids) + * ? - return if version is not .u or .l + */ +static int +p9_buf_vreadf(struct p9_buffer *buf, int proto_version, const char *fmt, + va_list ap) +{ + const char *ptr; + int error; + + error = 0; + + for (ptr = fmt; *ptr; ptr++) { + switch (*ptr) { + case 'b': + { + int8_t *val = va_arg(ap, int8_t *); + + if (buf_read(buf, val, sizeof(*val))) + error = EFAULT; + break; + } + case 'w': + { + int16_t *val = va_arg(ap, int16_t *); + + if (buf_read(buf, val, sizeof(*val))) + error = EFAULT; + break; + } + case 'd': + { + int32_t *val = va_arg(ap, int32_t *); + + if (buf_read(buf, val, sizeof(*val))) + error = EFAULT; + break; + } + case 'q': + { + int64_t *val = va_arg(ap, int64_t *); + + if (buf_read(buf, val, sizeof(*val))) + error = EFAULT; + break; + } + case 's': + { + char **sptr_p = va_arg(ap, char **); + uint16_t len; + char *sptr; + + error = buf_read(buf, &len, sizeof(uint16_t)); + if (error) + break; + + sptr = malloc(len + 1, M_TEMP, M_NOWAIT | M_ZERO); + + if (buf_read(buf, sptr, len)) { + error = EFAULT; + free(sptr, M_TEMP); + sptr = NULL; + } else { + (sptr)[len] = 0; + *sptr_p = sptr; + } + break; + } + case 'u': + { + uid_t *val = va_arg(ap, uid_t *); + + if (buf_read(buf, val, sizeof(*val))) + error = EFAULT; + break; + + } + case 'g': + { + gid_t *val = va_arg(ap, gid_t *); + + if (buf_read(buf, val, sizeof(*val))) + error = EFAULT; + break; + + } + case 'Q': + { + struct p9_qid *qid = va_arg(ap, struct p9_qid *); + + error = p9_buf_readf(buf, proto_version, "bdq", + &qid->type, &qid->version, &qid->path); + 
+ break; + } + case 'S': + { + struct p9_wstat *stbuf = va_arg(ap, struct p9_wstat *); + + error = p9_buf_readf(buf, proto_version, "wwdQdddqssss?sddd", + &stbuf->size, &stbuf->type, &stbuf->dev, &stbuf->qid, + &stbuf->mode, &stbuf->atime, &stbuf->mtime, &stbuf->length, + &stbuf->name, &stbuf->uid, &stbuf->gid, &stbuf->muid, + &stbuf->extension, &stbuf->n_uid, &stbuf->n_gid, &stbuf->n_muid); + + if (error != 0) + stat_free(stbuf); + break; + } + case 'A': + { + struct p9_stat_dotl *stbuf = va_arg(ap, struct p9_stat_dotl *); + + error = p9_buf_readf(buf, proto_version, "qQdugqqqqqqqqqqqqqqq", + &stbuf->st_result_mask, &stbuf->qid, &stbuf->st_mode, + &stbuf->st_uid,&stbuf->st_gid, &stbuf->st_nlink, + &stbuf->st_rdev, &stbuf->st_size, &stbuf->st_blksize, + &stbuf->st_blocks, &stbuf->st_atime_sec, + &stbuf->st_atime_nsec, &stbuf->st_mtime_sec, + &stbuf->st_mtime_nsec, &stbuf->st_ctime_sec, + &stbuf->st_ctime_nsec, &stbuf->st_btime_sec, + &stbuf->st_btime_nsec, &stbuf->st_gen, + &stbuf->st_data_version); + + break; + } + case 'D': + { + uint32_t *count = va_arg(ap, uint32_t *); + void **data = va_arg(ap, void **); + + error = buf_read(buf, count, sizeof(uint32_t)); + if (error == 0) { + *count = MIN(*count, buf->size - buf->offset); + *data = &buf->sdata[buf->offset]; + } + break; + } + case 'T': + { + uint16_t *nwname_p = va_arg(ap, uint16_t *); + char ***wnames_p = va_arg(ap, char ***); + uint16_t nwname; + char **wnames; + int i; + + error = buf_read(buf, nwname_p, sizeof(uint16_t)); + if (error != 0) + break; + + nwname = *nwname_p; + wnames = malloc(sizeof(char *) * nwname, M_TEMP, M_NOWAIT | M_ZERO); + + for (i = 0; i < nwname && (error == 0); i++) + error = p9_buf_readf(buf, proto_version, "s", &wnames[i]); + + if (error != 0) { + for (i = 0; i < nwname; i++) + free((wnames)[i], M_TEMP); + free(wnames, M_TEMP); + } else + *wnames_p = wnames; + break; + } + case 'R': + { + uint16_t *nwqid_p = va_arg(ap, uint16_t *); + struct p9_qid **wqids_p = va_arg(ap, struct 
p9_qid **); + uint16_t nwqid; + struct p9_qid *wqids; + int i; + + wqids = NULL; + error = buf_read(buf, nwqid_p, sizeof(uint16_t)); + if (error != 0) + break; + + nwqid = *nwqid_p; + wqids = malloc(nwqid * sizeof(struct p9_qid), M_TEMP, M_NOWAIT | M_ZERO); + if (wqids == NULL) { + error = ENOMEM; + break; + } + for (i = 0; i < nwqid && (error == 0); i++) + error = p9_buf_readf(buf, proto_version, "Q", &(wqids)[i]); + + if (error != 0) { + free(wqids, M_TEMP); + } else + *wqids_p = wqids; + + break; + } + case '?': + { + if ((proto_version != p9_proto_2000u) && (proto_version != p9_proto_2000L)) + return (0); + break; + } + default: + break; + } + + if (error != 0) + break; + } + + return (error); +} + +/* + * Main buf_write routine. This copies the data into the buffer from the + * respective values based on the data type. + * Here + * b - int8_t + * w - int16_t + * d - int32_t + * q - int64_t + * s - string + * u - uid + * g - gid + * Q - qid + * S - stat + * D - data blob (int32_t size followed by void *, results are not freed) + * T - array of strings (int16_t count, followed by strings) + * W - string of a specific length + * R - array of qids (int16_t count, followed by qids) + * A - setattr (9P2000.L) + * ? 
- return if version is not .u or .l + */ + +int +p9_buf_vwritef(struct p9_buffer *buf, int proto_version, const char *fmt, + va_list ap) +{ + const char *ptr; + int error; + + error = 0; + + for (ptr = fmt; *ptr; ptr++) { + switch (*ptr) { + case 'b': + { + int8_t val = va_arg(ap, int); + + if (buf_write(buf, &val, sizeof(val))) + error = EFAULT; + break; + } + case 'w': + { + int16_t val = va_arg(ap, int); + + if (buf_write(buf, &val, sizeof(val))) + error = EFAULT; + break; + } + case 'd': + { + int32_t val = va_arg(ap, int32_t); + + if (buf_write(buf, &val, sizeof(val))) + error = EFAULT; + break; + } + case 'q': + { + int64_t val = va_arg(ap, int64_t); + + if (buf_write(buf, &val, sizeof(val))) + error = EFAULT; + + break; + } + case 's': + { + const char *sptr = va_arg(ap, const char *); + uint16_t len = 0; + + if (sptr) + len = MIN(strlen(sptr), P9FS_MAXLEN); + + error = buf_write(buf, &len, sizeof(uint16_t)); + if (error == 0 && buf_write(buf, sptr, len)) + error = EFAULT; + break; + } + case 'u': + { + uid_t val = va_arg(ap, uid_t); + + if (buf_write(buf, &val, sizeof(val))) + error = EFAULT; + break; + + } + case 'g': + { + gid_t val = va_arg(ap, gid_t); + + if (buf_write(buf, &val, sizeof(val))) + error = EFAULT; + break; + + } + case 'Q': + { + const struct p9_qid *qid = va_arg(ap, const struct p9_qid *); + + error = p9_buf_writef(buf, proto_version, "bdq", + qid->type, qid->version, qid->path); + break; + } + case 'S': + { + struct p9_wstat *stbuf = va_arg(ap, struct p9_wstat *); + + error = p9_buf_writef(buf, proto_version, + "wwdQdddqssss?sddd", stbuf->size, stbuf->type, stbuf->dev, &stbuf->qid, + stbuf->mode, stbuf->atime, stbuf->mtime, stbuf->length, stbuf->name, + stbuf->uid, stbuf->gid, stbuf->muid, stbuf->extension, stbuf->n_uid, + stbuf->n_gid, stbuf->n_muid); + + if (error != 0) + stat_free(stbuf); + + break; + } + case 'D': + { + uint32_t count = va_arg(ap, uint32_t); + void *data = va_arg(ap, void *); + + error = buf_write(buf, &count, 
sizeof(uint32_t)); + if ((error == 0) && buf_write(buf, data, count)) + error = EFAULT; + + break; + } + case 'T': + { + char **wnames = va_arg(ap, char **); + uint16_t nwnames = va_arg(ap, int); + + error = buf_write(buf, &nwnames, sizeof(uint16_t)); + if (error == 0) { + int i = 0; + for (i = 0; i < nwnames; i++) { + error = p9_buf_writef(buf, proto_version, "s", wnames[i]); + if (error != 0) + break; + } + } + break; + } + case 'W': + { + const char *sptr = va_arg(ap, const char*); + uint16_t len = va_arg(ap, int); + + error = buf_write(buf, &len, sizeof(uint16_t)); + if (error == 0 && buf_write(buf, sptr, len)) + error = EFAULT; + break; + + } + case 'R': + { + uint16_t nwqid = va_arg(ap, int); + struct p9_qid *wqids = va_arg(ap, struct p9_qid *); + int i; + + error = buf_write(buf, &nwqid, sizeof(uint16_t)); + if (error == 0) { + + for (i = 0; i < nwqid; i++) { + error = p9_buf_writef(buf, proto_version, "Q", &wqids[i]); + if (error != 0) + break; + } + } + break; + } + case 'A': + { + struct p9_iattr_dotl *p9attr = va_arg(ap, struct p9_iattr_dotl *); + + error = p9_buf_writef(buf, proto_version, "ddugqqqqq", + p9attr->valid, p9attr->mode, p9attr->uid, + p9attr->gid, p9attr->size, p9attr->atime_sec, + p9attr->atime_nsec, p9attr->mtime_sec, + p9attr->mtime_nsec); + + break; + } + case '?': + { + if ((proto_version != p9_proto_2000u) && (proto_version != p9_proto_2000L)) + return (0); + break; + } + default: + break; + } + + if (error != 0) + break; + } + + return (error); +} + +/* Variadic form of buf_read */ +int +p9_buf_readf(struct p9_buffer *buf, int proto_version, const char *fmt, ...) +{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = p9_buf_vreadf(buf, proto_version, fmt, ap); + va_end(ap); + + return (ret); +} + +/* Variadic form of buf_write */ +static int +p9_buf_writef(struct p9_buffer *buf, int proto_version, const char *fmt, ...) 
+{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = p9_buf_vwritef(buf, proto_version, fmt, ap); + va_end(ap); + + return (ret); +} + +/* File stats read routine for P9 to get attributes of files */ +int +p9stat_read(struct p9_client *clnt, char *buf, size_t len, struct p9_wstat *st) +{ + struct p9_buffer msg_buf; + int ret; + + msg_buf.size = len; + msg_buf.capacity = len; + msg_buf.sdata = buf; + msg_buf.offset = 0; + + ret = p9_buf_readf(&msg_buf, clnt->proto_version, "S", st); + if (ret) { + P9_DEBUG(ERROR, "%s: failed: %d\n", __func__, ret); + } + + return (ret); +} + +/* + * P9_header preparation routine. All p9 buffers have to have this header(QEMU_HEADER) at the + * front of the buffer. + */ +int +p9_buf_prepare(struct p9_buffer *buf, int8_t type) +{ + buf->id = type; + return (p9_buf_writef(buf, 0, "dbw", 0, type, buf->tag)); +} + +/* + * Final write to the buffer, this is the total size of the buffer. Since the buffer length can + * vary with request, this is computed at the end just before sending the request to the driver + */ +int +p9_buf_finalize(struct p9_client *clnt, struct p9_buffer *buf) +{ + int size; + int error; + + size = buf->size; + buf->size = 0; + error = p9_buf_writef(buf, 0, "d", size); + buf->size = size; + + P9_DEBUG(LPROTO, "%s: size=%d type: %d tag: %d\n", + __func__, buf->size, buf->id, buf->tag); + + return (error); +} + +/* Reset values of the buffer */ +void +p9_buf_reset(struct p9_buffer *buf) +{ + + buf->offset = 0; + buf->size = 0; +} + +/* + * Directory entry read with the buf we have. Call this once we have the buf to parse. + * This buf, obtained from the server, is parsed to make dirent in readdir. 
+ */ +int +p9_dirent_read(struct p9_client *clnt, char *buf, int start, int len, + struct p9_dirent *dent) +{ + struct p9_buffer msg_buf; + int ret; + char *nameptr; + uint16_t sle; + + msg_buf.size = len; + msg_buf.capacity = len; + msg_buf.sdata = buf; + msg_buf.offset = start; + + ret = p9_buf_readf(&msg_buf, clnt->proto_version, "Qqbs", &dent->qid, + &dent->d_off, &dent->d_type, &nameptr); + if (ret) { + P9_DEBUG(ERROR, "%s: failed: %d\n", __func__, ret); + goto out; + } + + sle = strlen(nameptr); + strncpy(dent->d_name, nameptr, sle); + dent->len = sle; + free(nameptr, M_TEMP); +out: + return (msg_buf.offset); +} diff --git a/sys/fs/p9fs/p9_protocol.h b/sys/fs/p9fs/p9_protocol.h new file mode 100644 index 000000000000..ddd8571adc8d --- /dev/null +++ b/sys/fs/p9fs/p9_protocol.h @@ -0,0 +1,280 @@ +/*- + * Copyright (c) 2017 Juniper Networks, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* File contains 9P protocol definitions */ + +#ifndef FS_P9FS_P9_PROTOCOL_H +#define FS_P9FS_P9_PROTOCOL_H + +#include + +/* 9P message types */ +enum p9_cmds_t { + P9PROTO_TLERROR = 6, /* not used */ + P9PROTO_RLERROR, /* response for any failed request */ + P9PROTO_TSTATFS = 8, /* file system status request */ + P9PROTO_RSTATFS, /* file system status response */ + P9PROTO_TLOPEN = 12, /* open a file (9P2000.L) */ + P9PROTO_RLOPEN, /* response to opne request (9P2000.L) */ + P9PROTO_TLCREATE = 14, /* prepare for handle for I/O on a new file (9P2000.L) */ + P9PROTO_RLCREATE, /* response with file access information (9P2000.L) */ + P9PROTO_TSYMLINK = 16, /* symlink creation request */ + P9PROTO_RSYMLINK, /* symlink creation response */ + P9PROTO_TMKNOD = 18, /* create a special file object request */ + P9PROTO_RMKNOD, /* create a special file object response */ + P9PROTO_TRENAME = 20, /* rename a file request */ + P9PROTO_RRENAME, /* rename a file response */ + P9PROTO_TREADLINK = 22, /* request to read value of symbolic link */ + P9PROTO_RREADLINK, /* response to read value of symbolic link request */ + P9PROTO_TGETATTR = 24, /* get file attributes request */ + P9PROTO_RGETATTR, /* get file attributes response */ + P9PROTO_TSETATTR = 26, /* set file attributes request */ + P9PROTO_RSETATTR, /* set file attributes response */ + P9PROTO_TXATTRWALK = 30,/* request to read extended attributes */ + P9PROTO_RXATTRWALK, /* response from 
server with attributes */ + P9PROTO_TXATTRCREATE = 32,/* request to set extended attribute */ + P9PROTO_RXATTRCREATE, /* response from server for setting extended attribute */ + P9PROTO_TREADDIR = 40, /* request to read a directory */ + P9PROTO_RREADDIR, /* response from server for read request */ + P9PROTO_TFSYNC = 50, /* request to flush an cached data to disk */ + P9PROTO_RFSYNC, /* response when cache dat is flushed */ + P9PROTO_TLOCK = 52, /* acquire or release a POSIX record lock */ + P9PROTO_RLOCK, /* response with the status of the lock */ + P9PROTO_TGETLOCK = 54, /* request to check for presence of a POSIX record lock */ + P9PROTO_RGETLOCK, /* response with the details of the lock if acquired */ + P9PROTO_TLINK = 70, /* request to create hard link */ + P9PROTO_RLINK, /* create hard link response */ + P9PROTO_TMKDIR = 72, /* create a directory request */ + P9PROTO_RMKDIR, /* create a directory response */ + P9PROTO_TRENAMEAT = 74, /* request to rename a file or directory */ + P9PROTO_RRENAMEAT, /* reponse to rename request */ + P9PROTO_TUNLINKAT = 76, /* unlink a file or directory */ + P9PROTO_RUNLINKAT, /* reponse to unlink request */ + P9PROTO_TVERSION = 100, /* request for version handshake */ + P9PROTO_RVERSION, /* response for version handshake */ + P9PROTO_TAUTH = 102, /* request to establish authentication channel */ + P9PROTO_RAUTH, /* response with authentication information */ + P9PROTO_TATTACH = 104, /* establish a user access to a file system*/ + P9PROTO_RATTACH, /* response with top level handle to file hierarchy */ + P9PROTO_TERROR = 106, /* not used */ + P9PROTO_RERROR, /* response for any failed request */ + P9PROTO_TFLUSH = 108, /* request to abort a previous request */ + P9PROTO_RFLUSH, /* response when previous request has been cancelled */ + P9PROTO_TWALK = 110, /* descend a directory hierarchy */ + P9PROTO_RWALK, /* response with new handle for position within hierarchy */ + P9PROTO_TOPEN = 112, /* prepare file handle for I/O for an 
existing file */ + P9PROTO_ROPEN, /* response with file access information */ + P9PROTO_TCREATE = 114, /* prepare for handle for I/O on a new file */ + P9PROTO_RCREATE, /* response with file access information */ + P9PROTO_TREAD = 116, /* request to transfer data from a file */ + P9PROTO_RREAD, /* response with data requested */ + P9PROTO_TWRITE = 118, /* request to transfer data to a file */ + P9PROTO_RWRITE, /* response with how much data was written to the file */ + P9PROTO_TCLUNK = 120, /* forget about a handle to a file within the File System */ + P9PROTO_RCLUNK, /* response from the server for forgetting the file handle */ + P9PROTO_TREMOVE = 122, /* request to remove a file */ + P9PROTO_RREMOVE, /* response when server has removed the file */ + P9PROTO_TSTAT = 124, /* request file entity attributes */ + P9PROTO_RSTAT, /* response with file entity attributes */ + P9PROTO_TWSTAT = 126, /* request to update file entity attributes */ + P9PROTO_RWSTAT, /* response when file entity attributes are updated */ +}; + +/* File Open Modes */ +enum p9_open_mode_t { + P9PROTO_OREAD = 0x00, /* open file for reading only */ + P9PROTO_OWRITE = 0x01, /* open file for writing only */ + P9PROTO_ORDWR = 0x02, /* open file for both reading and writing */ + P9PROTO_OEXEC = 0x03, /* open file for execution */ + P9PROTO_OTRUNC = 0x10, /* truncate file to zero length before opening it */ + P9PROTO_OREXEC = 0x20, /* close the file when exec system call is made */ + P9PROTO_ORCLOSE = 0x40, /* remove the file when it is closed */ + P9PROTO_OAPPEND = 0x80, /* open the file and seek to the end of the file */ + P9PROTO_OEXCL = 0x1000, /* only create a file and not open it */ +}; + +/* FIle Permissions */ +enum p9_perm_t { + P9PROTO_DMDIR = 0x80000000, /* permission bit for directories */ + P9PROTO_DMAPPEND = 0x40000000, /* permission bit for is append-only */ + P9PROTO_DMEXCL = 0x20000000, /* permission bit for exclusive use (only one open handle allowed) */ + P9PROTO_DMMOUNT = 0x10000000, 
/* permission bit for mount points */ + P9PROTO_DMAUTH = 0x08000000, /* permission bit for authentication file */ + P9PROTO_DMTMP = 0x04000000, /* permission bit for non-backed-up files */ + P9PROTO_DMSYMLINK = 0x02000000, /* permission bit for symbolic link (9P2000.u) */ + P9PROTO_DMLINK = 0x01000000, /* permission bit for hard-link (9P2000.u) */ + P9PROTO_DMDEVICE = 0x00800000, /* permission bit for device files (9P2000.u) */ + P9PROTO_DMNAMEDPIPE = 0x00200000,/* permission bit for named pipe (9P2000.u) */ + P9PROTO_DMSOCKET = 0x00100000, /* permission bit for socket (9P2000.u) */ + P9PROTO_DMSETUID = 0x00080000, /* permission bit for setuid (9P2000.u) */ + P9PROTO_DMSETGID = 0x00040000, /* permission bit for setgid (9P2000.u) */ + P9PROTO_DMSETVTX = 0x00010000, /* permission bit for sticky bit (9P2000.u) */ +}; + +/* + * QID types - they are primarly used to + * differentiate semantics for a file system + */ +enum p9_qid_t { + P9PROTO_QTDIR = 0x80, /* directory */ + P9PROTO_QTAPPEND = 0x40, /* append-only */ + P9PROTO_QTEXCL = 0x20, /* exclusive use (only one open handle allowed)*/ + P9PROTO_QTMOUNT = 0x10, /* mount points */ + P9PROTO_QTAUTH = 0x08, /* authentication file */ + P9PROTO_QTTMP = 0x04, /* non-backed-up files */ + P9PROTO_QTSYMLINK = 0x02, /* symbolic links */ + P9PROTO_QTLINK = 0x01, /* hard link */ + P9PROTO_QTFILE = 0x00, /* normal files */ +}; + +/* P9 Magic Numbers */ +#define P9PROTO_NOFID (uint32_t)(~0) +#define P9_DEFUNAME "nobody" +#define P9_DEFANAME "" +#define P9_NONUNAME (uint32_t)(~0) +#define P9_MAXWELEM 16 + +/* Exchange unit between Qemu and Client */ +struct p9_qid { + uint8_t type; /* the type of the file */ + uint32_t version; /* version number for given path */ + uint64_t path; /* the file servers unique id for file */ +}; + +/* FS information stat structure */ +struct p9_statfs { + uint32_t type; /* type of file system */ + uint32_t bsize; /* optimal transfer block size */ + uint64_t blocks; /* total data blocks in file system 
*/ + uint64_t bfree; /* free blocks in fs */ + uint64_t bavail; /* free blocks avail to non-superuser */ + uint64_t files; /* total file nodes in file system */ + uint64_t ffree; /* free file nodes in fs */ + uint64_t fsid; /* file system id */ + uint32_t namelen; /* maximum length of filenames */ +}; + + +/* File system metadata information */ +struct p9_wstat { + uint16_t size; /* total byte count of the following data */ + uint16_t type; /* type of file */ + uint32_t dev; /* id of device containing file */ + struct p9_qid qid; /* identifier used by server for file system entity information */ + uint32_t mode; /* protection */ + uint32_t atime; /* time of last access */ + uint32_t mtime; /* time of last modification */ + uint64_t length; /* length of file in bytes */ + char *name; /* file name */ + char *uid; /* user ID of owner */ + char *gid; /* group ID of owner */ + char *muid; /* name of the user who last modified the file */ + char *extension; /* 9p2000.u extensions */ + uid_t n_uid; /* 9p2000.u extensions */ + gid_t n_gid; /* 9p2000.u extensions */ + uid_t n_muid; /* 9p2000.u extensions */ +}; + +/* The linux version of FS information stat structure*/ +struct p9_stat_dotl { + uint64_t st_result_mask;/* indicates fields that are requested */ + struct p9_qid qid; /* identifier used by server for file system entity information */ + uint32_t st_mode; /* protection */ + uid_t st_uid; /* user ID of owner */ + gid_t st_gid; /* group ID of owner */ + uint64_t st_nlink; /* number of hard links */ + uint64_t st_rdev; /* device ID (if special file) */ + uint64_t st_size; /* total size, in bytes */ + uint64_t st_blksize; /* blocksize for file system I/O */ + uint64_t st_blocks; /* number of 512B blocks allocated */ + uint64_t st_atime_sec; /* time of last access, seconds */ + uint64_t st_atime_nsec; /* time of last access, nanoseconds */ + uint64_t st_mtime_sec; /* time of last modification, seconds */ + uint64_t st_mtime_nsec; /* time of last modifictaion, 
nanoseconds */ + uint64_t st_ctime_sec; /* time of last status change, seconds*/ + uint64_t st_ctime_nsec; /* time of last status change, nanoseconds*/ + uint64_t st_btime_sec; /* following memebers are reserved for future use */ + uint64_t st_btime_nsec; + uint64_t st_gen; + uint64_t st_data_version; +}; + +/* P9 inode attribute for setattr */ +struct p9_iattr_dotl { + uint32_t valid; /* bit fields specifying which fields are valid */ + uint32_t mode; /* protection */ + uid_t uid; /* user id of owner */ + gid_t gid; /* group id */ + uint64_t size; /* file size */ + uint64_t atime_sec; /* last access time in seconds */ + uint64_t atime_nsec; /* last access time in nanoseconds */ + uint64_t mtime_sec; /* last modification time in seconds */ + uint64_t mtime_nsec; /* last modification time in nanoseconds */ +}; + +#define P9PROTO_STATS_MODE 0x00000001ULL +#define P9PROTO_STATS_NLINK 0x00000002ULL +#define P9PROTO_STATS_UID 0x00000004ULL +#define P9PROTO_STATS_GID 0x00000008ULL +#define P9PROTO_STATS_RDEV 0x00000010ULL +#define P9PROTO_STATS_ATIME 0x00000020ULL +#define P9PROTO_STATS_MTIME 0x00000040ULL +#define P9PROTO_STATS_CTIME 0x00000080ULL +#define P9PROTO_STATS_INO 0x00000100ULL +#define P9PROTO_STATS_SIZE 0x00000200ULL +#define P9PROTO_STATS_BLOCKS 0x00000400ULL + +#define P9PROTO_STATS_BTIME 0x00000800ULL +#define P9PROTO_STATS_GEN 0x00001000ULL +#define P9PROTO_STATS_DATA_VERSION 0x00002000ULL + +#define P9PROTO_STATS_BASIC 0x000007ffULL /* Mask for fields up to BLOCKS */ +#define P9PROTO_STATS_ALL 0x00003fffULL /* Mask for All fields above */ + +#define P9PROTO_SETATTR_MODE 0x00000001UL +#define P9PROTO_SETATTR_UID 0x00000002UL +#define P9PROTO_SETATTR_GID 0x00000004UL +#define P9PROTO_SETATTR_SIZE 0x00000008UL +#define P9PROTO_SETATTR_ATIME 0x00000010UL +#define P9PROTO_SETATTR_MTIME 0x00000020UL +#define P9PROTO_SETATTR_CTIME 0x00000040UL +#define P9PROTO_SETATTR_ATIME_SET 0x00000080UL +#define P9PROTO_SETATTR_MTIME_SET 0x00000100UL +#define 
P9PROTO_SETATTR_MASK 0x000001bfUL + +#define P9PROTO_TGETATTR_BLK 512 + +/* PDU buffer used for SG lists. */ +struct p9_buffer { + uint32_t size; + uint16_t tag; + uint8_t id; + size_t offset; + size_t capacity; + uint8_t *sdata; +}; + +#endif /* FS_P9FS_P9_PROTOCOL_H */ diff --git a/sys/fs/p9fs/p9_transport.c b/sys/fs/p9fs/p9_transport.c new file mode 100644 index 000000000000..c82d81fedcd7 --- /dev/null +++ b/sys/fs/p9fs/p9_transport.c @@ -0,0 +1,70 @@ +/*- + * Copyright (c) 2022-present Doug Rabson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include + +#include + +TAILQ_HEAD(, p9_trans_module) transports; + +static void +p9_transport_init(void) +{ + + TAILQ_INIT(&transports); +} + +SYSINIT(p9_transport, SI_SUB_DRIVERS, SI_ORDER_FIRST, p9_transport_init, NULL); + +void +p9_register_trans(struct p9_trans_module *m) +{ + + TAILQ_INSERT_TAIL(&transports, m, link); +} + +void +p9_unregister_trans(struct p9_trans_module *m) +{ + + TAILQ_REMOVE(&transports, m, link); +} + +struct p9_trans_module * +p9_get_trans_by_name(char *name) +{ + struct p9_trans_module *m; + + TAILQ_FOREACH(m, &transports, link) { + if (strcmp(m->name, name) == 0) + return (m); + } + return (NULL); +} + diff --git a/sys/fs/p9fs/p9_transport.h b/sys/fs/p9fs/p9_transport.h new file mode 100644 index 000000000000..143c29f2382e --- /dev/null +++ b/sys/fs/p9fs/p9_transport.h @@ -0,0 +1,53 @@ +/*- + * Copyright (c) 2017 Juniper Networks, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/* Transport definitions */
+#ifndef FS_P9FS_P9_TRANSPORT_H
+#define FS_P9FS_P9_TRANSPORT_H
+
+#include
+
+/* Only used through pointers in this header, so a forward declaration suffices. */
+struct p9_req_t;
+
+/*
+ * Transport module interface.
+ * One instance describes a single transport: its name plus the callbacks
+ * used to open and close a connection and to issue or cancel requests on it.
+ */
+struct p9_trans_module {
+	TAILQ_ENTRY(p9_trans_module) link; /* tail-queue linkage between modules */
+	char *name; /* name of transport */
+	/* member function to create a new connection on this transport */
+	int (*create)(const char *mount_tag, void **handlep);
+	/* member function to terminate a connection on this transport */
+	void (*close) (void *handle);
+	/* member function to issue a request to the transport */
+	int (*request) (void *handle, struct p9_req_t *req);
+	/* member function to cancel a request if it has been sent */
+	int (*cancel) (void *handle, struct p9_req_t *req);
+};
+
+/* Registration and lookup of transport modules. */
+void p9_register_trans(struct p9_trans_module *m);
+void p9_unregister_trans(struct p9_trans_module *m);
+struct p9_trans_module *p9_get_trans_by_name(char *s);
+
+#endif /* FS_P9FS_P9_TRANSPORT_H */
diff --git a/sys/fs/p9fs/p9fs.h b/sys/fs/p9fs/p9fs.h
new file mode 100644
index 000000000000..a270d8b5ce5f
--- /dev/null
+++ b/sys/fs/p9fs/p9fs.h
@@ -0,0 +1,203 @@
+/*-
+ * Copyright (c) 2017-2020 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* This file has prototypes specific to the p9fs file system */ + +#ifndef FS_P9FS_P9FS_H +#define FS_P9FS_P9FS_H + +struct p9fs_session; + +/* QID: Unique identification for the file being accessed */ +struct p9fs_qid { + uint8_t qid_mode; /* file mode specifying file type */ + uint32_t qid_version; /* version of the file */ + uint64_t qid_path; /* unique integer among all files in hierarchy */ +}; + +/* + * The in memory representation of the on disk inode. Save the current + * fields to write it back later.
+ */ +struct p9fs_inode { + /* Make it simple first, Add more fields later */ + uint64_t i_size; /* size of the inode */ + uint16_t i_type; /* type of inode */ + uint32_t i_dev; /* type of device */ + uint32_t i_mode; /* mode of the inode */ + uint32_t i_atime; /* time of last access */ + uint32_t i_mtime; /* time of last modification */ + uint32_t i_ctime; /* time of last status change */ + uint32_t i_atime_nsec; /* times of last access in nanoseconds resolution */ + uint32_t i_mtime_nsec; /* time of last modification in nanoseconds resolution */ + uint32_t i_ctime_nsec; /* time of last status change in nanoseconds resolution */ + uint64_t i_length; + char *i_name; /* inode name */ + char *i_uid; /* inode user id */ + char *i_gid; /* inode group id */ + char *i_muid; + char *i_extension; /* 9p2000.u extensions */ + uid_t n_uid; /* 9p2000.u extensions */ + gid_t n_gid; /* 9p2000.u extensions */ + uid_t n_muid; /* 9p2000.u extensions */ + /* bookkeeping info on the client. */ + uint16_t i_links_count; /*number of references to the inode*/ + uint64_t i_qid_path; /* using inode number for reference. 
*/ + uint64_t i_flags; + uint64_t blksize; /* block size for file system */ + uint64_t blocks; /* number of 512B blocks allocated */ + uint64_t gen; /* reserved for future use */ + uint64_t data_version; /* reserved for future use */ + +}; + +#define P9FS_VFID_MTX(_sc) (&(_sc)->vfid_mtx) +#define P9FS_VFID_LOCK(_sc) mtx_lock(P9FS_VFID_MTX(_sc)) +#define P9FS_VFID_UNLOCK(_sc) mtx_unlock(P9FS_VFID_MTX(_sc)) +#define P9FS_VFID_LOCK_INIT(_sc) mtx_init(P9FS_VFID_MTX(_sc), \ + "VFID List lock", NULL, MTX_DEF) +#define P9FS_VFID_LOCK_DESTROY(_sc) mtx_destroy(P9FS_VFID_MTX(_sc)) + +#define P9FS_VOFID_MTX(_sc) (&(_sc)->vofid_mtx) +#define P9FS_VOFID_LOCK(_sc) mtx_lock(P9FS_VOFID_MTX(_sc)) +#define P9FS_VOFID_UNLOCK(_sc) mtx_unlock(P9FS_VOFID_MTX(_sc)) +#define P9FS_VOFID_LOCK_INIT(_sc) mtx_init(P9FS_VOFID_MTX(_sc), \ + "VOFID List lock", NULL, MTX_DEF) +#define P9FS_VOFID_LOCK_DESTROY(_sc) mtx_destroy(P9FS_VOFID_MTX(_sc)) + +#define VFID 0x01 +#define VOFID 0x02 + +/* A Plan9 node. */ +struct p9fs_node { + STAILQ_HEAD( ,p9_fid) vfid_list; /* vfid related to uid */ + struct mtx vfid_mtx; /* mutex for vfid list */ + STAILQ_HEAD( ,p9_fid) vofid_list; /* vofid related to uid */ + struct mtx vofid_mtx; /* mutex for vofid list */ + struct p9fs_node *parent; /* pointer to parent p9fs node */ + struct p9fs_qid vqid; /* the server qid, will be from the host */ + struct vnode *v_node; /* vnode for this fs_node. 
*/ + struct p9fs_inode inode; /* in memory representation of on-disk information */ + struct p9fs_session *p9fs_ses; /* Session_ptr for this node */ + STAILQ_ENTRY(p9fs_node) p9fs_node_next; + uint64_t flags; +}; + +#define P9FS_VTON(vp) ((struct p9fs_node *)(vp)->v_data) +#define P9FS_NTOV(node) ((node)->v_node) +#define VFSTOP9(mp) ((struct p9fs_mount *)(mp)->mnt_data) +#define QEMU_DIRENTRY_SZ 25 +#define P9FS_NODE_MODIFIED 0x1 /* indicating file change */ +#define P9FS_ROOT 0x2 /* indicating root p9fs node */ +#define P9FS_NODE_DELETED 0x4 /* indicating file or directory delete */ +#define P9FS_NODE_IN_SESSION 0x8 /* p9fs_node is in the session - virt_node_list */ +#define IS_ROOT(node) ((node)->flags & P9FS_ROOT) + +#define P9FS_SET_LINKS(inode) do { \ + (inode)->i_links_count = 1; \ +} while (0) + +#define P9FS_INCR_LINKS(inode) do { \ + (inode)->i_links_count++; \ +} while (0) + +#define P9FS_DECR_LINKS(inode) do { \ + (inode)->i_links_count--; \ +} while (0) + +#define P9FS_CLR_LINKS(inode) do { \ + (inode)->i_links_count = 0; \ +} while (0) + +#define P9FS_MTX(_sc) (&(_sc)->p9fs_mtx) +#define P9FS_LOCK(_sc) mtx_lock(P9FS_MTX(_sc)) +#define P9FS_UNLOCK(_sc) mtx_unlock(P9FS_MTX(_sc)) +#define P9FS_LOCK_INIT(_sc) mtx_init(P9FS_MTX(_sc), \ + "P9FS session chain lock", NULL, MTX_DEF) +#define P9FS_LOCK_DESTROY(_sc) mtx_destroy(P9FS_MTX(_sc)) + +/* Session structure for the FS */ +struct p9fs_session { + unsigned char flags; /* these flags for the session */ + struct mount *p9fs_mount; /* mount point */ + struct p9fs_node rnp; /* root p9fs node for this session */ + uid_t uid; /* the uid that has access */ + const char *uname; /* user name to mount as */ + const char *aname; /* name of remote file tree being mounted */ + struct p9_client *clnt; /* 9p client */ + struct mtx p9fs_mtx; /* mutex used for guarding the chain. */ + STAILQ_HEAD( ,p9fs_node) virt_node_list; /* list of p9fs nodes in this session */ + struct p9_fid *mnt_fid; /* to save nobody 's fid for
unmounting as root user */ +}; + +struct p9fs_mount { + struct p9fs_session p9fs_session; /* per instance session information */ + struct mount *p9fs_mountp; /* mount point */ + int mount_tag_len; /* length of the mount tag */ + char *mount_tag; /* mount tag used */ +}; + +/* All session flags based on 9p versions */ +enum virt_session_flags { + P9FS_PROTO_2000U = 0x01, + P9FS_PROTO_2000L = 0x02, +}; + +/* Session access flags */ +#define P9_ACCESS_ANY 0x04 /* single attach for all users */ +#define P9_ACCESS_SINGLE 0x08 /* access to only the user who mounts */ +#define P9_ACCESS_USER 0x10 /* new attach established for every user */ +#define P9_ACCESS_MASK (P9_ACCESS_ANY|P9_ACCESS_SINGLE|P9_ACCESS_USER) + +u_quad_t p9fs_round_filesize_to_bytes(uint64_t filesize, uint64_t bsize); +u_quad_t p9fs_pow2_filesize_to_bytes(uint64_t filesize, uint64_t bsize); + +/* These are all the P9FS specific vops */ +int p9fs_stat_vnode_l(void); +int p9fs_stat_vnode_dotl(struct p9_stat_dotl *st, struct vnode *vp); +int p9fs_reload_stats_dotl(struct vnode *vp, struct ucred *cred); +int p9fs_proto_dotl(struct p9fs_session *vses); +struct p9_fid *p9fs_init_session(struct mount *mp, int *error); +void p9fs_close_session(struct mount *mp); +void p9fs_prepare_to_close(struct mount *mp); +void p9fs_complete_close(struct mount *mp); +int p9fs_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp); +int p9fs_vget_common(struct mount *mp, struct p9fs_node *np, int flags, + struct p9fs_node *parent, struct p9_fid *fid, struct vnode **vpp, + char *name); +int p9fs_node_cmp(struct vnode *vp, void *arg); +void p9fs_destroy_node(struct p9fs_node **npp); +void p9fs_dispose_node(struct p9fs_node **npp); +void p9fs_cleanup(struct p9fs_node *vp); +void p9fs_fid_remove_all(struct p9fs_node *np, int leave_ofids); +void p9fs_fid_remove(struct p9fs_node *np, struct p9_fid *vfid, + int fid_type); +void p9fs_fid_add(struct p9fs_node *np, struct p9_fid *fid, + int fid_type); +struct p9_fid 
*p9fs_get_fid(struct p9_client *clnt, + struct p9fs_node *np, struct ucred *cred, int fid_type, int mode, int *error); + +#endif /* FS_P9FS_P9FS_H */ diff --git a/sys/fs/p9fs/p9fs_proto.h b/sys/fs/p9fs/p9fs_proto.h new file mode 100644 index 000000000000..d78caa686f36 --- /dev/null +++ b/sys/fs/p9fs/p9fs_proto.h @@ -0,0 +1,42 @@ +/*- + * Copyright (c) 2017 Juniper Networks, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +/* + * Plan9 filesystem (9P2000.u) protocol definitions. 
+ */ + +#ifndef FS_P9FS_P9FS_PROTO_H +#define FS_P9FS_P9FS_PROTO_H + +//#include + +/* File permissions */ +#define P9FS_OREAD 0 +#define P9FS_OWRITE 1 +#define P9FS_ORDWR 2 +#define P9FS_OEXEC 3 +#define P9FS_OTRUNC 0x10 + +#endif /* FS_P9FS_P9FS_PROTO_H */ diff --git a/sys/fs/p9fs/p9fs_subr.c b/sys/fs/p9fs/p9fs_subr.c new file mode 100644 index 000000000000..d0f04f6c5e97 --- /dev/null +++ b/sys/fs/p9fs/p9fs_subr.c @@ -0,0 +1,411 @@ +/*- + * Copyright (c) 2017 Juniper Networks, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +/*- + * 9P filesystem subroutines. This file consists of all the Non VFS subroutines. + * It contains all of the functions related to the driver submission which form + * the upper layer i.e, p9fs driver. 
This will interact with the client to make + * sure we have correct API calls in the header. + */ + +#include +#include +#include +#include +#include +#include + +#include "p9fs_proto.h" + +#include +#include +#include +#include + +int +p9fs_proto_dotl(struct p9fs_session *vses) +{ + + return (vses->flags & P9FS_PROTO_2000L); +} + +/* Initialize a p9fs session */ +struct p9_fid * +p9fs_init_session(struct mount *mp, int *error) +{ + struct p9fs_session *vses; + struct p9fs_mount *virtmp; + struct p9_fid *fid; + char *access; + + virtmp = VFSTOP9(mp); + vses = &virtmp->p9fs_session; + vses->uid = P9_NONUNAME; + vses->uname = P9_DEFUNAME; + vses->aname = P9_DEFANAME; + + /* + * Create the client structure. Call into the driver to create + * driver structures for the actual IO transfer. + */ + vses->clnt = p9_client_create(mp, error, virtmp->mount_tag); + + if (vses->clnt == NULL) { + P9_DEBUG(ERROR, "%s: p9_client_create failed\n", __func__); + return (NULL); + } + /* + * Find the client version and cache the copy. We will use this copy + * throughout FS layer. 
+ */ + if (p9_is_proto_dotl(vses->clnt)) + vses->flags |= P9FS_PROTO_2000L; + else if (p9_is_proto_dotu(vses->clnt)) + vses->flags |= P9FS_PROTO_2000U; + + /* Set the access mode */ + access = vfs_getopts(mp->mnt_optnew, "access", error); + if (access == NULL) + vses->flags |= P9_ACCESS_USER; + else if (!strcmp(access, "any")) + vses->flags |= P9_ACCESS_ANY; + else if (!strcmp(access, "single")) + vses->flags |= P9_ACCESS_SINGLE; + else if (!strcmp(access, "user")) + vses->flags |= P9_ACCESS_USER; + else { + P9_DEBUG(ERROR, "%s: unknown access mode\n", __func__); + *error = EINVAL; + goto out; + } + + *error = 0; + /* Attach with the backend host */ + fid = p9_client_attach(vses->clnt, NULL, vses->uname, P9_NONUNAME, + vses->aname, error); + vses->mnt_fid = fid; + + if (*error != 0) { + P9_DEBUG(ERROR, "%s: attach failed: %d\n", __func__, *error); + goto out; + } + P9_DEBUG(SUBR, "%s: attach successful fid :%p\n", __func__, fid); + fid->uid = vses->uid; + + /* initialize the node list for the session */ + STAILQ_INIT(&vses->virt_node_list); + P9FS_LOCK_INIT(vses); + + P9_DEBUG(SUBR, "%s: INIT session successful\n", __func__); + + return (fid); +out: + p9_client_destroy(vses->clnt); + return (NULL); +} + +/* Begin to terminate a session */ +void +p9fs_prepare_to_close(struct mount *mp) +{ + struct p9fs_session *vses; + struct p9fs_mount *vmp; + struct p9fs_node *np, *pnp, *tmp; + + vmp = VFSTOP9(mp); + vses = &vmp->p9fs_session; + + /* break the node->parent references */ + STAILQ_FOREACH_SAFE(np, &vses->virt_node_list, p9fs_node_next, tmp) { + if (np->parent && np->parent != np) { + pnp = np->parent; + np->parent = NULL; + vrele(P9FS_NTOV(pnp)); + } + } + + /* We are about to tear down; we don't allow anything other than clunk after this. */ + p9_client_begin_disconnect(vses->clnt); +} + +/* Shutdown a session */ +void +p9fs_complete_close(struct mount *mp) +{ + struct p9fs_session *vses; + struct p9fs_mount *vmp; + + vmp = VFSTOP9(mp); + vses = &vmp->p9fs_session; + +
/* Finish the close*/ + p9_client_disconnect(vses->clnt); +} + + +/* Call from unmount. Close the session. */ +void +p9fs_close_session(struct mount *mp) +{ + struct p9fs_session *vses; + struct p9fs_mount *vmp; + + vmp = VFSTOP9(mp); + vses = &vmp->p9fs_session; + + p9fs_complete_close(mp); + /* Clean up the clnt structure. */ + p9_client_destroy(vses->clnt); + P9FS_LOCK_DESTROY(vses); + P9_DEBUG(SUBR, "%s: Clean close session .\n", __func__); +} + +/* + * Remove all the fids of a particular type from a p9fs node + * as well as destroy/clunk them. + */ +void +p9fs_fid_remove_all(struct p9fs_node *np, int leave_ofids) +{ + struct p9_fid *fid, *tfid; + + STAILQ_FOREACH_SAFE(fid, &np->vfid_list, fid_next, tfid) { + STAILQ_REMOVE(&np->vfid_list, fid, p9_fid, fid_next); + p9_client_clunk(fid); + } + + if (!leave_ofids) { + STAILQ_FOREACH_SAFE(fid, &np->vofid_list, fid_next, tfid) { + STAILQ_REMOVE(&np->vofid_list, fid, p9_fid, fid_next); + p9_client_clunk(fid); + } + } +} + + +/* Remove a fid from its corresponding fid list */ +void +p9fs_fid_remove(struct p9fs_node *np, struct p9_fid *fid, int fid_type) +{ + + switch (fid_type) { + case VFID: + P9FS_VFID_LOCK(np); + STAILQ_REMOVE(&np->vfid_list, fid, p9_fid, fid_next); + P9FS_VFID_UNLOCK(np); + break; + case VOFID: + P9FS_VOFID_LOCK(np); + STAILQ_REMOVE(&np->vofid_list, fid, p9_fid, fid_next); + P9FS_VOFID_UNLOCK(np); + break; + } +} + +/* Add a fid to the corresponding fid list */ +void +p9fs_fid_add(struct p9fs_node *np, struct p9_fid *fid, int fid_type) +{ + + switch (fid_type) { + case VFID: + P9FS_VFID_LOCK(np); + STAILQ_INSERT_TAIL(&np->vfid_list, fid, fid_next); + P9FS_VFID_UNLOCK(np); + break; + case VOFID: + P9FS_VOFID_LOCK(np); + STAILQ_INSERT_TAIL(&np->vofid_list, fid, fid_next); + P9FS_VOFID_UNLOCK(np); + break; + } +} + +/* Build the path from root to current directory */ +static int +p9fs_get_full_path(struct p9fs_node *np, char ***names) +{ + int i, n; + struct p9fs_node *node; + char **wnames; + + n = 
0; + for (node = np ; (node != NULL) && !IS_ROOT(node) ; node = node->parent) + n++; + + if (node == NULL) + return (0); + + wnames = malloc(n * sizeof(char *), M_TEMP, M_ZERO|M_WAITOK); + + for (i = n-1, node = np; i >= 0 ; i--, node = node->parent) + wnames[i] = node->inode.i_name; + + *names = wnames; + return (n); +} + +/* + * Return TRUE if this fid can be used for the requested mode. + */ +static int +p9fs_compatible_mode(struct p9_fid *fid, int mode) +{ + /* + * Return TRUE for an exact match. For OREAD and OWRITE, allow + * existing ORDWR fids to match. Only check the low two bits + * of mode. + * + * TODO: figure out if this is correct for O_APPEND + */ + int fid_mode = fid->mode & 3; + if (fid_mode == mode) + return (TRUE); + if (fid_mode == P9PROTO_ORDWR) + return (mode == P9PROTO_OREAD || mode == P9PROTO_OWRITE); + return (FALSE); +} + +/* + * Retrieve fid structure corresponding to a particular + * uid and fid type for a p9fs node + */ +static struct p9_fid * +p9fs_get_fid_from_uid(struct p9fs_node *np, uid_t uid, int fid_type, int mode) +{ + struct p9_fid *fid; + + switch (fid_type) { + case VFID: + P9FS_VFID_LOCK(np); + STAILQ_FOREACH(fid, &np->vfid_list, fid_next) { + if (fid->uid == uid) { + P9FS_VFID_UNLOCK(np); + return (fid); + } + } + P9FS_VFID_UNLOCK(np); + break; + case VOFID: + P9FS_VOFID_LOCK(np); + STAILQ_FOREACH(fid, &np->vofid_list, fid_next) { + if (fid->uid == uid && p9fs_compatible_mode(fid, mode)) { + P9FS_VOFID_UNLOCK(np); + return (fid); + } + } + P9FS_VOFID_UNLOCK(np); + break; + } + + return (NULL); +} + +/* + * Function returns the fid structure for a file corresponding to current user id. + * First it searches in the fid list of the corresponding p9fs node. + * New fid will be created if not already present and added in the corresponding + * fid list in the p9fs node. + * If the user is not already attached then this will attach the user first + * and then create a new fid for this particular file by doing dir walk.
+ */ +struct p9_fid * +p9fs_get_fid(struct p9_client *clnt, struct p9fs_node *np, struct ucred *cred, + int fid_type, int mode, int *error) +{ + uid_t uid; + struct p9_fid *fid, *oldfid; + struct p9fs_node *root; + struct p9fs_session *vses; + int i, l, clone; + char **wnames = NULL; + uint16_t nwnames; + + oldfid = NULL; + vses = np->p9fs_ses; + + if (vses->flags & P9_ACCESS_ANY) + uid = vses->uid; + else if (cred) + uid = cred->cr_uid; + else + uid = 0; + + /* + * Search for the fid in corresponding fid list. + * We should return NULL for VOFID if it is not present in the list. + * Because VOFID should have been created during the file open. + * If VFID is not present in the list then we should create one. + */ + fid = p9fs_get_fid_from_uid(np, uid, fid_type, mode); + if (fid != NULL || fid_type == VOFID) + return (fid); + + /* Check root if the user is attached */ + root = &np->p9fs_ses->rnp; + fid = p9fs_get_fid_from_uid(root, uid, fid_type, mode); + if(fid == NULL) { + /* Attach the user */ + fid = p9_client_attach(clnt, NULL, NULL, uid, + vses->aname, error); + if (*error != 0) + return (NULL); + p9fs_fid_add(root, fid, fid_type); + } + + /* If we are looking for root then return it */ + if (IS_ROOT(np)) + return (fid); + + /* Get full path from root to p9fs node */ + nwnames = p9fs_get_full_path(np, &wnames); + + /* + * Could not get full path. + * If p9fs node is not deleted, parent should exist. 
+ */ + KASSERT(nwnames != 0, ("%s: Directory of %s doesn't exist", __func__, np->inode.i_name)); + + clone = 1; + i = 0; + while (i < nwnames) { + l = MIN(nwnames - i, P9_MAXWELEM); + + fid = p9_client_walk(fid, l, wnames, clone, error); + if (*error != 0) { + if (oldfid) + p9_client_clunk(oldfid); + fid = NULL; + goto bail_out; + } + oldfid = fid; + clone = 0; + i += l ; + } + p9fs_fid_add(np, fid, fid_type); +bail_out: + free(wnames, M_TEMP); + return (fid); +} diff --git a/sys/fs/p9fs/p9fs_vfsops.c b/sys/fs/p9fs/p9fs_vfsops.c new file mode 100644 index 000000000000..6cc65aca35d3 --- /dev/null +++ b/sys/fs/p9fs/p9fs_vfsops.c @@ -0,0 +1,602 @@ +/*- + * Copyright (c) 2017-2020 Juniper Networks, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +/* + * This file consists of all the VFS interactions of VFS ops which include + * mount, unmount, initialize etc. for p9fs. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +SYSCTL_NODE(_vfs, OID_AUTO, p9fs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, + "Plan 9 filesystem"); + +/* This count is static now. Can be made tunable later */ +#define P9FS_FLUSH_RETRIES 10 + +static MALLOC_DEFINE(M_P9MNT, "p9fs_mount", "Mount structures for p9fs"); +static uma_zone_t p9fs_node_zone; +uma_zone_t p9fs_io_buffer_zone; +uma_zone_t p9fs_getattr_zone; +uma_zone_t p9fs_setattr_zone; +extern struct vop_vector p9fs_vnops; + +/* option parsing */ +static const char *p9fs_opts[] = { + "from", "trans", "access", NULL +}; + +/* Dispose p9fs node; non-root nodes are freed back to the UMA zone */ +void +p9fs_dispose_node(struct p9fs_node **npp) +{ + struct p9fs_node *node; + struct vnode *vp; + + node = *npp; + + if (node == NULL) + return; + + if (node->parent && node->parent != node) { + vrele(P9FS_NTOV(node->parent)); + } + + P9_DEBUG(VOPS, "%s: node: %p\n", __func__, *npp); + + vp = P9FS_NTOV(node); + vp->v_data = NULL; + + /* Free our associated memory */ + if (!(vp->v_vflag & VV_ROOT)) { + free(node->inode.i_name, M_TEMP); + uma_zfree(p9fs_node_zone, node); + } + + *npp = NULL; +} + +/* Initialize memory allocation */ +static int +p9fs_init(struct vfsconf *vfsp) +{ + + p9fs_node_zone = uma_zcreate("p9fs node zone", + sizeof(struct p9fs_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + + /* Create the getattr_dotl zone */ + p9fs_getattr_zone = uma_zcreate("p9fs getattr zone", + sizeof(struct p9_stat_dotl), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + + /* Create the setattr_dotl zone */ + p9fs_setattr_zone = uma_zcreate("p9fs setattr zone", + sizeof(struct p9_iattr_dotl), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + + /* + * Create the io_buffer zone pool to keep things simpler in case of + * multiple threads.
Each thread works with its own so there is no + * contention. + */ + p9fs_io_buffer_zone = uma_zcreate("p9fs io_buffer zone", + P9FS_MTU, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + + return (0); +} + +/* Destroy all the allocated memory */ +static int +p9fs_uninit(struct vfsconf *vfsp) +{ + + uma_zdestroy(p9fs_node_zone); + uma_zdestroy(p9fs_io_buffer_zone); + uma_zdestroy(p9fs_getattr_zone); + uma_zdestroy(p9fs_setattr_zone); + + return (0); +} + +/* Function to umount p9fs */ +static int +p9fs_unmount(struct mount *mp, int mntflags) +{ + struct p9fs_mount *vmp; + struct p9fs_session *vses; + int error, flags, i; + + error = 0; + flags = 0; + vmp = VFSTOP9(mp); + if (vmp == NULL) + return (0); + + vses = &vmp->p9fs_session; + if (mntflags & MNT_FORCE) + flags |= FORCECLOSE; + + p9fs_prepare_to_close(mp); + for (i = 0; i < P9FS_FLUSH_RETRIES; i++) { + + /* Flush everything on this mount point.*/ + error = vflush(mp, 1, flags, curthread); + + if (error == 0 || (mntflags & MNT_FORCE) == 0) + break; + /* Sleep until interrupted or 1 tick expires. */ + error = tsleep(&error, PSOCK, "p9unmnt", 1); + if (error == EINTR) + break; + error = EBUSY; + } + + if (error != 0) + goto out; + p9fs_close_session(mp); + /* Cleanup the mount structure. 
*/ + free(vmp, M_P9MNT); + mp->mnt_data = NULL; + return (error); +out: + /* Restore the flag in case of error */ + vses->clnt->trans_status = P9FS_CONNECT; + return (error); +} + +/* + * Compare qid stored in p9fs node + * Return 1 if does not match otherwise return 0 + */ +int +p9fs_node_cmp(struct vnode *vp, void *arg) +{ + struct p9fs_node *np; + struct p9_qid *qid; + + np = vp->v_data; + qid = (struct p9_qid *)arg; + + if (np == NULL) + return (1); + + if (np->vqid.qid_path == qid->path) { + if (vp->v_vflag & VV_ROOT) + return (0); + else if (np->vqid.qid_mode == qid->type && + np->vqid.qid_version == qid->version) + return (0); + } + + return (1); +} + +/* + * Cleanup p9fs node + * - Destroy the FID LIST locks + * - Dispose all node knowledge + */ +void +p9fs_destroy_node(struct p9fs_node **npp) +{ + struct p9fs_node *np; + + np = *npp; + + if (np == NULL) + return; + + /* Destroy the FID LIST locks */ + P9FS_VFID_LOCK_DESTROY(np); + P9FS_VOFID_LOCK_DESTROY(np); + + /* Dispose all node knowledge.*/ + p9fs_dispose_node(&np); +} + +/* + * Common code used across p9fs to return vnode for the file represented + * by the fid. + * Lookup for the vnode in hash_list. This lookup is based on the qid path + * which is unique to a file. p9fs_node_cmp is called in this lookup process. + * I. If the vnode we are looking for is found in the hash list + * 1. Check if the vnode is a valid vnode by reloading its stats + * a. if the reloading of the vnode stats returns error then remove the + * vnode from hash list and return + * b. If reloading of vnode stats returns without any error then, clunk the + * new fid which was created for the vnode as we know that the vnode + * already has a fid associated with it and return the vnode. + * This is to avoid fid leaks + * II. 
If vnode is not found in the hash list then, create new vnode, p9fs + * node and return the vnode + */ +int +p9fs_vget_common(struct mount *mp, struct p9fs_node *np, int flags, + struct p9fs_node *parent, struct p9_fid *fid, struct vnode **vpp, + char *name) +{ + struct p9fs_mount *vmp; + struct p9fs_session *vses; + struct vnode *vp; + struct p9fs_node *node; + struct thread *td; + uint32_t hash; + int error, error_reload = 0; + struct p9fs_inode *inode; + + td = curthread; + vmp = VFSTOP9(mp); + vses = &vmp->p9fs_session; + + /* Look for vp in the hash_list */ + hash = fnv_32_buf(&fid->qid.path, sizeof(uint64_t), FNV1_32_INIT); + error = vfs_hash_get(mp, hash, flags, td, &vp, p9fs_node_cmp, + &fid->qid); + if (error != 0) + return (error); + else if (vp != NULL) { + if (vp->v_vflag & VV_ROOT) { + if (np == NULL) + p9_client_clunk(fid); + *vpp = vp; + return (0); + } + error = p9fs_reload_stats_dotl(vp, curthread->td_ucred); + if (error != 0) { + node = vp->v_data; + /* Remove stale vnode from hash list */ + vfs_hash_remove(vp); + node->flags |= P9FS_NODE_DELETED; + + vput(vp); + *vpp = NULLVP; + vp = NULL; + } else { + *vpp = vp; + /* Clunk the new fid if not root */ + p9_client_clunk(fid); + return (0); + } + } + + /* + * We must promote to an exclusive lock for vnode creation. This + * can happen if lookup is passed LOCKSHARED. + */ + if ((flags & LK_TYPE_MASK) == LK_SHARED) { + flags &= ~LK_TYPE_MASK; + flags |= LK_EXCLUSIVE; + } + + /* Allocate a new vnode. */ + if ((error = getnewvnode("p9fs", mp, &p9fs_vnops, &vp)) != 0) { + *vpp = NULLVP; + P9_DEBUG(ERROR, "%s: getnewvnode failed: %d\n", __func__, error); + return (error); + } + + /* If we dont have it, create one. 
*/ + if (np == NULL) { + np = uma_zalloc(p9fs_node_zone, M_WAITOK | M_ZERO); + /* Initialize the VFID list */ + P9FS_VFID_LOCK_INIT(np); + STAILQ_INIT(&np->vfid_list); + p9fs_fid_add(np, fid, VFID); + + /* Initialize the VOFID list */ + P9FS_VOFID_LOCK_INIT(np); + STAILQ_INIT(&np->vofid_list); + + vref(P9FS_NTOV(parent)); + np->parent = parent; + np->p9fs_ses = vses; /* Map the current session */ + inode = &np->inode; + /*Fill the name of the file in inode */ + inode->i_name = malloc(strlen(name)+1, M_TEMP, M_NOWAIT | M_ZERO); + strlcpy(inode->i_name, name, strlen(name)+1); + } else { + vp->v_type = VDIR; /* root vp is a directory */ + vp->v_vflag |= VV_ROOT; + vref(vp); /* Increment a reference on root vnode during mount */ + } + + vp->v_data = np; + np->v_node = vp; + inode = &np->inode; + inode->i_qid_path = fid->qid.path; + P9FS_SET_LINKS(inode); + + lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); + error = insmntque(vp, mp); + if (error != 0) { + /* + * vput(vp) is already called from insmntque_stddtr(). + * Just goto 'out' to dispose the node. + */ + goto out; + } + + /* Init the vnode with the disk info*/ + error = p9fs_reload_stats_dotl(vp, curthread->td_ucred); + if (error != 0) { + error_reload = 1; + goto out; + } + + error = vfs_hash_insert(vp, hash, flags, td, vpp, + p9fs_node_cmp, &fid->qid); + if (error != 0) { + goto out; + } + + if (*vpp == NULL) { + P9FS_LOCK(vses); + STAILQ_INSERT_TAIL(&vses->virt_node_list, np, p9fs_node_next); + np->flags |= P9FS_NODE_IN_SESSION; + P9FS_UNLOCK(vses); + + *vpp = vp; + } else { + /* + * Returning matching vp found in hashlist. + * So cleanup the np allocated above in this context. 
+ */ + if (!IS_ROOT(np)) { + p9fs_destroy_node(&np); + } + } + + return (0); +out: + /* Something went wrong, dispose the node */ + if (!IS_ROOT(np)) { + p9fs_destroy_node(&np); + } + + if (error_reload) { + vput(vp); + } + + *vpp = NULLVP; + return (error); +} + +/* Main mount function for 9pfs */ +static int +p9_mount(struct mount *mp) +{ + struct p9_fid *fid; + struct p9fs_mount *vmp; + struct p9fs_session *vses; + struct p9fs_node *p9fs_root; + int error; + char *from; + int len; + + /* Verify the validity of mount options */ + if (vfs_filteropt(mp->mnt_optnew, p9fs_opts)) + return (EINVAL); + + /* Extract NULL terminated mount tag from mount options */ + error = vfs_getopt(mp->mnt_optnew, "from", (void **)&from, &len); + if (error != 0 || from[len - 1] != '\0') + return (EINVAL); + + /* Allocate and initialize the private mount structure. */ + vmp = malloc(sizeof (struct p9fs_mount), M_P9MNT, M_WAITOK | M_ZERO); + mp->mnt_data = vmp; + vmp->p9fs_mountp = mp; + vmp->mount_tag = from; + vmp->mount_tag_len = len; + vses = &vmp->p9fs_session; + vses->p9fs_mount = mp; + p9fs_root = &vses->rnp; + /* Hardware iosize from the Qemu */ + mp->mnt_iosize_max = PAGE_SIZE; + /* + * Init the session for the p9fs root. This creates a new root fid and + * attaches the client and server. + */ + fid = p9fs_init_session(mp, &error); + if (fid == NULL) { + goto out; + } + + P9FS_VFID_LOCK_INIT(p9fs_root); + STAILQ_INIT(&p9fs_root->vfid_list); + p9fs_fid_add(p9fs_root, fid, VFID); + P9FS_VOFID_LOCK_INIT(p9fs_root); + STAILQ_INIT(&p9fs_root->vofid_list); + p9fs_root->parent = p9fs_root; + p9fs_root->flags |= P9FS_ROOT; + p9fs_root->p9fs_ses = vses; + vfs_getnewfsid(mp); + strlcpy(mp->mnt_stat.f_mntfromname, from, + sizeof(mp->mnt_stat.f_mntfromname)); + MNT_ILOCK(mp); + mp->mnt_flag |= MNT_LOCAL; + mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED; + MNT_IUNLOCK(mp); + P9_DEBUG(VOPS, "%s: Mount successful\n", __func__); + /* Mount structures created. 
*/ + + return (0); +out: + P9_DEBUG(ERROR, "%s: Mount Failed \n", __func__); + if (vmp != NULL) { + free(vmp, M_P9MNT); + mp->mnt_data = NULL; + } + return (error); +} + +/* Mount entry point */ +static int +p9fs_mount(struct mount *mp) +{ + int error; + + /* + * Minimal support for MNT_UPDATE - allow changing from + * readonly. + */ + if (mp->mnt_flag & MNT_UPDATE) { + if ((mp->mnt_flag & MNT_RDONLY) && !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) { + mp->mnt_flag &= ~MNT_RDONLY; + } + return (0); + } + + error = p9_mount(mp); + if (error != 0) + (void) p9fs_unmount(mp, MNT_FORCE); + + return (error); +} + +/* + * Retrieve the root vnode of this mount. After filesystem is mounted, the root + * vnode is created for the first time. Subsequent calls to p9fs root will + * return the same vnode created during mount. + */ +static int +p9fs_root(struct mount *mp, int lkflags, struct vnode **vpp) +{ + struct p9fs_mount *vmp; + struct p9fs_node *np; + struct p9_client *clnt; + struct p9_fid *vfid; + int error; + + vmp = VFSTOP9(mp); + np = &vmp->p9fs_session.rnp; + clnt = vmp->p9fs_session.clnt; + error = 0; + + P9_DEBUG(VOPS, "%s: node=%p name=%s\n",__func__, np, np->inode.i_name); + + vfid = p9fs_get_fid(clnt, np, curthread->td_ucred, VFID, -1, &error); + + if (error != 0) { + /* for root use the nobody user's fid as vfid. 
+ * This is used while unmounting as root when non-root + * user has mounted p9fs + */ + if (vfid == NULL && clnt->trans_status == P9FS_BEGIN_DISCONNECT) + vfid = vmp->p9fs_session.mnt_fid; + else { + *vpp = NULLVP; + return (error); + } + } + + error = p9fs_vget_common(mp, np, lkflags, np, vfid, vpp, NULL); + if (error != 0) { + *vpp = NULLVP; + return (error); + } + np->v_node = *vpp; + return (error); +} + +/* Retrieve the file system statistics */ +static int +p9fs_statfs(struct mount *mp __unused, struct statfs *buf) +{ + struct p9fs_mount *vmp; + struct p9fs_node *np; + struct p9_client *clnt; + struct p9_fid *vfid; + struct p9_statfs statfs; + int res, error; + + vmp = VFSTOP9(mp); + np = &vmp->p9fs_session.rnp; + clnt = vmp->p9fs_session.clnt; + error = 0; + + vfid = p9fs_get_fid(clnt, np, curthread->td_ucred, VFID, -1, &error); + if (error != 0) { + return (error); + } + + res = p9_client_statfs(vfid, &statfs); + + if (res == 0) { + buf->f_type = statfs.type; + /* + * We have a limit of 4k irrespective of what the + * Qemu server can do. 
+ */ + if (statfs.bsize > PAGE_SIZE) + buf->f_bsize = PAGE_SIZE; + else + buf->f_bsize = statfs.bsize; + + buf->f_iosize = buf->f_bsize; + buf->f_blocks = statfs.blocks; + buf->f_bfree = statfs.bfree; + buf->f_bavail = statfs.bavail; + buf->f_files = statfs.files; + buf->f_ffree = statfs.ffree; + } + else { + /* Atleast set these if stat fail */ + buf->f_bsize = PAGE_SIZE; + buf->f_iosize = buf->f_bsize; /* XXX */ + } + + return (0); +} + +static int +p9fs_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp) +{ + + return (EINVAL); +} + +struct vfsops p9fs_vfsops = { + .vfs_init = p9fs_init, + .vfs_uninit = p9fs_uninit, + .vfs_mount = p9fs_mount, + .vfs_unmount = p9fs_unmount, + .vfs_root = p9fs_root, + .vfs_statfs = p9fs_statfs, + .vfs_fhtovp = p9fs_fhtovp, +}; + +VFS_SET(p9fs_vfsops, p9fs, VFCF_JAIL); +MODULE_VERSION(p9fs, 1); diff --git a/sys/fs/p9fs/p9fs_vnops.c b/sys/fs/p9fs/p9fs_vnops.c new file mode 100644 index 000000000000..c1bea18d5eef --- /dev/null +++ b/sys/fs/p9fs/p9fs_vnops.c @@ -0,0 +1,2148 @@ +/* + * Copyright (c) 2017-2020 Juniper Networks, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* This file contains VFS file ops for the 9P protocol. + * This makes the upper layer of the p9fs driver. These functions interact + * with the VFS layer and lower layer of p9fs driver which is 9Pnet. All + * the user file operations are handled here. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +/* File permissions. */ +#define IEXEC 0000100 /* Executable. */ +#define IWRITE 0000200 /* Writeable. */ +#define IREAD 0000400 /* Readable. */ +#define ISVTX 0001000 /* Sticky bit. */ +#define ISGID 0002000 /* Set-gid. */ +#define ISUID 0004000 /* Set-uid. */ + +static MALLOC_DEFINE(M_P9UIOV, "uio", "UIOV structures for strategy in p9fs"); +extern uma_zone_t p9fs_io_buffer_zone; +extern uma_zone_t p9fs_getattr_zone; +extern uma_zone_t p9fs_setattr_zone; +/* For the root vnode's vnops. */ +struct vop_vector p9fs_vnops; + +static uint32_t p9fs_unix2p9_mode(uint32_t mode); + +static void +p9fs_itimes(struct vnode *vp) +{ + struct p9fs_node *node; + struct timespec ts; + struct p9fs_inode *inode; + + node = P9FS_VTON(vp); + inode = &node->inode; + + vfs_timestamp(&ts); + inode->i_mtime = ts.tv_sec; +} + +/* + * Cleanup the p9fs node, the in memory representation of a vnode for p9fs. 
+ * The cleanup includes invalidating all cache entries for the vnode,
+ * destroying the vobject, removing vnode from hashlist, removing p9fs node
+ * from the list of session p9fs nodes, and disposing of the p9fs node.
+ * Basically it is doing a reverse of what a create/vget does.
+ *
+ * A NULL np is ignored.  If the node is not on the session's node list
+ * (P9FS_NODE_IN_SESSION is clear), only the hash removal is performed and
+ * the function returns without purging caches, removing fids or destroying
+ * the node.
+ */
+void
+p9fs_cleanup(struct p9fs_node *np)
+{
+	struct vnode *vp;
+	struct p9fs_session *vses;
+
+	if (np == NULL)
+		return;
+
+	vp = P9FS_NTOV(np);
+	vses = np->p9fs_ses;
+
+	/* Remove the vnode from hash list if vnode is not already deleted */
+	if ((np->flags & P9FS_NODE_DELETED) == 0)
+		vfs_hash_remove(vp);
+
+	/* Unlink the node from the session list under the session lock. */
+	P9FS_LOCK(vses);
+	if ((np->flags & P9FS_NODE_IN_SESSION) != 0) {
+		np->flags &= ~P9FS_NODE_IN_SESSION;
+		STAILQ_REMOVE(&vses->virt_node_list, np, p9fs_node, p9fs_node_next);
+	} else {
+		/* Not tracked by the session: nothing further to tear down. */
+		P9FS_UNLOCK(vses);
+		return;
+	}
+	P9FS_UNLOCK(vses);
+
+	/*
+	 * Each teardown step below is performed exactly once.  In particular
+	 * the vnode has already been unlinked from the hash list above, so it
+	 * must not be passed to vfs_hash_remove() a second time.
+	 */
+
+	/* Invalidate all entries to a particular vnode. */
+	cache_purge(vp);
+
+	/* Destroy the vm object and flush associated pages. */
+	vnode_destroy_vobject(vp);
+
+	/* Remove all the FID */
+	p9fs_fid_remove_all(np, FALSE);
+
+	/* Dispose all node knowledge.*/
+	p9fs_destroy_node(&np);
+}
+
+/*
+ * Reclaim VOP is defined to be called for every vnode. This starts off
+ * the cleanup by clunking(remove the fid on the server) and calls
+ * p9fs_cleanup to free all the resources allocated for p9fs node.
+ */ +static int +p9fs_reclaim(struct vop_reclaim_args *ap) +{ + struct vnode *vp; + struct p9fs_node *np; + + vp = ap->a_vp; + np = P9FS_VTON(vp); + + P9_DEBUG(VOPS, "%s: vp:%p node:%p\n", __func__, vp, np); + p9fs_cleanup(np); + + return (0); +} + +/* + * recycle vnodes which are no longer referenced i.e, their usecount is zero + */ +static int +p9fs_inactive(struct vop_inactive_args *ap) +{ + struct vnode *vp; + struct p9fs_node *np; + + vp = ap->a_vp; + np = P9FS_VTON(vp); + + P9_DEBUG(VOPS, "%s: vp:%p node:%p file:%s\n", __func__, vp, np, np->inode.i_name); + if (np->flags & P9FS_NODE_DELETED) + vrecycle(vp); + + return (0); +} + +struct p9fs_lookup_alloc_arg { + struct componentname *cnp; + struct p9fs_node *dnp; + struct p9_fid *newfid; +}; + +/* Callback for vn_get_ino */ +static int +p9fs_lookup_alloc(struct mount *mp, void *arg, int lkflags, struct vnode **vpp) +{ + struct p9fs_lookup_alloc_arg *p9aa = arg; + + return (p9fs_vget_common(mp, NULL, p9aa->cnp->cn_lkflags, p9aa->dnp, + p9aa->newfid, vpp, p9aa->cnp->cn_nameptr)); +} + +/* + * p9fs_lookup is called for every component name that is being searched for. + * + * I. If component is found on the server, we look for the in-memory + * repesentation(vnode) of this component in namecache. + * A. If the node is found in the namecache, we check is the vnode is still + * valid. + * 1. If it is still valid, return vnode. + * 2. If it is not valid, we remove this vnode from the name cache and + * create a new vnode for the component and return that vnode. + * B. If the vnode is not found in the namecache, we look for it in the + * hash list. + * 1. If the vnode is in the hash list, we check if the vnode is still + * valid. + * a. If it is still valid, we add that vnode to the namecache for + * future lookups and return the vnode. + * b. If it is not valid, create a new vnode and p9fs node, + * initialize them and return the vnode. + * 2. 
If the vnode is not found in the hash list, we create a new vnode + * and p9fs node, initialize them and return the vnode. + * II. If the component is not found on the server, an error code is returned. + * A. For the creation case, we return EJUSTRETURN so VFS can handle it. + * B. For all other cases, ENOENT is returned. + */ +static int +p9fs_lookup(struct vop_lookup_args *ap) +{ + struct vnode *dvp; + struct vnode **vpp, *vp; + struct componentname *cnp; + struct p9fs_node *dnp; /*dir p9_node */ + struct p9fs_node *np; + struct p9fs_session *vses; + struct mount *mp; /* Get the mount point */ + struct p9_fid *dvfid, *newfid; + int error; + struct vattr vattr; + int flags; + char tmpchr; + + dvp = ap->a_dvp; + vpp = ap->a_vpp; + cnp = ap->a_cnp; + dnp = P9FS_VTON(dvp); + error = 0; + flags = cnp->cn_flags; + *vpp = NULLVP; + + if (dnp == NULL) + return (ENOENT); + + vses = dnp->p9fs_ses; + mp = vses->p9fs_mount; + + /* Do the cache part ourselves */ + if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) && + (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) + return (EROFS); + + if (dvp->v_type != VDIR) + return (ENOTDIR); + + error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, curthread); + if (error) + return (error); + + /* Do the directory walk on host to check if file exist */ + dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error); + if (error) + return (error); + + /* + * Save the character present at namelen in nameptr string and + * null terminate the character to get the search name for p9_dir_walk + * This is done to handle when lookup is for "a" and component + * name contains a/b/c + */ + tmpchr = cnp->cn_nameptr[cnp->cn_namelen]; + cnp->cn_nameptr[cnp->cn_namelen] = '\0'; + + /* + * If the client_walk fails, it means the file looking for doesnt exist. 
+ * Create the file is the flags are set or just return the error + */ + newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error); + + cnp->cn_nameptr[cnp->cn_namelen] = tmpchr; + + if (error != 0 || newfid == NULL) { + /* Clunk the newfid if it is not NULL */ + if (newfid != NULL) + p9_client_clunk(newfid); + + if (error != ENOENT) + return (error); + + /* The requested file was not found. */ + if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && + (flags & ISLASTCN)) { + + if (mp->mnt_flag & MNT_RDONLY) + return (EROFS); + + error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, + curthread); + if (!error) { + return (EJUSTRETURN); + } + } + return (error); + } + + /* Look for the entry in the component cache*/ + error = cache_lookup(dvp, vpp, cnp, NULL, NULL); + if (error > 0 && error != ENOENT) { + P9_DEBUG(VOPS, "%s: Cache lookup error %d \n", __func__, error); + goto out; + } + + if (error == -1) { + vp = *vpp; + /* Check if the entry in cache is stale or not */ + if ((p9fs_node_cmp(vp, &newfid->qid) == 0) && + ((error = VOP_GETATTR(vp, &vattr, cnp->cn_cred)) == 0)) { + goto out; + } + /* + * This case, we have an error coming from getattr, + * act accordingly. + */ + cache_purge(vp); + if (dvp != vp) + vput(vp); + else + vrele(vp); + + *vpp = NULLVP; + } else if (error == ENOENT) { + if (VN_IS_DOOMED(dvp)) + goto out; + if (VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0) { + error = ENOENT; + goto out; + } + cache_purge_negative(dvp); + } + /* Reset values */ + error = 0; + vp = NULLVP; + + tmpchr = cnp->cn_nameptr[cnp->cn_namelen]; + cnp->cn_nameptr[cnp->cn_namelen] = '\0'; + + /* + * Looks like we have found an entry. Now take care of all other cases. 
+ */ + if (flags & ISDOTDOT) { + struct p9fs_lookup_alloc_arg p9aa; + p9aa.cnp = cnp; + p9aa.dnp = dnp; + p9aa.newfid = newfid; + error = vn_vget_ino_gen(dvp, p9fs_lookup_alloc, &p9aa, 0, &vp); + if (error) + goto out; + *vpp = vp; + } else { + /* + * client_walk is equivalent to searching a component name in a + * directory(fid) here. If new fid is returned, we have found an + * entry for this component name so, go and create the rest of + * the vnode infra(vget_common) for the returned newfid. + */ + if ((cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME) + && (flags & ISLASTCN)) { + error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, + curthread); + if (error) + goto out; + + error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags, + dnp, newfid, &vp, cnp->cn_nameptr); + if (error) + goto out; + + *vpp = vp; + np = P9FS_VTON(vp); + if ((dnp->inode.i_mode & ISVTX) && + cnp->cn_cred->cr_uid != 0 && + cnp->cn_cred->cr_uid != dnp->inode.n_uid && + cnp->cn_cred->cr_uid != np->inode.n_uid) { + vput(*vpp); + *vpp = NULL; + cnp->cn_nameptr[cnp->cn_namelen] = tmpchr; + return (EPERM); + } + } else { + error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags, + dnp, newfid, &vp, cnp->cn_nameptr); + if (error) + goto out; + *vpp = vp; + } + } + + cnp->cn_nameptr[cnp->cn_namelen] = tmpchr; + + /* Store the result the cache if MAKEENTRY is specified in flags */ + if ((cnp->cn_flags & MAKEENTRY) != 0) + cache_enter(dvp, *vpp, cnp); + return (error); +out: + cnp->cn_nameptr[cnp->cn_namelen] = tmpchr; + p9_client_clunk(newfid); + return (error); +} + +/* + * Common creation function for file/directory with respective flags. We first + * open the parent directory in order to create the file under it. For this, + * as 9P protocol suggests, we need to call client_walk to create the open fid. + * Once we have the open fid, the file_create function creates the direntry with + * the name and perm specified under the parent dir. 
If this succeeds (an entry + * is created for the new file on the server), we create our metadata for this + * file (vnode, p9fs node calling vget). Once we are done, we clunk the open + * fid of the parent directory. + */ +static int +create_common(struct p9fs_node *dnp, struct componentname *cnp, + char *extension, uint32_t perm, uint8_t mode, struct vnode **vpp) +{ + char tmpchr; + struct p9_fid *dvfid, *ofid, *newfid; + struct p9fs_session *vses; + struct mount *mp; + int error; + + P9_DEBUG(VOPS, "%s: name %s\n", __func__, cnp->cn_nameptr); + + vses = dnp->p9fs_ses; + mp = vses->p9fs_mount; + newfid = NULL; + error = 0; + + dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error); + if (error != 0) + return (error); + + /* Clone the directory fid to create the new file */ + ofid = p9_client_walk(dvfid, 0, NULL, 1, &error); + if (error != 0) + return (error); + + /* + * Save the character present at namelen in nameptr string and + * null terminate the character to get the search name for p9_dir_walk + */ + tmpchr = cnp->cn_nameptr[cnp->cn_namelen]; + cnp->cn_nameptr[cnp->cn_namelen] = '\0'; + + error = p9_client_file_create(ofid, cnp->cn_nameptr, perm, mode, + extension); + if (error != 0) { + P9_DEBUG(ERROR, "%s: p9_client_fcreate failed %d\n", __func__, error); + goto out; + } + + /* If its not hardlink only then do the walk, else we are done. */ + if (!(perm & P9PROTO_DMLINK)) { + /* + * Do the lookup part and add the vnode, p9fs node. Note that vpp + * is filled in here. 
+ */ + newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error); + if (newfid != NULL) { + error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags, + dnp, newfid, vpp, cnp->cn_nameptr); + if (error != 0) + goto out; + } else { + /* Not found return NOENTRY.*/ + goto out; + } + + if ((cnp->cn_flags & MAKEENTRY) != 0) + cache_enter(P9FS_NTOV(dnp), *vpp, cnp); + } + P9_DEBUG(VOPS, "%s: created file under vp %p node %p fid %ju\n", + __func__, *vpp, dnp, (uintmax_t)dvfid->fid); + /* Clunk the open ofid. */ + if (ofid != NULL) + (void)p9_client_clunk(ofid); + + cnp->cn_nameptr[cnp->cn_namelen] = tmpchr; + return (0); +out: + if (ofid != NULL) + (void)p9_client_clunk(ofid); + + if (newfid != NULL) + (void)p9_client_clunk(newfid); + + cnp->cn_nameptr[cnp->cn_namelen] = tmpchr; + return (error); +} + +/* + * This is the main file creation VOP. Make the permissions of the new + * file and call the create_common common code to complete the create. + */ +static int +p9fs_create(struct vop_create_args *ap) +{ + struct vnode *dvp; + struct vnode **vpp; + struct componentname *cnp; + uint32_t mode; + struct p9fs_node *dnp; + struct p9fs_inode *dinode; + uint32_t perm; + int ret; + + dvp = ap->a_dvp; + vpp = ap->a_vpp; + cnp = ap->a_cnp; + dnp = P9FS_VTON(dvp); + dinode = &dnp->inode; + mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode); + perm = p9fs_unix2p9_mode(mode); + + P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp); + + ret = create_common(dnp, cnp, NULL, perm, P9PROTO_ORDWR, vpp); + if (ret == 0) { + P9FS_INCR_LINKS(dinode); + } + + return (ret); +} + +/* + * p9fs_mkdir is the main directory creation vop. Make the permissions of the new dir + * and call the create_common common code to complete the create. 
+ */
+static int
+p9fs_mkdir(struct vop_mkdir_args *ap)
+{
+	struct vnode *dvp;
+	struct vnode **vpp;
+	struct componentname *cnp;
+	uint32_t mode;
+	struct p9fs_node *dnp;
+	struct p9fs_inode *dinode;
+	uint32_t perm;
+	int ret;
+
+	dvp = ap->a_dvp;
+	vpp = ap->a_vpp;
+	cnp = ap->a_cnp;
+	dnp = P9FS_VTON(dvp);
+	dinode = &dnp->inode;
+	mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
+	/* Force the directory type bit so the 9P mode carries P9PROTO_DMDIR. */
+	perm = p9fs_unix2p9_mode(mode | S_IFDIR);
+
+	P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
+
+	ret = create_common(dnp, cnp, NULL, perm, P9PROTO_ORDWR, vpp);
+	/* Account for the new entry in the parent's link count on success. */
+	if (ret == 0)
+		P9FS_INCR_LINKS(dinode);
+
+	return (ret);
+}
+
+/*
+ * p9fs_mknod is the main node creation vop. Make the permissions of the new node
+ * and call the create_common common code to complete the create.
+ */
+static int
+p9fs_mknod(struct vop_mknod_args *ap)
+{
+	struct vnode *dvp;
+	struct vnode **vpp;
+	struct componentname *cnp;
+	uint32_t mode;
+	struct p9fs_node *dnp;
+	struct p9fs_inode *dinode;
+	uint32_t perm;
+	int ret;
+
+	dvp = ap->a_dvp;
+	vpp = ap->a_vpp;
+	cnp = ap->a_cnp;
+	dnp = P9FS_VTON(dvp);
+	dinode = &dnp->inode;
+	mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
+	perm = p9fs_unix2p9_mode(mode);
+
+	P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
+
+	ret = create_common(dnp, cnp, NULL, perm, P9PROTO_OREAD, vpp);
+	/* Account for the new entry in the parent's link count on success. */
+	if (ret == 0) {
+		P9FS_INCR_LINKS(dinode);
+	}
+
+	return (ret);
+}
+
+/*
+ * Convert kernel open flags to a 9P open mode.
+ *
+ * uflags is first converted to the O_* encoding with OFLAGS().  The access
+ * mode bits then select P9PROTO_OREAD, P9PROTO_OWRITE or P9PROTO_ORDWR; any
+ * other access-mode value falls back to P9PROTO_OREAD so that the result is
+ * never built from an uninitialized variable.  When 'extended' is non-zero,
+ * O_EXCL and O_APPEND are additionally translated to P9PROTO_OEXCL and
+ * P9PROTO_OAPPEND.
+ */
+static int
+p9fs_uflags_mode(int uflags, int extended)
+{
+	uint32_t ret;
+
+	/* Convert first to O flags.*/
+	uflags = OFLAGS(uflags);
+
+	switch (uflags & O_ACCMODE) {
+
+	case O_WRONLY:
+		ret = P9PROTO_OWRITE;
+		break;
+
+	case O_RDWR:
+		ret = P9PROTO_ORDWR;
+		break;
+
+	case O_RDONLY:
+	default:
+		/* Unrecognized access modes are treated as read-only. */
+		ret = P9PROTO_OREAD;
+		break;
+	}
+
+	if (extended) {
+		if (uflags & O_EXCL)
+			ret |= P9PROTO_OEXCL;
+
+		if (uflags & O_APPEND)
+			ret |= P9PROTO_OAPPEND;
+	}
+
+	return (ret);
+}
+
+/*
+ * This is the main open VOP for every file open. 
If the file is already + * open, then increment and return. If there is no open fid for this file, + * there needs to be a client_walk which creates a new open fid for this file. + * Once we have a open fid, call the open on this file with the mode creating + * the vobject. + */ +static int +p9fs_open(struct vop_open_args *ap) +{ + int error; + struct vnode *vp; + struct p9fs_node *np; + struct p9fs_session *vses; + struct p9_fid *vofid, *vfid; + size_t filesize; + uint32_t mode; + + error = 0; + vp = ap->a_vp; + np = P9FS_VTON(vp); + vses = np->p9fs_ses; + + P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp); + + if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) + return (EOPNOTSUPP); + + error = p9fs_reload_stats_dotl(vp, ap->a_cred); + if (error != 0) + return (error); + + ASSERT_VOP_LOCKED(vp, __func__); + /* + * Invalidate the pages of the vm_object cache if the file is modified + * based on the flag set in reload stats + */ + if (vp->v_type == VREG && (np->flags & P9FS_NODE_MODIFIED) != 0) { + error = vinvalbuf(vp, 0, 0, 0); + if (error != 0) + return (error); + np->flags &= ~P9FS_NODE_MODIFIED; + } + + vfid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VFID, -1, &error); + if (error != 0) + return (error); + + /* + * Translate kernel fflags to 9p mode + */ + mode = p9fs_uflags_mode(ap->a_mode, 1); + + /* + * Search the fid in vofid_list for current user. If found increase the open + * count and return. If not found clone a new fid and open the file using + * that cloned fid. 
+ */ + vofid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VOFID, mode, &error); + if (vofid != NULL) { + vofid->v_opens++; + return (0); + } else { + /*vofid is the open fid for this file.*/ + vofid = p9_client_walk(vfid, 0, NULL, 1, &error); + if (error != 0) + return (error); + } + + error = p9_client_open(vofid, mode); + if (error != 0) + p9_client_clunk(vofid); + else { + vofid->v_opens = 1; + filesize = np->inode.i_size; + vnode_create_vobject(vp, filesize, ap->a_td); + p9fs_fid_add(np, vofid, VOFID); + } + + return (error); +} + +/* + * Close the open references. Just reduce the open count on vofid and return. + * Let clunking of VOFID happen in p9fs_reclaim. + */ +static int +p9fs_close(struct vop_close_args *ap) +{ + struct vnode *vp; + struct p9fs_node *np; + struct p9fs_session *vses; + struct p9_fid *vofid; + int error; + + vp = ap->a_vp; + np = P9FS_VTON(vp); + + if (np == NULL) + return (0); + + vses = np->p9fs_ses; + error = 0; + + P9_DEBUG(VOPS, "%s: file_name %s\n", __func__, np->inode.i_name); + + /* + * Translate kernel fflags to 9p mode + */ + vofid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VOFID, + p9fs_uflags_mode(ap->a_fflag, 1), &error); + if (vofid == NULL) + return (0); + + vofid->v_opens--; + + return (0); +} + +/* Helper routine for checking if fileops are possible on this file */ +static int +p9fs_check_possible(struct vnode *vp, struct vattr *vap, mode_t mode) +{ + + /* Check if we are allowed to write */ + switch (vap->va_type) { + case VDIR: + case VLNK: + case VREG: + /* + * Normal nodes: check if we're on a read-only mounted + * file system and bail out if we're trying to write. + */ + if ((mode & VMODIFY_PERMS) && (vp->v_mount->mnt_flag & MNT_RDONLY)) + return (EROFS); + break; + case VBLK: + case VCHR: + case VSOCK: + case VFIFO: + /* + * Special nodes: even on read-only mounted file systems + * these are allowed to be written to if permissions allow. 
+ */ + break; + default: + /* No idea what this is */ + return (EINVAL); + } + + return (0); +} + +/* Check the access permissions of the file. */ +static int +p9fs_access(struct vop_access_args *ap) +{ + struct vnode *vp; + accmode_t accmode; + struct ucred *cred; + struct vattr vap; + int error; + + vp = ap->a_vp; + accmode = ap->a_accmode; + cred = ap->a_cred; + + P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp); + + /* make sure getattr is working correctly and is defined.*/ + error = VOP_GETATTR(vp, &vap, cred); + if (error != 0) + return (error); + + error = p9fs_check_possible(vp, &vap, accmode); + if (error != 0) + return (error); + + /* Call the Generic Access check in VOPS*/ + error = vaccess(vp->v_type, vap.va_mode, vap.va_uid, vap.va_gid, accmode, + cred); + + + return (error); +} + +/* + * Reload the file stats from the server and update the inode structure present + * in p9fs node. + */ +int +p9fs_reload_stats_dotl(struct vnode *vp, struct ucred *cred) +{ + struct p9_stat_dotl *stat; + int error; + struct p9fs_node *node; + struct p9fs_session *vses; + struct p9_fid *vfid; + + error = 0; + node = P9FS_VTON(vp); + vses = node->p9fs_ses; + + vfid = p9fs_get_fid(vses->clnt, node, cred, VOFID, P9PROTO_OREAD, &error); + if (vfid == NULL) { + vfid = p9fs_get_fid(vses->clnt, node, cred, VFID, -1, &error); + if (error) + return (error); + } + + stat = uma_zalloc(p9fs_getattr_zone, M_WAITOK | M_ZERO); + + error = p9_client_getattr(vfid, stat, P9PROTO_STATS_ALL); + if (error != 0) { + P9_DEBUG(ERROR, "%s: p9_client_getattr failed: %d\n", __func__, error); + goto out; + } + + /* Init the vnode with the disk info */ + p9fs_stat_vnode_dotl(stat, vp); +out: + if (stat != NULL) { + uma_zfree(p9fs_getattr_zone, stat); + } + + return (error); +} + +/* + * Read the current inode values into the vap attr. We reload the stats from + * the server. 
+ */
+static int
+p9fs_getattr_dotl(struct vop_getattr_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+	struct p9fs_node *np = P9FS_VTON(vp);
+	struct p9fs_inode *ip;
+	int rc;
+
+	/* A vnode without a p9fs node attached has nothing to report. */
+	if (np == NULL)
+		return (ENOENT);
+
+	ip = &np->inode;
+
+	P9_DEBUG(VOPS, "%s: %u %u\n", __func__, ip->i_mode, IFTOVT(ip->i_mode));
+
+	/* Refresh the cached inode before copying it out. */
+	rc = p9fs_reload_stats_dotl(vp, ap->a_cred);
+	if (rc != 0) {
+		P9_DEBUG(ERROR, "%s: failed: %d\n", __func__, rc);
+		return (rc);
+	}
+
+	/* Start from an all-unset vattr and fill in what the inode knows. */
+	VATTR_NULL(vap);
+
+	/* Timestamps: seconds and nanoseconds for each of atime/mtime/ctime. */
+	vap->va_atime.tv_sec = ip->i_atime;
+	vap->va_atime.tv_nsec = ip->i_atime_nsec;
+	vap->va_mtime.tv_sec = ip->i_mtime;
+	vap->va_mtime.tv_nsec = ip->i_mtime_nsec;
+	vap->va_ctime.tv_sec = ip->i_ctime;
+	vap->va_ctime.tv_nsec = ip->i_ctime_nsec;
+
+	/* File type, permissions and ownership. */
+	vap->va_type = IFTOVT(ip->i_mode);
+	vap->va_mode = ip->i_mode;
+	vap->va_uid = ip->n_uid;
+	vap->va_gid = ip->n_gid;
+	vap->va_flags = ip->i_flags;
+	vap->va_vaflags = 0;
+
+	/* Identity of the file within this mount. */
+	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+	vap->va_fileid = ip->i_qid_path;
+	vap->va_gen = ip->gen;
+	vap->va_filerev = ip->data_version;
+	vap->va_nlink = ip->i_links_count;
+
+	/* Size and space accounting. */
+	vap->va_size = ip->i_size;
+	vap->va_blocksize = ip->blksize;
+	vap->va_bytes = ip->blocks * P9PROTO_TGETATTR_BLK;
+
+	return (0);
+}
+
+/* Convert a standard FreeBSD permission to P9. 
*/ +static uint32_t +p9fs_unix2p9_mode(uint32_t mode) +{ + uint32_t res; + + res = mode & 0777; + if (S_ISDIR(mode)) + res |= P9PROTO_DMDIR; + if (S_ISSOCK(mode)) + res |= P9PROTO_DMSOCKET; + if (S_ISLNK(mode)) + res |= P9PROTO_DMSYMLINK; + if (S_ISFIFO(mode)) + res |= P9PROTO_DMNAMEDPIPE; + if ((mode & S_ISUID) == S_ISUID) + res |= P9PROTO_DMSETUID; + if ((mode & S_ISGID) == S_ISGID) + res |= P9PROTO_DMSETGID; + if ((mode & S_ISVTX) == S_ISVTX) + res |= P9PROTO_DMSETVTX; + + return (res); +} + +/* Update inode with the stats read from server.(9P2000.L version) */ +int +p9fs_stat_vnode_dotl(struct p9_stat_dotl *stat, struct vnode *vp) +{ + struct p9fs_node *np; + struct p9fs_inode *inode; + + np = P9FS_VTON(vp); + inode = &np->inode; + + ASSERT_VOP_LOCKED(vp, __func__); + /* Update the pager size if file size changes on host */ + if (inode->i_size != stat->st_size) { + inode->i_size = stat->st_size; + if (vp->v_type == VREG) + vnode_pager_setsize(vp, inode->i_size); + } + + inode->i_mtime = stat->st_mtime_sec; + inode->i_atime = stat->st_atime_sec; + inode->i_ctime = stat->st_ctime_sec; + inode->i_mtime_nsec = stat->st_mtime_nsec; + inode->i_atime_nsec = stat->st_atime_nsec; + inode->i_ctime_nsec = stat->st_ctime_nsec; + inode->n_uid = stat->st_uid; + inode->n_gid = stat->st_gid; + inode->i_mode = stat->st_mode; + vp->v_type = IFTOVT(inode->i_mode); + inode->i_links_count = stat->st_nlink; + inode->blksize = stat->st_blksize; + inode->blocks = stat->st_blocks; + inode->gen = stat->st_gen; + inode->data_version = stat->st_data_version; + + ASSERT_VOP_LOCKED(vp, __func__); + /* Setting a flag if file changes based on qid version */ + if (np->vqid.qid_version != stat->qid.version) + np->flags |= P9FS_NODE_MODIFIED; + memcpy(&np->vqid, &stat->qid, sizeof(stat->qid)); + + return (0); +} + +/* + * Write the current in memory inode stats into persistent stats structure + * to write to the server(for linux version). 
+ */ +static int +p9fs_inode_to_iattr(struct p9fs_inode *inode, struct p9_iattr_dotl *p9attr) +{ + p9attr->size = inode->i_size; + p9attr->mode = inode->i_mode; + p9attr->uid = inode->n_uid; + p9attr->gid = inode->n_gid; + p9attr->atime_sec = inode->i_atime; + p9attr->atime_nsec = inode->i_atime_nsec; + p9attr->mtime_sec = inode->i_mtime; + p9attr->mtime_nsec = inode->i_mtime_nsec; + + return (0); +} + +/* + * Modify the ownership of a file whenever the chown is called on the + * file. + */ +static int +p9fs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, + struct thread *td) +{ + struct p9fs_node *np; + struct p9fs_inode *inode; + uid_t ouid; + gid_t ogid; + int error; + + np = P9FS_VTON(vp); + inode = &np->inode; + + if (uid == (uid_t)VNOVAL) + uid = inode->n_uid; + if (gid == (gid_t)VNOVAL) + gid = inode->n_gid; + /* + * To modify the ownership of a file, must possess VADMIN for that + * file. + */ + if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td))) + return (error); + /* + * To change the owner of a file, or change the group of a file to a + * group of which we are not a member, the caller must have + * privilege. + */ + if (((uid != inode->n_uid && uid != cred->cr_uid) || + (gid != inode->n_gid && !groupmember(gid, cred))) && + (error = priv_check_cred(cred, PRIV_VFS_CHOWN))) + return (error); + + ogid = inode->n_gid; + ouid = inode->n_uid; + + inode->n_gid = gid; + inode->n_uid = uid; + + if ((inode->i_mode & (ISUID | ISGID)) && + (ouid != uid || ogid != gid)) { + + if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) + inode->i_mode &= ~(ISUID | ISGID); + } + P9_DEBUG(VOPS, "%s: vp %p, cred %p, td %p - ret OK\n", __func__, vp, cred, td); + + return (0); +} + +/* + * Update the in memory inode with all chmod new permissions/mode. Typically a + * setattr is called to update it to server. 
+ */ +static int +p9fs_chmod(struct vnode *vp, uint32_t mode, struct ucred *cred, struct thread *td) +{ + struct p9fs_node *np; + struct p9fs_inode *inode; + uint32_t nmode; + int error; + + np = P9FS_VTON(vp); + inode = &np->inode; + + P9_DEBUG(VOPS, "%s: vp %p, mode %x, cred %p, td %p\n", __func__, vp, mode, cred, td); + /* + * To modify the permissions on a file, must possess VADMIN + * for that file. + */ + if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) + return (error); + + /* + * Privileged processes may set the sticky bit on non-directories, + * as well as set the setgid bit on a file with a group that the + * process is not a member of. Both of these are allowed in + * jail(8). + */ + if (vp->v_type != VDIR && (mode & S_ISTXT)) { + if (priv_check_cred(cred, PRIV_VFS_STICKYFILE)) + return (EFTYPE); + } + if (!groupmember(inode->n_gid, cred) && (mode & ISGID)) { + error = priv_check_cred(cred, PRIV_VFS_SETGID); + if (error != 0) + return (error); + } + + /* + * Deny setting setuid if we are not the file owner. + */ + if ((mode & ISUID) && inode->n_uid != cred->cr_uid) { + error = priv_check_cred(cred, PRIV_VFS_ADMIN); + if (error != 0) + return (error); + } + nmode = inode->i_mode; + nmode &= ~ALLPERMS; + nmode |= (mode & ALLPERMS); + inode->i_mode = nmode; + + P9_DEBUG(VOPS, "%s: to mode %x %d \n ", __func__, nmode, error); + + return (error); +} + +/* + * Set the attributes of a file referenced by fid. 
A valid bitmask is sent + * in request selecting which fields to set + */ +static int +p9fs_setattr_dotl(struct vop_setattr_args *ap) +{ + struct vnode *vp; + struct vattr *vap; + struct p9fs_node *node; + struct p9fs_inode *inode; + struct ucred *cred; + struct thread *td; + struct p9_iattr_dotl *p9attr; + struct p9fs_session *vses; + struct p9_fid *vfid; + uint64_t oldfilesize; + int error; + + vp = ap->a_vp; + vap = ap->a_vap; + node = P9FS_VTON(vp); + inode = &node->inode; + cred = ap->a_cred; + td = curthread; + vses = node->p9fs_ses; + error = 0; + + if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || + (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || + (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || + (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { + P9_DEBUG(ERROR, "%s: unsettable attribute\n", __func__); + return (EINVAL); + } + /* Disallow write attempts on read only filesystem */ + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + + /* Setting of flags is not supported */ + if (vap->va_flags != VNOVAL) + return (EOPNOTSUPP); + + /* Allocate p9attr struct */ + p9attr = uma_zalloc(p9fs_setattr_zone, M_WAITOK | M_ZERO); + if (p9attr == NULL) + return (ENOMEM); + + /* Check if we need to change the ownership of the file*/ + if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { + P9_DEBUG(VOPS, "%s: vp:%p td:%p uid/gid %x/%x\n", __func__, + vp, td, vap->va_uid, vap->va_gid); + + error = p9fs_chown(vp, vap->va_uid, vap->va_gid, cred, td); + p9attr->valid |= P9PROTO_SETATTR_UID | P9PROTO_SETATTR_GID | + P9PROTO_SETATTR_MODE; + if (error) + goto out; + } + + /* Check for mode changes */ + if (vap->va_mode != (mode_t)VNOVAL) { + P9_DEBUG(VOPS, "%s: vp:%p td:%p mode %x\n", __func__, vp, td, + vap->va_mode); + + error = p9fs_chmod(vp, (int)vap->va_mode, cred, td); + p9attr->valid |= P9PROTO_SETATTR_MODE; + if (error) + goto out; + } + + /* Update the size of the file and update mtime */ + if (vap->va_size 
!= (uint64_t)VNOVAL) { + P9_DEBUG(VOPS, "%s: vp:%p td:%p size:%jx\n", __func__, + vp, td, (uintmax_t)vap->va_size); + switch (vp->v_type) { + case VDIR: + error = EISDIR; + goto out; + case VLNK: + case VREG: + /* Invalidate cached pages of vp */ + error = vinvalbuf(vp, 0, 0, 0); + if (error) + goto out; + oldfilesize = inode->i_size; + inode->i_size = vap->va_size; + /* Update the p9fs_inode time */ + p9fs_itimes(vp); + p9attr->valid |= P9PROTO_SETATTR_SIZE | + P9PROTO_SETATTR_ATIME | + P9PROTO_SETATTR_MTIME | + P9PROTO_SETATTR_ATIME_SET | + P9PROTO_SETATTR_MTIME_SET ; + break; + default: + goto out; + } + } else if (vap->va_atime.tv_sec != VNOVAL || + vap->va_mtime.tv_sec != VNOVAL) { + P9_DEBUG(VOPS, "%s: vp:%p td:%p time a/m %jx/%jx/\n", + __func__, vp, td, (uintmax_t)vap->va_atime.tv_sec, + (uintmax_t)vap->va_mtime.tv_sec); + /* Update the p9fs_inode times */ + p9fs_itimes(vp); + p9attr->valid |= P9PROTO_SETATTR_ATIME | + P9PROTO_SETATTR_MTIME | P9PROTO_SETATTR_ATIME_SET | + P9PROTO_SETATTR_MTIME_SET; + } + + vfid = p9fs_get_fid(vses->clnt, node, cred, VOFID, P9PROTO_OWRITE, &error); + if (vfid == NULL) { + vfid = p9fs_get_fid(vses->clnt, node, cred, VFID, -1, &error); + if (error) + goto out; + } + + /* Write the inode structure values into p9attr */ + p9fs_inode_to_iattr(inode, p9attr); + error = p9_client_setattr(vfid, p9attr); + if (vap->va_size != (uint64_t)VNOVAL && vp->v_type == VREG) { + if (error) + inode->i_size = oldfilesize; + else + vnode_pager_setsize(vp, inode->i_size); + } +out: + if (p9attr) { + uma_zfree(p9fs_setattr_zone, p9attr); + } + P9_DEBUG(VOPS, "%s: error: %d\n", __func__, error); + return (error); +} + +struct open_fid_state { + struct p9_fid *vofid; + int fflags; + int opened; +}; + +/* + * TODO: change this to take P9PROTO_* mode and avoid routing through + * VOP_OPEN, factoring out implementation of p9fs_open. 
+ */
+/*
+ * Locate an open fid on vp suitable for the access described by fflags
+ * (FREAD/FWRITE), opening the vnode through VOP_OPEN when none is cached.
+ *
+ * On success 0 is returned and *statep describes the fid; statep->opened
+ * records whether this call issued the VOP_OPEN, so that
+ * p9fs_release_open_fid() knows whether a matching VOP_CLOSE is due.
+ * On failure an errno is returned and no open is left outstanding.
+ */
+static int
+p9fs_get_open_fid(struct vnode *vp, int fflags, struct ucred *cr, struct open_fid_state *statep)
+{
+	struct p9fs_node *np;
+	struct p9fs_session *vses;
+	struct p9_fid *vofid;
+	int mode = p9fs_uflags_mode(fflags, TRUE);
+	int error = 0;
+
+	statep->opened = FALSE;
+
+	np = P9FS_VTON(vp);
+	vses = np->p9fs_ses;
+	vofid = p9fs_get_fid(vses->clnt, np, cr, VOFID, mode, &error);
+	if (vofid == NULL) {
+		/* No usable open fid cached: open the file ourselves. */
+		error = VOP_OPEN(vp, fflags, cr, curthread, NULL);
+		if (error != 0)
+			return (error);
+		vofid = p9fs_get_fid(vses->clnt, np, cr, VOFID, mode, &error);
+		if (vofid == NULL) {
+			/*
+			 * The open succeeded but its fid cannot be found.
+			 * Balance the VOP_OPEN before failing so the open is
+			 * not left dangling.
+			 */
+			(void) VOP_CLOSE(vp, fflags, cr, curthread);
+			return (EBADF);
+		}
+		statep->fflags = fflags;
+		statep->opened = TRUE;
+	}
+	statep->vofid = vofid;
+	return (0);
+}
+
+/*
+ * Undo p9fs_get_open_fid(): close the vnode only if that call opened it.
+ */
+static void
+p9fs_release_open_fid(struct vnode *vp, struct ucred *cr, struct open_fid_state *statep)
+{
+	if (statep->opened) {
+		(void) VOP_CLOSE(vp, statep->fflags, cr, curthread);
+	}
+}
+
+/*
+ * An I/O buffer is used to do any transfer. The uio is the vfs structure we
+ * need to copy data into. As long as resid is greater than zero, we call
+ * client_read to read data from offset(offset into the file) in the open fid
+ * for the file into the I/O buffer. The data is read into the user data buffer.
+ */
+static int
+p9fs_read(struct vop_read_args *ap)
+{
+	struct vnode *vp;
+	struct uio *uio;
+	struct p9fs_node *np;
+	uint64_t offset;
+	int64_t ret;
+	uint64_t resid;
+	uint32_t count;
+	int error;
+	char *io_buffer = NULL;
+	uint64_t filesize;
+	struct open_fid_state ostate;
+
+	vp = ap->a_vp;
+	uio = ap->a_uio;
+	np = P9FS_VTON(vp);
+	error = 0;
+
+	/* Only regular files can be read through this path. */
+	if (vp->v_type == VCHR || vp->v_type == VBLK)
+		return (EOPNOTSUPP);
+	if (vp->v_type != VREG)
+		return (EISDIR);
+	if (uio->uio_resid == 0)
+		return (0);
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+
+	error = p9fs_get_open_fid(vp, FREAD, ap->a_cred, &ostate);
+	if (error)
+		return (error);
+
+	/* where in the file are we to start reading */
+	offset = uio->uio_offset;
+	filesize = np->inode.i_size;
+	if (uio->uio_offset >= filesize)
+		goto out;
+
+	P9_DEBUG(VOPS, "%s: called %jd at %ju\n",
+	    __func__, (intmax_t)uio->uio_resid, (uintmax_t)uio->uio_offset);
+
+	/* Work with a local buffer from the pool for this vop */
+	io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO);
+
+	/* Loop until the request is satisfied or the cached EOF is reached. */
+	while ((resid = uio->uio_resid) > 0 && offset < filesize) {
+		/*
+		 * Never ask for more than one I/O unit: it keeps the request
+		 * within the bounce buffer and prevents the 64-bit remainder
+		 * from being truncated when stored in the 32-bit count.
+		 */
+		count = MIN(MIN(filesize - offset, resid), P9FS_IOUNIT);
+
+		ret = p9_client_read(ostate.vofid, offset, count, io_buffer);
+		/*
+		 * p9_client_read/write return a byte count rather than an
+		 * error code; a negative value is a negated errno.
+		 */
+		if (ret < 0) {
+			error = -ret;
+			goto out;
+		}
+		/*
+		 * The server may hit EOF before the cached file size (e.g. the
+		 * file shrank on the host).  Without this check the loop would
+		 * make no progress and spin forever.
+		 */
+		if (ret == 0)
+			break;
+
+		/* Copy what was read into the caller's uio. */
+		error = uiomove(io_buffer, ret, uio);
+		if (error != 0)
+			goto out;
+
+		offset += ret;
+	}
+	uio->uio_offset = offset;
+out:
+	/* io_buffer is still NULL when we bail out before allocating it. */
+	if (io_buffer != NULL)
+		uma_zfree(p9fs_io_buffer_zone, io_buffer);
+	p9fs_release_open_fid(vp, ap->a_cred, &ostate);
+
+	return (error);
+}
+
+/*
+ * The user buffer contains the data to be written. This data is copied first
+ * from uio into I/O buffer.
This I/O buffer is used to do the client_write to + * the fid of the file starting from the offset given upto count bytes. The + * number of bytes written is returned to the caller. + */ +static int +p9fs_write(struct vop_write_args *ap) +{ + struct vnode *vp; + struct uio *uio; + struct p9fs_node *np; + uint64_t off, offset; + int64_t ret; + uint64_t resid, bytes_written; + uint32_t count; + int error, ioflag; + uint64_t file_size; + char *io_buffer = NULL; + struct open_fid_state ostate; + + vp = ap->a_vp; + uio = ap->a_uio; + np = P9FS_VTON(vp); + error = 0; + ioflag = ap->a_ioflag; + + error = p9fs_get_open_fid(vp, FWRITE, ap->a_cred, &ostate); + if (error) + return (error); + + P9_DEBUG(VOPS, "%s: %#zx at %#jx\n", + __func__, uio->uio_resid, (uintmax_t)uio->uio_offset); + + if (uio->uio_offset < 0) { + error = EINVAL; + goto out; + } + if (uio->uio_resid == 0) + goto out; + + file_size = np->inode.i_size; + + switch (vp->v_type) { + case VREG: + if (ioflag & IO_APPEND) + uio->uio_offset = file_size; + break; + case VDIR: + return (EISDIR); + case VLNK: + break; + default: + panic("%s: bad file type vp: %p", __func__, vp); + } + + resid = uio->uio_resid; + offset = uio->uio_offset; + bytes_written = 0; + error = 0; + + io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO); + while ((resid = uio->uio_resid) > 0) { + off = 0; + count = MIN(resid, P9FS_IOUNIT); + error = uiomove(io_buffer, count, uio); + + if (error != 0) { + P9_DEBUG(ERROR, "%s: uiomove failed: %d\n", __func__, error); + goto out; + } + + /* While count still exists, keep writing.*/ + while (count > 0) { + /* Copy count bytes from the uio */ + ret = p9_client_write(ostate.vofid, offset, count, + io_buffer + off); + if (ret < 0) { + if (bytes_written == 0) { + error = -ret; + goto out; + } else { + break; + } + } + P9_DEBUG(VOPS, "%s: write %#zx at %#jx\n", + __func__, uio->uio_resid, (uintmax_t)uio->uio_offset); + + off += ret; + offset += ret; + bytes_written += ret; + count -= ret; + } 
+ } + /* Update the fields in the node to reflect the change*/ + if (file_size < uio->uio_offset + uio->uio_resid) { + np->inode.i_size = uio->uio_offset + uio->uio_resid; + vnode_pager_setsize(vp, uio->uio_offset + uio->uio_resid); + } +out: + if (io_buffer) + uma_zfree(p9fs_io_buffer_zone, io_buffer); + p9fs_release_open_fid(vp, ap->a_cred, &ostate); + + return (error); +} + +/* + * Common handler of all removal-related VOPs (e.g. rmdir, rm). Perform the + * client_remove op to send messages to remove the node's fid on the server. + * After that, does a node metadata cleanup on client side. + */ +static int +remove_common(struct p9fs_node *np, struct ucred *cred) +{ + int error; + struct p9fs_session *vses; + struct vnode *vp; + struct p9_fid *vfid; + + error = 0; + vses = np->p9fs_ses; + vp = P9FS_NTOV(np); + + vfid = p9fs_get_fid(vses->clnt, np, cred, VFID, -1, &error); + if (error != 0) + return (error); + + error = p9_client_remove(vfid); + if (error != 0) + return (error); + + /* Remove all non-open fids associated with the vp */ + p9fs_fid_remove_all(np, TRUE); + + /* Invalidate all entries of vnode from name cache and hash list. */ + cache_purge(vp); + + vfs_hash_remove(vp); + np->flags |= P9FS_NODE_DELETED; + + return (error); +} + +/* Remove vop for all files. Call common code for remove and adjust links */ +static int +p9fs_remove(struct vop_remove_args *ap) +{ + struct vnode *vp; + struct p9fs_node *np; + struct vnode *dvp; + struct p9fs_node *dnp; + struct p9fs_inode *dinode; + int error; + + vp = ap->a_vp; + np = P9FS_VTON(vp); + dvp = ap->a_dvp; + dnp = P9FS_VTON(dvp); + dinode = &dnp->inode; + + P9_DEBUG(VOPS, "%s: vp %p node %p \n", __func__, vp, np); + + if (vp->v_type == VDIR) + return (EISDIR); + + error = remove_common(np, ap->a_cnp->cn_cred); + if (error == 0) + P9FS_DECR_LINKS(dinode); + + return (error); +} + +/* Remove vop for all directories. 
Call common code for remove and adjust links */ +static int +p9fs_rmdir(struct vop_rmdir_args *ap) +{ + struct vnode *vp; + struct p9fs_node *np; + struct vnode *dvp; + struct p9fs_node *dnp; + struct p9fs_inode *dinode; + int error; + + vp = ap->a_vp; + np = P9FS_VTON(vp); + dvp = ap->a_dvp; + dnp = P9FS_VTON(dvp); + dinode = &dnp->inode; + + P9_DEBUG(VOPS, "%s: vp %p node %p \n", __func__, vp, np); + + error = remove_common(np, ap->a_cnp->cn_cred); + if (error == 0) + P9FS_DECR_LINKS(dinode); + + return (error); +} + +/* + * Create symlinks. Make the permissions and call create_common code + * for Soft links. + */ +static int +p9fs_symlink(struct vop_symlink_args *ap) +{ + struct vnode *dvp; + struct vnode **vpp; + struct vattr *vap; + struct componentname *cnp; + char *symtgt; + struct p9fs_node *dnp; + struct p9fs_session *vses; + struct mount *mp; + struct p9_fid *dvfid, *newfid; + int error; + char tmpchr; + gid_t gid; + + dvp = ap->a_dvp; + vpp = ap->a_vpp; + vap = ap->a_vap; + cnp = ap->a_cnp; + symtgt = (char*)(uintptr_t) ap->a_target; + dnp = P9FS_VTON(dvp); + vses = dnp->p9fs_ses; + mp = vses->p9fs_mount; + newfid = NULL; + error = 0; + gid = vap->va_gid; + + P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp); + + /* + * Save the character present at namelen in nameptr string and + * null terminate the character to get the search name for p9_dir_walk + */ + tmpchr = cnp->cn_nameptr[cnp->cn_namelen]; + cnp->cn_nameptr[cnp->cn_namelen] = '\0'; + + dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error); + if (error != 0) + goto out; + + error = p9_create_symlink(dvfid, cnp->cn_nameptr, symtgt, gid); + if (error != 0) + goto out; + + /*create vnode for symtgt */ + newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error); + if (newfid != NULL) { + error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags, + dnp, newfid, vpp, cnp->cn_nameptr); + if (error != 0) + goto out; + } else + goto out; + + if ((cnp->cn_flags & MAKEENTRY) != 0) { + 
cache_enter(P9FS_NTOV(dnp), *vpp, cnp); + } + P9_DEBUG(VOPS, "%s: created file under vp %p node %p fid %ju\n", + __func__, *vpp, dnp, (uintmax_t)dvfid->fid); + + cnp->cn_nameptr[cnp->cn_namelen] = tmpchr; + return (error); + +out: + if (newfid != NULL) + p9_client_clunk(newfid); + cnp->cn_nameptr[cnp->cn_namelen] = tmpchr; + return (error); +} + +/* Create hard link */ +static int +p9fs_link(struct vop_link_args *ap) +{ + struct vnode *vp; + struct vnode *tdvp; + struct componentname *cnp; + struct p9fs_node *dnp; + struct p9fs_node *np; + struct p9fs_inode *inode; + struct p9fs_session *vses; + struct p9_fid *dvfid, *oldvfid; + int error; + + vp = ap->a_vp; + tdvp = ap->a_tdvp; + cnp = ap->a_cnp; + dnp = P9FS_VTON(tdvp); + np = P9FS_VTON(vp); + inode = &np->inode; + vses = np->p9fs_ses; + error = 0; + + P9_DEBUG(VOPS, "%s: tdvp %p vp %p\n", __func__, tdvp, vp); + + dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error); + if (error != 0) + return (error); + oldvfid = p9fs_get_fid(vses->clnt, np, cnp->cn_cred, VFID, -1, &error); + if (error != 0) + return (error); + + error = p9_create_hardlink(dvfid, oldvfid, cnp->cn_nameptr); + if (error != 0) + return (error); + /* Increment ref count on the inode */ + P9FS_INCR_LINKS(inode); + + return (0); +} + +/* Read contents of the symbolic link */ +static int +p9fs_readlink(struct vop_readlink_args *ap) +{ + struct vnode *vp; + struct uio *uio; + struct p9fs_node *dnp; + struct p9fs_session *vses; + struct p9_fid *dvfid; + int error, len; + char *target; + + vp = ap->a_vp; + uio = ap->a_uio; + dnp = P9FS_VTON(vp); + vses = dnp->p9fs_ses; + error = 0; + + P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp); + + dvfid = p9fs_get_fid(vses->clnt, dnp, ap->a_cred, VFID, -1, &error); + if (error != 0) + return (error); + + error = p9_readlink(dvfid, &target); + if (error != 0) + return (error); + + len = strlen(target); + error = uiomove(target, len, uio); + + return (0); +} + +/* + * Iterate through a directory. 
An entire 8k data is read into the I/O buffer. + * This buffer is parsed to make dir entries and fed to the user buffer to + * complete it to the VFS. + */ +static int +p9fs_readdir(struct vop_readdir_args *ap) +{ + struct uio *uio; + struct vnode *vp; + struct dirent cde; + int64_t offset; + uint64_t diroffset; + struct p9fs_node *np; + int error; + int32_t count; + struct p9_client *clnt; + struct p9_dirent dent; + char *io_buffer; + struct p9_fid *vofid; + + uio = ap->a_uio; + vp = ap->a_vp; + np = P9FS_VTON(ap->a_vp); + offset = 0; + diroffset = 0; + error = 0; + count = 0; + clnt = np->p9fs_ses->clnt; + + P9_DEBUG(VOPS, "%s: vp %p, offset %ld, resid %zd\n", __func__, vp, uio->uio_offset, uio->uio_resid); + + if (ap->a_uio->uio_iov->iov_len <= 0) + return (EINVAL); + + if (vp->v_type != VDIR) + return (ENOTDIR); + + vofid = p9fs_get_fid(clnt, np, ap->a_cred, VOFID, P9PROTO_OREAD, &error); + if (vofid == NULL) { + P9_DEBUG(ERROR, "%s: NULL FID\n", __func__); + return (EBADF); + } + + io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK); + + /* We haven't reached the end yet. read more. */ + diroffset = uio->uio_offset; + while (uio->uio_resid >= sizeof(struct dirent)) { + /* + * We need to read more data as what is indicated by filesize because + * filesize is based on data stored in struct dirent structure but + * we read data in struct p9_dirent format which has different size. + * Hence we read max data(P9FS_IOUNIT) everytime from host, convert + * it into struct dirent structure and send it back. + */ + count = P9FS_IOUNIT; + bzero(io_buffer, P9FS_MTU); + count = p9_client_readdir(vofid, (char *)io_buffer, + diroffset, count); + + if (count == 0) + break; + + if (count < 0) { + error = EIO; + goto out; + } + + offset = 0; + while (offset + QEMU_DIRENTRY_SZ <= count) { + + /* + * Read and make sense out of the buffer in one dirent + * This is part of 9p protocol read. 
This reads one p9_dirent, + * appends it to dirent(FREEBSD specifc) and continues to parse the buffer. + */ + bzero(&dent, sizeof(dent)); + offset = p9_dirent_read(clnt, io_buffer, offset, count, + &dent); + if (offset < 0 || offset > count) { + error = EIO; + goto out; + } + + bzero(&cde, sizeof(cde)); + strncpy(cde.d_name, dent.d_name, dent.len); + cde.d_fileno = dent.qid.path; + cde.d_type = dent.d_type; + cde.d_namlen = dent.len; + cde.d_reclen = GENERIC_DIRSIZ(&cde); + + /* + * If there isn't enough space in the uio to return a + * whole dirent, break off read + */ + if (uio->uio_resid < GENERIC_DIRSIZ(&cde)) + break; + + /* Transfer */ + error = uiomove(&cde, GENERIC_DIRSIZ(&cde), uio); + if (error != 0) { + error = EIO; + goto out; + } + diroffset = dent.d_off; + } + } + /* Pass on last transferred offset */ + uio->uio_offset = diroffset; + +out: + uma_zfree(p9fs_io_buffer_zone, io_buffer); + + return (error); +} + +static void +p9fs_doio(struct vnode *vp, struct buf *bp, struct p9_fid *vofid, struct ucred *cr) +{ + struct uio *uiov; + struct iovec io; + int error; + uint64_t off, offset; + uint64_t filesize; + uint64_t resid; + uint32_t count; + int64_t ret; + struct p9fs_node *np; + char *io_buffer; + + error = 0; + np = P9FS_VTON(vp); + + filesize = np->inode.i_size; + uiov = malloc(sizeof(struct uio), M_P9UIOV, M_WAITOK); + uiov->uio_iov = &io; + uiov->uio_iovcnt = 1; + uiov->uio_segflg = UIO_SYSSPACE; + io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO); + + if (bp->b_iocmd == BIO_READ) { + io.iov_len = uiov->uio_resid = bp->b_bcount; + io.iov_base = bp->b_data; + uiov->uio_rw = UIO_READ; + + switch (vp->v_type) { + + case VREG: + { + uiov->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE; + + if (uiov->uio_resid) { + int left = uiov->uio_resid; + int nread = bp->b_bcount - left; + + if (left > 0) + bzero((char *)bp->b_data + nread, left); + } + /* where in the file are we to start reading */ + offset = uiov->uio_offset; + if (uiov->uio_offset 
>= filesize) + goto out; + + while ((resid = uiov->uio_resid) > 0) { + if (offset >= filesize) + break; + count = min(filesize - uiov->uio_offset, resid); + if (count == 0) + break; + + P9_DEBUG(VOPS, "%s: read called %#zx at %#jx\n", + __func__, uiov->uio_resid, (uintmax_t)uiov->uio_offset); + + /* Copy count bytes into the uio */ + ret = p9_client_read(vofid, offset, count, io_buffer); + error = uiomove(io_buffer, ret, uiov); + + if (error != 0) + goto out; + offset += ret; + } + break; + } + default: + printf("vfs: type %x unexpected\n", vp->v_type); + break; + } + } else { + if (bp->b_dirtyend > bp->b_dirtyoff) { + io.iov_len = uiov->uio_resid = bp->b_dirtyend - bp->b_dirtyoff; + uiov->uio_offset = ((off_t)bp->b_blkno) * PAGE_SIZE + bp->b_dirtyoff; + io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; + uiov->uio_rw = UIO_WRITE; + + if (uiov->uio_offset < 0) { + error = EINVAL; + goto out; + } + + if (uiov->uio_resid == 0) + goto out; + + resid = uiov->uio_resid; + offset = uiov->uio_offset; + error = 0; + + while ((resid = uiov->uio_resid) > 0) { + off = 0; + count = MIN(resid, P9FS_IOUNIT); + error = uiomove(io_buffer, count, uiov); + if (error != 0) { + goto out; + } + + while (count > 0) { + /* Copy count bytes from the uio */ + ret = p9_client_write(vofid, offset, count, + io_buffer + off); + if (ret < 0) + goto out; + + P9_DEBUG(VOPS, "%s: write called %#zx at %#jx\n", + __func__, uiov->uio_resid, (uintmax_t)uiov->uio_offset); + off += ret; + offset += ret; + count -= ret; + } + } + + /* Update the fields in the node to reflect the change */ + if (filesize < uiov->uio_offset + uiov->uio_resid) { + np->inode.i_size = uiov->uio_offset + uiov->uio_resid; + vnode_pager_setsize(vp, uiov->uio_offset + uiov->uio_resid); + /* update the modified timers. 
*/ + p9fs_itimes(vp); + } + } else { + bp->b_resid = 0; + goto out1; + } + } +out: + /* Set the error */ + if (error != 0) { + bp->b_error = error; + bp->b_ioflags |= BIO_ERROR; + } + bp->b_resid = uiov->uio_resid; +out1: + bufdone(bp); + uma_zfree(p9fs_io_buffer_zone, io_buffer); + free(uiov, M_P9UIOV); +} + +/* + * The I/O buffer is mapped to a uio and a client_write/client_read is performed + * the same way as p9fs_read and p9fs_write. + */ +static int +p9fs_strategy(struct vop_strategy_args *ap) +{ + struct vnode *vp; + struct buf *bp; + struct ucred *cr; + int error; + struct open_fid_state ostate; + + vp = ap->a_vp; + bp = ap->a_bp; + error = 0; + + P9_DEBUG(VOPS, "%s: vp %p, iocmd %d\n ", __func__, vp, bp->b_iocmd); + + if (bp->b_iocmd == BIO_READ) + cr = bp->b_rcred; + else + cr = bp->b_wcred; + + error = p9fs_get_open_fid(vp, bp->b_iocmd == BIO_READ ? FREAD : FWRITE, cr, &ostate); + if (error) { + P9_DEBUG(ERROR, "%s: p9fs_get_open_fid failed: %d\n", __func__, error); + bp->b_error = error; + bp->b_ioflags |= BIO_ERROR; + bufdone(bp); + return (0); + } + + p9fs_doio(vp, bp, ostate.vofid, cr); + p9fs_release_open_fid(vp, cr, &ostate); + + return (0); +} + +/* Rename a file */ +static int +p9fs_rename(struct vop_rename_args *ap) +{ + struct vnode *tvp; + struct vnode *tdvp; + struct vnode *fvp; + struct vnode *fdvp; + struct componentname *tcnp; + struct componentname *fcnp; + struct p9fs_node *tdnode; + struct p9fs_node *fdnode; + struct p9fs_inode *fdinode; + struct p9fs_node *fnode; + struct p9fs_inode *finode; + struct p9fs_session *vses; + struct p9fs_node *tnode; + struct p9fs_inode *tinode; + struct p9_fid *olddirvfid, *newdirvfid ; + int error; + + tvp = ap->a_tvp; + tdvp = ap->a_tdvp; + fvp = ap->a_fvp; + fdvp = ap->a_fdvp; + tcnp = ap->a_tcnp; + fcnp = ap->a_fcnp; + tdnode = P9FS_VTON(tdvp); + fdnode = P9FS_VTON(fdvp); + fdinode = &fdnode->inode; + fnode = P9FS_VTON(fvp); + finode = &fnode->inode; + vses = fnode->p9fs_ses; + error = 0; + + 
P9_DEBUG(VOPS, "%s: tvp %p, tdvp %p, fvp %p, fdvp %p\n ", __func__, tvp, tdvp, fvp, fdvp); + + /* Check for cross mount operation */ + if (fvp->v_mount != tdvp->v_mount || + (tvp && (fvp->v_mount != tvp->v_mount))) { + error = EXDEV; + goto out; + } + + /* warning if you are renaming to the same name */ + if (fvp == tvp) + error = 0; + + olddirvfid = p9fs_get_fid(vses->clnt, fdnode, fcnp->cn_cred, VFID, -1, &error); + if (error != 0) + goto out; + newdirvfid = p9fs_get_fid(vses->clnt, tdnode, tcnp->cn_cred, VFID, -1, &error); + if (error != 0) + goto out; + + error = p9_client_renameat(olddirvfid, fcnp->cn_nameptr, newdirvfid, tcnp->cn_nameptr); + if (error != 0) + goto out; + + /* + * decrement the link count on the "from" file whose name is going + * to be changed if its a directory + */ + if (fvp->v_type == VDIR) { + if (tvp && tvp->v_type == VDIR) + cache_purge(tdvp); + P9FS_DECR_LINKS(fdinode); + cache_purge(fdvp); + } + + /* Taking exclusive lock on the from node before decrementing the link count */ + if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0) + goto out; + P9FS_DECR_LINKS(finode); + VOP_UNLOCK(fvp); + + if (tvp) { + tnode = P9FS_VTON(tvp); + tinode = &tnode->inode; + P9FS_DECR_LINKS(tinode); + } + +out: + if (tdvp == tvp) + vrele(tdvp); + else + vput(tdvp); + if (tvp) + vput(tvp); + vrele(fdvp); + vrele(fvp); + return (error); +} + + +struct vop_vector p9fs_vnops = { + .vop_default = &default_vnodeops, + .vop_lookup = p9fs_lookup, + .vop_open = p9fs_open, + .vop_close = p9fs_close, + .vop_access = p9fs_access, + .vop_getattr = p9fs_getattr_dotl, + .vop_setattr = p9fs_setattr_dotl, + .vop_reclaim = p9fs_reclaim, + .vop_inactive = p9fs_inactive, + .vop_readdir = p9fs_readdir, + .vop_create = p9fs_create, + .vop_mknod = p9fs_mknod, + .vop_read = p9fs_read, + .vop_write = p9fs_write, + .vop_remove = p9fs_remove, + .vop_mkdir = p9fs_mkdir, + .vop_rmdir = p9fs_rmdir, + .vop_strategy = p9fs_strategy, + .vop_symlink = p9fs_symlink, + .vop_rename = p9fs_rename, 
+ .vop_link = p9fs_link, + .vop_readlink = p9fs_readlink, +}; +VFS_VOP_VECTOR_REGISTER(p9fs_vnops); diff --git a/sys/kern/vfs_mountroot.c b/sys/kern/vfs_mountroot.c index 1069cf127f2a..e0d1cec5bd71 100644 --- a/sys/kern/vfs_mountroot.c +++ b/sys/kern/vfs_mountroot.c @@ -1019,6 +1019,7 @@ vfs_mountroot_wait_if_neccessary(const char *fs, const char *dev) * behaviour by setting vfs.root_mount_always_wait=1. */ if (strcmp(fs, "zfs") == 0 || strstr(fs, "nfs") != NULL || + strcmp(fs, "p9fs") == 0 || dev[0] == '\0' || root_mount_always_wait != 0) { vfs_mountroot_wait(); return (0); diff --git a/sys/modules/Makefile b/sys/modules/Makefile index 7ce956957fd5..5ef245f17051 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -305,6 +305,7 @@ SUBDIR= \ otus \ ${_otusfw} \ ow \ + p9fs \ ${_padlock} \ ${_padlock_rng} \ ${_pchtherm} \ diff --git a/sys/modules/p9fs/Makefile b/sys/modules/p9fs/Makefile new file mode 100644 index 000000000000..ae90cb3a46d7 --- /dev/null +++ b/sys/modules/p9fs/Makefile @@ -0,0 +1,8 @@ +.PATH: ${SRCTOP}/sys/fs/p9fs + +KMOD= p9fs +SRCS= vnode_if.h \ + p9_client.c p9_protocol.c p9_transport.c \ + p9fs_subr.c p9fs_vfsops.c p9fs_vnops.c + +.include diff --git a/sys/modules/virtio/Makefile b/sys/modules/virtio/Makefile index 7622e58c0c93..f00cdc7b23ca 100644 --- a/sys/modules/virtio/Makefile +++ b/sys/modules/virtio/Makefile @@ -22,6 +22,6 @@ # SUCH DAMAGE. # -SUBDIR= virtio pci network block balloon scsi random console +SUBDIR= virtio pci network block balloon scsi random console p9fs .include diff --git a/sys/modules/virtio/p9fs/Makefile b/sys/modules/virtio/p9fs/Makefile new file mode 100644 index 000000000000..48631efb6973 --- /dev/null +++ b/sys/modules/virtio/p9fs/Makefile @@ -0,0 +1,32 @@ +# +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. 
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# + +.PATH: ${SRCTOP}/sys/dev/virtio/p9fs + +KMOD= virtio_p9fs +SRCS= virtio_p9fs.c +SRCS+= virtio_bus_if.h virtio_if.h +SRCS+= bus_if.h device_if.h + +.include