freebsd-src/sys/fs/p9fs/p9fs_vnops.c

/*
 * Copyright (c) 2017-2020 Juniper Networks, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *	notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *	notice, this list of conditions and the following disclaimer in the
 *	documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/* This file contains VFS file ops for the 9P protocol.
 * This makes the upper layer of the p9fs driver. These functions interact
 * with the VFS layer and lower layer of p9fs driver which is 9Pnet. All
 * the user file operations are handled here.
 */
#include <sys/cdefs.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/dirent.h>
#include <sys/fcntl.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/stat.h>
#include <sys/vnode.h>
#include <sys/rwlock.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

#include <fs/p9fs/p9_client.h>
#include <fs/p9fs/p9_debug.h>
#include <fs/p9fs/p9fs.h>
#include <fs/p9fs/p9fs_proto.h>

/*
 * File permission bits (octal). ISVTX, ISGID and ISUID are tested against
 * the cached inode mode by the lookup, chown and chmod code in this file.
 */
#define IEXEC		0000100 /* Executable. */
#define IWRITE		0000200 /* Writeable. */
#define IREAD		0000400 /* Readable. */
#define ISVTX		0001000 /* Sticky bit. */
#define ISGID		0002000 /* Set-gid. */
#define ISUID		0004000 /* Set-uid. */

static MALLOC_DEFINE(M_P9UIOV, "uio", "UIOV structures for strategy in p9fs");
/* UMA zones defined outside this file. */
extern uma_zone_t p9fs_io_buffer_zone;
/* Backs the p9_stat_dotl scratch buffer in p9fs_reload_stats_dotl(). */
extern uma_zone_t p9fs_getattr_zone;
/* Backs the p9_iattr_dotl request built in p9fs_setattr_dotl(). */
extern uma_zone_t p9fs_setattr_zone;
extern uma_zone_t p9fs_pbuf_zone;
/* For the root vnode's vnops. */
struct vop_vector p9fs_vnops;

/* Forward declaration; the create VOPs use it before its definition. */
static uint32_t p9fs_unix2p9_mode(uint32_t mode);

static void
p9fs_itimes(struct vnode *vp)
{
	struct p9fs_node *node;
	struct timespec ts;
	struct p9fs_inode *inode;

	node = P9FS_VTON(vp);
	inode = &node->inode;

	vfs_timestamp(&ts);
	inode->i_mtime = ts.tv_sec;
}

/*
 * Cleanup the p9fs node, the in-memory representation of a vnode for p9fs.
 *
 * The teardown is the reverse of what a create/vget does:
 *   1. remove the vnode from the VFS hash, unless the node is already
 *      flagged P9FS_NODE_DELETED;
 *   2. unlink the node from its session's node list (under the session
 *      lock); if the node is not on that list there is nothing more to
 *      release and the function returns;
 *   3. purge the name cache entries that refer to the vnode;
 *   4. destroy the VM object backing the vnode;
 *   5. remove all fids attached to the node;
 *   6. dispose of the p9fs node itself.
 *
 * Every step runs exactly once. In particular the hash removal must not be
 * repeated for the same vnode.
 *
 * np may be NULL, in which case nothing is done.
 */
void
p9fs_cleanup(struct p9fs_node *np)
{
	struct vnode *vp;
	struct p9fs_session *vses;

	if (np == NULL)
		return;

	vp = P9FS_NTOV(np);
	vses = np->p9fs_ses;

	/* Remove the vnode from hash list if vnode is not already deleted */
	if ((np->flags & P9FS_NODE_DELETED) == 0)
		vfs_hash_remove(vp);

	/* Only nodes still linked into the session list are torn down. */
	P9FS_LOCK(vses);
	if ((np->flags & P9FS_NODE_IN_SESSION) != 0) {
		np->flags &= ~P9FS_NODE_IN_SESSION;
		STAILQ_REMOVE(&vses->virt_node_list, np, p9fs_node, p9fs_node_next);
	} else {
		P9FS_UNLOCK(vses);
		return;
	}
	P9FS_UNLOCK(vses);

	/* Invalidate all entries to a particular vnode. */
	cache_purge(vp);

	/* Destroy the vm object and flush associated pages. */
	vnode_destroy_vobject(vp);

	/* Remove all the FID */
	p9fs_fid_remove_all(np, FALSE);

	/* Dispose all node knowledge.*/
	p9fs_destroy_node(&np);
}

/*
 * VOP_RECLAIM handler. Logs the vnode/node pair and hands the p9fs node
 * attached to the vnode to p9fs_cleanup(), which releases the resources
 * held for it. Always returns 0.
 */
static int
p9fs_reclaim(struct vop_reclaim_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct p9fs_node *np = P9FS_VTON(vp);

	P9_DEBUG(VOPS, "%s: vp:%p node:%p\n", __func__, vp, np);

	p9fs_cleanup(np);
	return (0);
}

/*
 * VOP_INACTIVE handler, invoked for vnodes that are no longer referenced
 * (use count of zero). A vnode whose node is flagged P9FS_NODE_DELETED is
 * handed to vrecycle(); any other vnode is left untouched. Always
 * returns 0.
 */
static int
p9fs_inactive(struct vop_inactive_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct p9fs_node *np = P9FS_VTON(vp);

	P9_DEBUG(VOPS, "%s: vp:%p node:%p file:%s\n", __func__, vp, np, np->inode.i_name);

	if ((np->flags & P9FS_NODE_DELETED) != 0)
		vrecycle(vp);

	return (0);
}

/* Arguments bundled by p9fs_lookup() for the p9fs_lookup_alloc() callback. */
struct p9fs_lookup_alloc_arg {
	struct componentname *cnp;	/* component being looked up */
	struct p9fs_node *dnp;		/* p9fs node of the directory searched */
	struct p9_fid *newfid;		/* fid obtained by walking to the component */
};

/* Callback for vn_get_ino */
static int
p9fs_lookup_alloc(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
{
	struct p9fs_lookup_alloc_arg *p9aa = arg;

	return (p9fs_vget_common(mp, NULL, p9aa->cnp->cn_lkflags, p9aa->dnp,
		p9aa->newfid, vpp, p9aa->cnp->cn_nameptr));
}

/*
 * p9fs_lookup is called for every component name that is being searched for.
 *
 * I. If component is found on the server, we look for the in-memory
 *    repesentation(vnode) of this component in namecache.
 *    A. If the node is found in the namecache, we check is the vnode is still
 *	 valid.
 *	 1. If it is still valid, return vnode.
 *	 2. If it is not valid, we remove this vnode from the name cache and
 *	    create a new vnode for the component and return that vnode.
 *    B. If the vnode is not found in the namecache, we look for it in the
 *       hash list.
 *       1. If the vnode is in the hash list, we check if the vnode is still
 *	    valid.
 *	    a. If it is still valid, we add that vnode to the namecache for
 *	       future lookups and return the vnode.
 *	    b. If it is not valid, create a new vnode and p9fs node,
 *	       initialize them and return the vnode.
 *	 2. If the vnode is not found in the hash list, we create a new vnode
 *	    and p9fs node, initialize them and return the vnode.
 * II. If the component is not found on the server, an error code is returned.
 *     A. For the creation case, we return EJUSTRETURN so VFS can handle it.
 *     B. For all other cases, ENOENT is returned.
 */
static int
p9fs_lookup(struct vop_lookup_args *ap)
{
	struct vnode *dvp;
	struct vnode **vpp, *vp;
	struct componentname *cnp;
	struct p9fs_node *dnp; /*dir p9_node */
	struct p9fs_node *np;
	struct p9fs_session *vses;
	struct mount *mp; /* Get the mount point */
	struct p9_fid *dvfid, *newfid;
	int error;
	struct vattr vattr;
	int flags;
	char tmpchr;

	dvp = ap->a_dvp;
	vpp = ap->a_vpp;
	cnp = ap->a_cnp;
	dnp = P9FS_VTON(dvp);
	error = 0;
	flags = cnp->cn_flags;
	*vpp = NULLVP;

	if (dnp == NULL)
		return (ENOENT);

	if (cnp->cn_nameptr[0] == '.' && strlen(cnp->cn_nameptr) == 1) {
		vref(dvp);
		*vpp = dvp;
		return (0);
	}

	vses = dnp->p9fs_ses;
	mp = vses->p9fs_mount;

	/* Do the cache part ourselves */
	if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, curthread);
	if (error)
		return (error);

	/* Do the directory walk on host to check if file exist */
	dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
	if (error)
		return (error);

	/*
	 * Save the character present at namelen in nameptr string and
	 * null terminate the character to get the search name for p9_dir_walk
	 * This is done to handle when lookup is for "a" and component
	 * name contains a/b/c
	 */
	tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
	cnp->cn_nameptr[cnp->cn_namelen] = '\0';

	/*
	 * If the client_walk fails, it means the file looking for doesnt exist.
	 * Create the file is the flags are set or just return the error
	 */
	newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error);

	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;

	if (error != 0 || newfid == NULL) {
		/* Clunk the newfid if it is not NULL */
		if (newfid != NULL)
			p9_client_clunk(newfid);

		if (error != ENOENT)
			return (error);

		/* The requested file was not found. */
		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
		    (flags & ISLASTCN)) {

			if (mp->mnt_flag & MNT_RDONLY)
				return (EROFS);

			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
			    curthread);
			if (!error) {
				return (EJUSTRETURN);
			}
		}
		return (error);
	}

	/* Look for the entry in the component cache*/
	error = cache_lookup(dvp, vpp, cnp, NULL, NULL);
	if (error > 0 && error != ENOENT) {
		P9_DEBUG(VOPS, "%s: Cache lookup error %d \n", __func__, error);
		goto out;
	}

	if (error == -1) {
		vp = *vpp;
		/* Check if the entry in cache is stale or not */
		if ((p9fs_node_cmp(vp, &newfid->qid) == 0) &&
		    ((error = VOP_GETATTR(vp, &vattr, cnp->cn_cred)) == 0)) {
			goto out;
		}
		/*
		 * This case, we have an error coming from getattr,
		 * act accordingly.
		 */
		cache_purge(vp);
		if (dvp != vp)
			vput(vp);
		else
			vrele(vp);

		*vpp = NULLVP;
	} else if (error == ENOENT) {
		if (VN_IS_DOOMED(dvp))
			goto out;
		if (VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0) {
			error = ENOENT;
			goto out;
		}
		cache_purge_negative(dvp);
	}
	/* Reset values */
	error = 0;
	vp = NULLVP;

	tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
	cnp->cn_nameptr[cnp->cn_namelen] = '\0';

	/*
	 * Looks like we have found an entry. Now take care of all other cases.
	 */
	if (flags & ISDOTDOT) {
		struct p9fs_lookup_alloc_arg p9aa;
		p9aa.cnp = cnp;
		p9aa.dnp = dnp;
		p9aa.newfid = newfid;
		error = vn_vget_ino_gen(dvp, p9fs_lookup_alloc, &p9aa, 0, &vp);
		if (error)
			goto out;
		*vpp = vp;
	} else {
		/*
		 * client_walk is equivalent to searching a component name in a
		 * directory(fid) here. If new fid is returned, we have found an
		 * entry for this component name so, go and create the rest of
		 * the vnode infra(vget_common) for the returned newfid.
		 */
		if ((cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
		    && (flags & ISLASTCN)) {
			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
			    curthread);
			if (error)
				goto out;

			error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
			    dnp, newfid, &vp, cnp->cn_nameptr);
			if (error)
				goto out;

			*vpp = vp;
			np = P9FS_VTON(vp);
			if ((dnp->inode.i_mode & ISVTX) &&
			    cnp->cn_cred->cr_uid != 0 &&
			    cnp->cn_cred->cr_uid != dnp->inode.n_uid &&
			    cnp->cn_cred->cr_uid != np->inode.n_uid) {
				vput(*vpp);
				*vpp = NULL;
				cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
				return (EPERM);
			}
		} else {
			error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
			    dnp, newfid, &vp, cnp->cn_nameptr);
			if (error)
				goto out;
			*vpp = vp;
		}
	}

	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;

	/* Store the result the cache if MAKEENTRY is specified in flags */
	if ((cnp->cn_flags & MAKEENTRY) != 0)
		cache_enter(dvp, *vpp, cnp);
	return (error);
out:
	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
	p9_client_clunk(newfid);
	return (error);
}

/*
 * Common creation path shared by the file, directory and node create VOPs.
 *
 * The parent directory fid is cloned with a zero-name walk, and the clone
 * is used to create an entry named by cnp with the given 9P permission
 * bits, open mode and extension string. Unless perm carries
 * P9PROTO_DMLINK (hard link), the new name is then walked from the parent
 * and a vnode/p9fs node is set up for it through p9fs_vget_common(), which
 * fills in *vpp; the result is entered in the name cache when MAKEENTRY is
 * set. The cloned directory fid is clunked on every exit that follows the
 * clone.
 *
 * Returns 0 on success or the error reported by the failing step.
 */
static int
create_common(struct p9fs_node *dnp, struct componentname *cnp,
    char *extension, uint32_t perm, uint8_t mode, struct vnode **vpp)
{
	struct p9fs_session *vses;
	struct mount *mp;
	struct p9_fid *dvfid, *ofid, *newfid;
	char saved;
	int error;

	P9_DEBUG(VOPS, "%s: name %s\n", __func__, cnp->cn_nameptr);

	error = 0;
	newfid = NULL;
	vses = dnp->p9fs_ses;
	mp = vses->p9fs_mount;

	dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
	if (error != 0)
		return (error);

	/* A walk with no names clones the directory fid. */
	ofid = p9_client_walk(dvfid, 0, NULL, 1, &error);
	if (error != 0)
		return (error);

	/*
	 * The component is not NUL-terminated inside the path buffer:
	 * terminate it temporarily and put the saved character back on
	 * every exit below.
	 */
	saved = cnp->cn_nameptr[cnp->cn_namelen];
	cnp->cn_nameptr[cnp->cn_namelen] = '\0';

	error = p9_client_file_create(ofid, cnp->cn_nameptr, perm, mode,
	    extension);
	if (error != 0) {
		P9_DEBUG(ERROR, "%s: p9_client_fcreate failed %d\n", __func__, error);
		goto out;
	}

	/* Hard links need no vnode; everything else is looked up now. */
	if ((perm & P9PROTO_DMLINK) == 0) {
		newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error);
		if (newfid == NULL) {
			/* Walk failed; report whatever error it left behind. */
			goto out;
		}

		/* vpp is filled in here. */
		error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
		    dnp, newfid, vpp, cnp->cn_nameptr);
		if (error != 0)
			goto out;

		if ((cnp->cn_flags & MAKEENTRY) != 0)
			cache_enter(P9FS_NTOV(dnp), *vpp, cnp);
	}
	P9_DEBUG(VOPS, "%s: created file under vp %p node %p fid %ju\n",
	    __func__, *vpp, dnp, (uintmax_t)dvfid->fid);

	/* Success: only the cloned directory fid is released. */
	if (ofid != NULL)
		(void)p9_client_clunk(ofid);

	cnp->cn_nameptr[cnp->cn_namelen] = saved;
	return (0);
out:
	/* Failure: release the clone and, if present, the walked fid. */
	if (ofid != NULL)
		(void)p9_client_clunk(ofid);

	if (newfid != NULL)
		(void)p9_client_clunk(newfid);

	cnp->cn_nameptr[cnp->cn_namelen] = saved;
	return (error);
}

/*
 * VOP_CREATE handler. Builds the 9P permission word from the requested
 * vnode type and mode, then lets create_common() create the entry with the
 * P9PROTO_ORDWR open mode. On success the parent directory's cached link
 * count is incremented. Returns the result of create_common().
 */
static int
p9fs_create(struct vop_create_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct p9fs_node *dnp = P9FS_VTON(dvp);
	struct p9fs_inode *dinode = &dnp->inode;
	uint32_t mode, perm;
	int ret;

	mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
	perm = p9fs_unix2p9_mode(mode);

	P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);

	ret = create_common(dnp, ap->a_cnp, NULL, perm, P9PROTO_ORDWR,
	    ap->a_vpp);
	if (ret != 0)
		return (ret);

	P9FS_INCR_LINKS(dinode);
	return (0);
}

/*
 * VOP_MKDIR handler. Builds the 9P permission word from the requested
 * vnode type and mode with S_IFDIR forced on, then lets create_common()
 * create the entry with the P9PROTO_ORDWR open mode. On success the parent
 * directory's cached link count is incremented. Returns the result of
 * create_common().
 */
static int
p9fs_mkdir(struct vop_mkdir_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct p9fs_node *dnp = P9FS_VTON(dvp);
	struct p9fs_inode *dinode = &dnp->inode;
	uint32_t mode, perm;
	int ret;

	mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
	perm = p9fs_unix2p9_mode(mode | S_IFDIR);

	P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);

	ret = create_common(dnp, ap->a_cnp, NULL, perm, P9PROTO_ORDWR,
	    ap->a_vpp);
	if (ret != 0)
		return (ret);

	P9FS_INCR_LINKS(dinode);
	return (0);
}

/*
 * VOP_MKNOD handler. Builds the 9P permission word from the requested
 * vnode type and mode, then lets create_common() create the entry with the
 * P9PROTO_OREAD open mode. On success the parent directory's cached link
 * count is incremented. Returns the result of create_common().
 */
static int
p9fs_mknod(struct vop_mknod_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct p9fs_node *dnp = P9FS_VTON(dvp);
	struct p9fs_inode *dinode = &dnp->inode;
	uint32_t mode, perm;
	int ret;

	mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
	perm = p9fs_unix2p9_mode(mode);

	P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);

	ret = create_common(dnp, ap->a_cnp, NULL, perm, P9PROTO_OREAD,
	    ap->a_vpp);
	if (ret != 0)
		return (ret);

	P9FS_INCR_LINKS(dinode);
	return (0);
}

/*
 * Convert kernel open flags to a 9P open mode.
 *
 * uflags is first converted with OFLAGS(). The access-mode bits then select
 * P9PROTO_OREAD, P9PROTO_OWRITE or P9PROTO_ORDWR. When extended is
 * non-zero, O_EXCL and O_APPEND are additionally translated to
 * P9PROTO_OEXCL and P9PROTO_OAPPEND.
 *
 * An access-mode value that matches none of O_RDONLY/O_WRONLY/O_RDWR is
 * treated as read-only so that the result is always well defined instead
 * of being read from an uninitialized variable.
 */
static int
p9fs_uflags_mode(int uflags, int extended)
{
	uint32_t ret;

	/* Convert first to O flags.*/
	uflags = OFLAGS(uflags);

	switch (uflags & O_ACCMODE) {
	case O_RDONLY:
		ret = P9PROTO_OREAD;
		break;

	case O_WRONLY:
		ret = P9PROTO_OWRITE;
		break;

	case O_RDWR:
		ret = P9PROTO_ORDWR;
		break;

	default:
		/* Not a valid access mode: fall back to read-only. */
		ret = P9PROTO_OREAD;
		break;
	}

	if (extended) {
		if (uflags & O_EXCL)
			ret |= P9PROTO_OEXCL;

		if (uflags & O_APPEND)
			ret |= P9PROTO_OAPPEND;
	}

	return (ret);
}

/*
 * VOP_OPEN handler.
 *
 * Only regular files, directories and symbolic links can be opened; other
 * types get EOPNOTSUPP. The cached stats are reloaded from the server
 * first, and for a regular file flagged P9FS_NODE_MODIFIED the cached
 * buffers are invalidated before the flag is cleared.
 *
 * If an open fid matching the translated 9P mode already exists for the
 * caller's credentials, its open count is bumped and the call succeeds.
 * Otherwise the node's fid is cloned, opened on the server with that mode,
 * given an open count of 1, and attached to the node as a VOFID after the
 * VM object for the vnode has been created. A clone that fails to open is
 * clunked.
 *
 * Returns 0 on success or the error of the failing step.
 */
static int
p9fs_open(struct vop_open_args *ap)
{
	struct vnode *vp;
	struct p9fs_node *np;
	struct p9fs_session *vses;
	struct p9_fid *vfid, *vofid;
	size_t filesize;
	uint32_t mode;
	int error;

	vp = ap->a_vp;
	np = P9FS_VTON(vp);
	vses = np->p9fs_ses;
	error = 0;

	P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp);

	if (!(vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VLNK))
		return (EOPNOTSUPP);

	error = p9fs_reload_stats_dotl(vp, ap->a_cred);
	if (error != 0)
		return (error);

	ASSERT_VOP_LOCKED(vp, __func__);
	/*
	 * The stats reload flags the node when the file changed on the
	 * server; drop the cached pages of the vm_object in that case.
	 */
	if ((np->flags & P9FS_NODE_MODIFIED) != 0 && vp->v_type == VREG) {
		error = vinvalbuf(vp, 0, 0, 0);
		if (error != 0)
			return (error);
		np->flags &= ~P9FS_NODE_MODIFIED;
	}

	vfid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VFID, -1, &error);
	if (error != 0)
		return (error);

	/* Translate kernel fflags to 9p mode. */
	mode = p9fs_uflags_mode(ap->a_mode, 1);

	/* Reuse an existing open fid for this user and mode if there is one. */
	vofid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VOFID, mode, &error);
	if (vofid != NULL) {
		vofid->v_opens++;
		return (0);
	}

	/* None found: clone the node fid; the clone becomes the open fid. */
	vofid = p9_client_walk(vfid, 0, NULL, 1, &error);
	if (error != 0)
		return (error);

	error = p9_client_open(vofid, mode);
	if (error != 0) {
		p9_client_clunk(vofid);
		return (error);
	}

	vofid->v_opens = 1;
	filesize = np->inode.i_size;
	vnode_create_vobject(vp, filesize, ap->a_td);
	p9fs_fid_add(np, vofid, VOFID);

	return (error);
}

/*
 * VOP_CLOSE handler. Looks up the open fid that matches the caller's
 * credentials and the 9P mode derived from the close flags, and decrements
 * its open count. The fid itself is not clunked here. A vnode without a
 * p9fs node, or without a matching open fid, is ignored. Always returns 0.
 */
static int
p9fs_close(struct vop_close_args *ap)
{
	struct p9fs_node *np;
	struct p9fs_session *vses;
	struct p9_fid *vofid;
	int error, p9mode;

	np = P9FS_VTON(ap->a_vp);
	if (np == NULL)
		return (0);

	error = 0;
	vses = np->p9fs_ses;

	P9_DEBUG(VOPS, "%s: file_name %s\n", __func__, np->inode.i_name);

	/* Translate kernel fflags to 9p mode. */
	p9mode = p9fs_uflags_mode(ap->a_fflag, 1);

	vofid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VOFID, p9mode,
	    &error);
	if (vofid != NULL)
		vofid->v_opens--;

	return (0);
}

/*
 * Helper for p9fs_access(): decide from the file type whether the requested
 * access can be granted at all, before permissions are examined.
 *
 * Directories, symlinks and regular files: EROFS when the access includes
 * any of VMODIFY_PERMS and the mount is read-only, 0 otherwise.
 * Block/character devices, sockets and FIFOs: always 0; these may be
 * written even on a read-only mount if permissions allow.
 * Any other type: EINVAL.
 */
static int
p9fs_check_possible(struct vnode *vp, struct vattr *vap, mode_t mode)
{

	switch (vap->va_type) {
	case VREG:
	case VDIR:
	case VLNK:
		/* Normal nodes: refuse modification on a read-only mount. */
		if ((mode & VMODIFY_PERMS) == 0)
			return (0);
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
			return (0);
		return (EROFS);
	case VBLK:
	case VCHR:
	case VSOCK:
	case VFIFO:
		/* Special nodes: nothing to check here. */
		return (0);
	default:
		/* No idea what this is */
		return (EINVAL);
	}
}

/* Check the access permissions of the file. */
static int
p9fs_access(struct vop_access_args *ap)
{
	struct vnode *vp;
	accmode_t accmode;
	struct ucred *cred;
	struct vattr vap;
	int error;

	vp = ap->a_vp;
	accmode = ap->a_accmode;
	cred = ap->a_cred;

	P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp);

	/* make sure getattr is working correctly and is defined.*/
	error = VOP_GETATTR(vp, &vap, cred);
	if (error != 0)
		return (error);

	error = p9fs_check_possible(vp, &vap, accmode);
	if (error != 0)
		return (error);

	/* Call the Generic Access check in VOPS*/
	error = vaccess(vp->v_type, vap.va_mode, vap.va_uid, vap.va_gid, accmode,
	    cred);


	return (error);
}

/*
 * Reload the file stats from the server and update the inode structure present
 * in p9fs node.
 */
int
p9fs_reload_stats_dotl(struct vnode *vp, struct ucred *cred)
{
	struct p9_stat_dotl *stat;
	int error;
	struct p9fs_node *node;
	struct p9fs_session *vses;
	struct p9_fid *vfid;

	error = 0;
	node = P9FS_VTON(vp);
	vses = node->p9fs_ses;

	vfid = p9fs_get_fid(vses->clnt, node, cred, VOFID, P9PROTO_OREAD, &error);
	if (vfid == NULL) {
		vfid = p9fs_get_fid(vses->clnt, node, cred, VFID, -1, &error);
		if (error)
			return (error);
	}

	stat = uma_zalloc(p9fs_getattr_zone, M_WAITOK | M_ZERO);

	error = p9_client_getattr(vfid, stat, P9PROTO_STATS_ALL);
	if (error != 0) {
		P9_DEBUG(ERROR, "%s: p9_client_getattr failed: %d\n", __func__, error);
		goto out;
	}

	/* Init the vnode with the disk info */
	p9fs_stat_vnode_dotl(stat, vp);
out:
	if (stat != NULL) {
		uma_zfree(p9fs_getattr_zone, stat);
	}

	return (error);
}

/*
 * Read the current inode values into the vap attr. We reload the stats from
 * the server.
 */
static int
p9fs_getattr_dotl(struct vop_getattr_args *ap)
{
	struct vnode *vp;
	struct vattr *vap;
	struct p9fs_node *node;
	struct p9fs_inode *inode;
	int error;

	vp = ap->a_vp;
	vap = ap->a_vap;
	node = P9FS_VTON(vp);

	if (node == NULL)
		return (ENOENT);

	inode = &node->inode;

	P9_DEBUG(VOPS, "%s: %u %u\n", __func__, inode->i_mode, IFTOVT(inode->i_mode));

	/* Reload our stats once to get the right values.*/
	error = p9fs_reload_stats_dotl(vp, ap->a_cred);
	if (error != 0) {
		P9_DEBUG(ERROR, "%s: failed: %d\n", __func__, error);
		return (error);
	}

	/* Basic info */
	VATTR_NULL(vap);

	vap->va_atime.tv_sec = inode->i_atime;
	vap->va_mtime.tv_sec = inode->i_mtime;
	vap->va_ctime.tv_sec = inode->i_ctime;
	vap->va_atime.tv_nsec = inode->i_atime_nsec;
	vap->va_mtime.tv_nsec = inode->i_mtime_nsec;
	vap->va_ctime.tv_nsec = inode->i_ctime_nsec;
	vap->va_type = IFTOVT(inode->i_mode);
	vap->va_mode = inode->i_mode;
	vap->va_uid = inode->n_uid;
	vap->va_gid = inode->n_gid;
	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
	vap->va_size = inode->i_size;
	vap->va_nlink = inode->i_links_count;
	vap->va_blocksize = inode->blksize;
	vap->va_fileid = inode->i_qid_path;
	vap->va_flags = inode->i_flags;
	vap->va_gen = inode->gen;
	vap->va_filerev = inode->data_version;
	vap->va_vaflags = 0;
	vap->va_bytes = inode->blocks * P9PROTO_TGETATTR_BLK;

	return (0);
}

/*
 * Convert a standard FreeBSD mode word to 9P permission bits.
 *
 * The low nine permission bits are copied unchanged. The file type is
 * mapped to P9PROTO_DMDIR, P9PROTO_DMSOCKET, P9PROTO_DMSYMLINK or
 * P9PROTO_DMNAMEDPIPE (other types add nothing), and the set-uid, set-gid
 * and sticky bits map to P9PROTO_DMSETUID, P9PROTO_DMSETGID and
 * P9PROTO_DMSETVTX.
 */
static uint32_t
p9fs_unix2p9_mode(uint32_t mode)
{
	uint32_t type_bits, special_bits;

	/* A mode has exactly one file type, so at most one branch matches. */
	type_bits = 0;
	if (S_ISDIR(mode))
		type_bits = P9PROTO_DMDIR;
	else if (S_ISSOCK(mode))
		type_bits = P9PROTO_DMSOCKET;
	else if (S_ISLNK(mode))
		type_bits = P9PROTO_DMSYMLINK;
	else if (S_ISFIFO(mode))
		type_bits = P9PROTO_DMNAMEDPIPE;

	special_bits = 0;
	if ((mode & S_ISUID) != 0)
		special_bits |= P9PROTO_DMSETUID;
	if ((mode & S_ISGID) != 0)
		special_bits |= P9PROTO_DMSETGID;
	if ((mode & S_ISVTX) != 0)
		special_bits |= P9PROTO_DMSETVTX;

	return ((mode & 0777) | type_bits | special_bits);
}

/*
 * Update the cached inode of vp with the stats read from the server
 * (9P2000.L version).
 *
 * When the size differs from the cached one it is stored and, for a vnode
 * that is currently a regular file, pushed to the pager. The remaining
 * attributes are then copied, v_type is recomputed from the new mode, and
 * the node is flagged P9FS_NODE_MODIFIED if the qid version differs from
 * the cached qid before the qid is overwritten. Always returns 0.
 */
int
p9fs_stat_vnode_dotl(struct p9_stat_dotl *stat, struct vnode *vp)
{
	struct p9fs_node *np = P9FS_VTON(vp);
	struct p9fs_inode *ip = &np->inode;

	ASSERT_VOP_LOCKED(vp, __func__);

	/*
	 * Update the pager size if file size changes on host. This runs
	 * before v_type is refreshed below, so the type test sees the
	 * vnode's current type.
	 */
	if (stat->st_size != ip->i_size) {
		ip->i_size = stat->st_size;
		if (vp->v_type == VREG)
			vnode_pager_setsize(vp, ip->i_size);
	}

	/* Timestamps. */
	ip->i_atime = stat->st_atime_sec;
	ip->i_atime_nsec = stat->st_atime_nsec;
	ip->i_mtime = stat->st_mtime_sec;
	ip->i_mtime_nsec = stat->st_mtime_nsec;
	ip->i_ctime = stat->st_ctime_sec;
	ip->i_ctime_nsec = stat->st_ctime_nsec;

	/* Ownership, mode and the vnode type derived from it. */
	ip->n_uid = stat->st_uid;
	ip->n_gid = stat->st_gid;
	ip->i_mode = stat->st_mode;
	vp->v_type = IFTOVT(ip->i_mode);

	/* Link count, block accounting and versioning. */
	ip->i_links_count = stat->st_nlink;
	ip->blksize = stat->st_blksize;
	ip->blocks = stat->st_blocks;
	ip->gen = stat->st_gen;
	ip->data_version = stat->st_data_version;

	ASSERT_VOP_LOCKED(vp, __func__);

	/* Setting a flag if file changes based on qid version */
	if (stat->qid.version != np->vqid.qid_version)
		np->flags |= P9FS_NODE_MODIFIED;
	memcpy(&np->vqid, &stat->qid, sizeof(stat->qid));

	return (0);
}

/*
 * Copy the in-memory inode values into the attribute structure that is
 * sent to the server (9P2000.L version): size, mode, owner, group and the
 * access/modification times. Fields of p9attr not listed here, including
 * its valid mask, are left untouched. Always returns 0.
 */
static int
p9fs_inode_to_iattr(struct p9fs_inode *inode, struct p9_iattr_dotl *p9attr)
{
	/* Ownership and mode. */
	p9attr->mode = inode->i_mode;
	p9attr->uid = inode->n_uid;
	p9attr->gid = inode->n_gid;

	/* Size. */
	p9attr->size = inode->i_size;

	/* Timestamps. */
	p9attr->atime_sec = inode->i_atime;
	p9attr->atime_nsec = inode->i_atime_nsec;
	p9attr->mtime_sec = inode->i_mtime;
	p9attr->mtime_nsec = inode->i_mtime_nsec;

	return (0);
}

/*
 * Apply a chown request to the in-memory inode of vp.
 *
 * A uid or gid of VNOVAL means "keep the current value". The caller needs
 * VWRITE_OWNER access to the file; changing the owner to someone other
 * than the caller, or the group to one the caller is not a member of,
 * additionally requires PRIV_VFS_CHOWN. When ownership actually changes on
 * a set-uid/set-gid file, those bits are cleared from the cached mode
 * unless the caller holds PRIV_VFS_RETAINSUGID.
 *
 * Only the cached inode is modified here. Returns 0 on success or the
 * error from the access/privilege checks.
 */
static int
p9fs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
    struct thread *td)
{
	struct p9fs_node *np;
	struct p9fs_inode *ip;
	uid_t prev_uid;
	gid_t prev_gid;
	int error, needpriv;

	np = P9FS_VTON(vp);
	ip = &np->inode;

	/* Unset ids default to the current ones. */
	if (uid == (uid_t)VNOVAL)
		uid = ip->n_uid;
	if (gid == (gid_t)VNOVAL)
		gid = ip->n_gid;

	/* Modifying ownership requires VWRITE_OWNER on the file. */
	error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td);
	if (error != 0)
		return (error);

	/*
	 * To change the owner of a file, or change the group of a file to a
	 * group of which we are not a member, the caller must have
	 * privilege.
	 */
	needpriv = (uid != ip->n_uid && uid != cred->cr_uid) ||
	    (gid != ip->n_gid && !groupmember(gid, cred));
	if (needpriv) {
		error = priv_check_cred(cred, PRIV_VFS_CHOWN);
		if (error != 0)
			return (error);
	}

	prev_uid = ip->n_uid;
	prev_gid = ip->n_gid;

	ip->n_uid = uid;
	ip->n_gid = gid;

	/* Drop set-uid/set-gid on an ownership change unless privileged. */
	if ((ip->i_mode & (ISUID | ISGID)) &&
	    (prev_uid != uid || prev_gid != gid) &&
	    priv_check_cred(cred, PRIV_VFS_RETAINSUGID))
		ip->i_mode &= ~(ISUID | ISGID);

	P9_DEBUG(VOPS, "%s: vp %p, cred %p, td %p - ret OK\n", __func__, vp, cred, td);

	return (0);
}

/*
 * Apply a chmod request to the in-memory inode of vp; a setattr is
 * expected to push the result to the server.
 *
 * The caller needs VADMIN access. Setting the sticky bit on a
 * non-directory requires PRIV_VFS_STICKYFILE (EFTYPE otherwise), setting
 * set-gid while not a member of the file's group requires PRIV_VFS_SETGID,
 * and setting set-uid on a file owned by someone else requires
 * PRIV_VFS_ADMIN. On success the ALLPERMS bits of the cached mode are
 * replaced with those of mode, leaving the other bits intact.
 *
 * Returns 0 on success or the error of the failing check.
 */
static int
p9fs_chmod(struct vnode *vp, uint32_t mode, struct ucred *cred, struct thread *td)
{
	struct p9fs_node *np;
	struct p9fs_inode *ip;
	uint32_t newmode;
	int error;

	np = P9FS_VTON(vp);
	ip = &np->inode;

	P9_DEBUG(VOPS, "%s: vp %p, mode %x, cred %p, td %p\n",  __func__, vp, mode, cred, td);

	/* Modifying permissions requires VADMIN on the file. */
	error = VOP_ACCESS(vp, VADMIN, cred, td);
	if (error != 0)
		return (error);

	/*
	 * Privileged processes may set the sticky bit on non-directories,
	 * as well as set the setgid bit on a file with a group that the
	 * process is not a member of. Both of these are allowed in
	 * jail(8).
	 */
	if ((mode & S_ISTXT) && vp->v_type != VDIR &&
	    priv_check_cred(cred, PRIV_VFS_STICKYFILE))
		return (EFTYPE);

	if (!groupmember(ip->n_gid, cred) && (mode & ISGID)) {
		error = priv_check_cred(cred, PRIV_VFS_SETGID);
		if (error != 0)
			return (error);
	}

	/* Deny setting setuid if we are not the file owner. */
	if ((mode & ISUID) && ip->n_uid != cred->cr_uid) {
		error = priv_check_cred(cred, PRIV_VFS_ADMIN);
		if (error != 0)
			return (error);
	}

	/* Swap in the new permission bits, keeping everything else. */
	newmode = (ip->i_mode & ~ALLPERMS) | (mode & ALLPERMS);
	ip->i_mode = newmode;

	P9_DEBUG(VOPS, "%s: to mode %x  %d \n ", __func__, newmode, error);

	return (error);
}

/*
 * VOP_SETATTR handler (9P2000.L). Set the attributes of a file referenced
 * by fid. A valid bitmask is sent in the request selecting which fields to
 * set.
 *
 * The requested changes are first applied to the cached inode (ownership
 * through p9fs_chown(), mode through p9fs_chmod(), size and times inline),
 * accumulating the matching P9PROTO_SETATTR_* bits. The cached inode is
 * then copied into the request and sent with p9_client_setattr().
 *
 * Returns:
 *   EINVAL      an attribute that cannot be set was supplied (type, nlink,
 *               fsid, fileid, blocksize, rdev, bytes, gen);
 *   EROFS       the filesystem is mounted read-only;
 *   EOPNOTSUPP  file flags were supplied;
 *   ENOMEM      the request structure could not be allocated;
 *   EISDIR      a size change was requested on a directory;
 *   otherwise the error of the failing step, or 0 on success.
 */
static int
p9fs_setattr_dotl(struct vop_setattr_args *ap)
{
	struct vnode *vp;
	struct vattr *vap;
	struct p9fs_node *node;
	struct p9fs_inode *inode;
	struct ucred *cred;
	struct thread *td;
	struct p9_iattr_dotl *p9attr;
	struct p9fs_session *vses;
	struct p9_fid *vfid;
	uint64_t oldfilesize;
	int error;

	vp = ap->a_vp;
	vap = ap->a_vap;
	node = P9FS_VTON(vp);
	inode = &node->inode;
	cred = ap->a_cred;
	td = curthread;
	vses = node->p9fs_ses;
	error = 0;

	/* These attributes can never be changed through setattr. */
	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
	    (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
		P9_DEBUG(ERROR, "%s: unsettable attribute\n", __func__);
		return (EINVAL);
	}
	/* Disallow write attempts on read only filesystem */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return (EROFS);

	/* Setting of flags is not supported */
	if (vap->va_flags != VNOVAL)
		return (EOPNOTSUPP);

	/* Allocate p9attr struct (zeroed, so the valid mask starts empty) */
	p9attr = uma_zalloc(p9fs_setattr_zone, M_WAITOK | M_ZERO);
	if (p9attr == NULL)
		return (ENOMEM);

	/* Check if we need to change the ownership of the file*/
	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
		P9_DEBUG(VOPS, "%s: vp:%p td:%p uid/gid %x/%x\n", __func__,
		    vp, td, vap->va_uid, vap->va_gid);

		error = p9fs_chown(vp, vap->va_uid, vap->va_gid, cred, td);
		/*
		 * MODE is included because p9fs_chown() may clear the
		 * set-uid/set-gid bits of the cached mode.
		 */
		p9attr->valid |= P9PROTO_SETATTR_UID | P9PROTO_SETATTR_GID |
			P9PROTO_SETATTR_MODE;
		if (error)
			goto out;
	}

	/* Check for mode changes */
	if (vap->va_mode != (mode_t)VNOVAL) {
		P9_DEBUG(VOPS, "%s: vp:%p td:%p mode %x\n", __func__, vp, td,
		    vap->va_mode);

		error = p9fs_chmod(vp, (int)vap->va_mode, cred, td);
		p9attr->valid |= P9PROTO_SETATTR_MODE;
		if (error)
			goto out;
	}

	/* Update the size of the file and update mtime */
	if (vap->va_size != (uint64_t)VNOVAL) {
		P9_DEBUG(VOPS, "%s: vp:%p td:%p size:%jx\n", __func__,
		    vp, td, (uintmax_t)vap->va_size);
		switch (vp->v_type) {
			case VDIR:
				error = EISDIR;
				goto out;
			case VLNK:
			case VREG:
				/* Invalidate cached pages of vp */
				error = vinvalbuf(vp, 0, 0, 0);
				if (error)
					goto out;
				/* Remember the old size for the rollback below. */
				oldfilesize = inode->i_size;
				inode->i_size = vap->va_size;
				/* Update the p9fs_inode time */
				p9fs_itimes(vp);
				p9attr->valid |= P9PROTO_SETATTR_SIZE |
				    P9PROTO_SETATTR_ATIME |
				    P9PROTO_SETATTR_MTIME |
				    P9PROTO_SETATTR_ATIME_SET |
				    P9PROTO_SETATTR_MTIME_SET ;
				break;
			default:
				/*
				 * Other vnode types: leave without contacting
				 * the server; error is still 0 at this point.
				 */
				goto out;
		}
	} else if (vap->va_atime.tv_sec != VNOVAL ||
		    vap->va_mtime.tv_sec != VNOVAL) {
		P9_DEBUG(VOPS, "%s: vp:%p td:%p time a/m %jx/%jx/\n",
		    __func__, vp, td, (uintmax_t)vap->va_atime.tv_sec,
		    (uintmax_t)vap->va_mtime.tv_sec);
		/*
		 * Update the p9fs_inode times.
		 * NOTE(review): the times supplied in vap are not copied into
		 * the inode; p9fs_itimes() stamps i_mtime with the current
		 * time instead -- confirm this is intended.
		 */
		p9fs_itimes(vp);
		p9attr->valid |= P9PROTO_SETATTR_ATIME |
			P9PROTO_SETATTR_MTIME | P9PROTO_SETATTR_ATIME_SET |
			P9PROTO_SETATTR_MTIME_SET;
	}

	/* Prefer a fid already open for writing; else use the node's fid. */
	vfid = p9fs_get_fid(vses->clnt, node, cred, VOFID, P9PROTO_OWRITE, &error);
	if (vfid == NULL) {
		vfid = p9fs_get_fid(vses->clnt, node, cred, VFID, -1, &error);
		if (error)
			goto out;
	}

	/* Write the inode structure values into p9attr */
	p9fs_inode_to_iattr(inode, p9attr);
	error = p9_client_setattr(vfid, p9attr);
	if (vap->va_size != (uint64_t)VNOVAL && vp->v_type == VREG) {
		/*
		 * Regular file resize: roll the cached size back if the
		 * server refused, otherwise tell the pager the new size.
		 */
		if (error)
			inode->i_size = oldfilesize;
		else
			vnode_pager_setsize(vp, inode->i_size);
	}
out:
	/* Common exit: release the request structure. */
	if (p9attr) {
		uma_zfree(p9fs_setattr_zone, p9attr);
	}
	P9_DEBUG(VOPS, "%s: error: %d\n", __func__, error);
	return (error);
}

/*
 * Bookkeeping shared between p9fs_get_open_fid() and
 * p9fs_release_open_fid().
 */
struct open_fid_state {
	struct p9_fid *vofid;	/* open fid to perform the I/O on */
	int fflags;		/* flags that were handed to VOP_OPEN */
	int opened;		/* TRUE when p9fs_get_open_fid() did the open */
};

/*
 * Look up an open fid on vp that matches fflags (FREAD or FWRITE).  When the
 * node has no such fid yet, the vnode is opened through VOP_OPEN and the
 * lookup is repeated.
 *
 * On success 0 is returned, statep->vofid holds the fid and statep->opened
 * tells p9fs_release_open_fid() whether it has to close the vnode again.
 * On failure an errno value is returned, nothing is left open by this
 * function and *statep describes "no fid, nothing opened".
 *
 * TODO: change this to take P9PROTO_* mode and avoid routing through
 * VOP_OPEN, factoring out implementation of p9fs_open.
 */
static int
p9fs_get_open_fid(struct vnode *vp, int fflags, struct ucred *cr, struct open_fid_state *statep)
{
	struct p9fs_node *np;
	struct p9fs_session *vses;
	struct p9_fid *vofid;
	int mode = p9fs_uflags_mode(fflags, TRUE);
	int error = 0;

	/* Keep the state well defined on every return path. */
	statep->vofid = NULL;
	statep->fflags = fflags;
	statep->opened = FALSE;

	np = P9FS_VTON(vp);
	vses = np->p9fs_ses;
	vofid = p9fs_get_fid(vses->clnt, np, cr, VOFID, mode, &error);
	if (vofid == NULL) {
		error = VOP_OPEN(vp, fflags, cr, curthread, NULL);
		if (error != 0)
			return (error);

		vofid = p9fs_get_fid(vses->clnt, np, cr, VOFID, mode, &error);
		if (vofid == NULL) {
			/*
			 * The open succeeded but did not yield a usable fid.
			 * Balance the VOP_OPEN before reporting the failure.
			 */
			(void) VOP_CLOSE(vp, fflags, cr, curthread);
			return (EBADF);
		}
		statep->opened = TRUE;
	}
	statep->vofid = vofid;
	return (0);
}

/*
 * Undo p9fs_get_open_fid(): close the vnode, but only when the matching get
 * call was the one that opened it.  The result of VOP_CLOSE is ignored.
 */
static void
p9fs_release_open_fid(struct vnode *vp, struct ucred *cr, struct open_fid_state *statep)
{
	if (!statep->opened)
		return;

	(void) VOP_CLOSE(vp, statep->fflags, cr, curthread);
}

/*
 * An I/O buffer is used to to do any transfer. The uio is the vfs structure we
 * need to copy data into. As long as resid is greater than zero, we call
 * client_read to read data from offset(offset into the file) in the open fid
 * for the file into the I/O buffer. The data is read into the user data buffer.
 */
static int
p9fs_read(struct vop_read_args *ap)
{
	struct vnode *vp;
	struct uio *uio;
	struct p9fs_node *np;
	uint64_t offset;
	int64_t ret;
	uint64_t resid;
	uint32_t count;
	int error;
	char *io_buffer = NULL;
	uint64_t filesize;
	struct open_fid_state ostate;

	vp = ap->a_vp;
	uio = ap->a_uio;
	np = P9FS_VTON(vp);
	error = 0;

	if (vp->v_type == VCHR || vp->v_type == VBLK)
		return (EOPNOTSUPP);
	if (vp->v_type != VREG)
		return (EISDIR);
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0)
		return (EINVAL);

	error = p9fs_get_open_fid(vp, FREAD, ap->a_cred, &ostate);
	if (error)
		return (error);

	/* where in the file are we to start reading */
	offset = uio->uio_offset;
	filesize = np->inode.i_size;
	if (uio->uio_offset >= filesize)
		goto out;

	P9_DEBUG(VOPS, "%s: called %jd at %ju\n",
	    __func__, (intmax_t)uio->uio_resid, (uintmax_t)uio->uio_offset);

	/* Work with a local buffer from the pool for this vop */

	io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO);
	while ((resid = uio->uio_resid) > 0) {
		if (offset >= filesize)
			break;
		count = MIN(filesize - uio->uio_offset , resid);
		if (count == 0)
			break;

		/* Copy count bytes into the uio */
		ret = p9_client_read(ostate.vofid, offset, count, io_buffer);
		/*
		 * This is the only place in the entire p9fs where we check the
		 * error for < 0 as p9_client_read/write return the number of
		 * bytes instead of an error code. In this case if ret is < 0,
		 * it means there is an IO error.
		 */
		if (ret < 0) {
			error = -ret;
			goto out;
		}
		error = uiomove(io_buffer, ret, uio);
		if (error != 0)
			goto out;

		offset += ret;
	}
	uio->uio_offset = offset;
out:
	uma_zfree(p9fs_io_buffer_zone, io_buffer);
	p9fs_release_open_fid(vp, ap->a_cred, &ostate);

	return (error);
}

/*
 * The user buffer contains the data to be written. This data is copied first
 * from uio into I/O buffer. This I/O  buffer is used to do the client_write to
 * the fid of the file starting from the offset given upto count bytes. The
 * number of bytes written is returned to the caller.
 */
static int
p9fs_write(struct vop_write_args *ap)
{
	struct vnode *vp;
	struct uio *uio;
	struct p9fs_node *np;
	uint64_t off, offset;
	int64_t ret;
	uint64_t resid, bytes_written;
	uint32_t count;
	int error, ioflag;
	uint64_t file_size;
	char *io_buffer = NULL;
	struct open_fid_state ostate;

	vp = ap->a_vp;
	uio = ap->a_uio;
	np = P9FS_VTON(vp);
	error = 0;
	ioflag = ap->a_ioflag;

	error = p9fs_get_open_fid(vp, FWRITE, ap->a_cred, &ostate);
	if (error)
		return (error);

	P9_DEBUG(VOPS, "%s: %#zx at %#jx\n",
	    __func__, uio->uio_resid, (uintmax_t)uio->uio_offset);

	if (uio->uio_offset < 0) {
		error = EINVAL;
		goto out;
	}
	if (uio->uio_resid == 0)
		goto out;

	file_size = np->inode.i_size;

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = file_size;
		break;
	case VDIR:
		return (EISDIR);
	case VLNK:
		break;
	default:
		panic("%s: bad file type vp: %p", __func__, vp);
	}

	resid = uio->uio_resid;
	offset = uio->uio_offset;
	bytes_written = 0;
	error = 0;

	io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO);
	while ((resid = uio->uio_resid) > 0) {
                off = 0;
		count = MIN(resid, P9FS_IOUNIT);
		error = uiomove(io_buffer, count, uio);

		if (error != 0) {
			P9_DEBUG(ERROR, "%s: uiomove failed: %d\n", __func__, error);
			goto out;
		}

		/* While count still exists, keep writing.*/
		while (count > 0) {
			/* Copy count bytes from the uio */
			ret = p9_client_write(ostate.vofid, offset, count,
                                io_buffer + off);
			if (ret < 0) {
				if (bytes_written == 0) {
					error = -ret;
					goto out;
				} else {
					break;
				}
			}
			P9_DEBUG(VOPS, "%s: write %#zx at %#jx\n",
			    __func__, uio->uio_resid, (uintmax_t)uio->uio_offset);

                        off += ret;
			offset += ret;
			bytes_written += ret;
			count -= ret;
		}
	}
	/* Update the fields in the node to reflect the change*/
	if (file_size < uio->uio_offset + uio->uio_resid) {
		np->inode.i_size = uio->uio_offset + uio->uio_resid;
		vnode_pager_setsize(vp, uio->uio_offset + uio->uio_resid);
	}
out:
	if (io_buffer)
		uma_zfree(p9fs_io_buffer_zone, io_buffer);
	p9fs_release_open_fid(vp, ap->a_cred, &ostate);

	return (error);
}

/*
 * Common handler of all removal-related VOPs (e.g. rmdir, rm). Perform the
 * client_remove op to send messages to remove the node's fid on the server.
 * After that, does a node metadata cleanup on client side.
 */
static int
remove_common(struct p9fs_node *np, struct ucred *cred)
{
	int error;
	struct p9fs_session *vses;
	struct vnode *vp;
	struct p9_fid *vfid;

	error = 0;
	vses = np->p9fs_ses;
	vp = P9FS_NTOV(np);

	vfid = p9fs_get_fid(vses->clnt, np, cred, VFID, -1, &error);
	if (error != 0)
		return (error);

	error = p9_client_remove(vfid);
	if (error != 0)
		return (error);

	/* Remove all non-open fids associated with the vp */
	p9fs_fid_remove_all(np, TRUE);

	/* Invalidate all entries of vnode from name cache and hash list. */
	cache_purge(vp);

	vfs_hash_remove(vp);
	np->flags |= P9FS_NODE_DELETED;

	return (error);
}

/*
 * VOP_REMOVE: unlink a non-directory.  Directories are refused with EISDIR;
 * everything else goes through remove_common(), after which the parent
 * directory's link count is decremented.
 */
static int
p9fs_remove(struct vop_remove_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct p9fs_node *np = P9FS_VTON(vp);
	struct p9fs_node *parent = P9FS_VTON(ap->a_dvp);
	struct p9fs_inode *parent_inode = &parent->inode;
	int error;

	P9_DEBUG(VOPS, "%s: vp %p node %p \n", __func__, vp, np);

	if (vp->v_type == VDIR)
		return (EISDIR);

	error = remove_common(np, ap->a_cnp->cn_cred);
	if (error != 0)
		return (error);

	P9FS_DECR_LINKS(parent_inode);
	return (0);
}

/*
 * VOP_RMDIR: remove a directory through remove_common() and, on success,
 * decrement the parent directory's link count.
 */
static int
p9fs_rmdir(struct vop_rmdir_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct p9fs_node *np = P9FS_VTON(vp);
	struct p9fs_node *parent = P9FS_VTON(ap->a_dvp);
	struct p9fs_inode *parent_inode = &parent->inode;
	int error;

	P9_DEBUG(VOPS, "%s: vp %p node %p \n", __func__, vp, np);

	error = remove_common(np, ap->a_cnp->cn_cred);
	if (error != 0)
		return (error);

	P9FS_DECR_LINKS(parent_inode);
	return (0);
}

/*
 * Create symlinks. Make the permissions and call create_common code
 * for Soft links.
 */
static int
p9fs_symlink(struct vop_symlink_args *ap)
{
	struct vnode *dvp;
	struct vnode **vpp;
	struct vattr *vap;
	struct componentname *cnp;
	char *symtgt;
	struct p9fs_node *dnp;
	struct p9fs_session *vses;
	struct mount *mp;
	struct p9_fid *dvfid, *newfid;
	int error;
	char tmpchr;
	gid_t gid;

	dvp = ap->a_dvp;
	vpp = ap->a_vpp;
	vap = ap->a_vap;
	cnp = ap->a_cnp;
	symtgt = (char*)(uintptr_t) ap->a_target;
	dnp = P9FS_VTON(dvp);
	vses = dnp->p9fs_ses;
	mp = vses->p9fs_mount;
	newfid = NULL;
	error = 0;
	gid = vap->va_gid;

	P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);

	/*
	 * Save the character present at namelen in nameptr string and
	 * null terminate the character to get the search name for p9_dir_walk
	 */
	tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
	cnp->cn_nameptr[cnp->cn_namelen] = '\0';

	dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
	if (error != 0)
		goto out;

	error = p9_create_symlink(dvfid, cnp->cn_nameptr, symtgt, gid);
	if (error != 0)
		goto out;

	/*create vnode for symtgt */
	newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error);
	if (newfid != NULL) {
		error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
		    dnp, newfid, vpp, cnp->cn_nameptr);
		if (error != 0)
			goto out;
	} else
		goto out;

	if ((cnp->cn_flags & MAKEENTRY) != 0) {
		cache_enter(P9FS_NTOV(dnp), *vpp, cnp);
	}
	P9_DEBUG(VOPS, "%s: created file under vp %p node %p fid %ju\n",
	    __func__, *vpp, dnp, (uintmax_t)dvfid->fid);

	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
	return (error);

out:
	if (newfid != NULL)
		p9_client_clunk(newfid);
	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
	return (error);
}

/* Create hard link */
static int
p9fs_link(struct vop_link_args *ap)
{
	struct vnode *vp;
	struct vnode *tdvp;
	struct componentname *cnp;
	struct p9fs_node *dnp;
	struct p9fs_node *np;
	struct p9fs_inode *inode;
	struct p9fs_session *vses;
	struct p9_fid *dvfid, *oldvfid;
	int error;

	vp = ap->a_vp;
	tdvp = ap->a_tdvp;
	cnp = ap->a_cnp;
	dnp = P9FS_VTON(tdvp);
	np = P9FS_VTON(vp);
	inode = &np->inode;
	vses = np->p9fs_ses;
	error = 0;

	P9_DEBUG(VOPS, "%s: tdvp %p vp %p\n", __func__, tdvp, vp);

	dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
	if (error != 0)
		return (error);
	oldvfid = p9fs_get_fid(vses->clnt, np, cnp->cn_cred, VFID, -1, &error);
	if (error != 0)
		return (error);

	error = p9_create_hardlink(dvfid, oldvfid, cnp->cn_nameptr);
	if (error != 0)
		return (error);
	/* Increment ref count on the inode */
	P9FS_INCR_LINKS(inode);

	return (0);
}

/*
 * VOP_READLINK: fetch the target of the symbolic link vp from the server and
 * copy it (without a terminating NUL) into the caller's uio.
 *
 * Returns 0 on success or an errno value from the fid lookup, the readlink
 * request or the copy into the uio.
 */
static int
p9fs_readlink(struct vop_readlink_args *ap)
{
	struct vnode *vp;
	struct uio *uio;
	struct p9fs_node *dnp;
	struct p9fs_session *vses;
	struct p9_fid *dvfid;
	int error, len;
	char *target;

	vp = ap->a_vp;
	uio = ap->a_uio;
	dnp = P9FS_VTON(vp);
	vses = dnp->p9fs_ses;
	error = 0;

	P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp);

	dvfid = p9fs_get_fid(vses->clnt, dnp, ap->a_cred, VFID, -1, &error);
	if (error != 0)
		return (error);

	error = p9_readlink(dvfid, &target);
	if (error != 0)
		return (error);

	/*
	 * NOTE(review): target is presumably allocated by p9_readlink() and
	 * is not released here - confirm who owns it and free it with the
	 * matching malloc type if this function does.
	 */
	len = strlen(target);

	/* A failed copy-out must be reported, not swallowed. */
	error = uiomove(target, len, uio);

	return (error);
}

/*
 * Iterate through a directory. An entire 8k data is read into the I/O buffer.
 * This buffer is parsed to make dir entries and fed to the user buffer to
 * complete it to the VFS.
 */
static int
p9fs_readdir(struct vop_readdir_args *ap)
{
	struct uio *uio;
	struct vnode *vp;
	struct dirent cde;
	int64_t offset;
	uint64_t diroffset;
	struct p9fs_node *np;
	int error;
	int32_t count;
	struct p9_client *clnt;
	struct p9_dirent dent;
	char *io_buffer;
	struct p9_fid *vofid;

	uio = ap->a_uio;
	vp = ap->a_vp;
	np = P9FS_VTON(ap->a_vp);
	offset = 0;
	diroffset = 0;
	error = 0;
	count = 0;
	clnt = np->p9fs_ses->clnt;

	P9_DEBUG(VOPS, "%s: vp %p, offset %jd, resid %zd\n", __func__, vp, (intmax_t) uio->uio_offset, uio->uio_resid);

	if (ap->a_uio->uio_iov->iov_len <= 0)
		return (EINVAL);

	if (vp->v_type != VDIR)
		return (ENOTDIR);

	vofid = p9fs_get_fid(clnt, np, ap->a_cred, VOFID, P9PROTO_OREAD, &error);
	if (vofid == NULL) {
		P9_DEBUG(ERROR, "%s: NULL FID\n", __func__);
		return (EBADF);
	}

	io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK);

	/* We haven't reached the end yet. read more. */
	diroffset = uio->uio_offset;
	while (uio->uio_resid >= sizeof(struct dirent)) {
		/*
		 * We need to read more data as what is indicated by filesize because
		 * filesize is based on data stored in struct dirent structure but
		 * we read data in struct p9_dirent format which has different size.
		 * Hence we read max data(P9FS_IOUNIT) everytime from host, convert
		 * it into struct dirent structure and send it back.
		 */
		count = P9FS_IOUNIT;
		bzero(io_buffer, P9FS_MTU);
		count = p9_client_readdir(vofid, (char *)io_buffer,
		    diroffset, count);

		if (count == 0)
			break;

		if (count < 0) {
			error = EIO;
			goto out;
		}

		offset = 0;
		while (offset + QEMU_DIRENTRY_SZ <= count) {

			/*
			 * Read and make sense out of the buffer in one dirent
			 * This is part of 9p protocol read. This reads one p9_dirent,
			 * appends it to dirent(FREEBSD specifc) and continues to parse the buffer.
			 */
			bzero(&dent, sizeof(dent));
			offset = p9_dirent_read(clnt, io_buffer, offset, count,
				&dent);
			if (offset < 0 || offset > count) {
				error = EIO;
				goto out;
			}

			bzero(&cde, sizeof(cde));
			strncpy(cde.d_name, dent.d_name, dent.len);
			cde.d_fileno = dent.qid.path;
			cde.d_type = dent.d_type;
			cde.d_namlen = dent.len;
			cde.d_reclen = GENERIC_DIRSIZ(&cde);

                        /*
                         * If there isn't enough space in the uio to return a
                         * whole dirent, break off read
                         */
                        if (uio->uio_resid < GENERIC_DIRSIZ(&cde))
                                break;

			/* Transfer */
			error = uiomove(&cde, GENERIC_DIRSIZ(&cde), uio);
			if (error != 0) {
				error = EIO;
				goto out;
			}
			diroffset = dent.d_off;
		}
	}
	/* Pass on last transferred offset */
	uio->uio_offset = diroffset;

out:
	uma_zfree(p9fs_io_buffer_zone, io_buffer);

	return (error);
}

/*
 * Perform the transfer described by buffer bp on the open fid vofid and
 * complete the buffer with bufdone().
 *
 * BIO_READ on a regular file fills bp->b_data from the server, bounded by
 * the cached file size; the buffer is zeroed first so that a short read
 * leaves zeroes behind.  Any other command writes the dirty range
 * [b_dirtyoff, b_dirtyend) of the buffer to the server and grows the cached
 * file size if the write extended the file.
 *
 * Failures are reported through bp->b_error / BIO_ERROR; bp->b_resid is set
 * from the residual of the internal uio.  The cr argument is not used.
 */
static void
p9fs_doio(struct vnode *vp, struct buf *bp, struct p9_fid *vofid, struct ucred *cr)
{
	struct uio *uiov;
	struct iovec io;
	int error;
	uint64_t off, offset;
	uint64_t filesize;
	uint64_t resid;
	uint32_t count;
	int64_t ret;
	struct p9fs_node *np;
	char *io_buffer;

	error = 0;
	np = P9FS_VTON(vp);

	filesize = np->inode.i_size;
	uiov = malloc(sizeof(struct uio), M_P9UIOV, M_WAITOK);
	uiov->uio_iov = &io;
	uiov->uio_iovcnt = 1;
	uiov->uio_segflg = UIO_SYSSPACE;
	io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO);

	if (bp->b_iocmd == BIO_READ) {
		io.iov_len = uiov->uio_resid = bp->b_bcount;
		io.iov_base = bp->b_data;
		uiov->uio_rw = UIO_READ;

		switch (vp->v_type) {

		case VREG:
		{
			/*
			 * NOTE(review): the read path scales b_blkno by
			 * DEV_BSIZE while the write path below scales it by
			 * PAGE_SIZE - confirm which unit the callers use.
			 */
			uiov->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;

			/*
			 * Nothing has been read yet, so this clears the whole
			 * destination buffer.
			 */
			if (uiov->uio_resid) {
				int left = uiov->uio_resid;
				int nread = bp->b_bcount - left;

				if (left > 0)
					bzero((char *)bp->b_data + nread, left);
			}
			/* where in the file are we to start reading */
			offset = uiov->uio_offset;
			if (uiov->uio_offset >= filesize)
				goto out;

			while ((resid = uiov->uio_resid) > 0) {
				if (offset >= filesize)
					break;
				/*
				 * Use the type-generic MIN(): the kernel's
				 * min() takes u_int arguments and would
				 * truncate the 64-bit remaining length.
				 */
				count = MIN(MIN(filesize - offset, resid),
				    P9FS_IOUNIT);
				if (count == 0)
					break;

				P9_DEBUG(VOPS, "%s: read called %#zx at %#jx\n",
				    __func__, uiov->uio_resid, (uintmax_t)uiov->uio_offset);

				/* Fetch up to count bytes into the bounce buffer */
				ret = p9_client_read(vofid, offset, count, io_buffer);
				/* A negative count is a negated errno. */
				if (ret < 0) {
					error = -ret;
					goto out;
				}
				/* No progress: stop instead of spinning. */
				if (ret == 0)
					break;

				error = uiomove(io_buffer, ret, uiov);
				if (error != 0)
					goto out;
				offset += ret;
			}
			break;
		}
		default:
			printf("vfs:  type %x unexpected\n", vp->v_type);
			break;
		}
	} else {
		if (bp->b_dirtyend > bp->b_dirtyoff) {
			io.iov_len = uiov->uio_resid = bp->b_dirtyend - bp->b_dirtyoff;
			uiov->uio_offset = ((off_t)bp->b_blkno) * PAGE_SIZE + bp->b_dirtyoff;
			io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
			uiov->uio_rw = UIO_WRITE;

			if (uiov->uio_offset < 0) {
				error = EINVAL;
				goto out;
			}

			if (uiov->uio_resid == 0)
				goto out;

			offset = uiov->uio_offset;

			while ((resid = uiov->uio_resid) > 0) {
				off = 0;
				count = MIN(resid, P9FS_IOUNIT);
				error = uiomove(io_buffer, count, uiov);
				if (error != 0) {
					goto out;
				}

				while (count > 0) {
					/* Push the rest of this chunk to the server */
					ret = p9_client_write(vofid, offset, count,
					    io_buffer + off);
					/*
					 * Record the failure so that it ends
					 * up in bp->b_error; a zero-length
					 * write would otherwise loop forever.
					 */
					if (ret <= 0) {
						error = (ret < 0) ? -ret : EIO;
						goto out;
					}

					P9_DEBUG(VOPS, "%s: write called %#zx at %#jx\n",
					    __func__, uiov->uio_resid, (uintmax_t)uiov->uio_offset);
					off += ret;
					offset += ret;
					count -= ret;
				}
			}

			/* Update the fields in the node to reflect the change */
			if (filesize < uiov->uio_offset + uiov->uio_resid) {
				np->inode.i_size = uiov->uio_offset + uiov->uio_resid;
				vnode_pager_setsize(vp, uiov->uio_offset + uiov->uio_resid);
				/* update the modified timers. */
				p9fs_itimes(vp);
			}
		} else {
			 /* Nothing dirty: complete the buffer right away. */
			 bp->b_resid = 0;
			 goto out1;
		}
	}
out:
	/* Set the error */
	if (error != 0) {
		bp->b_error = error;
		bp->b_ioflags |= BIO_ERROR;
	}
	bp->b_resid = uiov->uio_resid;
out1:
	bufdone(bp);
	uma_zfree(p9fs_io_buffer_zone, io_buffer);
	free(uiov, M_P9UIOV);
}

/*
 * The I/O buffer is mapped to a uio and a client_write/client_read is performed
 * the same way as p9fs_read and p9fs_write.
 */
static int
p9fs_strategy(struct vop_strategy_args *ap)
{
	struct vnode *vp;
	struct buf *bp;
	struct ucred *cr;
	int error;
	struct open_fid_state ostate;

	vp = ap->a_vp;
	bp = ap->a_bp;
	error = 0;

	P9_DEBUG(VOPS, "%s: vp %p, iocmd %d\n ", __func__, vp, bp->b_iocmd);

	if (bp->b_iocmd == BIO_READ)
		cr = bp->b_rcred;
	else
		cr = bp->b_wcred;

	error = p9fs_get_open_fid(vp, bp->b_iocmd == BIO_READ ? FREAD : FWRITE, cr, &ostate);
	if (error) {
		P9_DEBUG(ERROR, "%s: p9fs_get_open_fid failed: %d\n", __func__, error);
		bp->b_error = error;
		bp->b_ioflags |= BIO_ERROR;
		bufdone(bp);
		return (0);
	}

	p9fs_doio(vp, bp, ostate.vofid, cr);
	p9fs_release_open_fid(vp, cr, &ostate);

	return (0);
}

/*
 * VOP_RENAME: rename fvp (named fcnp in directory fdvp) to the name tcnp in
 * directory tdvp, replacing tvp when it exists.
 *
 * The rename itself is a single p9_client_renameat() request on the fids of
 * the two directories.  Afterwards the cached link counts of the inodes
 * involved are adjusted and, for directory renames, the name cache entries
 * of the parent directories are purged.
 *
 * Every exit goes through "out", which drops the vnodes handed in by the
 * caller: tdvp and tvp with vput() (vrele() for tdvp when it is the same
 * vnode as tvp), fdvp and fvp with vrele().
 *
 * Returns 0 on success, EXDEV for a cross-mount rename, or the error from
 * the fid lookups, the rename request or vn_lock().
 */
static int
p9fs_rename(struct vop_rename_args *ap)
{
	struct vnode *tvp;
	struct vnode *tdvp;
	struct vnode *fvp;
	struct vnode *fdvp;
	struct componentname *tcnp;
	struct componentname *fcnp;
	struct p9fs_node *tdnode;
	struct p9fs_node *fdnode;
	struct p9fs_inode *fdinode;
	struct p9fs_node *fnode;
	struct p9fs_inode *finode;
	struct p9fs_session *vses;
	struct p9fs_node *tnode;
	struct p9fs_inode *tinode;
	struct p9_fid *olddirvfid, *newdirvfid ;
	int error;

	tvp = ap->a_tvp;
	tdvp = ap->a_tdvp;
	fvp = ap->a_fvp;
	fdvp = ap->a_fdvp;
	tcnp = ap->a_tcnp;
	fcnp = ap->a_fcnp;
	tdnode = P9FS_VTON(tdvp);
	fdnode = P9FS_VTON(fdvp);
	fdinode = &fdnode->inode;
	fnode = P9FS_VTON(fvp);
	finode = &fnode->inode;
	vses = fnode->p9fs_ses;
	error = 0;

	P9_DEBUG(VOPS, "%s: tvp %p, tdvp %p, fvp %p, fdvp %p\n ", __func__, tvp, tdvp, fvp, fdvp);

	/* Check for cross mount operation */
	if (fvp->v_mount != tdvp->v_mount ||
	    (tvp && (fvp->v_mount != tvp->v_mount))) {
		error = EXDEV;
		goto out;
	}

	/*
	 * Renaming onto itself is not treated specially: error is already 0
	 * here and the request is still sent to the server below.
	 */
	if (fvp == tvp)
		error = 0;

	/* Fids of the source and destination directories. */
	olddirvfid = p9fs_get_fid(vses->clnt, fdnode, fcnp->cn_cred, VFID, -1, &error);
	if (error != 0)
		goto out;
	newdirvfid = p9fs_get_fid(vses->clnt, tdnode, tcnp->cn_cred, VFID, -1, &error);
	if (error != 0)
		goto out;

	error = p9_client_renameat(olddirvfid, fcnp->cn_nameptr, newdirvfid, tcnp->cn_nameptr);
	if (error != 0)
		goto out;

	/*
	 * A directory is being moved: its old parent loses a link, and the
	 * name cache entries of the affected parent directories are purged
	 * (the destination parent only when an existing directory is being
	 * replaced).
	 */
	if (fvp->v_type == VDIR) {
		if (tvp && tvp->v_type == VDIR)
			cache_purge(tdvp);
		P9FS_DECR_LINKS(fdinode);
		cache_purge(fdvp);
	}

	/*
	 * Take an exclusive lock on the from node before decrementing its
	 * link count.
	 * NOTE(review): a rename does not normally change the link count of
	 * the renamed object itself - confirm this decrement is intended.
	 */
	if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
		goto out;
	P9FS_DECR_LINKS(finode);
	VOP_UNLOCK(fvp);

	/* The replaced target, if any, lost the name that pointed at it. */
	if (tvp) {
		tnode = P9FS_VTON(tvp);
		tinode = &tnode->inode;
		P9FS_DECR_LINKS(tinode);
	}

out:
	/* Drop the vnodes passed in by the caller on every path. */
	if (tdvp == tvp)
		vrele(tdvp);
	else
		vput(tdvp);
	if (tvp)
		vput(tvp);
	vrele(fdvp);
	vrele(fvp);
	return (error);
}

/*
 * VOP_PUTPAGES: write VM pages back, synchronously.
 *
 * The pages are mapped into the kernel virtual address range of a buffer
 * taken from p9fs_pbuf_zone and written with a single VOP_WRITE through a
 * kernel-space uio.  The transfer is clipped so that it never extends the
 * file beyond the cached size.  Every rtvals[] slot starts out as
 * VM_PAGER_ERROR and is updated by vnode_pager_undirty_pages() when the
 * write succeeded.  The status of the first page is returned.
 *
 * XXX: like smbfs, cannot use vop_stdputpages due to mapping requirement
 */
static int
p9fs_putpages(struct vop_putpages_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct p9fs_node *np = P9FS_VTON(vp);
	struct thread *td = curthread;
	struct ucred *cred = curthread->td_ucred;
	vm_page_t *pages = ap->a_m;
	int count = ap->a_count;
	int *rtvals = ap->a_rtvals;
	int npages = btoc(count);
	off_t offset = IDX_TO_OFF(pages[0]->pindex);
	struct uio uio;
	struct iovec iov;
	struct buf *bp;
	vm_offset_t kva;
	int i, error;

	/*
	 * When putting pages, do not extend file past EOF.
	 */
	if (offset + count > np->inode.i_size) {
		count = np->inode.i_size - offset;
		if (count < 0)
			count = 0;
	}

	/* Assume failure for every page until the write has gone through. */
	i = 0;
	while (i < npages) {
		rtvals[i] = VM_PAGER_ERROR;
		i++;
	}

	/* Map the pages at the buffer's kernel address. */
	bp = uma_zalloc(p9fs_pbuf_zone, M_WAITOK);
	kva = (vm_offset_t) bp->b_data;
	pmap_qenter(kva, pages, npages);

	VM_CNT_INC(v_vnodeout);
	VM_CNT_ADD(v_vnodepgsout, count);

	/* Describe the mapped range as a kernel-space write request. */
	iov.iov_base = (caddr_t) kva;
	iov.iov_len = count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = offset;
	uio.uio_resid = count;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_WRITE;
	uio.uio_td = td;

	P9_DEBUG(VOPS, "of=%jd,resid=%zd\n", (intmax_t)uio.uio_offset, uio.uio_resid);

	error = VOP_WRITE(vp, &uio, vnode_pager_putpages_ioflags(ap->a_sync),
	    cred);

	/* The mapping is only needed for the duration of the write. */
	pmap_qremove(kva, npages);
	uma_zfree(p9fs_pbuf_zone, bp);

	if (error != 0)
		return (rtvals[0]);

	vnode_pager_undirty_pages(pages, rtvals, count - uio.uio_resid,
	    np->inode.i_size - offset, npages * PAGE_SIZE);

	return (rtvals[0]);
}

/*
 * Vnode operations vector for p9fs.  Operations that are not listed here
 * fall back to default_vnodeops.
 */
struct vop_vector p9fs_vnops = {
	.vop_default =		&default_vnodeops,
	.vop_lookup =		p9fs_lookup,
	.vop_open =		p9fs_open,
	.vop_close =		p9fs_close,
	.vop_access =		p9fs_access,
	.vop_getattr =		p9fs_getattr_dotl,
	.vop_setattr =		p9fs_setattr_dotl,
	.vop_reclaim =		p9fs_reclaim,
	.vop_inactive =		p9fs_inactive,
	.vop_readdir =		p9fs_readdir,
	.vop_create =		p9fs_create,
	.vop_mknod =		p9fs_mknod,
	.vop_read =		p9fs_read,
	.vop_write =		p9fs_write,
	.vop_remove =		p9fs_remove,
	.vop_mkdir =		p9fs_mkdir,
	.vop_rmdir =		p9fs_rmdir,
	.vop_strategy =		p9fs_strategy,
	.vop_symlink =		p9fs_symlink,
	.vop_rename =           p9fs_rename,
	.vop_link =		p9fs_link,
	.vop_readlink =		p9fs_readlink,
	.vop_putpages =		p9fs_putpages,
};
/* Register the vector with the VFS layer. */
VFS_VOP_VECTOR_REGISTER(p9fs_vnops);