linux/fs/xfs/libxfs/xfs_attr.c
Brian Foster c468562879 xfs: cancel tx on xfs_defer_finish() error during xattr set/remove
Chris Dunlop reports a problem where an xattr operation fails,
reports the following error to syslog and hangs during unmount:

 ================================================
 [ BUG: lock held when returning to user space! ]
 ...
 ------------------------------------------------
 <PID> is leaving the kernel with locks still held!
 1 lock held by <PID>:
  #0:  (sb_internal){......}, at: [<ffffffffa07692a3>] xfs_trans_alloc+0xe3/0x130 [xfs]

The failure/shutdown occurs during deferred ops processing which
leads to an error return from xfs_defer_finish() via
xfs_attr_leaf_addname(). While the root cause of the failure is
unknown corruption, the cause of the subsequent BUG above and
unmount hang is failure to cancel the transaction before returning
to userspace.

The transaction is not cancelled because the out_defer_cancel error
handling paths in the xfs_attr_[leaf|node]_[add|remove]name()
functions clear args.trans without releasing the transaction. The
callers therefore lose the reference to the transaction and fail to
cancel it.

Since xfs_attr_[set|remove]() always cancel args.trans when != NULL
and xfs_defer_finish()->...->xfs_trans_roll() should always return
with a valid transaction, update the leaf/node xattr functions to
not reset args.trans in the error path responsible for cancelling
deferred ops.

Reported-by: Chris Dunlop <chris@onthe.net.au>
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2018-01-16 14:53:28 -08:00

1346 lines
34 KiB
C

/*
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_attr_sf.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
#include "xfs_attr_remote.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
/*
* xfs_attr.c
*
* Provide the external interfaces to manage attribute lists.
*/
/*========================================================================
* Function prototypes for the kernel.
*========================================================================*/
/*
* Internal routines when attribute list fits inside the inode.
*/
STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args);
/*
* Internal routines when attribute list is one block.
*/
STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args);
STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
/*
* Internal routines when attribute list is more than one block.
*/
STATIC int xfs_attr_node_get(xfs_da_args_t *args);
STATIC int xfs_attr_node_addname(xfs_da_args_t *args);
STATIC int xfs_attr_node_removename(xfs_da_args_t *args);
STATIC int xfs_attr_fillstate(xfs_da_state_t *state);
STATIC int xfs_attr_refillstate(xfs_da_state_t *state);
STATIC int
xfs_attr_args_init(
struct xfs_da_args *args,
struct xfs_inode *dp,
const unsigned char *name,
int flags)
{
if (!name)
return -EINVAL;
memset(args, 0, sizeof(*args));
args->geo = dp->i_mount->m_attr_geo;
args->whichfork = XFS_ATTR_FORK;
args->dp = dp;
args->flags = flags;
args->name = name;
args->namelen = strlen((const char *)name);
if (args->namelen >= MAXNAMELEN)
return -EFAULT; /* match IRIX behaviour */
args->hashval = xfs_da_hashname(args->name, args->namelen);
return 0;
}
int
xfs_inode_hasattr(
struct xfs_inode *ip)
{
if (!XFS_IFORK_Q(ip) ||
(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
ip->i_d.di_anextents == 0))
return 0;
return 1;
}
/*========================================================================
* Overall external interface routines.
*========================================================================*/
/* Retrieve an extended attribute and its value. Must have ilock. */
int
xfs_attr_get_ilocked(
struct xfs_inode *ip,
struct xfs_da_args *args)
{
ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
if (!xfs_inode_hasattr(ip))
return -ENOATTR;
else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
return xfs_attr_shortform_getvalue(args);
else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK))
return xfs_attr_leaf_get(args);
else
return xfs_attr_node_get(args);
}
/* Retrieve an extended attribute by name, and its value. */
int
xfs_attr_get(
struct xfs_inode *ip,
const unsigned char *name,
unsigned char *value,
int *valuelenp,
int flags)
{
struct xfs_da_args args;
uint lock_mode;
int error;
XFS_STATS_INC(ip->i_mount, xs_attr_get);
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
error = xfs_attr_args_init(&args, ip, name, flags);
if (error)
return error;
args.value = value;
args.valuelen = *valuelenp;
/* Entirely possible to look up a name which doesn't exist */
args.op_flags = XFS_DA_OP_OKNOENT;
lock_mode = xfs_ilock_attr_map_shared(ip);
error = xfs_attr_get_ilocked(ip, &args);
xfs_iunlock(ip, lock_mode);
*valuelenp = args.valuelen;
return error == -EEXIST ? 0 : error;
}
/*
* Calculate how many blocks we need for the new attribute,
*/
STATIC int
xfs_attr_calc_size(
struct xfs_da_args *args,
int *local)
{
struct xfs_mount *mp = args->dp->i_mount;
int size;
int nblks;
/*
* Determine space new attribute will use, and if it would be
* "local" or "remote" (note: local != inline).
*/
size = xfs_attr_leaf_newentsize(args, local);
nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
if (*local) {
if (size > (args->geo->blksize / 2)) {
/* Double split possible */
nblks *= 2;
}
} else {
/*
* Out of line attribute, cannot double split, but
* make room for the attribute value itself.
*/
uint dblocks = xfs_attr3_rmt_blocks(mp, args->valuelen);
nblks += dblocks;
nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
}
return nblks;
}
int
xfs_attr_set(
struct xfs_inode *dp,
const unsigned char *name,
unsigned char *value,
int valuelen,
int flags)
{
struct xfs_mount *mp = dp->i_mount;
struct xfs_buf *leaf_bp = NULL;
struct xfs_da_args args;
struct xfs_defer_ops dfops;
struct xfs_trans_res tres;
xfs_fsblock_t firstblock;
int rsvd = (flags & ATTR_ROOT) != 0;
int error, err2, local;
XFS_STATS_INC(mp, xs_attr_set);
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
return -EIO;
error = xfs_attr_args_init(&args, dp, name, flags);
if (error)
return error;
args.value = value;
args.valuelen = valuelen;
args.firstblock = &firstblock;
args.dfops = &dfops;
args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
args.total = xfs_attr_calc_size(&args, &local);
error = xfs_qm_dqattach(dp, 0);
if (error)
return error;
/*
* If the inode doesn't have an attribute fork, add one.
* (inode must not be locked when we call this routine)
*/
if (XFS_IFORK_Q(dp) == 0) {
int sf_size = sizeof(xfs_attr_sf_hdr_t) +
XFS_ATTR_SF_ENTSIZE_BYNAME(args.namelen, valuelen);
error = xfs_bmap_add_attrfork(dp, sf_size, rsvd);
if (error)
return error;
}
tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
M_RES(mp)->tr_attrsetrt.tr_logres * args.total;
tres.tr_logcount = XFS_ATTRSET_LOG_COUNT;
tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
/*
* Root fork attributes can use reserved data blocks for this
* operation if necessary
*/
error = xfs_trans_alloc(mp, &tres, args.total, 0,
rsvd ? XFS_TRANS_RESERVE : 0, &args.trans);
if (error)
return error;
xfs_ilock(dp, XFS_ILOCK_EXCL);
error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
XFS_QMOPT_RES_REGBLKS);
if (error) {
xfs_iunlock(dp, XFS_ILOCK_EXCL);
xfs_trans_cancel(args.trans);
return error;
}
xfs_trans_ijoin(args.trans, dp, 0);
/*
* If the attribute list is non-existent or a shortform list,
* upgrade it to a single-leaf-block attribute list.
*/
if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL ||
(dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
dp->i_d.di_anextents == 0)) {
/*
* Build initial attribute list (if required).
*/
if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS)
xfs_attr_shortform_create(&args);
/*
* Try to add the attr to the attribute list in
* the inode.
*/
error = xfs_attr_shortform_addname(&args);
if (error != -ENOSPC) {
/*
* Commit the shortform mods, and we're done.
* NOTE: this is also the error path (EEXIST, etc).
*/
ASSERT(args.trans != NULL);
/*
* If this is a synchronous mount, make sure that
* the transaction goes to disk before returning
* to the user.
*/
if (mp->m_flags & XFS_MOUNT_WSYNC)
xfs_trans_set_sync(args.trans);
if (!error && (flags & ATTR_KERNOTIME) == 0) {
xfs_trans_ichgtime(args.trans, dp,
XFS_ICHGTIME_CHG);
}
err2 = xfs_trans_commit(args.trans);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
return error ? error : err2;
}
/*
* It won't fit in the shortform, transform to a leaf block.
* GROT: another possible req'mt for a double-split btree op.
*/
xfs_defer_init(args.dfops, args.firstblock);
error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
if (error)
goto out_defer_cancel;
/*
* Prevent the leaf buffer from being unlocked so that a
* concurrent AIL push cannot grab the half-baked leaf
* buffer and run into problems with the write verifier.
*/
xfs_trans_bhold(args.trans, leaf_bp);
xfs_defer_bjoin(args.dfops, leaf_bp);
xfs_defer_ijoin(args.dfops, dp);
error = xfs_defer_finish(&args.trans, args.dfops);
if (error)
goto out_defer_cancel;
/*
* Commit the leaf transformation. We'll need another (linked)
* transaction to add the new attribute to the leaf, which
* means that we have to hold & join the leaf buffer here too.
*/
error = xfs_trans_roll_inode(&args.trans, dp);
if (error)
goto out;
xfs_trans_bjoin(args.trans, leaf_bp);
leaf_bp = NULL;
}
if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
error = xfs_attr_leaf_addname(&args);
else
error = xfs_attr_node_addname(&args);
if (error)
goto out;
/*
* If this is a synchronous mount, make sure that the
* transaction goes to disk before returning to the user.
*/
if (mp->m_flags & XFS_MOUNT_WSYNC)
xfs_trans_set_sync(args.trans);
if ((flags & ATTR_KERNOTIME) == 0)
xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
/*
* Commit the last in the sequence of transactions.
*/
xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
error = xfs_trans_commit(args.trans);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
return error;
out_defer_cancel:
xfs_defer_cancel(&dfops);
out:
if (leaf_bp)
xfs_trans_brelse(args.trans, leaf_bp);
if (args.trans)
xfs_trans_cancel(args.trans);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
return error;
}
/*
* Generic handler routine to remove a name from an attribute list.
* Transitions attribute list from Btree to shortform as necessary.
*/
int
xfs_attr_remove(
struct xfs_inode *dp,
const unsigned char *name,
int flags)
{
struct xfs_mount *mp = dp->i_mount;
struct xfs_da_args args;
struct xfs_defer_ops dfops;
xfs_fsblock_t firstblock;
int error;
XFS_STATS_INC(mp, xs_attr_remove);
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
return -EIO;
error = xfs_attr_args_init(&args, dp, name, flags);
if (error)
return error;
args.firstblock = &firstblock;
args.dfops = &dfops;
/*
* we have no control over the attribute names that userspace passes us
* to remove, so we have to allow the name lookup prior to attribute
* removal to fail.
*/
args.op_flags = XFS_DA_OP_OKNOENT;
error = xfs_qm_dqattach(dp, 0);
if (error)
return error;
/*
* Root fork attributes can use reserved data blocks for this
* operation if necessary
*/
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_attrrm,
XFS_ATTRRM_SPACE_RES(mp), 0,
(flags & ATTR_ROOT) ? XFS_TRANS_RESERVE : 0,
&args.trans);
if (error)
return error;
xfs_ilock(dp, XFS_ILOCK_EXCL);
/*
* No need to make quota reservations here. We expect to release some
* blocks not allocate in the common case.
*/
xfs_trans_ijoin(args.trans, dp, 0);
if (!xfs_inode_hasattr(dp)) {
error = -ENOATTR;
} else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
error = xfs_attr_shortform_remove(&args);
} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
error = xfs_attr_leaf_removename(&args);
} else {
error = xfs_attr_node_removename(&args);
}
if (error)
goto out;
/*
* If this is a synchronous mount, make sure that the
* transaction goes to disk before returning to the user.
*/
if (mp->m_flags & XFS_MOUNT_WSYNC)
xfs_trans_set_sync(args.trans);
if ((flags & ATTR_KERNOTIME) == 0)
xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
/*
* Commit the last in the sequence of transactions.
*/
xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
error = xfs_trans_commit(args.trans);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
return error;
out:
if (args.trans)
xfs_trans_cancel(args.trans);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
return error;
}
/*========================================================================
* External routines when attribute list is inside the inode
*========================================================================*/
/*
* Add a name to the shortform attribute list structure
* This is the external routine.
*/
STATIC int
xfs_attr_shortform_addname(xfs_da_args_t *args)
{
int newsize, forkoff, retval;
trace_xfs_attr_sf_addname(args);
retval = xfs_attr_shortform_lookup(args);
if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
return retval;
} else if (retval == -EEXIST) {
if (args->flags & ATTR_CREATE)
return retval;
retval = xfs_attr_shortform_remove(args);
ASSERT(retval == 0);
}
if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX)
return -ENOSPC;
newsize = XFS_ATTR_SF_TOTSIZE(args->dp);
newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen);
forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize);
if (!forkoff)
return -ENOSPC;
xfs_attr_shortform_add(args, forkoff);
return 0;
}
/*========================================================================
* External routines when attribute list is one block
*========================================================================*/
/*
* Add a name to the leaf attribute list structure
*
* This leaf block cannot have a "remote" value, we only call this routine
* if bmap_one_block() says there is only one block (ie: no remote blks).
*/
STATIC int
xfs_attr_leaf_addname(xfs_da_args_t *args)
{
xfs_inode_t *dp;
struct xfs_buf *bp;
int retval, error, forkoff;
trace_xfs_attr_leaf_addname(args);
/*
* Read the (only) block in the attribute list in.
*/
dp = args->dp;
args->blkno = 0;
error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
if (error)
return error;
/*
* Look up the given attribute in the leaf block. Figure out if
* the given flags produce an error or call for an atomic rename.
*/
retval = xfs_attr3_leaf_lookup_int(bp, args);
if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
xfs_trans_brelse(args->trans, bp);
return retval;
} else if (retval == -EEXIST) {
if (args->flags & ATTR_CREATE) { /* pure create op */
xfs_trans_brelse(args->trans, bp);
return retval;
}
trace_xfs_attr_leaf_replace(args);
/* save the attribute state for later removal*/
args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */
args->blkno2 = args->blkno; /* set 2nd entry info*/
args->index2 = args->index;
args->rmtblkno2 = args->rmtblkno;
args->rmtblkcnt2 = args->rmtblkcnt;
args->rmtvaluelen2 = args->rmtvaluelen;
/*
* clear the remote attr state now that it is saved so that the
* values reflect the state of the attribute we are about to
* add, not the attribute we just found and will remove later.
*/
args->rmtblkno = 0;
args->rmtblkcnt = 0;
args->rmtvaluelen = 0;
}
/*
* Add the attribute to the leaf block, transitioning to a Btree
* if required.
*/
retval = xfs_attr3_leaf_add(bp, args);
if (retval == -ENOSPC) {
/*
* Promote the attribute list to the Btree format, then
* Commit that transaction so that the node_addname() call
* can manage its own transactions.
*/
xfs_defer_init(args->dfops, args->firstblock);
error = xfs_attr3_leaf_to_node(args);
if (error)
goto out_defer_cancel;
xfs_defer_ijoin(args->dfops, dp);
error = xfs_defer_finish(&args->trans, args->dfops);
if (error)
goto out_defer_cancel;
/*
* Commit the current trans (including the inode) and start
* a new one.
*/
error = xfs_trans_roll_inode(&args->trans, dp);
if (error)
return error;
/*
* Fob the whole rest of the problem off on the Btree code.
*/
error = xfs_attr_node_addname(args);
return error;
}
/*
* Commit the transaction that added the attr name so that
* later routines can manage their own transactions.
*/
error = xfs_trans_roll_inode(&args->trans, dp);
if (error)
return error;
/*
* If there was an out-of-line value, allocate the blocks we
* identified for its storage and copy the value. This is done
* after we create the attribute so that we don't overflow the
* maximum size of a transaction and/or hit a deadlock.
*/
if (args->rmtblkno > 0) {
error = xfs_attr_rmtval_set(args);
if (error)
return error;
}
/*
* If this is an atomic rename operation, we must "flip" the
* incomplete flags on the "new" and "old" attribute/value pairs
* so that one disappears and one appears atomically. Then we
* must remove the "old" attribute/value pair.
*/
if (args->op_flags & XFS_DA_OP_RENAME) {
/*
* In a separate transaction, set the incomplete flag on the
* "old" attr and clear the incomplete flag on the "new" attr.
*/
error = xfs_attr3_leaf_flipflags(args);
if (error)
return error;
/*
* Dismantle the "old" attribute/value pair by removing
* a "remote" value (if it exists).
*/
args->index = args->index2;
args->blkno = args->blkno2;
args->rmtblkno = args->rmtblkno2;
args->rmtblkcnt = args->rmtblkcnt2;
args->rmtvaluelen = args->rmtvaluelen2;
if (args->rmtblkno) {
error = xfs_attr_rmtval_remove(args);
if (error)
return error;
}
/*
* Read in the block containing the "old" attr, then
* remove the "old" attr from that block (neat, huh!)
*/
error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno,
-1, &bp);
if (error)
return error;
xfs_attr3_leaf_remove(bp, args);
/*
* If the result is small enough, shrink it all into the inode.
*/
if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
xfs_defer_init(args->dfops, args->firstblock);
error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
/* bp is gone due to xfs_da_shrink_inode */
if (error)
goto out_defer_cancel;
xfs_defer_ijoin(args->dfops, dp);
error = xfs_defer_finish(&args->trans, args->dfops);
if (error)
goto out_defer_cancel;
}
/*
* Commit the remove and start the next trans in series.
*/
error = xfs_trans_roll_inode(&args->trans, dp);
} else if (args->rmtblkno > 0) {
/*
* Added a "remote" value, just clear the incomplete flag.
*/
error = xfs_attr3_leaf_clearflag(args);
}
return error;
out_defer_cancel:
xfs_defer_cancel(args->dfops);
return error;
}
/*
* Remove a name from the leaf attribute list structure
*
* This leaf block cannot have a "remote" value, we only call this routine
* if bmap_one_block() says there is only one block (ie: no remote blks).
*/
STATIC int
xfs_attr_leaf_removename(xfs_da_args_t *args)
{
xfs_inode_t *dp;
struct xfs_buf *bp;
int error, forkoff;
trace_xfs_attr_leaf_removename(args);
/*
* Remove the attribute.
*/
dp = args->dp;
args->blkno = 0;
error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
if (error)
return error;
error = xfs_attr3_leaf_lookup_int(bp, args);
if (error == -ENOATTR) {
xfs_trans_brelse(args->trans, bp);
return error;
}
xfs_attr3_leaf_remove(bp, args);
/*
* If the result is small enough, shrink it all into the inode.
*/
if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
xfs_defer_init(args->dfops, args->firstblock);
error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
/* bp is gone due to xfs_da_shrink_inode */
if (error)
goto out_defer_cancel;
xfs_defer_ijoin(args->dfops, dp);
error = xfs_defer_finish(&args->trans, args->dfops);
if (error)
goto out_defer_cancel;
}
return 0;
out_defer_cancel:
xfs_defer_cancel(args->dfops);
return error;
}
/*
* Look up a name in a leaf attribute list structure.
*
* This leaf block cannot have a "remote" value, we only call this routine
* if bmap_one_block() says there is only one block (ie: no remote blks).
*/
STATIC int
xfs_attr_leaf_get(xfs_da_args_t *args)
{
struct xfs_buf *bp;
int error;
trace_xfs_attr_leaf_get(args);
args->blkno = 0;
error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
if (error)
return error;
error = xfs_attr3_leaf_lookup_int(bp, args);
if (error != -EEXIST) {
xfs_trans_brelse(args->trans, bp);
return error;
}
error = xfs_attr3_leaf_getvalue(bp, args);
xfs_trans_brelse(args->trans, bp);
if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) {
error = xfs_attr_rmtval_get(args);
}
return error;
}
/*========================================================================
* External routines when attribute list size > geo->blksize
*========================================================================*/
/*
* Add a name to a Btree-format attribute list.
*
* This will involve walking down the Btree, and may involve splitting
* leaf nodes and even splitting intermediate nodes up to and including
* the root node (a special case of an intermediate node).
*
* "Remote" attribute values confuse the issue and atomic rename operations
* add a whole extra layer of confusion on top of that.
*/
STATIC int
xfs_attr_node_addname(xfs_da_args_t *args)
{
xfs_da_state_t *state;
xfs_da_state_blk_t *blk;
xfs_inode_t *dp;
xfs_mount_t *mp;
int retval, error;
trace_xfs_attr_node_addname(args);
/*
* Fill in bucket of arguments/results/context to carry around.
*/
dp = args->dp;
mp = dp->i_mount;
restart:
state = xfs_da_state_alloc();
state->args = args;
state->mp = mp;
/*
* Search to see if name already exists, and get back a pointer
* to where it should go.
*/
error = xfs_da3_node_lookup_int(state, &retval);
if (error)
goto out;
blk = &state->path.blk[ state->path.active-1 ];
ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
goto out;
} else if (retval == -EEXIST) {
if (args->flags & ATTR_CREATE)
goto out;
trace_xfs_attr_node_replace(args);
/* save the attribute state for later removal*/
args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */
args->blkno2 = args->blkno; /* set 2nd entry info*/
args->index2 = args->index;
args->rmtblkno2 = args->rmtblkno;
args->rmtblkcnt2 = args->rmtblkcnt;
args->rmtvaluelen2 = args->rmtvaluelen;
/*
* clear the remote attr state now that it is saved so that the
* values reflect the state of the attribute we are about to
* add, not the attribute we just found and will remove later.
*/
args->rmtblkno = 0;
args->rmtblkcnt = 0;
args->rmtvaluelen = 0;
}
retval = xfs_attr3_leaf_add(blk->bp, state->args);
if (retval == -ENOSPC) {
if (state->path.active == 1) {
/*
* Its really a single leaf node, but it had
* out-of-line values so it looked like it *might*
* have been a b-tree.
*/
xfs_da_state_free(state);
state = NULL;
xfs_defer_init(args->dfops, args->firstblock);
error = xfs_attr3_leaf_to_node(args);
if (error)
goto out_defer_cancel;
xfs_defer_ijoin(args->dfops, dp);
error = xfs_defer_finish(&args->trans, args->dfops);
if (error)
goto out_defer_cancel;
/*
* Commit the node conversion and start the next
* trans in the chain.
*/
error = xfs_trans_roll_inode(&args->trans, dp);
if (error)
goto out;
goto restart;
}
/*
* Split as many Btree elements as required.
* This code tracks the new and old attr's location
* in the index/blkno/rmtblkno/rmtblkcnt fields and
* in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
*/
xfs_defer_init(args->dfops, args->firstblock);
error = xfs_da3_split(state);
if (error)
goto out_defer_cancel;
xfs_defer_ijoin(args->dfops, dp);
error = xfs_defer_finish(&args->trans, args->dfops);
if (error)
goto out_defer_cancel;
} else {
/*
* Addition succeeded, update Btree hashvals.
*/
xfs_da3_fixhashpath(state, &state->path);
}
/*
* Kill the state structure, we're done with it and need to
* allow the buffers to come back later.
*/
xfs_da_state_free(state);
state = NULL;
/*
* Commit the leaf addition or btree split and start the next
* trans in the chain.
*/
error = xfs_trans_roll_inode(&args->trans, dp);
if (error)
goto out;
/*
* If there was an out-of-line value, allocate the blocks we
* identified for its storage and copy the value. This is done
* after we create the attribute so that we don't overflow the
* maximum size of a transaction and/or hit a deadlock.
*/
if (args->rmtblkno > 0) {
error = xfs_attr_rmtval_set(args);
if (error)
return error;
}
/*
* If this is an atomic rename operation, we must "flip" the
* incomplete flags on the "new" and "old" attribute/value pairs
* so that one disappears and one appears atomically. Then we
* must remove the "old" attribute/value pair.
*/
if (args->op_flags & XFS_DA_OP_RENAME) {
/*
* In a separate transaction, set the incomplete flag on the
* "old" attr and clear the incomplete flag on the "new" attr.
*/
error = xfs_attr3_leaf_flipflags(args);
if (error)
goto out;
/*
* Dismantle the "old" attribute/value pair by removing
* a "remote" value (if it exists).
*/
args->index = args->index2;
args->blkno = args->blkno2;
args->rmtblkno = args->rmtblkno2;
args->rmtblkcnt = args->rmtblkcnt2;
args->rmtvaluelen = args->rmtvaluelen2;
if (args->rmtblkno) {
error = xfs_attr_rmtval_remove(args);
if (error)
return error;
}
/*
* Re-find the "old" attribute entry after any split ops.
* The INCOMPLETE flag means that we will find the "old"
* attr, not the "new" one.
*/
args->flags |= XFS_ATTR_INCOMPLETE;
state = xfs_da_state_alloc();
state->args = args;
state->mp = mp;
state->inleaf = 0;
error = xfs_da3_node_lookup_int(state, &retval);
if (error)
goto out;
/*
* Remove the name and update the hashvals in the tree.
*/
blk = &state->path.blk[ state->path.active-1 ];
ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
error = xfs_attr3_leaf_remove(blk->bp, args);
xfs_da3_fixhashpath(state, &state->path);
/*
* Check to see if the tree needs to be collapsed.
*/
if (retval && (state->path.active > 1)) {
xfs_defer_init(args->dfops, args->firstblock);
error = xfs_da3_join(state);
if (error)
goto out_defer_cancel;
xfs_defer_ijoin(args->dfops, dp);
error = xfs_defer_finish(&args->trans, args->dfops);
if (error)
goto out_defer_cancel;
}
/*
* Commit and start the next trans in the chain.
*/
error = xfs_trans_roll_inode(&args->trans, dp);
if (error)
goto out;
} else if (args->rmtblkno > 0) {
/*
* Added a "remote" value, just clear the incomplete flag.
*/
error = xfs_attr3_leaf_clearflag(args);
if (error)
goto out;
}
retval = error = 0;
out:
if (state)
xfs_da_state_free(state);
if (error)
return error;
return retval;
out_defer_cancel:
xfs_defer_cancel(args->dfops);
goto out;
}
/*
* Remove a name from a B-tree attribute list.
*
* This will involve walking down the Btree, and may involve joining
* leaf nodes and even joining intermediate nodes up to and including
* the root node (a special case of an intermediate node).
*/
STATIC int
xfs_attr_node_removename(xfs_da_args_t *args)
{
xfs_da_state_t *state;
xfs_da_state_blk_t *blk;
xfs_inode_t *dp;
struct xfs_buf *bp;
int retval, error, forkoff;
trace_xfs_attr_node_removename(args);
/*
* Tie a string around our finger to remind us where we are.
*/
dp = args->dp;
state = xfs_da_state_alloc();
state->args = args;
state->mp = dp->i_mount;
/*
* Search to see if name exists, and get back a pointer to it.
*/
error = xfs_da3_node_lookup_int(state, &retval);
if (error || (retval != -EEXIST)) {
if (error == 0)
error = retval;
goto out;
}
/*
* If there is an out-of-line value, de-allocate the blocks.
* This is done before we remove the attribute so that we don't
* overflow the maximum size of a transaction and/or hit a deadlock.
*/
blk = &state->path.blk[ state->path.active-1 ];
ASSERT(blk->bp != NULL);
ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
if (args->rmtblkno > 0) {
/*
* Fill in disk block numbers in the state structure
* so that we can get the buffers back after we commit
* several transactions in the following calls.
*/
error = xfs_attr_fillstate(state);
if (error)
goto out;
/*
* Mark the attribute as INCOMPLETE, then bunmapi() the
* remote value.
*/
error = xfs_attr3_leaf_setflag(args);
if (error)
goto out;
error = xfs_attr_rmtval_remove(args);
if (error)
goto out;
/*
* Refill the state structure with buffers, the prior calls
* released our buffers.
*/
error = xfs_attr_refillstate(state);
if (error)
goto out;
}
/*
* Remove the name and update the hashvals in the tree.
*/
blk = &state->path.blk[ state->path.active-1 ];
ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
retval = xfs_attr3_leaf_remove(blk->bp, args);
xfs_da3_fixhashpath(state, &state->path);
/*
* Check to see if the tree needs to be collapsed.
*/
if (retval && (state->path.active > 1)) {
xfs_defer_init(args->dfops, args->firstblock);
error = xfs_da3_join(state);
if (error)
goto out_defer_cancel;
xfs_defer_ijoin(args->dfops, dp);
error = xfs_defer_finish(&args->trans, args->dfops);
if (error)
goto out_defer_cancel;
/*
* Commit the Btree join operation and start a new trans.
*/
error = xfs_trans_roll_inode(&args->trans, dp);
if (error)
goto out;
}
/*
* If the result is small enough, push it all into the inode.
*/
if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
/*
* Have to get rid of the copy of this dabuf in the state.
*/
ASSERT(state->path.active == 1);
ASSERT(state->path.blk[0].bp);
state->path.blk[0].bp = NULL;
error = xfs_attr3_leaf_read(args->trans, args->dp, 0, -1, &bp);
if (error)
goto out;
if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
xfs_defer_init(args->dfops, args->firstblock);
error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
/* bp is gone due to xfs_da_shrink_inode */
if (error)
goto out_defer_cancel;
xfs_defer_ijoin(args->dfops, dp);
error = xfs_defer_finish(&args->trans, args->dfops);
if (error)
goto out_defer_cancel;
} else
xfs_trans_brelse(args->trans, bp);
}
error = 0;
out:
xfs_da_state_free(state);
return error;
out_defer_cancel:
xfs_defer_cancel(args->dfops);
goto out;
}
/*
* Fill in the disk block numbers in the state structure for the buffers
* that are attached to the state structure.
* This is done so that we can quickly reattach ourselves to those buffers
* after some set of transaction commits have released these buffers.
*/
STATIC int
xfs_attr_fillstate(xfs_da_state_t *state)
{
xfs_da_state_path_t *path;
xfs_da_state_blk_t *blk;
int level;
trace_xfs_attr_fillstate(state->args);
/*
* Roll down the "path" in the state structure, storing the on-disk
* block number for those buffers in the "path".
*/
path = &state->path;
ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
if (blk->bp) {
blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
blk->bp = NULL;
} else {
blk->disk_blkno = 0;
}
}
/*
* Roll down the "altpath" in the state structure, storing the on-disk
* block number for those buffers in the "altpath".
*/
path = &state->altpath;
ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
if (blk->bp) {
blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
blk->bp = NULL;
} else {
blk->disk_blkno = 0;
}
}
return 0;
}
/*
* Reattach the buffers to the state structure based on the disk block
* numbers stored in the state structure.
* This is done after some set of transaction commits have released those
* buffers from our grip.
*/
STATIC int
xfs_attr_refillstate(xfs_da_state_t *state)
{
xfs_da_state_path_t *path;
xfs_da_state_blk_t *blk;
int level, error;
trace_xfs_attr_refillstate(state->args);
/*
* Roll down the "path" in the state structure, storing the on-disk
* block number for those buffers in the "path".
*/
path = &state->path;
ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
if (blk->disk_blkno) {
error = xfs_da3_node_read(state->args->trans,
state->args->dp,
blk->blkno, blk->disk_blkno,
&blk->bp, XFS_ATTR_FORK);
if (error)
return error;
} else {
blk->bp = NULL;
}
}
/*
* Roll down the "altpath" in the state structure, storing the on-disk
* block number for those buffers in the "altpath".
*/
path = &state->altpath;
ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
if (blk->disk_blkno) {
error = xfs_da3_node_read(state->args->trans,
state->args->dp,
blk->blkno, blk->disk_blkno,
&blk->bp, XFS_ATTR_FORK);
if (error)
return error;
} else {
blk->bp = NULL;
}
}
return 0;
}
/*
* Look up a filename in a node attribute list.
*
* This routine gets called for any attribute fork that has more than one
* block, ie: both true Btree attr lists and for single-leaf-blocks with
* "remote" values taking up more blocks.
*/
STATIC int
xfs_attr_node_get(xfs_da_args_t *args)
{
xfs_da_state_t *state;
xfs_da_state_blk_t *blk;
int error, retval;
int i;
trace_xfs_attr_node_get(args);
state = xfs_da_state_alloc();
state->args = args;
state->mp = args->dp->i_mount;
/*
* Search to see if name exists, and get back a pointer to it.
*/
error = xfs_da3_node_lookup_int(state, &retval);
if (error) {
retval = error;
} else if (retval == -EEXIST) {
blk = &state->path.blk[ state->path.active-1 ];
ASSERT(blk->bp != NULL);
ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
/*
* Get the value, local or "remote"
*/
retval = xfs_attr3_leaf_getvalue(blk->bp, args);
if (!retval && (args->rmtblkno > 0)
&& !(args->flags & ATTR_KERNOVAL)) {
retval = xfs_attr_rmtval_get(args);
}
}
/*
* If not in a transaction, we have to release all the buffers.
*/
for (i = 0; i < state->path.active; i++) {
xfs_trans_brelse(args->trans, state->path.blk[i].bp);
state->path.blk[i].bp = NULL;
}
xfs_da_state_free(state);
return retval;
}