xfs: update for 4.2-rc1

This update contains:
 
 o A new sparse on-disk inode record format to allow small extents to
   be used for inode allocation when free space is fragmented.
 o DAX support. This includes minor changes to the DAX core code to
   fix problems with lock ordering and bufferhead mapping abuse.
 o transaction commit interface cleanup
 o removal of various unnecessary XFS specific type definitions
 o cleanup and optimisation of freelist preparation before allocation
 o various minor cleanups
 o bug fixes for
 	- transaction reservation leaks
 	- incorrect inode logging in unwritten extent conversion
 	- mmap lock vs freeze ordering
 	- remote symlink mishandling
 	- attribute fork removal issues.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.12 (GNU/Linux)
 
 iQIcBAABAgAGBQJVkhI0AAoJEK3oKUf0dfod45MQAJCOEkNduBdlfPvTCMPjj/7z
 vzcfDdzgKwhpPTMXSDRvw4zDPt3C2FLMBJqxtPpC4sKGKG/8G0kFvw8bDtBag1m9
 ru5nI5LaQ6LC5RcU40zxBx1s/L8qYvyfUlxeoOT5lSwN9c6ENGOCQ3bUk4pSKaee
 pWDplag9LbfQomW2GHtxd8agMUZEYx0R1vgfv88V8xgPka8CvQo81XUgkb4PcDZV
 ugR+wDUsvwMS01aLYBmRFkMXuExNuCJVwtvdTJS+ZWGHzyTpulFoANUW6QT24gAM
 eP4yRXN4bv9vXrXpg8JkF25DHsfw4HBwNEL17ZvoB8t3oJp1/NYaH8ce1jS0+I8i
 NCtaO+qUqDSTGQZKgmeDPwCciQp54ra9LEdmIJFxpZxiBof9g/tIYEFgRklyFLwR
 GZU6Io6VpBa1oTGlC4D1cmG6bdcnhMB9MGVVCbqnB5mRRDKCmVgCyJwusd1pi7Re
 G4O6KkFt21O7+fP13VsjP57KoaJzsIgZ/+H3Ff/fJOJ33AKYTRCmwi8+IMi2n5JI
 zz+V0AIBQZAx9dlVyENnxufh9eJYcnwta0lUSLCCo91fZKxbo3ktK1kVHNZP5EGs
 IMFM1Ka6hibY20rWlR3GH0dfyP5/yNcvNgTMYPKjj9SVjTar1aSfF2rGpkqYXYyH
 D4FICbtDgtOc2ClfpI2k
 =3x+W
 -----END PGP SIGNATURE-----

Merge tag 'xfs-for-linus-4.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs

Pull xfs updates from Dave Chinner:
 "There's a couple of small API changes to the core DAX code which
  required small changes to the ext2 and ext4 code bases, but otherwise
  everything is within the XFS codebase.

  This update contains:

   - A new sparse on-disk inode record format to allow small extents to
     be used for inode allocation when free space is fragmented.

   - DAX support.  This includes minor changes to the DAX core code to
     fix problems with lock ordering and bufferhead mapping abuse.

   - transaction commit interface cleanup

   - removal of various unnecessary XFS specific type definitions

   - cleanup and optimisation of freelist preparation before allocation

   - various minor cleanups

   - bug fixes for
	- transaction reservation leaks
	- incorrect inode logging in unwritten extent conversion
	- mmap lock vs freeze ordering
	- remote symlink mishandling
	- attribute fork removal issues"

* tag 'xfs-for-linus-4.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (49 commits)
  xfs: don't truncate attribute extents if no extents exist
  xfs: clean up XFS_MIN_FREELIST macros
  xfs: sanitise error handling in xfs_alloc_fix_freelist
  xfs: factor out free space extent length check
  xfs: xfs_alloc_fix_freelist() can use incore perag structures
  xfs: remove xfs_caddr_t
  xfs: use void pointers in log validation helpers
  xfs: return a void pointer from xfs_buf_offset
  xfs: remove inst_t
  xfs: remove __psint_t and __psunsigned_t
  xfs: fix remote symlinks on V5/CRC filesystems
  xfs: fix xfs_log_done interface
  xfs: saner xfs_trans_commit interface
  xfs: remove the flags argument to xfs_trans_cancel
  xfs: pass a boolean flag to xfs_trans_free_items
  xfs: switch remaining xfs_trans_dup users to xfs_trans_roll
  xfs: check min blks for random debug mode sparse allocations
  xfs: fix sparse inodes 32-bit compile failure
  xfs: add initial DAX support
  xfs: add DAX IO path support
  ...
This commit is contained in:
Linus Torvalds 2015-06-30 20:16:08 -07:00
commit 68b4449d79
60 changed files with 1612 additions and 869 deletions

View file

@ -236,10 +236,10 @@ Removed Mount Options
Name Removed Name Removed
---- ------- ---- -------
delaylog/nodelaylog v3.20 delaylog/nodelaylog v4.0
ihashsize v3.20 ihashsize v4.0
irixsgid v3.20 irixsgid v4.0
osyncisdsync/osyncisosync v3.20 osyncisdsync/osyncisosync v4.0
sysctls sysctls
@ -346,5 +346,5 @@ Removed Sysctls
Name Removed Name Removed
---- ------- ---- -------
fs.xfs.xfsbufd_centisec v3.20 fs.xfs.xfsbufd_centisec v4.0
fs.xfs.age_buffer_centisecs v3.20 fs.xfs.age_buffer_centisecs v4.0

View file

@ -309,14 +309,21 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
out: out:
i_mmap_unlock_read(mapping); i_mmap_unlock_read(mapping);
if (bh->b_end_io)
bh->b_end_io(bh, 1);
return error; return error;
} }
static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, /**
get_block_t get_block) * __dax_fault - handle a page fault on a DAX file
* @vma: The virtual memory area where the fault occurred
* @vmf: The description of the fault
* @get_block: The filesystem method used to translate file offsets to blocks
*
* When a page fault occurs, filesystems may call this helper in their
* fault handler for DAX files. __dax_fault() assumes the caller has done all
* the necessary locking for the page fault to proceed successfully.
*/
int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
get_block_t get_block, dax_iodone_t complete_unwritten)
{ {
struct file *file = vma->vm_file; struct file *file = vma->vm_file;
struct address_space *mapping = file->f_mapping; struct address_space *mapping = file->f_mapping;
@ -417,7 +424,19 @@ static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
page_cache_release(page); page_cache_release(page);
} }
/*
* If we successfully insert the new mapping over an unwritten extent,
* we need to ensure we convert the unwritten extent. If there is an
* error inserting the mapping, the filesystem needs to leave it as
* unwritten to prevent exposure of the stale underlying data to
* userspace, but we still need to call the completion function so
* the private resources on the mapping buffer can be released. We
* indicate what the callback should do via the uptodate variable, same
* as for normal BH based IO completions.
*/
error = dax_insert_mapping(inode, &bh, vma, vmf); error = dax_insert_mapping(inode, &bh, vma, vmf);
if (buffer_unwritten(&bh))
complete_unwritten(&bh, !error);
out: out:
if (error == -ENOMEM) if (error == -ENOMEM)
@ -434,6 +453,7 @@ static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
} }
goto out; goto out;
} }
EXPORT_SYMBOL(__dax_fault);
/** /**
* dax_fault - handle a page fault on a DAX file * dax_fault - handle a page fault on a DAX file
@ -445,7 +465,7 @@ static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
* fault handler for DAX files. * fault handler for DAX files.
*/ */
int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
get_block_t get_block) get_block_t get_block, dax_iodone_t complete_unwritten)
{ {
int result; int result;
struct super_block *sb = file_inode(vma->vm_file)->i_sb; struct super_block *sb = file_inode(vma->vm_file)->i_sb;
@ -454,7 +474,7 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
sb_start_pagefault(sb); sb_start_pagefault(sb);
file_update_time(vma->vm_file); file_update_time(vma->vm_file);
} }
result = do_dax_fault(vma, vmf, get_block); result = __dax_fault(vma, vmf, get_block, complete_unwritten);
if (vmf->flags & FAULT_FLAG_WRITE) if (vmf->flags & FAULT_FLAG_WRITE)
sb_end_pagefault(sb); sb_end_pagefault(sb);

View file

@ -28,12 +28,12 @@
#ifdef CONFIG_FS_DAX #ifdef CONFIG_FS_DAX
static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{ {
return dax_fault(vma, vmf, ext2_get_block); return dax_fault(vma, vmf, ext2_get_block, NULL);
} }
static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{ {
return dax_mkwrite(vma, vmf, ext2_get_block); return dax_mkwrite(vma, vmf, ext2_get_block, NULL);
} }
static const struct vm_operations_struct ext2_dax_vm_ops = { static const struct vm_operations_struct ext2_dax_vm_ops = {

View file

@ -192,15 +192,27 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
} }
#ifdef CONFIG_FS_DAX #ifdef CONFIG_FS_DAX
static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
{
struct inode *inode = bh->b_assoc_map->host;
/* XXX: breaks on 32-bit > 16GB. Is that even supported? */
loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
int err;
if (!uptodate)
return;
WARN_ON(!buffer_unwritten(bh));
err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
}
static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{ {
return dax_fault(vma, vmf, ext4_get_block); return dax_fault(vma, vmf, ext4_get_block, ext4_end_io_unwritten);
/* Is this the right get_block? */ /* Is this the right get_block? */
} }
static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{ {
return dax_mkwrite(vma, vmf, ext4_get_block); return dax_mkwrite(vma, vmf, ext4_get_block, ext4_end_io_unwritten);
} }
static const struct vm_operations_struct ext4_dax_vm_ops = { static const struct vm_operations_struct ext4_dax_vm_ops = {

View file

@ -656,18 +656,6 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
return retval; return retval;
} }
static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
{
struct inode *inode = bh->b_assoc_map->host;
/* XXX: breaks on 32-bit > 16GB. Is that even supported? */
loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
int err;
if (!uptodate)
return;
WARN_ON(!buffer_unwritten(bh));
err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
}
/* Maximum number of blocks we map for direct IO at once. */ /* Maximum number of blocks we map for direct IO at once. */
#define DIO_MAX_BLOCKS 4096 #define DIO_MAX_BLOCKS 4096
@ -705,10 +693,15 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
map_bh(bh, inode->i_sb, map.m_pblk); map_bh(bh, inode->i_sb, map.m_pblk);
bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
if (IS_DAX(inode) && buffer_unwritten(bh) && !io_end) { if (IS_DAX(inode) && buffer_unwritten(bh)) {
/*
* dgc: I suspect unwritten conversion on ext4+DAX is
* fundamentally broken here when there are concurrent
* read/write in progress on this inode.
*/
WARN_ON_ONCE(io_end);
bh->b_assoc_map = inode->i_mapping; bh->b_assoc_map = inode->i_mapping;
bh->b_private = (void *)(unsigned long)iblock; bh->b_private = (void *)(unsigned long)iblock;
bh->b_end_io = ext4_end_io_unwritten;
} }
if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN) if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
set_buffer_defer_completion(bh); set_buffer_defer_completion(bh);

View file

@ -149,13 +149,27 @@ xfs_alloc_compute_aligned(
{ {
xfs_agblock_t bno; xfs_agblock_t bno;
xfs_extlen_t len; xfs_extlen_t len;
xfs_extlen_t diff;
/* Trim busy sections out of found extent */ /* Trim busy sections out of found extent */
xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len); xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
/*
* If we have a largish extent that happens to start before min_agbno,
* see if we can shift it into range...
*/
if (bno < args->min_agbno && bno + len > args->min_agbno) {
diff = args->min_agbno - bno;
if (len > diff) {
bno += diff;
len -= diff;
}
}
if (args->alignment > 1 && len >= args->minlen) { if (args->alignment > 1 && len >= args->minlen) {
xfs_agblock_t aligned_bno = roundup(bno, args->alignment); xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
xfs_extlen_t diff = aligned_bno - bno;
diff = aligned_bno - bno;
*resbno = aligned_bno; *resbno = aligned_bno;
*reslen = diff >= len ? 0 : len - diff; *reslen = diff >= len ? 0 : len - diff;
@ -795,9 +809,13 @@ xfs_alloc_find_best_extent(
* The good extent is closer than this one. * The good extent is closer than this one.
*/ */
if (!dir) { if (!dir) {
if (*sbnoa > args->max_agbno)
goto out_use_good;
if (*sbnoa >= args->agbno + gdiff) if (*sbnoa >= args->agbno + gdiff)
goto out_use_good; goto out_use_good;
} else { } else {
if (*sbnoa < args->min_agbno)
goto out_use_good;
if (*sbnoa <= args->agbno - gdiff) if (*sbnoa <= args->agbno - gdiff)
goto out_use_good; goto out_use_good;
} }
@ -884,6 +902,17 @@ xfs_alloc_ag_vextent_near(
dofirst = prandom_u32() & 1; dofirst = prandom_u32() & 1;
#endif #endif
/* handle unitialized agbno range so caller doesn't have to */
if (!args->min_agbno && !args->max_agbno)
args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
ASSERT(args->min_agbno <= args->max_agbno);
/* clamp agbno to the range if it's outside */
if (args->agbno < args->min_agbno)
args->agbno = args->min_agbno;
if (args->agbno > args->max_agbno)
args->agbno = args->max_agbno;
restart: restart:
bno_cur_lt = NULL; bno_cur_lt = NULL;
bno_cur_gt = NULL; bno_cur_gt = NULL;
@ -976,6 +1005,8 @@ xfs_alloc_ag_vextent_near(
&ltbnoa, &ltlena); &ltbnoa, &ltlena);
if (ltlena < args->minlen) if (ltlena < args->minlen)
continue; continue;
if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
continue;
args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
xfs_alloc_fix_len(args); xfs_alloc_fix_len(args);
ASSERT(args->len >= args->minlen); ASSERT(args->len >= args->minlen);
@ -1096,11 +1127,11 @@ xfs_alloc_ag_vextent_near(
XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
xfs_alloc_compute_aligned(args, ltbno, ltlen, xfs_alloc_compute_aligned(args, ltbno, ltlen,
&ltbnoa, &ltlena); &ltbnoa, &ltlena);
if (ltlena >= args->minlen) if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
break; break;
if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
goto error0; goto error0;
if (!i) { if (!i || ltbnoa < args->min_agbno) {
xfs_btree_del_cursor(bno_cur_lt, xfs_btree_del_cursor(bno_cur_lt,
XFS_BTREE_NOERROR); XFS_BTREE_NOERROR);
bno_cur_lt = NULL; bno_cur_lt = NULL;
@ -1112,11 +1143,11 @@ xfs_alloc_ag_vextent_near(
XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
xfs_alloc_compute_aligned(args, gtbno, gtlen, xfs_alloc_compute_aligned(args, gtbno, gtlen,
&gtbnoa, &gtlena); &gtbnoa, &gtlena);
if (gtlena >= args->minlen) if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
break; break;
if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
goto error0; goto error0;
if (!i) { if (!i || gtbnoa > args->max_agbno) {
xfs_btree_del_cursor(bno_cur_gt, xfs_btree_del_cursor(bno_cur_gt,
XFS_BTREE_NOERROR); XFS_BTREE_NOERROR);
bno_cur_gt = NULL; bno_cur_gt = NULL;
@ -1216,6 +1247,7 @@ xfs_alloc_ag_vextent_near(
ASSERT(ltnew >= ltbno); ASSERT(ltnew >= ltbno);
ASSERT(ltnew + rlen <= ltbnoa + ltlena); ASSERT(ltnew + rlen <= ltbnoa + ltlena);
ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno);
args->agbno = ltnew; args->agbno = ltnew;
if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
@ -1825,11 +1857,11 @@ xfs_alloc_compute_maxlevels(
xfs_extlen_t xfs_extlen_t
xfs_alloc_longest_free_extent( xfs_alloc_longest_free_extent(
struct xfs_mount *mp, struct xfs_mount *mp,
struct xfs_perag *pag) struct xfs_perag *pag,
xfs_extlen_t need)
{ {
xfs_extlen_t need, delta = 0; xfs_extlen_t delta = 0;
need = XFS_MIN_FREELIST_PAG(pag, mp);
if (need > pag->pagf_flcount) if (need > pag->pagf_flcount)
delta = need - pag->pagf_flcount; delta = need - pag->pagf_flcount;
@ -1838,131 +1870,150 @@ xfs_alloc_longest_free_extent(
return pag->pagf_flcount > 0 || pag->pagf_longest > 0; return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
} }
unsigned int
xfs_alloc_min_freelist(
struct xfs_mount *mp,
struct xfs_perag *pag)
{
unsigned int min_free;
/* space needed by-bno freespace btree */
min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
mp->m_ag_maxlevels);
/* space needed by-size freespace btree */
min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
mp->m_ag_maxlevels);
return min_free;
}
/*
* Check if the operation we are fixing up the freelist for should go ahead or
* not. If we are freeing blocks, we always allow it, otherwise the allocation
* is dependent on whether the size and shape of free space available will
* permit the requested allocation to take place.
*/
static bool
xfs_alloc_space_available(
struct xfs_alloc_arg *args,
xfs_extlen_t min_free,
int flags)
{
struct xfs_perag *pag = args->pag;
xfs_extlen_t longest;
int available;
if (flags & XFS_ALLOC_FLAG_FREEING)
return true;
/* do we have enough contiguous free space for the allocation? */
longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free);
if ((args->minlen + args->alignment + args->minalignslop - 1) > longest)
return false;
/* do have enough free space remaining for the allocation? */
available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
min_free - args->total);
if (available < (int)args->minleft)
return false;
return true;
}
/* /*
* Decide whether to use this allocation group for this allocation. * Decide whether to use this allocation group for this allocation.
* If so, fix up the btree freelist's size. * If so, fix up the btree freelist's size.
*/ */
STATIC int /* error */ STATIC int /* error */
xfs_alloc_fix_freelist( xfs_alloc_fix_freelist(
xfs_alloc_arg_t *args, /* allocation argument structure */ struct xfs_alloc_arg *args, /* allocation argument structure */
int flags) /* XFS_ALLOC_FLAG_... */ int flags) /* XFS_ALLOC_FLAG_... */
{ {
xfs_buf_t *agbp; /* agf buffer pointer */ struct xfs_mount *mp = args->mp;
xfs_agf_t *agf; /* a.g. freespace structure pointer */ struct xfs_perag *pag = args->pag;
xfs_buf_t *agflbp;/* agfl buffer pointer */ struct xfs_trans *tp = args->tp;
xfs_agblock_t bno; /* freelist block */ struct xfs_buf *agbp = NULL;
xfs_extlen_t delta; /* new blocks needed in freelist */ struct xfs_buf *agflbp = NULL;
int error; /* error result code */ struct xfs_alloc_arg targs; /* local allocation arguments */
xfs_extlen_t longest;/* longest extent in allocation group */ xfs_agblock_t bno; /* freelist block */
xfs_mount_t *mp; /* file system mount point structure */ xfs_extlen_t need; /* total blocks needed in freelist */
xfs_extlen_t need; /* total blocks needed in freelist */ int error;
xfs_perag_t *pag; /* per-ag information structure */
xfs_alloc_arg_t targs; /* local allocation arguments */
xfs_trans_t *tp; /* transaction pointer */
mp = args->mp;
pag = args->pag;
tp = args->tp;
if (!pag->pagf_init) { if (!pag->pagf_init) {
if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags, error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
&agbp))) if (error)
return error; goto out_no_agbp;
if (!pag->pagf_init) { if (!pag->pagf_init) {
ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
args->agbp = NULL; goto out_agbp_relse;
return 0;
} }
} else }
agbp = NULL;
/* /*
* If this is a metadata preferred pag and we are user data * If this is a metadata preferred pag and we are user data then try
* then try somewhere else if we are not being asked to * somewhere else if we are not being asked to try harder at this
* try harder at this point * point
*/ */
if (pag->pagf_metadata && args->userdata && if (pag->pagf_metadata && args->userdata &&
(flags & XFS_ALLOC_FLAG_TRYLOCK)) { (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
args->agbp = NULL; goto out_agbp_relse;
return 0;
} }
if (!(flags & XFS_ALLOC_FLAG_FREEING)) { need = xfs_alloc_min_freelist(mp, pag);
/* if (!xfs_alloc_space_available(args, need, flags))
* If it looks like there isn't a long enough extent, or enough goto out_agbp_relse;
* total blocks, reject it.
*/
need = XFS_MIN_FREELIST_PAG(pag, mp);
longest = xfs_alloc_longest_free_extent(mp, pag);
if ((args->minlen + args->alignment + args->minalignslop - 1) >
longest ||
((int)(pag->pagf_freeblks + pag->pagf_flcount -
need - args->total) < (int)args->minleft)) {
if (agbp)
xfs_trans_brelse(tp, agbp);
args->agbp = NULL;
return 0;
}
}
/* /*
* Get the a.g. freespace buffer. * Get the a.g. freespace buffer.
* Can fail if we're not blocking on locks, and it's held. * Can fail if we're not blocking on locks, and it's held.
*/ */
if (agbp == NULL) { if (!agbp) {
if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags, error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
&agbp))) if (error)
return error; goto out_no_agbp;
if (agbp == NULL) { if (!agbp) {
ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
args->agbp = NULL; goto out_no_agbp;
return 0;
}
}
/*
* Figure out how many blocks we should have in the freelist.
*/
agf = XFS_BUF_TO_AGF(agbp);
need = XFS_MIN_FREELIST(agf, mp);
/*
* If there isn't enough total or single-extent, reject it.
*/
if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
delta = need > be32_to_cpu(agf->agf_flcount) ?
(need - be32_to_cpu(agf->agf_flcount)) : 0;
longest = be32_to_cpu(agf->agf_longest);
longest = (longest > delta) ? (longest - delta) :
(be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
if ((args->minlen + args->alignment + args->minalignslop - 1) >
longest ||
((int)(be32_to_cpu(agf->agf_freeblks) +
be32_to_cpu(agf->agf_flcount) - need - args->total) <
(int)args->minleft)) {
xfs_trans_brelse(tp, agbp);
args->agbp = NULL;
return 0;
} }
} }
/* If there isn't enough total space or single-extent, reject it. */
need = xfs_alloc_min_freelist(mp, pag);
if (!xfs_alloc_space_available(args, need, flags))
goto out_agbp_relse;
/* /*
* Make the freelist shorter if it's too long. * Make the freelist shorter if it's too long.
*
* Note that from this point onwards, we will always release the agf and
* agfl buffers on error. This handles the case where we error out and
* the buffers are clean or may not have been joined to the transaction
* and hence need to be released manually. If they have been joined to
* the transaction, then xfs_trans_brelse() will handle them
* appropriately based on the recursion count and dirty state of the
* buffer.
*
* XXX (dgc): When we have lots of free space, does this buy us
* anything other than extra overhead when we need to put more blocks
* back on the free list? Maybe we should only do this when space is
* getting low or the AGFL is more than half full?
*/ */
while (be32_to_cpu(agf->agf_flcount) > need) { while (pag->pagf_flcount > need) {
xfs_buf_t *bp; struct xfs_buf *bp;
error = xfs_alloc_get_freelist(tp, agbp, &bno, 0); error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
if (error) if (error)
return error; goto out_agbp_relse;
if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1))) error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1);
return error; if (error)
goto out_agbp_relse;
bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0); bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
xfs_trans_binval(tp, bp); xfs_trans_binval(tp, bp);
} }
/*
* Initialize the args structure.
*/
memset(&targs, 0, sizeof(targs)); memset(&targs, 0, sizeof(targs));
targs.tp = tp; targs.tp = tp;
targs.mp = mp; targs.mp = mp;
@ -1971,21 +2022,20 @@ xfs_alloc_fix_freelist(
targs.alignment = targs.minlen = targs.prod = targs.isfl = 1; targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
targs.type = XFS_ALLOCTYPE_THIS_AG; targs.type = XFS_ALLOCTYPE_THIS_AG;
targs.pag = pag; targs.pag = pag;
if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp))) error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
return error; if (error)
/* goto out_agbp_relse;
* Make the freelist longer if it's too short.
*/ /* Make the freelist longer if it's too short. */
while (be32_to_cpu(agf->agf_flcount) < need) { while (pag->pagf_flcount < need) {
targs.agbno = 0; targs.agbno = 0;
targs.maxlen = need - be32_to_cpu(agf->agf_flcount); targs.maxlen = need - pag->pagf_flcount;
/*
* Allocate as many blocks as possible at once. /* Allocate as many blocks as possible at once. */
*/ error = xfs_alloc_ag_vextent(&targs);
if ((error = xfs_alloc_ag_vextent(&targs))) { if (error)
xfs_trans_brelse(tp, agflbp); goto out_agflbp_relse;
return error;
}
/* /*
* Stop if we run out. Won't happen if callers are obeying * Stop if we run out. Won't happen if callers are obeying
* the restrictions correctly. Can happen for free calls * the restrictions correctly. Can happen for free calls
@ -1994,9 +2044,7 @@ xfs_alloc_fix_freelist(
if (targs.agbno == NULLAGBLOCK) { if (targs.agbno == NULLAGBLOCK) {
if (flags & XFS_ALLOC_FLAG_FREEING) if (flags & XFS_ALLOC_FLAG_FREEING)
break; break;
xfs_trans_brelse(tp, agflbp); goto out_agflbp_relse;
args->agbp = NULL;
return 0;
} }
/* /*
* Put each allocated block on the list. * Put each allocated block on the list.
@ -2005,12 +2053,21 @@ xfs_alloc_fix_freelist(
error = xfs_alloc_put_freelist(tp, agbp, error = xfs_alloc_put_freelist(tp, agbp,
agflbp, bno, 0); agflbp, bno, 0);
if (error) if (error)
return error; goto out_agflbp_relse;
} }
} }
xfs_trans_brelse(tp, agflbp); xfs_trans_brelse(tp, agflbp);
args->agbp = agbp; args->agbp = agbp;
return 0; return 0;
out_agflbp_relse:
xfs_trans_brelse(tp, agflbp);
out_agbp_relse:
if (agbp)
xfs_trans_brelse(tp, agbp);
out_no_agbp:
args->agbp = NULL;
return error;
} }
/* /*

View file

@ -112,6 +112,8 @@ typedef struct xfs_alloc_arg {
xfs_extlen_t total; /* total blocks needed in xaction */ xfs_extlen_t total; /* total blocks needed in xaction */
xfs_extlen_t alignment; /* align answer to multiple of this */ xfs_extlen_t alignment; /* align answer to multiple of this */
xfs_extlen_t minalignslop; /* slop for minlen+alignment calcs */ xfs_extlen_t minalignslop; /* slop for minlen+alignment calcs */
xfs_agblock_t min_agbno; /* set an agbno range for NEAR allocs */
xfs_agblock_t max_agbno; /* ... */
xfs_extlen_t len; /* output: actual size of extent */ xfs_extlen_t len; /* output: actual size of extent */
xfs_alloctype_t type; /* allocation type XFS_ALLOCTYPE_... */ xfs_alloctype_t type; /* allocation type XFS_ALLOCTYPE_... */
xfs_alloctype_t otype; /* original allocation type */ xfs_alloctype_t otype; /* original allocation type */
@ -128,11 +130,9 @@ typedef struct xfs_alloc_arg {
#define XFS_ALLOC_USERDATA 1 /* allocation is for user data*/ #define XFS_ALLOC_USERDATA 1 /* allocation is for user data*/
#define XFS_ALLOC_INITIAL_USER_DATA 2 /* special case start of file */ #define XFS_ALLOC_INITIAL_USER_DATA 2 /* special case start of file */
/* xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp,
* Find the length of the longest extent in an AG. struct xfs_perag *pag, xfs_extlen_t need);
*/ unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp,
xfs_extlen_t
xfs_alloc_longest_free_extent(struct xfs_mount *mp,
struct xfs_perag *pag); struct xfs_perag *pag);
/* /*

View file

@ -266,7 +266,7 @@ xfs_attr_set(
tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
error = xfs_trans_reserve(args.trans, &tres, args.total, 0); error = xfs_trans_reserve(args.trans, &tres, args.total, 0);
if (error) { if (error) {
xfs_trans_cancel(args.trans, 0); xfs_trans_cancel(args.trans);
return error; return error;
} }
xfs_ilock(dp, XFS_ILOCK_EXCL); xfs_ilock(dp, XFS_ILOCK_EXCL);
@ -276,7 +276,7 @@ xfs_attr_set(
XFS_QMOPT_RES_REGBLKS); XFS_QMOPT_RES_REGBLKS);
if (error) { if (error) {
xfs_iunlock(dp, XFS_ILOCK_EXCL); xfs_iunlock(dp, XFS_ILOCK_EXCL);
xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES); xfs_trans_cancel(args.trans);
return error; return error;
} }
@ -320,8 +320,7 @@ xfs_attr_set(
xfs_trans_ichgtime(args.trans, dp, xfs_trans_ichgtime(args.trans, dp,
XFS_ICHGTIME_CHG); XFS_ICHGTIME_CHG);
} }
err2 = xfs_trans_commit(args.trans, err2 = xfs_trans_commit(args.trans);
XFS_TRANS_RELEASE_LOG_RES);
xfs_iunlock(dp, XFS_ILOCK_EXCL); xfs_iunlock(dp, XFS_ILOCK_EXCL);
return error ? error : err2; return error ? error : err2;
@ -383,16 +382,14 @@ xfs_attr_set(
* Commit the last in the sequence of transactions. * Commit the last in the sequence of transactions.
*/ */
xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); error = xfs_trans_commit(args.trans);
xfs_iunlock(dp, XFS_ILOCK_EXCL); xfs_iunlock(dp, XFS_ILOCK_EXCL);
return error; return error;
out: out:
if (args.trans) { if (args.trans)
xfs_trans_cancel(args.trans, xfs_trans_cancel(args.trans);
XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
}
xfs_iunlock(dp, XFS_ILOCK_EXCL); xfs_iunlock(dp, XFS_ILOCK_EXCL);
return error; return error;
} }
@ -462,7 +459,7 @@ xfs_attr_remove(
error = xfs_trans_reserve(args.trans, &M_RES(mp)->tr_attrrm, error = xfs_trans_reserve(args.trans, &M_RES(mp)->tr_attrrm,
XFS_ATTRRM_SPACE_RES(mp), 0); XFS_ATTRRM_SPACE_RES(mp), 0);
if (error) { if (error) {
xfs_trans_cancel(args.trans, 0); xfs_trans_cancel(args.trans);
return error; return error;
} }
@ -501,16 +498,14 @@ xfs_attr_remove(
* Commit the last in the sequence of transactions. * Commit the last in the sequence of transactions.
*/ */
xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); error = xfs_trans_commit(args.trans);
xfs_iunlock(dp, XFS_ILOCK_EXCL); xfs_iunlock(dp, XFS_ILOCK_EXCL);
return error; return error;
out: out:
if (args.trans) { if (args.trans)
xfs_trans_cancel(args.trans, xfs_trans_cancel(args.trans);
XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
}
xfs_iunlock(dp, XFS_ILOCK_EXCL); xfs_iunlock(dp, XFS_ILOCK_EXCL);
return error; return error;
} }

View file

@ -1112,7 +1112,6 @@ xfs_bmap_add_attrfork(
int committed; /* xaction was committed */ int committed; /* xaction was committed */
int logflags; /* logging flags */ int logflags; /* logging flags */
int error; /* error return value */ int error; /* error return value */
int cancel_flags = 0;
ASSERT(XFS_IFORK_Q(ip) == 0); ASSERT(XFS_IFORK_Q(ip) == 0);
@ -1124,17 +1123,15 @@ xfs_bmap_add_attrfork(
tp->t_flags |= XFS_TRANS_RESERVE; tp->t_flags |= XFS_TRANS_RESERVE;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0);
if (error) { if (error) {
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
return error; return error;
} }
cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ? error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
XFS_QMOPT_RES_REGBLKS); XFS_QMOPT_RES_REGBLKS);
if (error) if (error)
goto trans_cancel; goto trans_cancel;
cancel_flags |= XFS_TRANS_ABORT;
if (XFS_IFORK_Q(ip)) if (XFS_IFORK_Q(ip))
goto trans_cancel; goto trans_cancel;
if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) { if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
@ -1218,14 +1215,14 @@ xfs_bmap_add_attrfork(
error = xfs_bmap_finish(&tp, &flist, &committed); error = xfs_bmap_finish(&tp, &flist, &committed);
if (error) if (error)
goto bmap_cancel; goto bmap_cancel;
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error; return error;
bmap_cancel: bmap_cancel:
xfs_bmap_cancel(&flist); xfs_bmap_cancel(&flist);
trans_cancel: trans_cancel:
xfs_trans_cancel(tp, cancel_flags); xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error; return error;
} }
@ -3521,7 +3518,8 @@ xfs_bmap_longest_free_extent(
} }
} }
longest = xfs_alloc_longest_free_extent(mp, pag); longest = xfs_alloc_longest_free_extent(mp, pag,
xfs_alloc_min_freelist(mp, pag));
if (*blen < longest) if (*blen < longest)
*blen = longest; *blen = longest;
@ -4424,7 +4422,15 @@ xfs_bmapi_convert_unwritten(
error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx, error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
&bma->cur, mval, bma->firstblock, bma->flist, &bma->cur, mval, bma->firstblock, bma->flist,
&tmp_logflags); &tmp_logflags);
bma->logflags |= tmp_logflags; /*
* Log the inode core unconditionally in the unwritten extent conversion
* path because the conversion might not have done so (e.g., if the
* extent count hasn't changed). We need to make sure the inode is dirty
* in the transaction for the sake of fsync(), even if nothing has
* changed, because fsync() will not force the log for this transaction
* unless it sees the inode pinned.
*/
bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
if (error) if (error)
return error; return error;
@ -5918,7 +5924,7 @@ xfs_bmap_split_extent(
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
XFS_DIOSTRAT_SPACE_RES(mp, 0), 0); XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
if (error) { if (error) {
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
return error; return error;
} }
@ -5936,10 +5942,9 @@ xfs_bmap_split_extent(
if (error) if (error)
goto out; goto out;
return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); return xfs_trans_commit(tp);
out: out:
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); xfs_trans_cancel(tp);
return error; return error;
} }

View file

@ -170,7 +170,7 @@ typedef struct xfs_sb {
__uint32_t sb_features_log_incompat; __uint32_t sb_features_log_incompat;
__uint32_t sb_crc; /* superblock crc */ __uint32_t sb_crc; /* superblock crc */
__uint32_t sb_pad; xfs_extlen_t sb_spino_align; /* sparse inode chunk alignment */
xfs_ino_t sb_pquotino; /* project quota inode */ xfs_ino_t sb_pquotino; /* project quota inode */
xfs_lsn_t sb_lsn; /* last write sequence */ xfs_lsn_t sb_lsn; /* last write sequence */
@ -256,7 +256,7 @@ typedef struct xfs_dsb {
__be32 sb_features_log_incompat; __be32 sb_features_log_incompat;
__le32 sb_crc; /* superblock crc */ __le32 sb_crc; /* superblock crc */
__be32 sb_pad; __be32 sb_spino_align; /* sparse inode chunk alignment */
__be64 sb_pquotino; /* project quota inode */ __be64 sb_pquotino; /* project quota inode */
__be64 sb_lsn; /* last write sequence */ __be64 sb_lsn; /* last write sequence */
@ -457,8 +457,10 @@ xfs_sb_has_ro_compat_feature(
} }
#define XFS_SB_FEAT_INCOMPAT_FTYPE (1 << 0) /* filetype in dirent */ #define XFS_SB_FEAT_INCOMPAT_FTYPE (1 << 0) /* filetype in dirent */
#define XFS_SB_FEAT_INCOMPAT_SPINODES (1 << 1) /* sparse inode chunks */
#define XFS_SB_FEAT_INCOMPAT_ALL \ #define XFS_SB_FEAT_INCOMPAT_ALL \
(XFS_SB_FEAT_INCOMPAT_FTYPE) (XFS_SB_FEAT_INCOMPAT_FTYPE| \
XFS_SB_FEAT_INCOMPAT_SPINODES)
#define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL #define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL
static inline bool static inline bool
@ -506,6 +508,12 @@ static inline int xfs_sb_version_hasfinobt(xfs_sb_t *sbp)
(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT); (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT);
} }
static inline bool xfs_sb_version_hassparseinodes(struct xfs_sb *sbp)
{
return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_SPINODES);
}
/* /*
* end of superblock version macros * end of superblock version macros
*/ */
@ -758,19 +766,6 @@ typedef struct xfs_agfl {
#define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc) #define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc)
#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels)
#define XFS_MIN_FREELIST_RAW(bl,cl,mp) \
(MIN(bl + 1, XFS_AG_MAXLEVELS(mp)) + MIN(cl + 1, XFS_AG_MAXLEVELS(mp)))
#define XFS_MIN_FREELIST(a,mp) \
(XFS_MIN_FREELIST_RAW( \
be32_to_cpu((a)->agf_levels[XFS_BTNUM_BNOi]), \
be32_to_cpu((a)->agf_levels[XFS_BTNUM_CNTi]), mp))
#define XFS_MIN_FREELIST_PAG(pag,mp) \
(XFS_MIN_FREELIST_RAW( \
(unsigned int)(pag)->pagf_levels[XFS_BTNUM_BNOi], \
(unsigned int)(pag)->pagf_levels[XFS_BTNUM_CNTi], mp))
#define XFS_AGB_TO_FSB(mp,agno,agbno) \ #define XFS_AGB_TO_FSB(mp,agno,agbno) \
(((xfs_fsblock_t)(agno) << (mp)->m_sb.sb_agblklog) | (agbno)) (((xfs_fsblock_t)(agno) << (mp)->m_sb.sb_agblklog) | (agbno))
#define XFS_FSB_TO_AGNO(mp,fsbno) \ #define XFS_FSB_TO_AGNO(mp,fsbno) \
@ -1216,26 +1211,54 @@ typedef __uint64_t xfs_inofree_t;
#define XFS_INOBT_ALL_FREE ((xfs_inofree_t)-1) #define XFS_INOBT_ALL_FREE ((xfs_inofree_t)-1)
#define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i)) #define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i))
#define XFS_INOBT_HOLEMASK_FULL 0 /* holemask for full chunk */
#define XFS_INOBT_HOLEMASK_BITS (NBBY * sizeof(__uint16_t))
#define XFS_INODES_PER_HOLEMASK_BIT \
(XFS_INODES_PER_CHUNK / (NBBY * sizeof(__uint16_t)))
static inline xfs_inofree_t xfs_inobt_maskn(int i, int n) static inline xfs_inofree_t xfs_inobt_maskn(int i, int n)
{ {
return ((n >= XFS_INODES_PER_CHUNK ? 0 : XFS_INOBT_MASK(n)) - 1) << i; return ((n >= XFS_INODES_PER_CHUNK ? 0 : XFS_INOBT_MASK(n)) - 1) << i;
} }
/* /*
* Data record structure * The on-disk inode record structure has two formats. The original "full"
* format uses a 4-byte freecount. The "sparse" format uses a 1-byte freecount
* and replaces the 3 high-order freecount bytes wth the holemask and inode
* count.
*
* The holemask of the sparse record format allows an inode chunk to have holes
* that refer to blocks not owned by the inode record. This facilitates inode
* allocation in the event of severe free space fragmentation.
*/ */
typedef struct xfs_inobt_rec { typedef struct xfs_inobt_rec {
__be32 ir_startino; /* starting inode number */ __be32 ir_startino; /* starting inode number */
__be32 ir_freecount; /* count of free inodes (set bits) */ union {
struct {
__be32 ir_freecount; /* count of free inodes */
} f;
struct {
__be16 ir_holemask;/* hole mask for sparse chunks */
__u8 ir_count; /* total inode count */
__u8 ir_freecount; /* count of free inodes */
} sp;
} ir_u;
__be64 ir_free; /* free inode mask */ __be64 ir_free; /* free inode mask */
} xfs_inobt_rec_t; } xfs_inobt_rec_t;
typedef struct xfs_inobt_rec_incore { typedef struct xfs_inobt_rec_incore {
xfs_agino_t ir_startino; /* starting inode number */ xfs_agino_t ir_startino; /* starting inode number */
__int32_t ir_freecount; /* count of free inodes (set bits) */ __uint16_t ir_holemask; /* hole mask for sparse chunks */
__uint8_t ir_count; /* total inode count */
__uint8_t ir_freecount; /* count of free inodes (set bits) */
xfs_inofree_t ir_free; /* free inode mask */ xfs_inofree_t ir_free; /* free inode mask */
} xfs_inobt_rec_incore_t; } xfs_inobt_rec_incore_t;
static inline bool xfs_inobt_issparse(uint16_t holemask)
{
/* non-zero holemask represents a sparse rec. */
return holemask;
}
/* /*
* Key structure * Key structure
@ -1453,8 +1476,8 @@ struct xfs_acl {
sizeof(struct xfs_acl_entry) * XFS_ACL_MAX_ENTRIES((mp))) sizeof(struct xfs_acl_entry) * XFS_ACL_MAX_ENTRIES((mp)))
/* On-disk XFS extended attribute names */ /* On-disk XFS extended attribute names */
#define SGI_ACL_FILE (unsigned char *)"SGI_ACL_FILE" #define SGI_ACL_FILE "SGI_ACL_FILE"
#define SGI_ACL_DEFAULT (unsigned char *)"SGI_ACL_DEFAULT" #define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT"
#define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1) #define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1)
#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) #define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)

View file

@ -239,6 +239,7 @@ typedef struct xfs_fsop_resblks {
#define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */ #define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */
#define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */ #define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */
#define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */ #define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */
#define XFS_FSOP_GEOM_FLAGS_SPINODES 0x40000 /* sparse inode chunks */
/* /*
* Minimum and maximum sizes need for growth checks. * Minimum and maximum sizes need for growth checks.

View file

@ -65,6 +65,8 @@ xfs_inobt_lookup(
int *stat) /* success/failure */ int *stat) /* success/failure */
{ {
cur->bc_rec.i.ir_startino = ino; cur->bc_rec.i.ir_startino = ino;
cur->bc_rec.i.ir_holemask = 0;
cur->bc_rec.i.ir_count = 0;
cur->bc_rec.i.ir_freecount = 0; cur->bc_rec.i.ir_freecount = 0;
cur->bc_rec.i.ir_free = 0; cur->bc_rec.i.ir_free = 0;
return xfs_btree_lookup(cur, dir, stat); return xfs_btree_lookup(cur, dir, stat);
@ -82,7 +84,14 @@ xfs_inobt_update(
union xfs_btree_rec rec; union xfs_btree_rec rec;
rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino); rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino);
rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount); if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
rec.inobt.ir_u.sp.ir_holemask = cpu_to_be16(irec->ir_holemask);
rec.inobt.ir_u.sp.ir_count = irec->ir_count;
rec.inobt.ir_u.sp.ir_freecount = irec->ir_freecount;
} else {
/* ir_holemask/ir_count not supported on-disk */
rec.inobt.ir_u.f.ir_freecount = cpu_to_be32(irec->ir_freecount);
}
rec.inobt.ir_free = cpu_to_be64(irec->ir_free); rec.inobt.ir_free = cpu_to_be64(irec->ir_free);
return xfs_btree_update(cur, &rec); return xfs_btree_update(cur, &rec);
} }
@ -100,12 +109,27 @@ xfs_inobt_get_rec(
int error; int error;
error = xfs_btree_get_rec(cur, &rec, stat); error = xfs_btree_get_rec(cur, &rec, stat);
if (!error && *stat == 1) { if (error || *stat == 0)
irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino); return error;
irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount);
irec->ir_free = be64_to_cpu(rec->inobt.ir_free); irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
irec->ir_holemask = be16_to_cpu(rec->inobt.ir_u.sp.ir_holemask);
irec->ir_count = rec->inobt.ir_u.sp.ir_count;
irec->ir_freecount = rec->inobt.ir_u.sp.ir_freecount;
} else {
/*
* ir_holemask/ir_count not supported on-disk. Fill in hardcoded
* values for full inode chunks.
*/
irec->ir_holemask = XFS_INOBT_HOLEMASK_FULL;
irec->ir_count = XFS_INODES_PER_CHUNK;
irec->ir_freecount =
be32_to_cpu(rec->inobt.ir_u.f.ir_freecount);
} }
return error; irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
return 0;
} }
/* /*
@ -114,10 +138,14 @@ xfs_inobt_get_rec(
STATIC int STATIC int
xfs_inobt_insert_rec( xfs_inobt_insert_rec(
struct xfs_btree_cur *cur, struct xfs_btree_cur *cur,
__uint16_t holemask,
__uint8_t count,
__int32_t freecount, __int32_t freecount,
xfs_inofree_t free, xfs_inofree_t free,
int *stat) int *stat)
{ {
cur->bc_rec.i.ir_holemask = holemask;
cur->bc_rec.i.ir_count = count;
cur->bc_rec.i.ir_freecount = freecount; cur->bc_rec.i.ir_freecount = freecount;
cur->bc_rec.i.ir_free = free; cur->bc_rec.i.ir_free = free;
return xfs_btree_insert(cur, stat); return xfs_btree_insert(cur, stat);
@ -154,7 +182,9 @@ xfs_inobt_insert(
} }
ASSERT(i == 0); ASSERT(i == 0);
error = xfs_inobt_insert_rec(cur, XFS_INODES_PER_CHUNK, error = xfs_inobt_insert_rec(cur, XFS_INOBT_HOLEMASK_FULL,
XFS_INODES_PER_CHUNK,
XFS_INODES_PER_CHUNK,
XFS_INOBT_ALL_FREE, &i); XFS_INOBT_ALL_FREE, &i);
if (error) { if (error) {
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
@ -220,6 +250,7 @@ xfs_ialloc_inode_init(
struct xfs_mount *mp, struct xfs_mount *mp,
struct xfs_trans *tp, struct xfs_trans *tp,
struct list_head *buffer_list, struct list_head *buffer_list,
int icount,
xfs_agnumber_t agno, xfs_agnumber_t agno,
xfs_agblock_t agbno, xfs_agblock_t agbno,
xfs_agblock_t length, xfs_agblock_t length,
@ -275,7 +306,7 @@ xfs_ialloc_inode_init(
* they track in the AIL as if they were physically logged. * they track in the AIL as if they were physically logged.
*/ */
if (tp) if (tp)
xfs_icreate_log(tp, agno, agbno, mp->m_ialloc_inos, xfs_icreate_log(tp, agno, agbno, icount,
mp->m_sb.sb_inodesize, length, gen); mp->m_sb.sb_inodesize, length, gen);
} else } else
version = 2; version = 2;
@ -346,6 +377,214 @@ xfs_ialloc_inode_init(
return 0; return 0;
} }
/*
* Align startino and allocmask for a recently allocated sparse chunk such that
* they are fit for insertion (or merge) into the on-disk inode btrees.
*
* Background:
*
* When enabled, sparse inode support increases the inode alignment from cluster
* size to inode chunk size. This means that the minimum range between two
* non-adjacent inode records in the inobt is large enough for a full inode
* record. This allows for cluster sized, cluster aligned block allocation
* without need to worry about whether the resulting inode record overlaps with
* another record in the tree. Without this basic rule, we would have to deal
* with the consequences of overlap by potentially undoing recent allocations in
* the inode allocation codepath.
*
* Because of this alignment rule (which is enforced on mount), there are two
* inobt possibilities for newly allocated sparse chunks. One is that the
* aligned inode record for the chunk covers a range of inodes not already
* covered in the inobt (i.e., it is safe to insert a new sparse record). The
* other is that a record already exists at the aligned startino that considers
* the newly allocated range as sparse. In the latter case, record content is
* merged in hope that sparse inode chunks fill to full chunks over time.
*/
STATIC void
xfs_align_sparse_ino(
struct xfs_mount *mp,
xfs_agino_t *startino,
uint16_t *allocmask)
{
xfs_agblock_t agbno;
xfs_agblock_t mod;
int offset;
agbno = XFS_AGINO_TO_AGBNO(mp, *startino);
mod = agbno % mp->m_sb.sb_inoalignmt;
if (!mod)
return;
/* calculate the inode offset and align startino */
offset = mod << mp->m_sb.sb_inopblog;
*startino -= offset;
/*
* Since startino has been aligned down, left shift allocmask such that
* it continues to represent the same physical inodes relative to the
* new startino.
*/
*allocmask <<= offset / XFS_INODES_PER_HOLEMASK_BIT;
}
/*
* Determine whether the source inode record can merge into the target. Both
* records must be sparse, the inode ranges must match and there must be no
* allocation overlap between the records.
*/
STATIC bool
__xfs_inobt_can_merge(
struct xfs_inobt_rec_incore *trec, /* tgt record */
struct xfs_inobt_rec_incore *srec) /* src record */
{
uint64_t talloc;
uint64_t salloc;
/* records must cover the same inode range */
if (trec->ir_startino != srec->ir_startino)
return false;
/* both records must be sparse */
if (!xfs_inobt_issparse(trec->ir_holemask) ||
!xfs_inobt_issparse(srec->ir_holemask))
return false;
/* both records must track some inodes */
if (!trec->ir_count || !srec->ir_count)
return false;
/* can't exceed capacity of a full record */
if (trec->ir_count + srec->ir_count > XFS_INODES_PER_CHUNK)
return false;
/* verify there is no allocation overlap */
talloc = xfs_inobt_irec_to_allocmask(trec);
salloc = xfs_inobt_irec_to_allocmask(srec);
if (talloc & salloc)
return false;
return true;
}
/*
* Merge the source inode record into the target. The caller must call
* __xfs_inobt_can_merge() to ensure the merge is valid.
*/
STATIC void
__xfs_inobt_rec_merge(
struct xfs_inobt_rec_incore *trec, /* target */
struct xfs_inobt_rec_incore *srec) /* src */
{
ASSERT(trec->ir_startino == srec->ir_startino);
/* combine the counts */
trec->ir_count += srec->ir_count;
trec->ir_freecount += srec->ir_freecount;
/*
* Merge the holemask and free mask. For both fields, 0 bits refer to
* allocated inodes. We combine the allocated ranges with bitwise AND.
*/
trec->ir_holemask &= srec->ir_holemask;
trec->ir_free &= srec->ir_free;
}
/*
* Insert a new sparse inode chunk into the associated inode btree. The inode
* record for the sparse chunk is pre-aligned to a startino that should match
* any pre-existing sparse inode record in the tree. This allows sparse chunks
* to fill over time.
*
* This function supports two modes of handling preexisting records depending on
* the merge flag. If merge is true, the provided record is merged with the
* existing record and updated in place. The merged record is returned in nrec.
* If merge is false, an existing record is replaced with the provided record.
* If no preexisting record exists, the provided record is always inserted.
*
* It is considered corruption if a merge is requested and not possible. Given
* the sparse inode alignment constraints, this should never happen.
*/
STATIC int
xfs_inobt_insert_sprec(
struct xfs_mount *mp,
struct xfs_trans *tp,
struct xfs_buf *agbp,
int btnum,
struct xfs_inobt_rec_incore *nrec, /* in/out: new/merged rec. */
bool merge) /* merge or replace */
{
struct xfs_btree_cur *cur;
struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
int error;
int i;
struct xfs_inobt_rec_incore rec;
cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum);
/* the new record is pre-aligned so we know where to look */
error = xfs_inobt_lookup(cur, nrec->ir_startino, XFS_LOOKUP_EQ, &i);
if (error)
goto error;
/* if nothing there, insert a new record and return */
if (i == 0) {
error = xfs_inobt_insert_rec(cur, nrec->ir_holemask,
nrec->ir_count, nrec->ir_freecount,
nrec->ir_free, &i);
if (error)
goto error;
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error);
goto out;
}
/*
* A record exists at this startino. Merge or replace the record
* depending on what we've been asked to do.
*/
if (merge) {
error = xfs_inobt_get_rec(cur, &rec, &i);
if (error)
goto error;
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error);
XFS_WANT_CORRUPTED_GOTO(mp,
rec.ir_startino == nrec->ir_startino,
error);
/*
* This should never fail. If we have coexisting records that
* cannot merge, something is seriously wrong.
*/
XFS_WANT_CORRUPTED_GOTO(mp, __xfs_inobt_can_merge(nrec, &rec),
error);
trace_xfs_irec_merge_pre(mp, agno, rec.ir_startino,
rec.ir_holemask, nrec->ir_startino,
nrec->ir_holemask);
/* merge to nrec to output the updated record */
__xfs_inobt_rec_merge(nrec, &rec);
trace_xfs_irec_merge_post(mp, agno, nrec->ir_startino,
nrec->ir_holemask);
error = xfs_inobt_rec_check_count(mp, nrec);
if (error)
goto error;
}
error = xfs_inobt_update(cur, nrec);
if (error)
goto error;
out:
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
return 0;
error:
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
return error;
}
/* /*
* Allocate new inodes in the allocation group specified by agbp. * Allocate new inodes in the allocation group specified by agbp.
* Return 0 for success, else error code. * Return 0 for success, else error code.
@ -364,11 +603,22 @@ xfs_ialloc_ag_alloc(
xfs_agino_t newlen; /* new number of inodes */ xfs_agino_t newlen; /* new number of inodes */
int isaligned = 0; /* inode allocation at stripe unit */ int isaligned = 0; /* inode allocation at stripe unit */
/* boundary */ /* boundary */
uint16_t allocmask = (uint16_t) -1; /* init. to full chunk */
struct xfs_inobt_rec_incore rec;
struct xfs_perag *pag; struct xfs_perag *pag;
int do_sparse = 0;
memset(&args, 0, sizeof(args)); memset(&args, 0, sizeof(args));
args.tp = tp; args.tp = tp;
args.mp = tp->t_mountp; args.mp = tp->t_mountp;
args.fsbno = NULLFSBLOCK;
#ifdef DEBUG
/* randomly do sparse inode allocations */
if (xfs_sb_version_hassparseinodes(&tp->t_mountp->m_sb) &&
args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks)
do_sparse = prandom_u32() & 1;
#endif
/* /*
* Locking will ensure that we don't have two callers in here * Locking will ensure that we don't have two callers in here
@ -390,6 +640,8 @@ xfs_ialloc_ag_alloc(
agno = be32_to_cpu(agi->agi_seqno); agno = be32_to_cpu(agi->agi_seqno);
args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
args.mp->m_ialloc_blks; args.mp->m_ialloc_blks;
if (do_sparse)
goto sparse_alloc;
if (likely(newino != NULLAGINO && if (likely(newino != NULLAGINO &&
(args.agbno < be32_to_cpu(agi->agi_length)))) { (args.agbno < be32_to_cpu(agi->agi_length)))) {
args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
@ -428,8 +680,7 @@ xfs_ialloc_ag_alloc(
* subsequent requests. * subsequent requests.
*/ */
args.minalignslop = 0; args.minalignslop = 0;
} else }
args.fsbno = NULLFSBLOCK;
if (unlikely(args.fsbno == NULLFSBLOCK)) { if (unlikely(args.fsbno == NULLFSBLOCK)) {
/* /*
@ -480,6 +731,47 @@ xfs_ialloc_ag_alloc(
return error; return error;
} }
/*
* Finally, try a sparse allocation if the filesystem supports it and
* the sparse allocation length is smaller than a full chunk.
*/
if (xfs_sb_version_hassparseinodes(&args.mp->m_sb) &&
args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks &&
args.fsbno == NULLFSBLOCK) {
sparse_alloc:
args.type = XFS_ALLOCTYPE_NEAR_BNO;
args.agbno = be32_to_cpu(agi->agi_root);
args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
args.alignment = args.mp->m_sb.sb_spino_align;
args.prod = 1;
args.minlen = args.mp->m_ialloc_min_blks;
args.maxlen = args.minlen;
/*
* The inode record will be aligned to full chunk size. We must
* prevent sparse allocation from AG boundaries that result in
* invalid inode records, such as records that start at agbno 0
* or extend beyond the AG.
*
* Set min agbno to the first aligned, non-zero agbno and max to
* the last aligned agbno that is at least one full chunk from
* the end of the AG.
*/
args.min_agbno = args.mp->m_sb.sb_inoalignmt;
args.max_agbno = round_down(args.mp->m_sb.sb_agblocks,
args.mp->m_sb.sb_inoalignmt) -
args.mp->m_ialloc_blks;
error = xfs_alloc_vextent(&args);
if (error)
return error;
newlen = args.len << args.mp->m_sb.sb_inopblog;
ASSERT(newlen <= XFS_INODES_PER_CHUNK);
allocmask = (1 << (newlen / XFS_INODES_PER_HOLEMASK_BIT)) - 1;
}
if (args.fsbno == NULLFSBLOCK) { if (args.fsbno == NULLFSBLOCK) {
*alloc = 0; *alloc = 0;
return 0; return 0;
@ -495,8 +787,8 @@ xfs_ialloc_ag_alloc(
* rather than a linear progression to prevent the next generation * rather than a linear progression to prevent the next generation
* number from being easily guessable. * number from being easily guessable.
*/ */
error = xfs_ialloc_inode_init(args.mp, tp, NULL, agno, args.agbno, error = xfs_ialloc_inode_init(args.mp, tp, NULL, newlen, agno,
args.len, prandom_u32()); args.agbno, args.len, prandom_u32());
if (error) if (error)
return error; return error;
@ -504,6 +796,73 @@ xfs_ialloc_ag_alloc(
* Convert the results. * Convert the results.
*/ */
newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
if (xfs_inobt_issparse(~allocmask)) {
/*
* We've allocated a sparse chunk. Align the startino and mask.
*/
xfs_align_sparse_ino(args.mp, &newino, &allocmask);
rec.ir_startino = newino;
rec.ir_holemask = ~allocmask;
rec.ir_count = newlen;
rec.ir_freecount = newlen;
rec.ir_free = XFS_INOBT_ALL_FREE;
/*
* Insert the sparse record into the inobt and allow for a merge
* if necessary. If a merge does occur, rec is updated to the
* merged record.
*/
error = xfs_inobt_insert_sprec(args.mp, tp, agbp, XFS_BTNUM_INO,
&rec, true);
if (error == -EFSCORRUPTED) {
xfs_alert(args.mp,
"invalid sparse inode record: ino 0x%llx holemask 0x%x count %u",
XFS_AGINO_TO_INO(args.mp, agno,
rec.ir_startino),
rec.ir_holemask, rec.ir_count);
xfs_force_shutdown(args.mp, SHUTDOWN_CORRUPT_INCORE);
}
if (error)
return error;
/*
* We can't merge the part we've just allocated as for the inobt
* due to finobt semantics. The original record may or may not
* exist independent of whether physical inodes exist in this
* sparse chunk.
*
* We must update the finobt record based on the inobt record.
* rec contains the fully merged and up to date inobt record
* from the previous call. Set merge false to replace any
* existing record with this one.
*/
if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
error = xfs_inobt_insert_sprec(args.mp, tp, agbp,
XFS_BTNUM_FINO, &rec,
false);
if (error)
return error;
}
} else {
/* full chunk - insert new records to both btrees */
error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
XFS_BTNUM_INO);
if (error)
return error;
if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
error = xfs_inobt_insert(args.mp, tp, agbp, newino,
newlen, XFS_BTNUM_FINO);
if (error)
return error;
}
}
/*
* Update AGI counts and newino.
*/
be32_add_cpu(&agi->agi_count, newlen); be32_add_cpu(&agi->agi_count, newlen);
be32_add_cpu(&agi->agi_freecount, newlen); be32_add_cpu(&agi->agi_freecount, newlen);
pag = xfs_perag_get(args.mp, agno); pag = xfs_perag_get(args.mp, agno);
@ -511,20 +870,6 @@ xfs_ialloc_ag_alloc(
xfs_perag_put(pag); xfs_perag_put(pag);
agi->agi_newino = cpu_to_be32(newino); agi->agi_newino = cpu_to_be32(newino);
/*
* Insert records describing the new inode chunk into the btrees.
*/
error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
XFS_BTNUM_INO);
if (error)
return error;
if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
XFS_BTNUM_FINO);
if (error)
return error;
}
/* /*
* Log allocation group header fields * Log allocation group header fields
*/ */
@ -645,7 +990,7 @@ xfs_ialloc_ag_select(
* if we fail allocation due to alignment issues then it is most * if we fail allocation due to alignment issues then it is most
* likely a real ENOSPC condition. * likely a real ENOSPC condition.
*/ */
ineed = mp->m_ialloc_blks; ineed = mp->m_ialloc_min_blks;
if (flags && ineed > 1) if (flags && ineed > 1)
ineed += xfs_ialloc_cluster_alignment(mp); ineed += xfs_ialloc_cluster_alignment(mp);
longest = pag->pagf_longest; longest = pag->pagf_longest;
@ -731,6 +1076,27 @@ xfs_ialloc_get_rec(
return 0; return 0;
} }
/*
* Return the offset of the first free inode in the record. If the inode chunk
* is sparsely allocated, we convert the record holemask to inode granularity
* and mask off the unallocated regions from the inode free mask.
*/
STATIC int
xfs_inobt_first_free_inode(
struct xfs_inobt_rec_incore *rec)
{
xfs_inofree_t realfree;
/* if there are no holes, return the first available offset */
if (!xfs_inobt_issparse(rec->ir_holemask))
return xfs_lowbit64(rec->ir_free);
realfree = xfs_inobt_irec_to_allocmask(rec);
realfree &= rec->ir_free;
return xfs_lowbit64(realfree);
}
/* /*
* Allocate an inode using the inobt-only algorithm. * Allocate an inode using the inobt-only algorithm.
*/ */
@ -961,7 +1327,7 @@ xfs_dialloc_ag_inobt(
} }
alloc_inode: alloc_inode:
offset = xfs_lowbit64(rec.ir_free); offset = xfs_inobt_first_free_inode(&rec);
ASSERT(offset >= 0); ASSERT(offset >= 0);
ASSERT(offset < XFS_INODES_PER_CHUNK); ASSERT(offset < XFS_INODES_PER_CHUNK);
ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
@ -1210,7 +1576,7 @@ xfs_dialloc_ag(
if (error) if (error)
goto error_cur; goto error_cur;
offset = xfs_lowbit64(rec.ir_free); offset = xfs_inobt_first_free_inode(&rec);
ASSERT(offset >= 0); ASSERT(offset >= 0);
ASSERT(offset < XFS_INODES_PER_CHUNK); ASSERT(offset < XFS_INODES_PER_CHUNK);
ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
@ -1439,6 +1805,83 @@ xfs_dialloc(
return error; return error;
} }
/*
* Free the blocks of an inode chunk. We must consider that the inode chunk
* might be sparse and only free the regions that are allocated as part of the
* chunk.
*/
STATIC void
xfs_difree_inode_chunk(
struct xfs_mount *mp,
xfs_agnumber_t agno,
struct xfs_inobt_rec_incore *rec,
struct xfs_bmap_free *flist)
{
xfs_agblock_t sagbno = XFS_AGINO_TO_AGBNO(mp, rec->ir_startino);
int startidx, endidx;
int nextbit;
xfs_agblock_t agbno;
int contigblk;
DECLARE_BITMAP(holemask, XFS_INOBT_HOLEMASK_BITS);
if (!xfs_inobt_issparse(rec->ir_holemask)) {
/* not sparse, calculate extent info directly */
xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno,
XFS_AGINO_TO_AGBNO(mp, rec->ir_startino)),
mp->m_ialloc_blks, flist, mp);
return;
}
/* holemask is only 16-bits (fits in an unsigned long) */
ASSERT(sizeof(rec->ir_holemask) <= sizeof(holemask[0]));
holemask[0] = rec->ir_holemask;
/*
* Find contiguous ranges of zeroes (i.e., allocated regions) in the
* holemask and convert the start/end index of each range to an extent.
* We start with the start and end index both pointing at the first 0 in
* the mask.
*/
startidx = endidx = find_first_zero_bit(holemask,
XFS_INOBT_HOLEMASK_BITS);
nextbit = startidx + 1;
while (startidx < XFS_INOBT_HOLEMASK_BITS) {
nextbit = find_next_zero_bit(holemask, XFS_INOBT_HOLEMASK_BITS,
nextbit);
/*
* If the next zero bit is contiguous, update the end index of
* the current range and continue.
*/
if (nextbit != XFS_INOBT_HOLEMASK_BITS &&
nextbit == endidx + 1) {
endidx = nextbit;
goto next;
}
/*
* nextbit is not contiguous with the current end index. Convert
* the current start/end to an extent and add it to the free
* list.
*/
agbno = sagbno + (startidx * XFS_INODES_PER_HOLEMASK_BIT) /
mp->m_sb.sb_inopblock;
contigblk = ((endidx - startidx + 1) *
XFS_INODES_PER_HOLEMASK_BIT) /
mp->m_sb.sb_inopblock;
ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,
flist, mp);
/* reset range to current bit and carry on... */
startidx = endidx = nextbit;
next:
nextbit++;
}
}
STATIC int STATIC int
xfs_difree_inobt( xfs_difree_inobt(
struct xfs_mount *mp, struct xfs_mount *mp,
@ -1446,8 +1889,7 @@ xfs_difree_inobt(
struct xfs_buf *agbp, struct xfs_buf *agbp,
xfs_agino_t agino, xfs_agino_t agino,
struct xfs_bmap_free *flist, struct xfs_bmap_free *flist,
int *deleted, struct xfs_icluster *xic,
xfs_ino_t *first_ino,
struct xfs_inobt_rec_incore *orec) struct xfs_inobt_rec_incore *orec)
{ {
struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
@ -1501,20 +1943,23 @@ xfs_difree_inobt(
rec.ir_freecount++; rec.ir_freecount++;
/* /*
* When an inode cluster is free, it becomes eligible for removal * When an inode chunk is free, it becomes eligible for removal. Don't
* remove the chunk if the block size is large enough for multiple inode
* chunks (that might not be free).
*/ */
if (!(mp->m_flags & XFS_MOUNT_IKEEP) && if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
(rec.ir_freecount == mp->m_ialloc_inos)) { rec.ir_free == XFS_INOBT_ALL_FREE &&
mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
*deleted = 1; xic->deleted = 1;
*first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino); xic->first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
xic->alloc = xfs_inobt_irec_to_allocmask(&rec);
/* /*
* Remove the inode cluster from the AGI B+Tree, adjust the * Remove the inode cluster from the AGI B+Tree, adjust the
* AGI and Superblock inode counts, and mark the disk space * AGI and Superblock inode counts, and mark the disk space
* to be freed when the transaction is committed. * to be freed when the transaction is committed.
*/ */
ilen = mp->m_ialloc_inos; ilen = rec.ir_freecount;
be32_add_cpu(&agi->agi_count, -ilen); be32_add_cpu(&agi->agi_count, -ilen);
be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
@ -1530,11 +1975,9 @@ xfs_difree_inobt(
goto error0; goto error0;
} }
xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno, xfs_difree_inode_chunk(mp, agno, &rec, flist);
XFS_AGINO_TO_AGBNO(mp, rec.ir_startino)),
mp->m_ialloc_blks, flist, mp);
} else { } else {
*deleted = 0; xic->deleted = 0;
error = xfs_inobt_update(cur, &rec); error = xfs_inobt_update(cur, &rec);
if (error) { if (error) {
@ -1599,7 +2042,9 @@ xfs_difree_finobt(
*/ */
XFS_WANT_CORRUPTED_GOTO(mp, ibtrec->ir_freecount == 1, error); XFS_WANT_CORRUPTED_GOTO(mp, ibtrec->ir_freecount == 1, error);
error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount, error = xfs_inobt_insert_rec(cur, ibtrec->ir_holemask,
ibtrec->ir_count,
ibtrec->ir_freecount,
ibtrec->ir_free, &i); ibtrec->ir_free, &i);
if (error) if (error)
goto error; goto error;
@ -1634,8 +2079,13 @@ xfs_difree_finobt(
* free inode. Hence, if all of the inodes are free and we aren't * free inode. Hence, if all of the inodes are free and we aren't
* keeping inode chunks permanently on disk, remove the record. * keeping inode chunks permanently on disk, remove the record.
* Otherwise, update the record with the new information. * Otherwise, update the record with the new information.
*
* Note that we currently can't free chunks when the block size is large
* enough for multiple chunks. Leave the finobt record to remain in sync
* with the inobt.
*/ */
if (rec.ir_freecount == mp->m_ialloc_inos && if (rec.ir_free == XFS_INOBT_ALL_FREE &&
mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK &&
!(mp->m_flags & XFS_MOUNT_IKEEP)) { !(mp->m_flags & XFS_MOUNT_IKEEP)) {
error = xfs_btree_delete(cur, &i); error = xfs_btree_delete(cur, &i);
if (error) if (error)
@ -1671,8 +2121,7 @@ xfs_difree(
struct xfs_trans *tp, /* transaction pointer */ struct xfs_trans *tp, /* transaction pointer */
xfs_ino_t inode, /* inode to be freed */ xfs_ino_t inode, /* inode to be freed */
struct xfs_bmap_free *flist, /* extents to free */ struct xfs_bmap_free *flist, /* extents to free */
int *deleted,/* set if inode cluster was deleted */ struct xfs_icluster *xic) /* cluster info if deleted */
xfs_ino_t *first_ino)/* first inode in deleted cluster */
{ {
/* REFERENCED */ /* REFERENCED */
xfs_agblock_t agbno; /* block number containing inode */ xfs_agblock_t agbno; /* block number containing inode */
@ -1723,8 +2172,7 @@ xfs_difree(
/* /*
* Fix up the inode allocation btree. * Fix up the inode allocation btree.
*/ */
error = xfs_difree_inobt(mp, tp, agbp, agino, flist, deleted, first_ino, error = xfs_difree_inobt(mp, tp, agbp, agino, flist, xic, &rec);
&rec);
if (error) if (error)
goto error0; goto error0;

View file

@ -28,6 +28,13 @@ struct xfs_btree_cur;
/* Move inodes in clusters of this size */ /* Move inodes in clusters of this size */
#define XFS_INODE_BIG_CLUSTER_SIZE 8192 #define XFS_INODE_BIG_CLUSTER_SIZE 8192
struct xfs_icluster {
bool deleted; /* record is deleted */
xfs_ino_t first_ino; /* first inode number */
uint64_t alloc; /* inode phys. allocation bitmap for
* sparse chunks */
};
/* Calculate and return the number of filesystem blocks per inode cluster */ /* Calculate and return the number of filesystem blocks per inode cluster */
static inline int static inline int
xfs_icluster_size_fsb( xfs_icluster_size_fsb(
@ -44,8 +51,7 @@ xfs_icluster_size_fsb(
static inline struct xfs_dinode * static inline struct xfs_dinode *
xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o) xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o)
{ {
return (struct xfs_dinode *) return xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog);
(xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog));
} }
/* /*
@ -90,8 +96,7 @@ xfs_difree(
struct xfs_trans *tp, /* transaction pointer */ struct xfs_trans *tp, /* transaction pointer */
xfs_ino_t inode, /* inode to be freed */ xfs_ino_t inode, /* inode to be freed */
struct xfs_bmap_free *flist, /* extents to free */ struct xfs_bmap_free *flist, /* extents to free */
int *deleted, /* set if inode cluster was deleted */ struct xfs_icluster *ifree); /* cluster info if deleted */
xfs_ino_t *first_ino); /* first inode in deleted cluster */
/* /*
* Return the location of the inode in imap, for mapping it into a buffer. * Return the location of the inode in imap, for mapping it into a buffer.
@ -156,7 +161,7 @@ int xfs_inobt_get_rec(struct xfs_btree_cur *cur,
* Inode chunk initialisation routine * Inode chunk initialisation routine
*/ */
int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp, int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp,
struct list_head *buffer_list, struct list_head *buffer_list, int icount,
xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_agnumber_t agno, xfs_agblock_t agbno,
xfs_agblock_t length, unsigned int gen); xfs_agblock_t length, unsigned int gen);

View file

@ -167,7 +167,16 @@ xfs_inobt_init_rec_from_cur(
union xfs_btree_rec *rec) union xfs_btree_rec *rec)
{ {
rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino); rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino);
rec->inobt.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount); if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
rec->inobt.ir_u.sp.ir_holemask =
cpu_to_be16(cur->bc_rec.i.ir_holemask);
rec->inobt.ir_u.sp.ir_count = cur->bc_rec.i.ir_count;
rec->inobt.ir_u.sp.ir_freecount = cur->bc_rec.i.ir_freecount;
} else {
/* ir_holemask/ir_count not supported on-disk */
rec->inobt.ir_u.f.ir_freecount =
cpu_to_be32(cur->bc_rec.i.ir_freecount);
}
rec->inobt.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free); rec->inobt.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free);
} }
@ -418,3 +427,85 @@ xfs_inobt_maxrecs(
return blocklen / sizeof(xfs_inobt_rec_t); return blocklen / sizeof(xfs_inobt_rec_t);
return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t)); return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t));
} }
/*
* Convert the inode record holemask to an inode allocation bitmap. The inode
* allocation bitmap is inode granularity and specifies whether an inode is
* physically allocated on disk (not whether the inode is considered allocated
* or free by the fs).
*
* A bit value of 1 means the inode is allocated, a value of 0 means it is free.
*/
uint64_t
xfs_inobt_irec_to_allocmask(
struct xfs_inobt_rec_incore *rec)
{
uint64_t bitmap = 0;
uint64_t inodespbit;
int nextbit;
uint allocbitmap;
/*
* The holemask has 16-bits for a 64 inode record. Therefore each
* holemask bit represents multiple inodes. Create a mask of bits to set
* in the allocmask for each holemask bit.
*/
inodespbit = (1 << XFS_INODES_PER_HOLEMASK_BIT) - 1;
/*
* Allocated inodes are represented by 0 bits in holemask. Invert the 0
* bits to 1 and convert to a uint so we can use xfs_next_bit(). Mask
* anything beyond the 16 holemask bits since this casts to a larger
* type.
*/
allocbitmap = ~rec->ir_holemask & ((1 << XFS_INOBT_HOLEMASK_BITS) - 1);
/*
* allocbitmap is the inverted holemask so every set bit represents
* allocated inodes. To expand from 16-bit holemask granularity to
* 64-bit (e.g., bit-per-inode), set inodespbit bits in the target
* bitmap for every holemask bit.
*/
nextbit = xfs_next_bit(&allocbitmap, 1, 0);
while (nextbit != -1) {
ASSERT(nextbit < (sizeof(rec->ir_holemask) * NBBY));
bitmap |= (inodespbit <<
(nextbit * XFS_INODES_PER_HOLEMASK_BIT));
nextbit = xfs_next_bit(&allocbitmap, 1, nextbit + 1);
}
return bitmap;
}
#if defined(DEBUG) || defined(XFS_WARN)
/*
* Verify that an in-core inode record has a valid inode count.
*/
int
xfs_inobt_rec_check_count(
struct xfs_mount *mp,
struct xfs_inobt_rec_incore *rec)
{
int inocount = 0;
int nextbit = 0;
uint64_t allocbmap;
int wordsz;
wordsz = sizeof(allocbmap) / sizeof(unsigned int);
allocbmap = xfs_inobt_irec_to_allocmask(rec);
nextbit = xfs_next_bit((uint *) &allocbmap, wordsz, nextbit);
while (nextbit != -1) {
inocount++;
nextbit = xfs_next_bit((uint *) &allocbmap, wordsz,
nextbit + 1);
}
if (inocount != rec->ir_count)
return -EFSCORRUPTED;
return 0;
}
#endif /* DEBUG */

View file

@ -62,4 +62,14 @@ extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *,
xfs_btnum_t); xfs_btnum_t);
extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int);
/* ir_holemask to inode allocation bitmap conversion */
uint64_t xfs_inobt_irec_to_allocmask(struct xfs_inobt_rec_incore *);
#if defined(DEBUG) || defined(XFS_WARN)
int xfs_inobt_rec_check_count(struct xfs_mount *,
struct xfs_inobt_rec_incore *);
#else
#define xfs_inobt_rec_check_count(mp, rec) 0
#endif /* DEBUG */
#endif /* __XFS_IALLOC_BTREE_H__ */ #endif /* __XFS_IALLOC_BTREE_H__ */

View file

@ -46,8 +46,7 @@ xfs_inobp_check(
j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
for (i = 0; i < j; i++) { for (i = 0; i < j; i++) {
dip = (xfs_dinode_t *)xfs_buf_offset(bp, dip = xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize);
i * mp->m_sb.sb_inodesize);
if (!dip->di_next_unlinked) { if (!dip->di_next_unlinked) {
xfs_alert(mp, xfs_alert(mp,
"Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.", "Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.",
@ -86,8 +85,7 @@ xfs_inode_buf_verify(
int di_ok; int di_ok;
xfs_dinode_t *dip; xfs_dinode_t *dip;
dip = (struct xfs_dinode *)xfs_buf_offset(bp, dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
(i << mp->m_sb.sb_inodelog));
di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
XFS_DINODE_GOOD_VERSION(dip->di_version); XFS_DINODE_GOOD_VERSION(dip->di_version);
if (unlikely(XFS_TEST_ERROR(!di_ok, mp, if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
@ -186,7 +184,7 @@ xfs_imap_to_bp(
} }
*bpp = bp; *bpp = bp;
*dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset); *dipp = xfs_buf_offset(bp, imap->im_boffset);
return 0; return 0;
} }

View file

@ -174,6 +174,27 @@ xfs_mount_validate_sb(
return -EFSCORRUPTED; return -EFSCORRUPTED;
} }
/*
* Full inode chunks must be aligned to inode chunk size when
* sparse inodes are enabled to support the sparse chunk
* allocation algorithm and prevent overlapping inode records.
*/
if (xfs_sb_version_hassparseinodes(sbp)) {
uint32_t align;
xfs_alert(mp,
"EXPERIMENTAL sparse inode feature enabled. Use at your own risk!");
align = XFS_INODES_PER_CHUNK * sbp->sb_inodesize
>> sbp->sb_blocklog;
if (sbp->sb_inoalignmt != align) {
xfs_warn(mp,
"Inode block alignment (%u) must match chunk size (%u) for sparse inodes.",
sbp->sb_inoalignmt, align);
return -EINVAL;
}
}
if (unlikely( if (unlikely(
sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) { sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
xfs_warn(mp, xfs_warn(mp,
@ -374,7 +395,7 @@ __xfs_sb_from_disk(
be32_to_cpu(from->sb_features_log_incompat); be32_to_cpu(from->sb_features_log_incompat);
/* crc is only used on disk, not in memory; just init to 0 here. */ /* crc is only used on disk, not in memory; just init to 0 here. */
to->sb_crc = 0; to->sb_crc = 0;
to->sb_pad = 0; to->sb_spino_align = be32_to_cpu(from->sb_spino_align);
to->sb_pquotino = be64_to_cpu(from->sb_pquotino); to->sb_pquotino = be64_to_cpu(from->sb_pquotino);
to->sb_lsn = be64_to_cpu(from->sb_lsn); to->sb_lsn = be64_to_cpu(from->sb_lsn);
/* Convert on-disk flags to in-memory flags? */ /* Convert on-disk flags to in-memory flags? */
@ -516,7 +537,7 @@ xfs_sb_to_disk(
cpu_to_be32(from->sb_features_incompat); cpu_to_be32(from->sb_features_incompat);
to->sb_features_log_incompat = to->sb_features_log_incompat =
cpu_to_be32(from->sb_features_log_incompat); cpu_to_be32(from->sb_features_log_incompat);
to->sb_pad = 0; to->sb_spino_align = cpu_to_be32(from->sb_spino_align);
to->sb_lsn = cpu_to_be64(from->sb_lsn); to->sb_lsn = cpu_to_be64(from->sb_lsn);
} }
} }
@ -689,6 +710,11 @@ xfs_sb_mount_common(
mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK, mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
sbp->sb_inopblock); sbp->sb_inopblock);
mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog; mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
if (sbp->sb_spino_align)
mp->m_ialloc_min_blks = sbp->sb_spino_align;
else
mp->m_ialloc_min_blks = mp->m_ialloc_blks;
} }
/* /*
@ -792,12 +818,12 @@ xfs_sync_sb(
tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_CHANGE, KM_SLEEP); tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_CHANGE, KM_SLEEP);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0);
if (error) { if (error) {
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
return error; return error;
} }
xfs_log_sb(tp); xfs_log_sb(tp);
if (wait) if (wait)
xfs_trans_set_sync(tp); xfs_trans_set_sync(tp);
return xfs_trans_commit(tp, 0); return xfs_trans_commit(tp);
} }

View file

@ -181,12 +181,6 @@ int xfs_log_calc_minimum_size(struct xfs_mount *);
#define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ #define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */
#define XFS_TRANS_FREEZE_PROT 0x40 /* Transaction has elevated writer #define XFS_TRANS_FREEZE_PROT 0x40 /* Transaction has elevated writer
count in superblock */ count in superblock */
/*
* Values for call flags parameter.
*/
#define XFS_TRANS_RELEASE_LOG_RES 0x4
#define XFS_TRANS_ABORT 0x8
/* /*
* Field values for xfs_trans_mod_sb. * Field values for xfs_trans_mod_sb.
*/ */

View file

@ -73,9 +73,9 @@ struct xfs_trans_resv {
* 2 trees * (2 blocks/level * max depth - 1) * block size * 2 trees * (2 blocks/level * max depth - 1) * block size
*/ */
#define XFS_ALLOCFREE_LOG_RES(mp,nx) \ #define XFS_ALLOCFREE_LOG_RES(mp,nx) \
((nx) * (2 * XFS_FSB_TO_B((mp), 2 * XFS_AG_MAXLEVELS(mp) - 1))) ((nx) * (2 * XFS_FSB_TO_B((mp), 2 * (mp)->m_ag_maxlevels - 1)))
#define XFS_ALLOCFREE_LOG_COUNT(mp,nx) \ #define XFS_ALLOCFREE_LOG_COUNT(mp,nx) \
((nx) * (2 * (2 * XFS_AG_MAXLEVELS(mp) - 1))) ((nx) * (2 * (2 * (mp)->m_ag_maxlevels - 1)))
/* /*
* Per-directory log reservation for any directory change. * Per-directory log reservation for any directory change.

View file

@ -67,7 +67,7 @@
#define XFS_DIOSTRAT_SPACE_RES(mp, v) \ #define XFS_DIOSTRAT_SPACE_RES(mp, v) \
(XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + (v)) (XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + (v))
#define XFS_GROWFS_SPACE_RES(mp) \ #define XFS_GROWFS_SPACE_RES(mp) \
(2 * XFS_AG_MAXLEVELS(mp)) (2 * (mp)->m_ag_maxlevels)
#define XFS_GROWFSRT_SPACE_RES(mp,b) \ #define XFS_GROWFSRT_SPACE_RES(mp,b) \
((b) + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK)) ((b) + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK))
#define XFS_LINK_SPACE_RES(mp,nl) \ #define XFS_LINK_SPACE_RES(mp,nl) \

View file

@ -109,7 +109,7 @@ xfs_setfilesize_trans_alloc(
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
if (error) { if (error) {
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
return error; return error;
} }
@ -145,7 +145,7 @@ xfs_setfilesize(
isize = xfs_new_eof(ip, offset + size); isize = xfs_new_eof(ip, offset + size);
if (!isize) { if (!isize) {
xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
return 0; return 0;
} }
@ -155,7 +155,7 @@ xfs_setfilesize(
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
return xfs_trans_commit(tp, 0); return xfs_trans_commit(tp);
} }
STATIC int STATIC int
@ -1348,7 +1348,7 @@ __xfs_get_blocks(
sector_t iblock, sector_t iblock,
struct buffer_head *bh_result, struct buffer_head *bh_result,
int create, int create,
int direct) bool direct)
{ {
struct xfs_inode *ip = XFS_I(inode); struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount; struct xfs_mount *mp = ip->i_mount;
@ -1413,6 +1413,7 @@ __xfs_get_blocks(
if (error) if (error)
return error; return error;
new = 1; new = 1;
} else { } else {
/* /*
* Delalloc reservations do not require a transaction, * Delalloc reservations do not require a transaction,
@ -1507,49 +1508,29 @@ xfs_get_blocks(
struct buffer_head *bh_result, struct buffer_head *bh_result,
int create) int create)
{ {
return __xfs_get_blocks(inode, iblock, bh_result, create, 0); return __xfs_get_blocks(inode, iblock, bh_result, create, false);
} }
STATIC int int
xfs_get_blocks_direct( xfs_get_blocks_direct(
struct inode *inode, struct inode *inode,
sector_t iblock, sector_t iblock,
struct buffer_head *bh_result, struct buffer_head *bh_result,
int create) int create)
{ {
return __xfs_get_blocks(inode, iblock, bh_result, create, 1); return __xfs_get_blocks(inode, iblock, bh_result, create, true);
} }
/* static void
* Complete a direct I/O write request. __xfs_end_io_direct_write(
* struct inode *inode,
* The ioend structure is passed from __xfs_get_blocks() to tell us what to do. struct xfs_ioend *ioend,
* If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
* wholly within the EOF and so there is nothing for us to do. Note that in this
* case the completion can be called in interrupt context, whereas if we have an
* ioend we will always be called in task context (i.e. from a workqueue).
*/
STATIC void
xfs_end_io_direct_write(
struct kiocb *iocb,
loff_t offset, loff_t offset,
ssize_t size, ssize_t size)
void *private)
{ {
struct inode *inode = file_inode(iocb->ki_filp); struct xfs_mount *mp = XFS_I(inode)->i_mount;
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
struct xfs_ioend *ioend = private;
trace_xfs_gbmap_direct_endio(ip, offset, size, if (XFS_FORCED_SHUTDOWN(mp) || ioend->io_error)
ioend ? ioend->io_type : 0, NULL);
if (!ioend) {
ASSERT(offset + size <= i_size_read(inode));
return;
}
if (XFS_FORCED_SHUTDOWN(mp))
goto out_end_io; goto out_end_io;
/* /*
@ -1586,10 +1567,10 @@ xfs_end_io_direct_write(
* here can result in EOF moving backwards and Bad Things Happen when * here can result in EOF moving backwards and Bad Things Happen when
* that occurs. * that occurs.
*/ */
spin_lock(&ip->i_flags_lock); spin_lock(&XFS_I(inode)->i_flags_lock);
if (offset + size > i_size_read(inode)) if (offset + size > i_size_read(inode))
i_size_write(inode, offset + size); i_size_write(inode, offset + size);
spin_unlock(&ip->i_flags_lock); spin_unlock(&XFS_I(inode)->i_flags_lock);
/* /*
* If we are doing an append IO that needs to update the EOF on disk, * If we are doing an append IO that needs to update the EOF on disk,
@ -1606,6 +1587,98 @@ xfs_end_io_direct_write(
return; return;
} }
/*
* Complete a direct I/O write request.
*
* The ioend structure is passed from __xfs_get_blocks() to tell us what to do.
* If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
* wholly within the EOF and so there is nothing for us to do. Note that in this
* case the completion can be called in interrupt context, whereas if we have an
* ioend we will always be called in task context (i.e. from a workqueue).
*/
STATIC void
xfs_end_io_direct_write(
struct kiocb *iocb,
loff_t offset,
ssize_t size,
void *private)
{
struct inode *inode = file_inode(iocb->ki_filp);
struct xfs_ioend *ioend = private;
trace_xfs_gbmap_direct_endio(XFS_I(inode), offset, size,
ioend ? ioend->io_type : 0, NULL);
if (!ioend) {
ASSERT(offset + size <= i_size_read(inode));
return;
}
__xfs_end_io_direct_write(inode, ioend, offset, size);
}
/*
* For DAX we need a mapping buffer callback for unwritten extent conversion
* when page faults allocate blocks and then zero them. Note that in this
* case the mapping indicated by the ioend may extend beyond EOF. We most
* definitely do not want to extend EOF here, so we trim back the ioend size to
* EOF.
*/
#ifdef CONFIG_FS_DAX
void
xfs_end_io_dax_write(
struct buffer_head *bh,
int uptodate)
{
struct xfs_ioend *ioend = bh->b_private;
struct inode *inode = ioend->io_inode;
ssize_t size = ioend->io_size;
ASSERT(IS_DAX(ioend->io_inode));
/* if there was an error zeroing, then don't convert it */
if (!uptodate)
ioend->io_error = -EIO;
/*
* Trim update to EOF, so we don't extend EOF during unwritten extent
* conversion of partial EOF blocks.
*/
spin_lock(&XFS_I(inode)->i_flags_lock);
if (ioend->io_offset + size > i_size_read(inode))
size = i_size_read(inode) - ioend->io_offset;
spin_unlock(&XFS_I(inode)->i_flags_lock);
__xfs_end_io_direct_write(inode, ioend, ioend->io_offset, size);
}
#else
void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate) { }
#endif
static inline ssize_t
xfs_vm_do_dio(
struct inode *inode,
struct kiocb *iocb,
struct iov_iter *iter,
loff_t offset,
void (*endio)(struct kiocb *iocb,
loff_t offset,
ssize_t size,
void *private),
int flags)
{
struct block_device *bdev;
if (IS_DAX(inode))
return dax_do_io(iocb, inode, iter, offset,
xfs_get_blocks_direct, endio, 0);
bdev = xfs_find_bdev_for_inode(inode);
return __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
xfs_get_blocks_direct, endio, NULL, flags);
}
STATIC ssize_t STATIC ssize_t
xfs_vm_direct_IO( xfs_vm_direct_IO(
struct kiocb *iocb, struct kiocb *iocb,
@ -1613,16 +1686,11 @@ xfs_vm_direct_IO(
loff_t offset) loff_t offset)
{ {
struct inode *inode = iocb->ki_filp->f_mapping->host; struct inode *inode = iocb->ki_filp->f_mapping->host;
struct block_device *bdev = xfs_find_bdev_for_inode(inode);
if (iov_iter_rw(iter) == WRITE) { if (iov_iter_rw(iter) == WRITE)
return __blockdev_direct_IO(iocb, inode, bdev, iter, offset, return xfs_vm_do_dio(inode, iocb, iter, offset,
xfs_get_blocks_direct, xfs_end_io_direct_write, DIO_ASYNC_EXTEND);
xfs_end_io_direct_write, NULL, return xfs_vm_do_dio(inode, iocb, iter, offset, NULL, 0);
DIO_ASYNC_EXTEND);
}
return __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
xfs_get_blocks_direct, NULL, NULL, 0);
} }
/* /*

View file

@ -53,7 +53,12 @@ typedef struct xfs_ioend {
} xfs_ioend_t; } xfs_ioend_t;
extern const struct address_space_operations xfs_address_space_operations; extern const struct address_space_operations xfs_address_space_operations;
extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
int xfs_get_blocks(struct inode *inode, sector_t offset,
struct buffer_head *map_bh, int create);
int xfs_get_blocks_direct(struct inode *inode, sector_t offset,
struct buffer_head *map_bh, int create);
void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate);
extern void xfs_count_page_state(struct page *, int *, int *); extern void xfs_count_page_state(struct page *, int *, int *);

View file

@ -394,7 +394,6 @@ xfs_attr_inactive(
{ {
struct xfs_trans *trans; struct xfs_trans *trans;
struct xfs_mount *mp; struct xfs_mount *mp;
int cancel_flags = 0;
int lock_mode = XFS_ILOCK_SHARED; int lock_mode = XFS_ILOCK_SHARED;
int error = 0; int error = 0;
@ -423,7 +422,6 @@ xfs_attr_inactive(
goto out_cancel; goto out_cancel;
lock_mode = XFS_ILOCK_EXCL; lock_mode = XFS_ILOCK_EXCL;
cancel_flags = XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT;
xfs_ilock(dp, lock_mode); xfs_ilock(dp, lock_mode);
if (!XFS_IFORK_Q(dp)) if (!XFS_IFORK_Q(dp))
@ -435,8 +433,14 @@ xfs_attr_inactive(
*/ */
xfs_trans_ijoin(trans, dp, 0); xfs_trans_ijoin(trans, dp, 0);
/* invalidate and truncate the attribute fork extents */ /*
if (dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) { * Invalidate and truncate the attribute fork extents. Make sure the
* fork actually has attributes as otherwise the invalidation has no
* blocks to read and returns an error. In this case, just do the fork
* removal below.
*/
if (xfs_inode_hasattr(dp) &&
dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) {
error = xfs_attr3_root_inactive(&trans, dp); error = xfs_attr3_root_inactive(&trans, dp);
if (error) if (error)
goto out_cancel; goto out_cancel;
@ -449,12 +453,12 @@ xfs_attr_inactive(
/* Reset the attribute fork - this also destroys the in-core fork */ /* Reset the attribute fork - this also destroys the in-core fork */
xfs_attr_fork_remove(dp, trans); xfs_attr_fork_remove(dp, trans);
error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES); error = xfs_trans_commit(trans);
xfs_iunlock(dp, lock_mode); xfs_iunlock(dp, lock_mode);
return error; return error;
out_cancel: out_cancel:
xfs_trans_cancel(trans, cancel_flags); xfs_trans_cancel(trans);
out_destroy_fork: out_destroy_fork:
/* kill the in-core attr fork before we drop the inode lock */ /* kill the in-core attr fork before we drop the inode lock */
if (dp->i_afp) if (dp->i_afp)

View file

@ -75,28 +75,20 @@ xfs_bmap_finish(
xfs_efi_log_item_t *efi; /* extent free intention */ xfs_efi_log_item_t *efi; /* extent free intention */
int error; /* error return value */ int error; /* error return value */
xfs_bmap_free_item_t *free; /* free extent item */ xfs_bmap_free_item_t *free; /* free extent item */
struct xfs_trans_res tres; /* new log reservation */
xfs_mount_t *mp; /* filesystem mount structure */ xfs_mount_t *mp; /* filesystem mount structure */
xfs_bmap_free_item_t *next; /* next item on free list */ xfs_bmap_free_item_t *next; /* next item on free list */
xfs_trans_t *ntp; /* new transaction pointer */
ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
if (flist->xbf_count == 0) { if (flist->xbf_count == 0) {
*committed = 0; *committed = 0;
return 0; return 0;
} }
ntp = *tp; efi = xfs_trans_get_efi(*tp, flist->xbf_count);
efi = xfs_trans_get_efi(ntp, flist->xbf_count);
for (free = flist->xbf_first; free; free = free->xbfi_next) for (free = flist->xbf_first; free; free = free->xbfi_next)
xfs_trans_log_efi_extent(ntp, efi, free->xbfi_startblock, xfs_trans_log_efi_extent(*tp, efi, free->xbfi_startblock,
free->xbfi_blockcount); free->xbfi_blockcount);
tres.tr_logres = ntp->t_log_res; error = xfs_trans_roll(tp, NULL);
tres.tr_logcount = ntp->t_log_count;
tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
ntp = xfs_trans_dup(*tp);
error = xfs_trans_commit(*tp, 0);
*tp = ntp;
*committed = 1; *committed = 1;
/* /*
* We have a new transaction, so we should return committed=1, * We have a new transaction, so we should return committed=1,
@ -105,19 +97,10 @@ xfs_bmap_finish(
if (error) if (error)
return error; return error;
/* efd = xfs_trans_get_efd(*tp, efi, flist->xbf_count);
* transaction commit worked ok so we can drop the extra ticket
* reference that we gained in xfs_trans_dup()
*/
xfs_log_ticket_put(ntp->t_ticket);
error = xfs_trans_reserve(ntp, &tres, 0, 0);
if (error)
return error;
efd = xfs_trans_get_efd(ntp, efi, flist->xbf_count);
for (free = flist->xbf_first; free != NULL; free = next) { for (free = flist->xbf_first; free != NULL; free = next) {
next = free->xbfi_next; next = free->xbfi_next;
if ((error = xfs_free_extent(ntp, free->xbfi_startblock, if ((error = xfs_free_extent(*tp, free->xbfi_startblock,
free->xbfi_blockcount))) { free->xbfi_blockcount))) {
/* /*
* The bmap free list will be cleaned up at a * The bmap free list will be cleaned up at a
@ -127,7 +110,7 @@ xfs_bmap_finish(
* happens, since this transaction may not be * happens, since this transaction may not be
* dirty yet. * dirty yet.
*/ */
mp = ntp->t_mountp; mp = (*tp)->t_mountp;
if (!XFS_FORCED_SHUTDOWN(mp)) if (!XFS_FORCED_SHUTDOWN(mp))
xfs_force_shutdown(mp, xfs_force_shutdown(mp,
(error == -EFSCORRUPTED) ? (error == -EFSCORRUPTED) ?
@ -135,7 +118,7 @@ xfs_bmap_finish(
SHUTDOWN_META_IO_ERROR); SHUTDOWN_META_IO_ERROR);
return error; return error;
} }
xfs_trans_log_efd_extent(ntp, efd, free->xbfi_startblock, xfs_trans_log_efd_extent(*tp, efd, free->xbfi_startblock,
free->xbfi_blockcount); free->xbfi_blockcount);
xfs_bmap_del_free(flist, NULL, free); xfs_bmap_del_free(flist, NULL, free);
} }
@ -878,7 +861,7 @@ xfs_free_eofblocks(
if (need_iolock) { if (need_iolock) {
if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
return -EAGAIN; return -EAGAIN;
} }
} }
@ -886,7 +869,7 @@ xfs_free_eofblocks(
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
if (error) { if (error) {
ASSERT(XFS_FORCED_SHUTDOWN(mp)); ASSERT(XFS_FORCED_SHUTDOWN(mp));
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
if (need_iolock) if (need_iolock)
xfs_iunlock(ip, XFS_IOLOCK_EXCL); xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return error; return error;
@ -908,12 +891,9 @@ xfs_free_eofblocks(
* If we get an error at this point we simply don't * If we get an error at this point we simply don't
* bother truncating the file. * bother truncating the file.
*/ */
xfs_trans_cancel(tp, xfs_trans_cancel(tp);
(XFS_TRANS_RELEASE_LOG_RES |
XFS_TRANS_ABORT));
} else { } else {
error = xfs_trans_commit(tp, error = xfs_trans_commit(tp);
XFS_TRANS_RELEASE_LOG_RES);
if (!error) if (!error)
xfs_inode_clear_eofblocks_tag(ip); xfs_inode_clear_eofblocks_tag(ip);
} }
@ -1026,7 +1006,7 @@ xfs_alloc_file_space(
* Free the transaction structure. * Free the transaction structure.
*/ */
ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp)); ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
break; break;
} }
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
@ -1053,7 +1033,7 @@ xfs_alloc_file_space(
goto error0; goto error0;
} }
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_ILOCK_EXCL);
if (error) { if (error) {
break; break;
@ -1077,7 +1057,7 @@ xfs_alloc_file_space(
xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag); xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
error1: /* Just cancel transaction */ error1: /* Just cancel transaction */
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error; return error;
} }
@ -1133,14 +1113,29 @@ xfs_zero_remaining_bytes(
break; break;
ASSERT(imap.br_blockcount >= 1); ASSERT(imap.br_blockcount >= 1);
ASSERT(imap.br_startoff == offset_fsb); ASSERT(imap.br_startoff == offset_fsb);
ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
if (imap.br_startblock == HOLESTARTBLOCK ||
imap.br_state == XFS_EXT_UNWRITTEN) {
/* skip the entire extent */
lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff +
imap.br_blockcount) - 1;
continue;
}
lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1; lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1;
if (lastoffset > endoff) if (lastoffset > endoff)
lastoffset = endoff; lastoffset = endoff;
if (imap.br_startblock == HOLESTARTBLOCK)
continue; /* DAX can just zero the backing device directly */
ASSERT(imap.br_startblock != DELAYSTARTBLOCK); if (IS_DAX(VFS_I(ip))) {
if (imap.br_state == XFS_EXT_UNWRITTEN) error = dax_zero_page_range(VFS_I(ip), offset,
lastoffset - offset + 1,
xfs_get_blocks_direct);
if (error)
return error;
continue; continue;
}
error = xfs_buf_read_uncached(XFS_IS_REALTIME_INODE(ip) ? error = xfs_buf_read_uncached(XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp, mp->m_rtdev_targp : mp->m_ddev_targp,
@ -1289,7 +1284,7 @@ xfs_free_file_space(
* Free the transaction structure. * Free the transaction structure.
*/ */
ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp)); ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
break; break;
} }
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
@ -1320,7 +1315,7 @@ xfs_free_file_space(
goto error0; goto error0;
} }
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_ILOCK_EXCL);
} }
@ -1330,7 +1325,7 @@ xfs_free_file_space(
error0: error0:
xfs_bmap_cancel(&free_list); xfs_bmap_cancel(&free_list);
error1: error1:
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_ILOCK_EXCL);
goto out; goto out;
} }
@ -1462,7 +1457,7 @@ xfs_shift_file_space(
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
XFS_DIOSTRAT_SPACE_RES(mp, 0), 0); XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
if (error) { if (error) {
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
break; break;
} }
@ -1492,13 +1487,13 @@ xfs_shift_file_space(
if (error) if (error)
goto out; goto out;
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); error = xfs_trans_commit(tp);
} }
return error; return error;
out: out:
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); xfs_trans_cancel(tp);
return error; return error;
} }
@ -1718,7 +1713,7 @@ xfs_swap_extents(
tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
if (error) { if (error) {
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
goto out_unlock; goto out_unlock;
} }
@ -1901,7 +1896,7 @@ xfs_swap_extents(
if (mp->m_flags & XFS_MOUNT_WSYNC) if (mp->m_flags & XFS_MOUNT_WSYNC)
xfs_trans_set_sync(tp); xfs_trans_set_sync(tp);
error = xfs_trans_commit(tp, 0); error = xfs_trans_commit(tp);
trace_xfs_swap_extent_after(ip, 0); trace_xfs_swap_extent_after(ip, 0);
trace_xfs_swap_extent_after(tip, 1); trace_xfs_swap_extent_after(tip, 1);
@ -1915,6 +1910,6 @@ xfs_swap_extents(
goto out; goto out;
out_trans_cancel: out_trans_cancel:
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
goto out; goto out;
} }

View file

@ -1419,9 +1419,9 @@ xfs_buf_submit_wait(
return error; return error;
} }
xfs_caddr_t void *
xfs_buf_offset( xfs_buf_offset(
xfs_buf_t *bp, struct xfs_buf *bp,
size_t offset) size_t offset)
{ {
struct page *page; struct page *page;
@ -1431,7 +1431,7 @@ xfs_buf_offset(
offset += bp->b_offset; offset += bp->b_offset;
page = bp->b_pages[offset >> PAGE_SHIFT]; page = bp->b_pages[offset >> PAGE_SHIFT];
return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1)); return page_address(page) + (offset & (PAGE_SIZE-1));
} }
/* /*

View file

@ -299,7 +299,7 @@ extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
/* Buffer Utility Routines */ /* Buffer Utility Routines */
extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); extern void *xfs_buf_offset(struct xfs_buf *, size_t);
/* Delayed Write Buffer Routines */ /* Delayed Write Buffer Routines */
extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *); extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);

View file

@ -568,8 +568,6 @@ xfs_qm_dqread(
struct xfs_buf *bp; struct xfs_buf *bp;
struct xfs_trans *tp = NULL; struct xfs_trans *tp = NULL;
int error; int error;
int cancelflags = 0;
dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP); dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP);
@ -617,7 +615,6 @@ xfs_qm_dqread(
XFS_QM_DQALLOC_SPACE_RES(mp), 0); XFS_QM_DQALLOC_SPACE_RES(mp), 0);
if (error) if (error)
goto error1; goto error1;
cancelflags = XFS_TRANS_RELEASE_LOG_RES;
} }
/* /*
@ -632,7 +629,6 @@ xfs_qm_dqread(
* allocate (ENOENT). * allocate (ENOENT).
*/ */
trace_xfs_dqread_fail(dqp); trace_xfs_dqread_fail(dqp);
cancelflags |= XFS_TRANS_ABORT;
goto error1; goto error1;
} }
@ -670,7 +666,7 @@ xfs_qm_dqread(
xfs_trans_brelse(tp, bp); xfs_trans_brelse(tp, bp);
if (tp) { if (tp) {
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); error = xfs_trans_commit(tp);
if (error) if (error)
goto error0; goto error0;
} }
@ -680,7 +676,7 @@ xfs_qm_dqread(
error1: error1:
if (tp) if (tp)
xfs_trans_cancel(tp, cancelflags); xfs_trans_cancel(tp);
error0: error0:
xfs_qm_dqdestroy(dqp); xfs_qm_dqdestroy(dqp);
*O_dqpp = NULL; *O_dqpp = NULL;

View file

@ -127,7 +127,7 @@ xfs_error_report(
struct xfs_mount *mp, struct xfs_mount *mp,
const char *filename, const char *filename,
int linenum, int linenum,
inst_t *ra) void *ra)
{ {
if (level <= xfs_error_level) { if (level <= xfs_error_level) {
xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
@ -146,7 +146,7 @@ xfs_corruption_error(
void *p, void *p,
const char *filename, const char *filename,
int linenum, int linenum,
inst_t *ra) void *ra)
{ {
if (level <= xfs_error_level) if (level <= xfs_error_level)
xfs_hex_dump(p, 64); xfs_hex_dump(p, 64);

View file

@ -21,10 +21,10 @@
struct xfs_mount; struct xfs_mount;
extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp, extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
const char *filename, int linenum, inst_t *ra); const char *filename, int linenum, void *ra);
extern void xfs_corruption_error(const char *tag, int level, extern void xfs_corruption_error(const char *tag, int level,
struct xfs_mount *mp, void *p, const char *filename, struct xfs_mount *mp, void *p, const char *filename,
int linenum, inst_t *ra); int linenum, void *ra);
extern void xfs_verifier_error(struct xfs_buf *bp); extern void xfs_verifier_error(struct xfs_buf *bp);
#define XFS_ERROR_REPORT(e, lvl, mp) \ #define XFS_ERROR_REPORT(e, lvl, mp) \

View file

@ -239,7 +239,7 @@ xfs_efi_init(
xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops); xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops);
efip->efi_format.efi_nextents = nextents; efip->efi_format.efi_nextents = nextents;
efip->efi_format.efi_id = (__psint_t)(void*)efip; efip->efi_format.efi_id = (uintptr_t)(void *)efip;
atomic_set(&efip->efi_next_extent, 0); atomic_set(&efip->efi_next_extent, 0);
atomic_set(&efip->efi_refcount, 2); atomic_set(&efip->efi_refcount, 2);

View file

@ -80,14 +80,15 @@ xfs_rw_ilock_demote(
} }
/* /*
* xfs_iozero * xfs_iozero clears the specified range supplied via the page cache (except in
* the DAX case). Writes through the page cache will allocate blocks over holes,
* though the callers usually map the holes first and avoid them. If a block is
* not completely zeroed, then it will be read from disk before being partially
* zeroed.
* *
* xfs_iozero clears the specified range of buffer supplied, * In the DAX case, we can just directly write to the underlying pages. This
* and marks all the affected blocks as valid and modified. If * will not allocate blocks, but will avoid holes and unwritten extents and so
* an affected block is not allocated, it will be allocated. If * not do unnecessary work.
* an affected block is not completely overwritten, and is not
* valid before the operation, it will be read from disk before
* being partially zeroed.
*/ */
int int
xfs_iozero( xfs_iozero(
@ -97,7 +98,8 @@ xfs_iozero(
{ {
struct page *page; struct page *page;
struct address_space *mapping; struct address_space *mapping;
int status; int status = 0;
mapping = VFS_I(ip)->i_mapping; mapping = VFS_I(ip)->i_mapping;
do { do {
@ -109,20 +111,27 @@ xfs_iozero(
if (bytes > count) if (bytes > count)
bytes = count; bytes = count;
status = pagecache_write_begin(NULL, mapping, pos, bytes, if (IS_DAX(VFS_I(ip))) {
AOP_FLAG_UNINTERRUPTIBLE, status = dax_zero_page_range(VFS_I(ip), pos, bytes,
&page, &fsdata); xfs_get_blocks_direct);
if (status) if (status)
break; break;
} else {
status = pagecache_write_begin(NULL, mapping, pos, bytes,
AOP_FLAG_UNINTERRUPTIBLE,
&page, &fsdata);
if (status)
break;
zero_user(page, offset, bytes); zero_user(page, offset, bytes);
status = pagecache_write_end(NULL, mapping, pos, bytes, bytes, status = pagecache_write_end(NULL, mapping, pos, bytes,
page, fsdata); bytes, page, fsdata);
WARN_ON(status <= 0); /* can't return less than zero! */ WARN_ON(status <= 0); /* can't return less than zero! */
status = 0;
}
pos += bytes; pos += bytes;
count -= bytes; count -= bytes;
status = 0;
} while (count); } while (count);
return status; return status;
@ -139,7 +148,7 @@ xfs_update_prealloc_flags(
tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID); tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID);
error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0); error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0);
if (error) { if (error) {
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
return error; return error;
} }
@ -161,7 +170,7 @@ xfs_update_prealloc_flags(
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
if (flags & XFS_PREALLOC_SYNC) if (flags & XFS_PREALLOC_SYNC)
xfs_trans_set_sync(tp); xfs_trans_set_sync(tp);
return xfs_trans_commit(tp, 0); return xfs_trans_commit(tp);
} }
/* /*
@ -285,7 +294,7 @@ xfs_file_read_iter(
if (file->f_mode & FMODE_NOCMTIME) if (file->f_mode & FMODE_NOCMTIME)
ioflags |= XFS_IO_INVIS; ioflags |= XFS_IO_INVIS;
if (unlikely(ioflags & XFS_IO_ISDIRECT)) { if ((ioflags & XFS_IO_ISDIRECT) && !IS_DAX(inode)) {
xfs_buftarg_t *target = xfs_buftarg_t *target =
XFS_IS_REALTIME_INODE(ip) ? XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp; mp->m_rtdev_targp : mp->m_ddev_targp;
@ -379,7 +388,11 @@ xfs_file_splice_read(
trace_xfs_file_splice_read(ip, count, *ppos, ioflags); trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); /* for dax, we need to avoid the page cache */
if (IS_DAX(VFS_I(ip)))
ret = default_file_splice_read(infilp, ppos, pipe, count, flags);
else
ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
if (ret > 0) if (ret > 0)
XFS_STATS_ADD(xs_read_bytes, ret); XFS_STATS_ADD(xs_read_bytes, ret);
@ -673,7 +686,7 @@ xfs_file_dio_aio_write(
mp->m_rtdev_targp : mp->m_ddev_targp; mp->m_rtdev_targp : mp->m_ddev_targp;
/* DIO must be aligned to device logical sector size */ /* DIO must be aligned to device logical sector size */
if ((pos | count) & target->bt_logical_sectormask) if (!IS_DAX(inode) && ((pos | count) & target->bt_logical_sectormask))
return -EINVAL; return -EINVAL;
/* "unaligned" here means not aligned to a filesystem block */ /* "unaligned" here means not aligned to a filesystem block */
@ -759,8 +772,11 @@ xfs_file_dio_aio_write(
out: out:
xfs_rw_iunlock(ip, iolock); xfs_rw_iunlock(ip, iolock);
/* No fallback to buffered IO on errors for XFS. */ /*
ASSERT(ret < 0 || ret == count); * No fallback to buffered IO on errors for XFS. DAX can result in
* partial writes, but direct IO will either complete fully or fail.
*/
ASSERT(ret < 0 || ret == count || IS_DAX(VFS_I(ip)));
return ret; return ret;
} }
@ -843,7 +859,7 @@ xfs_file_write_iter(
if (XFS_FORCED_SHUTDOWN(ip->i_mount)) if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO; return -EIO;
if (unlikely(iocb->ki_flags & IOCB_DIRECT)) if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode))
ret = xfs_file_dio_aio_write(iocb, from); ret = xfs_file_dio_aio_write(iocb, from);
else else
ret = xfs_file_buffered_aio_write(iocb, from); ret = xfs_file_buffered_aio_write(iocb, from);
@ -1064,17 +1080,6 @@ xfs_file_readdir(
return xfs_readdir(ip, ctx, bufsize); return xfs_readdir(ip, ctx, bufsize);
} }
STATIC int
xfs_file_mmap(
struct file *filp,
struct vm_area_struct *vma)
{
vma->vm_ops = &xfs_file_vm_ops;
file_accessed(filp);
return 0;
}
/* /*
* This type is designed to indicate the type of offset we would like * This type is designed to indicate the type of offset we would like
* to search from page cache for xfs_seek_hole_data(). * to search from page cache for xfs_seek_hole_data().
@ -1455,26 +1460,11 @@ xfs_file_llseek(
* ordering of: * ordering of:
* *
* mmap_sem (MM) * mmap_sem (MM)
* i_mmap_lock (XFS - truncate serialisation) * sb_start_pagefault(vfs, freeze)
* page_lock (MM) * i_mmap_lock (XFS - truncate serialisation)
* i_lock (XFS - extent map serialisation) * page_lock (MM)
* i_lock (XFS - extent map serialisation)
*/ */
STATIC int
xfs_filemap_fault(
struct vm_area_struct *vma,
struct vm_fault *vmf)
{
struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host);
int error;
trace_xfs_filemap_fault(ip);
xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
error = filemap_fault(vma, vmf);
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
return error;
}
/* /*
* mmap()d file has taken write protection fault and is being made writable. We * mmap()d file has taken write protection fault and is being made writable. We
@ -1487,16 +1477,66 @@ xfs_filemap_page_mkwrite(
struct vm_area_struct *vma, struct vm_area_struct *vma,
struct vm_fault *vmf) struct vm_fault *vmf)
{ {
struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host); struct inode *inode = file_inode(vma->vm_file);
int error; int ret;
trace_xfs_filemap_page_mkwrite(ip); trace_xfs_filemap_page_mkwrite(XFS_I(inode));
sb_start_pagefault(inode->i_sb);
file_update_time(vma->vm_file);
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
if (IS_DAX(inode)) {
ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_direct,
xfs_end_io_dax_write);
} else {
ret = __block_page_mkwrite(vma, vmf, xfs_get_blocks);
ret = block_page_mkwrite_return(ret);
}
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
sb_end_pagefault(inode->i_sb);
return ret;
}
STATIC int
xfs_filemap_fault(
struct vm_area_struct *vma,
struct vm_fault *vmf)
{
struct xfs_inode *ip = XFS_I(file_inode(vma->vm_file));
int ret;
trace_xfs_filemap_fault(ip);
/* DAX can shortcut the normal fault path on write faults! */
if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(VFS_I(ip)))
return xfs_filemap_page_mkwrite(vma, vmf);
xfs_ilock(ip, XFS_MMAPLOCK_SHARED); xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
error = block_page_mkwrite(vma, vmf, xfs_get_blocks); ret = filemap_fault(vma, vmf);
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
return error; return ret;
}
static const struct vm_operations_struct xfs_file_vm_ops = {
.fault = xfs_filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = xfs_filemap_page_mkwrite,
};
STATIC int
xfs_file_mmap(
struct file *filp,
struct vm_area_struct *vma)
{
file_accessed(filp);
vma->vm_ops = &xfs_file_vm_ops;
if (IS_DAX(file_inode(filp)))
vma->vm_flags |= VM_MIXEDMAP;
return 0;
} }
const struct file_operations xfs_file_operations = { const struct file_operations xfs_file_operations = {
@ -1527,9 +1567,3 @@ const struct file_operations xfs_dir_file_operations = {
#endif #endif
.fsync = xfs_dir_fsync, .fsync = xfs_dir_fsync,
}; };
static const struct vm_operations_struct xfs_file_vm_ops = {
.fault = xfs_filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = xfs_filemap_page_mkwrite,
};

View file

@ -196,7 +196,8 @@ xfs_filestream_pick_ag(
goto next_ag; goto next_ag;
} }
longest = xfs_alloc_longest_free_extent(mp, pag); longest = xfs_alloc_longest_free_extent(mp, pag,
xfs_alloc_min_freelist(mp, pag));
if (((minlen && longest >= minlen) || if (((minlen && longest >= minlen) ||
(!minlen && pag->pagf_freeblks >= minfree)) && (!minlen && pag->pagf_freeblks >= minfree)) &&
(!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) || (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) ||

View file

@ -101,7 +101,9 @@ xfs_fs_geometry(
(xfs_sb_version_hasftype(&mp->m_sb) ? (xfs_sb_version_hasftype(&mp->m_sb) ?
XFS_FSOP_GEOM_FLAGS_FTYPE : 0) | XFS_FSOP_GEOM_FLAGS_FTYPE : 0) |
(xfs_sb_version_hasfinobt(&mp->m_sb) ? (xfs_sb_version_hasfinobt(&mp->m_sb) ?
XFS_FSOP_GEOM_FLAGS_FINOBT : 0); XFS_FSOP_GEOM_FLAGS_FINOBT : 0) |
(xfs_sb_version_hassparseinodes(&mp->m_sb) ?
XFS_FSOP_GEOM_FLAGS_SPINODES : 0);
geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ? geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
mp->m_sb.sb_logsectsize : BBSIZE; mp->m_sb.sb_logsectsize : BBSIZE;
geo->rtsectsize = mp->m_sb.sb_blocksize; geo->rtsectsize = mp->m_sb.sb_blocksize;
@ -201,7 +203,7 @@ xfs_growfs_data_private(
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata, error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata,
XFS_GROWFS_SPACE_RES(mp), 0); XFS_GROWFS_SPACE_RES(mp), 0);
if (error) { if (error) {
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
return error; return error;
} }
@ -489,7 +491,7 @@ xfs_growfs_data_private(
if (dpct) if (dpct)
xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct);
xfs_trans_set_sync(tp); xfs_trans_set_sync(tp);
error = xfs_trans_commit(tp, 0); error = xfs_trans_commit(tp);
if (error) if (error)
return error; return error;
@ -557,7 +559,7 @@ xfs_growfs_data_private(
return saved_error ? saved_error : error; return saved_error ? saved_error : error;
error0: error0:
xfs_trans_cancel(tp, XFS_TRANS_ABORT); xfs_trans_cancel(tp);
return error; return error;
} }

View file

@ -905,7 +905,6 @@ xfs_dir_ialloc(
{ {
xfs_trans_t *tp; xfs_trans_t *tp;
xfs_trans_t *ntp;
xfs_inode_t *ip; xfs_inode_t *ip;
xfs_buf_t *ialloc_context = NULL; xfs_buf_t *ialloc_context = NULL;
int code; int code;
@ -954,8 +953,6 @@ xfs_dir_ialloc(
* to succeed the second time. * to succeed the second time.
*/ */
if (ialloc_context) { if (ialloc_context) {
struct xfs_trans_res tres;
/* /*
* Normally, xfs_trans_commit releases all the locks. * Normally, xfs_trans_commit releases all the locks.
* We call bhold to hang on to the ialloc_context across * We call bhold to hang on to the ialloc_context across
@ -964,12 +961,6 @@ xfs_dir_ialloc(
* allocation group. * allocation group.
*/ */
xfs_trans_bhold(tp, ialloc_context); xfs_trans_bhold(tp, ialloc_context);
/*
* Save the log reservation so we can use
* them in the next transaction.
*/
tres.tr_logres = xfs_trans_get_log_res(tp);
tres.tr_logcount = xfs_trans_get_log_count(tp);
/* /*
* We want the quota changes to be associated with the next * We want the quota changes to be associated with the next
@ -985,35 +976,9 @@ xfs_dir_ialloc(
tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY); tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY);
} }
ntp = xfs_trans_dup(tp); code = xfs_trans_roll(&tp, 0);
code = xfs_trans_commit(tp, 0); if (committed != NULL)
tp = ntp;
if (committed != NULL) {
*committed = 1; *committed = 1;
}
/*
* If we get an error during the commit processing,
* release the buffer that is still held and return
* to the caller.
*/
if (code) {
xfs_buf_relse(ialloc_context);
if (dqinfo) {
tp->t_dqinfo = dqinfo;
xfs_trans_free_dqinfo(tp);
}
*tpp = ntp;
*ipp = NULL;
return code;
}
/*
* transaction commit worked ok so we can drop the extra ticket
* reference that we gained in xfs_trans_dup()
*/
xfs_log_ticket_put(tp->t_ticket);
tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
code = xfs_trans_reserve(tp, &tres, 0, 0);
/* /*
* Re-attach the quota info that we detached from prev trx. * Re-attach the quota info that we detached from prev trx.
@ -1025,7 +990,7 @@ xfs_dir_ialloc(
if (code) { if (code) {
xfs_buf_relse(ialloc_context); xfs_buf_relse(ialloc_context);
*tpp = ntp; *tpp = tp;
*ipp = NULL; *ipp = NULL;
return code; return code;
} }
@ -1127,7 +1092,6 @@ xfs_create(
xfs_bmap_free_t free_list; xfs_bmap_free_t free_list;
xfs_fsblock_t first_block; xfs_fsblock_t first_block;
bool unlock_dp_on_error = false; bool unlock_dp_on_error = false;
uint cancel_flags;
int committed; int committed;
prid_t prid; prid_t prid;
struct xfs_dquot *udqp = NULL; struct xfs_dquot *udqp = NULL;
@ -1164,8 +1128,6 @@ xfs_create(
tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE); tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
} }
cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
/* /*
* Initially assume that the file does not exist and * Initially assume that the file does not exist and
* reserve the resources for that case. If that is not * reserve the resources for that case. If that is not
@ -1183,10 +1145,9 @@ xfs_create(
resblks = 0; resblks = 0;
error = xfs_trans_reserve(tp, tres, 0, 0); error = xfs_trans_reserve(tp, tres, 0, 0);
} }
if (error) { if (error)
cancel_flags = 0;
goto out_trans_cancel; goto out_trans_cancel;
}
xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
unlock_dp_on_error = true; unlock_dp_on_error = true;
@ -1217,7 +1178,7 @@ xfs_create(
if (error) { if (error) {
if (error == -ENOSPC) if (error == -ENOSPC)
goto out_trans_cancel; goto out_trans_cancel;
goto out_trans_abort; goto out_trans_cancel;
} }
/* /*
@ -1235,7 +1196,7 @@ xfs_create(
resblks - XFS_IALLOC_SPACE_RES(mp) : 0); resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
if (error) { if (error) {
ASSERT(error != -ENOSPC); ASSERT(error != -ENOSPC);
goto out_trans_abort; goto out_trans_cancel;
} }
xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
@ -1269,7 +1230,7 @@ xfs_create(
if (error) if (error)
goto out_bmap_cancel; goto out_bmap_cancel;
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); error = xfs_trans_commit(tp);
if (error) if (error)
goto out_release_inode; goto out_release_inode;
@ -1282,10 +1243,8 @@ xfs_create(
out_bmap_cancel: out_bmap_cancel:
xfs_bmap_cancel(&free_list); xfs_bmap_cancel(&free_list);
out_trans_abort:
cancel_flags |= XFS_TRANS_ABORT;
out_trans_cancel: out_trans_cancel:
xfs_trans_cancel(tp, cancel_flags); xfs_trans_cancel(tp);
out_release_inode: out_release_inode:
/* /*
* Wait until after the current transaction is aborted to finish the * Wait until after the current transaction is aborted to finish the
@ -1317,7 +1276,6 @@ xfs_create_tmpfile(
struct xfs_inode *ip = NULL; struct xfs_inode *ip = NULL;
struct xfs_trans *tp = NULL; struct xfs_trans *tp = NULL;
int error; int error;
uint cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
prid_t prid; prid_t prid;
struct xfs_dquot *udqp = NULL; struct xfs_dquot *udqp = NULL;
struct xfs_dquot *gdqp = NULL; struct xfs_dquot *gdqp = NULL;
@ -1350,10 +1308,8 @@ xfs_create_tmpfile(
resblks = 0; resblks = 0;
error = xfs_trans_reserve(tp, tres, 0, 0); error = xfs_trans_reserve(tp, tres, 0, 0);
} }
if (error) { if (error)
cancel_flags = 0;
goto out_trans_cancel; goto out_trans_cancel;
}
error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
pdqp, resblks, 1, 0); pdqp, resblks, 1, 0);
@ -1365,7 +1321,7 @@ xfs_create_tmpfile(
if (error) { if (error) {
if (error == -ENOSPC) if (error == -ENOSPC)
goto out_trans_cancel; goto out_trans_cancel;
goto out_trans_abort; goto out_trans_cancel;
} }
if (mp->m_flags & XFS_MOUNT_WSYNC) if (mp->m_flags & XFS_MOUNT_WSYNC)
@ -1381,9 +1337,9 @@ xfs_create_tmpfile(
ip->i_d.di_nlink--; ip->i_d.di_nlink--;
error = xfs_iunlink(tp, ip); error = xfs_iunlink(tp, ip);
if (error) if (error)
goto out_trans_abort; goto out_trans_cancel;
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); error = xfs_trans_commit(tp);
if (error) if (error)
goto out_release_inode; goto out_release_inode;
@ -1394,10 +1350,8 @@ xfs_create_tmpfile(
*ipp = ip; *ipp = ip;
return 0; return 0;
out_trans_abort:
cancel_flags |= XFS_TRANS_ABORT;
out_trans_cancel: out_trans_cancel:
xfs_trans_cancel(tp, cancel_flags); xfs_trans_cancel(tp);
out_release_inode: out_release_inode:
/* /*
* Wait until after the current transaction is aborted to finish the * Wait until after the current transaction is aborted to finish the
@ -1427,7 +1381,6 @@ xfs_link(
int error; int error;
xfs_bmap_free_t free_list; xfs_bmap_free_t free_list;
xfs_fsblock_t first_block; xfs_fsblock_t first_block;
int cancel_flags;
int committed; int committed;
int resblks; int resblks;
@ -1447,17 +1400,14 @@ xfs_link(
goto std_return; goto std_return;
tp = xfs_trans_alloc(mp, XFS_TRANS_LINK); tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
resblks = XFS_LINK_SPACE_RES(mp, target_name->len); resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, resblks, 0); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, resblks, 0);
if (error == -ENOSPC) { if (error == -ENOSPC) {
resblks = 0; resblks = 0;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, 0, 0); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, 0, 0);
} }
if (error) { if (error)
cancel_flags = 0;
goto error_return; goto error_return;
}
xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
@ -1486,19 +1436,19 @@ xfs_link(
if (sip->i_d.di_nlink == 0) { if (sip->i_d.di_nlink == 0) {
error = xfs_iunlink_remove(tp, sip); error = xfs_iunlink_remove(tp, sip);
if (error) if (error)
goto abort_return; goto error_return;
} }
error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino, error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
&first_block, &free_list, resblks); &first_block, &free_list, resblks);
if (error) if (error)
goto abort_return; goto error_return;
xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
error = xfs_bumplink(tp, sip); error = xfs_bumplink(tp, sip);
if (error) if (error)
goto abort_return; goto error_return;
/* /*
* If this is a synchronous mount, make sure that the * If this is a synchronous mount, make sure that the
@ -1512,15 +1462,13 @@ xfs_link(
error = xfs_bmap_finish (&tp, &free_list, &committed); error = xfs_bmap_finish (&tp, &free_list, &committed);
if (error) { if (error) {
xfs_bmap_cancel(&free_list); xfs_bmap_cancel(&free_list);
goto abort_return; goto error_return;
} }
return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); return xfs_trans_commit(tp);
abort_return:
cancel_flags |= XFS_TRANS_ABORT;
error_return: error_return:
xfs_trans_cancel(tp, cancel_flags); xfs_trans_cancel(tp);
std_return: std_return:
return error; return error;
} }
@ -1555,7 +1503,6 @@ xfs_itruncate_extents(
{ {
struct xfs_mount *mp = ip->i_mount; struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp = *tpp; struct xfs_trans *tp = *tpp;
struct xfs_trans *ntp;
xfs_bmap_free_t free_list; xfs_bmap_free_t free_list;
xfs_fsblock_t first_block; xfs_fsblock_t first_block;
xfs_fileoff_t first_unmap_block; xfs_fileoff_t first_unmap_block;
@ -1613,29 +1560,7 @@ xfs_itruncate_extents(
if (error) if (error)
goto out_bmap_cancel; goto out_bmap_cancel;
if (committed) { error = xfs_trans_roll(&tp, ip);
/*
* Mark the inode dirty so it will be logged and
* moved forward in the log as part of every commit.
*/
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
ntp = xfs_trans_dup(tp);
error = xfs_trans_commit(tp, 0);
tp = ntp;
xfs_trans_ijoin(tp, ip, 0);
if (error)
goto out;
/*
* Transaction commit worked ok so we can drop the extra ticket
* reference that we gained in xfs_trans_dup()
*/
xfs_log_ticket_put(tp->t_ticket);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
if (error) if (error)
goto out; goto out;
} }
@ -1756,7 +1681,7 @@ xfs_inactive_truncate(
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
if (error) { if (error) {
ASSERT(XFS_FORCED_SHUTDOWN(mp)); ASSERT(XFS_FORCED_SHUTDOWN(mp));
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
return error; return error;
} }
@ -1777,7 +1702,7 @@ xfs_inactive_truncate(
ASSERT(ip->i_d.di_nextents == 0); ASSERT(ip->i_d.di_nextents == 0);
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); error = xfs_trans_commit(tp);
if (error) if (error)
goto error_unlock; goto error_unlock;
@ -1785,7 +1710,7 @@ xfs_inactive_truncate(
return 0; return 0;
error_trans_cancel: error_trans_cancel:
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); xfs_trans_cancel(tp);
error_unlock: error_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error; return error;
@ -1835,7 +1760,7 @@ xfs_inactive_ifree(
} else { } else {
ASSERT(XFS_FORCED_SHUTDOWN(mp)); ASSERT(XFS_FORCED_SHUTDOWN(mp));
} }
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES); xfs_trans_cancel(tp);
return error; return error;
} }
@ -1855,7 +1780,7 @@ xfs_inactive_ifree(
__func__, error); __func__, error);
xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
} }
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error; return error;
} }
@ -1874,7 +1799,7 @@ xfs_inactive_ifree(
if (error) if (error)
xfs_notice(mp, "%s: xfs_bmap_finish returned error %d", xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
__func__, error); __func__, error);
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); error = xfs_trans_commit(tp);
if (error) if (error)
xfs_notice(mp, "%s: xfs_trans_commit returned error %d", xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
__func__, error); __func__, error);
@ -2235,28 +2160,42 @@ xfs_iunlink_remove(
*/ */
STATIC int STATIC int
xfs_ifree_cluster( xfs_ifree_cluster(
xfs_inode_t *free_ip, xfs_inode_t *free_ip,
xfs_trans_t *tp, xfs_trans_t *tp,
xfs_ino_t inum) struct xfs_icluster *xic)
{ {
xfs_mount_t *mp = free_ip->i_mount; xfs_mount_t *mp = free_ip->i_mount;
int blks_per_cluster; int blks_per_cluster;
int inodes_per_cluster; int inodes_per_cluster;
int nbufs; int nbufs;
int i, j; int i, j;
int ioffset;
xfs_daddr_t blkno; xfs_daddr_t blkno;
xfs_buf_t *bp; xfs_buf_t *bp;
xfs_inode_t *ip; xfs_inode_t *ip;
xfs_inode_log_item_t *iip; xfs_inode_log_item_t *iip;
xfs_log_item_t *lip; xfs_log_item_t *lip;
struct xfs_perag *pag; struct xfs_perag *pag;
xfs_ino_t inum;
inum = xic->first_ino;
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum)); pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
blks_per_cluster = xfs_icluster_size_fsb(mp); blks_per_cluster = xfs_icluster_size_fsb(mp);
inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog; inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog;
nbufs = mp->m_ialloc_blks / blks_per_cluster; nbufs = mp->m_ialloc_blks / blks_per_cluster;
for (j = 0; j < nbufs; j++, inum += inodes_per_cluster) { for (j = 0; j < nbufs; j++, inum += inodes_per_cluster) {
/*
* The allocation bitmap tells us which inodes of the chunk were
* physically allocated. Skip the cluster if an inode falls into
* a sparse region.
*/
ioffset = inum - xic->first_ino;
if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) {
ASSERT(do_mod(ioffset, inodes_per_cluster) == 0);
continue;
}
blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
XFS_INO_TO_AGBNO(mp, inum)); XFS_INO_TO_AGBNO(mp, inum));
@ -2414,8 +2353,7 @@ xfs_ifree(
xfs_bmap_free_t *flist) xfs_bmap_free_t *flist)
{ {
int error; int error;
int delete; struct xfs_icluster xic = { 0 };
xfs_ino_t first_ino;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(ip->i_d.di_nlink == 0); ASSERT(ip->i_d.di_nlink == 0);
@ -2431,7 +2369,7 @@ xfs_ifree(
if (error) if (error)
return error; return error;
error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino); error = xfs_difree(tp, ip->i_ino, flist, &xic);
if (error) if (error)
return error; return error;
@ -2448,8 +2386,8 @@ xfs_ifree(
ip->i_d.di_gen++; ip->i_d.di_gen++;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
if (delete) if (xic.deleted)
error = xfs_ifree_cluster(ip, tp, first_ino); error = xfs_ifree_cluster(ip, tp, &xic);
return error; return error;
} }
@ -2536,7 +2474,6 @@ xfs_remove(
int error = 0; int error = 0;
xfs_bmap_free_t free_list; xfs_bmap_free_t free_list;
xfs_fsblock_t first_block; xfs_fsblock_t first_block;
int cancel_flags;
int committed; int committed;
uint resblks; uint resblks;
@ -2557,7 +2494,6 @@ xfs_remove(
tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR); tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
else else
tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE); tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
/* /*
* We try to get the real space reservation first, * We try to get the real space reservation first,
@ -2576,7 +2512,6 @@ xfs_remove(
} }
if (error) { if (error) {
ASSERT(error != -ENOSPC); ASSERT(error != -ENOSPC);
cancel_flags = 0;
goto out_trans_cancel; goto out_trans_cancel;
} }
@ -2588,7 +2523,6 @@ xfs_remove(
/* /*
* If we're removing a directory perform some additional validation. * If we're removing a directory perform some additional validation.
*/ */
cancel_flags |= XFS_TRANS_ABORT;
if (is_dir) { if (is_dir) {
ASSERT(ip->i_d.di_nlink >= 2); ASSERT(ip->i_d.di_nlink >= 2);
if (ip->i_d.di_nlink != 2) { if (ip->i_d.di_nlink != 2) {
@ -2644,7 +2578,7 @@ xfs_remove(
if (error) if (error)
goto out_bmap_cancel; goto out_bmap_cancel;
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); error = xfs_trans_commit(tp);
if (error) if (error)
goto std_return; goto std_return;
@ -2656,7 +2590,7 @@ xfs_remove(
out_bmap_cancel: out_bmap_cancel:
xfs_bmap_cancel(&free_list); xfs_bmap_cancel(&free_list);
out_trans_cancel: out_trans_cancel:
xfs_trans_cancel(tp, cancel_flags); xfs_trans_cancel(tp);
std_return: std_return:
return error; return error;
} }
@ -2730,11 +2664,11 @@ xfs_finish_rename(
error = xfs_bmap_finish(&tp, free_list, &committed); error = xfs_bmap_finish(&tp, free_list, &committed);
if (error) { if (error) {
xfs_bmap_cancel(free_list); xfs_bmap_cancel(free_list);
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); xfs_trans_cancel(tp);
return error; return error;
} }
return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); return xfs_trans_commit(tp);
} }
/* /*
@ -2855,7 +2789,7 @@ xfs_cross_rename(
out_trans_abort: out_trans_abort:
xfs_bmap_cancel(free_list); xfs_bmap_cancel(free_list);
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); xfs_trans_cancel(tp);
return error; return error;
} }
@ -2915,7 +2849,6 @@ xfs_rename(
int num_inodes = __XFS_SORT_INODES; int num_inodes = __XFS_SORT_INODES;
bool new_parent = (src_dp != target_dp); bool new_parent = (src_dp != target_dp);
bool src_is_directory = S_ISDIR(src_ip->i_d.di_mode); bool src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
int cancel_flags = 0;
int spaceres; int spaceres;
int error; int error;
@ -2951,7 +2884,6 @@ xfs_rename(
} }
if (error) if (error)
goto out_trans_cancel; goto out_trans_cancel;
cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
/* /*
* Attach the dquots to the inodes * Attach the dquots to the inodes
@ -3022,10 +2954,8 @@ xfs_rename(
error = xfs_dir_createname(tp, target_dp, target_name, error = xfs_dir_createname(tp, target_dp, target_name,
src_ip->i_ino, &first_block, src_ip->i_ino, &first_block,
&free_list, spaceres); &free_list, spaceres);
if (error == -ENOSPC)
goto out_bmap_cancel;
if (error) if (error)
goto out_trans_abort; goto out_bmap_cancel;
xfs_trans_ichgtime(tp, target_dp, xfs_trans_ichgtime(tp, target_dp,
XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@ -3033,7 +2963,7 @@ xfs_rename(
if (new_parent && src_is_directory) { if (new_parent && src_is_directory) {
error = xfs_bumplink(tp, target_dp); error = xfs_bumplink(tp, target_dp);
if (error) if (error)
goto out_trans_abort; goto out_bmap_cancel;
} }
} else { /* target_ip != NULL */ } else { /* target_ip != NULL */
/* /*
@ -3065,7 +2995,7 @@ xfs_rename(
src_ip->i_ino, src_ip->i_ino,
&first_block, &free_list, spaceres); &first_block, &free_list, spaceres);
if (error) if (error)
goto out_trans_abort; goto out_bmap_cancel;
xfs_trans_ichgtime(tp, target_dp, xfs_trans_ichgtime(tp, target_dp,
XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@ -3076,7 +3006,7 @@ xfs_rename(
*/ */
error = xfs_droplink(tp, target_ip); error = xfs_droplink(tp, target_ip);
if (error) if (error)
goto out_trans_abort; goto out_bmap_cancel;
if (src_is_directory) { if (src_is_directory) {
/* /*
@ -3084,7 +3014,7 @@ xfs_rename(
*/ */
error = xfs_droplink(tp, target_ip); error = xfs_droplink(tp, target_ip);
if (error) if (error)
goto out_trans_abort; goto out_bmap_cancel;
} }
} /* target_ip != NULL */ } /* target_ip != NULL */
@ -3101,7 +3031,7 @@ xfs_rename(
&first_block, &free_list, spaceres); &first_block, &free_list, spaceres);
ASSERT(error != -EEXIST); ASSERT(error != -EEXIST);
if (error) if (error)
goto out_trans_abort; goto out_bmap_cancel;
} }
/* /*
@ -3127,7 +3057,7 @@ xfs_rename(
*/ */
error = xfs_droplink(tp, src_dp); error = xfs_droplink(tp, src_dp);
if (error) if (error)
goto out_trans_abort; goto out_bmap_cancel;
} }
/* /*
@ -3142,7 +3072,7 @@ xfs_rename(
error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
&first_block, &free_list, spaceres); &first_block, &free_list, spaceres);
if (error) if (error)
goto out_trans_abort; goto out_bmap_cancel;
/* /*
* For whiteouts, we need to bump the link count on the whiteout inode. * For whiteouts, we need to bump the link count on the whiteout inode.
@ -3156,10 +3086,10 @@ xfs_rename(
ASSERT(VFS_I(wip)->i_nlink == 0 && wip->i_d.di_nlink == 0); ASSERT(VFS_I(wip)->i_nlink == 0 && wip->i_d.di_nlink == 0);
error = xfs_bumplink(tp, wip); error = xfs_bumplink(tp, wip);
if (error) if (error)
goto out_trans_abort; goto out_bmap_cancel;
error = xfs_iunlink_remove(tp, wip); error = xfs_iunlink_remove(tp, wip);
if (error) if (error)
goto out_trans_abort; goto out_bmap_cancel;
xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE); xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE);
/* /*
@ -3180,12 +3110,10 @@ xfs_rename(
IRELE(wip); IRELE(wip);
return error; return error;
out_trans_abort:
cancel_flags |= XFS_TRANS_ABORT;
out_bmap_cancel: out_bmap_cancel:
xfs_bmap_cancel(&free_list); xfs_bmap_cancel(&free_list);
out_trans_cancel: out_trans_cancel:
xfs_trans_cancel(tp, cancel_flags); xfs_trans_cancel(tp);
if (wip) if (wip)
IRELE(wip); IRELE(wip);
return error; return error;
@ -3464,7 +3392,7 @@ xfs_iflush_int(
ASSERT(ip->i_d.di_version > 1); ASSERT(ip->i_d.di_version > 1);
/* set *dip = inode's place in the buffer */ /* set *dip = inode's place in the buffer */
dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); dip = xfs_buf_offset(bp, ip->i_imap.im_boffset);
if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {

View file

@ -336,7 +336,7 @@ xfs_set_dmattrs(
tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS); tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
if (error) { if (error) {
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
return error; return error;
} }
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
@ -346,7 +346,7 @@ xfs_set_dmattrs(
ip->i_d.di_dmstate = state; ip->i_d.di_dmstate = state;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
error = xfs_trans_commit(tp, 0); error = xfs_trans_commit(tp);
return error; return error;
} }
@ -1076,7 +1076,7 @@ xfs_ioctl_setattr_get_trans(
return tp; return tp;
out_cancel: out_cancel:
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
return ERR_PTR(error); return ERR_PTR(error);
} }
@ -1253,7 +1253,7 @@ xfs_ioctl_setattr(
else else
ip->i_d.di_extsize = 0; ip->i_d.di_extsize = 0;
code = xfs_trans_commit(tp, 0); code = xfs_trans_commit(tp);
/* /*
* Release any dquot(s) the inode had kept before chown. * Release any dquot(s) the inode had kept before chown.
@ -1265,7 +1265,7 @@ xfs_ioctl_setattr(
return code; return code;
error_trans_cancel: error_trans_cancel:
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
error_free_dquots: error_free_dquots:
xfs_qm_dqrele(udqp); xfs_qm_dqrele(udqp);
xfs_qm_dqrele(pdqp); xfs_qm_dqrele(pdqp);
@ -1338,11 +1338,11 @@ xfs_ioc_setxflags(
error = xfs_ioctl_setattr_xflags(tp, ip, &fa); error = xfs_ioctl_setattr_xflags(tp, ip, &fa);
if (error) { if (error) {
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
goto out_drop_write; goto out_drop_write;
} }
error = xfs_trans_commit(tp, 0); error = xfs_trans_commit(tp);
out_drop_write: out_drop_write:
mnt_drop_write_file(filp); mnt_drop_write_file(filp);
return error; return error;

View file

@ -183,7 +183,7 @@ xfs_iomap_write_direct(
* Check for running out of space, note: need lock to return * Check for running out of space, note: need lock to return
*/ */
if (error) { if (error) {
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
return error; return error;
} }
@ -213,7 +213,7 @@ xfs_iomap_write_direct(
error = xfs_bmap_finish(&tp, &free_list, &committed); error = xfs_bmap_finish(&tp, &free_list, &committed);
if (error) if (error)
goto out_bmap_cancel; goto out_bmap_cancel;
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); error = xfs_trans_commit(tp);
if (error) if (error)
goto out_unlock; goto out_unlock;
@ -236,7 +236,7 @@ xfs_iomap_write_direct(
xfs_bmap_cancel(&free_list); xfs_bmap_cancel(&free_list);
xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag); xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
out_trans_cancel: out_trans_cancel:
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); xfs_trans_cancel(tp);
goto out_unlock; goto out_unlock;
} }
@ -690,7 +690,7 @@ xfs_iomap_write_allocate(
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
nres, 0); nres, 0);
if (error) { if (error) {
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
return error; return error;
} }
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
@ -760,7 +760,7 @@ xfs_iomap_write_allocate(
if (error) if (error)
goto trans_cancel; goto trans_cancel;
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); error = xfs_trans_commit(tp);
if (error) if (error)
goto error0; goto error0;
@ -791,7 +791,7 @@ xfs_iomap_write_allocate(
trans_cancel: trans_cancel:
xfs_bmap_cancel(&free_list); xfs_bmap_cancel(&free_list);
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); xfs_trans_cancel(tp);
error0: error0:
xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error; return error;
@ -853,7 +853,7 @@ xfs_iomap_write_unwritten(
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
resblks, 0); resblks, 0);
if (error) { if (error) {
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
return error; return error;
} }
@ -890,7 +890,7 @@ xfs_iomap_write_unwritten(
if (error) if (error)
goto error_on_bmapi_transaction; goto error_on_bmapi_transaction;
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_ILOCK_EXCL);
if (error) if (error)
return error; return error;
@ -914,7 +914,7 @@ xfs_iomap_write_unwritten(
error_on_bmapi_transaction: error_on_bmapi_transaction:
xfs_bmap_cancel(&free_list); xfs_bmap_cancel(&free_list);
xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error; return error;
} }

View file

@ -699,7 +699,7 @@ xfs_setattr_nonsize(
if (mp->m_flags & XFS_MOUNT_WSYNC) if (mp->m_flags & XFS_MOUNT_WSYNC)
xfs_trans_set_sync(tp); xfs_trans_set_sync(tp);
error = xfs_trans_commit(tp, 0); error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_ILOCK_EXCL);
@ -730,7 +730,7 @@ xfs_setattr_nonsize(
return 0; return 0;
out_trans_cancel: out_trans_cancel:
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_ILOCK_EXCL);
out_dqrele: out_dqrele:
xfs_qm_dqrele(udqp); xfs_qm_dqrele(udqp);
@ -752,7 +752,6 @@ xfs_setattr_size(
struct xfs_trans *tp; struct xfs_trans *tp;
int error; int error;
uint lock_flags = 0; uint lock_flags = 0;
uint commit_flags = 0;
bool did_zeroing = false; bool did_zeroing = false;
trace_xfs_setattr(ip); trace_xfs_setattr(ip);
@ -848,7 +847,11 @@ xfs_setattr_size(
* to hope that the caller sees ENOMEM and retries the truncate * to hope that the caller sees ENOMEM and retries the truncate
* operation. * operation.
*/ */
error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); if (IS_DAX(inode))
error = dax_truncate_page(inode, newsize, xfs_get_blocks_direct);
else
error = block_truncate_page(inode->i_mapping, newsize,
xfs_get_blocks);
if (error) if (error)
return error; return error;
truncate_setsize(inode, newsize); truncate_setsize(inode, newsize);
@ -858,7 +861,6 @@ xfs_setattr_size(
if (error) if (error)
goto out_trans_cancel; goto out_trans_cancel;
commit_flags = XFS_TRANS_RELEASE_LOG_RES;
lock_flags |= XFS_ILOCK_EXCL; lock_flags |= XFS_ILOCK_EXCL;
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0); xfs_trans_ijoin(tp, ip, 0);
@ -898,7 +900,7 @@ xfs_setattr_size(
if (newsize <= oldsize) { if (newsize <= oldsize) {
error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize); error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize);
if (error) if (error)
goto out_trans_abort; goto out_trans_cancel;
/* /*
* Truncated "down", so we're removing references to old data * Truncated "down", so we're removing references to old data
@ -925,16 +927,14 @@ xfs_setattr_size(
if (mp->m_flags & XFS_MOUNT_WSYNC) if (mp->m_flags & XFS_MOUNT_WSYNC)
xfs_trans_set_sync(tp); xfs_trans_set_sync(tp);
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); error = xfs_trans_commit(tp);
out_unlock: out_unlock:
if (lock_flags) if (lock_flags)
xfs_iunlock(ip, lock_flags); xfs_iunlock(ip, lock_flags);
return error; return error;
out_trans_abort:
commit_flags |= XFS_TRANS_ABORT;
out_trans_cancel: out_trans_cancel:
xfs_trans_cancel(tp, commit_flags); xfs_trans_cancel(tp);
goto out_unlock; goto out_unlock;
} }
@ -981,7 +981,7 @@ xfs_vn_update_time(
tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
if (error) { if (error) {
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
return error; return error;
} }
@ -1003,7 +1003,7 @@ xfs_vn_update_time(
} }
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP); xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
return xfs_trans_commit(tp, 0); return xfs_trans_commit(tp);
} }
#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) #define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
@ -1188,22 +1188,22 @@ xfs_diflags_to_iflags(
struct inode *inode, struct inode *inode,
struct xfs_inode *ip) struct xfs_inode *ip)
{ {
if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE) uint16_t flags = ip->i_d.di_flags;
inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC |
S_NOATIME | S_DAX);
if (flags & XFS_DIFLAG_IMMUTABLE)
inode->i_flags |= S_IMMUTABLE; inode->i_flags |= S_IMMUTABLE;
else if (flags & XFS_DIFLAG_APPEND)
inode->i_flags &= ~S_IMMUTABLE;
if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
inode->i_flags |= S_APPEND; inode->i_flags |= S_APPEND;
else if (flags & XFS_DIFLAG_SYNC)
inode->i_flags &= ~S_APPEND;
if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
inode->i_flags |= S_SYNC; inode->i_flags |= S_SYNC;
else if (flags & XFS_DIFLAG_NOATIME)
inode->i_flags &= ~S_SYNC;
if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
inode->i_flags |= S_NOATIME; inode->i_flags |= S_NOATIME;
else /* XXX: Also needs an on-disk per inode flag! */
inode->i_flags &= ~S_NOATIME; if (ip->i_mount->m_flags & XFS_MOUNT_DAX)
inode->i_flags |= S_DAX;
} }
/* /*

View file

@ -252,7 +252,7 @@ xfs_bulkstat_grab_ichunk(
} }
irec->ir_free |= xfs_inobt_maskn(0, idx); irec->ir_free |= xfs_inobt_maskn(0, idx);
*icount = XFS_INODES_PER_CHUNK - irec->ir_freecount; *icount = irec->ir_count - irec->ir_freecount;
} }
return 0; return 0;
@ -415,6 +415,8 @@ xfs_bulkstat(
goto del_cursor; goto del_cursor;
if (icount) { if (icount) {
irbp->ir_startino = r.ir_startino; irbp->ir_startino = r.ir_startino;
irbp->ir_holemask = r.ir_holemask;
irbp->ir_count = r.ir_count;
irbp->ir_freecount = r.ir_freecount; irbp->ir_freecount = r.ir_freecount;
irbp->ir_free = r.ir_free; irbp->ir_free = r.ir_free;
irbp++; irbp++;
@ -447,13 +449,15 @@ xfs_bulkstat(
* If this chunk has any allocated inodes, save it. * If this chunk has any allocated inodes, save it.
* Also start read-ahead now for this chunk. * Also start read-ahead now for this chunk.
*/ */
if (r.ir_freecount < XFS_INODES_PER_CHUNK) { if (r.ir_freecount < r.ir_count) {
xfs_bulkstat_ichunk_ra(mp, agno, &r); xfs_bulkstat_ichunk_ra(mp, agno, &r);
irbp->ir_startino = r.ir_startino; irbp->ir_startino = r.ir_startino;
irbp->ir_holemask = r.ir_holemask;
irbp->ir_count = r.ir_count;
irbp->ir_freecount = r.ir_freecount; irbp->ir_freecount = r.ir_freecount;
irbp->ir_free = r.ir_free; irbp->ir_free = r.ir_free;
irbp++; irbp++;
icount += XFS_INODES_PER_CHUNK - r.ir_freecount; icount += r.ir_count - r.ir_freecount;
} }
error = xfs_btree_increment(cur, 0, &stat); error = xfs_btree_increment(cur, 0, &stat);
if (error || stat == 0) { if (error || stat == 0) {
@ -599,8 +603,7 @@ xfs_inumbers(
agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1; agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1;
buffer[bufidx].xi_startino = buffer[bufidx].xi_startino =
XFS_AGINO_TO_INO(mp, agno, r.ir_startino); XFS_AGINO_TO_INO(mp, agno, r.ir_startino);
buffer[bufidx].xi_alloccount = buffer[bufidx].xi_alloccount = r.ir_count - r.ir_freecount;
XFS_INODES_PER_CHUNK - r.ir_freecount;
buffer[bufidx].xi_allocmask = ~r.ir_free; buffer[bufidx].xi_allocmask = ~r.ir_free;
if (++bufidx == bcount) { if (++bufidx == bcount) {
long written; long written;

View file

@ -32,26 +32,12 @@ typedef unsigned int __uint32_t;
typedef signed long long int __int64_t; typedef signed long long int __int64_t;
typedef unsigned long long int __uint64_t; typedef unsigned long long int __uint64_t;
typedef __uint32_t inst_t; /* an instruction */
typedef __s64 xfs_off_t; /* <file offset> type */ typedef __s64 xfs_off_t; /* <file offset> type */
typedef unsigned long long xfs_ino_t; /* <inode> type */ typedef unsigned long long xfs_ino_t; /* <inode> type */
typedef __s64 xfs_daddr_t; /* <disk address> type */ typedef __s64 xfs_daddr_t; /* <disk address> type */
typedef char * xfs_caddr_t; /* <core address> type */
typedef __u32 xfs_dev_t; typedef __u32 xfs_dev_t;
typedef __u32 xfs_nlink_t; typedef __u32 xfs_nlink_t;
/* __psint_t is the same size as a pointer */
#if (BITS_PER_LONG == 32)
typedef __int32_t __psint_t;
typedef __uint32_t __psunsigned_t;
#elif (BITS_PER_LONG == 64)
typedef __int64_t __psint_t;
typedef __uint64_t __psunsigned_t;
#else
#error BITS_PER_LONG must be 32 or 64
#endif
#include "xfs_types.h" #include "xfs_types.h"
#include "kmem.h" #include "kmem.h"

View file

@ -109,7 +109,7 @@ xlog_ungrant_log_space(
STATIC void STATIC void
xlog_verify_dest_ptr( xlog_verify_dest_ptr(
struct xlog *log, struct xlog *log,
char *ptr); void *ptr);
STATIC void STATIC void
xlog_verify_grant_tail( xlog_verify_grant_tail(
struct xlog *log); struct xlog *log);
@ -513,7 +513,7 @@ xfs_log_done(
struct xfs_mount *mp, struct xfs_mount *mp,
struct xlog_ticket *ticket, struct xlog_ticket *ticket,
struct xlog_in_core **iclog, struct xlog_in_core **iclog,
uint flags) bool regrant)
{ {
struct xlog *log = mp->m_log; struct xlog *log = mp->m_log;
xfs_lsn_t lsn = 0; xfs_lsn_t lsn = 0;
@ -526,14 +526,11 @@ xfs_log_done(
(((ticket->t_flags & XLOG_TIC_INITED) == 0) && (((ticket->t_flags & XLOG_TIC_INITED) == 0) &&
(xlog_commit_record(log, ticket, iclog, &lsn)))) { (xlog_commit_record(log, ticket, iclog, &lsn)))) {
lsn = (xfs_lsn_t) -1; lsn = (xfs_lsn_t) -1;
if (ticket->t_flags & XLOG_TIC_PERM_RESERV) { regrant = false;
flags |= XFS_LOG_REL_PERM_RESERV;
}
} }
if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) == 0 || if (!regrant) {
(flags & XFS_LOG_REL_PERM_RESERV)) {
trace_xfs_log_done_nonperm(log, ticket); trace_xfs_log_done_nonperm(log, ticket);
/* /*
@ -541,7 +538,6 @@ xfs_log_done(
* request has been made to release a permanent reservation. * request has been made to release a permanent reservation.
*/ */
xlog_ungrant_log_space(log, ticket); xlog_ungrant_log_space(log, ticket);
xfs_log_ticket_put(ticket);
} else { } else {
trace_xfs_log_done_perm(log, ticket); trace_xfs_log_done_perm(log, ticket);
@ -553,6 +549,7 @@ xfs_log_done(
ticket->t_flags |= XLOG_TIC_INITED; ticket->t_flags |= XLOG_TIC_INITED;
} }
xfs_log_ticket_put(ticket);
return lsn; return lsn;
} }
@ -1447,7 +1444,7 @@ xlog_alloc_log(
iclog->ic_bp = bp; iclog->ic_bp = bp;
iclog->ic_data = bp->b_addr; iclog->ic_data = bp->b_addr;
#ifdef DEBUG #ifdef DEBUG
log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header); log->l_iclog_bak[i] = &iclog->ic_header;
#endif #endif
head = &iclog->ic_header; head = &iclog->ic_header;
memset(head, 0, sizeof(xlog_rec_header_t)); memset(head, 0, sizeof(xlog_rec_header_t));
@ -1602,7 +1599,7 @@ xlog_pack_data(
int i, j, k; int i, j, k;
int size = iclog->ic_offset + roundoff; int size = iclog->ic_offset + roundoff;
__be32 cycle_lsn; __be32 cycle_lsn;
xfs_caddr_t dp; char *dp;
cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn); cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn);
@ -3664,7 +3661,7 @@ xlog_ticket_alloc(
void void
xlog_verify_dest_ptr( xlog_verify_dest_ptr(
struct xlog *log, struct xlog *log,
char *ptr) void *ptr)
{ {
int i; int i;
int good_ptr = 0; int good_ptr = 0;
@ -3767,9 +3764,8 @@ xlog_verify_iclog(
xlog_op_header_t *ophead; xlog_op_header_t *ophead;
xlog_in_core_t *icptr; xlog_in_core_t *icptr;
xlog_in_core_2_t *xhdr; xlog_in_core_2_t *xhdr;
xfs_caddr_t ptr; void *base_ptr, *ptr, *p;
xfs_caddr_t base_ptr; ptrdiff_t field_offset;
__psint_t field_offset;
__uint8_t clientid; __uint8_t clientid;
int len, i, j, k, op_len; int len, i, j, k, op_len;
int idx; int idx;
@ -3788,9 +3784,9 @@ xlog_verify_iclog(
if (iclog->ic_header.h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) if (iclog->ic_header.h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
xfs_emerg(log->l_mp, "%s: invalid magic num", __func__); xfs_emerg(log->l_mp, "%s: invalid magic num", __func__);
ptr = (xfs_caddr_t) &iclog->ic_header; base_ptr = ptr = &iclog->ic_header;
for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count; p = &iclog->ic_header;
ptr += BBSIZE) { for (ptr += BBSIZE; ptr < base_ptr + count; ptr += BBSIZE) {
if (*(__be32 *)ptr == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) if (*(__be32 *)ptr == cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
xfs_emerg(log->l_mp, "%s: unexpected magic num", xfs_emerg(log->l_mp, "%s: unexpected magic num",
__func__); __func__);
@ -3798,20 +3794,19 @@ xlog_verify_iclog(
/* check fields */ /* check fields */
len = be32_to_cpu(iclog->ic_header.h_num_logops); len = be32_to_cpu(iclog->ic_header.h_num_logops);
ptr = iclog->ic_datap; base_ptr = ptr = iclog->ic_datap;
base_ptr = ptr; ophead = ptr;
ophead = (xlog_op_header_t *)ptr;
xhdr = iclog->ic_data; xhdr = iclog->ic_data;
for (i = 0; i < len; i++) { for (i = 0; i < len; i++) {
ophead = (xlog_op_header_t *)ptr; ophead = ptr;
/* clientid is only 1 byte */ /* clientid is only 1 byte */
field_offset = (__psint_t) p = &ophead->oh_clientid;
((xfs_caddr_t)&(ophead->oh_clientid) - base_ptr); field_offset = p - base_ptr;
if (!syncing || (field_offset & 0x1ff)) { if (!syncing || (field_offset & 0x1ff)) {
clientid = ophead->oh_clientid; clientid = ophead->oh_clientid;
} else { } else {
idx = BTOBBT((xfs_caddr_t)&(ophead->oh_clientid) - iclog->ic_datap); idx = BTOBBT((char *)&ophead->oh_clientid - iclog->ic_datap);
if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) { if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) {
j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
@ -3829,13 +3824,13 @@ xlog_verify_iclog(
(unsigned long)field_offset); (unsigned long)field_offset);
/* check length */ /* check length */
field_offset = (__psint_t) p = &ophead->oh_len;
((xfs_caddr_t)&(ophead->oh_len) - base_ptr); field_offset = p - base_ptr;
if (!syncing || (field_offset & 0x1ff)) { if (!syncing || (field_offset & 0x1ff)) {
op_len = be32_to_cpu(ophead->oh_len); op_len = be32_to_cpu(ophead->oh_len);
} else { } else {
idx = BTOBBT((__psint_t)&ophead->oh_len - idx = BTOBBT((uintptr_t)&ophead->oh_len -
(__psint_t)iclog->ic_datap); (uintptr_t)iclog->ic_datap);
if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) { if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) {
j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);

View file

@ -110,15 +110,6 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
#define XFS_LSN_CMP(x,y) _lsn_cmp(x,y) #define XFS_LSN_CMP(x,y) _lsn_cmp(x,y)
/*
* Macros, structures, prototypes for interface to the log manager.
*/
/*
* Flags to xfs_log_done()
*/
#define XFS_LOG_REL_PERM_RESERV 0x1
/* /*
* Flags to xfs_log_force() * Flags to xfs_log_force()
* *
@ -138,7 +129,7 @@ struct xfs_log_callback;
xfs_lsn_t xfs_log_done(struct xfs_mount *mp, xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
struct xlog_ticket *ticket, struct xlog_ticket *ticket,
struct xlog_in_core **iclog, struct xlog_in_core **iclog,
uint flags); bool regrant);
int _xfs_log_force(struct xfs_mount *mp, int _xfs_log_force(struct xfs_mount *mp,
uint flags, uint flags,
int *log_forced); int *log_forced);
@ -183,7 +174,7 @@ struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
void xfs_log_ticket_put(struct xlog_ticket *ticket); void xfs_log_ticket_put(struct xlog_ticket *ticket);
void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_lsn_t *commit_lsn, int flags); xfs_lsn_t *commit_lsn, bool regrant);
bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
void xfs_log_work_queue(struct xfs_mount *mp); void xfs_log_work_queue(struct xfs_mount *mp);

View file

@ -624,7 +624,7 @@ xlog_cil_push(
spin_unlock(&cil->xc_push_lock); spin_unlock(&cil->xc_push_lock);
/* xfs_log_done always frees the ticket on error. */ /* xfs_log_done always frees the ticket on error. */
commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0); commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, false);
if (commit_lsn == -1) if (commit_lsn == -1)
goto out_abort; goto out_abort;
@ -773,14 +773,10 @@ xfs_log_commit_cil(
struct xfs_mount *mp, struct xfs_mount *mp,
struct xfs_trans *tp, struct xfs_trans *tp,
xfs_lsn_t *commit_lsn, xfs_lsn_t *commit_lsn,
int flags) bool regrant)
{ {
struct xlog *log = mp->m_log; struct xlog *log = mp->m_log;
struct xfs_cil *cil = log->l_cilp; struct xfs_cil *cil = log->l_cilp;
int log_flags = 0;
if (flags & XFS_TRANS_RELEASE_LOG_RES)
log_flags = XFS_LOG_REL_PERM_RESERV;
/* lock out background commit */ /* lock out background commit */
down_read(&cil->xc_ctx_lock); down_read(&cil->xc_ctx_lock);
@ -795,7 +791,7 @@ xfs_log_commit_cil(
if (commit_lsn) if (commit_lsn)
*commit_lsn = tp->t_commit_lsn; *commit_lsn = tp->t_commit_lsn;
xfs_log_done(mp, tp->t_ticket, NULL, log_flags); xfs_log_done(mp, tp->t_ticket, NULL, regrant);
xfs_trans_unreserve_and_mod_sb(tp); xfs_trans_unreserve_and_mod_sb(tp);
/* /*
@ -809,7 +805,7 @@ xfs_log_commit_cil(
* the log items. This affects (at least) processing of stale buffers, * the log items. This affects (at least) processing of stale buffers,
* inodes and EFIs. * inodes and EFIs.
*/ */
xfs_trans_free_items(tp, tp->t_commit_lsn, 0); xfs_trans_free_items(tp, tp->t_commit_lsn, false);
xlog_cil_push_background(log); xlog_cil_push_background(log);

View file

@ -409,7 +409,7 @@ struct xlog {
/* The following field are used for debugging; need to hold icloglock */ /* The following field are used for debugging; need to hold icloglock */
#ifdef DEBUG #ifdef DEBUG
char *l_iclog_bak[XLOG_MAX_ICLOGS]; void *l_iclog_bak[XLOG_MAX_ICLOGS];
#endif #endif
}; };

View file

@ -147,7 +147,7 @@ xlog_put_bp(
* Return the address of the start of the given block number's data * Return the address of the start of the given block number's data
* in a log buffer. The buffer covers a log sector-aligned region. * in a log buffer. The buffer covers a log sector-aligned region.
*/ */
STATIC xfs_caddr_t STATIC char *
xlog_align( xlog_align(
struct xlog *log, struct xlog *log,
xfs_daddr_t blk_no, xfs_daddr_t blk_no,
@ -203,7 +203,7 @@ xlog_bread(
xfs_daddr_t blk_no, xfs_daddr_t blk_no,
int nbblks, int nbblks,
struct xfs_buf *bp, struct xfs_buf *bp,
xfs_caddr_t *offset) char **offset)
{ {
int error; int error;
@ -225,9 +225,9 @@ xlog_bread_offset(
xfs_daddr_t blk_no, /* block to read from */ xfs_daddr_t blk_no, /* block to read from */
int nbblks, /* blocks to read */ int nbblks, /* blocks to read */
struct xfs_buf *bp, struct xfs_buf *bp,
xfs_caddr_t offset) char *offset)
{ {
xfs_caddr_t orig_offset = bp->b_addr; char *orig_offset = bp->b_addr;
int orig_len = BBTOB(bp->b_length); int orig_len = BBTOB(bp->b_length);
int error, error2; int error, error2;
@ -396,7 +396,7 @@ xlog_find_cycle_start(
xfs_daddr_t *last_blk, xfs_daddr_t *last_blk,
uint cycle) uint cycle)
{ {
xfs_caddr_t offset; char *offset;
xfs_daddr_t mid_blk; xfs_daddr_t mid_blk;
xfs_daddr_t end_blk; xfs_daddr_t end_blk;
uint mid_cycle; uint mid_cycle;
@ -443,7 +443,7 @@ xlog_find_verify_cycle(
uint cycle; uint cycle;
xfs_buf_t *bp; xfs_buf_t *bp;
xfs_daddr_t bufblks; xfs_daddr_t bufblks;
xfs_caddr_t buf = NULL; char *buf = NULL;
int error = 0; int error = 0;
/* /*
@ -509,7 +509,7 @@ xlog_find_verify_log_record(
{ {
xfs_daddr_t i; xfs_daddr_t i;
xfs_buf_t *bp; xfs_buf_t *bp;
xfs_caddr_t offset = NULL; char *offset = NULL;
xlog_rec_header_t *head = NULL; xlog_rec_header_t *head = NULL;
int error = 0; int error = 0;
int smallmem = 0; int smallmem = 0;
@ -616,7 +616,7 @@ xlog_find_head(
xfs_daddr_t *return_head_blk) xfs_daddr_t *return_head_blk)
{ {
xfs_buf_t *bp; xfs_buf_t *bp;
xfs_caddr_t offset; char *offset;
xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk; xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk;
int num_scan_bblks; int num_scan_bblks;
uint first_half_cycle, last_half_cycle; uint first_half_cycle, last_half_cycle;
@ -891,7 +891,7 @@ xlog_find_tail(
{ {
xlog_rec_header_t *rhead; xlog_rec_header_t *rhead;
xlog_op_header_t *op_head; xlog_op_header_t *op_head;
xfs_caddr_t offset = NULL; char *offset = NULL;
xfs_buf_t *bp; xfs_buf_t *bp;
int error, i, found; int error, i, found;
xfs_daddr_t umount_data_blk; xfs_daddr_t umount_data_blk;
@ -1099,7 +1099,7 @@ xlog_find_zeroed(
xfs_daddr_t *blk_no) xfs_daddr_t *blk_no)
{ {
xfs_buf_t *bp; xfs_buf_t *bp;
xfs_caddr_t offset; char *offset;
uint first_cycle, last_cycle; uint first_cycle, last_cycle;
xfs_daddr_t new_blk, last_blk, start_blk; xfs_daddr_t new_blk, last_blk, start_blk;
xfs_daddr_t num_scan_bblks; xfs_daddr_t num_scan_bblks;
@ -1199,7 +1199,7 @@ xlog_find_zeroed(
STATIC void STATIC void
xlog_add_record( xlog_add_record(
struct xlog *log, struct xlog *log,
xfs_caddr_t buf, char *buf,
int cycle, int cycle,
int block, int block,
int tail_cycle, int tail_cycle,
@ -1227,7 +1227,7 @@ xlog_write_log_records(
int tail_cycle, int tail_cycle,
int tail_block) int tail_block)
{ {
xfs_caddr_t offset; char *offset;
xfs_buf_t *bp; xfs_buf_t *bp;
int balign, ealign; int balign, ealign;
int sectbb = log->l_sectBBsize; int sectbb = log->l_sectBBsize;
@ -1789,8 +1789,7 @@ xlog_recover_do_inode_buffer(
return -EFSCORRUPTED; return -EFSCORRUPTED;
} }
buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp, buffer_nextp = xfs_buf_offset(bp, next_unlinked_offset);
next_unlinked_offset);
*buffer_nextp = *logged_nextp; *buffer_nextp = *logged_nextp;
/* /*
@ -1798,7 +1797,7 @@ xlog_recover_do_inode_buffer(
* have to leave the inode in a consistent state for whoever * have to leave the inode in a consistent state for whoever
* reads it next.... * reads it next....
*/ */
xfs_dinode_calc_crc(mp, (struct xfs_dinode *) xfs_dinode_calc_crc(mp,
xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize)); xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
} }
@ -2503,8 +2502,8 @@ xlog_recover_inode_pass2(
xfs_buf_t *bp; xfs_buf_t *bp;
xfs_dinode_t *dip; xfs_dinode_t *dip;
int len; int len;
xfs_caddr_t src; char *src;
xfs_caddr_t dest; char *dest;
int error; int error;
int attr_index; int attr_index;
uint fields; uint fields;
@ -2546,7 +2545,7 @@ xlog_recover_inode_pass2(
goto out_release; goto out_release;
} }
ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset); dip = xfs_buf_offset(bp, in_f->ilf_boffset);
/* /*
* Make sure the place we're flushing out to really looks * Make sure the place we're flushing out to really looks
@ -2885,7 +2884,7 @@ xlog_recover_dquot_pass2(
return error; return error;
ASSERT(bp); ASSERT(bp);
ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset); ddq = xfs_buf_offset(bp, dq_f->qlf_boffset);
/* /*
* If the dquot has an LSN in it, recover the dquot only if it's less * If the dquot has an LSN in it, recover the dquot only if it's less
@ -3068,12 +3067,22 @@ xlog_recover_do_icreate_pass2(
return -EINVAL; return -EINVAL;
} }
/* existing allocation is fixed value */ /*
ASSERT(count == mp->m_ialloc_inos); * The inode chunk is either full or sparse and we only support
ASSERT(length == mp->m_ialloc_blks); * m_ialloc_min_blks sized sparse allocations at this time.
if (count != mp->m_ialloc_inos || */
length != mp->m_ialloc_blks) { if (length != mp->m_ialloc_blks &&
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2"); length != mp->m_ialloc_min_blks) {
xfs_warn(log->l_mp,
"%s: unsupported chunk length", __FUNCTION__);
return -EINVAL;
}
/* verify inode count is consistent with extent length */
if ((count >> mp->m_sb.sb_inopblog) != length) {
xfs_warn(log->l_mp,
"%s: inconsistent inode count and chunk length",
__FUNCTION__);
return -EINVAL; return -EINVAL;
} }
@ -3091,8 +3100,8 @@ xlog_recover_do_icreate_pass2(
XFS_AGB_TO_DADDR(mp, agno, agbno), length, 0)) XFS_AGB_TO_DADDR(mp, agno, agbno), length, 0))
return 0; return 0;
xfs_ialloc_inode_init(mp, NULL, buffer_list, agno, agbno, length, xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno, length,
be32_to_cpu(icl->icl_gen)); be32_to_cpu(icl->icl_gen));
return 0; return 0;
} }
@ -3364,17 +3373,17 @@ STATIC int
xlog_recover_add_to_cont_trans( xlog_recover_add_to_cont_trans(
struct xlog *log, struct xlog *log,
struct xlog_recover *trans, struct xlog_recover *trans,
xfs_caddr_t dp, char *dp,
int len) int len)
{ {
xlog_recover_item_t *item; xlog_recover_item_t *item;
xfs_caddr_t ptr, old_ptr; char *ptr, *old_ptr;
int old_len; int old_len;
if (list_empty(&trans->r_itemq)) { if (list_empty(&trans->r_itemq)) {
/* finish copying rest of trans header */ /* finish copying rest of trans header */
xlog_recover_add_item(&trans->r_itemq); xlog_recover_add_item(&trans->r_itemq);
ptr = (xfs_caddr_t) &trans->r_theader + ptr = (char *)&trans->r_theader +
sizeof(xfs_trans_header_t) - len; sizeof(xfs_trans_header_t) - len;
memcpy(ptr, dp, len); memcpy(ptr, dp, len);
return 0; return 0;
@ -3410,12 +3419,12 @@ STATIC int
xlog_recover_add_to_trans( xlog_recover_add_to_trans(
struct xlog *log, struct xlog *log,
struct xlog_recover *trans, struct xlog_recover *trans,
xfs_caddr_t dp, char *dp,
int len) int len)
{ {
xfs_inode_log_format_t *in_f; /* any will do */ xfs_inode_log_format_t *in_f; /* any will do */
xlog_recover_item_t *item; xlog_recover_item_t *item;
xfs_caddr_t ptr; char *ptr;
if (!len) if (!len)
return 0; return 0;
@ -3504,7 +3513,7 @@ STATIC int
xlog_recovery_process_trans( xlog_recovery_process_trans(
struct xlog *log, struct xlog *log,
struct xlog_recover *trans, struct xlog_recover *trans,
xfs_caddr_t dp, char *dp,
unsigned int len, unsigned int len,
unsigned int flags, unsigned int flags,
int pass) int pass)
@ -3611,8 +3620,8 @@ xlog_recover_process_ophdr(
struct hlist_head rhash[], struct hlist_head rhash[],
struct xlog_rec_header *rhead, struct xlog_rec_header *rhead,
struct xlog_op_header *ohead, struct xlog_op_header *ohead,
xfs_caddr_t dp, char *dp,
xfs_caddr_t end, char *end,
int pass) int pass)
{ {
struct xlog_recover *trans; struct xlog_recover *trans;
@ -3661,11 +3670,11 @@ xlog_recover_process_data(
struct xlog *log, struct xlog *log,
struct hlist_head rhash[], struct hlist_head rhash[],
struct xlog_rec_header *rhead, struct xlog_rec_header *rhead,
xfs_caddr_t dp, char *dp,
int pass) int pass)
{ {
struct xlog_op_header *ohead; struct xlog_op_header *ohead;
xfs_caddr_t end; char *end;
int num_logops; int num_logops;
int error; int error;
@ -3751,11 +3760,11 @@ xlog_recover_process_efi(
} }
set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
error = xfs_trans_commit(tp, 0); error = xfs_trans_commit(tp);
return error; return error;
abort_error: abort_error:
xfs_trans_cancel(tp, XFS_TRANS_ABORT); xfs_trans_cancel(tp);
return error; return error;
} }
@ -3857,13 +3866,13 @@ xlog_recover_clear_agi_bucket(
xfs_trans_log_buf(tp, agibp, offset, xfs_trans_log_buf(tp, agibp, offset,
(offset + sizeof(xfs_agino_t) - 1)); (offset + sizeof(xfs_agino_t) - 1));
error = xfs_trans_commit(tp, 0); error = xfs_trans_commit(tp);
if (error) if (error)
goto out_error; goto out_error;
return; return;
out_abort: out_abort:
xfs_trans_cancel(tp, XFS_TRANS_ABORT); xfs_trans_cancel(tp);
out_error: out_error:
xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno); xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno);
return; return;
@ -4010,7 +4019,7 @@ xlog_recover_process_iunlinks(
STATIC int STATIC int
xlog_unpack_data_crc( xlog_unpack_data_crc(
struct xlog_rec_header *rhead, struct xlog_rec_header *rhead,
xfs_caddr_t dp, char *dp,
struct xlog *log) struct xlog *log)
{ {
__le32 crc; __le32 crc;
@ -4040,7 +4049,7 @@ xlog_unpack_data_crc(
STATIC int STATIC int
xlog_unpack_data( xlog_unpack_data(
struct xlog_rec_header *rhead, struct xlog_rec_header *rhead,
xfs_caddr_t dp, char *dp,
struct xlog *log) struct xlog *log)
{ {
int i, j, k; int i, j, k;
@ -4122,7 +4131,7 @@ xlog_do_recovery_pass(
{ {
xlog_rec_header_t *rhead; xlog_rec_header_t *rhead;
xfs_daddr_t blk_no; xfs_daddr_t blk_no;
xfs_caddr_t offset; char *offset;
xfs_buf_t *hbp, *dbp; xfs_buf_t *hbp, *dbp;
int error = 0, h_size; int error = 0, h_size;
int bblks, split_bblks; int bblks, split_bblks;

View file

@ -724,6 +724,22 @@ xfs_mountfs(
mp->m_inode_cluster_size = new_size; mp->m_inode_cluster_size = new_size;
} }
/*
* If enabled, sparse inode chunk alignment is expected to match the
* cluster size. Full inode chunk alignment must match the chunk size,
* but that is checked on sb read verification...
*/
if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&
mp->m_sb.sb_spino_align !=
XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) {
xfs_warn(mp,
"Sparse inode block alignment (%u) must match cluster size (%llu).",
mp->m_sb.sb_spino_align,
XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size));
error = -EINVAL;
goto out_remove_uuid;
}
/* /*
* Set inode alignment fields * Set inode alignment fields
*/ */

View file

@ -101,6 +101,8 @@ typedef struct xfs_mount {
__uint64_t m_flags; /* global mount flags */ __uint64_t m_flags; /* global mount flags */
int m_ialloc_inos; /* inodes in inode allocation */ int m_ialloc_inos; /* inodes in inode allocation */
int m_ialloc_blks; /* blocks in inode allocation */ int m_ialloc_blks; /* blocks in inode allocation */
int m_ialloc_min_blks;/* min blocks in sparse inode
* allocation */
int m_inoalign_mask;/* mask sb_inoalignmt if used */ int m_inoalign_mask;/* mask sb_inoalignmt if used */
uint m_qflags; /* quota status flags */ uint m_qflags; /* quota status flags */
struct xfs_trans_resv m_resv; /* precomputed res values */ struct xfs_trans_resv m_resv; /* precomputed res values */
@ -179,6 +181,8 @@ typedef struct xfs_mount {
allocator */ allocator */
#define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */ #define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */
#define XFS_MOUNT_DAX (1ULL << 62) /* TEST ONLY! */
/* /*
* Default minimum read and write sizes. * Default minimum read and write sizes.

View file

@ -306,7 +306,7 @@ xfs_fs_commit_blocks(
tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
if (error) { if (error) {
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
goto out_drop_iolock; goto out_drop_iolock;
} }
@ -321,7 +321,7 @@ xfs_fs_commit_blocks(
} }
xfs_trans_set_sync(tp); xfs_trans_set_sync(tp);
error = xfs_trans_commit(tp, 0); error = xfs_trans_commit(tp);
out_drop_iolock: out_drop_iolock:
xfs_iunlock(ip, XFS_IOLOCK_EXCL); xfs_iunlock(ip, XFS_IOLOCK_EXCL);

View file

@ -756,7 +756,7 @@ xfs_qm_qino_alloc(
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_create, error = xfs_trans_reserve(tp, &M_RES(mp)->tr_create,
XFS_QM_QINOCREATE_SPACE_RES(mp), 0); XFS_QM_QINOCREATE_SPACE_RES(mp), 0);
if (error) { if (error) {
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
return error; return error;
} }
@ -764,8 +764,7 @@ xfs_qm_qino_alloc(
error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip,
&committed); &committed);
if (error) { if (error) {
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | xfs_trans_cancel(tp);
XFS_TRANS_ABORT);
return error; return error;
} }
} }
@ -796,7 +795,7 @@ xfs_qm_qino_alloc(
spin_unlock(&mp->m_sb_lock); spin_unlock(&mp->m_sb_lock);
xfs_log_sb(tp); xfs_log_sb(tp);
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); error = xfs_trans_commit(tp);
if (error) { if (error) {
ASSERT(XFS_FORCED_SHUTDOWN(mp)); ASSERT(XFS_FORCED_SHUTDOWN(mp));
xfs_alert(mp, "%s failed (error %d)!", __func__, error); xfs_alert(mp, "%s failed (error %d)!", __func__, error);

View file

@ -239,7 +239,7 @@ xfs_qm_scall_trunc_qfile(
tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE); tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
if (error) { if (error) {
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_IOLOCK_EXCL); xfs_iunlock(ip, XFS_IOLOCK_EXCL);
goto out_put; goto out_put;
} }
@ -252,15 +252,14 @@ xfs_qm_scall_trunc_qfile(
error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0); error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
if (error) { if (error) {
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | xfs_trans_cancel(tp);
XFS_TRANS_ABORT);
goto out_unlock; goto out_unlock;
} }
ASSERT(ip->i_d.di_nextents == 0); ASSERT(ip->i_d.di_nextents == 0);
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); error = xfs_trans_commit(tp);
out_unlock: out_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
@ -437,7 +436,7 @@ xfs_qm_scall_setqlim(
tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_setqlim, 0, 0); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_setqlim, 0, 0);
if (error) { if (error) {
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
goto out_rele; goto out_rele;
} }
@ -548,7 +547,7 @@ xfs_qm_scall_setqlim(
dqp->dq_flags |= XFS_DQ_DIRTY; dqp->dq_flags |= XFS_DQ_DIRTY;
xfs_trans_log_dquot(tp, dqp); xfs_trans_log_dquot(tp, dqp);
error = xfs_trans_commit(tp, 0); error = xfs_trans_commit(tp);
out_rele: out_rele:
xfs_qm_dqrele(dqp); xfs_qm_dqrele(dqp);
@ -571,7 +570,7 @@ xfs_qm_log_quotaoff_end(
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_equotaoff, 0, 0); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_equotaoff, 0, 0);
if (error) { if (error) {
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
return error; return error;
} }
@ -585,8 +584,7 @@ xfs_qm_log_quotaoff_end(
* We don't care about quotoff's performance. * We don't care about quotoff's performance.
*/ */
xfs_trans_set_sync(tp); xfs_trans_set_sync(tp);
error = xfs_trans_commit(tp, 0); return xfs_trans_commit(tp);
return error;
} }
@ -605,7 +603,7 @@ xfs_qm_log_quotaoff(
tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF); tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_quotaoff, 0, 0); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_quotaoff, 0, 0);
if (error) { if (error) {
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
goto out; goto out;
} }
@ -624,7 +622,7 @@ xfs_qm_log_quotaoff(
* We don't care about quotoff's performance. * We don't care about quotoff's performance.
*/ */
xfs_trans_set_sync(tp); xfs_trans_set_sync(tp);
error = xfs_trans_commit(tp, 0); error = xfs_trans_commit(tp);
if (error) if (error)
goto out; goto out;

View file

@ -55,7 +55,6 @@ struct xfs_trans;
typedef struct xfs_dqtrx { typedef struct xfs_dqtrx {
struct xfs_dquot *qt_dquot; /* the dquot this refers to */ struct xfs_dquot *qt_dquot; /* the dquot this refers to */
ulong qt_blk_res; /* blks reserved on a dquot */ ulong qt_blk_res; /* blks reserved on a dquot */
ulong qt_blk_res_used; /* blks used from the reservation */
ulong qt_ino_res; /* inode reserved on a dquot */ ulong qt_ino_res; /* inode reserved on a dquot */
ulong qt_ino_res_used; /* inodes used from the reservation */ ulong qt_ino_res_used; /* inodes used from the reservation */
long qt_bcount_delta; /* dquot blk count changes */ long qt_bcount_delta; /* dquot blk count changes */

View file

@ -780,7 +780,6 @@ xfs_growfs_rt_alloc(
* Allocate space to the file, as necessary. * Allocate space to the file, as necessary.
*/ */
while (oblocks < nblocks) { while (oblocks < nblocks) {
int cancelflags = 0;
xfs_trans_t *tp; xfs_trans_t *tp;
tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC); tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC);
@ -792,7 +791,6 @@ xfs_growfs_rt_alloc(
resblks, 0); resblks, 0);
if (error) if (error)
goto error_cancel; goto error_cancel;
cancelflags = XFS_TRANS_RELEASE_LOG_RES;
/* /*
* Lock the inode. * Lock the inode.
*/ */
@ -804,7 +802,6 @@ xfs_growfs_rt_alloc(
* Allocate blocks to the bitmap file. * Allocate blocks to the bitmap file.
*/ */
nmap = 1; nmap = 1;
cancelflags |= XFS_TRANS_ABORT;
error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks, error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks,
XFS_BMAPI_METADATA, &firstblock, XFS_BMAPI_METADATA, &firstblock,
resblks, &map, &nmap, &flist); resblks, &map, &nmap, &flist);
@ -818,14 +815,13 @@ xfs_growfs_rt_alloc(
error = xfs_bmap_finish(&tp, &flist, &committed); error = xfs_bmap_finish(&tp, &flist, &committed);
if (error) if (error)
goto error_cancel; goto error_cancel;
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); error = xfs_trans_commit(tp);
if (error) if (error)
goto error; goto error;
/* /*
* Now we need to clear the allocated blocks. * Now we need to clear the allocated blocks.
* Do this one block per transaction, to keep it simple. * Do this one block per transaction, to keep it simple.
*/ */
cancelflags = 0;
for (bno = map.br_startoff, fsbno = map.br_startblock; for (bno = map.br_startoff, fsbno = map.br_startblock;
bno < map.br_startoff + map.br_blockcount; bno < map.br_startoff + map.br_blockcount;
bno++, fsbno++) { bno++, fsbno++) {
@ -851,7 +847,7 @@ xfs_growfs_rt_alloc(
if (bp == NULL) { if (bp == NULL) {
error = -EIO; error = -EIO;
error_cancel: error_cancel:
xfs_trans_cancel(tp, cancelflags); xfs_trans_cancel(tp);
goto error; goto error;
} }
memset(bp->b_addr, 0, mp->m_sb.sb_blocksize); memset(bp->b_addr, 0, mp->m_sb.sb_blocksize);
@ -859,7 +855,7 @@ xfs_growfs_rt_alloc(
/* /*
* Commit the transaction. * Commit the transaction.
*/ */
error = xfs_trans_commit(tp, 0); error = xfs_trans_commit(tp);
if (error) if (error)
goto error; goto error;
} }
@ -973,7 +969,6 @@ xfs_growfs_rt(
bmbno < nrbmblocks; bmbno < nrbmblocks;
bmbno++) { bmbno++) {
xfs_trans_t *tp; xfs_trans_t *tp;
int cancelflags = 0;
*nmp = *mp; *nmp = *mp;
nsbp = &nmp->m_sb; nsbp = &nmp->m_sb;
@ -1015,7 +1010,6 @@ xfs_growfs_rt(
mp->m_rbmip->i_d.di_size = mp->m_rbmip->i_d.di_size =
nsbp->sb_rbmblocks * nsbp->sb_blocksize; nsbp->sb_rbmblocks * nsbp->sb_blocksize;
xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
cancelflags |= XFS_TRANS_ABORT;
/* /*
* Get the summary inode into the transaction. * Get the summary inode into the transaction.
*/ */
@ -1062,7 +1056,7 @@ xfs_growfs_rt(
nsbp->sb_rextents - sbp->sb_rextents, &bp, &sumbno); nsbp->sb_rextents - sbp->sb_rextents, &bp, &sumbno);
if (error) { if (error) {
error_cancel: error_cancel:
xfs_trans_cancel(tp, cancelflags); xfs_trans_cancel(tp);
break; break;
} }
/* /*
@ -1076,7 +1070,7 @@ xfs_growfs_rt(
mp->m_rsumlevels = nrsumlevels; mp->m_rsumlevels = nrsumlevels;
mp->m_rsumsize = nrsumsize; mp->m_rsumsize = nrsumsize;
error = xfs_trans_commit(tp, 0); error = xfs_trans_commit(tp);
if (error) if (error)
break; break;
} }

View file

@ -112,6 +112,8 @@ static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */
#define MNTOPT_DISCARD "discard" /* Discard unused blocks */ #define MNTOPT_DISCARD "discard" /* Discard unused blocks */
#define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */ #define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */
#define MNTOPT_DAX "dax" /* Enable direct access to bdev pages */
/* /*
* Table driven mount option parser. * Table driven mount option parser.
* *
@ -363,6 +365,10 @@ xfs_parseargs(
mp->m_flags |= XFS_MOUNT_DISCARD; mp->m_flags |= XFS_MOUNT_DISCARD;
} else if (!strcmp(this_char, MNTOPT_NODISCARD)) { } else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
mp->m_flags &= ~XFS_MOUNT_DISCARD; mp->m_flags &= ~XFS_MOUNT_DISCARD;
#ifdef CONFIG_FS_DAX
} else if (!strcmp(this_char, MNTOPT_DAX)) {
mp->m_flags |= XFS_MOUNT_DAX;
#endif
} else { } else {
xfs_warn(mp, "unknown mount option [%s].", this_char); xfs_warn(mp, "unknown mount option [%s].", this_char);
return -EINVAL; return -EINVAL;
@ -452,8 +458,8 @@ xfs_parseargs(
} }
struct proc_xfs_info { struct proc_xfs_info {
int flag; uint64_t flag;
char *str; char *str;
}; };
STATIC int STATIC int
@ -474,6 +480,7 @@ xfs_showargs(
{ XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, { XFS_MOUNT_GRPID, "," MNTOPT_GRPID },
{ XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD }, { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD },
{ XFS_MOUNT_SMALL_INUMS, "," MNTOPT_32BITINODE }, { XFS_MOUNT_SMALL_INUMS, "," MNTOPT_32BITINODE },
{ XFS_MOUNT_DAX, "," MNTOPT_DAX },
{ 0, NULL } { 0, NULL }
}; };
static struct proc_xfs_info xfs_info_unset[] = { static struct proc_xfs_info xfs_info_unset[] = {
@ -1507,6 +1514,20 @@ xfs_fs_fill_super(
if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5) if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
sb->s_flags |= MS_I_VERSION; sb->s_flags |= MS_I_VERSION;
if (mp->m_flags & XFS_MOUNT_DAX) {
xfs_warn(mp,
"DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
if (sb->s_blocksize != PAGE_SIZE) {
xfs_alert(mp,
"Filesystem block size invalid for DAX Turning DAX off.");
mp->m_flags &= ~XFS_MOUNT_DAX;
} else if (!sb->s_bdev->bd_disk->fops->direct_access) {
xfs_alert(mp,
"Block device does not support DAX Turning DAX off.");
mp->m_flags &= ~XFS_MOUNT_DAX;
}
}
error = xfs_mountfs(mp); error = xfs_mountfs(mp);
if (error) if (error)
goto out_filestream_unmount; goto out_filestream_unmount;

View file

@ -104,7 +104,7 @@ xfs_readlink_bmap(
cur_chunk += sizeof(struct xfs_dsymlink_hdr); cur_chunk += sizeof(struct xfs_dsymlink_hdr);
} }
memcpy(link + offset, bp->b_addr, byte_cnt); memcpy(link + offset, cur_chunk, byte_cnt);
pathlen -= byte_cnt; pathlen -= byte_cnt;
offset += byte_cnt; offset += byte_cnt;
@ -178,7 +178,6 @@ xfs_symlink(
struct xfs_bmap_free free_list; struct xfs_bmap_free free_list;
xfs_fsblock_t first_block; xfs_fsblock_t first_block;
bool unlock_dp_on_error = false; bool unlock_dp_on_error = false;
uint cancel_flags;
int committed; int committed;
xfs_fileoff_t first_fsb; xfs_fileoff_t first_fsb;
xfs_filblks_t fs_blocks; xfs_filblks_t fs_blocks;
@ -224,7 +223,6 @@ xfs_symlink(
return error; return error;
tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK); tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
/* /*
* The symlink will fit into the inode data fork? * The symlink will fit into the inode data fork?
* There can't be any attributes so we get the whole variable part. * There can't be any attributes so we get the whole variable part.
@ -239,10 +237,8 @@ xfs_symlink(
resblks = 0; resblks = 0;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, 0, 0); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, 0, 0);
} }
if (error) { if (error)
cancel_flags = 0;
goto out_trans_cancel; goto out_trans_cancel;
}
xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
unlock_dp_on_error = true; unlock_dp_on_error = true;
@ -394,7 +390,7 @@ xfs_symlink(
if (error) if (error)
goto out_bmap_cancel; goto out_bmap_cancel;
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); error = xfs_trans_commit(tp);
if (error) if (error)
goto out_release_inode; goto out_release_inode;
@ -407,9 +403,8 @@ xfs_symlink(
out_bmap_cancel: out_bmap_cancel:
xfs_bmap_cancel(&free_list); xfs_bmap_cancel(&free_list);
cancel_flags |= XFS_TRANS_ABORT;
out_trans_cancel: out_trans_cancel:
xfs_trans_cancel(tp, cancel_flags); xfs_trans_cancel(tp);
out_release_inode: out_release_inode:
/* /*
* Wait until after the current transaction is aborted to finish the * Wait until after the current transaction is aborted to finish the
@ -464,7 +459,7 @@ xfs_inactive_symlink_rmt(
tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
if (error) { if (error) {
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp);
return error; return error;
} }
@ -533,7 +528,7 @@ xfs_inactive_symlink_rmt(
/* /*
* Commit the transaction containing extent freeing and EFDs. * Commit the transaction containing extent freeing and EFDs.
*/ */
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); error = xfs_trans_commit(tp);
if (error) { if (error) {
ASSERT(XFS_FORCED_SHUTDOWN(mp)); ASSERT(XFS_FORCED_SHUTDOWN(mp));
goto error_unlock; goto error_unlock;
@ -552,7 +547,7 @@ xfs_inactive_symlink_rmt(
error_bmap_cancel: error_bmap_cancel:
xfs_bmap_cancel(&free_list); xfs_bmap_cancel(&free_list);
error_trans_cancel: error_trans_cancel:
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); xfs_trans_cancel(tp);
error_unlock: error_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error; return error;

View file

@ -738,6 +738,53 @@ TRACE_EVENT(xfs_iomap_prealloc_size,
__entry->blocks, __entry->shift, __entry->writeio_blocks) __entry->blocks, __entry->shift, __entry->writeio_blocks)
) )
TRACE_EVENT(xfs_irec_merge_pre,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t agino,
uint16_t holemask, xfs_agino_t nagino, uint16_t nholemask),
TP_ARGS(mp, agno, agino, holemask, nagino, nholemask),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
__field(xfs_agino_t, agino)
__field(uint16_t, holemask)
__field(xfs_agino_t, nagino)
__field(uint16_t, nholemask)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
__entry->agino = agino;
__entry->holemask = holemask;
__entry->nagino = nagino;
__entry->nholemask = holemask;
),
TP_printk("dev %d:%d agno %d inobt (%u:0x%x) new (%u:0x%x)",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
__entry->agino, __entry->holemask, __entry->nagino,
__entry->nholemask)
)
TRACE_EVENT(xfs_irec_merge_post,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t agino,
uint16_t holemask),
TP_ARGS(mp, agno, agino, holemask),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
__field(xfs_agino_t, agino)
__field(uint16_t, holemask)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
__entry->agino = agino;
__entry->holemask = holemask;
),
TP_printk("dev %d:%d agno %d inobt (%u:0x%x)", MAJOR(__entry->dev),
MINOR(__entry->dev), __entry->agno, __entry->agino,
__entry->holemask)
)
#define DEFINE_IREF_EVENT(name) \ #define DEFINE_IREF_EVENT(name) \
DEFINE_EVENT(xfs_iref_class, name, \ DEFINE_EVENT(xfs_iref_class, name, \
TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \ TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \

View file

@ -113,7 +113,7 @@ xfs_trans_free(
* blocks. Locks and log items, however, are no inherited. They must * blocks. Locks and log items, however, are no inherited. They must
* be added to the new transaction explicitly. * be added to the new transaction explicitly.
*/ */
xfs_trans_t * STATIC xfs_trans_t *
xfs_trans_dup( xfs_trans_dup(
xfs_trans_t *tp) xfs_trans_t *tp)
{ {
@ -251,14 +251,7 @@ xfs_trans_reserve(
*/ */
undo_log: undo_log:
if (resp->tr_logres > 0) { if (resp->tr_logres > 0) {
int log_flags; xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, false);
if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES) {
log_flags = XFS_LOG_REL_PERM_RESERV;
} else {
log_flags = 0;
}
xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, log_flags);
tp->t_ticket = NULL; tp->t_ticket = NULL;
tp->t_log_res = 0; tp->t_log_res = 0;
tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES; tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES;
@ -744,7 +737,7 @@ void
xfs_trans_free_items( xfs_trans_free_items(
struct xfs_trans *tp, struct xfs_trans *tp,
xfs_lsn_t commit_lsn, xfs_lsn_t commit_lsn,
int flags) bool abort)
{ {
struct xfs_log_item_desc *lidp, *next; struct xfs_log_item_desc *lidp, *next;
@ -755,7 +748,7 @@ xfs_trans_free_items(
if (commit_lsn != NULLCOMMITLSN) if (commit_lsn != NULLCOMMITLSN)
lip->li_ops->iop_committing(lip, commit_lsn); lip->li_ops->iop_committing(lip, commit_lsn);
if (flags & XFS_TRANS_ABORT) if (abort)
lip->li_flags |= XFS_LI_ABORTED; lip->li_flags |= XFS_LI_ABORTED;
lip->li_ops->iop_unlock(lip); lip->li_ops->iop_unlock(lip);
@ -892,26 +885,16 @@ xfs_trans_committed_bulk(
* have already been unlocked as if the commit had succeeded. * have already been unlocked as if the commit had succeeded.
* Do not reference the transaction structure after this call. * Do not reference the transaction structure after this call.
*/ */
int static int
xfs_trans_commit( __xfs_trans_commit(
struct xfs_trans *tp, struct xfs_trans *tp,
uint flags) bool regrant)
{ {
struct xfs_mount *mp = tp->t_mountp; struct xfs_mount *mp = tp->t_mountp;
xfs_lsn_t commit_lsn = -1; xfs_lsn_t commit_lsn = -1;
int error = 0; int error = 0;
int log_flags = 0;
int sync = tp->t_flags & XFS_TRANS_SYNC; int sync = tp->t_flags & XFS_TRANS_SYNC;
/*
* Determine whether this commit is releasing a permanent
* log reservation or not.
*/
if (flags & XFS_TRANS_RELEASE_LOG_RES) {
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
log_flags = XFS_LOG_REL_PERM_RESERV;
}
/* /*
* If there is nothing to be logged by the transaction, * If there is nothing to be logged by the transaction,
* then unlock all of the items associated with the * then unlock all of the items associated with the
@ -936,7 +919,7 @@ xfs_trans_commit(
xfs_trans_apply_sb_deltas(tp); xfs_trans_apply_sb_deltas(tp);
xfs_trans_apply_dquot_deltas(tp); xfs_trans_apply_dquot_deltas(tp);
xfs_log_commit_cil(mp, tp, &commit_lsn, flags); xfs_log_commit_cil(mp, tp, &commit_lsn, regrant);
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
xfs_trans_free(tp); xfs_trans_free(tp);
@ -964,18 +947,25 @@ xfs_trans_commit(
*/ */
xfs_trans_unreserve_and_mod_dquots(tp); xfs_trans_unreserve_and_mod_dquots(tp);
if (tp->t_ticket) { if (tp->t_ticket) {
commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, log_flags); commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, regrant);
if (commit_lsn == -1 && !error) if (commit_lsn == -1 && !error)
error = -EIO; error = -EIO;
} }
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
xfs_trans_free_items(tp, NULLCOMMITLSN, error ? XFS_TRANS_ABORT : 0); xfs_trans_free_items(tp, NULLCOMMITLSN, !!error);
xfs_trans_free(tp); xfs_trans_free(tp);
XFS_STATS_INC(xs_trans_empty); XFS_STATS_INC(xs_trans_empty);
return error; return error;
} }
int
xfs_trans_commit(
struct xfs_trans *tp)
{
return __xfs_trans_commit(tp, false);
}
/* /*
* Unlock all of the transaction's items and free the transaction. * Unlock all of the transaction's items and free the transaction.
* The transaction must not have modified any of its items, because * The transaction must not have modified any of its items, because
@ -986,29 +976,22 @@ xfs_trans_commit(
*/ */
void void
xfs_trans_cancel( xfs_trans_cancel(
xfs_trans_t *tp, struct xfs_trans *tp)
int flags)
{ {
int log_flags; struct xfs_mount *mp = tp->t_mountp;
xfs_mount_t *mp = tp->t_mountp; bool dirty = (tp->t_flags & XFS_TRANS_DIRTY);
/*
* See if the caller is being too lazy to figure out if
* the transaction really needs an abort.
*/
if ((flags & XFS_TRANS_ABORT) && !(tp->t_flags & XFS_TRANS_DIRTY))
flags &= ~XFS_TRANS_ABORT;
/* /*
* See if the caller is relying on us to shut down the * See if the caller is relying on us to shut down the
* filesystem. This happens in paths where we detect * filesystem. This happens in paths where we detect
* corruption and decide to give up. * corruption and decide to give up.
*/ */
if ((tp->t_flags & XFS_TRANS_DIRTY) && !XFS_FORCED_SHUTDOWN(mp)) { if (dirty && !XFS_FORCED_SHUTDOWN(mp)) {
XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp); XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
} }
#ifdef DEBUG #ifdef DEBUG
if (!(flags & XFS_TRANS_ABORT) && !XFS_FORCED_SHUTDOWN(mp)) { if (!dirty && !XFS_FORCED_SHUTDOWN(mp)) {
struct xfs_log_item_desc *lidp; struct xfs_log_item_desc *lidp;
list_for_each_entry(lidp, &tp->t_items, lid_trans) list_for_each_entry(lidp, &tp->t_items, lid_trans)
@ -1018,27 +1001,20 @@ xfs_trans_cancel(
xfs_trans_unreserve_and_mod_sb(tp); xfs_trans_unreserve_and_mod_sb(tp);
xfs_trans_unreserve_and_mod_dquots(tp); xfs_trans_unreserve_and_mod_dquots(tp);
if (tp->t_ticket) { if (tp->t_ticket)
if (flags & XFS_TRANS_RELEASE_LOG_RES) { xfs_log_done(mp, tp->t_ticket, NULL, false);
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
log_flags = XFS_LOG_REL_PERM_RESERV;
} else {
log_flags = 0;
}
xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
}
/* mark this thread as no longer being in a transaction */ /* mark this thread as no longer being in a transaction */
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
xfs_trans_free_items(tp, NULLCOMMITLSN, flags); xfs_trans_free_items(tp, NULLCOMMITLSN, dirty);
xfs_trans_free(tp); xfs_trans_free(tp);
} }
/* /*
* Roll from one trans in the sequence of PERMANENT transactions to * Roll from one trans in the sequence of PERMANENT transactions to
* the next: permanent transactions are only flushed out when * the next: permanent transactions are only flushed out when
* committed with XFS_TRANS_RELEASE_LOG_RES, but we still want as soon * committed with xfs_trans_commit(), but we still want as soon
* as possible to let chunks of it go to the log. So we commit the * as possible to let chunks of it go to the log. So we commit the
* chunk we've been working on and get a new transaction to continue. * chunk we've been working on and get a new transaction to continue.
*/ */
@ -1055,7 +1031,8 @@ xfs_trans_roll(
* Ensure that the inode is always logged. * Ensure that the inode is always logged.
*/ */
trans = *tpp; trans = *tpp;
xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE); if (dp)
xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
/* /*
* Copy the critical parameters from one trans to the next. * Copy the critical parameters from one trans to the next.
@ -1071,19 +1048,12 @@ xfs_trans_roll(
* is in progress. The caller takes the responsibility to cancel * is in progress. The caller takes the responsibility to cancel
* the duplicate transaction that gets returned. * the duplicate transaction that gets returned.
*/ */
error = xfs_trans_commit(trans, 0); error = __xfs_trans_commit(trans, true);
if (error) if (error)
return error; return error;
trans = *tpp; trans = *tpp;
/*
* transaction commit worked ok so we can drop the extra ticket
* reference that we gained in xfs_trans_dup()
*/
xfs_log_ticket_put(trans->t_ticket);
/* /*
* Reserve space in the log for th next transaction. * Reserve space in the log for th next transaction.
* This also pushes items in the "AIL", the list of logged items, * This also pushes items in the "AIL", the list of logged items,
@ -1100,6 +1070,7 @@ xfs_trans_roll(
if (error) if (error)
return error; return error;
xfs_trans_ijoin(trans, dp, 0); if (dp)
xfs_trans_ijoin(trans, dp, 0);
return 0; return 0;
} }

View file

@ -133,8 +133,6 @@ typedef struct xfs_trans {
* XFS transaction mechanism exported interfaces that are * XFS transaction mechanism exported interfaces that are
* actually macros. * actually macros.
*/ */
#define xfs_trans_get_log_res(tp) ((tp)->t_log_res)
#define xfs_trans_get_log_count(tp) ((tp)->t_log_count)
#define xfs_trans_get_block_res(tp) ((tp)->t_blk_res) #define xfs_trans_get_block_res(tp) ((tp)->t_blk_res)
#define xfs_trans_set_sync(tp) ((tp)->t_flags |= XFS_TRANS_SYNC) #define xfs_trans_set_sync(tp) ((tp)->t_flags |= XFS_TRANS_SYNC)
@ -153,7 +151,6 @@ typedef struct xfs_trans {
*/ */
xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint); xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint);
xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, xfs_km_flags_t); xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, xfs_km_flags_t);
xfs_trans_t *xfs_trans_dup(xfs_trans_t *);
int xfs_trans_reserve(struct xfs_trans *, struct xfs_trans_res *, int xfs_trans_reserve(struct xfs_trans *, struct xfs_trans_res *,
uint, uint); uint, uint);
void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t); void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);
@ -228,9 +225,9 @@ void xfs_trans_log_efd_extent(xfs_trans_t *,
struct xfs_efd_log_item *, struct xfs_efd_log_item *,
xfs_fsblock_t, xfs_fsblock_t,
xfs_extlen_t); xfs_extlen_t);
int xfs_trans_commit(xfs_trans_t *, uint flags); int xfs_trans_commit(struct xfs_trans *);
int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
void xfs_trans_cancel(xfs_trans_t *, int); void xfs_trans_cancel(xfs_trans_t *);
int xfs_trans_ail_init(struct xfs_mount *); int xfs_trans_ail_init(struct xfs_mount *);
void xfs_trans_ail_destroy(struct xfs_mount *); void xfs_trans_ail_destroy(struct xfs_mount *);

View file

@ -159,7 +159,7 @@ xfs_trans_ail_cursor_next(
{ {
struct xfs_log_item *lip = cur->item; struct xfs_log_item *lip = cur->item;
if ((__psint_t)lip & 1) if ((uintptr_t)lip & 1)
lip = xfs_ail_min(ailp); lip = xfs_ail_min(ailp);
if (lip) if (lip)
cur->item = xfs_ail_next(ailp, lip); cur->item = xfs_ail_next(ailp, lip);
@ -196,7 +196,7 @@ xfs_trans_ail_cursor_clear(
list_for_each_entry(cur, &ailp->xa_cursors, list) { list_for_each_entry(cur, &ailp->xa_cursors, list) {
if (cur->item == lip) if (cur->item == lip)
cur->item = (struct xfs_log_item *) cur->item = (struct xfs_log_item *)
((__psint_t)cur->item | 1); ((uintptr_t)cur->item | 1);
} }
} }
@ -287,7 +287,7 @@ xfs_ail_splice(
* find the place in the AIL where the items belong. * find the place in the AIL where the items belong.
*/ */
lip = cur ? cur->item : NULL; lip = cur ? cur->item : NULL;
if (!lip || (__psint_t) lip & 1) if (!lip || (uintptr_t)lip & 1)
lip = __xfs_trans_ail_cursor_last(ailp, lsn); lip = __xfs_trans_ail_cursor_last(ailp, lsn);
/* /*

View file

@ -90,8 +90,9 @@ xfs_trans_dup_dqinfo(
xfs_trans_t *ntp) xfs_trans_t *ntp)
{ {
xfs_dqtrx_t *oq, *nq; xfs_dqtrx_t *oq, *nq;
int i,j; int i, j;
xfs_dqtrx_t *oqa, *nqa; xfs_dqtrx_t *oqa, *nqa;
ulong blk_res_used;
if (!otp->t_dqinfo) if (!otp->t_dqinfo)
return; return;
@ -102,18 +103,23 @@ xfs_trans_dup_dqinfo(
* Because the quota blk reservation is carried forward, * Because the quota blk reservation is carried forward,
* it is also necessary to carry forward the DQ_DIRTY flag. * it is also necessary to carry forward the DQ_DIRTY flag.
*/ */
if(otp->t_flags & XFS_TRANS_DQ_DIRTY) if (otp->t_flags & XFS_TRANS_DQ_DIRTY)
ntp->t_flags |= XFS_TRANS_DQ_DIRTY; ntp->t_flags |= XFS_TRANS_DQ_DIRTY;
for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) { for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) {
oqa = otp->t_dqinfo->dqs[j]; oqa = otp->t_dqinfo->dqs[j];
nqa = ntp->t_dqinfo->dqs[j]; nqa = ntp->t_dqinfo->dqs[j];
for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
blk_res_used = 0;
if (oqa[i].qt_dquot == NULL) if (oqa[i].qt_dquot == NULL)
break; break;
oq = &oqa[i]; oq = &oqa[i];
nq = &nqa[i]; nq = &nqa[i];
if (oq->qt_blk_res && oq->qt_bcount_delta > 0)
blk_res_used = oq->qt_bcount_delta;
nq->qt_dquot = oq->qt_dquot; nq->qt_dquot = oq->qt_dquot;
nq->qt_bcount_delta = nq->qt_icount_delta = 0; nq->qt_bcount_delta = nq->qt_icount_delta = 0;
nq->qt_rtbcount_delta = 0; nq->qt_rtbcount_delta = 0;
@ -121,8 +127,8 @@ xfs_trans_dup_dqinfo(
/* /*
* Transfer whatever is left of the reservations. * Transfer whatever is left of the reservations.
*/ */
nq->qt_blk_res = oq->qt_blk_res - oq->qt_blk_res_used; nq->qt_blk_res = oq->qt_blk_res - blk_res_used;
oq->qt_blk_res = oq->qt_blk_res_used; oq->qt_blk_res = blk_res_used;
nq->qt_rtblk_res = oq->qt_rtblk_res - nq->qt_rtblk_res = oq->qt_rtblk_res -
oq->qt_rtblk_res_used; oq->qt_rtblk_res_used;
@ -239,10 +245,6 @@ xfs_trans_mod_dquot(
* disk blocks used. * disk blocks used.
*/ */
case XFS_TRANS_DQ_BCOUNT: case XFS_TRANS_DQ_BCOUNT:
if (qtrx->qt_blk_res && delta > 0) {
qtrx->qt_blk_res_used += (ulong)delta;
ASSERT(qtrx->qt_blk_res >= qtrx->qt_blk_res_used);
}
qtrx->qt_bcount_delta += delta; qtrx->qt_bcount_delta += delta;
break; break;
@ -423,15 +425,19 @@ xfs_trans_apply_dquot_deltas(
* reservation that a transaction structure knows of. * reservation that a transaction structure knows of.
*/ */
if (qtrx->qt_blk_res != 0) { if (qtrx->qt_blk_res != 0) {
if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) { ulong blk_res_used = 0;
if (qtrx->qt_blk_res >
qtrx->qt_blk_res_used) if (qtrx->qt_bcount_delta > 0)
blk_res_used = qtrx->qt_bcount_delta;
if (qtrx->qt_blk_res != blk_res_used) {
if (qtrx->qt_blk_res > blk_res_used)
dqp->q_res_bcount -= (xfs_qcnt_t) dqp->q_res_bcount -= (xfs_qcnt_t)
(qtrx->qt_blk_res - (qtrx->qt_blk_res -
qtrx->qt_blk_res_used); blk_res_used);
else else
dqp->q_res_bcount -= (xfs_qcnt_t) dqp->q_res_bcount -= (xfs_qcnt_t)
(qtrx->qt_blk_res_used - (blk_res_used -
qtrx->qt_blk_res); qtrx->qt_blk_res);
} }
} else { } else {

View file

@ -30,7 +30,7 @@ void xfs_trans_init(struct xfs_mount *);
void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
void xfs_trans_del_item(struct xfs_log_item *); void xfs_trans_del_item(struct xfs_log_item *);
void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
int flags); bool abort);
void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv, void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv,

View file

@ -70,6 +70,7 @@ typedef int (get_block_t)(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create); struct buffer_head *bh_result, int create);
typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
ssize_t bytes, void *private); ssize_t bytes, void *private);
typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate);
#define MAY_EXEC 0x00000001 #define MAY_EXEC 0x00000001
#define MAY_WRITE 0x00000002 #define MAY_WRITE 0x00000002
@ -2655,9 +2656,13 @@ ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t,
int dax_clear_blocks(struct inode *, sector_t block, long size); int dax_clear_blocks(struct inode *, sector_t block, long size);
int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
int dax_truncate_page(struct inode *, loff_t from, get_block_t); int dax_truncate_page(struct inode *, loff_t from, get_block_t);
int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t); int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
dax_iodone_t);
int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
dax_iodone_t);
int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *); int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
#define dax_mkwrite(vma, vmf, gb) dax_fault(vma, vmf, gb) #define dax_mkwrite(vma, vmf, gb, iod) dax_fault(vma, vmf, gb, iod)
#define __dax_mkwrite(vma, vmf, gb, iod) __dax_fault(vma, vmf, gb, iod)
#ifdef CONFIG_BLOCK #ifdef CONFIG_BLOCK
typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode,