linux/fs/ext4/extents_status.h
Yan, Zheng e30b5dca15 ext4: fix fio regression
We (Linux Kernel Performance project) found a regression introduced
by commit:

  f7fec032aa ext4: track all extent status in extent status tree

The commit causes about 20% performance decrease in fio random write
test. Profiler shows that rb_next() uses a lot of CPU time. The call
stack is:

  rb_next
  ext4_es_find_delayed_extent
  ext4_map_blocks
  _ext4_get_block
  ext4_get_block_write
  __blockdev_direct_IO
  ext4_direct_IO
  generic_file_direct_write
  __generic_file_aio_write
  ext4_file_write
  aio_rw_vect_retry
  aio_run_iocb
  do_io_submit
  sys_io_submit
  system_call_fastpath
  io_submit
  td_io_getevents
  io_u_queued_complete
  thread_main
  main
  __libc_start_main

The cause is that ext4_es_find_delayed_extent() doesn't have an
upper bound, it keeps searching until a delayed extent is found.
When there are a lots of non-delayed entries in the extent state
tree, ext4_es_find_delayed_extent() may uses a lot of CPU time.

Reported-by: LKP project <lkp@linux.intel.com>
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
2013-05-03 02:15:52 -04:00

128 lines
3.4 KiB
C

/*
* fs/ext4/extents_status.h
*
* Written by Yongqiang Yang <xiaoqiangnk@gmail.com>
* Modified by
* Allison Henderson <achender@linux.vnet.ibm.com>
* Zheng Liu <wenqing.lz@taobao.com>
*
*/
#ifndef _EXT4_EXTENTS_STATUS_H
#define _EXT4_EXTENTS_STATUS_H
/*
* Turn on ES_DEBUG__ to get lots of info about extent status operations.
*/
#ifdef ES_DEBUG__
#define es_debug(fmt, ...) printk(fmt, ##__VA_ARGS__)
#else
#define es_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
#endif
/*
* With ES_AGGRESSIVE_TEST defined, the result of es caching will be
* checked with old map_block's result.
*/
#define ES_AGGRESSIVE_TEST__
/*
* These flags live in the high bits of extent_status.es_pblk
*/
#define EXTENT_STATUS_WRITTEN (1ULL << 63)
#define EXTENT_STATUS_UNWRITTEN (1ULL << 62)
#define EXTENT_STATUS_DELAYED (1ULL << 61)
#define EXTENT_STATUS_HOLE (1ULL << 60)
#define EXTENT_STATUS_FLAGS (EXTENT_STATUS_WRITTEN | \
EXTENT_STATUS_UNWRITTEN | \
EXTENT_STATUS_DELAYED | \
EXTENT_STATUS_HOLE)
struct ext4_extent;
struct extent_status {
struct rb_node rb_node;
ext4_lblk_t es_lblk; /* first logical block extent covers */
ext4_lblk_t es_len; /* length of extent in block */
ext4_fsblk_t es_pblk; /* first physical block */
};
struct ext4_es_tree {
struct rb_root root;
struct extent_status *cache_es; /* recently accessed extent */
};
extern int __init ext4_init_es(void);
extern void ext4_exit_es(void);
extern void ext4_es_init_tree(struct ext4_es_tree *tree);
extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len, ext4_fsblk_t pblk,
unsigned long long status);
extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len);
extern void ext4_es_find_delayed_extent_range(struct inode *inode,
ext4_lblk_t lblk, ext4_lblk_t end,
struct extent_status *es);
extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
struct extent_status *es);
extern int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex);
static inline int ext4_es_is_written(struct extent_status *es)
{
return (es->es_pblk & EXTENT_STATUS_WRITTEN) != 0;
}
static inline int ext4_es_is_unwritten(struct extent_status *es)
{
return (es->es_pblk & EXTENT_STATUS_UNWRITTEN) != 0;
}
static inline int ext4_es_is_delayed(struct extent_status *es)
{
return (es->es_pblk & EXTENT_STATUS_DELAYED) != 0;
}
static inline int ext4_es_is_hole(struct extent_status *es)
{
return (es->es_pblk & EXTENT_STATUS_HOLE) != 0;
}
static inline ext4_fsblk_t ext4_es_status(struct extent_status *es)
{
return (es->es_pblk & EXTENT_STATUS_FLAGS);
}
static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es)
{
return (es->es_pblk & ~EXTENT_STATUS_FLAGS);
}
static inline void ext4_es_store_pblock(struct extent_status *es,
ext4_fsblk_t pb)
{
ext4_fsblk_t block;
block = (pb & ~EXTENT_STATUS_FLAGS) |
(es->es_pblk & EXTENT_STATUS_FLAGS);
es->es_pblk = block;
}
static inline void ext4_es_store_status(struct extent_status *es,
unsigned long long status)
{
ext4_fsblk_t block;
block = (status & EXTENT_STATUS_FLAGS) |
(es->es_pblk & ~EXTENT_STATUS_FLAGS);
es->es_pblk = block;
}
extern void ext4_es_register_shrinker(struct super_block *sb);
extern void ext4_es_unregister_shrinker(struct super_block *sb);
extern void ext4_es_lru_add(struct inode *inode);
extern void ext4_es_lru_del(struct inode *inode);
#endif /* _EXT4_EXTENTS_STATUS_H */