linux/fs/f2fs/extent_cache.c

799 lines
19 KiB
C
Raw Normal View History

/*
* f2fs extent cache support
*
* Copyright (c) 2015 Motorola Mobility
* Copyright (c) 2015 Samsung Electronics
* Authors: Jaegeuk Kim <jaegeuk@kernel.org>
* Chao Yu <chao2.yu@samsung.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "node.h"
#include <trace/events/f2fs.h>
static struct kmem_cache *extent_tree_slab;
static struct kmem_cache *extent_node_slab;
static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_info *ei,
struct rb_node *parent, struct rb_node **p)
{
struct extent_node *en;
en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
if (!en)
return NULL;
en->ei = *ei;
INIT_LIST_HEAD(&en->list);
rb_link_node(&en->rb_node, parent, p);
rb_insert_color(&en->rb_node, &et->root);
et->count++;
atomic_inc(&sbi->total_ext_node);
return en;
}
static void __detach_extent_node(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_node *en)
{
rb_erase(&en->rb_node, &et->root);
et->count--;
atomic_dec(&sbi->total_ext_node);
if (et->cached_en == en)
et->cached_en = NULL;
}
static struct extent_tree *__grab_extent_tree(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et;
nid_t ino = inode->i_ino;
down_write(&sbi->extent_tree_lock);
et = radix_tree_lookup(&sbi->extent_tree_root, ino);
if (!et) {
et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
memset(et, 0, sizeof(struct extent_tree));
et->ino = ino;
et->root = RB_ROOT;
et->cached_en = NULL;
rwlock_init(&et->lock);
atomic_set(&et->refcount, 0);
et->count = 0;
sbi->total_ext_tree++;
}
atomic_inc(&et->refcount);
up_write(&sbi->extent_tree_lock);
/* never died until evict_inode */
F2FS_I(inode)->extent_tree = et;
return et;
}
static struct extent_node *__lookup_extent_tree(struct f2fs_sb_info *sbi,
struct extent_tree *et, unsigned int fofs)
{
struct rb_node *node = et->root.rb_node;
struct extent_node *en = et->cached_en;
if (en) {
struct extent_info *cei = &en->ei;
if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) {
stat_inc_cached_node_hit(sbi);
return en;
}
}
while (node) {
en = rb_entry(node, struct extent_node, rb_node);
if (fofs < en->ei.fofs) {
node = node->rb_left;
} else if (fofs >= en->ei.fofs + en->ei.len) {
node = node->rb_right;
} else {
stat_inc_rbtree_node_hit(sbi);
return en;
}
}
return NULL;
}
static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_info *ei)
{
struct rb_node **p = &et->root.rb_node;
struct extent_node *en;
en = __attach_extent_node(sbi, et, ei, NULL, p);
if (!en)
return NULL;
et->largest = en->ei;
et->cached_en = en;
return en;
}
static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
struct extent_tree *et, bool free_all)
{
struct rb_node *node, *next;
struct extent_node *en;
unsigned int count = et->count;
node = rb_first(&et->root);
while (node) {
next = rb_next(node);
en = rb_entry(node, struct extent_node, rb_node);
if (free_all) {
spin_lock(&sbi->extent_lock);
if (!list_empty(&en->list))
list_del_init(&en->list);
spin_unlock(&sbi->extent_lock);
}
if (free_all || list_empty(&en->list)) {
__detach_extent_node(sbi, et, en);
kmem_cache_free(extent_node_slab, en);
}
node = next;
}
return count - et->count;
}
f2fs: avoid accessing NULL pointer in f2fs_drop_largest_extent If extent cache is disable, we will encounter oops when triggering direct IO as below: BUG: unable to handle kernel NULL pointer dereference at 0000000c IP: [<f0b9c61e>] f2fs_drop_largest_extent+0xe/0x30 [f2fs] *pdpt = 000000002bb9a001 *pde = 0000000000000000 Oops: 0000 [#1] SMP Modules linked in: f2fs(O) fuse bnep rfcomm bluetooth nfsd dm_crypt nfs_acl auth_rpcgss oid_registry nfs binfmt_misc fscache lockd sunrpc grace snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm snd_seq_midi snd_rawmidi snd_seq_midi_event snd_seq snd_timer snd_seq_device snd soundcore joydev psmouse hid_generic i2c_piix4 serio_raw ppdev mac_hid parport_pc lp parport ext4 jbd2 mbcache usbhid hid e1000 CPU: 3 PID: 3608 Comm: dd Tainted: G O 4.2.0-rc4 #12 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 task: ef161600 ti: ebd5e000 task.ti: ebd5e000 EIP: 0060:[<f0b9c61e>] EFLAGS: 00010202 CPU: 3 EIP is at f2fs_drop_largest_extent+0xe/0x30 [f2fs] EAX: 00000000 EBX: ddebc000 ECX: 00000000 EDX: 00000000 ESI: ebd5fdf8 EDI: 00000000 EBP: ebd5fd58 ESP: ebd5fd58 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 CR0: 80050033 CR2: 0000000c CR3: 2c24ee40 CR4: 000006f0 Stack: ebd5fda4 f0b8c005 00000000 00000001 00000000 f0b8c430 c816cd68 ddebc000 ddebc088 00001000 00000555 00000555 ffffffff c160bb00 00055501 00000000 00000000 00000100 00000000 ebd5fe20 f0b8c430 00000046 ef161600 00001000 Call Trace: [<f0b8c005>] __allocate_data_block+0x1a5/0x260 [f2fs] [<f0b8c430>] ? f2fs_direct_IO+0x370/0x440 [f2fs] [<c160bb00>] ? down_read+0x30/0x50 [<f0b8c430>] f2fs_direct_IO+0x370/0x440 [f2fs] [<c113e115>] generic_file_direct_write+0xa5/0x260 [<c10b53f8>] ? current_fs_time+0x18/0x50 [<c113e38b>] __generic_file_write_iter+0xbb/0x210 [<c113e50f>] ? generic_file_write_iter+0x2f/0x320 [<c113e63c>] generic_file_write_iter+0x15c/0x320 [<f0b77f29>] f2fs_file_write_iter+0x39/0x80 [f2fs] [<c11984d9>] __vfs_write+0xa9/0xe0 [<c1199227>] vfs_write+0x97/0x180 [<c119955b>] SyS_write+0x5b/0xd0 [<c160dcd0>] sysenter_do_call+0x12/0x12 Code: 10 8b 50 1c 89 53 14 eb ca 8d 74 26 00 85 f6 74 86 eb a6 0f 0b 90 8d b4 26 00 00 00 00 55 89 e5 3e 8d 74 26 00 8b 80 d4 02 00 00 <8b> 48 0c 39 d1 77 0e 03 48 14 39 ca 73 07 c7 40 14 00 00 00 00 EIP: [<f0b9c61e>] f2fs_drop_largest_extent+0xe/0x30 [f2fs] SS:ESP 0068:ebd5fd58 CR2: 000000000000000c ---[ end trace a38c07026a1afffd ]--- This is because when extent cache is disable, extent_tree pointer in struct f2fs_inode_info should be NULL, but in f2fs_drop_largest_extent we access this NULL pointer directly without checking state of extent cache, then, the oops occurs. Let's fix it by checking state of extent cache before accessing. Signed-off-by: Chao Yu <chao2.yu@samsung.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-08-28 10:18:57 +00:00
static void __drop_largest_extent(struct inode *inode, pgoff_t fofs)
{
struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest;
if (largest->fofs <= fofs && largest->fofs + largest->len > fofs)
largest->len = 0;
}
f2fs: avoid accessing NULL pointer in f2fs_drop_largest_extent If extent cache is disable, we will encounter oops when triggering direct IO as below: BUG: unable to handle kernel NULL pointer dereference at 0000000c IP: [<f0b9c61e>] f2fs_drop_largest_extent+0xe/0x30 [f2fs] *pdpt = 000000002bb9a001 *pde = 0000000000000000 Oops: 0000 [#1] SMP Modules linked in: f2fs(O) fuse bnep rfcomm bluetooth nfsd dm_crypt nfs_acl auth_rpcgss oid_registry nfs binfmt_misc fscache lockd sunrpc grace snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm snd_seq_midi snd_rawmidi snd_seq_midi_event snd_seq snd_timer snd_seq_device snd soundcore joydev psmouse hid_generic i2c_piix4 serio_raw ppdev mac_hid parport_pc lp parport ext4 jbd2 mbcache usbhid hid e1000 CPU: 3 PID: 3608 Comm: dd Tainted: G O 4.2.0-rc4 #12 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 task: ef161600 ti: ebd5e000 task.ti: ebd5e000 EIP: 0060:[<f0b9c61e>] EFLAGS: 00010202 CPU: 3 EIP is at f2fs_drop_largest_extent+0xe/0x30 [f2fs] EAX: 00000000 EBX: ddebc000 ECX: 00000000 EDX: 00000000 ESI: ebd5fdf8 EDI: 00000000 EBP: ebd5fd58 ESP: ebd5fd58 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 CR0: 80050033 CR2: 0000000c CR3: 2c24ee40 CR4: 000006f0 Stack: ebd5fda4 f0b8c005 00000000 00000001 00000000 f0b8c430 c816cd68 ddebc000 ddebc088 00001000 00000555 00000555 ffffffff c160bb00 00055501 00000000 00000000 00000100 00000000 ebd5fe20 f0b8c430 00000046 ef161600 00001000 Call Trace: [<f0b8c005>] __allocate_data_block+0x1a5/0x260 [f2fs] [<f0b8c430>] ? f2fs_direct_IO+0x370/0x440 [f2fs] [<c160bb00>] ? down_read+0x30/0x50 [<f0b8c430>] f2fs_direct_IO+0x370/0x440 [f2fs] [<c113e115>] generic_file_direct_write+0xa5/0x260 [<c10b53f8>] ? current_fs_time+0x18/0x50 [<c113e38b>] __generic_file_write_iter+0xbb/0x210 [<c113e50f>] ? generic_file_write_iter+0x2f/0x320 [<c113e63c>] generic_file_write_iter+0x15c/0x320 [<f0b77f29>] f2fs_file_write_iter+0x39/0x80 [f2fs] [<c11984d9>] __vfs_write+0xa9/0xe0 [<c1199227>] vfs_write+0x97/0x180 [<c119955b>] SyS_write+0x5b/0xd0 [<c160dcd0>] sysenter_do_call+0x12/0x12 Code: 10 8b 50 1c 89 53 14 eb ca 8d 74 26 00 85 f6 74 86 eb a6 0f 0b 90 8d b4 26 00 00 00 00 55 89 e5 3e 8d 74 26 00 8b 80 d4 02 00 00 <8b> 48 0c 39 d1 77 0e 03 48 14 39 ca 73 07 c7 40 14 00 00 00 00 EIP: [<f0b9c61e>] f2fs_drop_largest_extent+0xe/0x30 [f2fs] SS:ESP 0068:ebd5fd58 CR2: 000000000000000c ---[ end trace a38c07026a1afffd ]--- This is because when extent cache is disable, extent_tree pointer in struct f2fs_inode_info should be NULL, but in f2fs_drop_largest_extent we access this NULL pointer directly without checking state of extent cache, then, the oops occurs. Let's fix it by checking state of extent cache before accessing. Signed-off-by: Chao Yu <chao2.yu@samsung.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-08-28 10:18:57 +00:00
void f2fs_drop_largest_extent(struct inode *inode, pgoff_t fofs)
{
if (!f2fs_may_extent_tree(inode))
return;
__drop_largest_extent(inode, fofs);
}
void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et;
struct extent_node *en;
struct extent_info ei;
if (!f2fs_may_extent_tree(inode))
return;
et = __grab_extent_tree(inode);
if (!i_ext || le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN)
return;
set_extent_info(&ei, le32_to_cpu(i_ext->fofs),
le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len));
write_lock(&et->lock);
if (et->count)
goto out;
en = __init_extent_tree(sbi, et, &ei);
if (en) {
spin_lock(&sbi->extent_lock);
list_add_tail(&en->list, &sbi->extent_list);
spin_unlock(&sbi->extent_lock);
}
out:
write_unlock(&et->lock);
}
static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
struct extent_info *ei)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et = F2FS_I(inode)->extent_tree;
struct extent_node *en;
bool ret = false;
f2fs_bug_on(sbi, !et);
trace_f2fs_lookup_extent_tree_start(inode, pgofs);
read_lock(&et->lock);
if (et->largest.fofs <= pgofs &&
et->largest.fofs + et->largest.len > pgofs) {
*ei = et->largest;
ret = true;
stat_inc_largest_node_hit(sbi);
goto out;
}
en = __lookup_extent_tree(sbi, et, pgofs);
if (en) {
*ei = en->ei;
spin_lock(&sbi->extent_lock);
if (!list_empty(&en->list))
list_move_tail(&en->list, &sbi->extent_list);
et->cached_en = en;
spin_unlock(&sbi->extent_lock);
ret = true;
}
out:
stat_inc_total_hit(sbi);
read_unlock(&et->lock);
trace_f2fs_lookup_extent_tree_end(inode, pgofs, ei);
return ret;
}
/*
* lookup extent at @fofs, if hit, return the extent
* if not, return NULL and
* @prev_ex: extent before fofs
* @next_ex: extent after fofs
* @insert_p: insert point for new extent at fofs
* in order to simpfy the insertion after.
* tree must stay unchanged between lookup and insertion.
*/
static struct extent_node *__lookup_extent_tree_ret(struct extent_tree *et,
unsigned int fofs,
struct extent_node **prev_ex,
struct extent_node **next_ex,
struct rb_node ***insert_p,
struct rb_node **insert_parent)
{
struct rb_node **pnode = &et->root.rb_node;
struct rb_node *parent = NULL, *tmp_node;
struct extent_node *en = et->cached_en;
*insert_p = NULL;
*insert_parent = NULL;
*prev_ex = NULL;
*next_ex = NULL;
if (RB_EMPTY_ROOT(&et->root))
return NULL;
if (en) {
struct extent_info *cei = &en->ei;
if (cei->fofs <= fofs && cei->fofs + cei->len > fofs)
goto lookup_neighbors;
}
while (*pnode) {
parent = *pnode;
en = rb_entry(*pnode, struct extent_node, rb_node);
if (fofs < en->ei.fofs)
pnode = &(*pnode)->rb_left;
else if (fofs >= en->ei.fofs + en->ei.len)
pnode = &(*pnode)->rb_right;
else
goto lookup_neighbors;
}
*insert_p = pnode;
*insert_parent = parent;
en = rb_entry(parent, struct extent_node, rb_node);
tmp_node = parent;
if (parent && fofs > en->ei.fofs)
tmp_node = rb_next(parent);
*next_ex = tmp_node ?
rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
tmp_node = parent;
if (parent && fofs < en->ei.fofs)
tmp_node = rb_prev(parent);
*prev_ex = tmp_node ?
rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
return NULL;
lookup_neighbors:
if (fofs == en->ei.fofs) {
/* lookup prev node for merging backward later */
tmp_node = rb_prev(&en->rb_node);
*prev_ex = tmp_node ?
rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
}
if (fofs == en->ei.fofs + en->ei.len - 1) {
/* lookup next node for merging frontward later */
tmp_node = rb_next(&en->rb_node);
*next_ex = tmp_node ?
rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
}
return en;
}
static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_info *ei,
struct extent_node **den,
struct extent_node *prev_ex,
struct extent_node *next_ex)
{
struct extent_node *en = NULL;
if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) {
prev_ex->ei.len += ei->len;
ei = &prev_ex->ei;
en = prev_ex;
}
if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) {
if (en) {
__detach_extent_node(sbi, et, prev_ex);
*den = prev_ex;
}
next_ex->ei.fofs = ei->fofs;
next_ex->ei.blk = ei->blk;
next_ex->ei.len += ei->len;
en = next_ex;
}
if (en) {
if (en->ei.len > et->largest.len)
et->largest = en->ei;
et->cached_en = en;
}
return en;
}
static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_info *ei,
struct rb_node **insert_p,
struct rb_node *insert_parent)
{
struct rb_node **p = &et->root.rb_node;
struct rb_node *parent = NULL;
struct extent_node *en = NULL;
if (insert_p && insert_parent) {
parent = insert_parent;
p = insert_p;
goto do_insert;
}
while (*p) {
parent = *p;
en = rb_entry(parent, struct extent_node, rb_node);
if (ei->fofs < en->ei.fofs)
p = &(*p)->rb_left;
else if (ei->fofs >= en->ei.fofs + en->ei.len)
p = &(*p)->rb_right;
else
f2fs_bug_on(sbi, 1);
}
do_insert:
en = __attach_extent_node(sbi, et, ei, parent, p);
if (!en)
return NULL;
if (en->ei.len > et->largest.len)
et->largest = en->ei;
et->cached_en = en;
return en;
}
static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
f2fs: update extent tree in batches This patch introduce a new helper f2fs_update_extent_tree_range which can do extent mapping update at a specified range. The main idea is: 1) punch all mapping info in extent node(s) which are at a specified range; 2) try to merge new extent mapping with adjacent node, or failing that, insert the mapping into extent tree as a new node. In order to see the benefit, I add a function for stating time stamping count as below: uint64_t rdtsc(void) { uint32_t lo, hi; __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); return (uint64_t)hi << 32 | lo; } My test environment is: ubuntu, intel i7-3770, 16G memory, 256g micron ssd. truncation path: update extent cache from truncate_data_blocks_range non-truncataion path: update extent cache from other paths total: all update paths a) Removing 128MB file which has one extent node mapping whole range of file: 1. dd if=/dev/zero of=/mnt/f2fs/128M bs=1M count=128 2. sync 3. rm /mnt/f2fs/128M Before: total count average truncation: 7651022 32768 233.49 Patched: total count average truncation: 3321 33 100.64 b) fsstress: fsstress -d /mnt/f2fs -l 5 -n 100 -p 20 Test times: 5 times. Before: total count average truncation: 5812480.6 20911.6 277.95 non-truncation: 7783845.6 13440.8 579.12 total: 13596326.2 34352.4 395.79 Patched: total count average truncation: 1281283.0 3041.6 421.25 non-truncation: 7355844.4 13662.8 538.38 total: 8637127.4 16704.4 517.06 1) For the updates in truncation path: - we can see updating in batches leads total tsc and update count reducing explicitly; - besides, for a single batched updating, punching multiple extent nodes in a loop, result in executing more operations, so our average tsc increase intensively. 2) For the updates in non-truncation path: - there is a little improvement, that is because for the scenario that we just need to update in the head or tail of extent node, new interface optimize to update info in extent node directly, rather than removing original extent node for updating and then inserting that updated one into cache as new node. Signed-off-by: Chao Yu <chao2.yu@samsung.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-08-26 12:34:48 +00:00
pgoff_t fofs, block_t blkaddr, unsigned int len)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et = F2FS_I(inode)->extent_tree;
struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
f2fs: update extent tree in batches This patch introduce a new helper f2fs_update_extent_tree_range which can do extent mapping update at a specified range. The main idea is: 1) punch all mapping info in extent node(s) which are at a specified range; 2) try to merge new extent mapping with adjacent node, or failing that, insert the mapping into extent tree as a new node. In order to see the benefit, I add a function for stating time stamping count as below: uint64_t rdtsc(void) { uint32_t lo, hi; __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); return (uint64_t)hi << 32 | lo; } My test environment is: ubuntu, intel i7-3770, 16G memory, 256g micron ssd. truncation path: update extent cache from truncate_data_blocks_range non-truncataion path: update extent cache from other paths total: all update paths a) Removing 128MB file which has one extent node mapping whole range of file: 1. dd if=/dev/zero of=/mnt/f2fs/128M bs=1M count=128 2. sync 3. rm /mnt/f2fs/128M Before: total count average truncation: 7651022 32768 233.49 Patched: total count average truncation: 3321 33 100.64 b) fsstress: fsstress -d /mnt/f2fs -l 5 -n 100 -p 20 Test times: 5 times. Before: total count average truncation: 5812480.6 20911.6 277.95 non-truncation: 7783845.6 13440.8 579.12 total: 13596326.2 34352.4 395.79 Patched: total count average truncation: 1281283.0 3041.6 421.25 non-truncation: 7355844.4 13662.8 538.38 total: 8637127.4 16704.4 517.06 1) For the updates in truncation path: - we can see updating in batches leads total tsc and update count reducing explicitly; - besides, for a single batched updating, punching multiple extent nodes in a loop, result in executing more operations, so our average tsc increase intensively. 2) For the updates in non-truncation path: - there is a little improvement, that is because for the scenario that we just need to update in the head or tail of extent node, new interface optimize to update info in extent node directly, rather than removing original extent node for updating and then inserting that updated one into cache as new node. Signed-off-by: Chao Yu <chao2.yu@samsung.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-08-26 12:34:48 +00:00
struct extent_node *prev_en = NULL, *next_en = NULL;
struct extent_info ei, dei, prev;
struct rb_node **insert_p = NULL, *insert_parent = NULL;
f2fs: update extent tree in batches This patch introduce a new helper f2fs_update_extent_tree_range which can do extent mapping update at a specified range. The main idea is: 1) punch all mapping info in extent node(s) which are at a specified range; 2) try to merge new extent mapping with adjacent node, or failing that, insert the mapping into extent tree as a new node. In order to see the benefit, I add a function for stating time stamping count as below: uint64_t rdtsc(void) { uint32_t lo, hi; __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); return (uint64_t)hi << 32 | lo; } My test environment is: ubuntu, intel i7-3770, 16G memory, 256g micron ssd. truncation path: update extent cache from truncate_data_blocks_range non-truncataion path: update extent cache from other paths total: all update paths a) Removing 128MB file which has one extent node mapping whole range of file: 1. dd if=/dev/zero of=/mnt/f2fs/128M bs=1M count=128 2. sync 3. rm /mnt/f2fs/128M Before: total count average truncation: 7651022 32768 233.49 Patched: total count average truncation: 3321 33 100.64 b) fsstress: fsstress -d /mnt/f2fs -l 5 -n 100 -p 20 Test times: 5 times. Before: total count average truncation: 5812480.6 20911.6 277.95 non-truncation: 7783845.6 13440.8 579.12 total: 13596326.2 34352.4 395.79 Patched: total count average truncation: 1281283.0 3041.6 421.25 non-truncation: 7355844.4 13662.8 538.38 total: 8637127.4 16704.4 517.06 1) For the updates in truncation path: - we can see updating in batches leads total tsc and update count reducing explicitly; - besides, for a single batched updating, punching multiple extent nodes in a loop, result in executing more operations, so our average tsc increase intensively. 2) For the updates in non-truncation path: - there is a little improvement, that is because for the scenario that we just need to update in the head or tail of extent node, new interface optimize to update info in extent node directly, rather than removing original extent node for updating and then inserting that updated one into cache as new node. Signed-off-by: Chao Yu <chao2.yu@samsung.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-08-26 12:34:48 +00:00
unsigned int end = fofs + len;
unsigned int pos = (unsigned int)fofs;
if (!et)
return false;
trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len);
write_lock(&et->lock);
if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) {
write_unlock(&et->lock);
return false;
}
prev = et->largest;
dei.len = 0;
/* we do not guarantee that the largest extent is cached all the time */
f2fs: avoid accessing NULL pointer in f2fs_drop_largest_extent If extent cache is disable, we will encounter oops when triggering direct IO as below: BUG: unable to handle kernel NULL pointer dereference at 0000000c IP: [<f0b9c61e>] f2fs_drop_largest_extent+0xe/0x30 [f2fs] *pdpt = 000000002bb9a001 *pde = 0000000000000000 Oops: 0000 [#1] SMP Modules linked in: f2fs(O) fuse bnep rfcomm bluetooth nfsd dm_crypt nfs_acl auth_rpcgss oid_registry nfs binfmt_misc fscache lockd sunrpc grace snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm snd_seq_midi snd_rawmidi snd_seq_midi_event snd_seq snd_timer snd_seq_device snd soundcore joydev psmouse hid_generic i2c_piix4 serio_raw ppdev mac_hid parport_pc lp parport ext4 jbd2 mbcache usbhid hid e1000 CPU: 3 PID: 3608 Comm: dd Tainted: G O 4.2.0-rc4 #12 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 task: ef161600 ti: ebd5e000 task.ti: ebd5e000 EIP: 0060:[<f0b9c61e>] EFLAGS: 00010202 CPU: 3 EIP is at f2fs_drop_largest_extent+0xe/0x30 [f2fs] EAX: 00000000 EBX: ddebc000 ECX: 00000000 EDX: 00000000 ESI: ebd5fdf8 EDI: 00000000 EBP: ebd5fd58 ESP: ebd5fd58 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 CR0: 80050033 CR2: 0000000c CR3: 2c24ee40 CR4: 000006f0 Stack: ebd5fda4 f0b8c005 00000000 00000001 00000000 f0b8c430 c816cd68 ddebc000 ddebc088 00001000 00000555 00000555 ffffffff c160bb00 00055501 00000000 00000000 00000100 00000000 ebd5fe20 f0b8c430 00000046 ef161600 00001000 Call Trace: [<f0b8c005>] __allocate_data_block+0x1a5/0x260 [f2fs] [<f0b8c430>] ? f2fs_direct_IO+0x370/0x440 [f2fs] [<c160bb00>] ? down_read+0x30/0x50 [<f0b8c430>] f2fs_direct_IO+0x370/0x440 [f2fs] [<c113e115>] generic_file_direct_write+0xa5/0x260 [<c10b53f8>] ? current_fs_time+0x18/0x50 [<c113e38b>] __generic_file_write_iter+0xbb/0x210 [<c113e50f>] ? generic_file_write_iter+0x2f/0x320 [<c113e63c>] generic_file_write_iter+0x15c/0x320 [<f0b77f29>] f2fs_file_write_iter+0x39/0x80 [f2fs] [<c11984d9>] __vfs_write+0xa9/0xe0 [<c1199227>] vfs_write+0x97/0x180 [<c119955b>] SyS_write+0x5b/0xd0 [<c160dcd0>] sysenter_do_call+0x12/0x12 Code: 10 8b 50 1c 89 53 14 eb ca 8d 74 26 00 85 f6 74 86 eb a6 0f 0b 90 8d b4 26 00 00 00 00 55 89 e5 3e 8d 74 26 00 8b 80 d4 02 00 00 <8b> 48 0c 39 d1 77 0e 03 48 14 39 ca 73 07 c7 40 14 00 00 00 00 EIP: [<f0b9c61e>] f2fs_drop_largest_extent+0xe/0x30 [f2fs] SS:ESP 0068:ebd5fd58 CR2: 000000000000000c ---[ end trace a38c07026a1afffd ]--- This is because when extent cache is disable, extent_tree pointer in struct f2fs_inode_info should be NULL, but in f2fs_drop_largest_extent we access this NULL pointer directly without checking state of extent cache, then, the oops occurs. Let's fix it by checking state of extent cache before accessing. Signed-off-by: Chao Yu <chao2.yu@samsung.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-08-28 10:18:57 +00:00
__drop_largest_extent(inode, fofs);
f2fs: update extent tree in batches This patch introduce a new helper f2fs_update_extent_tree_range which can do extent mapping update at a specified range. The main idea is: 1) punch all mapping info in extent node(s) which are at a specified range; 2) try to merge new extent mapping with adjacent node, or failing that, insert the mapping into extent tree as a new node. In order to see the benefit, I add a function for stating time stamping count as below: uint64_t rdtsc(void) { uint32_t lo, hi; __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); return (uint64_t)hi << 32 | lo; } My test environment is: ubuntu, intel i7-3770, 16G memory, 256g micron ssd. truncation path: update extent cache from truncate_data_blocks_range non-truncataion path: update extent cache from other paths total: all update paths a) Removing 128MB file which has one extent node mapping whole range of file: 1. dd if=/dev/zero of=/mnt/f2fs/128M bs=1M count=128 2. sync 3. rm /mnt/f2fs/128M Before: total count average truncation: 7651022 32768 233.49 Patched: total count average truncation: 3321 33 100.64 b) fsstress: fsstress -d /mnt/f2fs -l 5 -n 100 -p 20 Test times: 5 times. Before: total count average truncation: 5812480.6 20911.6 277.95 non-truncation: 7783845.6 13440.8 579.12 total: 13596326.2 34352.4 395.79 Patched: total count average truncation: 1281283.0 3041.6 421.25 non-truncation: 7355844.4 13662.8 538.38 total: 8637127.4 16704.4 517.06 1) For the updates in truncation path: - we can see updating in batches leads total tsc and update count reducing explicitly; - besides, for a single batched updating, punching multiple extent nodes in a loop, result in executing more operations, so our average tsc increase intensively. 2) For the updates in non-truncation path: - there is a little improvement, that is because for the scenario that we just need to update in the head or tail of extent node, new interface optimize to update info in extent node directly, rather than removing original extent node for updating and then inserting that updated one into cache as new node. Signed-off-by: Chao Yu <chao2.yu@samsung.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-08-26 12:34:48 +00:00
/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
en = __lookup_extent_tree_ret(et, fofs, &prev_en, &next_en,
&insert_p, &insert_parent);
f2fs: update extent tree in batches This patch introduce a new helper f2fs_update_extent_tree_range which can do extent mapping update at a specified range. The main idea is: 1) punch all mapping info in extent node(s) which are at a specified range; 2) try to merge new extent mapping with adjacent node, or failing that, insert the mapping into extent tree as a new node. In order to see the benefit, I add a function for stating time stamping count as below: uint64_t rdtsc(void) { uint32_t lo, hi; __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); return (uint64_t)hi << 32 | lo; } My test environment is: ubuntu, intel i7-3770, 16G memory, 256g micron ssd. truncation path: update extent cache from truncate_data_blocks_range non-truncataion path: update extent cache from other paths total: all update paths a) Removing 128MB file which has one extent node mapping whole range of file: 1. dd if=/dev/zero of=/mnt/f2fs/128M bs=1M count=128 2. sync 3. rm /mnt/f2fs/128M Before: total count average truncation: 7651022 32768 233.49 Patched: total count average truncation: 3321 33 100.64 b) fsstress: fsstress -d /mnt/f2fs -l 5 -n 100 -p 20 Test times: 5 times. Before: total count average truncation: 5812480.6 20911.6 277.95 non-truncation: 7783845.6 13440.8 579.12 total: 13596326.2 34352.4 395.79 Patched: total count average truncation: 1281283.0 3041.6 421.25 non-truncation: 7355844.4 13662.8 538.38 total: 8637127.4 16704.4 517.06 1) For the updates in truncation path: - we can see updating in batches leads total tsc and update count reducing explicitly; - besides, for a single batched updating, punching multiple extent nodes in a loop, result in executing more operations, so our average tsc increase intensively. 2) For the updates in non-truncation path: - there is a little improvement, that is because for the scenario that we just need to update in the head or tail of extent node, new interface optimize to update info in extent node directly, rather than removing original extent node for updating and then inserting that updated one into cache as new node. Signed-off-by: Chao Yu <chao2.yu@samsung.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-08-26 12:34:48 +00:00
if (!en) {
if (next_en) {
en = next_en;
f2fs_bug_on(sbi, en->ei.fofs <= pos);
pos = en->ei.fofs;
} else {
/*
* skip searching in the tree since there is no
* larger extent node in the cache.
*/
goto update_extent;
}
}
/* 2. invlidate all extent nodes in range [fofs, fofs + len - 1] */
while (en) {
struct rb_node *node;
if (pos >= end)
break;
dei = en->ei;
en1 = en2 = NULL;
node = rb_next(&en->rb_node);
/*
* 2.1 there are four cases when we invalidate blkaddr in extent
* node, |V: valid address, X: will be invalidated|
*/
/* case#1, invalidate right part of extent node |VVVVVXXXXX| */
if (pos > dei.fofs && end >= dei.fofs + dei.len) {
en->ei.len = pos - dei.fofs;
if (en->ei.len < F2FS_MIN_EXTENT_LEN) {
__detach_extent_node(sbi, et, en);
insert_p = NULL;
insert_parent = NULL;
goto update;
}
if (__is_extent_same(&dei, &et->largest))
et->largest = en->ei;
goto next;
}
/* case#2, invalidate left part of extent node |XXXXXVVVVV| */
if (pos <= dei.fofs && end < dei.fofs + dei.len) {
en->ei.fofs = end;
en->ei.blk += end - dei.fofs;
en->ei.len -= end - dei.fofs;
if (en->ei.len < F2FS_MIN_EXTENT_LEN) {
__detach_extent_node(sbi, et, en);
insert_p = NULL;
insert_parent = NULL;
goto update;
}
if (__is_extent_same(&dei, &et->largest))
et->largest = en->ei;
goto next;
}
f2fs: update extent tree in batches This patch introduce a new helper f2fs_update_extent_tree_range which can do extent mapping update at a specified range. The main idea is: 1) punch all mapping info in extent node(s) which are at a specified range; 2) try to merge new extent mapping with adjacent node, or failing that, insert the mapping into extent tree as a new node. In order to see the benefit, I add a function for stating time stamping count as below: uint64_t rdtsc(void) { uint32_t lo, hi; __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); return (uint64_t)hi << 32 | lo; } My test environment is: ubuntu, intel i7-3770, 16G memory, 256g micron ssd. truncation path: update extent cache from truncate_data_blocks_range non-truncataion path: update extent cache from other paths total: all update paths a) Removing 128MB file which has one extent node mapping whole range of file: 1. dd if=/dev/zero of=/mnt/f2fs/128M bs=1M count=128 2. sync 3. rm /mnt/f2fs/128M Before: total count average truncation: 7651022 32768 233.49 Patched: total count average truncation: 3321 33 100.64 b) fsstress: fsstress -d /mnt/f2fs -l 5 -n 100 -p 20 Test times: 5 times. Before: total count average truncation: 5812480.6 20911.6 277.95 non-truncation: 7783845.6 13440.8 579.12 total: 13596326.2 34352.4 395.79 Patched: total count average truncation: 1281283.0 3041.6 421.25 non-truncation: 7355844.4 13662.8 538.38 total: 8637127.4 16704.4 517.06 1) For the updates in truncation path: - we can see updating in batches leads total tsc and update count reducing explicitly; - besides, for a single batched updating, punching multiple extent nodes in a loop, result in executing more operations, so our average tsc increase intensively. 2) For the updates in non-truncation path: - there is a little improvement, that is because for the scenario that we just need to update in the head or tail of extent node, new interface optimize to update info in extent node directly, rather than removing original extent node for updating and then inserting that updated one into cache as new node. Signed-off-by: Chao Yu <chao2.yu@samsung.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-08-26 12:34:48 +00:00
__detach_extent_node(sbi, et, en);
/*
* if we remove node in rb-tree, our parent node pointer may
* point the wrong place, discard them.
*/
insert_p = NULL;
insert_parent = NULL;
/* case#3, invalidate entire extent node |XXXXXXXXXX| */
if (pos <= dei.fofs && end >= dei.fofs + dei.len) {
if (__is_extent_same(&dei, &et->largest))
et->largest.len = 0;
goto update;
}
/*
* case#4, invalidate data in the middle of extent node
* |VVVXXXXVVV|
*/
if (dei.len > F2FS_MIN_EXTENT_LEN) {
unsigned int endofs;
/* insert left part of split extent into cache */
if (pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
set_extent_info(&ei, dei.fofs, dei.blk,
pos - dei.fofs);
en1 = __insert_extent_tree(sbi, et, &ei,
NULL, NULL);
}
/* insert right part of split extent into cache */
endofs = dei.fofs + dei.len;
if (endofs - end >= F2FS_MIN_EXTENT_LEN) {
set_extent_info(&ei, end,
end - dei.fofs + dei.blk,
endofs - end);
en2 = __insert_extent_tree(sbi, et, &ei,
NULL, NULL);
}
}
f2fs: update extent tree in batches This patch introduce a new helper f2fs_update_extent_tree_range which can do extent mapping update at a specified range. The main idea is: 1) punch all mapping info in extent node(s) which are at a specified range; 2) try to merge new extent mapping with adjacent node, or failing that, insert the mapping into extent tree as a new node. In order to see the benefit, I add a function for stating time stamping count as below: uint64_t rdtsc(void) { uint32_t lo, hi; __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); return (uint64_t)hi << 32 | lo; } My test environment is: ubuntu, intel i7-3770, 16G memory, 256g micron ssd. truncation path: update extent cache from truncate_data_blocks_range non-truncataion path: update extent cache from other paths total: all update paths a) Removing 128MB file which has one extent node mapping whole range of file: 1. dd if=/dev/zero of=/mnt/f2fs/128M bs=1M count=128 2. sync 3. rm /mnt/f2fs/128M Before: total count average truncation: 7651022 32768 233.49 Patched: total count average truncation: 3321 33 100.64 b) fsstress: fsstress -d /mnt/f2fs -l 5 -n 100 -p 20 Test times: 5 times. Before: total count average truncation: 5812480.6 20911.6 277.95 non-truncation: 7783845.6 13440.8 579.12 total: 13596326.2 34352.4 395.79 Patched: total count average truncation: 1281283.0 3041.6 421.25 non-truncation: 7355844.4 13662.8 538.38 total: 8637127.4 16704.4 517.06 1) For the updates in truncation path: - we can see updating in batches leads total tsc and update count reducing explicitly; - besides, for a single batched updating, punching multiple extent nodes in a loop, result in executing more operations, so our average tsc increase intensively. 2) For the updates in non-truncation path: - there is a little improvement, that is because for the scenario that we just need to update in the head or tail of extent node, new interface optimize to update info in extent node directly, rather than removing original extent node for updating and then inserting that updated one into cache as new node. Signed-off-by: Chao Yu <chao2.yu@samsung.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-08-26 12:34:48 +00:00
update:
/* 2.2 update in global extent list */
spin_lock(&sbi->extent_lock);
if (en && !list_empty(&en->list))
list_del(&en->list);
if (en1)
list_add_tail(&en1->list, &sbi->extent_list);
if (en2)
list_add_tail(&en2->list, &sbi->extent_list);
spin_unlock(&sbi->extent_lock);
/* 2.3 release extent node */
if (en)
kmem_cache_free(extent_node_slab, en);
next:
en = node ? rb_entry(node, struct extent_node, rb_node) : NULL;
next_en = en;
if (en)
pos = en->ei.fofs;
}
update_extent:
/* 3. update extent in extent cache */
if (blkaddr) {
f2fs: update extent tree in batches This patch introduce a new helper f2fs_update_extent_tree_range which can do extent mapping update at a specified range. The main idea is: 1) punch all mapping info in extent node(s) which are at a specified range; 2) try to merge new extent mapping with adjacent node, or failing that, insert the mapping into extent tree as a new node. In order to see the benefit, I add a function for stating time stamping count as below: uint64_t rdtsc(void) { uint32_t lo, hi; __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); return (uint64_t)hi << 32 | lo; } My test environment is: ubuntu, intel i7-3770, 16G memory, 256g micron ssd. truncation path: update extent cache from truncate_data_blocks_range non-truncataion path: update extent cache from other paths total: all update paths a) Removing 128MB file which has one extent node mapping whole range of file: 1. dd if=/dev/zero of=/mnt/f2fs/128M bs=1M count=128 2. sync 3. rm /mnt/f2fs/128M Before: total count average truncation: 7651022 32768 233.49 Patched: total count average truncation: 3321 33 100.64 b) fsstress: fsstress -d /mnt/f2fs -l 5 -n 100 -p 20 Test times: 5 times. Before: total count average truncation: 5812480.6 20911.6 277.95 non-truncation: 7783845.6 13440.8 579.12 total: 13596326.2 34352.4 395.79 Patched: total count average truncation: 1281283.0 3041.6 421.25 non-truncation: 7355844.4 13662.8 538.38 total: 8637127.4 16704.4 517.06 1) For the updates in truncation path: - we can see updating in batches leads total tsc and update count reducing explicitly; - besides, for a single batched updating, punching multiple extent nodes in a loop, result in executing more operations, so our average tsc increase intensively. 2) For the updates in non-truncation path: - there is a little improvement, that is because for the scenario that we just need to update in the head or tail of extent node, new interface optimize to update info in extent node directly, rather than removing original extent node for updating and then inserting that updated one into cache as new node. Signed-off-by: Chao Yu <chao2.yu@samsung.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-08-26 12:34:48 +00:00
struct extent_node *den = NULL;
set_extent_info(&ei, fofs, blkaddr, len);
en3 = __try_merge_extent_node(sbi, et, &ei, &den,
f2fs: update extent tree in batches This patch introduce a new helper f2fs_update_extent_tree_range which can do extent mapping update at a specified range. The main idea is: 1) punch all mapping info in extent node(s) which are at a specified range; 2) try to merge new extent mapping with adjacent node, or failing that, insert the mapping into extent tree as a new node. In order to see the benefit, I add a function for stating time stamping count as below: uint64_t rdtsc(void) { uint32_t lo, hi; __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); return (uint64_t)hi << 32 | lo; } My test environment is: ubuntu, intel i7-3770, 16G memory, 256g micron ssd. truncation path: update extent cache from truncate_data_blocks_range non-truncataion path: update extent cache from other paths total: all update paths a) Removing 128MB file which has one extent node mapping whole range of file: 1. dd if=/dev/zero of=/mnt/f2fs/128M bs=1M count=128 2. sync 3. rm /mnt/f2fs/128M Before: total count average truncation: 7651022 32768 233.49 Patched: total count average truncation: 3321 33 100.64 b) fsstress: fsstress -d /mnt/f2fs -l 5 -n 100 -p 20 Test times: 5 times. Before: total count average truncation: 5812480.6 20911.6 277.95 non-truncation: 7783845.6 13440.8 579.12 total: 13596326.2 34352.4 395.79 Patched: total count average truncation: 1281283.0 3041.6 421.25 non-truncation: 7355844.4 13662.8 538.38 total: 8637127.4 16704.4 517.06 1) For the updates in truncation path: - we can see updating in batches leads total tsc and update count reducing explicitly; - besides, for a single batched updating, punching multiple extent nodes in a loop, result in executing more operations, so our average tsc increase intensively. 2) For the updates in non-truncation path: - there is a little improvement, that is because for the scenario that we just need to update in the head or tail of extent node, new interface optimize to update info in extent node directly, rather than removing original extent node for updating and then inserting that updated one into cache as new node. Signed-off-by: Chao Yu <chao2.yu@samsung.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-08-26 12:34:48 +00:00
prev_en, next_en);
if (!en3)
en3 = __insert_extent_tree(sbi, et, &ei,
insert_p, insert_parent);
/* give up extent_cache, if split and small updates happen */
if (dei.len >= 1 &&
prev.len < F2FS_MIN_EXTENT_LEN &&
et->largest.len < F2FS_MIN_EXTENT_LEN) {
et->largest.len = 0;
set_inode_flag(F2FS_I(inode), FI_NO_EXTENT);
}
f2fs: update extent tree in batches This patch introduce a new helper f2fs_update_extent_tree_range which can do extent mapping update at a specified range. The main idea is: 1) punch all mapping info in extent node(s) which are at a specified range; 2) try to merge new extent mapping with adjacent node, or failing that, insert the mapping into extent tree as a new node. In order to see the benefit, I add a function for stating time stamping count as below: uint64_t rdtsc(void) { uint32_t lo, hi; __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); return (uint64_t)hi << 32 | lo; } My test environment is: ubuntu, intel i7-3770, 16G memory, 256g micron ssd. truncation path: update extent cache from truncate_data_blocks_range non-truncataion path: update extent cache from other paths total: all update paths a) Removing 128MB file which has one extent node mapping whole range of file: 1. dd if=/dev/zero of=/mnt/f2fs/128M bs=1M count=128 2. sync 3. rm /mnt/f2fs/128M Before: total count average truncation: 7651022 32768 233.49 Patched: total count average truncation: 3321 33 100.64 b) fsstress: fsstress -d /mnt/f2fs -l 5 -n 100 -p 20 Test times: 5 times. Before: total count average truncation: 5812480.6 20911.6 277.95 non-truncation: 7783845.6 13440.8 579.12 total: 13596326.2 34352.4 395.79 Patched: total count average truncation: 1281283.0 3041.6 421.25 non-truncation: 7355844.4 13662.8 538.38 total: 8637127.4 16704.4 517.06 1) For the updates in truncation path: - we can see updating in batches leads total tsc and update count reducing explicitly; - besides, for a single batched updating, punching multiple extent nodes in a loop, result in executing more operations, so our average tsc increase intensively. 2) For the updates in non-truncation path: - there is a little improvement, that is because for the scenario that we just need to update in the head or tail of extent node, new interface optimize to update info in extent node directly, rather than removing original extent node for updating and then inserting that updated one into cache as new node. Signed-off-by: Chao Yu <chao2.yu@samsung.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-08-26 12:34:48 +00:00
spin_lock(&sbi->extent_lock);
if (en3) {
if (list_empty(&en3->list))
list_add_tail(&en3->list, &sbi->extent_list);
else
list_move_tail(&en3->list, &sbi->extent_list);
}
if (den && !list_empty(&den->list))
list_del(&den->list);
spin_unlock(&sbi->extent_lock);
f2fs: update extent tree in batches This patch introduce a new helper f2fs_update_extent_tree_range which can do extent mapping update at a specified range. The main idea is: 1) punch all mapping info in extent node(s) which are at a specified range; 2) try to merge new extent mapping with adjacent node, or failing that, insert the mapping into extent tree as a new node. In order to see the benefit, I add a function for stating time stamping count as below: uint64_t rdtsc(void) { uint32_t lo, hi; __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); return (uint64_t)hi << 32 | lo; } My test environment is: ubuntu, intel i7-3770, 16G memory, 256g micron ssd. truncation path: update extent cache from truncate_data_blocks_range non-truncataion path: update extent cache from other paths total: all update paths a) Removing 128MB file which has one extent node mapping whole range of file: 1. dd if=/dev/zero of=/mnt/f2fs/128M bs=1M count=128 2. sync 3. rm /mnt/f2fs/128M Before: total count average truncation: 7651022 32768 233.49 Patched: total count average truncation: 3321 33 100.64 b) fsstress: fsstress -d /mnt/f2fs -l 5 -n 100 -p 20 Test times: 5 times. Before: total count average truncation: 5812480.6 20911.6 277.95 non-truncation: 7783845.6 13440.8 579.12 total: 13596326.2 34352.4 395.79 Patched: total count average truncation: 1281283.0 3041.6 421.25 non-truncation: 7355844.4 13662.8 538.38 total: 8637127.4 16704.4 517.06 1) For the updates in truncation path: - we can see updating in batches leads total tsc and update count reducing explicitly; - besides, for a single batched updating, punching multiple extent nodes in a loop, result in executing more operations, so our average tsc increase intensively. 2) For the updates in non-truncation path: - there is a little improvement, that is because for the scenario that we just need to update in the head or tail of extent node, new interface optimize to update info in extent node directly, rather than removing original extent node for updating and then inserting that updated one into cache as new node. Signed-off-by: Chao Yu <chao2.yu@samsung.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-08-26 12:34:48 +00:00
if (den)
kmem_cache_free(extent_node_slab, den);
}
if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
__free_extent_tree(sbi, et, true);
write_unlock(&et->lock);
return !__is_extent_same(&prev, &et->largest);
}
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
struct extent_node *en, *tmp;
unsigned long ino = F2FS_ROOT_INO(sbi);
struct radix_tree_root *root = &sbi->extent_tree_root;
unsigned int found;
unsigned int node_cnt = 0, tree_cnt = 0;
int remained;
if (!test_opt(sbi, EXTENT_CACHE))
return 0;
if (!down_write_trylock(&sbi->extent_tree_lock))
goto out;
/* 1. remove unreferenced extent tree */
while ((found = radix_tree_gang_lookup(root,
(void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
unsigned i;
ino = treevec[found - 1]->ino + 1;
for (i = 0; i < found; i++) {
struct extent_tree *et = treevec[i];
if (!atomic_read(&et->refcount)) {
write_lock(&et->lock);
node_cnt += __free_extent_tree(sbi, et, true);
write_unlock(&et->lock);
radix_tree_delete(root, et->ino);
kmem_cache_free(extent_tree_slab, et);
sbi->total_ext_tree--;
tree_cnt++;
if (node_cnt + tree_cnt >= nr_shrink)
goto unlock_out;
}
}
}
up_write(&sbi->extent_tree_lock);
/* 2. remove LRU extent entries */
if (!down_write_trylock(&sbi->extent_tree_lock))
goto out;
remained = nr_shrink - (node_cnt + tree_cnt);
spin_lock(&sbi->extent_lock);
list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
if (!remained--)
break;
list_del_init(&en->list);
}
spin_unlock(&sbi->extent_lock);
/*
* reset ino for searching victims from beginning of global extent tree.
*/
ino = F2FS_ROOT_INO(sbi);
while ((found = radix_tree_gang_lookup(root,
(void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
unsigned i;
ino = treevec[found - 1]->ino + 1;
for (i = 0; i < found; i++) {
struct extent_tree *et = treevec[i];
write_lock(&et->lock);
node_cnt += __free_extent_tree(sbi, et, false);
write_unlock(&et->lock);
if (node_cnt + tree_cnt >= nr_shrink)
break;
}
}
unlock_out:
up_write(&sbi->extent_tree_lock);
out:
trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
return node_cnt + tree_cnt;
}
unsigned int f2fs_destroy_extent_node(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et = F2FS_I(inode)->extent_tree;
unsigned int node_cnt = 0;
if (!et)
return 0;
write_lock(&et->lock);
node_cnt = __free_extent_tree(sbi, et, true);
write_unlock(&et->lock);
return node_cnt;
}
void f2fs_destroy_extent_tree(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et = F2FS_I(inode)->extent_tree;
unsigned int node_cnt = 0;
if (!et)
return;
if (inode->i_nlink && !is_bad_inode(inode) && et->count) {
atomic_dec(&et->refcount);
return;
}
/* free all extent info belong to this extent tree */
node_cnt = f2fs_destroy_extent_node(inode);
/* delete extent tree entry in radix tree */
down_write(&sbi->extent_tree_lock);
atomic_dec(&et->refcount);
f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count);
radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
kmem_cache_free(extent_tree_slab, et);
sbi->total_ext_tree--;
up_write(&sbi->extent_tree_lock);
F2FS_I(inode)->extent_tree = NULL;
trace_f2fs_destroy_extent_tree(inode, node_cnt);
}
bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
struct extent_info *ei)
{
if (!f2fs_may_extent_tree(inode))
return false;
return f2fs_lookup_extent_tree(inode, pgofs, ei);
}
void f2fs_update_extent_cache(struct dnode_of_data *dn)
{
struct f2fs_inode_info *fi = F2FS_I(dn->inode);
pgoff_t fofs;
if (!f2fs_may_extent_tree(dn->inode))
return;
f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR);
f2fs: update extent tree in batches This patch introduce a new helper f2fs_update_extent_tree_range which can do extent mapping update at a specified range. The main idea is: 1) punch all mapping info in extent node(s) which are at a specified range; 2) try to merge new extent mapping with adjacent node, or failing that, insert the mapping into extent tree as a new node. In order to see the benefit, I add a function for stating time stamping count as below: uint64_t rdtsc(void) { uint32_t lo, hi; __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); return (uint64_t)hi << 32 | lo; } My test environment is: ubuntu, intel i7-3770, 16G memory, 256g micron ssd. truncation path: update extent cache from truncate_data_blocks_range non-truncataion path: update extent cache from other paths total: all update paths a) Removing 128MB file which has one extent node mapping whole range of file: 1. dd if=/dev/zero of=/mnt/f2fs/128M bs=1M count=128 2. sync 3. rm /mnt/f2fs/128M Before: total count average truncation: 7651022 32768 233.49 Patched: total count average truncation: 3321 33 100.64 b) fsstress: fsstress -d /mnt/f2fs -l 5 -n 100 -p 20 Test times: 5 times. Before: total count average truncation: 5812480.6 20911.6 277.95 non-truncation: 7783845.6 13440.8 579.12 total: 13596326.2 34352.4 395.79 Patched: total count average truncation: 1281283.0 3041.6 421.25 non-truncation: 7355844.4 13662.8 538.38 total: 8637127.4 16704.4 517.06 1) For the updates in truncation path: - we can see updating in batches leads total tsc and update count reducing explicitly; - besides, for a single batched updating, punching multiple extent nodes in a loop, result in executing more operations, so our average tsc increase intensively. 2) For the updates in non-truncation path: - there is a little improvement, that is because for the scenario that we just need to update in the head or tail of extent node, new interface optimize to update info in extent node directly, rather than removing original extent node for updating and then inserting that updated one into cache as new node. Signed-off-by: Chao Yu <chao2.yu@samsung.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-08-26 12:34:48 +00:00
fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
dn->ofs_in_node;
f2fs: update extent tree in batches This patch introduce a new helper f2fs_update_extent_tree_range which can do extent mapping update at a specified range. The main idea is: 1) punch all mapping info in extent node(s) which are at a specified range; 2) try to merge new extent mapping with adjacent node, or failing that, insert the mapping into extent tree as a new node. In order to see the benefit, I add a function for stating time stamping count as below: uint64_t rdtsc(void) { uint32_t lo, hi; __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); return (uint64_t)hi << 32 | lo; } My test environment is: ubuntu, intel i7-3770, 16G memory, 256g micron ssd. truncation path: update extent cache from truncate_data_blocks_range non-truncataion path: update extent cache from other paths total: all update paths a) Removing 128MB file which has one extent node mapping whole range of file: 1. dd if=/dev/zero of=/mnt/f2fs/128M bs=1M count=128 2. sync 3. rm /mnt/f2fs/128M Before: total count average truncation: 7651022 32768 233.49 Patched: total count average truncation: 3321 33 100.64 b) fsstress: fsstress -d /mnt/f2fs -l 5 -n 100 -p 20 Test times: 5 times. Before: total count average truncation: 5812480.6 20911.6 277.95 non-truncation: 7783845.6 13440.8 579.12 total: 13596326.2 34352.4 395.79 Patched: total count average truncation: 1281283.0 3041.6 421.25 non-truncation: 7355844.4 13662.8 538.38 total: 8637127.4 16704.4 517.06 1) For the updates in truncation path: - we can see updating in batches leads total tsc and update count reducing explicitly; - besides, for a single batched updating, punching multiple extent nodes in a loop, result in executing more operations, so our average tsc increase intensively. 2) For the updates in non-truncation path: - there is a little improvement, that is because for the scenario that we just need to update in the head or tail of extent node, new interface optimize to update info in extent node directly, rather than removing original extent node for updating and then inserting that updated one into cache as new node. Signed-off-by: Chao Yu <chao2.yu@samsung.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-08-26 12:34:48 +00:00
if (f2fs_update_extent_tree_range(dn->inode, fofs, dn->data_blkaddr, 1))
sync_inode_page(dn);
}
void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
pgoff_t fofs, block_t blkaddr, unsigned int len)
{
if (!f2fs_may_extent_tree(dn->inode))
return;
if (f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len))
sync_inode_page(dn);
}
void init_extent_cache_info(struct f2fs_sb_info *sbi)
{
INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
init_rwsem(&sbi->extent_tree_lock);
INIT_LIST_HEAD(&sbi->extent_list);
spin_lock_init(&sbi->extent_lock);
sbi->total_ext_tree = 0;
atomic_set(&sbi->total_ext_node, 0);
}
int __init create_extent_cache(void)
{
extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
sizeof(struct extent_tree));
if (!extent_tree_slab)
return -ENOMEM;
extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
sizeof(struct extent_node));
if (!extent_node_slab) {
kmem_cache_destroy(extent_tree_slab);
return -ENOMEM;
}
return 0;
}
void destroy_extent_cache(void)
{
kmem_cache_destroy(extent_node_slab);
kmem_cache_destroy(extent_tree_slab);
}