From e1c42045203071c4634b89e696037357810d3083 Mon Sep 17 00:00:00 2001 From: arter97 Date: Wed, 6 Aug 2014 23:22:50 +0900 Subject: [PATCH 01/28] f2fs: fix typo Fix typo and some grammatical errors. The words "filesystem" and "readahead" are being used without the space treewide. Signed-off-by: Park Ju Hyung Signed-off-by: Jaegeuk Kim --- fs/f2fs/Kconfig | 4 ++-- fs/f2fs/checkpoint.c | 4 ++-- fs/f2fs/data.c | 4 ++-- fs/f2fs/debug.c | 4 ++-- fs/f2fs/dir.c | 4 ++-- fs/f2fs/f2fs.h | 6 +++--- fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 4 ++-- fs/f2fs/gc.h | 2 +- fs/f2fs/namei.c | 2 +- fs/f2fs/node.c | 14 +++++++------- fs/f2fs/segment.c | 4 ++-- fs/f2fs/segment.h | 2 +- fs/f2fs/super.c | 2 +- fs/f2fs/xattr.c | 2 +- 15 files changed, 30 insertions(+), 30 deletions(-) diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 214fe1054fce..736a348509f7 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -23,7 +23,7 @@ config F2FS_STAT_FS mounted as f2fs. Each file shows the whole f2fs information. /sys/kernel/debug/f2fs/status includes: - - major file system information managed by f2fs currently + - major filesystem information managed by f2fs currently - average SIT information about whole segments - current memory footprint consumed by f2fs. @@ -68,6 +68,6 @@ config F2FS_CHECK_FS bool "F2FS consistency checking feature" depends on F2FS_FS help - Enables BUG_ONs which check the file system consistency in runtime. + Enables BUG_ONs which check the filesystem consistency in runtime. If you want to improve the performance, say N. diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6aeed5bada52..7e1c13bfb99f 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -758,7 +758,7 @@ static void block_operations(struct f2fs_sb_info *sbi) } /* - * POR: we should ensure that there is no dirty node pages + * POR: we should ensure that there are no dirty node pages * until finishing nat/sit flush. */ retry_flush_nodes: @@ -942,7 +942,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) } /* - * We guarantee that this checkpoint procedure should not fail. + * We guarantee that this checkpoint procedure will not fail. */ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) { diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 03313099c51c..7aef28d1fffa 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -691,7 +691,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock, allocated = true; blkaddr = dn.data_blkaddr; } - /* Give more consecutive addresses for the read ahead */ + /* Give more consecutive addresses for the readahead */ if (blkaddr == (bh_result->b_blocknr + ofs)) { ofs++; dn.ofs_in_node++; @@ -739,7 +739,7 @@ static int f2fs_read_data_page(struct file *file, struct page *page) trace_f2fs_readpage(page, DATA); - /* If the file has inline data, try to read it directlly */ + /* If the file has inline data, try to read it directly */ if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, page); else diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index a441ba33be11..fecebdbfd781 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -32,7 +32,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) struct f2fs_stat_info *si = F2FS_STAT(sbi); int i; - /* valid check of the segment numbers */ + /* validation check of the segment numbers */ si->hit_ext = sbi->read_hit_ext; si->total_ext = sbi->total_hit_ext; si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); @@ -152,7 +152,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi)); si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); - /* buld nm */ + /* build nm */ si->base_mem += sizeof(struct f2fs_nm_info); si->base_mem += __bitmap_size(sbi, NAT_BITMAP); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index bcf893c3d903..a69bbfa90c99 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -124,7 +124,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, /* * For the most part, it should be a bug when name_len is zero. - * We stop here for figuring out where the bugs are occurred. + * We stop here for figuring out where the bugs has occurred. */ f2fs_bug_on(!de->name_len); @@ -563,7 +563,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) } /* - * It only removes the dentry from the dentry page,corresponding name + * It only removes the dentry from the dentry page, corresponding name * entry in name page does not need to be touched during deletion. */ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4dab5338a97a..790a0738d314 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -395,7 +395,7 @@ enum count_type { }; /* - * The below are the page types of bios used in submti_bio(). + * The below are the page types of bios used in submit_bio(). * The available types are: * DATA User data pages. It operates as async mode. * NODE Node pages. It operates as async mode. @@ -470,7 +470,7 @@ struct f2fs_sb_info { struct list_head dir_inode_list; /* dir inode list */ spinlock_t dir_inode_lock; /* for dir inode list lock */ - /* basic file system units */ + /* basic filesystem units */ unsigned int log_sectors_per_block; /* log2 sectors per block */ unsigned int log_blocksize; /* log2 block size */ unsigned int blocksize; /* block size */ @@ -799,7 +799,7 @@ static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) /* * odd numbered checkpoint should at cp segment 0 - * and even segent must be at cp segment 1 + * and even segment must be at cp segment 1 */ if (!(ckpt_version & 1)) start_addr += sbi->blocks_per_seg; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 208f1a9bd569..87cdac4efeec 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -288,7 +288,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) if (err && err != -ENOENT) { goto fail; } else if (err == -ENOENT) { - /* direct node is not exist */ + /* direct node does not exists */ if (whence == SEEK_DATA) { pgofs = PGOFS_OF_NEXT_DNODE(pgofs, F2FS_I(inode)); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index d7947d90ccc3..87c50c507461 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -58,7 +58,7 @@ static int gc_thread_func(void *data) * 3. IO subsystem is idle by checking the # of requests in * bdev's request list. * - * Note) We have to avoid triggering GCs too much frequently. + * Note) We have to avoid triggering GCs frequently. * Because it is possible that some segments can be * invalidated soon after by user update or deletion. * So, I'd like to wait some time to collect dirty segments. @@ -222,7 +222,7 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) u = (vblocks * 100) >> sbi->log_blocks_per_seg; - /* Handle if the system time is changed by user */ + /* Handle if the system time has changed by the user */ if (mtime < sit_i->min_mtime) sit_i->min_mtime = mtime; if (mtime > sit_i->max_mtime) diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 5d5eb6047bf4..16f0b2b22999 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -91,7 +91,7 @@ static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) block_t invalid_user_blocks = sbi->user_block_count - written_block_count(sbi); /* - * Background GC is triggered with the following condition. + * Background GC is triggered with the following conditions. * 1. There are a number of invalid blocks. * 2. There is not enough free space. */ diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 27b03776ffd2..eb407d207289 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -229,7 +229,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) f2fs_delete_entry(de, page, inode); f2fs_unlock_op(sbi); - /* In order to evict this inode, we set it dirty */ + /* In order to evict this inode, we set it dirty */ mark_inode_dirty(inode); fail: trace_f2fs_unlink_exit(inode, err); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d3d90d284631..1f3329907483 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -237,7 +237,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, nat_get_blkaddr(e) != NULL_ADDR && new_blkaddr == NEW_ADDR); - /* increament version no as node is removed */ + /* increment version no as node is removed */ if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { unsigned char version = nat_get_version(e); nat_set_version(e, inc_node_version(version)); @@ -274,7 +274,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) } /* - * This function returns always success + * This function always returns success */ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) { @@ -650,7 +650,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, /* get indirect nodes in the path */ for (i = 0; i < idx + 1; i++) { - /* refernece count'll be increased */ + /* reference count'll be increased */ pages[i] = get_node_page(sbi, nid[i]); if (IS_ERR(pages[i])) { err = PTR_ERR(pages[i]); @@ -836,7 +836,7 @@ void remove_inode_page(struct inode *inode) f2fs_put_page(page, 1); return; } - /* 0 is possible, after f2fs_new_inode() is failed */ + /* 0 is possible, after f2fs_new_inode() has failed */ f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1); set_new_dnode(&dn, inode, page, page, ino); truncate_node(&dn); @@ -1637,7 +1637,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) if (!ipage) return -ENOMEM; - /* Should not use this inode from free nid list */ + /* Should not use this inode from free nid list */ remove_free_nid(NM_I(sbi), ino); SetPageUptodate(ipage); @@ -1665,7 +1665,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) /* * ra_sum_pages() merge contiguous pages into one bio and submit. - * these pre-readed pages are alloced in bd_inode's mapping tree. + * these pre-read pages are allocated in bd_inode's mapping tree. */ static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages, int start, int nrpages) @@ -1709,7 +1709,7 @@ int restore_node_summary(struct f2fs_sb_info *sbi, for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) { nrpages = min(last_offset - i, bio_blocks); - /* read ahead node pages */ + /* readahead node pages */ nrpages = ra_sum_pages(sbi, pages, addr, nrpages); if (!nrpages) return -ENOMEM; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0dfeebae2a50..31b630efe4db 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -62,7 +62,7 @@ static inline unsigned long __reverse_ffs(unsigned long word) } /* - * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c becasue + * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because * f2fs_set_bit makes MSB and LSB reversed in a byte. * Example: * LSB <--> MSB @@ -808,7 +808,7 @@ static void __refresh_next_blkoff(struct f2fs_sb_info *sbi, } /* - * This function always allocates a used segment (from dirty seglist) by SSR + * This function always allocates a used segment(from dirty seglist) by SSR * manner, so it should recover the existing segment information of valid blocks */ static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 55973f7b0330..ff483257283b 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -549,7 +549,7 @@ static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) } /* - * Summary block is always treated as invalid block + * Summary block is always treated as an invalid block */ static inline void check_block_count(struct f2fs_sb_info *sbi, int segno, struct f2fs_sit_entry *raw_sit) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 657582fc7601..633315acfef8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -663,7 +663,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) if (need_restart_gc) { if (start_gc_thread(sbi)) f2fs_msg(sbi->sb, KERN_WARNING, - "background gc thread is stop"); + "background gc thread has stopped"); } else if (need_stop_gc) { stop_gc_thread(sbi); } diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 8bea941ee309..728a5dc3dc16 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -528,7 +528,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, int free; /* * If value is NULL, it is remove operation. - * In case of update operation, we caculate free. + * In case of update operation, we calculate free. */ free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr); if (found) From b067ba1f1b3fa7ec798d35e12aed6cdba9cea905 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 7 Aug 2014 16:32:25 -0700 Subject: [PATCH 02/28] f2fs: should convert inline_data during the mkwrite If mkwrite is called to an inode having inline_data, it can overwrite the data index space as NEW_ADDR. (e.g., the first 4 bytes are coincidently zero) Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 11 ++++++++--- fs/f2fs/inline.c | 20 ++++++++++++-------- 4 files changed, 22 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7aef28d1fffa..ac3ccc25386b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -946,7 +946,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, f2fs_balance_fs(sbi); repeat: - err = f2fs_convert_inline_data(inode, pos + len); + err = f2fs_convert_inline_data(inode, pos + len, NULL); if (err) goto fail; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 790a0738d314..c8288c9a4e1e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1439,7 +1439,7 @@ extern const struct inode_operations f2fs_special_inode_operations; */ bool f2fs_may_inline(struct inode *); int f2fs_read_inline_data(struct inode *, struct page *); -int f2fs_convert_inline_data(struct inode *, pgoff_t); +int f2fs_convert_inline_data(struct inode *, pgoff_t, struct page *); int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); void truncate_inline_data(struct inode *, u64); int recover_inline_data(struct inode *, struct page *); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 87cdac4efeec..ecbdf6a6381c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -41,6 +41,11 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, sb_start_pagefault(inode->i_sb); + /* force to convert with normal data indices */ + err = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, page); + if (err) + goto out; + /* block allocation */ f2fs_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); @@ -533,7 +538,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size != i_size_read(inode)) { - err = f2fs_convert_inline_data(inode, attr->ia_size); + err = f2fs_convert_inline_data(inode, attr->ia_size, NULL); if (err) return err; @@ -622,7 +627,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) loff_t off_start, off_end; int ret = 0; - ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1); + ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); if (ret) return ret; @@ -678,7 +683,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (ret) return ret; - ret = f2fs_convert_inline_data(inode, offset + len); + ret = f2fs_convert_inline_data(inode, offset + len, NULL); if (ret) return ret; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 5beeccef9ae1..1ec512d0bed3 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -124,9 +124,10 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) return err; } -int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size) +int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size, + struct page *page) { - struct page *page; + struct page *new_page = page; int err; if (!f2fs_has_inline_data(inode)) @@ -134,17 +135,20 @@ int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size) else if (to_size <= MAX_INLINE_DATA) return 0; - page = grab_cache_page(inode->i_mapping, 0); - if (!page) - return -ENOMEM; + if (!page || page->index != 0) { + new_page = grab_cache_page(inode->i_mapping, 0); + if (!new_page) + return -ENOMEM; + } - err = __f2fs_convert_inline_data(inode, page); - f2fs_put_page(page, 1); + err = __f2fs_convert_inline_data(inode, new_page); + if (!page || page->index != 0) + f2fs_put_page(new_page, 1); return err; } int f2fs_write_inline_data(struct inode *inode, - struct page *page, unsigned size) + struct page *page, unsigned size) { void *src_addr, *dst_addr; struct page *ipage; From 0342fd301a9a940210f59fd094f766e7a0671987 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 7 Aug 2014 16:57:17 -0700 Subject: [PATCH 03/28] f2fs: make clear on test condition and return types This patch adds a parentheses to make clear for condition check. And also it changes the return type for better meanings. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/inline.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c8288c9a4e1e..2e4aa3a51b0d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1442,5 +1442,5 @@ int f2fs_read_inline_data(struct inode *, struct page *); int f2fs_convert_inline_data(struct inode *, pgoff_t, struct page *); int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); void truncate_inline_data(struct inode *, u64); -int recover_inline_data(struct inode *, struct page *); +bool recover_inline_data(struct inode *, struct page *); #endif diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 1ec512d0bed3..520758b91999 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -203,7 +203,7 @@ void truncate_inline_data(struct inode *inode, u64 from) f2fs_put_page(ipage, 1); } -int recover_inline_data(struct inode *inode, struct page *npage) +bool recover_inline_data(struct inode *inode, struct page *npage) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct f2fs_inode *ri = NULL; @@ -222,7 +222,7 @@ int recover_inline_data(struct inode *inode, struct page *npage) ri = F2FS_INODE(npage); if (f2fs_has_inline_data(inode) && - ri && ri->i_inline & F2FS_INLINE_DATA) { + ri && (ri->i_inline & F2FS_INLINE_DATA)) { process_inline: ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(IS_ERR(ipage)); @@ -234,7 +234,7 @@ int recover_inline_data(struct inode *inode, struct page *npage) memcpy(dst_addr, src_addr, MAX_INLINE_DATA); update_inode(inode, ipage); f2fs_put_page(ipage, 1); - return -1; + return true; } if (f2fs_has_inline_data(inode)) { @@ -246,10 +246,10 @@ int recover_inline_data(struct inode *inode, struct page *npage) clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); update_inode(inode, ipage); f2fs_put_page(ipage, 1); - } else if (ri && ri->i_inline & F2FS_INLINE_DATA) { + } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { truncate_blocks(inode, 0); set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); goto process_inline; } - return 0; + return false; } From 617deb8c053aeec06c7aa16ac7225f046fab95e8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 7 Aug 2014 17:04:24 -0700 Subject: [PATCH 04/28] f2fs: fix the initial inode page for recovery If a new inode page is needed for recover_dentry, we should assing i_inline as zero. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1f3329907483..093d7991b7ed 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1651,6 +1651,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) dst->i_blocks = cpu_to_le64(1); dst->i_links = cpu_to_le32(1); dst->i_xattr_nid = 0; + dst->i_inline = src->i_inline & F2FS_INLINE_XATTR; new_ni = old_ni; new_ni.ino = ino; @@ -1659,6 +1660,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) WARN_ON(1); set_node_addr(sbi, &new_ni, NEW_ADDR, false); inc_valid_inode_count(sbi); + set_page_dirty(ipage); f2fs_put_page(ipage, 1); return 0; } From 695facc05a3eaaf434911dc5dbbc3e6c6a4e1862 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 7 Aug 2014 17:06:18 -0700 Subject: [PATCH 05/28] f2fs: clear FI_INC_LINK during the recovery If an inode are fsynced multiple times with fsync & dent marks, this inode will set FI_INC_LINK at find_fsync_dnodes during the recovery. But, in recover_inode, recover_dentry doesn't clear that flag when multiple hits were occurred. So this patch removes the flag for the further consistency. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index fe1c6d921ba2..cfb2aa9bfc20 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -62,8 +62,10 @@ static int recover_dentry(struct page *ipage, struct inode *inode) } retry: de = f2fs_find_entry(dir, &name, &page); - if (de && inode->i_ino == le32_to_cpu(de->ino)) + if (de && inode->i_ino == le32_to_cpu(de->ino)) { + clear_inode_flag(F2FS_I(inode), FI_INC_LINK); goto out_unmap_put; + } if (de) { einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); if (IS_ERR(einode)) { From e3b4d43f7c233c6fce21fe4b4cb55b6d59afddae Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 7 Aug 2014 23:45:42 -0700 Subject: [PATCH 06/28] f2fs: should clear the inline_xattr flag During the recovery, we should clear the inline_xattr flag if its xattr node block is recovered. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 093d7991b7ed..151045f3e7b7 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1557,26 +1557,25 @@ void recover_inline_xattr(struct inode *inode, struct page *page) struct page *ipage; struct f2fs_inode *ri; - if (!f2fs_has_inline_xattr(inode)) - return; - if (!IS_INODE(page)) return; - ri = F2FS_INODE(page); - if (!(ri->i_inline & F2FS_INLINE_XATTR)) - return; - ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(IS_ERR(ipage)); + ri = F2FS_INODE(page); + if (!(ri->i_inline & F2FS_INLINE_XATTR)) { + clear_inode_flag(F2FS_I(inode), FI_INLINE_XATTR); + goto update_inode; + } + dst_addr = inline_xattr_addr(ipage); src_addr = inline_xattr_addr(page); inline_size = inline_xattr_size(inode); f2fs_wait_on_page_writeback(ipage, NODE); memcpy(dst_addr, src_addr, inline_size); - +update_inode: update_inode(inode, ipage); f2fs_put_page(ipage, 1); } From 1c35a90e8ab57cd34b8e806b9c75ba05b3b5c7a3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 7 Aug 2014 23:49:17 -0700 Subject: [PATCH 07/28] f2fs: fix to recover inline_xattr/data and blocks This patch fixes not to skip xattr recovery and inline xattr/data recovery order. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/node.c | 9 +-------- fs/f2fs/recovery.c | 13 +++++++++---- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2e4aa3a51b0d..cc5ead1bd378 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1205,7 +1205,7 @@ void alloc_nid_failed(struct f2fs_sb_info *, nid_t); void recover_node_page(struct f2fs_sb_info *, struct page *, struct f2fs_summary *, struct node_info *, block_t); void recover_inline_xattr(struct inode *, struct page *); -bool recover_xattr_data(struct inode *, struct page *, block_t); +void recover_xattr_data(struct inode *, struct page *, block_t); int recover_inode_page(struct f2fs_sb_info *, struct page *); int restore_node_summary(struct f2fs_sb_info *, unsigned int, struct f2fs_summary_block *); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 151045f3e7b7..c80e3d59314d 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1557,9 +1557,6 @@ void recover_inline_xattr(struct inode *inode, struct page *page) struct page *ipage; struct f2fs_inode *ri; - if (!IS_INODE(page)) - return; - ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(IS_ERR(ipage)); @@ -1580,16 +1577,13 @@ void recover_inline_xattr(struct inode *inode, struct page *page) f2fs_put_page(ipage, 1); } -bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) +void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; nid_t new_xnid = nid_of_node(page); struct node_info ni; - if (!f2fs_has_xattr_block(ofs_of_node(page))) - return false; - /* 1: invalidate the previous xattr nid */ if (!prev_xnid) goto recover_xnid; @@ -1617,7 +1611,6 @@ bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) set_node_addr(sbi, &ni, blkaddr, false); update_inode_page(inode); - return true; } int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index cfb2aa9bfc20..d7b67b86f607 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -302,14 +302,19 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, struct node_info ni; int err = 0, recovered = 0; - recover_inline_xattr(inode, page); + /* step 1: recover xattr */ + if (IS_INODE(page)) { + recover_inline_xattr(inode, page); + } else if (f2fs_has_xattr_block(ofs_of_node(page))) { + recover_xattr_data(inode, page, blkaddr); + goto out; + } + /* step 2: recover inline data */ if (recover_inline_data(inode, page)) goto out; - if (recover_xattr_data(inode, page, blkaddr)) - goto out; - + /* step 3: recover data indices */ start = start_bidx_of_node(ofs_of_node(page), fi); end = start + ADDRS_PER_PAGE(page, fi); From b307384e4f4670c490b4d142d27fed497df51fae Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 8 Aug 2014 10:18:43 -0700 Subject: [PATCH 08/28] f2fs: avoid bug_on when error is occurred During the recovery, if an error like EIO or ENOMEM, f2fs_bug_on should skip. Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index d7b67b86f607..7ca7aadaa607 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -472,7 +472,8 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) /* step #2: recover data */ err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); - f2fs_bug_on(!list_empty(&inode_list)); + if (!err) + f2fs_bug_on(!list_empty(&inode_list)); out: destroy_fsync_dnodes(&inode_list); kmem_cache_destroy(fsync_entry_slab); From 97c3c5cac2bba0ecc4b0de83d33a23aa427ef628 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 19 Aug 2014 09:13:01 -0700 Subject: [PATCH 09/28] f2fs: don't skip checkpoint if there is no dirty node pages This is the errorneous scenario. 1. write data 2. do checkpoint 3. produce some dirty node pages by the gc thread 4. write back dirty node pages 5. f2fs_put_super will skip the checkpoint, since dirty count for node pages is zero. This patch removes such the wrong condition check. Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 633315acfef8..60e3554a6eb4 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -432,7 +432,7 @@ static void f2fs_put_super(struct super_block *sb) stop_gc_thread(sbi); /* We don't need to do checkpoint when it's clean */ - if (sbi->s_dirty && get_pages(sbi, F2FS_DIRTY_NODES)) + if (sbi->s_dirty) write_checkpoint(sbi, true); iput(sbi->node_inode); From 6f12ac25f0167adb5d9ad5547fd6838380261e5c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 19 Aug 2014 09:48:22 -0700 Subject: [PATCH 10/28] f2fs: trigger release_dirty_inode in f2fs_put_super The generic_shutdown_super calls sync_filesystem, evict_inode, and then f2fs_put_super. In f2fs_evict_inode, we remain some dirty inode information so we should release them at f2fs_put_super. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 7e1c13bfb99f..f14af912a146 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -348,7 +348,7 @@ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) return e ? true : false; } -static void release_dirty_inode(struct f2fs_sb_info *sbi) +void release_dirty_inode(struct f2fs_sb_info *sbi) { struct ino_entry *e, *tmp; int i; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cc5ead1bd378..cc89a7f490a6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1262,6 +1262,7 @@ int ra_meta_pages(struct f2fs_sb_info *, int, int, int); long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type); void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type); +void release_dirty_inode(struct f2fs_sb_info *); bool exist_written_data(struct f2fs_sb_info *, nid_t, int); int acquire_orphan_inode(struct f2fs_sb_info *); void release_orphan_inode(struct f2fs_sb_info *); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 60e3554a6eb4..7a5477915d99 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -435,6 +435,9 @@ static void f2fs_put_super(struct super_block *sb) if (sbi->s_dirty) write_checkpoint(sbi, true); + /* normally superblock is clean, so we need to release this */ + release_dirty_inode(sbi); + iput(sbi->node_inode); iput(sbi->meta_inode); From ed2e621a95d704e6a4e904cc00524e8cbddda0c2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 8 Aug 2014 15:37:41 -0700 Subject: [PATCH 11/28] f2fs: give a chance to mount again when encountering errors This patch gives another chance to try mount process when we encounter an error. This makes an effect on the roll-forward recovery failures as well. Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 7a5477915d99..e161e13958e2 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -902,8 +902,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) struct buffer_head *raw_super_buf; struct inode *root; long err = -EINVAL; + bool retry = true; int i; +try_onemore: /* allocate memory for f2fs-specific super block info */ sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); if (!sbi) @@ -1083,9 +1085,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { err = recover_fsync_data(sbi); - if (err) + if (err) { f2fs_msg(sb, KERN_ERR, "Cannot recover all fsync data errno=%ld", err); + goto free_kobj; + } } /* @@ -1126,6 +1130,13 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) brelse(raw_super_buf); free_sbi: kfree(sbi); + + /* give only one another chance */ + if (retry) { + retry = !retry; + shrink_dcache_sb(sb); + goto try_onemore; + } return err; } From 1e968fdfe69e4060f05fa04059ecad93a0284e32 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 11 Aug 2014 16:49:25 -0700 Subject: [PATCH 12/28] f2fs: introduce f2fs_cp_error for readability This patch adds f2fs_cp_error for readability. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 ++-- fs/f2fs/f2fs.h | 5 +++++ fs/f2fs/gc.c | 2 +- fs/f2fs/super.c | 2 +- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f14af912a146..6f38aad8e654 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -162,7 +162,7 @@ static int f2fs_write_meta_page(struct page *page, goto redirty_out; /* Should not write any meta pages, if any IO error was occurred */ - if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG))) + if (unlikely(f2fs_cp_error(sbi))) goto no_write; f2fs_wait_on_page_writeback(page, META); @@ -934,7 +934,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) /* Here, we only have one bio having CP pack */ sync_meta_pages(sbi, META_FLUSH, LONG_MAX); - if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) { + if (!f2fs_cp_error(sbi)) { clear_prefree_segments(sbi); release_dirty_inode(sbi); F2FS_RESET_SB_DIRT(sbi); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cc89a7f490a6..2d009aed0c2e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1096,6 +1096,11 @@ static inline int f2fs_readonly(struct super_block *sb) return sb->s_flags & MS_RDONLY; } +static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi) +{ + return is_set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); +} + static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi) { set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 87c50c507461..e8507b1c8759 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -693,7 +693,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi) gc_more: if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) goto stop; - if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG))) + if (unlikely(f2fs_cp_error(sbi))) goto stop; if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e161e13958e2..8aabe3ef42f6 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -815,7 +815,7 @@ static int sanity_check_ckpt(struct f2fs_sb_info *sbi) if (unlikely(fsmeta >= total)) return 1; - if (unlikely(is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) { + if (unlikely(f2fs_cp_error(sbi))) { f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); return 1; } From 5274651927a76c947469a589e3d2a9adbd075da6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 11 Aug 2014 18:18:36 -0700 Subject: [PATCH 13/28] f2fs: unlock_page when node page is redirtied out This patch fixes missing unlock_page when a node page is redirtied out. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index c80e3d59314d..9f126f80813d 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1129,8 +1129,11 @@ int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, set_fsync_mark(page, 0); set_dentry_mark(page, 0); } - NODE_MAPPING(sbi)->a_ops->writepage(page, wbc); - wrote++; + + if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc)) + unlock_page(page); + else + wrote++; if (--wbc->nr_to_write == 0) break; From 8501017e50fb7586ba522a2913ce664d6c2024f6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 11 Aug 2014 18:37:46 -0700 Subject: [PATCH 14/28] f2fs: check s_dirty under cp_mutex It needs to check s_dirty under cp_mutex, since s_dirty is reset under that mutex. And previous condition was not correct, since we can omit doing checkpoint when checkpoint was done followed by all the node pages were written back. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 8 ++++++-- fs/f2fs/super.c | 3 --- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6f38aad8e654..dc29b7837687 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -952,6 +952,10 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops"); mutex_lock(&sbi->cp_mutex); + + if (!sbi->s_dirty) + goto out; + block_operations(sbi); trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops"); @@ -976,9 +980,9 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) do_checkpoint(sbi, is_umount); unblock_operations(sbi); - mutex_unlock(&sbi->cp_mutex); - stat_inc_cp_count(sbi->stat_info); +out: + mutex_unlock(&sbi->cp_mutex); trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8aabe3ef42f6..e7a7b619ffd4 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -460,9 +460,6 @@ int f2fs_sync_fs(struct super_block *sb, int sync) trace_f2fs_sync_fs(sb, sync); - if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES)) - return 0; - if (sync) { mutex_lock(&sbi->gc_mutex); write_checkpoint(sbi, false); From cf779cab14d50a84b61399f758da269654b863db Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 11 Aug 2014 18:37:46 -0700 Subject: [PATCH 15/28] f2fs: handle EIO not to break fs consistency There are two rules when EIO is occurred. 1. don't write any checkpoint data to preserve the previous checkpoint 2. don't lose the cached dentry/node/meta pages So, at first, this patch adds set_page_dirty in f2fs_write_end_io's failure. Then, writing checkpoint/dentry/node blocks is not allowed. Note that, for the data pages, we can't just throw away by redirtying them. Otherwise, kworker can fall into infinite loop to flush them. (Ref. xfstests/019) Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 45 +++++++++++++++++++++++++++++++------------- fs/f2fs/data.c | 11 ++++++++++- fs/f2fs/node.c | 2 ++ fs/f2fs/super.c | 5 ++++- 4 files changed, 48 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index dc29b7837687..c9c08d52ecfd 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -160,14 +160,11 @@ static int f2fs_write_meta_page(struct page *page, goto redirty_out; if (wbc->for_reclaim) goto redirty_out; - - /* Should not write any meta pages, if any IO error was occurred */ if (unlikely(f2fs_cp_error(sbi))) - goto no_write; + goto redirty_out; f2fs_wait_on_page_writeback(page, META); write_meta_page(sbi, page); -no_write: dec_page_count(sbi, F2FS_DIRTY_META); unlock_page(page); return 0; @@ -737,7 +734,7 @@ void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) /* * Freeze all the FS-operations for checkpoint. */ -static void block_operations(struct f2fs_sb_info *sbi) +static int block_operations(struct f2fs_sb_info *sbi) { struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, @@ -745,6 +742,7 @@ static void block_operations(struct f2fs_sb_info *sbi) .for_reclaim = 0, }; struct blk_plug plug; + int err = 0; blk_start_plug(&plug); @@ -754,6 +752,10 @@ static void block_operations(struct f2fs_sb_info *sbi) if (get_pages(sbi, F2FS_DIRTY_DENTS)) { f2fs_unlock_all(sbi); sync_dirty_dir_inodes(sbi); + if (unlikely(f2fs_cp_error(sbi))) { + err = -EIO; + goto out; + } goto retry_flush_dents; } @@ -767,9 +769,16 @@ static void block_operations(struct f2fs_sb_info *sbi) if (get_pages(sbi, F2FS_DIRTY_NODES)) { up_write(&sbi->node_write); sync_node_pages(sbi, 0, &wbc); + if (unlikely(f2fs_cp_error(sbi))) { + f2fs_unlock_all(sbi); + err = -EIO; + goto out; + } goto retry_flush_nodes; } +out: blk_finish_plug(&plug); + return err; } static void unblock_operations(struct f2fs_sb_info *sbi) @@ -813,8 +822,11 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg)); /* Flush all the NAT/SIT pages */ - while (get_pages(sbi, F2FS_DIRTY_META)) + while (get_pages(sbi, F2FS_DIRTY_META)) { sync_meta_pages(sbi, META, LONG_MAX); + if (unlikely(f2fs_cp_error(sbi))) + return; + } next_free_nid(sbi, &last_nid); @@ -924,6 +936,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) /* wait for previous submitted node/meta pages writeback */ wait_on_all_pages_writeback(sbi); + if (unlikely(f2fs_cp_error(sbi))) + return; + filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX); filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX); @@ -934,11 +949,13 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) /* Here, we only have one bio having CP pack */ sync_meta_pages(sbi, META_FLUSH, LONG_MAX); - if (!f2fs_cp_error(sbi)) { - clear_prefree_segments(sbi); - release_dirty_inode(sbi); - F2FS_RESET_SB_DIRT(sbi); - } + release_dirty_inode(sbi); + + if (unlikely(f2fs_cp_error(sbi))) + return; + + clear_prefree_segments(sbi); + F2FS_RESET_SB_DIRT(sbi); } /* @@ -955,8 +972,10 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) if (!sbi->s_dirty) goto out; - - block_operations(sbi); + if (unlikely(f2fs_cp_error(sbi))) + goto out; + if (block_operations(sbi)) + goto out; trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops"); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ac3ccc25386b..6ba52a393063 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -53,7 +53,7 @@ static void f2fs_write_end_io(struct bio *bio, int err) struct page *page = bvec->bv_page; if (unlikely(err)) { - SetPageError(page); + set_page_dirty(page); set_bit(AS_EIO, &page->mapping->flags); f2fs_stop_checkpoint(sbi); } @@ -836,10 +836,19 @@ static int f2fs_write_data_page(struct page *page, /* Dentry blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode)) { + if (unlikely(f2fs_cp_error(sbi))) + goto redirty_out; err = do_write_data_page(page, &fio); goto done; } + /* we should bypass data pages to proceed the kworkder jobs */ + if (unlikely(f2fs_cp_error(sbi))) { + SetPageError(page); + unlock_page(page); + return 0; + } + if (!wbc->for_reclaim) need_balance_fs = true; else if (has_not_enough_free_secs(sbi, 0)) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 9f126f80813d..d2f784283425 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1215,6 +1215,8 @@ static int f2fs_write_node_page(struct page *page, if (unlikely(sbi->por_doing)) goto redirty_out; + if (unlikely(f2fs_cp_error(sbi))) + goto redirty_out; f2fs_wait_on_page_writeback(page, NODE); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e7a7b619ffd4..ddb1e9d36363 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -435,7 +435,10 @@ static void f2fs_put_super(struct super_block *sb) if (sbi->s_dirty) write_checkpoint(sbi, true); - /* normally superblock is clean, so we need to release this */ + /* + * normally superblock is clean, so we need to release this. + * In addition, EIO will skip do checkpoint, we need this as well. + */ release_dirty_inode(sbi); iput(sbi->node_inode); From b3fe0a0da2cb18d3c94d5ddce836e7f889d7286b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 13 Aug 2014 10:45:41 -0700 Subject: [PATCH 16/28] f2fs: add WARN_ON in f2fs_bug_on This patch adds WARN_ON when f2fs_bug_on is disable to see kernel messages. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2d009aed0c2e..2723b2d45479 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -24,7 +24,7 @@ #define f2fs_bug_on(condition) BUG_ON(condition) #define f2fs_down_write(x, y) down_write_nest_lock(x, y) #else -#define f2fs_bug_on(condition) +#define f2fs_bug_on(condition) WARN_ON(condition) #define f2fs_down_write(x, y) down_write(x) #endif From 14f4e690857715d5e0cbe403b4cb8e8c904a6c15 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 13 Aug 2014 16:30:46 -0700 Subject: [PATCH 17/28] f2fs: prevent checkpoint during roll-forward Any checkpoint should not be done during the core roll-forward procedure. Especially, it includes error cases too. Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 7ca7aadaa607..d36ef35353b2 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -459,6 +459,9 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) /* step #1: find fsynced inode numbers */ sbi->por_doing = true; + /* prevent checkpoint */ + mutex_lock(&sbi->cp_mutex); + blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); err = find_fsync_dnodes(sbi, &inode_list); @@ -490,8 +493,13 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) /* Flush all the NAT/SIT pages */ while (get_pages(sbi, F2FS_DIRTY_META)) sync_meta_pages(sbi, META, LONG_MAX); + set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); + mutex_unlock(&sbi->cp_mutex); } else if (need_writecp) { + mutex_unlock(&sbi->cp_mutex); write_checkpoint(sbi, false); + } else { + mutex_unlock(&sbi->cp_mutex); } return err; } From 764aa3e978020121cbb86111b5d8f42830015a06 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 14 Aug 2014 16:32:54 -0700 Subject: [PATCH 18/28] f2fs: avoid double lock in truncate_blocks The init_inode_metadata calls truncate_blocks when error is occurred. The callers holds f2fs_lock_op, so we should not call it again in truncate_blocks. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/dir.c | 2 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 13 ++++++++----- fs/f2fs/inline.c | 2 +- 5 files changed, 12 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6ba52a393063..76de83e25a89 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -936,7 +936,7 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to) if (to > inode->i_size) { truncate_pagecache(inode, inode->i_size); - truncate_blocks(inode, inode->i_size); + truncate_blocks(inode, inode->i_size, true); } } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index a69bbfa90c99..155fb056b7f1 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -391,7 +391,7 @@ static struct page *init_inode_metadata(struct inode *inode, error: /* once the failed inode becomes a bad inode, i_mode is S_IFREG */ truncate_inode_pages(&inode->i_data, 0); - truncate_blocks(inode, 0); + truncate_blocks(inode, 0, false); remove_dirty_dir_inode(inode); remove_inode_page(inode); return ERR_PTR(err); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2723b2d45479..7f976c1d1723 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1122,7 +1122,7 @@ static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi) */ int f2fs_sync_file(struct file *, loff_t, loff_t, int); void truncate_data_blocks(struct dnode_of_data *); -int truncate_blocks(struct inode *, u64); +int truncate_blocks(struct inode *, u64, bool); void f2fs_truncate(struct inode *); int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); int f2fs_setattr(struct dentry *, struct iattr *); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ecbdf6a6381c..a8e97f8ed163 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -422,7 +422,7 @@ static void truncate_partial_data_page(struct inode *inode, u64 from) f2fs_put_page(page, 1); } -int truncate_blocks(struct inode *inode, u64 from) +int truncate_blocks(struct inode *inode, u64 from, bool lock) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); unsigned int blocksize = inode->i_sb->s_blocksize; @@ -438,14 +438,16 @@ int truncate_blocks(struct inode *inode, u64 from) free_from = (pgoff_t) ((from + blocksize - 1) >> (sbi->log_blocksize)); - f2fs_lock_op(sbi); + if (lock) + f2fs_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); if (err) { if (err == -ENOENT) goto free_next; - f2fs_unlock_op(sbi); + if (lock) + f2fs_unlock_op(sbi); trace_f2fs_truncate_blocks_exit(inode, err); return err; } @@ -463,7 +465,8 @@ int truncate_blocks(struct inode *inode, u64 from) f2fs_put_dnode(&dn); free_next: err = truncate_inode_blocks(inode, free_from); - f2fs_unlock_op(sbi); + if (lock) + f2fs_unlock_op(sbi); done: /* lastly zero out the first data page */ truncate_partial_data_page(inode, from); @@ -480,7 +483,7 @@ void f2fs_truncate(struct inode *inode) trace_f2fs_truncate(inode); - if (!truncate_blocks(inode, i_size_read(inode))) { + if (!truncate_blocks(inode, i_size_read(inode), true)) { inode->i_mtime = inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 520758b91999..4d1f39f4583c 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -247,7 +247,7 @@ bool recover_inline_data(struct inode *inode, struct page *npage) update_inode(inode, ipage); f2fs_put_page(ipage, 1); } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { - truncate_blocks(inode, 0); + truncate_blocks(inode, 0, false); set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); goto process_inline; } From 202095a7a0ec075b924cb15dde330bf76e485f61 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 15 Aug 2014 09:56:46 -0700 Subject: [PATCH 19/28] f2fs: remove rewrite_node_page I think we need to let the dirty node pages remain in the page cache instead of rewriting them in their places. So, after done with successful recovery, write_checkpoint will flush all of them through the normal write path. Through this, we can avoid potential error cases in terms of block allocation. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 ---- fs/f2fs/node.c | 9 --------- fs/f2fs/recovery.c | 2 -- fs/f2fs/segment.c | 49 ---------------------------------------------- 4 files changed, 64 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7f976c1d1723..e921242186f6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1207,8 +1207,6 @@ int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *); bool alloc_nid(struct f2fs_sb_info *, nid_t *); void alloc_nid_done(struct f2fs_sb_info *, nid_t); void alloc_nid_failed(struct f2fs_sb_info *, nid_t); -void recover_node_page(struct f2fs_sb_info *, struct page *, - struct f2fs_summary *, struct node_info *, block_t); void recover_inline_xattr(struct inode *, struct page *); void recover_xattr_data(struct inode *, struct page *, block_t); int recover_inode_page(struct f2fs_sb_info *, struct page *); @@ -1243,8 +1241,6 @@ void write_data_page(struct page *, struct dnode_of_data *, block_t *, void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *); void recover_data_page(struct f2fs_sb_info *, struct page *, struct f2fs_summary *, block_t, block_t); -void rewrite_node_page(struct f2fs_sb_info *, struct page *, - struct f2fs_summary *, block_t, block_t); void allocate_data_block(struct f2fs_sb_info *, struct page *, block_t, block_t *, struct f2fs_summary *, int); void f2fs_wait_on_page_writeback(struct page *, enum page_type); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d2f784283425..b4d964029fc7 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1545,15 +1545,6 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) kmem_cache_free(free_nid_slab, i); } -void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, - struct f2fs_summary *sum, struct node_info *ni, - block_t new_blkaddr) -{ - rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr); - set_node_addr(sbi, ni, new_blkaddr, false); - clear_node_page_dirty(page); -} - void recover_inline_xattr(struct inode *inode, struct page *page) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index d36ef35353b2..756c41cd2582 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -371,8 +371,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, fill_node_footer(dn.node_page, dn.nid, ni.ino, ofs_of_node(page), false); set_page_dirty(dn.node_page); - - recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr); err: f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 31b630efe4db..0aa337cd5bba 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1103,55 +1103,6 @@ void recover_data_page(struct f2fs_sb_info *sbi, mutex_unlock(&curseg->curseg_mutex); } -void rewrite_node_page(struct f2fs_sb_info *sbi, - struct page *page, struct f2fs_summary *sum, - block_t old_blkaddr, block_t new_blkaddr) -{ - struct sit_info *sit_i = SIT_I(sbi); - int type = CURSEG_WARM_NODE; - struct curseg_info *curseg; - unsigned int segno, old_cursegno; - block_t next_blkaddr = next_blkaddr_of_node(page); - unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr); - struct f2fs_io_info fio = { - .type = NODE, - .rw = WRITE_SYNC, - }; - - curseg = CURSEG_I(sbi, type); - - mutex_lock(&curseg->curseg_mutex); - mutex_lock(&sit_i->sentry_lock); - - segno = GET_SEGNO(sbi, new_blkaddr); - old_cursegno = curseg->segno; - - /* change the current segment */ - if (segno != curseg->segno) { - curseg->next_segno = segno; - change_curseg(sbi, type, true); - } - curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr); - __add_sum_entry(sbi, type, sum); - - /* change the current log to the next block addr in advance */ - if (next_segno != segno) { - curseg->next_segno = next_segno; - change_curseg(sbi, type, true); - } - curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, next_blkaddr); - - /* rewrite node page */ - set_page_writeback(page); - f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio); - f2fs_submit_merged_bio(sbi, NODE, WRITE); - refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); - locate_dirty_segment(sbi, old_cursegno); - - mutex_unlock(&sit_i->sentry_lock); - mutex_unlock(&curseg->curseg_mutex); -} - static inline bool is_merged_page(struct f2fs_sb_info *sbi, struct page *page, enum page_type type) { From ec4e7af4ca04ff81f786554d670876f1037a9ded Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 18 Aug 2014 14:41:11 -0700 Subject: [PATCH 20/28] f2fs: skip if inline_data was converted already This patch checks inline_data one more time under the inode page lock whether its inline_data is converted or not. Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 4d1f39f4583c..3e8ecdf3742b 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -68,7 +68,7 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) { - int err; + int err = 0; struct page *ipage; struct dnode_of_data dn; void *src_addr, *dst_addr; @@ -86,6 +86,10 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) goto out; } + /* someone else converted inline_data already */ + if (!f2fs_has_inline_data(inode)) + goto out; + /* * i_addr[0] is not used for inline data, * so reserving new block will not destroy inline data From 04859dba50e6cd85c5d683d06010c5eafb27c893 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 19 Aug 2014 14:14:27 -0700 Subject: [PATCH 21/28] f2fs: remove rename and use rename2 Refer the following patch. commit 7177a9c4b509eb357cc450256bc3cf39f1a1e639 Author: Miklos Szeredi Date: Wed Jul 23 15:15:30 2014 +0200 fs: call rename2 if exists Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index eb407d207289..6b53ce924d95 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -704,7 +704,6 @@ const struct inode_operations f2fs_dir_inode_operations = { .mkdir = f2fs_mkdir, .rmdir = f2fs_rmdir, .mknod = f2fs_mknod, - .rename = f2fs_rename, .rename2 = f2fs_rename2, .tmpfile = f2fs_tmpfile, .getattr = f2fs_getattr, From c200b1aa6cb460ce8c3ecf6fdc690d3949c3cc5d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 20 Aug 2014 18:36:46 +0800 Subject: [PATCH 22/28] f2fs: fix incorrect calculation with total/free inode num Theoretically, our total inodes number is the same as total node number, but there are three node ids are reserved in f2fs, they are 0, 1 (node nid), and 2 (meta nid), and they should never be used by user, so our total/free inode number calculated in ->statfs is wrong. This patch indroduces F2FS_RESERVED_NODE_NUM and then fixes this issue by recalculating total/free inode number with the macro. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 +- fs/f2fs/super.c | 4 ++-- include/linux/f2fs_fs.h | 3 +++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b4d964029fc7..044395c20ee9 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1957,7 +1957,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; /* not used nids: 0, node, meta, (and root counted as valid node) */ - nm_i->available_nids = nm_i->max_nid - 3; + nm_i->available_nids = nm_i->max_nid - F2FS_RESERVED_NODE_NUM; nm_i->fcnt = 0; nm_i->nat_cnt = 0; nm_i->ram_thresh = DEF_RAM_THRESHOLD; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ddb1e9d36363..0f61f5aa587c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -508,8 +508,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count; buf->f_bavail = user_block_count - valid_user_blocks(sbi); - buf->f_files = sbi->total_node_count; - buf->f_ffree = sbi->total_node_count - valid_inode_count(sbi); + buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM; + buf->f_ffree = buf->f_files - valid_inode_count(sbi); buf->f_namelen = F2FS_NAME_LEN; buf->f_fsid.val[0] = (u32)id; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 6ff0b0b42d47..0ed77f32c9ac 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -24,6 +24,9 @@ #define NULL_ADDR ((block_t)0) /* used as block_t addresses */ #define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ +/* 0, 1(node nid), 2(meta nid) are reserved node id */ +#define F2FS_RESERVED_NODE_NUM 3 + #define F2FS_ROOT_INO(sbi) (sbi->root_ino_num) #define F2FS_NODE_INO(sbi) (sbi->node_ino_num) #define F2FS_META_INO(sbi) (sbi->meta_ino_num) From 9d1589ef2edeb4565161771bd0faf3fd5288844b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 20 Aug 2014 18:37:35 +0800 Subject: [PATCH 23/28] f2fs: introduce need_do_checkpoint for readability This patch introduce need_do_checkpoint() to include numerous judgment condition for readability. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a8e97f8ed163..060aee65aee8 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -115,6 +115,25 @@ static int get_parent_ino(struct inode *inode, nid_t *pino) return 1; } +static inline bool need_do_checkpoint(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + bool need_cp = false; + + if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) + need_cp = true; + else if (file_wrong_pino(inode)) + need_cp = true; + else if (!space_for_roll_forward(sbi)) + need_cp = true; + else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) + need_cp = true; + else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) + need_cp = true; + + return need_cp; +} + int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; @@ -159,23 +178,12 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) /* guarantee free sections for fsync */ f2fs_balance_fs(sbi); - down_read(&fi->i_sem); - /* * Both of fdatasync() and fsync() are able to be recovered from * sudden-power-off. */ - if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) - need_cp = true; - else if (file_wrong_pino(inode)) - need_cp = true; - else if (!space_for_roll_forward(sbi)) - need_cp = true; - else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) - need_cp = true; - else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) - need_cp = true; - + down_read(&fi->i_sem); + need_cp = need_do_checkpoint(inode); up_read(&fi->i_sem); if (need_cp) { From b5b822050ca3c4fc1f475100cc197cc00ba2d492 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 22 Aug 2014 16:17:38 +0800 Subject: [PATCH 24/28] f2fs: use macro for code readability This patch introduces DEF_NIDS_PER_INODE/GET_ORPHAN_BLOCKS/F2FS_CP_PACKS macro instead of numbers in code for readability. change log from v1: o fix typo pointed out by Jaegeuk Kim. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 21 ++++++++++----------- include/linux/f2fs_fs.h | 13 ++++++++++--- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index c9c08d52ecfd..ec3b7a5381fa 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -443,8 +443,8 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) struct f2fs_orphan_block *orphan_blk = NULL; unsigned int nentries = 0; unsigned short index; - unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans + - (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK); + unsigned short orphan_blocks = + (unsigned short)GET_ORPHAN_BLOCKS(sbi->n_orphans); struct page *page = NULL; struct ino_entry *orphan = NULL; @@ -837,7 +837,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi)); ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi)); ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); - for (i = 0; i < 3; i++) { + for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { ckpt->cur_node_segno[i] = cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE)); ckpt->cur_node_blkoff[i] = @@ -845,7 +845,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) ckpt->alloc_type[i + CURSEG_HOT_NODE] = curseg_alloc_type(sbi, i + CURSEG_HOT_NODE); } - for (i = 0; i < 3; i++) { + for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) { ckpt->cur_data_segno[i] = cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA)); ckpt->cur_data_blkoff[i] = @@ -860,24 +860,23 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) /* 2 cp + n data seg summary + orphan inode blocks */ data_sum_blocks = npages_for_summary_flush(sbi); - if (data_sum_blocks < 3) + if (data_sum_blocks < NR_CURSEG_DATA_TYPE) set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); else clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); - orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1) - / F2FS_ORPHANS_PER_BLOCK; + orphan_blocks = GET_ORPHAN_BLOCKS(sbi->n_orphans); ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + orphan_blocks); if (is_umount) { set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); - ckpt->cp_pack_total_block_count = cpu_to_le32(2 + + ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+ cp_payload_blks + data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE); } else { clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); - ckpt->cp_pack_total_block_count = cpu_to_le32(2 + + ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS + cp_payload_blks + data_sum_blocks + orphan_blocks); } @@ -1022,8 +1021,8 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) * for cp pack we can have max 1020*504 orphan entries */ sbi->n_orphans = 0; - sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE) - * F2FS_ORPHANS_PER_BLOCK; + sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - + NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK; } int __init create_checkpoint_caches(void) diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 0ed77f32c9ac..08ed2b0a96e6 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -90,6 +90,8 @@ struct f2fs_super_block { #define CP_ORPHAN_PRESENT_FLAG 0x00000002 #define CP_UMOUNT_FLAG 0x00000001 +#define F2FS_CP_PACKS 2 /* # of checkpoint packs */ + struct f2fs_checkpoint { __le64 checkpoint_ver; /* checkpoint block version number */ __le64 user_block_count; /* # of user blocks */ @@ -126,6 +128,9 @@ struct f2fs_checkpoint { */ #define F2FS_ORPHANS_PER_BLOCK 1020 +#define GET_ORPHAN_BLOCKS(n) ((n + F2FS_ORPHANS_PER_BLOCK - 1) / \ + F2FS_ORPHANS_PER_BLOCK) + struct f2fs_orphan_block { __le32 ino[F2FS_ORPHANS_PER_BLOCK]; /* inode numbers */ __le32 reserved; /* reserved */ @@ -147,6 +152,7 @@ struct f2fs_extent { #define F2FS_NAME_LEN 255 #define F2FS_INLINE_XATTR_ADDRS 50 /* 200 bytes for inline xattrs */ #define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ +#define DEF_NIDS_PER_INODE 5 /* Node IDs in an Inode */ #define ADDRS_PER_INODE(fi) addrs_per_inode(fi) #define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ #define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ @@ -166,8 +172,9 @@ struct f2fs_extent { #define MAX_INLINE_DATA (sizeof(__le32) * (DEF_ADDRS_PER_INODE - \ F2FS_INLINE_XATTR_ADDRS - 1)) -#define INLINE_DATA_OFFSET (PAGE_CACHE_SIZE - sizeof(struct node_footer) \ - - sizeof(__le32) * (DEF_ADDRS_PER_INODE + 5 - 1)) +#define INLINE_DATA_OFFSET (PAGE_CACHE_SIZE - sizeof(struct node_footer) -\ + sizeof(__le32) * (DEF_ADDRS_PER_INODE + \ + DEF_NIDS_PER_INODE - 1)) struct f2fs_inode { __le16 i_mode; /* file mode */ @@ -197,7 +204,7 @@ struct f2fs_inode { __le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */ - __le32 i_nid[5]; /* direct(2), indirect(2), + __le32 i_nid[DEF_NIDS_PER_INODE]; /* direct(2), indirect(2), double_indirect(1) node id */ } __packed; From c2e69583a4787b252f6be9a9daea4662eebc26f8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 25 Aug 2014 14:45:59 -0700 Subject: [PATCH 25/28] f2fs: truncate stale block for inline_data This verifies to truncate any allocated blocks, offset[0], by inline_data. Not figured out, but for making sure. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 044395c20ee9..45378196e19a 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -823,22 +823,26 @@ int truncate_xattr_node(struct inode *inode, struct page *page) */ void remove_inode_page(struct inode *inode) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct page *page; - nid_t ino = inode->i_ino; struct dnode_of_data dn; - page = get_node_page(sbi, ino); - if (IS_ERR(page)) + set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); + if (get_dnode_of_data(&dn, 0, LOOKUP_NODE)) return; - if (truncate_xattr_node(inode, page)) { - f2fs_put_page(page, 1); + if (truncate_xattr_node(inode, dn.inode_page)) { + f2fs_put_dnode(&dn); return; } + + /* remove potential inline_data blocks */ + if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode)) + truncate_data_blocks_range(&dn, 1); + /* 0 is possible, after f2fs_new_inode() has failed */ f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1); - set_new_dnode(&dn, inode, page, page, ino); + + /* will put inode & node pages */ truncate_node(&dn); } From 922cedbd00b68e506eb6f37cbe07cdf399a37b27 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 28 Aug 2014 16:13:21 +0300 Subject: [PATCH 26/28] f2fs: simplify by using a literal We can make the code a bit simpler because we know that "!retry" is zero. Signed-off-by: Dan Carpenter Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 0f61f5aa587c..41bdf511003d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1133,7 +1133,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) /* give only one another chance */ if (retry) { - retry = !retry; + retry = 0; shrink_dcache_sb(sb); goto try_onemore; } From 3304b56401c4509ffaa74705b49edc9e13cee195 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 29 Aug 2014 00:26:50 -0700 Subject: [PATCH 27/28] f2fs: fix wrong casting for dentry name The dentry name type is unsigned char *. If we don't match this type, some character codes can be changed by signed bit. Signed-off-by: Jaegeuk Kim --- fs/f2fs/hash.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index 948d17bf7281..a844fcfb9a8d 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -42,7 +42,8 @@ static void TEA_transform(unsigned int buf[4], unsigned int const in[]) buf[1] += b1; } -static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num) +static void str2hashbuf(const unsigned char *msg, size_t len, + unsigned int *buf, int num) { unsigned pad, val; int i; @@ -73,9 +74,9 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info) { __u32 hash; f2fs_hash_t f2fs_hash; - const char *p; + const unsigned char *p; __u32 in[8], buf[4]; - const char *name = name_info->name; + const unsigned char *name = name_info->name; size_t len = name_info->len; if ((len <= 2) && (name[0] == '.') && From b73e52824c8920a5ff754e3c8ff68466a7dd61f9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 30 Aug 2014 09:52:34 +0800 Subject: [PATCH 28/28] f2fs: reposition unlock_new_inode to prevent accessing invalid inode As the race condition on the inode cache, following scenario can appear: [Thread a] [Thread b] ->f2fs_mkdir ->f2fs_add_link ->__f2fs_add_link ->init_inode_metadata failed here ->gc_thread_func ->f2fs_gc ->do_garbage_collect ->gc_data_segment ->f2fs_iget ->iget_locked ->wait_on_inode ->unlock_new_inode ->move_data_page ->make_bad_inode ->iput When we fail in create/symlink/mkdir/mknod/tmpfile, the new allocated inode should be set as bad to avoid being accessed by other thread. But in above scenario, it allows f2fs to access the invalid inode before this inode was set as bad. This patch fix the potential problem, and this issue was found by code review. change log from v1: o Add condition judgment in gc_data_segment() suggested by Changman Lee. o use iget_failed to simplify code. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 2 +- fs/f2fs/namei.c | 20 +++++--------------- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index e8507b1c8759..943a31db7cc3 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -593,7 +593,7 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (phase == 2) { inode = f2fs_iget(sb, dni.ino); - if (IS_ERR(inode)) + if (IS_ERR(inode) || is_bad_inode(inode)) continue; start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 6b53ce924d95..ee103fd7283c 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -134,9 +134,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, return 0; out: clear_nlink(inode); - unlock_new_inode(inode); - make_bad_inode(inode); - iput(inode); + iget_failed(inode); alloc_nid_failed(sbi, ino); return err; } @@ -267,9 +265,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, return err; out: clear_nlink(inode); - unlock_new_inode(inode); - make_bad_inode(inode); - iput(inode); + iget_failed(inode); alloc_nid_failed(sbi, inode->i_ino); return err; } @@ -308,9 +304,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) out_fail: clear_inode_flag(F2FS_I(inode), FI_INC_LINK); clear_nlink(inode); - unlock_new_inode(inode); - make_bad_inode(inode); - iput(inode); + iget_failed(inode); alloc_nid_failed(sbi, inode->i_ino); return err; } @@ -354,9 +348,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, return 0; out: clear_nlink(inode); - unlock_new_inode(inode); - make_bad_inode(inode); - iput(inode); + iget_failed(inode); alloc_nid_failed(sbi, inode->i_ino); return err; } @@ -688,9 +680,7 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) out: f2fs_unlock_op(sbi); clear_nlink(inode); - unlock_new_inode(inode); - make_bad_inode(inode); - iput(inode); + iget_failed(inode); alloc_nid_failed(sbi, inode->i_ino); return err; }