Bug and regression fixes for ext4, the most serious of which is a
 potential deadlock during directory renames that was introduced during
 the merge window and discovered by a combination of syzbot and lockdep.
 -----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCAAdFiEEK2m5VNv+CHkogTfJ8vlZVpUNgaMFAmQNVwIACgkQ8vlZVpUN
 gaMwmgf/ZAasXZEMV0zaQZa8zP4KvMKZjWe6azkcJg4sb/HG9Q7JzeJDCurhhWUj
 8+QnyUcuKTyWKYWjGf0f5CZaYEM5AZYij41UJzu2qMkz5hVXSqBVuY8KywxuiJv5
 kfuIvQh0Onv0Yrg2qAc52/kZkq1lu2sl/F5ertBWjdpTUXdBUdrCxkUk+1BgQWAj
 vNwi1/+gNuX7RxMboHqYmwXFP39vECd+wteNdsiK1hR8bLqL68duLLq8xQdHt4gS
 sbVmJKR4j2Giw4ZnlYi9RiwKIO0beqocanp+cfOPulyj5mTM8X1lr0uvaLZgx2AF
 lqrS3/5ksp45cRT70qCIz8je70hTSg==
 =nN3T
 -----END PGP SIGNATURE-----

Merge tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 fixes from Ted Ts'o:
 "Bug and regression fixes for ext4, the most serious of which is a
  potential deadlock during directory renames that was introduced during
  the merge window and discovered by a combination of syzbot and lockdep"

* tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: zero i_disksize when initializing the bootloader inode
  ext4: make sure fs error flag setted before clear journal error
  ext4: commit super block if fs record error when journal record without error
  ext4, jbd2: add an optimized bmap for the journal inode
  ext4: fix WARNING in ext4_update_inline_data
  ext4: move where set the MAY_INLINE_DATA flag is set
  ext4: Fix deadlock during directory rename
  ext4: Fix comment about the 64BIT feature
  docs: ext4: modify the group desc size to 64
  ext4: fix another off-by-one fsmap error on 1k block filesystems
  ext4: fix RENAME_WHITEOUT handling for inline directories
  ext4: make kobj_type structures constant
  ext4: fix cgroup writeback accounting with fs-layer encryption
This commit is contained in:
Linus Torvalds 2023-03-12 08:55:55 -07:00
commit 40d0c0901e
13 changed files with 98 additions and 33 deletions

View file

@ -105,9 +105,9 @@ descriptors. Instead, the superblock and a single block group descriptor
block is placed at the beginning of the first, second, and last block block is placed at the beginning of the first, second, and last block
groups in a meta-block group. A meta-block group is a collection of groups in a meta-block group. A meta-block group is a collection of
block groups which can be described by a single block group descriptor block groups which can be described by a single block group descriptor
block. Since the size of the block group descriptor structure is 32 block. Since the size of the block group descriptor structure is 64
bytes, a meta-block group contains 32 block groups for filesystems with bytes, a meta-block group contains 16 block groups for filesystems with
a 1KB block size, and 128 block groups for filesystems with a 4KB a 1KB block size, and 64 block groups for filesystems with a 4KB
blocksize. Filesystems can either be created using this new block group blocksize. Filesystems can either be created using this new block group
descriptor layout, or existing filesystems can be resized on-line, and descriptor layout, or existing filesystems can be resized on-line, and
the field s_first_meta_bg in the superblock will indicate the first the field s_first_meta_bg in the superblock will indicate the first

View file

@ -1387,7 +1387,7 @@ struct ext4_super_block {
__le32 s_first_meta_bg; /* First metablock block group */ __le32 s_first_meta_bg; /* First metablock block group */
__le32 s_mkfs_time; /* When the filesystem was created */ __le32 s_mkfs_time; /* When the filesystem was created */
__le32 s_jnl_blocks[17]; /* Backup of the journal inode */ __le32 s_jnl_blocks[17]; /* Backup of the journal inode */
/* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */ /* 64bit support valid if EXT4_FEATURE_INCOMPAT_64BIT */
/*150*/ __le32 s_blocks_count_hi; /* Blocks count */ /*150*/ __le32 s_blocks_count_hi; /* Blocks count */
__le32 s_r_blocks_count_hi; /* Reserved blocks count */ __le32 s_r_blocks_count_hi; /* Reserved blocks count */
__le32 s_free_blocks_count_hi; /* Free blocks count */ __le32 s_free_blocks_count_hi; /* Free blocks count */

View file

@ -486,6 +486,8 @@ static int ext4_getfsmap_datadev(struct super_block *sb,
keys[0].fmr_physical = bofs; keys[0].fmr_physical = bofs;
if (keys[1].fmr_physical >= eofs) if (keys[1].fmr_physical >= eofs)
keys[1].fmr_physical = eofs - 1; keys[1].fmr_physical = eofs - 1;
if (keys[1].fmr_physical < keys[0].fmr_physical)
return 0;
start_fsb = keys[0].fmr_physical; start_fsb = keys[0].fmr_physical;
end_fsb = keys[1].fmr_physical; end_fsb = keys[1].fmr_physical;

View file

@ -159,7 +159,6 @@ int ext4_find_inline_data_nolock(struct inode *inode)
(void *)ext4_raw_inode(&is.iloc)); (void *)ext4_raw_inode(&is.iloc));
EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE + EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
le32_to_cpu(is.s.here->e_value_size); le32_to_cpu(is.s.here->e_value_size);
ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
} }
out: out:
brelse(is.iloc.bh); brelse(is.iloc.bh);

View file

@ -4797,8 +4797,13 @@ static inline int ext4_iget_extra_inode(struct inode *inode,
if (EXT4_INODE_HAS_XATTR_SPACE(inode) && if (EXT4_INODE_HAS_XATTR_SPACE(inode) &&
*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) { *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
int err;
ext4_set_inode_state(inode, EXT4_STATE_XATTR); ext4_set_inode_state(inode, EXT4_STATE_XATTR);
return ext4_find_inline_data_nolock(inode); err = ext4_find_inline_data_nolock(inode);
if (!err && ext4_has_inline_data(inode))
ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
return err;
} else } else
EXT4_I(inode)->i_inline_off = 0; EXT4_I(inode)->i_inline_off = 0;
return 0; return 0;

View file

@ -431,6 +431,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
ei_bl->i_flags = 0; ei_bl->i_flags = 0;
inode_set_iversion(inode_bl, 1); inode_set_iversion(inode_bl, 1);
i_size_write(inode_bl, 0); i_size_write(inode_bl, 0);
EXT4_I(inode_bl)->i_disksize = inode_bl->i_size;
inode_bl->i_mode = S_IFREG; inode_bl->i_mode = S_IFREG;
if (ext4_has_feature_extents(sb)) { if (ext4_has_feature_extents(sb)) {
ext4_set_inode_flag(inode_bl, EXT4_INODE_EXTENTS); ext4_set_inode_flag(inode_bl, EXT4_INODE_EXTENTS);

View file

@ -1595,11 +1595,10 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir,
int has_inline_data = 1; int has_inline_data = 1;
ret = ext4_find_inline_entry(dir, fname, res_dir, ret = ext4_find_inline_entry(dir, fname, res_dir,
&has_inline_data); &has_inline_data);
if (has_inline_data) { if (inlined)
if (inlined) *inlined = has_inline_data;
*inlined = 1; if (has_inline_data)
goto cleanup_and_exit; goto cleanup_and_exit;
}
} }
if ((namelen <= 2) && (name[0] == '.') && if ((namelen <= 2) && (name[0] == '.') &&
@ -3646,7 +3645,8 @@ static void ext4_resetent(handle_t *handle, struct ext4_renament *ent,
* so the old->de may no longer valid and need to find it again * so the old->de may no longer valid and need to find it again
* before reset old inode info. * before reset old inode info.
*/ */
old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL); old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de,
&old.inlined);
if (IS_ERR(old.bh)) if (IS_ERR(old.bh))
retval = PTR_ERR(old.bh); retval = PTR_ERR(old.bh);
if (!old.bh) if (!old.bh)
@ -3813,9 +3813,20 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir,
return retval; return retval;
} }
old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL); /*
if (IS_ERR(old.bh)) * We need to protect against old.inode directory getting converted
return PTR_ERR(old.bh); * from inline directory format into a normal one.
*/
if (S_ISDIR(old.inode->i_mode))
inode_lock_nested(old.inode, I_MUTEX_NONDIR2);
old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de,
&old.inlined);
if (IS_ERR(old.bh)) {
retval = PTR_ERR(old.bh);
goto unlock_moved_dir;
}
/* /*
* Check for inode number is _not_ due to possible IO errors. * Check for inode number is _not_ due to possible IO errors.
* We might rmdir the source, keep it as pwd of some process * We might rmdir the source, keep it as pwd of some process
@ -3872,11 +3883,6 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir,
if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir)) if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir))
goto end_rename; goto end_rename;
} }
/*
* We need to protect against old.inode directory getting
* converted from inline directory format into a normal one.
*/
inode_lock_nested(old.inode, I_MUTEX_NONDIR2);
retval = ext4_rename_dir_prepare(handle, &old); retval = ext4_rename_dir_prepare(handle, &old);
if (retval) { if (retval) {
inode_unlock(old.inode); inode_unlock(old.inode);
@ -4013,12 +4019,15 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir,
} else { } else {
ext4_journal_stop(handle); ext4_journal_stop(handle);
} }
if (old.dir_bh)
inode_unlock(old.inode);
release_bh: release_bh:
brelse(old.dir_bh); brelse(old.dir_bh);
brelse(old.bh); brelse(old.bh);
brelse(new.bh); brelse(new.bh);
unlock_moved_dir:
if (S_ISDIR(old.inode->i_mode))
inode_unlock(old.inode);
return retval; return retval;
} }

View file

@ -409,7 +409,8 @@ static void io_submit_init_bio(struct ext4_io_submit *io,
static void io_submit_add_bh(struct ext4_io_submit *io, static void io_submit_add_bh(struct ext4_io_submit *io,
struct inode *inode, struct inode *inode,
struct page *page, struct page *pagecache_page,
struct page *bounce_page,
struct buffer_head *bh) struct buffer_head *bh)
{ {
int ret; int ret;
@ -421,10 +422,11 @@ static void io_submit_add_bh(struct ext4_io_submit *io,
} }
if (io->io_bio == NULL) if (io->io_bio == NULL)
io_submit_init_bio(io, bh); io_submit_init_bio(io, bh);
ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh)); ret = bio_add_page(io->io_bio, bounce_page ?: pagecache_page,
bh->b_size, bh_offset(bh));
if (ret != bh->b_size) if (ret != bh->b_size)
goto submit_and_retry; goto submit_and_retry;
wbc_account_cgroup_owner(io->io_wbc, page, bh->b_size); wbc_account_cgroup_owner(io->io_wbc, pagecache_page, bh->b_size);
io->io_next_block++; io->io_next_block++;
} }
@ -561,8 +563,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
do { do {
if (!buffer_async_write(bh)) if (!buffer_async_write(bh))
continue; continue;
io_submit_add_bh(io, inode, io_submit_add_bh(io, inode, page, bounce_page, bh);
bounce_page ? bounce_page : page, bh);
} while ((bh = bh->b_this_page) != head); } while ((bh = bh->b_this_page) != head);
unlock: unlock:
unlock_page(page); unlock_page(page);

View file

@ -5726,6 +5726,28 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
return journal_inode; return journal_inode;
} }
/*
 * Journal block-mapping callback: translate the block number in *block
 * by calling ext4_map_blocks() on journal->j_inode.
 *
 * @journal: journal whose j_inode is used for the lookup
 * @block:   in: block number to look up; out: map.m_pblk on success
 *
 * Returns 0 when *block was rewritten with the mapped block, and also when
 * the journal has no inode (*block is then left untouched). When
 * ext4_map_blocks() returns <= 0 the failure is logged, the journal is
 * aborted, and that result (which may itself be 0) is returned.
 */
static int ext4_journal_bmap(journal_t *journal, sector_t *block)
{
struct ext4_map_blocks map;
int ret;
/* No journal inode: nothing to translate here, so report success. */
if (journal->j_inode == NULL)
return 0;
/* Request a mapping for exactly one block starting at *block. */
map.m_lblk = *block;
map.m_len = 1;
/* NULL handle and flags 0 - presumably a lookup-only call; TODO confirm. */
ret = ext4_map_blocks(NULL, journal->j_inode, &map, 0);
if (ret <= 0) {
ext4_msg(journal->j_inode->i_sb, KERN_CRIT,
"journal bmap failed: block %llu ret %d\n",
*block, ret);
/* A zero result carries no error code, so abort with -EIO instead. */
jbd2_journal_abort(journal, ret ? ret : -EIO);
/*
 * NOTE(review): when ret == 0 this returns 0 with *block unchanged
 * even though the journal was just aborted with -EIO; confirm that
 * callers do not treat that as a successful mapping.
 */
return ret;
}
*block = map.m_pblk;
return 0;
}
static journal_t *ext4_get_journal(struct super_block *sb, static journal_t *ext4_get_journal(struct super_block *sb,
unsigned int journal_inum) unsigned int journal_inum)
{ {
@ -5746,6 +5768,7 @@ static journal_t *ext4_get_journal(struct super_block *sb,
return NULL; return NULL;
} }
journal->j_private = sb; journal->j_private = sb;
journal->j_bmap = ext4_journal_bmap;
ext4_init_journal_params(sb, journal); ext4_init_journal_params(sb, journal);
return journal; return journal;
} }
@ -5920,6 +5943,7 @@ static int ext4_load_journal(struct super_block *sb,
err = jbd2_journal_wipe(journal, !really_read_only); err = jbd2_journal_wipe(journal, !really_read_only);
if (!err) { if (!err) {
char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL); char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
if (save) if (save)
memcpy(save, ((char *) es) + memcpy(save, ((char *) es) +
EXT4_S_ERR_START, EXT4_S_ERR_LEN); EXT4_S_ERR_START, EXT4_S_ERR_LEN);
@ -5928,6 +5952,14 @@ static int ext4_load_journal(struct super_block *sb,
memcpy(((char *) es) + EXT4_S_ERR_START, memcpy(((char *) es) + EXT4_S_ERR_START,
save, EXT4_S_ERR_LEN); save, EXT4_S_ERR_LEN);
kfree(save); kfree(save);
es->s_state |= cpu_to_le16(EXT4_SB(sb)->s_mount_state &
EXT4_ERROR_FS);
/* Write out restored error information to the superblock */
if (!bdev_read_only(sb->s_bdev)) {
int err2;
err2 = ext4_commit_super(sb);
err = err ? : err2;
}
} }
if (err) { if (err) {
@ -6157,11 +6189,13 @@ static int ext4_clear_journal_err(struct super_block *sb,
errstr = ext4_decode_error(sb, j_errno, nbuf); errstr = ext4_decode_error(sb, j_errno, nbuf);
ext4_warning(sb, "Filesystem error recorded " ext4_warning(sb, "Filesystem error recorded "
"from previous mount: %s", errstr); "from previous mount: %s", errstr);
ext4_warning(sb, "Marking fs in need of filesystem check.");
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
es->s_state |= cpu_to_le16(EXT4_ERROR_FS); es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
ext4_commit_super(sb); j_errno = ext4_commit_super(sb);
if (j_errno)
return j_errno;
ext4_warning(sb, "Marked fs in need of filesystem check.");
jbd2_journal_clear_err(journal); jbd2_journal_clear_err(journal);
jbd2_journal_update_sb_errno(journal); jbd2_journal_update_sb_errno(journal);

View file

@ -501,13 +501,13 @@ static const struct sysfs_ops ext4_attr_ops = {
.store = ext4_attr_store, .store = ext4_attr_store,
}; };
static struct kobj_type ext4_sb_ktype = { static const struct kobj_type ext4_sb_ktype = {
.default_groups = ext4_groups, .default_groups = ext4_groups,
.sysfs_ops = &ext4_attr_ops, .sysfs_ops = &ext4_attr_ops,
.release = ext4_sb_release, .release = ext4_sb_release,
}; };
static struct kobj_type ext4_feat_ktype = { static const struct kobj_type ext4_feat_ktype = {
.default_groups = ext4_feat_groups, .default_groups = ext4_feat_groups,
.sysfs_ops = &ext4_attr_ops, .sysfs_ops = &ext4_attr_ops,
.release = ext4_feat_release, .release = ext4_feat_release,

View file

@ -2852,6 +2852,9 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
(void *)header, total_ino); (void *)header, total_ino);
EXT4_I(inode)->i_extra_isize = new_extra_isize; EXT4_I(inode)->i_extra_isize = new_extra_isize;
if (ext4_has_inline_data(inode))
error = ext4_find_inline_data_nolock(inode);
cleanup: cleanup:
if (error && (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count))) { if (error && (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count))) {
ext4_warning(inode->i_sb, "Unable to expand inode %lu. Delete some EAs or run e2fsck.", ext4_warning(inode->i_sb, "Unable to expand inode %lu. Delete some EAs or run e2fsck.",

View file

@ -969,10 +969,13 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
{ {
int err = 0; int err = 0;
unsigned long long ret; unsigned long long ret;
sector_t block = 0; sector_t block = blocknr;
if (journal->j_inode) { if (journal->j_bmap) {
block = blocknr; err = journal->j_bmap(journal, &block);
if (err == 0)
*retp = block;
} else if (journal->j_inode) {
ret = bmap(journal->j_inode, &block); ret = bmap(journal->j_inode, &block);
if (ret || !block) { if (ret || !block) {

View file

@ -1308,6 +1308,14 @@ struct journal_s
struct buffer_head *bh, struct buffer_head *bh,
enum passtype pass, int off, enum passtype pass, int off,
tid_t expected_commit_id); tid_t expected_commit_id);
/**
* @j_bmap:
*
* Bmap function that should be used instead of the generic
* VFS bmap function.
*/
int (*j_bmap)(struct journal_s *journal, sector_t *block);
}; };
#define jbd2_might_wait_for_commit(j) \ #define jbd2_might_wait_for_commit(j) \