diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ac9554d71a46..756bf2ab64cd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -3395,25 +3394,34 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device } ALLOW_ERROR_INJECTION(open_ctree, ERRNO); -static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) +static void btrfs_end_super_write(struct bio *bio) { - if (uptodate) { - set_buffer_uptodate(bh); - } else { - struct btrfs_device *device = (struct btrfs_device *) - bh->b_private; + struct btrfs_device *device = bio->bi_private; + struct bio_vec *bvec; + struct bvec_iter_all iter_all; + struct page *page; - btrfs_warn_rl_in_rcu(device->fs_info, - "lost page write due to IO error on %s", - rcu_str_deref(device->name)); - /* note, we don't set_buffer_write_io_error because we have - * our own ways of dealing with the IO errors - */ - clear_buffer_uptodate(bh); - btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_WRITE_ERRS); + bio_for_each_segment_all(bvec, bio, iter_all) { + page = bvec->bv_page; + + if (bio->bi_status) { + btrfs_warn_rl_in_rcu(device->fs_info, + "lost page write due to IO error on %s (%d)", + rcu_str_deref(device->name), + blk_status_to_errno(bio->bi_status)); + ClearPageUptodate(page); + SetPageError(page); + btrfs_dev_stat_inc_and_print(device, + BTRFS_DEV_STAT_WRITE_ERRS); + } else { + SetPageUptodate(page); + } + + put_page(page); + unlock_page(page); } - unlock_buffer(bh); - put_bh(bh); + + bio_put(bio); } struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev, @@ -3473,25 +3481,23 @@ struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev) /* * Write superblock @sb to the @device. Do not wait for completion, all the - * buffer heads we write are pinned. + * pages we use for writing are locked. * * Write @max_mirrors copies of the superblock, where 0 means default that fit * the expected device size at commit time. Note that max_mirrors must be * same for write and wait phases. * - * Return number of errors when buffer head is not found or submission fails. + * Return number of errors when page is not found or submission fails. */ static int write_dev_supers(struct btrfs_device *device, struct btrfs_super_block *sb, int max_mirrors) { struct btrfs_fs_info *fs_info = device->fs_info; + struct address_space *mapping = device->bdev->bd_inode->i_mapping; SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); - struct buffer_head *bh; int i; - int ret; int errors = 0; u64 bytenr; - int op_flags; if (max_mirrors == 0) max_mirrors = BTRFS_SUPER_MIRROR_MAX; @@ -3499,6 +3505,10 @@ static int write_dev_supers(struct btrfs_device *device, shash->tfm = fs_info->csum_shash; for (i = 0; i < max_mirrors; i++) { + struct page *page; + struct bio *bio; + struct btrfs_super_block *disk_super; + bytenr = btrfs_sb_offset(i); if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->commit_total_bytes) @@ -3511,37 +3521,45 @@ static int write_dev_supers(struct btrfs_device *device, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); crypto_shash_final(shash, sb->csum); - /* One reference for us, and we leave it for the caller */ - bh = __getblk(device->bdev, bytenr / BTRFS_BDEV_BLOCKSIZE, - BTRFS_SUPER_INFO_SIZE); - if (!bh) { + page = find_or_create_page(mapping, bytenr >> PAGE_SHIFT, + GFP_NOFS); + if (!page) { btrfs_err(device->fs_info, - "couldn't get super buffer head for bytenr %llu", + "couldn't get super block page for bytenr %llu", bytenr); errors++; continue; } - memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); + /* Bump the refcount for wait_dev_supers() */ + get_page(page); - /* one reference for submit_bh */ - get_bh(bh); - - set_buffer_uptodate(bh); - lock_buffer(bh); - bh->b_end_io = btrfs_end_buffer_write_sync; - bh->b_private = device; + disk_super = page_address(page); + memcpy(disk_super, sb, BTRFS_SUPER_INFO_SIZE); /* - * we fua the first super. The others we allow - * to go down lazy. + * Directly use bios here instead of relying on the page cache + * to do I/O, so we don't lose the ability to do integrity + * checking. */ - op_flags = REQ_SYNC | REQ_META | REQ_PRIO; + bio = bio_alloc(GFP_NOFS, 1); + bio_set_dev(bio, device->bdev); + bio->bi_iter.bi_sector = bytenr >> SECTOR_SHIFT; + bio->bi_private = device; + bio->bi_end_io = btrfs_end_super_write; + __bio_add_page(bio, page, BTRFS_SUPER_INFO_SIZE, + offset_in_page(bytenr)); + + /* + * We FUA only the first super block. The others we allow to + * go down lazy and there's a short window where the on-disk + * copies might still contain the older version. + */ + bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO; if (i == 0 && !btrfs_test_opt(device->fs_info, NOBARRIER)) - op_flags |= REQ_FUA; - ret = btrfsic_submit_bh(REQ_OP_WRITE, op_flags, bh); - if (ret) - errors++; + bio->bi_opf |= REQ_FUA; + + btrfsic_submit_bio(bio); } return errors < i ? 0 : -1; } @@ -3550,12 +3568,11 @@ static int write_dev_supers(struct btrfs_device *device, * Wait for write completion of superblocks done by write_dev_supers, * @max_mirrors same for write and wait phases. * - * Return number of errors when buffer head is not found or not marked up to + * Return number of errors when page is not found or not marked up to * date. */ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors) { - struct buffer_head *bh; int i; int errors = 0; bool primary_failed = false; @@ -3565,32 +3582,34 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors) max_mirrors = BTRFS_SUPER_MIRROR_MAX; for (i = 0; i < max_mirrors; i++) { + struct page *page; + bytenr = btrfs_sb_offset(i); if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->commit_total_bytes) break; - bh = __find_get_block(device->bdev, - bytenr / BTRFS_BDEV_BLOCKSIZE, - BTRFS_SUPER_INFO_SIZE); - if (!bh) { + page = find_get_page(device->bdev->bd_inode->i_mapping, + bytenr >> PAGE_SHIFT); + if (!page) { errors++; if (i == 0) primary_failed = true; continue; } - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { + /* Page is submitted locked and unlocked once the IO completes */ + wait_on_page_locked(page); + if (PageError(page)) { errors++; if (i == 0) primary_failed = true; } - /* drop our reference */ - brelse(bh); + /* Drop our reference */ + put_page(page); - /* drop the reference from the writing run */ - brelse(bh); + /* Drop the reference from the writing run */ + put_page(page); } /* log error, force error return */