diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 1bebba881d89..d801e19cb489 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1555,6 +1555,12 @@ enum btree_id { BTREE_ID_NR }; +/* + * Maximum number of btrees that we will _ever_ have under the current scheme, + * where we refer to them with bitfields + */ +#define BTREE_ID_NR_MAX 64 + static inline bool btree_id_is_alloc(enum btree_id id) { switch (id) { diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 5bf98cb8b15d..d3bcb4e4e230 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -332,6 +332,8 @@ static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id, struct bpos pos, bool key_cache) { + bch2_trans_verify_not_unlocked(trans); + struct btree_path *path; struct trans_for_each_path_inorder_iter iter; struct printbuf buf = PRINTBUF; diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index eab2a25bdc7a..798eb1c47966 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -838,7 +838,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, #define drop_locks_do(_trans, _do) \ ({ \ bch2_trans_unlock(_trans); \ - _do ?: bch2_trans_relock(_trans); \ + (_do) ?: bch2_trans_relock(_trans); \ }) #define allocate_dropping_locks_errcode(_trans, _do) \ diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index e28d28ac2a13..b469586517a8 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -479,9 +479,8 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, percpu_down_read(&c->mark_lock); - rcu_read_lock(); bkey_for_each_ptr_decode(k.k, ptrs_c, p, entry_c) { - struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev); + struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev); if (!ca) { if (fsck_err(c, ptr_to_invalid_device, "pointer to missing device %u\n" @@ -558,7 +557,7 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, do_update = true; if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen) - continue; + goto next; if (fsck_err_on(bucket_data_type_mismatch(g->data_type, data_type), c, ptr_bucket_data_type_mismatch, @@ -601,8 +600,9 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, bch2_bkey_val_to_text(&buf, c, k), buf.buf))) do_update = true; } +next: + bch2_dev_put(ca); } - rcu_read_unlock(); if (do_update) { if (flags & BTREE_TRIGGER_is_root) { @@ -638,9 +638,10 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, } else { struct bkey_ptrs ptrs; union bch_extent_entry *entry; + + rcu_read_lock(); restart_drop_ptrs: ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); - rcu_read_lock(); bkey_for_each_ptr_decode(bkey_i_to_s(new).k, ptrs, p, entry) { struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev); struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr); @@ -1464,7 +1465,7 @@ int bch2_trans_mark_dev_sbs_flags(struct bch_fs *c, for_each_online_member(c, ca) { int ret = bch2_trans_mark_dev_sb(c, ca, flags); if (ret) { - bch2_dev_put(ca); + percpu_ref_put(&ca->io_ref); return ret; } } diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c index 85198f391e9c..3bd3aba90d8f 100644 --- a/fs/bcachefs/checksum.c +++ b/fs/bcachefs/checksum.c @@ -233,7 +233,7 @@ struct bch_csum bch2_checksum(struct bch_fs *c, unsigned type, return ret; } default: - BUG(); + return (struct bch_csum) {}; } } @@ -307,7 +307,7 @@ static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type, return ret; } default: - BUG(); + return (struct bch_csum) {}; } } @@ -352,8 +352,12 @@ int __bch2_encrypt_bio(struct bch_fs *c, unsigned type, bytes += bv.bv_len; } - sg_mark_end(sg - 1); - return do_encrypt_sg(c->chacha20, nonce, sgl, bytes); + if (sg != sgl) { + sg_mark_end(sg - 1); + return do_encrypt_sg(c->chacha20, nonce, sgl, bytes); + } + + return ret; } struct bch_csum bch2_checksum_merge(unsigned type, struct bch_csum a, @@ -648,26 +652,26 @@ int bch2_decrypt_sb_key(struct bch_fs *c, static int bch2_alloc_ciphers(struct bch_fs *c) { - int ret; - - if (!c->chacha20) - c->chacha20 = crypto_alloc_sync_skcipher("chacha20", 0, 0); - ret = PTR_ERR_OR_ZERO(c->chacha20); + if (c->chacha20) + return 0; + struct crypto_sync_skcipher *chacha20 = crypto_alloc_sync_skcipher("chacha20", 0, 0); + int ret = PTR_ERR_OR_ZERO(chacha20); if (ret) { bch_err(c, "error requesting chacha20 module: %s", bch2_err_str(ret)); return ret; } - if (!c->poly1305) - c->poly1305 = crypto_alloc_shash("poly1305", 0, 0); - ret = PTR_ERR_OR_ZERO(c->poly1305); - + struct crypto_shash *poly1305 = crypto_alloc_shash("poly1305", 0, 0); + ret = PTR_ERR_OR_ZERO(poly1305); if (ret) { bch_err(c, "error requesting poly1305 module: %s", bch2_err_str(ret)); + crypto_free_sync_skcipher(chacha20); return ret; } + c->chacha20 = chacha20; + c->poly1305 = poly1305; return 0; } @@ -762,11 +766,11 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed) void bch2_fs_encryption_exit(struct bch_fs *c) { - if (!IS_ERR_OR_NULL(c->poly1305)) + if (c->poly1305) crypto_free_shash(c->poly1305); - if (!IS_ERR_OR_NULL(c->chacha20)) + if (c->chacha20) crypto_free_sync_skcipher(c->chacha20); - if (!IS_ERR_OR_NULL(c->sha256)) + if (c->sha256) crypto_free_shash(c->sha256); } @@ -779,6 +783,7 @@ int bch2_fs_encryption_init(struct bch_fs *c) c->sha256 = crypto_alloc_shash("sha256", 0, 0); ret = PTR_ERR_OR_ZERO(c->sha256); if (ret) { + c->sha256 = NULL; bch_err(c, "error requesting sha256 module: %s", bch2_err_str(ret)); goto out; } diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 6bbf9a7d9e4d..c67460d8205d 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -15,6 +15,9 @@ static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d) { + if (bkey_val_bytes(d.k) < offsetof(struct bch_dirent, d_name)) + return 0; + unsigned bkey_u64s = bkey_val_u64s(d.k); unsigned bkey_bytes = bkey_u64s * sizeof(u64); u64 last_u64 = ((u64*)d.v)[bkey_u64s - 1]; diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index b0a33fabadf8..6b69e5cd68dd 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -257,7 +257,6 @@ void bch2_readahead(struct readahead_control *ractl) struct bch_inode_info *inode = to_bch_ei(ractl->mapping->host); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_io_opts opts; - struct btree_trans *trans = bch2_trans_get(c); struct folio *folio; struct readpages_iter readpages_iter; @@ -269,6 +268,7 @@ void bch2_readahead(struct readahead_control *ractl) bch2_pagecache_add_get(inode); + struct btree_trans *trans = bch2_trans_get(c); while ((folio = readpage_iter_peek(&readpages_iter))) { unsigned n = min_t(unsigned, readpages_iter.folios.nr - @@ -289,10 +289,10 @@ void bch2_readahead(struct readahead_control *ractl) &readpages_iter); bch2_trans_unlock(trans); } + bch2_trans_put(trans); bch2_pagecache_add_put(inode); - bch2_trans_put(trans); darray_exit(&readpages_iter.folios); } diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index fd851f10d11c..96040a95cf46 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -188,8 +188,7 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino BUG_ON(!old); if (unlikely(old != inode)) { - __destroy_inode(&inode->v); - kmem_cache_free(bch2_inode_cache, inode); + discard_new_inode(&inode->v); inode = old; } else { mutex_lock(&c->vfs_inodes_lock); @@ -1145,6 +1144,8 @@ static int bch2_open(struct inode *vinode, struct file *file) return ret; } + file->f_mode |= FMODE_CAN_ODIRECT; + return generic_file_open(vinode, file); } @@ -1237,7 +1238,6 @@ static const struct address_space_operations bch_address_space_operations = { .write_end = bch2_write_end, .invalidate_folio = bch2_invalidate_folio, .release_folio = bch2_release_folio, - .direct_IO = noop_direct_IO, #ifdef CONFIG_MIGRATION .migrate_folio = filemap_migrate_folio, #endif diff --git a/fs/bcachefs/printbuf.c b/fs/bcachefs/printbuf.c index 8b0369185f5c..9f529e4c1b16 100644 --- a/fs/bcachefs/printbuf.c +++ b/fs/bcachefs/printbuf.c @@ -45,6 +45,13 @@ int bch2_printbuf_make_room(struct printbuf *out, unsigned extra) unsigned new_size = roundup_pow_of_two(out->size + extra); + /* Sanity check... */ + if (new_size > PAGE_SIZE << MAX_PAGE_ORDER) { + out->allocation_failure = true; + out->overflow = true; + return -ENOMEM; + } + /* * Note: output buffer must be freeable with kfree(), it's not required * that the user use printbuf_exit(). diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 1266916ac03f..cf513fc79ce4 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -35,6 +35,9 @@ void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) { + if (btree >= BTREE_ID_NR_MAX) + return; + u64 b = BIT_ULL(btree); if (!(c->sb.btrees_lost_data & b)) { @@ -808,9 +811,11 @@ int bch2_fs_recovery(struct bch_fs *c) clear_bit(BCH_FS_fsck_running, &c->flags); /* fsync if we fixed errors */ - if (test_bit(BCH_FS_errors_fixed, &c->flags)) { + if (test_bit(BCH_FS_errors_fixed, &c->flags) && + bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync)) { bch2_journal_flush_all_pins(&c->journal); bch2_journal_meta(&c->journal); + bch2_write_ref_put(c, BCH_WRITE_REF_fsync); } /* If we fixed errors, verify that fs is actually clean now: */ diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index bd1d5d085e23..57a1f09cca09 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -23,14 +23,12 @@ static int bch2_memcmp(const void *l, const void *r, const void *priv) static void verify_replicas_entry(struct bch_replicas_entry_v1 *e) { #ifdef CONFIG_BCACHEFS_DEBUG - unsigned i; - BUG_ON(e->data_type >= BCH_DATA_NR); BUG_ON(!e->nr_devs); BUG_ON(e->nr_required > 1 && e->nr_required >= e->nr_devs); - for (i = 0; i + 1 < e->nr_devs; i++) + for (unsigned i = 0; i + 1 < e->nr_devs; i++) BUG_ON(e->devs[i] >= e->devs[i + 1]); #endif } @@ -192,24 +190,17 @@ cpu_replicas_add_entry(struct bch_fs *c, struct bch_replicas_cpu *old, struct bch_replicas_entry_v1 *new_entry) { - unsigned i; struct bch_replicas_cpu new = { .nr = old->nr + 1, .entry_size = max_t(unsigned, old->entry_size, replicas_entry_bytes(new_entry)), }; - for (i = 0; i < new_entry->nr_devs; i++) - BUG_ON(!bch2_dev_exists(c, new_entry->devs[i])); - - BUG_ON(!new_entry->data_type); - verify_replicas_entry(new_entry); - new.entries = kcalloc(new.nr, new.entry_size, GFP_KERNEL); if (!new.entries) return new; - for (i = 0; i < old->nr; i++) + for (unsigned i = 0; i < old->nr; i++) memcpy(cpu_replicas_entry(&new, i), cpu_replicas_entry(old, i), old->entry_size); @@ -230,8 +221,6 @@ static inline int __replicas_entry_idx(struct bch_replicas_cpu *r, if (unlikely(entry_size > r->entry_size)) return -1; - verify_replicas_entry(search); - #define entry_cmp(_l, _r) memcmp(_l, _r, entry_size) idx = eytzinger0_find(r->entries, r->nr, r->entry_size, entry_cmp, search); @@ -524,13 +513,16 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask) c->replicas_gc.nr = 0; c->replicas_gc.entry_size = 0; - for_each_cpu_replicas_entry(&c->replicas, e) - if (!((1 << e->data_type) & typemask)) { + for_each_cpu_replicas_entry(&c->replicas, e) { + /* Preserve unknown data types */ + if (e->data_type >= BCH_DATA_NR || + !((1 << e->data_type) & typemask)) { c->replicas_gc.nr++; c->replicas_gc.entry_size = max_t(unsigned, c->replicas_gc.entry_size, replicas_entry_bytes(e)); } + } c->replicas_gc.entries = kcalloc(c->replicas_gc.nr, c->replicas_gc.entry_size, @@ -542,7 +534,8 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask) } for_each_cpu_replicas_entry(&c->replicas, e) - if (!((1 << e->data_type) & typemask)) + if (e->data_type >= BCH_DATA_NR || + !((1 << e->data_type) & typemask)) memcpy(cpu_replicas_entry(&c->replicas_gc, i++), e, c->replicas_gc.entry_size); @@ -998,7 +991,7 @@ unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev) { struct bch_sb_field_replicas *replicas; struct bch_sb_field_replicas_v0 *replicas_v0; - unsigned i, data_has = 0; + unsigned data_has = 0; replicas = bch2_sb_field_get(sb, replicas); replicas_v0 = bch2_sb_field_get(sb, replicas_v0); @@ -1006,17 +999,26 @@ unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev) if (replicas) { struct bch_replicas_entry_v1 *r; - for_each_replicas_entry(replicas, r) - for (i = 0; i < r->nr_devs; i++) + for_each_replicas_entry(replicas, r) { + if (r->data_type >= sizeof(data_has) * 8) + continue; + + for (unsigned i = 0; i < r->nr_devs; i++) if (r->devs[i] == dev) data_has |= 1 << r->data_type; + } + } else if (replicas_v0) { struct bch_replicas_entry_v0 *r; - for_each_replicas_entry_v0(replicas_v0, r) - for (i = 0; i < r->nr_devs; i++) + for_each_replicas_entry_v0(replicas_v0, r) { + if (r->data_type >= sizeof(data_has) * 8) + continue; + + for (unsigned i = 0; i < r->nr_devs; i++) if (r->devs[i] == dev) data_has |= 1 << r->data_type; + } } diff --git a/fs/bcachefs/sb-errors_types.h b/fs/bcachefs/sb-errors_types.h index 87324747351a..666599d3fb9d 100644 --- a/fs/bcachefs/sb-errors_types.h +++ b/fs/bcachefs/sb-errors_types.h @@ -273,7 +273,9 @@ x(dup_backpointer_to_bad_csum_extent, 265) \ x(btree_bitmap_not_marked, 266) \ x(sb_clean_entry_overrun, 267) \ - x(btree_ptr_v2_written_0, 268) + x(btree_ptr_v2_written_0, 268) \ + x(subvol_snapshot_bad, 269) \ + x(subvol_inode_bad, 270) enum bch_sb_error_id { #define x(t, n) BCH_FSCK_ERR_##t = n, diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h index bd5d74269d15..ab13d8f4b41e 100644 --- a/fs/bcachefs/snapshot.h +++ b/fs/bcachefs/snapshot.h @@ -176,12 +176,9 @@ static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ances static inline bool bch2_snapshot_has_children(struct bch_fs *c, u32 id) { - const struct snapshot_t *t; - bool ret; - rcu_read_lock(); - t = snapshot_t(c, id); - ret = (t->children[0]|t->children[1]) != 0; + const struct snapshot_t *t = snapshot_t(c, id); + bool ret = t && (t->children[0]|t->children[1]) != 0; rcu_read_unlock(); return ret; diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 132213761ef6..dfc9cf305756 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -210,12 +210,21 @@ int bch2_check_subvol_children(struct bch_fs *c) int bch2_subvolume_invalid(struct bch_fs *c, struct bkey_s_c k, enum bch_validate_flags flags, struct printbuf *err) { + struct bkey_s_c_subvolume subvol = bkey_s_c_to_subvolume(k); int ret = 0; bkey_fsck_err_on(bkey_lt(k.k->p, SUBVOL_POS_MIN) || bkey_gt(k.k->p, SUBVOL_POS_MAX), c, err, subvol_pos_bad, "invalid pos"); + + bkey_fsck_err_on(!subvol.v->snapshot, c, err, + subvol_snapshot_bad, + "invalid snapshot"); + + bkey_fsck_err_on(!subvol.v->inode, c, err, + subvol_inode_bad, + "invalid inode"); fsck_err: return ret; } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 294a9d35a9f2..2206a8dee693 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -551,9 +551,9 @@ static void __bch2_fs_free(struct bch_fs *c) bch2_fs_io_read_exit(c); bch2_fs_buckets_waiting_for_journal_exit(c); bch2_fs_btree_interior_update_exit(c); - bch2_fs_btree_iter_exit(c); bch2_fs_btree_key_cache_exit(&c->btree_key_cache); bch2_fs_btree_cache_exit(c); + bch2_fs_btree_iter_exit(c); bch2_fs_replicas_exit(c); bch2_fs_journal_exit(&c->journal); bch2_io_clock_exit(&c->io_clock[WRITE]);