bcachefs: Use for_each_btree_key_upto() more consistently

It's important that in BTREE_ITER_FILTER_SNAPSHOTS mode we always use
peek_upto() and provide an end for the interval we're searching for -
otherwise, when we hit the end of the inode, the next inode may be in a
different subvolume and not have any keys in the current snapshot, and
we'd iterate over arbitrarily many keys before returning one.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2022-10-11 04:32:41 -04:00
parent 5b3008bc61
commit c72f687a1f
11 changed files with 150 additions and 81 deletions

View file

@ -2042,6 +2042,7 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
int ret;
EBUG_ON(iter->flags & BTREE_ITER_ALL_LEVELS);
EBUG_ON((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) && bkey_eq(end, POS_MAX));
if (iter->update_path) {
bch2_path_put_nokeep(trans, iter->update_path,
@ -2053,7 +2054,9 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
while (1) {
k = __bch2_btree_iter_peek(iter, search_key);
if (!k.k || bkey_err(k))
if (unlikely(!k.k))
goto end;
if (unlikely(bkey_err(k)))
goto out_no_locked;
/*
@ -2066,11 +2069,10 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
else
iter_pos = bkey_max(iter->pos, bkey_start_pos(k.k));
if (bkey_gt(iter_pos, end)) {
bch2_btree_iter_set_pos(iter, end);
k = bkey_s_c_null;
goto out_no_locked;
}
if (unlikely(!(iter->flags & BTREE_ITER_IS_EXTENTS)
? bkey_gt(iter_pos, end)
: bkey_ge(iter_pos, end)))
goto end;
if (iter->update_path &&
!bkey_eq(iter->update_path->pos, k.k->p)) {
@ -2159,6 +2161,10 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
bch2_btree_iter_verify_entry_exit(iter);
return k;
end:
bch2_btree_iter_set_pos(iter, end);
k = bkey_s_c_null;
goto out_no_locked;
}
/**
@ -2463,15 +2469,15 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
goto out_no_locked;
} else {
struct bpos next;
struct bpos end = iter->pos;
if (iter->flags & BTREE_ITER_IS_EXTENTS)
end.offset = U64_MAX;
EBUG_ON(iter->path->level);
if (iter->flags & BTREE_ITER_INTENT) {
struct btree_iter iter2;
struct bpos end = iter->pos;
if (iter->flags & BTREE_ITER_IS_EXTENTS)
end.offset = U64_MAX;
bch2_trans_copy_iter(&iter2, iter);
k = bch2_btree_iter_peek_upto(&iter2, end);
@ -2484,7 +2490,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
} else {
struct bpos pos = iter->pos;
k = bch2_btree_iter_peek(iter);
k = bch2_btree_iter_peek_upto(iter, end);
if (unlikely(bkey_err(k)))
bch2_btree_iter_set_pos(iter, pos);
else

View file

@ -599,6 +599,22 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
return k;
}
/*
 * Peek at the next key no later than @end, transparently retrying.
 *
 * Each pass first checks btree_trans_too_many_iters(); if that is true the
 * transaction is restarted with bch2_trans_begin() without peeking.  Otherwise
 * the iterator is peeked with bch2_btree_iter_peek_upto_type(), and the
 * transaction is restarted again if the resulting error matches
 * BCH_ERR_transaction_restart.
 *
 * Returns the first peek result whose error does not match
 * BCH_ERR_transaction_restart.
 */
static inline struct bkey_s_c
__bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans,
					struct btree_iter *iter,
					struct bpos end,
					unsigned flags)
{
	struct bkey_s_c k;

	for (;;) {
		/* Too many iterators: restart before attempting a peek. */
		if (btree_trans_too_many_iters(trans)) {
			bch2_trans_begin(trans);
			continue;
		}

		k = bch2_btree_iter_peek_upto_type(iter, end, flags);
		if (!bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart))
			return k;

		/* The peek asked for a transaction restart: begin again and retry. */
		bch2_trans_begin(trans);
	}
}
#define lockrestart_do(_trans, _do) \
({ \
u32 _restart_count; \
@ -673,6 +689,36 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
_ret; \
})
/*
 * for_each_btree_key2_upto - run _do for each key from _start up to _end
 *
 * Statement-expression macro.  It initializes _iter on _btree_id at _start
 * with _flags, then loops:
 *
 *  - bch2_trans_begin() is called at the top of every pass;
 *  - the next key is fetched with bch2_btree_iter_peek_upto_type(), bounded
 *    by _end, and stored in _k; a null key ends the loop with _ret == 0;
 *  - _ret becomes the key's error if it has one, otherwise the value of _do;
 *  - a _ret matching BCH_ERR_transaction_restart repeats the pass without
 *    advancing the iterator; any other nonzero _ret ends the loop;
 *  - otherwise the iterator is advanced, and the loop ends if
 *    bch2_btree_iter_advance() returns false.
 *
 * _do is expanded directly inside the while (1) body, so a `break` or
 * `continue` written in _do acts on this loop.
 *
 * The iterator is released with bch2_trans_iter_exit() before the macro
 * yields _ret as its value.
 */
#define for_each_btree_key2_upto(_trans, _iter, _btree_id, \
_start, _end, _flags, _k, _do) \
({ \
int _ret = 0; \
\
bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
(_start), (_flags)); \
\
while (1) { \
u32 _restart_count = bch2_trans_begin(_trans); \
\
_ret = 0; \
(_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, (_flags));\
if (!(_k).k) \
break; \
\
_ret = bkey_err(_k) ?: (_do); \
if (bch2_err_matches(_ret, BCH_ERR_transaction_restart))\
continue; \
if (_ret) \
break; \
bch2_trans_verify_not_restarted(_trans, _restart_count);\
if (!bch2_btree_iter_advance(&(_iter))) \
break; \
} \
\
bch2_trans_iter_exit((_trans), &(_iter)); \
_ret; \
})
#define for_each_btree_key_reverse(_trans, _iter, _btree_id, \
_start, _flags, _k, _do) \
({ \
@ -711,6 +757,14 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
(_do) ?: bch2_trans_commit(_trans, (_disk_res),\
(_journal_seq), (_commit_flags)))
/*
 * for_each_btree_key_upto_commit - bounded iteration with a commit per key
 *
 * Expands to for_each_btree_key2_upto() with a per-key action of
 * "(_do) ?: bch2_trans_commit(...)": bch2_trans_commit() is called with
 * _disk_res, _journal_seq and _commit_flags only when _do evaluates to zero,
 * and a nonzero _do value is passed through as the per-key result instead.
 */
#define for_each_btree_key_upto_commit(_trans, _iter, _btree_id, \
_start, _end, _iter_flags, _k, \
_disk_res, _journal_seq, _commit_flags,\
_do) \
for_each_btree_key2_upto(_trans, _iter, _btree_id, _start, _end, _iter_flags, _k,\
(_do) ?: bch2_trans_commit(_trans, (_disk_res),\
(_journal_seq), (_commit_flags)))
#define for_each_btree_key(_trans, _iter, _btree_id, \
_start, _flags, _k, _ret) \
for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
@ -719,6 +773,15 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
!((_ret) = bkey_err(_k)) && (_k).k; \
bch2_btree_iter_advance(&(_iter)))
/*
 * for_each_btree_key_upto - for-loop header iterating keys from _start to _end
 *
 * Initializes _iter on _btree_id at _start with _flags.  Before each pass the
 * next key is fetched into _k with __bch2_btree_iter_peek_upto_and_restart(),
 * bounded by _end, and its error is stored in _ret.  The loop body runs only
 * while _ret is zero and _k is non-null; the iterator is advanced with
 * bch2_btree_iter_advance() after each pass.
 *
 * This macro does not call bch2_trans_iter_exit(); releasing _iter is left to
 * the caller.
 */
#define for_each_btree_key_upto(_trans, _iter, _btree_id, \
_start, _end, _flags, _k, _ret) \
for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
(_start), (_flags)); \
(_k) = __bch2_btree_iter_peek_upto_and_restart((_trans), \
&(_iter), _end, _flags),\
!((_ret) = bkey_err(_k)) && (_k).k; \
bch2_btree_iter_advance(&(_iter)))
#define for_each_btree_key_norestart(_trans, _iter, _btree_id, \
_start, _flags, _k, _ret) \
for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
@ -747,6 +810,12 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
!((_ret) = bkey_err(_k)) && (_k).k; \
bch2_btree_iter_advance(&(_iter)))
/*
 * for_each_btree_key_upto_continue_norestart - continue iterating up to _end
 *
 * For-loop header with an empty init clause: _iter is used as the caller
 * already set it up.  Before each pass the next key is fetched into _k with
 * bch2_btree_iter_peek_upto_type(), bounded by _end, and its error is stored
 * in _ret.  The loop stops as soon as _ret is nonzero or _k is null; the
 * macro itself does no retrying, so any error is left in _ret for the caller.
 * The iterator is advanced with bch2_btree_iter_advance() after each pass.
 */
#define for_each_btree_key_upto_continue_norestart(_iter, _end, _flags, _k, _ret)\
for (; \
(_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, _flags), \
!((_ret) = bkey_err(_k)) && (_k).k; \
bch2_btree_iter_advance(&(_iter)))
/* new multiple iterator interface: */
void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *);

View file

@ -1675,7 +1675,7 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
int ret = 0;
bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT);
while ((k = bch2_btree_iter_peek(&iter)).k) {
while ((k = bch2_btree_iter_peek_upto(&iter, end)).k) {
struct disk_reservation disk_res =
bch2_disk_reservation_init(trans->c, 0);
struct bkey_i delete;
@ -1684,9 +1684,6 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
if (ret)
goto err;
if (bkey_ge(iter.pos, end))
break;
bkey_init(&delete.k);
/*

View file

@ -683,7 +683,7 @@ static int ec_stripe_delete(struct bch_fs *c, size_t idx)
{
return bch2_btree_delete_range(c, BTREE_ID_stripes,
POS(0, idx),
POS(0, idx + 1),
POS(0, idx),
0, NULL);
}

View file

@ -128,12 +128,9 @@ int bch2_extent_atomic_end(struct btree_trans *trans,
bch2_trans_copy_iter(&copy, iter);
for_each_btree_key_continue_norestart(copy, 0, k, ret) {
for_each_btree_key_upto_continue_norestart(copy, insert->k.p, 0, k, ret) {
unsigned offset = 0;
if (bkey_ge(bkey_start_pos(k.k), *end))
break;
if (bkey_gt(bkey_start_pos(&insert->k), bkey_start_pos(k.k)))
offset = bkey_start_offset(&insert->k) -
bkey_start_offset(k.k);

View file

@ -2542,15 +2542,11 @@ static inline int range_has_data(struct bch_fs *c, u32 subvol,
if (ret)
goto err;
for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents, start, 0, k, ret) {
if (bkey_ge(bkey_start_pos(k.k), end))
break;
for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_extents, start, end, 0, k, ret)
if (bkey_extent_is_data(k.k)) {
ret = 1;
break;
}
}
start = iter.pos;
bch2_trans_iter_exit(&trans, &iter);
err:
@ -2590,8 +2586,8 @@ static int __bch2_truncate_page(struct bch_inode_info *inode,
* page
*/
ret = range_has_data(c, inode->ei_subvol,
POS(inode->v.i_ino, index << PAGE_SECTORS_SHIFT),
POS(inode->v.i_ino, (index + 1) << PAGE_SECTORS_SHIFT));
POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT)),
POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT) + PAGE_SECTORS));
if (ret <= 0)
return ret;
@ -2973,7 +2969,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
k = insert
? bch2_btree_iter_peek_prev(&src)
: bch2_btree_iter_peek(&src);
: bch2_btree_iter_peek_upto(&src, POS(inode->v.i_ino, U64_MAX));
if ((ret = bkey_err(k)))
continue;
@ -3264,6 +3260,10 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
return bch2_err_class(ret);
}
/*
* Take a quota reservation for unallocated blocks in a given file range
* Does not check pagecache
*/
static int quota_reserve_range(struct bch_inode_info *inode,
struct quota_res *res,
u64 start, u64 end)
@ -3477,11 +3477,11 @@ static loff_t bch2_seek_data(struct file *file, u64 offset)
if (ret)
goto err;
for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents,
SPOS(inode->v.i_ino, offset >> 9, snapshot), 0, k, ret) {
if (k.k->p.inode != inode->v.i_ino) {
break;
} else if (bkey_extent_is_data(k.k)) {
for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_extents,
SPOS(inode->v.i_ino, offset >> 9, snapshot),
POS(inode->v.i_ino, U64_MAX),
0, k, ret) {
if (bkey_extent_is_data(k.k)) {
next_data = max(offset, bkey_start_offset(k.k) << 9);
break;
} else if (k.k->p.offset >> 9 > isize)

View file

@ -31,14 +31,12 @@ static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum,
u64 sectors = 0;
int ret;
for_each_btree_key(trans, iter, BTREE_ID_extents,
SPOS(inum, 0, snapshot), 0, k, ret) {
if (k.k->p.inode != inum)
break;
for_each_btree_key_upto(trans, iter, BTREE_ID_extents,
SPOS(inum, 0, snapshot),
POS(inum, U64_MAX),
0, k, ret)
if (bkey_extent_is_allocation(k.k))
sectors += k.k->size;
}
bch2_trans_iter_exit(trans, &iter);
@ -54,11 +52,10 @@ static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum,
u64 subdirs = 0;
int ret;
for_each_btree_key(trans, iter, BTREE_ID_dirents,
SPOS(inum, 0, snapshot), 0, k, ret) {
if (k.k->p.inode != inum)
break;
for_each_btree_key_upto(trans, iter, BTREE_ID_dirents,
SPOS(inum, 0, snapshot),
POS(inum, U64_MAX),
0, k, ret) {
if (k.k->type != KEY_TYPE_dirent)
continue;
@ -66,7 +63,6 @@ static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum,
if (d.v->d_type == DT_DIR)
subdirs++;
}
bch2_trans_iter_exit(trans, &iter);
return ret ?: subdirs;

View file

@ -419,11 +419,12 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
bch2_btree_iter_set_snapshot(iter, snapshot);
k = bch2_btree_iter_peek(iter);
if (bkey_ge(iter->pos, end_pos)) {
bch2_btree_iter_set_pos(iter, end_pos);
/*
* peek_upto() doesn't have ideal semantics for extents:
*/
k = bch2_btree_iter_peek_upto(iter, end_pos);
if (!k.k)
break;
}
ret = bkey_err(k);
if (ret)

View file

@ -709,7 +709,7 @@ static int bch2_quota_remove(struct super_block *sb, unsigned uflags)
ret = bch2_btree_delete_range(c, BTREE_ID_quotas,
POS(QTYP_USR, 0),
POS(QTYP_USR + 1, 0),
POS(QTYP_USR, U64_MAX),
0, NULL);
if (ret)
return ret;
@ -721,7 +721,7 @@ static int bch2_quota_remove(struct super_block *sb, unsigned uflags)
ret = bch2_btree_delete_range(c, BTREE_ID_quotas,
POS(QTYP_GRP, 0),
POS(QTYP_GRP + 1, 0),
POS(QTYP_GRP, U64_MAX),
0, NULL);
if (ret)
return ret;
@ -733,7 +733,7 @@ static int bch2_quota_remove(struct super_block *sb, unsigned uflags)
ret = bch2_btree_delete_range(c, BTREE_ID_quotas,
POS(QTYP_PRJ, 0),
POS(QTYP_PRJ + 1, 0),
POS(QTYP_PRJ, U64_MAX),
0, NULL);
if (ret)
return ret;

View file

@ -251,13 +251,9 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
struct bkey_s_c k;
int ret;
for_each_btree_key_continue_norestart(*iter, 0, k, ret) {
if (bkey_ge(iter->pos, end))
break;
for_each_btree_key_upto_continue_norestart(*iter, end, 0, k, ret)
if (bkey_extent_is_data(k.k))
return k;
}
if (bkey_ge(iter->pos, end))
bch2_btree_iter_set_pos(iter, end);

View file

@ -15,13 +15,14 @@ static void delete_test_keys(struct bch_fs *c)
int ret;
ret = bch2_btree_delete_range(c, BTREE_ID_extents,
SPOS(0, 0, U32_MAX), SPOS_MAX,
0,
NULL);
SPOS(0, 0, U32_MAX),
POS(0, U64_MAX),
0, NULL);
BUG_ON(ret);
ret = bch2_btree_delete_range(c, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX), SPOS_MAX,
SPOS(0, 0, U32_MAX),
POS(0, U64_MAX),
0, NULL);
BUG_ON(ret);
}
@ -145,8 +146,9 @@ static int test_iterate(struct bch_fs *c, u64 nr)
i = 0;
ret = for_each_btree_key2(&trans, iter, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX), 0, k, ({
ret = for_each_btree_key2_upto(&trans, iter, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
0, k, ({
BUG_ON(k.k->p.offset != i++);
0;
}));
@ -211,8 +213,9 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr)
i = 0;
ret = for_each_btree_key2(&trans, iter, BTREE_ID_extents,
SPOS(0, 0, U32_MAX), 0, k, ({
ret = for_each_btree_key2_upto(&trans, iter, BTREE_ID_extents,
SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
0, k, ({
BUG_ON(bkey_start_offset(k.k) != i);
i = k.k->p.offset;
0;
@ -278,8 +281,9 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr)
i = 0;
ret = for_each_btree_key2(&trans, iter, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX), 0, k, ({
ret = for_each_btree_key2_upto(&trans, iter, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
0, k, ({
BUG_ON(k.k->p.offset != i);
i += 2;
0;
@ -295,8 +299,8 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr)
i = 0;
ret = for_each_btree_key2(&trans, iter, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX),
ret = for_each_btree_key2_upto(&trans, iter, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
BTREE_ITER_SLOTS, k, ({
if (i >= nr * 2)
break;
@ -351,8 +355,9 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
i = 0;
ret = for_each_btree_key2(&trans, iter, BTREE_ID_extents,
SPOS(0, 0, U32_MAX), 0, k, ({
ret = for_each_btree_key2_upto(&trans, iter, BTREE_ID_extents,
SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
0, k, ({
BUG_ON(bkey_start_offset(k.k) != i + 8);
BUG_ON(k.k->size != 8);
i += 16;
@ -369,8 +374,8 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
i = 0;
ret = for_each_btree_key2(&trans, iter, BTREE_ID_extents,
SPOS(0, 0, U32_MAX),
ret = for_each_btree_key2_upto(&trans, iter, BTREE_ID_extents,
SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
BTREE_ITER_SLOTS, k, ({
if (i == nr)
break;
@ -405,10 +410,10 @@ static int test_peek_end(struct bch_fs *c, u64 nr)
bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX), 0);
lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek(&iter)));
lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
BUG_ON(k.k);
lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek(&iter)));
lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
BUG_ON(k.k);
bch2_trans_iter_exit(&trans, &iter);
@ -426,10 +431,10 @@ static int test_peek_end_extents(struct bch_fs *c, u64 nr)
bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
SPOS(0, 0, U32_MAX), 0);
lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek(&iter)));
lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
BUG_ON(k.k);
lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek(&iter)));
lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
BUG_ON(k.k);
bch2_trans_iter_exit(&trans, &iter);
@ -519,7 +524,7 @@ static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi)
bch2_trans_init(&trans, c, 0, 0);
bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs,
SPOS(0, 0, snapid_lo), 0);
lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek(&iter)));
lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
BUG_ON(k.k->p.snapshot != U32_MAX);
@ -798,8 +803,9 @@ static int seq_lookup(struct bch_fs *c, u64 nr)
bch2_trans_init(&trans, c, 0, 0);
ret = for_each_btree_key2(&trans, iter, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX), 0, k,
ret = for_each_btree_key2_upto(&trans, iter, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
0, k,
0);
if (ret)
bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
@ -839,7 +845,8 @@ static int seq_delete(struct bch_fs *c, u64 nr)
int ret;
ret = bch2_btree_delete_range(c, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX), SPOS_MAX,
SPOS(0, 0, U32_MAX),
POS(0, U64_MAX),
0, NULL);
if (ret)
bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));