bcachefs: Kill struct bucket_mark

This switches struct bucket to using a lock, instead of cmpxchg. And now
that the protected members no longer need to fit into a u64, we can
expand the sector counts to 32 bits.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2022-02-14 00:07:38 -05:00 committed by Kent Overstreet
parent 5735608c14
commit 66d9082385
5 changed files with 116 additions and 158 deletions

View file

@ -903,8 +903,8 @@ struct bch_alloc_v2 {
#define BCH_ALLOC_FIELDS_V2() \
x(read_time, 64) \
x(write_time, 64) \
x(dirty_sectors, 16) \
x(cached_sectors, 16) \
x(dirty_sectors, 32) \
x(cached_sectors, 32) \
x(stripe, 32) \
x(stripe_redundancy, 8)

View file

@ -571,37 +571,37 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
if (!p.ptr.cached) {
g->_mark.gen = p.ptr.gen;
g->gen_valid = true;
g->gen = p.ptr.gen;
} else {
do_update = true;
}
}
if (fsck_err_on(gen_cmp(p.ptr.gen, g->mark.gen) > 0, c,
if (fsck_err_on(gen_cmp(p.ptr.gen, g->gen) > 0, c,
"bucket %u:%zu data type %s ptr gen in the future: %u > %u\n"
"while marking %s",
p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
bch2_data_types[ptr_data_type(k->k, &p.ptr)],
p.ptr.gen, g->mark.gen,
p.ptr.gen, g->gen,
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
if (!p.ptr.cached) {
g->_mark.gen = p.ptr.gen;
g->gen_valid = true;
g->_mark.data_type = 0;
g->_mark.dirty_sectors = 0;
g->_mark.cached_sectors = 0;
g->gen = p.ptr.gen;
g->data_type = 0;
g->dirty_sectors = 0;
g->cached_sectors = 0;
set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
} else {
do_update = true;
}
}
if (fsck_err_on(gen_cmp(g->mark.gen, p.ptr.gen) > BUCKET_GC_GEN_MAX, c,
if (fsck_err_on(gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX, c,
"bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
"while marking %s",
p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->mark.gen,
p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen,
bch2_data_types[ptr_data_type(k->k, &p.ptr)],
p.ptr.gen,
(printbuf_reset(&buf),
@ -609,30 +609,30 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
do_update = true;
if (fsck_err_on(!p.ptr.cached &&
gen_cmp(p.ptr.gen, g->mark.gen) < 0, c,
gen_cmp(p.ptr.gen, g->gen) < 0, c,
"bucket %u:%zu data type %s stale dirty ptr: %u < %u\n"
"while marking %s",
p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
bch2_data_types[ptr_data_type(k->k, &p.ptr)],
p.ptr.gen, g->mark.gen,
p.ptr.gen, g->gen,
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))
do_update = true;
if (data_type != BCH_DATA_btree && p.ptr.gen != g->mark.gen)
if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen)
continue;
if (fsck_err_on(g->mark.data_type &&
g->mark.data_type != data_type, c,
if (fsck_err_on(g->data_type &&
g->data_type != data_type, c,
"bucket %u:%zu different types of data in same bucket: %s, %s\n"
"while marking %s",
p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
bch2_data_types[g->mark.data_type],
bch2_data_types[g->data_type],
bch2_data_types[data_type],
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
if (data_type == BCH_DATA_btree) {
g->_mark.data_type = data_type;
g->data_type = data_type;
set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
} else {
do_update = true;
@ -692,7 +692,7 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
struct bucket *g = PTR_GC_BUCKET(ca, ptr);
ptr->gen = g->mark.gen;
ptr->gen = g->gen;
}
} else {
bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, ({
@ -701,12 +701,12 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, ptr);
(ptr->cached &&
(!g->gen_valid || gen_cmp(ptr->gen, g->mark.gen) > 0)) ||
(!g->gen_valid || gen_cmp(ptr->gen, g->gen) > 0)) ||
(!ptr->cached &&
gen_cmp(ptr->gen, g->mark.gen) < 0) ||
gen_cmp(g->mark.gen, ptr->gen) > BUCKET_GC_GEN_MAX ||
(g->mark.data_type &&
g->mark.data_type != data_type);
gen_cmp(ptr->gen, g->gen) < 0) ||
gen_cmp(g->gen, ptr->gen) > BUCKET_GC_GEN_MAX ||
(g->data_type &&
g->data_type != data_type);
}));
again:
ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
@ -1325,10 +1325,10 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode);
struct bucket *g;
struct bucket gc;
struct bkey_s_c k;
struct bkey_i_alloc_v4 *a;
struct bch_alloc_v4 old, new, gc;
struct bch_alloc_v4 old, new;
int ret;
k = bch2_btree_iter_peek_slot(iter);
@ -1340,15 +1340,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
new = old;
percpu_down_read(&c->mark_lock);
g = gc_bucket(ca, iter->pos.offset);
gc = (struct bch_alloc_v4) {
.gen = g->mark.gen,
.data_type = g->mark.data_type,
.dirty_sectors = g->mark.dirty_sectors,
.cached_sectors = g->mark.cached_sectors,
.stripe = g->stripe,
.stripe_redundancy = g->stripe_redundancy,
};
gc = *gc_bucket(ca, iter->pos.offset);
percpu_up_read(&c->mark_lock);
if (metadata_only &&
@ -1365,8 +1357,8 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
"bucket %llu:%llu gen %u data type %s has wrong " #_f \
": got %u, should be %u", \
iter->pos.inode, iter->pos.offset, \
new.gen, \
bch2_data_types[new.data_type], \
gc.gen, \
bch2_data_types[gc.data_type], \
new._f, gc._f)) \
new._f = gc._f; \
@ -1467,17 +1459,16 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
bch2_alloc_to_v4(k, &a);
g->_mark.gen = a.gen;
g->gen_valid = 1;
g->gen_valid = 1;
g->gen = a.gen;
if (metadata_only &&
(a.data_type == BCH_DATA_user ||
a.data_type == BCH_DATA_cached ||
a.data_type == BCH_DATA_parity)) {
g->_mark.data_type = a.data_type;
g->_mark.dirty_sectors = a.dirty_sectors;
g->_mark.cached_sectors = a.cached_sectors;
g->_mark.stripe = a.stripe != 0;
g->data_type = a.data_type;
g->dirty_sectors = a.dirty_sectors;
g->cached_sectors = a.cached_sectors;
g->stripe = a.stripe;
g->stripe_redundancy = a.stripe_redundancy;
}
@ -1503,12 +1494,12 @@ static void bch2_gc_alloc_reset(struct bch_fs *c, bool metadata_only)
for_each_bucket(g, buckets) {
if (metadata_only &&
(g->mark.data_type == BCH_DATA_user ||
g->mark.data_type == BCH_DATA_cached ||
g->mark.data_type == BCH_DATA_parity))
(g->data_type == BCH_DATA_user ||
g->data_type == BCH_DATA_cached ||
g->data_type == BCH_DATA_parity))
continue;
g->_mark.dirty_sectors = 0;
g->_mark.cached_sectors = 0;
g->dirty_sectors = 0;
g->cached_sectors = 0;
}
};
}

View file

@ -349,7 +349,7 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
}
static void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca,
struct bucket_mark old, struct bucket_mark new,
struct bucket old, struct bucket new,
u64 journal_seq, bool gc)
{
struct bch_alloc_v4 old_a = {
@ -586,20 +586,19 @@ int bch2_mark_alloc(struct btree_trans *trans,
bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, gc);
if (gc) {
struct bucket_mark old_m, m;
struct bucket *g = gc_bucket(ca, new.k->p.offset);
old_m = bucket_cmpxchg(g, m, ({
m.gen = new_a.gen;
m.data_type = new_a.data_type;
m.dirty_sectors = new_a.dirty_sectors;
m.cached_sectors = new_a.cached_sectors;
m.stripe = new_a.stripe != 0;
}));
bucket_lock(g);
g->gen_valid = 1;
g->gen = new_a.gen;
g->data_type = new_a.data_type;
g->stripe = new_a.stripe;
g->stripe_redundancy = new_a.stripe_redundancy;
g->dirty_sectors = new_a.dirty_sectors;
g->cached_sectors = new_a.cached_sectors;
bucket_unlock(g);
}
percpu_up_read(&c->mark_lock);
@ -625,23 +624,12 @@ int bch2_mark_alloc(struct btree_trans *trans,
return 0;
}
/*
 * checked_add() - add (b) to the lvalue (a), saturating at U16_MAX.
 *
 * The sum is computed as an unsigned value; if it exceeds U16_MAX it is
 * clamped to U16_MAX before being stored back into (a). The macro evaluates
 * to true when clamping happened and false otherwise.
 *
 * Built on the GNU statement-expression extension. (a) is expanded twice
 * (once as an operand, once as the assignment target), so it must not have
 * side effects.
 */
#define checked_add(a, b) \
({ \
unsigned _res = (unsigned) (a) + (b); \
bool overflow = _res > U16_MAX; \
if (overflow) \
_res = U16_MAX; \
(a) = _res; \
overflow; \
})
void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
size_t b, enum bch_data_type data_type,
unsigned sectors, struct gc_pos pos,
unsigned flags)
{
struct bucket *g;
struct bucket_mark old, new;
struct bucket old, new, *g;
bool overflow;
BUG_ON(!(flags & BTREE_TRIGGER_GC));
@ -656,10 +644,16 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
percpu_down_read(&c->mark_lock);
g = gc_bucket(ca, b);
old = bucket_cmpxchg(g, new, ({
new.data_type = data_type;
overflow = checked_add(new.dirty_sectors, sectors);
}));
bucket_lock(g);
old = *g;
g->data_type = data_type;
g->dirty_sectors += sectors;
overflow = g->dirty_sectors < sectors;
new = *g;
bucket_unlock(g);
bch2_fs_inconsistent_on(old.data_type &&
old.data_type != data_type, c,
@ -693,7 +687,7 @@ static int check_bucket_ref(struct bch_fs *c,
const struct bch_extent_ptr *ptr,
s64 sectors, enum bch_data_type ptr_data_type,
u8 b_gen, u8 bucket_data_type,
u16 dirty_sectors, u16 cached_sectors)
u32 dirty_sectors, u32 cached_sectors)
{
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
size_t bucket_nr = PTR_BUCKET_NR(ca, ptr);
@ -761,7 +755,7 @@ static int check_bucket_ref(struct bch_fs *c,
goto err;
}
if ((unsigned) (bucket_sectors + sectors) > U16_MAX) {
if ((unsigned) (bucket_sectors + sectors) > U32_MAX) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U16_MAX\n"
"while marking %s",
@ -792,8 +786,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
s64 sectors = parity ? le16_to_cpu(s->sectors) : 0;
const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx;
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
struct bucket *g;
struct bucket_mark new, old;
struct bucket old, new, *g;
struct printbuf buf = PRINTBUF;
int ret = 0;
@ -805,33 +798,37 @@ static int mark_stripe_bucket(struct btree_trans *trans,
buf.atomic++;
g = PTR_GC_BUCKET(ca, ptr);
if (g->mark.dirty_sectors ||
if (g->dirty_sectors ||
(g->stripe && g->stripe != k.k->p.offset)) {
bch2_fs_inconsistent(c,
"bucket %u:%zu gen %u: multiple stripes using same bucket\n%s",
ptr->dev, PTR_BUCKET_NR(ca, ptr), g->mark.gen,
ptr->dev, PTR_BUCKET_NR(ca, ptr), g->gen,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
ret = -EINVAL;
goto err;
}
old = bucket_cmpxchg(g, new, ({
ret = check_bucket_ref(c, k, ptr, sectors, data_type,
new.gen, new.data_type,
new.dirty_sectors, new.cached_sectors);
if (ret)
goto err;
bucket_lock(g);
old = *g;
new.dirty_sectors += sectors;
if (data_type)
new.data_type = data_type;
ret = check_bucket_ref(c, k, ptr, sectors, data_type,
new.gen, new.data_type,
new.dirty_sectors, new.cached_sectors);
if (ret) {
bucket_unlock(g);
goto err;
}
new.stripe = true;
}));
new.dirty_sectors += sectors;
if (data_type)
new.data_type = data_type;
g->stripe = k.k->p.offset;
g->stripe_redundancy = s->nr_redundant;
new = *g;
bucket_unlock(g);
bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
err:
percpu_up_read(&c->mark_lock);
@ -844,9 +841,9 @@ static int __mark_pointer(struct btree_trans *trans,
const struct bch_extent_ptr *ptr,
s64 sectors, enum bch_data_type ptr_data_type,
u8 bucket_gen, u8 *bucket_data_type,
u16 *dirty_sectors, u16 *cached_sectors)
u32 *dirty_sectors, u32 *cached_sectors)
{
u16 *dst_sectors = !ptr->cached
u32 *dst_sectors = !ptr->cached
? dirty_sectors
: cached_sectors;
int ret = check_bucket_ref(trans->c, k, ptr, sectors, ptr_data_type,
@ -870,11 +867,9 @@ static int bch2_mark_pointer(struct btree_trans *trans,
{
u64 journal_seq = trans->journal_res.seq;
struct bch_fs *c = trans->c;
struct bucket_mark old, new;
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
struct bucket *g;
struct bucket old, new, *g;
u8 bucket_data_type;
u64 v;
int ret = 0;
BUG_ON(!(flags & BTREE_TRIGGER_GC));
@ -882,28 +877,25 @@ static int bch2_mark_pointer(struct btree_trans *trans,
percpu_down_read(&c->mark_lock);
g = PTR_GC_BUCKET(ca, &p.ptr);
v = atomic64_read(&g->_mark.v);
do {
new.v.counter = old.v.counter = v;
bucket_data_type = new.data_type;
bucket_lock(g);
old = *g;
ret = __mark_pointer(trans, k, &p.ptr, sectors,
data_type, new.gen,
&bucket_data_type,
&new.dirty_sectors,
&new.cached_sectors);
if (ret)
goto err;
bucket_data_type = g->data_type;
new.data_type = bucket_data_type;
ret = __mark_pointer(trans, k, &p.ptr, sectors,
data_type, g->gen,
&bucket_data_type,
&g->dirty_sectors,
&g->cached_sectors);
if (ret) {
bucket_unlock(g);
goto err;
}
if (flags & BTREE_TRIGGER_NOATOMIC) {
g->_mark = new;
break;
}
} while ((v = atomic64_cmpxchg(&g->_mark.v,
old.v.counter,
new.v.counter)) != old.v.counter);
g->data_type = bucket_data_type;
new = *g;
bucket_unlock(g);
bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
err:
@ -1404,25 +1396,18 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
{
struct btree_iter iter;
struct bkey_i_alloc_v4 *a;
u16 dirty_sectors, cached_sectors;
int ret;
a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(trans->c, &p.ptr));
if (IS_ERR(a))
return PTR_ERR(a);
dirty_sectors = a->v.dirty_sectors;
cached_sectors = a->v.cached_sectors;
ret = __mark_pointer(trans, k, &p.ptr, sectors, data_type,
a->v.gen, &a->v.data_type,
&dirty_sectors, &cached_sectors);
&a->v.dirty_sectors, &a->v.cached_sectors);
if (ret)
goto out;
a->v.dirty_sectors = dirty_sectors;
a->v.cached_sectors = cached_sectors;
ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
if (ret)
goto out;

View file

@ -15,20 +15,16 @@
for (_b = (_buckets)->b + (_buckets)->first_bucket; \
_b < (_buckets)->b + (_buckets)->nbuckets; _b++)
/*
 * bucket_cmpxchg() - read-modify-write of a bucket's packed mark via cmpxchg.
 *
 * Reads g->_mark.v, copies that value into both (new) and a private _old,
 * then evaluates expr, which is expected to modify (new). The result is
 * installed with atomic64_cmpxchg(); if the stored value no longer matches
 * _old, the loop restarts from the value cmpxchg returned, so expr may be
 * evaluated more than once. The macro evaluates to _old, i.e. the mark that
 * was replaced.
 *
 * Note: the initial atomic64_read() expands (g) directly instead of using
 * _g, so the g argument is evaluated twice.
 */
#define bucket_cmpxchg(g, new, expr) \
({ \
struct bucket *_g = g; \
u64 _v = atomic64_read(&(g)->_mark.v); \
struct bucket_mark _old; \
\
do { \
(new).v.counter = _old.v.counter = _v; \
expr; \
} while ((_v = atomic64_cmpxchg(&(_g)->_mark.v, \
_old.v.counter, \
(new).v.counter)) != _old.v.counter);\
_old; \
})
/*
 * bucket_unlock() - release the lock byte that bucket_lock() set.
 *
 * Stores 0 to b->lock using a release store, so that writes made while the
 * lock was held are ordered before the lock is seen as free.
 */
static inline void bucket_unlock(struct bucket *b)
{
smp_store_release(&b->lock, 0);
}
/*
 * bucket_lock() - take the per-bucket lock, spinning until it is free.
 *
 * Atomically writes 1 to b->lock and keeps retrying, with cpu_relax()
 * between attempts, for as long as the value previously stored there was
 * non-zero. Paired with bucket_unlock().
 */
static inline void bucket_lock(struct bucket *b)
{
	for (;;) {
		/* xchg() returns the old value; zero means the lock was free */
		if (!xchg(&b->lock, 1))
			return;

		cpu_relax();
	}
}
static inline struct bucket_array *gc_bucket_array(struct bch_dev *ca)
{

View file

@ -7,29 +7,15 @@
#define BUCKET_JOURNAL_SEQ_BITS 16
/*
 * struct bucket_mark - bucket state packed into a single 64-bit word.
 *
 * The named fields share storage with the atomic64_t @v, so the whole set
 * can be read and replaced as one value (see bucket_cmpxchg()).
 */
struct bucket_mark {
union {
/* the entire mark viewed as one atomic 64-bit value */
atomic64_t v;
struct {
u8 gen;
/* data_type (3 bits) and the single-bit stripe flag share one byte */
u8 data_type:3,
stripe:1;
u16 dirty_sectors;
u16 cached_sectors;
};
};
};
/*
 * struct bucket - per-bucket accounting state: generation, data type,
 * stripe information and dirty/cached sector counts.
 */
struct bucket {
/* _mark is the writable view; mark is a const alias of the same storage */
union {
struct bucket_mark _mark;
const struct bucket_mark mark;
};
unsigned gen_valid:1;
u8 stripe_redundancy;
u32 stripe;
/* lock byte: set to 1 by bucket_lock(), cleared to 0 by bucket_unlock() */
u8 lock;
u8 gen_valid:1;
u8 data_type:7;
u8 gen;
u8 stripe_redundancy;
u32 stripe;
u32 dirty_sectors;
u32 cached_sectors;
};
struct bucket_array {