mirror of
https://github.com/torvalds/linux
synced 2024-11-05 18:23:50 +00:00
bcachefs: btree_gc no longer uses main in-memory bucket array
This changes the btree_gc code to only use the second bucket array, the one dedicated to GC. On completion, it compares what's in its in-memory bucket array to the allocation information in the btree and writes it directly, instead of updating the main in-memory bucket array and writing that. Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
This commit is contained in:
parent
63a2edce94
commit
ec061b215d
5 changed files with 225 additions and 246 deletions
|
@ -39,15 +39,6 @@ static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = {
|
|||
#undef x
|
||||
};
|
||||
|
||||
struct bkey_alloc_buf {
|
||||
struct bkey_i k;
|
||||
struct bch_alloc_v3 v;
|
||||
|
||||
#define x(_name, _bits) + _bits / 8
|
||||
u8 _pad[0 + BCH_ALLOC_FIELDS_V2()];
|
||||
#undef x
|
||||
} __attribute__((packed, aligned(8)));
|
||||
|
||||
/* Persistent alloc info: */
|
||||
|
||||
static inline u64 alloc_field_v1_get(const struct bch_alloc *a,
|
||||
|
@ -254,24 +245,31 @@ struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static void bch2_alloc_pack(struct bch_fs *c,
|
||||
struct bkey_alloc_buf *dst,
|
||||
const struct bkey_alloc_unpacked src)
|
||||
struct bkey_alloc_buf *bch2_alloc_pack(struct btree_trans *trans,
|
||||
const struct bkey_alloc_unpacked src)
|
||||
{
|
||||
bch2_alloc_pack_v3(dst, src);
|
||||
struct bkey_alloc_buf *dst;
|
||||
|
||||
dst = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf));
|
||||
if (!IS_ERR(dst))
|
||||
bch2_alloc_pack_v3(dst, src);
|
||||
|
||||
return dst;
|
||||
}
|
||||
|
||||
int bch2_alloc_write(struct btree_trans *trans, struct btree_iter *iter,
|
||||
struct bkey_alloc_unpacked *u, unsigned trigger_flags)
|
||||
{
|
||||
struct bkey_alloc_buf *a;
|
||||
struct bkey_alloc_buf *a = bch2_alloc_pack(trans, *u);
|
||||
|
||||
a = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf));
|
||||
if (IS_ERR(a))
|
||||
return PTR_ERR(a);
|
||||
|
||||
bch2_alloc_pack(trans->c, a, *u);
|
||||
return bch2_trans_update(trans, iter, &a->k, trigger_flags|
|
||||
/*
|
||||
* Without BTREE_UPDATE_NO_KEY_CACHE_COHERENCY, we may end up updating
|
||||
* the btree instead of the key cache - this can cause the allocator to
|
||||
* self-deadlock, since updating the btree may require allocating new
|
||||
* btree nodes:
|
||||
*/
|
||||
return PTR_ERR_OR_ZERO(a) ?:
|
||||
bch2_trans_update(trans, iter, &a->k, trigger_flags|
|
||||
BTREE_UPDATE_NO_KEY_CACHE_COHERENCY);
|
||||
}
|
||||
|
||||
|
@ -342,7 +340,7 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
|
|||
#undef x
|
||||
}
|
||||
|
||||
int bch2_alloc_read(struct bch_fs *c)
|
||||
int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
|
@ -353,108 +351,43 @@ int bch2_alloc_read(struct bch_fs *c)
|
|||
int ret;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
down_read(&c->gc_lock);
|
||||
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
|
||||
BTREE_ITER_PREFETCH, k, ret) {
|
||||
if (!bkey_is_alloc(k.k))
|
||||
continue;
|
||||
|
||||
ca = bch_dev_bkey_exists(c, k.k->p.inode);
|
||||
g = bucket(ca, k.k->p.offset);
|
||||
g = __bucket(ca, k.k->p.offset, gc);
|
||||
u = bch2_alloc_unpack(k);
|
||||
|
||||
*bucket_gen(ca, k.k->p.offset) = u.gen;
|
||||
if (!gc)
|
||||
*bucket_gen(ca, k.k->p.offset) = u.gen;
|
||||
|
||||
g->_mark.gen = u.gen;
|
||||
g->_mark.data_type = u.data_type;
|
||||
g->_mark.dirty_sectors = u.dirty_sectors;
|
||||
g->_mark.cached_sectors = u.cached_sectors;
|
||||
g->_mark.stripe = u.stripe != 0;
|
||||
g->stripe = u.stripe;
|
||||
g->stripe_redundancy = u.stripe_redundancy;
|
||||
g->io_time[READ] = u.read_time;
|
||||
g->io_time[WRITE] = u.write_time;
|
||||
g->oldest_gen = u.oldest_gen;
|
||||
g->oldest_gen = !gc ? u.oldest_gen : u.gen;
|
||||
g->gen_valid = 1;
|
||||
}
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
|
||||
up_read(&c->gc_lock);
|
||||
bch2_trans_exit(&trans);
|
||||
|
||||
if (ret) {
|
||||
bch_err(c, "error reading alloc info: %i", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_alloc_write_key(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
unsigned flags)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bkey_s_c k;
|
||||
struct bkey_alloc_unpacked old_u, new_u;
|
||||
int ret;
|
||||
retry:
|
||||
bch2_trans_begin(trans);
|
||||
|
||||
ret = bch2_btree_key_cache_flush(trans,
|
||||
BTREE_ID_alloc, iter->pos);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
k = bch2_btree_iter_peek_slot(iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
old_u = bch2_alloc_unpack(k);
|
||||
new_u = alloc_mem_to_key(c, iter);
|
||||
|
||||
if (!bkey_alloc_unpacked_cmp(old_u, new_u))
|
||||
return 0;
|
||||
|
||||
ret = bch2_alloc_write(trans, iter, &new_u,
|
||||
BTREE_TRIGGER_NORUN) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL|flags);
|
||||
err:
|
||||
if (ret == -EINTR)
|
||||
goto retry;
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_alloc_write_all(struct bch_fs *c, unsigned flags)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
struct bch_dev *ca;
|
||||
unsigned i;
|
||||
int ret = 0;
|
||||
|
||||
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
|
||||
bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc, POS_MIN,
|
||||
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
|
||||
|
||||
for_each_member_device(ca, c, i) {
|
||||
bch2_btree_iter_set_pos(&iter,
|
||||
POS(ca->dev_idx, ca->mi.first_bucket));
|
||||
|
||||
while (iter.pos.offset < ca->mi.nbuckets) {
|
||||
ret = bch2_alloc_write_key(&trans, &iter, flags);
|
||||
if (ret) {
|
||||
percpu_ref_put(&ca->ref);
|
||||
goto err;
|
||||
}
|
||||
bch2_btree_iter_advance(&iter);
|
||||
if (!gc ||
|
||||
(metadata_only &&
|
||||
(u.data_type == BCH_DATA_user ||
|
||||
u.data_type == BCH_DATA_cached ||
|
||||
u.data_type == BCH_DATA_parity))) {
|
||||
g->_mark.data_type = u.data_type;
|
||||
g->_mark.dirty_sectors = u.dirty_sectors;
|
||||
g->_mark.cached_sectors = u.cached_sectors;
|
||||
g->_mark.stripe = u.stripe != 0;
|
||||
g->stripe = u.stripe;
|
||||
g->stripe_redundancy = u.stripe_redundancy;
|
||||
}
|
||||
|
||||
}
|
||||
err:
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
|
||||
bch2_trans_exit(&trans);
|
||||
|
||||
if (ret)
|
||||
bch_err(c, "error reading alloc info: %i", ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -38,40 +38,23 @@ static inline bool bkey_alloc_unpacked_cmp(struct bkey_alloc_unpacked l,
|
|||
;
|
||||
}
|
||||
|
||||
struct bkey_alloc_buf {
|
||||
struct bkey_i k;
|
||||
struct bch_alloc_v3 v;
|
||||
|
||||
#define x(_name, _bits) + _bits / 8
|
||||
u8 _pad[0 + BCH_ALLOC_FIELDS_V2()];
|
||||
#undef x
|
||||
} __attribute__((packed, aligned(8)));
|
||||
|
||||
struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c);
|
||||
struct bkey_alloc_buf *bch2_alloc_pack(struct btree_trans *,
|
||||
const struct bkey_alloc_unpacked);
|
||||
int bch2_alloc_write(struct btree_trans *, struct btree_iter *,
|
||||
struct bkey_alloc_unpacked *, unsigned);
|
||||
|
||||
int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int);
|
||||
|
||||
static inline struct bkey_alloc_unpacked
|
||||
alloc_mem_to_key(struct bch_fs *c, struct btree_iter *iter)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
struct bucket *g;
|
||||
struct bkey_alloc_unpacked ret;
|
||||
|
||||
percpu_down_read(&c->mark_lock);
|
||||
ca = bch_dev_bkey_exists(c, iter->pos.inode);
|
||||
g = bucket(ca, iter->pos.offset);
|
||||
ret = (struct bkey_alloc_unpacked) {
|
||||
.dev = iter->pos.inode,
|
||||
.bucket = iter->pos.offset,
|
||||
.gen = g->mark.gen,
|
||||
.oldest_gen = g->oldest_gen,
|
||||
.data_type = g->mark.data_type,
|
||||
.dirty_sectors = g->mark.dirty_sectors,
|
||||
.cached_sectors = g->mark.cached_sectors,
|
||||
.read_time = g->io_time[READ],
|
||||
.write_time = g->io_time[WRITE],
|
||||
.stripe = g->stripe,
|
||||
.stripe_redundancy = g->stripe_redundancy,
|
||||
};
|
||||
percpu_up_read(&c->mark_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define ALLOC_SCAN_BATCH(ca) max_t(size_t, 1, (ca)->mi.nbuckets >> 9)
|
||||
|
||||
const char *bch2_alloc_v1_invalid(const struct bch_fs *, struct bkey_s_c);
|
||||
|
@ -101,7 +84,7 @@ static inline bool bkey_is_alloc(const struct bkey *k)
|
|||
k->type == KEY_TYPE_alloc_v3;
|
||||
}
|
||||
|
||||
int bch2_alloc_read(struct bch_fs *);
|
||||
int bch2_alloc_read(struct bch_fs *, bool, bool);
|
||||
|
||||
static inline void bch2_wake_allocator(struct bch_dev *ca)
|
||||
{
|
||||
|
@ -139,7 +122,6 @@ void bch2_dev_allocator_quiesce(struct bch_fs *, struct bch_dev *);
|
|||
void bch2_dev_allocator_stop(struct bch_dev *);
|
||||
int bch2_dev_allocator_start(struct bch_dev *);
|
||||
|
||||
int bch2_alloc_write_all(struct bch_fs *, unsigned);
|
||||
void bch2_fs_allocator_background_init(struct bch_fs *);
|
||||
|
||||
#endif /* _BCACHEFS_ALLOC_BACKGROUND_H */
|
||||
|
|
|
@ -536,7 +536,6 @@ enum {
|
|||
/* misc: */
|
||||
BCH_FS_NEED_ANOTHER_GC,
|
||||
BCH_FS_DELETED_NODES,
|
||||
BCH_FS_NEED_ALLOC_WRITE,
|
||||
BCH_FS_REBUILD_REPLICAS,
|
||||
BCH_FS_HOLD_BTREE_WRITES,
|
||||
};
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include "alloc_foreground.h"
|
||||
#include "bkey_methods.h"
|
||||
#include "bkey_buf.h"
|
||||
#include "btree_key_cache.h"
|
||||
#include "btree_locking.h"
|
||||
#include "btree_update_interior.h"
|
||||
#include "btree_io.h"
|
||||
|
@ -533,7 +534,6 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
|
|||
bkey_for_each_ptr_decode(k->k, ptrs, p, entry) {
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
|
||||
struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
|
||||
struct bucket *g2 = PTR_BUCKET(ca, &p.ptr);
|
||||
enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry->ptr);
|
||||
|
||||
if (fsck_err_on(!g->gen_valid, c,
|
||||
|
@ -544,9 +544,8 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
|
|||
p.ptr.gen,
|
||||
(bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) {
|
||||
if (!p.ptr.cached) {
|
||||
g2->_mark.gen = g->_mark.gen = p.ptr.gen;
|
||||
g2->gen_valid = g->gen_valid = true;
|
||||
set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
|
||||
g->_mark.gen = p.ptr.gen;
|
||||
g->gen_valid = true;
|
||||
} else {
|
||||
do_update = true;
|
||||
}
|
||||
|
@ -560,13 +559,12 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
|
|||
p.ptr.gen, g->mark.gen,
|
||||
(bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) {
|
||||
if (!p.ptr.cached) {
|
||||
g2->_mark.gen = g->_mark.gen = p.ptr.gen;
|
||||
g2->gen_valid = g->gen_valid = true;
|
||||
g2->_mark.data_type = 0;
|
||||
g2->_mark.dirty_sectors = 0;
|
||||
g2->_mark.cached_sectors = 0;
|
||||
g->_mark.gen = p.ptr.gen;
|
||||
g->gen_valid = true;
|
||||
g->_mark.data_type = 0;
|
||||
g->_mark.dirty_sectors = 0;
|
||||
g->_mark.cached_sectors = 0;
|
||||
set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
|
||||
set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
|
||||
} else {
|
||||
do_update = true;
|
||||
}
|
||||
|
@ -603,8 +601,7 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
|
|||
bch2_data_types[data_type],
|
||||
(bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) {
|
||||
if (data_type == BCH_DATA_btree) {
|
||||
g2->_mark.data_type = g->_mark.data_type = data_type;
|
||||
set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
|
||||
g->_mark.data_type = data_type;
|
||||
set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
|
||||
} else {
|
||||
do_update = true;
|
||||
|
@ -1169,13 +1166,14 @@ static int bch2_gc_done(struct bch_fs *c,
|
|||
unsigned i, dev;
|
||||
int ret = 0;
|
||||
|
||||
percpu_down_write(&c->mark_lock);
|
||||
|
||||
#define copy_field(_f, _msg, ...) \
|
||||
if (dst->_f != src->_f) { \
|
||||
if (verify) \
|
||||
fsck_err(c, _msg ": got %llu, should be %llu" \
|
||||
, ##__VA_ARGS__, dst->_f, src->_f); \
|
||||
dst->_f = src->_f; \
|
||||
set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); \
|
||||
}
|
||||
#define copy_stripe_field(_f, _msg, ...) \
|
||||
if (dst->_f != src->_f) { \
|
||||
|
@ -1185,18 +1183,6 @@ static int bch2_gc_done(struct bch_fs *c,
|
|||
iter.pos, ##__VA_ARGS__, \
|
||||
dst->_f, src->_f); \
|
||||
dst->_f = src->_f; \
|
||||
set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); \
|
||||
}
|
||||
#define copy_bucket_field(_f) \
|
||||
if (dst->b[b]._f != src->b[b]._f) { \
|
||||
if (verify) \
|
||||
fsck_err(c, "bucket %u:%zu gen %u data type %s has wrong " #_f \
|
||||
": got %u, should be %u", dev, b, \
|
||||
dst->b[b].mark.gen, \
|
||||
bch2_data_types[dst->b[b].mark.data_type],\
|
||||
dst->b[b]._f, src->b[b]._f); \
|
||||
dst->b[b]._f = src->b[b]._f; \
|
||||
set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); \
|
||||
}
|
||||
#define copy_dev_field(_f, _msg, ...) \
|
||||
copy_field(_f, "dev %u has wrong " _msg, dev, ##__VA_ARGS__)
|
||||
|
@ -1207,36 +1193,18 @@ static int bch2_gc_done(struct bch_fs *c,
|
|||
bch2_fs_usage_acc_to_base(c, i);
|
||||
|
||||
for_each_member_device(ca, c, dev) {
|
||||
struct bucket_array *dst = __bucket_array(ca, 0);
|
||||
struct bucket_array *src = __bucket_array(ca, 1);
|
||||
size_t b;
|
||||
struct bch_dev_usage *dst = ca->usage_base;
|
||||
struct bch_dev_usage *src = (void *)
|
||||
bch2_acc_percpu_u64s((void *) ca->usage_gc,
|
||||
dev_usage_u64s());
|
||||
|
||||
for (b = 0; b < src->nbuckets; b++) {
|
||||
copy_bucket_field(_mark.gen);
|
||||
copy_bucket_field(_mark.data_type);
|
||||
copy_bucket_field(_mark.stripe);
|
||||
copy_bucket_field(_mark.dirty_sectors);
|
||||
copy_bucket_field(_mark.cached_sectors);
|
||||
copy_bucket_field(stripe_redundancy);
|
||||
copy_bucket_field(stripe);
|
||||
copy_dev_field(buckets_ec, "buckets_ec");
|
||||
copy_dev_field(buckets_unavailable, "buckets_unavailable");
|
||||
|
||||
dst->b[b].oldest_gen = src->b[b].oldest_gen;
|
||||
}
|
||||
|
||||
{
|
||||
struct bch_dev_usage *dst = ca->usage_base;
|
||||
struct bch_dev_usage *src = (void *)
|
||||
bch2_acc_percpu_u64s((void *) ca->usage_gc,
|
||||
dev_usage_u64s());
|
||||
|
||||
copy_dev_field(buckets_ec, "buckets_ec");
|
||||
copy_dev_field(buckets_unavailable, "buckets_unavailable");
|
||||
|
||||
for (i = 0; i < BCH_DATA_NR; i++) {
|
||||
copy_dev_field(d[i].buckets, "%s buckets", bch2_data_types[i]);
|
||||
copy_dev_field(d[i].sectors, "%s sectors", bch2_data_types[i]);
|
||||
copy_dev_field(d[i].fragmented, "%s fragmented", bch2_data_types[i]);
|
||||
}
|
||||
for (i = 0; i < BCH_DATA_NR; i++) {
|
||||
copy_dev_field(d[i].buckets, "%s buckets", bch2_data_types[i]);
|
||||
copy_dev_field(d[i].sectors, "%s sectors", bch2_data_types[i]);
|
||||
copy_dev_field(d[i].fragmented, "%s fragmented", bch2_data_types[i]);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -1278,7 +1246,6 @@ static int bch2_gc_done(struct bch_fs *c,
|
|||
|
||||
#undef copy_fs_field
|
||||
#undef copy_dev_field
|
||||
#undef copy_bucket_field
|
||||
#undef copy_stripe_field
|
||||
#undef copy_field
|
||||
fsck_err:
|
||||
|
@ -1286,6 +1253,8 @@ static int bch2_gc_done(struct bch_fs *c,
|
|||
percpu_ref_put(&ca->ref);
|
||||
if (ret)
|
||||
bch_err(c, "%s: ret %i", __func__, ret);
|
||||
|
||||
percpu_up_write(&c->mark_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1308,15 +1277,6 @@ static int bch2_gc_start(struct bch_fs *c,
|
|||
BUG_ON(ca->buckets[1]);
|
||||
BUG_ON(ca->usage_gc);
|
||||
|
||||
ca->buckets[1] = kvpmalloc(sizeof(struct bucket_array) +
|
||||
ca->mi.nbuckets * sizeof(struct bucket),
|
||||
GFP_KERNEL|__GFP_ZERO);
|
||||
if (!ca->buckets[1]) {
|
||||
percpu_ref_put(&ca->ref);
|
||||
bch_err(c, "error allocating ca->buckets[gc]");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
ca->usage_gc = alloc_percpu(struct bch_dev_usage);
|
||||
if (!ca->usage_gc) {
|
||||
bch_err(c, "error allocating ca->usage_gc");
|
||||
|
@ -1325,33 +1285,151 @@ static int bch2_gc_start(struct bch_fs *c,
|
|||
}
|
||||
}
|
||||
|
||||
percpu_down_write(&c->mark_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_alloc_write_key(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
bool initial, bool metadata_only)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode);
|
||||
struct bucket *g;
|
||||
struct bkey_s_c k;
|
||||
struct bkey_alloc_unpacked old_u, new_u, gc_u;
|
||||
struct bkey_alloc_buf *a;
|
||||
int ret;
|
||||
|
||||
k = bch2_btree_iter_peek_slot(iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
old_u = new_u = bch2_alloc_unpack(k);
|
||||
|
||||
percpu_down_read(&c->mark_lock);
|
||||
g = gc_bucket(ca, iter->pos.offset);
|
||||
gc_u = (struct bkey_alloc_unpacked) {
|
||||
.dev = iter->pos.inode,
|
||||
.bucket = iter->pos.offset,
|
||||
.gen = g->mark.gen,
|
||||
.oldest_gen = g->oldest_gen,
|
||||
.data_type = g->mark.data_type,
|
||||
.dirty_sectors = g->mark.dirty_sectors,
|
||||
.cached_sectors = g->mark.cached_sectors,
|
||||
.read_time = g->io_time[READ],
|
||||
.write_time = g->io_time[WRITE],
|
||||
.stripe = g->stripe,
|
||||
.stripe_redundancy = g->stripe_redundancy,
|
||||
};
|
||||
percpu_up_read(&c->mark_lock);
|
||||
|
||||
if (metadata_only &&
|
||||
gc_u.data_type != BCH_DATA_sb &&
|
||||
gc_u.data_type != BCH_DATA_journal &&
|
||||
gc_u.data_type != BCH_DATA_btree)
|
||||
return 0;
|
||||
|
||||
if (!bkey_alloc_unpacked_cmp(old_u, gc_u) ||
|
||||
gen_after(old_u.gen, gc_u.gen))
|
||||
return 0;
|
||||
|
||||
#define copy_bucket_field(_f) \
|
||||
if (fsck_err_on(new_u._f != gc_u._f, c, \
|
||||
"bucket %llu:%llu gen %u data type %s has wrong " #_f \
|
||||
": got %u, should be %u", \
|
||||
iter->pos.inode, iter->pos.offset, \
|
||||
new_u.gen, \
|
||||
bch2_data_types[new_u.data_type], \
|
||||
new_u._f, gc_u._f)) \
|
||||
new_u._f = gc_u._f; \
|
||||
|
||||
copy_bucket_field(gen);
|
||||
copy_bucket_field(data_type);
|
||||
copy_bucket_field(stripe);
|
||||
copy_bucket_field(dirty_sectors);
|
||||
copy_bucket_field(cached_sectors);
|
||||
copy_bucket_field(stripe_redundancy);
|
||||
copy_bucket_field(stripe);
|
||||
#undef copy_bucket_field
|
||||
|
||||
new_u.oldest_gen = gc_u.oldest_gen;
|
||||
|
||||
if (!bkey_alloc_unpacked_cmp(old_u, new_u))
|
||||
return 0;
|
||||
|
||||
a = bch2_alloc_pack(trans, new_u);
|
||||
if (IS_ERR(a))
|
||||
return PTR_ERR(a);
|
||||
|
||||
ret = initial
|
||||
? bch2_journal_key_insert(c, BTREE_ID_alloc, 0, &a->k)
|
||||
: bch2_trans_update(trans, iter, &a->k, BTREE_TRIGGER_NORUN);
|
||||
fsck_err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_gc_alloc_done(struct bch_fs *c, bool initial, bool metadata_only)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
struct bch_dev *ca;
|
||||
unsigned i;
|
||||
int ret = 0;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
for_each_member_device(ca, c, i) {
|
||||
struct bucket_array *dst = __bucket_array(ca, 1);
|
||||
struct bucket_array *src = __bucket_array(ca, 0);
|
||||
size_t b;
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_alloc,
|
||||
POS(ca->dev_idx, ca->mi.first_bucket),
|
||||
BTREE_ITER_SLOTS|
|
||||
BTREE_ITER_PREFETCH, k, ret) {
|
||||
if (bkey_cmp(iter.pos, POS(ca->dev_idx, ca->mi.nbuckets)) >= 0)
|
||||
break;
|
||||
|
||||
dst->first_bucket = src->first_bucket;
|
||||
dst->nbuckets = src->nbuckets;
|
||||
|
||||
for (b = 0; b < src->nbuckets; b++) {
|
||||
struct bucket *d = &dst->b[b];
|
||||
struct bucket *s = &src->b[b];
|
||||
|
||||
d->_mark.gen = dst->b[b].oldest_gen = s->mark.gen;
|
||||
d->gen_valid = s->gen_valid;
|
||||
|
||||
if (metadata_only &&
|
||||
(s->mark.data_type == BCH_DATA_user ||
|
||||
s->mark.data_type == BCH_DATA_cached))
|
||||
d->_mark = s->mark;
|
||||
ret = __bch2_trans_do(&trans, NULL, NULL,
|
||||
BTREE_INSERT_LAZY_RW,
|
||||
bch2_alloc_write_key(&trans, &iter,
|
||||
initial, metadata_only));
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
|
||||
if (ret) {
|
||||
bch_err(c, "error writing alloc info: %i", ret);
|
||||
percpu_ref_put(&ca->ref);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bch2_trans_exit(&trans);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_gc_alloc_start(struct bch_fs *c, bool initial, bool metadata_only)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
unsigned i;
|
||||
|
||||
for_each_member_device(ca, c, i) {
|
||||
struct bucket_array *buckets = kvpmalloc(sizeof(struct bucket_array) +
|
||||
ca->mi.nbuckets * sizeof(struct bucket),
|
||||
GFP_KERNEL|__GFP_ZERO);
|
||||
if (!buckets) {
|
||||
percpu_ref_put(&ca->ref);
|
||||
percpu_up_write(&c->mark_lock);
|
||||
bch_err(c, "error allocating ca->buckets[gc]");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
buckets->first_bucket = ca->mi.first_bucket;
|
||||
buckets->nbuckets = ca->mi.nbuckets;
|
||||
rcu_assign_pointer(ca->buckets[1], buckets);
|
||||
};
|
||||
|
||||
percpu_up_write(&c->mark_lock);
|
||||
|
||||
return 0;
|
||||
return bch2_alloc_read(c, true, metadata_only);
|
||||
}
|
||||
|
||||
static void bch2_gc_alloc_reset(struct bch_fs *c, bool initial, bool metadata_only)
|
||||
|
@ -1598,6 +1676,7 @@ int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only)
|
|||
!bch2_btree_interior_updates_nr_pending(c));
|
||||
|
||||
ret = bch2_gc_start(c, metadata_only) ?:
|
||||
bch2_gc_alloc_start(c, initial, metadata_only) ?:
|
||||
bch2_gc_reflink_start(c, initial, metadata_only);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
@ -1665,16 +1744,15 @@ int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only)
|
|||
if (!ret) {
|
||||
bch2_journal_block(&c->journal);
|
||||
|
||||
percpu_down_write(&c->mark_lock);
|
||||
ret = bch2_gc_reflink_done(c, initial, metadata_only) ?:
|
||||
bch2_gc_stripes_done(c, initial, metadata_only) ?:
|
||||
ret = bch2_gc_stripes_done(c, initial, metadata_only) ?:
|
||||
bch2_gc_reflink_done(c, initial, metadata_only) ?:
|
||||
bch2_gc_alloc_done(c, initial, metadata_only) ?:
|
||||
bch2_gc_done(c, initial, metadata_only);
|
||||
|
||||
bch2_journal_unblock(&c->journal);
|
||||
} else {
|
||||
percpu_down_write(&c->mark_lock);
|
||||
}
|
||||
|
||||
percpu_down_write(&c->mark_lock);
|
||||
/* Indicates that gc is no longer in progress: */
|
||||
__gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
|
||||
|
||||
|
|
|
@ -1113,7 +1113,11 @@ int bch2_fs_recovery(struct bch_fs *c)
|
|||
|
||||
bch_verbose(c, "starting alloc read");
|
||||
err = "error reading allocation information";
|
||||
ret = bch2_alloc_read(c);
|
||||
|
||||
down_read(&c->gc_lock);
|
||||
ret = bch2_alloc_read(c, false, false);
|
||||
up_read(&c->gc_lock);
|
||||
|
||||
if (ret)
|
||||
goto err;
|
||||
bch_verbose(c, "alloc read done");
|
||||
|
@ -1171,23 +1175,6 @@ int bch2_fs_recovery(struct bch_fs *c)
|
|||
if (c->opts.verbose || !c->sb.clean)
|
||||
bch_info(c, "journal replay done");
|
||||
|
||||
if (test_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags) &&
|
||||
!c->opts.nochanges) {
|
||||
/*
|
||||
* note that even when filesystem was clean there might be work
|
||||
* to do here, if we ran gc (because of fsck) which recalculated
|
||||
* oldest_gen:
|
||||
*/
|
||||
bch_verbose(c, "writing allocation info");
|
||||
err = "error writing out alloc info";
|
||||
ret = bch2_alloc_write_all(c, BTREE_INSERT_LAZY_RW);
|
||||
if (ret) {
|
||||
bch_err(c, "error writing alloc info");
|
||||
goto err;
|
||||
}
|
||||
bch_verbose(c, "alloc write done");
|
||||
}
|
||||
|
||||
if (c->sb.version < bcachefs_metadata_version_snapshot_2) {
|
||||
bch2_fs_lazy_rw(c);
|
||||
|
||||
|
|
Loading…
Reference in a new issue