From a71a261f5c39685698f7f1970dc7046b36e132d1 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 12 Oct 2012 16:59:42 +0100 Subject: [PATCH 1/8] dm mpath: fix check for null mpio in end_io fn The mpio dereference should be moved below the BUG_ON NULL test in multipath_end_io(). spatch with a semantic match was used to find this. (http://coccinelle.lip6.fr/) Signed-off-by: Wei Yongjun Signed-off-by: Alasdair G Kergon --- drivers/md/dm-mpath.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index d778563a4ffd..573bd04591bf 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1309,13 +1309,14 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone, { struct multipath *m = ti->private; struct dm_mpath_io *mpio = map_context->ptr; - struct pgpath *pgpath = mpio->pgpath; + struct pgpath *pgpath; struct path_selector *ps; int r; BUG_ON(!mpio); r = do_end_io(m, clone, error, mpio); + pgpath = mpio->pgpath; if (pgpath) { ps = &pgpath->pg->ps; if (ps->type->end_io) From 54499afbb80e44ae5511984486b4b33d6229fceb Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 12 Oct 2012 16:59:44 +0100 Subject: [PATCH 2/8] dm bufio: use list_move Use list_move() instead of list_del() + list_add(). spatch with a semantic match was used to find this. 
(http://coccinelle.lip6.fr/) Signed-off-by: Wei Yongjun Signed-off-by: Alasdair G Kergon --- drivers/md/dm-bufio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index cc06a1e52423..b9e006aa6062 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -441,8 +441,7 @@ static void __relink_lru(struct dm_buffer *b, int dirty) c->n_buffers[b->list_mode]--; c->n_buffers[dirty]++; b->list_mode = dirty; - list_del(&b->lru_list); - list_add(&b->lru_list, &c->lru[dirty]); + list_move(&b->lru_list, &c->lru[dirty]); } /*---------------------------------------------------------------- From fe5fe90639b62a75349dd1f1c74c4a984397171c Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 12 Oct 2012 16:59:46 +0100 Subject: [PATCH 3/8] dm: use ACCESS_ONCE for sysfs values Use the ACCESS_ONCE macro in dm-bufio and dm-verity where a variable can be modified asynchronously (through sysfs) and we want to prevent compiler optimizations that assume that the variable hasn't changed. (See Documentation/atomic_ops.txt.) Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-bufio.c | 10 +++------- drivers/md/dm-verity.c | 2 +- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index b9e006aa6062..651ca79881dd 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -280,9 +280,7 @@ static void __cache_size_refresh(void) BUG_ON(!mutex_is_locked(&dm_bufio_clients_lock)); BUG_ON(dm_bufio_client_count < 0); - dm_bufio_cache_size_latch = dm_bufio_cache_size; - - barrier(); + dm_bufio_cache_size_latch = ACCESS_ONCE(dm_bufio_cache_size); /* * Use default if set to 0 and report the actual cache size used. 
@@ -812,7 +810,7 @@ static void __get_memory_limit(struct dm_bufio_client *c, { unsigned long buffers; - if (dm_bufio_cache_size != dm_bufio_cache_size_latch) { + if (ACCESS_ONCE(dm_bufio_cache_size) != dm_bufio_cache_size_latch) { mutex_lock(&dm_bufio_clients_lock); __cache_size_refresh(); mutex_unlock(&dm_bufio_clients_lock); @@ -1590,11 +1588,9 @@ EXPORT_SYMBOL_GPL(dm_bufio_client_destroy); static void cleanup_old_buffers(void) { - unsigned long max_age = dm_bufio_max_age; + unsigned long max_age = ACCESS_ONCE(dm_bufio_max_age); struct dm_bufio_client *c; - barrier(); - if (max_age > ULONG_MAX / HZ) max_age = ULONG_MAX / HZ; diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 892ae2766aa6..9e7328bb4030 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -438,7 +438,7 @@ static void verity_prefetch_io(struct dm_verity *v, struct dm_verity_io *io) verity_hash_at_level(v, io->block, i, &hash_block_start, NULL); verity_hash_at_level(v, io->block + io->n_blocks - 1, i, &hash_block_end, NULL); if (!i) { - unsigned cluster = *(volatile unsigned *)&dm_verity_prefetch_cluster; + unsigned cluster = ACCESS_ONCE(dm_verity_prefetch_cluster); cluster >>= v->data_dev_block_bits; if (unlikely(!cluster)) From 0bcf08798eb9fc3cd0fe2e6b74b25f3f57fa8af2 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 12 Oct 2012 16:59:47 +0100 Subject: [PATCH 4/8] dm persistent data: convert to use le32_add_cpu Convert cpu_to_le32(le32_to_cpu(E1) + E2) to use le32_add_cpu(). dpatch engine is used to auto generate this patch. 
(https://github.com/weiyj/dpatch) Signed-off-by: Wei Yongjun Signed-off-by: Alasdair G Kergon --- drivers/md/persistent-data/dm-space-map-common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index d77602d63c83..f3a9af8cdec3 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -434,14 +434,14 @@ int sm_ll_insert(struct ll_disk *ll, dm_block_t b, if (ref_count && !old) { *ev = SM_ALLOC; ll->nr_allocated++; - ie_disk.nr_free = cpu_to_le32(le32_to_cpu(ie_disk.nr_free) - 1); + le32_add_cpu(&ie_disk.nr_free, -1); if (le32_to_cpu(ie_disk.none_free_before) == bit) ie_disk.none_free_before = cpu_to_le32(bit + 1); } else if (old && !ref_count) { *ev = SM_FREE; ll->nr_allocated--; - ie_disk.nr_free = cpu_to_le32(le32_to_cpu(ie_disk.nr_free) + 1); + le32_add_cpu(&ie_disk.nr_free, 1); ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit)); } From 28eed34e7662d7602da6753b0ba2563006b8e7a2 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 12 Oct 2012 21:02:07 +0100 Subject: [PATCH 5/8] dm thin: support discard with non power of two block size Support discards when the pool's block size is not a power of 2. The block layer assumes discard_granularity is a power of 2 (in blkdev_issue_discard), so we set this to the largest power of 2 that divides into the number of sectors in each block, but never less than DATA_DEV_BLOCK_SIZE_MIN_SECTORS. This patch eliminates the "Discard support must be disabled when the block size is not a power of 2" constraint that was imposed in commit 55f2b8b ("dm thin: support for non power of 2 pool blocksize"). That commit was incomplete: using a block size that is not a power of 2 shouldn't mean disabling discard support on the device completely. 
Signed-off-by: Mike Snitzer Signed-off-by: Joe Thornber Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index c29410af1e22..df20a115136f 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2272,15 +2272,6 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) goto out_flags_changed; } - /* - * The block layer requires discard_granularity to be a power of 2. - */ - if (pf.discard_enabled && !is_power_of_2(block_size)) { - ti->error = "Discard support must be disabled when the block size is not a power of 2"; - r = -EINVAL; - goto out_flags_changed; - } - pt->pool = pool; pt->ti = ti; pt->metadata_dev = metadata_dev; @@ -2762,6 +2753,11 @@ static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm, return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); } +static bool block_size_is_power_of_two(struct pool *pool) +{ + return pool->sectors_per_block_shift >= 0; +} + static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits) { struct pool *pool = pt->pool; @@ -2775,8 +2771,15 @@ static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits) if (pt->adjusted_pf.discard_passdown) { data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits; limits->discard_granularity = data_limits->discard_granularity; - } else + } else if (block_size_is_power_of_two(pool)) limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; + else + /* + * Use largest power of 2 that is a factor of sectors_per_block + * but at least DATA_DEV_BLOCK_SIZE_MIN_SECTORS. 
+ */ + limits->discard_granularity = max(1 << (ffs(pool->sectors_per_block) - 1), + DATA_DEV_BLOCK_SIZE_MIN_SECTORS) << SECTOR_SHIFT; } static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) From 44feb387f6f5584535bd6e3ad7ccfdce715d7dba Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 12 Oct 2012 21:02:10 +0100 Subject: [PATCH 6/8] dm thin: prepare to separate bio_prison code The bio prison code will be useful to share with future DM targets. Prepare to move this code into a separate module, adding a dm prefix to structures and functions that will be exported. Signed-off-by: Mike Snitzer Signed-off-by: Joe Thornber Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin.c | 221 +++++++++++++++++++++++++------------------ 1 file changed, 131 insertions(+), 90 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index df20a115136f..22a22a701e16 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -58,7 +58,7 @@ * i) plug io further to this physical block. (see bio_prison code). * * ii) quiesce any read io to that shared data block. Obviously - * including all devices that share this block. (see deferred_set code) + * including all devices that share this block. (see dm_deferred_set code) * * iii) copy the data block to a newly allocate block. This step can be * missed out if the io covers the block. (schedule_copy). @@ -104,9 +104,9 @@ * by a key, multiple bios can be in the same cell. When the cell is * subsequently unlocked the bios become available. 
*/ -struct bio_prison; +struct dm_bio_prison; -struct cell_key { +struct dm_cell_key { int virtual; dm_thin_id dev; dm_block_t block; @@ -114,13 +114,13 @@ struct cell_key { struct dm_bio_prison_cell { struct hlist_node list; - struct bio_prison *prison; - struct cell_key key; + struct dm_bio_prison *prison; + struct dm_cell_key key; struct bio *holder; struct bio_list bios; }; -struct bio_prison { +struct dm_bio_prison { spinlock_t lock; mempool_t *cell_pool; @@ -148,13 +148,13 @@ static struct kmem_cache *_cell_cache; * @nr_cells should be the number of cells you want in use _concurrently_. * Don't confuse it with the number of distinct keys. */ -static struct bio_prison *prison_create(unsigned nr_cells) +static struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells) { unsigned i; uint32_t nr_buckets = calc_nr_buckets(nr_cells); - size_t len = sizeof(struct bio_prison) + + size_t len = sizeof(struct dm_bio_prison) + (sizeof(struct hlist_head) * nr_buckets); - struct bio_prison *prison = kmalloc(len, GFP_KERNEL); + struct dm_bio_prison *prison = kmalloc(len, GFP_KERNEL); if (!prison) return NULL; @@ -175,13 +175,13 @@ static struct bio_prison *prison_create(unsigned nr_cells) return prison; } -static void prison_destroy(struct bio_prison *prison) +static void dm_bio_prison_destroy(struct dm_bio_prison *prison) { mempool_destroy(prison->cell_pool); kfree(prison); } -static uint32_t hash_key(struct bio_prison *prison, struct cell_key *key) +static uint32_t hash_key(struct dm_bio_prison *prison, struct dm_cell_key *key) { const unsigned long BIG_PRIME = 4294967291UL; uint64_t hash = key->block * BIG_PRIME; @@ -189,7 +189,7 @@ static uint32_t hash_key(struct bio_prison *prison, struct cell_key *key) return (uint32_t) (hash & prison->hash_mask); } -static int keys_equal(struct cell_key *lhs, struct cell_key *rhs) +static int keys_equal(struct dm_cell_key *lhs, struct dm_cell_key *rhs) { return (lhs->virtual == rhs->virtual) && (lhs->dev == rhs->dev) && @@ -197,7 
+197,7 @@ static int keys_equal(struct cell_key *lhs, struct cell_key *rhs) } static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket, - struct cell_key *key) + struct dm_cell_key *key) { struct dm_bio_prison_cell *cell; struct hlist_node *tmp; @@ -215,8 +215,8 @@ static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket, * * Returns 1 if the cell was already held, 0 if @inmate is the new holder. */ -static int bio_detain(struct bio_prison *prison, struct cell_key *key, - struct bio *inmate, struct dm_bio_prison_cell **ref) +static int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key, + struct bio *inmate, struct dm_bio_prison_cell **ref) { int r = 1; unsigned long flags; @@ -277,7 +277,7 @@ static int bio_detain(struct bio_prison *prison, struct cell_key *key, */ static void __cell_release(struct dm_bio_prison_cell *cell, struct bio_list *inmates) { - struct bio_prison *prison = cell->prison; + struct dm_bio_prison *prison = cell->prison; hlist_del(&cell->list); @@ -289,10 +289,10 @@ static void __cell_release(struct dm_bio_prison_cell *cell, struct bio_list *inm mempool_free(cell, prison->cell_pool); } -static void cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios) +static void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios) { unsigned long flags; - struct bio_prison *prison = cell->prison; + struct dm_bio_prison *prison = cell->prison; spin_lock_irqsave(&prison->lock, flags); __cell_release(cell, bios); @@ -313,10 +313,10 @@ static void __cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio __cell_release(cell, NULL); } -static void cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio) +static void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio) { unsigned long flags; - struct bio_prison *prison = cell->prison; + struct dm_bio_prison *prison = cell->prison; spin_lock_irqsave(&prison->lock, flags); 
__cell_release_singleton(cell, bio); @@ -329,7 +329,7 @@ static void cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio * static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates) { - struct bio_prison *prison = cell->prison; + struct dm_bio_prison *prison = cell->prison; hlist_del(&cell->list); bio_list_merge(inmates, &cell->bios); @@ -337,20 +337,20 @@ static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, mempool_free(cell, prison->cell_pool); } -static void cell_release_no_holder(struct dm_bio_prison_cell *cell, - struct bio_list *inmates) +static void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, + struct bio_list *inmates) { unsigned long flags; - struct bio_prison *prison = cell->prison; + struct dm_bio_prison *prison = cell->prison; spin_lock_irqsave(&prison->lock, flags); __cell_release_no_holder(cell, inmates); spin_unlock_irqrestore(&prison->lock, flags); } -static void cell_error(struct dm_bio_prison_cell *cell) +static void dm_cell_error(struct dm_bio_prison_cell *cell) { - struct bio_prison *prison = cell->prison; + struct dm_bio_prison *prison = cell->prison; struct bio_list bios; struct bio *bio; unsigned long flags; @@ -374,23 +374,28 @@ static void cell_error(struct dm_bio_prison_cell *cell) * new mapping could free the old block that the read bios are mapped to. 
*/ -struct deferred_set; -struct deferred_entry { - struct deferred_set *ds; +struct dm_deferred_set; +struct dm_deferred_entry { + struct dm_deferred_set *ds; unsigned count; struct list_head work_items; }; -struct deferred_set { +struct dm_deferred_set { spinlock_t lock; unsigned current_entry; unsigned sweeper; - struct deferred_entry entries[DEFERRED_SET_SIZE]; + struct dm_deferred_entry entries[DEFERRED_SET_SIZE]; }; -static void ds_init(struct deferred_set *ds) +static struct dm_deferred_set *dm_deferred_set_create(void) { int i; + struct dm_deferred_set *ds; + + ds = kmalloc(sizeof(*ds), GFP_KERNEL); + if (!ds) + return NULL; spin_lock_init(&ds->lock); ds->current_entry = 0; @@ -400,12 +405,19 @@ static void ds_init(struct deferred_set *ds) ds->entries[i].count = 0; INIT_LIST_HEAD(&ds->entries[i].work_items); } + + return ds; } -static struct deferred_entry *ds_inc(struct deferred_set *ds) +static void dm_deferred_set_destroy(struct dm_deferred_set *ds) +{ + kfree(ds); +} + +static struct dm_deferred_entry *dm_deferred_entry_inc(struct dm_deferred_set *ds) { unsigned long flags; - struct deferred_entry *entry; + struct dm_deferred_entry *entry; spin_lock_irqsave(&ds->lock, flags); entry = ds->entries + ds->current_entry; @@ -420,7 +432,7 @@ static unsigned ds_next(unsigned index) return (index + 1) % DEFERRED_SET_SIZE; } -static void __sweep(struct deferred_set *ds, struct list_head *head) +static void __sweep(struct dm_deferred_set *ds, struct list_head *head) { while ((ds->sweeper != ds->current_entry) && !ds->entries[ds->sweeper].count) { @@ -432,7 +444,7 @@ static void __sweep(struct deferred_set *ds, struct list_head *head) list_splice_init(&ds->entries[ds->sweeper].work_items, head); } -static void ds_dec(struct deferred_entry *entry, struct list_head *head) +static void dm_deferred_entry_dec(struct dm_deferred_entry *entry, struct list_head *head) { unsigned long flags; @@ -446,7 +458,7 @@ static void ds_dec(struct deferred_entry *entry, struct 
list_head *head) /* * Returns 1 if deferred or 0 if no pending items to delay job. */ -static int ds_add_work(struct deferred_set *ds, struct list_head *work) +static int dm_deferred_set_add_work(struct dm_deferred_set *ds, struct list_head *work) { int r = 1; unsigned long flags; @@ -467,13 +479,28 @@ static int ds_add_work(struct deferred_set *ds, struct list_head *work) return r; } +static int __init dm_bio_prison_init(void) +{ + _cell_cache = KMEM_CACHE(dm_bio_prison_cell, 0); + if (!_cell_cache) + return -ENOMEM; + + return 0; +} + +static void __exit dm_bio_prison_exit(void) +{ + kmem_cache_destroy(_cell_cache); + _cell_cache = NULL; +} + /*----------------------------------------------------------------*/ /* * Key building. */ static void build_data_key(struct dm_thin_device *td, - dm_block_t b, struct cell_key *key) + dm_block_t b, struct dm_cell_key *key) { key->virtual = 0; key->dev = dm_thin_dev_id(td); @@ -481,7 +508,7 @@ static void build_data_key(struct dm_thin_device *td, } static void build_virtual_key(struct dm_thin_device *td, dm_block_t b, - struct cell_key *key) + struct dm_cell_key *key) { key->virtual = 1; key->dev = dm_thin_dev_id(td); @@ -534,7 +561,7 @@ struct pool { unsigned low_water_triggered:1; /* A dm event has been sent */ unsigned no_free_space:1; /* A -ENOSPC warning has been issued */ - struct bio_prison *prison; + struct dm_bio_prison *prison; struct dm_kcopyd_client *copier; struct workqueue_struct *wq; @@ -552,8 +579,8 @@ struct pool { struct bio_list retry_on_resume_list; - struct deferred_set shared_read_ds; - struct deferred_set all_io_ds; + struct dm_deferred_set *shared_read_ds; + struct dm_deferred_set *all_io_ds; struct dm_thin_new_mapping *next_mapping; mempool_t *mapping_pool; @@ -660,8 +687,8 @@ static struct pool *__pool_table_lookup_metadata_dev(struct block_device *md_dev struct dm_thin_endio_hook { struct thin_c *tc; - struct deferred_entry *shared_read_entry; - struct deferred_entry *all_io_entry; + struct 
dm_deferred_entry *shared_read_entry; + struct dm_deferred_entry *all_io_entry; struct dm_thin_new_mapping *overwrite_mapping; }; @@ -877,7 +904,7 @@ static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell, unsigned long flags; spin_lock_irqsave(&pool->lock, flags); - cell_release(cell, &pool->deferred_bios); + dm_cell_release(cell, &pool->deferred_bios); spin_unlock_irqrestore(&tc->pool->lock, flags); wake_worker(pool); @@ -896,7 +923,7 @@ static void cell_defer_except(struct thin_c *tc, struct dm_bio_prison_cell *cell bio_list_init(&bios); spin_lock_irqsave(&pool->lock, flags); - cell_release_no_holder(cell, &pool->deferred_bios); + dm_cell_release_no_holder(cell, &pool->deferred_bios); spin_unlock_irqrestore(&pool->lock, flags); wake_worker(pool); @@ -906,7 +933,7 @@ static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) { if (m->bio) m->bio->bi_end_io = m->saved_bi_end_io; - cell_error(m->cell); + dm_cell_error(m->cell); list_del(&m->list); mempool_free(m, m->tc->pool->mapping_pool); } @@ -921,7 +948,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) bio->bi_end_io = m->saved_bi_end_io; if (m->err) { - cell_error(m->cell); + dm_cell_error(m->cell); goto out; } @@ -933,7 +960,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block); if (r) { DMERR("dm_thin_insert_block() failed"); - cell_error(m->cell); + dm_cell_error(m->cell); goto out; } @@ -1067,7 +1094,7 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, m->err = 0; m->bio = NULL; - if (!ds_add_work(&pool->shared_read_ds, &m->list)) + if (!dm_deferred_set_add_work(pool->shared_read_ds, &m->list)) m->quiesced = 1; /* @@ -1099,7 +1126,7 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, if (r < 0) { mempool_free(m, pool->mapping_pool); DMERR("dm_kcopyd_copy() failed"); - cell_error(cell); + dm_cell_error(cell); } } } @@ -1164,7 +1191,7 
@@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, if (r < 0) { mempool_free(m, pool->mapping_pool); DMERR("dm_kcopyd_zero() failed"); - cell_error(cell); + dm_cell_error(cell); } } } @@ -1276,7 +1303,7 @@ static void no_space(struct dm_bio_prison_cell *cell) struct bio_list bios; bio_list_init(&bios); - cell_release(cell, &bios); + dm_cell_release(cell, &bios); while ((bio = bio_list_pop(&bios))) retry_on_resume(bio); @@ -1288,13 +1315,13 @@ static void process_discard(struct thin_c *tc, struct bio *bio) unsigned long flags; struct pool *pool = tc->pool; struct dm_bio_prison_cell *cell, *cell2; - struct cell_key key, key2; + struct dm_cell_key key, key2; dm_block_t block = get_bio_block(tc, bio); struct dm_thin_lookup_result lookup_result; struct dm_thin_new_mapping *m; build_virtual_key(tc->td, block, &key); - if (bio_detain(tc->pool->prison, &key, bio, &cell)) + if (dm_bio_detain(tc->pool->prison, &key, bio, &cell)) return; r = dm_thin_find_block(tc->td, block, 1, &lookup_result); @@ -1306,8 +1333,8 @@ static void process_discard(struct thin_c *tc, struct bio *bio) * on this block. */ build_data_key(tc->td, lookup_result.block, &key2); - if (bio_detain(tc->pool->prison, &key2, bio, &cell2)) { - cell_release_singleton(cell, bio); + if (dm_bio_detain(tc->pool->prison, &key2, bio, &cell2)) { + dm_cell_release_singleton(cell, bio); break; } @@ -1326,7 +1353,7 @@ static void process_discard(struct thin_c *tc, struct bio *bio) m->err = 0; m->bio = bio; - if (!ds_add_work(&pool->all_io_ds, &m->list)) { + if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list)) { spin_lock_irqsave(&pool->lock, flags); list_add(&m->list, &pool->prepared_discards); spin_unlock_irqrestore(&pool->lock, flags); @@ -1338,8 +1365,8 @@ static void process_discard(struct thin_c *tc, struct bio *bio) * a block boundary. So we submit the discard of a * partial block appropriately. 
*/ - cell_release_singleton(cell, bio); - cell_release_singleton(cell2, bio); + dm_cell_release_singleton(cell, bio); + dm_cell_release_singleton(cell2, bio); if ((!lookup_result.shared) && pool->pf.discard_passdown) remap_and_issue(tc, bio, lookup_result.block); else @@ -1351,20 +1378,20 @@ static void process_discard(struct thin_c *tc, struct bio *bio) /* * It isn't provisioned, just forget it. */ - cell_release_singleton(cell, bio); + dm_cell_release_singleton(cell, bio); bio_endio(bio, 0); break; default: DMERR("discard: find block unexpectedly returned %d", r); - cell_release_singleton(cell, bio); + dm_cell_release_singleton(cell, bio); bio_io_error(bio); break; } } static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block, - struct cell_key *key, + struct dm_cell_key *key, struct dm_thin_lookup_result *lookup_result, struct dm_bio_prison_cell *cell) { @@ -1384,7 +1411,7 @@ static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block, default: DMERR("%s: alloc_data_block() failed, error = %d", __func__, r); - cell_error(cell); + dm_cell_error(cell); break; } } @@ -1395,14 +1422,14 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio, { struct dm_bio_prison_cell *cell; struct pool *pool = tc->pool; - struct cell_key key; + struct dm_cell_key key; /* * If cell is already occupied, then sharing is already in the process * of being broken so we have nothing further to do here. 
*/ build_data_key(tc->td, lookup_result->block, &key); - if (bio_detain(pool->prison, &key, bio, &cell)) + if (dm_bio_detain(pool->prison, &key, bio, &cell)) return; if (bio_data_dir(bio) == WRITE && bio->bi_size) @@ -1410,9 +1437,9 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio, else { struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; - h->shared_read_entry = ds_inc(&pool->shared_read_ds); + h->shared_read_entry = dm_deferred_entry_inc(pool->shared_read_ds); - cell_release_singleton(cell, bio); + dm_cell_release_singleton(cell, bio); remap_and_issue(tc, bio, lookup_result->block); } } @@ -1427,7 +1454,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block * Remap empty bios (flushes) immediately, without provisioning. */ if (!bio->bi_size) { - cell_release_singleton(cell, bio); + dm_cell_release_singleton(cell, bio); remap_and_issue(tc, bio, 0); return; } @@ -1437,7 +1464,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block */ if (bio_data_dir(bio) == READ) { zero_fill_bio(bio); - cell_release_singleton(cell, bio); + dm_cell_release_singleton(cell, bio); bio_endio(bio, 0); return; } @@ -1458,7 +1485,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block default: DMERR("%s: alloc_data_block() failed, error = %d", __func__, r); set_pool_mode(tc->pool, PM_READ_ONLY); - cell_error(cell); + dm_cell_error(cell); break; } } @@ -1468,7 +1495,7 @@ static void process_bio(struct thin_c *tc, struct bio *bio) int r; dm_block_t block = get_bio_block(tc, bio); struct dm_bio_prison_cell *cell; - struct cell_key key; + struct dm_cell_key key; struct dm_thin_lookup_result lookup_result; /* @@ -1476,7 +1503,7 @@ static void process_bio(struct thin_c *tc, struct bio *bio) * being provisioned so we have nothing further to do here. 
*/ build_virtual_key(tc->td, block, &key); - if (bio_detain(tc->pool->prison, &key, bio, &cell)) + if (dm_bio_detain(tc->pool->prison, &key, bio, &cell)) return; r = dm_thin_find_block(tc->td, block, 1, &lookup_result); @@ -1491,7 +1518,7 @@ static void process_bio(struct thin_c *tc, struct bio *bio) * TODO: this will probably have to change when discard goes * back in. */ - cell_release_singleton(cell, bio); + dm_cell_release_singleton(cell, bio); if (lookup_result.shared) process_shared_bio(tc, bio, block, &lookup_result); @@ -1501,7 +1528,7 @@ static void process_bio(struct thin_c *tc, struct bio *bio) case -ENODATA: if (bio_data_dir(bio) == READ && tc->origin_dev) { - cell_release_singleton(cell, bio); + dm_cell_release_singleton(cell, bio); remap_to_origin_and_issue(tc, bio); } else provision_block(tc, bio, block, cell); @@ -1509,7 +1536,7 @@ static void process_bio(struct thin_c *tc, struct bio *bio) default: DMERR("dm_thin_find_block() failed, error = %d", r); - cell_release_singleton(cell, bio); + dm_cell_release_singleton(cell, bio); bio_io_error(bio); break; } @@ -1718,7 +1745,7 @@ static struct dm_thin_endio_hook *thin_hook_bio(struct thin_c *tc, struct bio *b h->tc = tc; h->shared_read_entry = NULL; - h->all_io_entry = bio->bi_rw & REQ_DISCARD ? NULL : ds_inc(&pool->all_io_ds); + h->all_io_entry = bio->bi_rw & REQ_DISCARD ? 
NULL : dm_deferred_entry_inc(pool->all_io_ds); h->overwrite_mapping = NULL; return h; @@ -1928,7 +1955,7 @@ static void __pool_destroy(struct pool *pool) if (dm_pool_metadata_close(pool->pmd) < 0) DMWARN("%s: dm_pool_metadata_close() failed.", __func__); - prison_destroy(pool->prison); + dm_bio_prison_destroy(pool->prison); dm_kcopyd_client_destroy(pool->copier); if (pool->wq) @@ -1938,6 +1965,8 @@ static void __pool_destroy(struct pool *pool) mempool_free(pool->next_mapping, pool->mapping_pool); mempool_destroy(pool->mapping_pool); mempool_destroy(pool->endio_hook_pool); + dm_deferred_set_destroy(pool->shared_read_ds); + dm_deferred_set_destroy(pool->all_io_ds); kfree(pool); } @@ -1976,7 +2005,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, pool->sectors_per_block_shift = __ffs(block_size); pool->low_water_blocks = 0; pool_features_init(&pool->pf); - pool->prison = prison_create(PRISON_CELLS); + pool->prison = dm_bio_prison_create(PRISON_CELLS); if (!pool->prison) { *error = "Error creating pool's bio prison"; err_p = ERR_PTR(-ENOMEM); @@ -2012,8 +2041,20 @@ static struct pool *pool_create(struct mapped_device *pool_md, pool->low_water_triggered = 0; pool->no_free_space = 0; bio_list_init(&pool->retry_on_resume_list); - ds_init(&pool->shared_read_ds); - ds_init(&pool->all_io_ds); + + pool->shared_read_ds = dm_deferred_set_create(); + if (!pool->shared_read_ds) { + *error = "Error creating pool's shared read deferred set"; + err_p = ERR_PTR(-ENOMEM); + goto bad_shared_read_ds; + } + + pool->all_io_ds = dm_deferred_set_create(); + if (!pool->all_io_ds) { + *error = "Error creating pool's all io deferred set"; + err_p = ERR_PTR(-ENOMEM); + goto bad_all_io_ds; + } pool->next_mapping = NULL; pool->mapping_pool = mempool_create_slab_pool(MAPPING_POOL_SIZE, @@ -2042,11 +2083,15 @@ static struct pool *pool_create(struct mapped_device *pool_md, bad_endio_hook_pool: mempool_destroy(pool->mapping_pool); bad_mapping_pool: + 
dm_deferred_set_destroy(pool->all_io_ds); +bad_all_io_ds: + dm_deferred_set_destroy(pool->shared_read_ds); +bad_shared_read_ds: destroy_workqueue(pool->wq); bad_wq: dm_kcopyd_client_destroy(pool->copier); bad_kcopyd_client: - prison_destroy(pool->prison); + dm_bio_prison_destroy(pool->prison); bad_prison: kfree(pool); bad_pool: @@ -2982,7 +3027,7 @@ static int thin_endio(struct dm_target *ti, if (h->shared_read_entry) { INIT_LIST_HEAD(&work); - ds_dec(h->shared_read_entry, &work); + dm_deferred_entry_dec(h->shared_read_entry, &work); spin_lock_irqsave(&pool->lock, flags); list_for_each_entry_safe(m, tmp, &work, list) { @@ -2995,7 +3040,7 @@ static int thin_endio(struct dm_target *ti, if (h->all_io_entry) { INIT_LIST_HEAD(&work); - ds_dec(h->all_io_entry, &work); + dm_deferred_entry_dec(h->all_io_entry, &work); spin_lock_irqsave(&pool->lock, flags); list_for_each_entry_safe(m, tmp, &work, list) list_add(&m->list, &pool->prepared_discards); @@ -3128,9 +3173,7 @@ static int __init dm_thin_init(void) r = -ENOMEM; - _cell_cache = KMEM_CACHE(dm_bio_prison_cell, 0); - if (!_cell_cache) - goto bad_cell_cache; + dm_bio_prison_init(); _new_mapping_cache = KMEM_CACHE(dm_thin_new_mapping, 0); if (!_new_mapping_cache) @@ -3145,8 +3188,6 @@ static int __init dm_thin_init(void) bad_endio_hook_cache: kmem_cache_destroy(_new_mapping_cache); bad_new_mapping_cache: - kmem_cache_destroy(_cell_cache); -bad_cell_cache: dm_unregister_target(&pool_target); bad_pool_target: dm_unregister_target(&thin_target); @@ -3159,7 +3200,7 @@ static void dm_thin_exit(void) dm_unregister_target(&thin_target); dm_unregister_target(&pool_target); - kmem_cache_destroy(_cell_cache); + dm_bio_prison_exit(); kmem_cache_destroy(_new_mapping_cache); kmem_cache_destroy(_endio_hook_cache); } From 4f81a4176297db57c7ef3b2893092dd837c1e2a8 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 12 Oct 2012 21:02:13 +0100 Subject: [PATCH 7/8] dm thin: move bio_prison code to separate module The bio prison code will 
be useful to other future DM targets so move it to a separate module. Signed-off-by: Mike Snitzer Signed-off-by: Joe Thornber Signed-off-by: Alasdair G Kergon --- drivers/md/Kconfig | 8 + drivers/md/Makefile | 1 + drivers/md/dm-bio-prison.c | 415 +++++++++++++++++++++++++++++++++++++ drivers/md/dm-bio-prison.h | 72 +++++++ drivers/md/dm-thin.c | 407 +----------------------------------- 5 files changed, 499 insertions(+), 404 deletions(-) create mode 100644 drivers/md/dm-bio-prison.c create mode 100644 drivers/md/dm-bio-prison.h diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index d949b781f6f8..91a02eeeb319 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -216,6 +216,13 @@ config DM_BUFIO as a cache, holding recently-read blocks in memory and performing delayed writes. +config DM_BIO_PRISON + tristate + depends on BLK_DEV_DM && EXPERIMENTAL + ---help--- + Some bio locking schemes used by other device-mapper targets + including thin provisioning. + source "drivers/md/persistent-data/Kconfig" config DM_CRYPT @@ -247,6 +254,7 @@ config DM_THIN_PROVISIONING tristate "Thin provisioning target (EXPERIMENTAL)" depends on BLK_DEV_DM && EXPERIMENTAL select DM_PERSISTENT_DATA + select DM_BIO_PRISON ---help--- Provides thin provisioning and snapshots that share a data store. 
diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 8b2e0dffe82e..94dce8b49324 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_MD_FAULTY) += faulty.o obj-$(CONFIG_BLK_DEV_MD) += md-mod.o obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o obj-$(CONFIG_DM_BUFIO) += dm-bufio.o +obj-$(CONFIG_DM_BIO_PRISON) += dm-bio-prison.o obj-$(CONFIG_DM_CRYPT) += dm-crypt.o obj-$(CONFIG_DM_DELAY) += dm-delay.o obj-$(CONFIG_DM_FLAKEY) += dm-flakey.o diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c new file mode 100644 index 000000000000..e4e841567459 --- /dev/null +++ b/drivers/md/dm-bio-prison.c @@ -0,0 +1,415 @@ +/* + * Copyright (C) 2012 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#include "dm.h" +#include "dm-bio-prison.h" + +#include <linux/spinlock.h> +#include <linux/mempool.h> +#include <linux/module.h> +#include <linux/slab.h> + +/*----------------------------------------------------------------*/ + +struct dm_bio_prison_cell { + struct hlist_node list; + struct dm_bio_prison *prison; + struct dm_cell_key key; + struct bio *holder; + struct bio_list bios; +}; + +struct dm_bio_prison { + spinlock_t lock; + mempool_t *cell_pool; + + unsigned nr_buckets; + unsigned hash_mask; + struct hlist_head *cells; +}; + +/*----------------------------------------------------------------*/ + +static uint32_t calc_nr_buckets(unsigned nr_cells) +{ + uint32_t n = 128; + + nr_cells /= 4; + nr_cells = min(nr_cells, 8192u); + + while (n < nr_cells) + n <<= 1; + + return n; +} + +static struct kmem_cache *_cell_cache; + +/* + * @nr_cells should be the number of cells you want in use _concurrently_. + * Don't confuse it with the number of distinct keys.
+ */ +struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells) +{ + unsigned i; + uint32_t nr_buckets = calc_nr_buckets(nr_cells); + size_t len = sizeof(struct dm_bio_prison) + + (sizeof(struct hlist_head) * nr_buckets); + struct dm_bio_prison *prison = kmalloc(len, GFP_KERNEL); + + if (!prison) + return NULL; + + spin_lock_init(&prison->lock); + prison->cell_pool = mempool_create_slab_pool(nr_cells, _cell_cache); + if (!prison->cell_pool) { + kfree(prison); + return NULL; + } + + prison->nr_buckets = nr_buckets; + prison->hash_mask = nr_buckets - 1; + prison->cells = (struct hlist_head *) (prison + 1); + for (i = 0; i < nr_buckets; i++) + INIT_HLIST_HEAD(prison->cells + i); + + return prison; +} +EXPORT_SYMBOL_GPL(dm_bio_prison_create); + +void dm_bio_prison_destroy(struct dm_bio_prison *prison) +{ + mempool_destroy(prison->cell_pool); + kfree(prison); +} +EXPORT_SYMBOL_GPL(dm_bio_prison_destroy); + +static uint32_t hash_key(struct dm_bio_prison *prison, struct dm_cell_key *key) +{ + const unsigned long BIG_PRIME = 4294967291UL; + uint64_t hash = key->block * BIG_PRIME; + + return (uint32_t) (hash & prison->hash_mask); +} + +static int keys_equal(struct dm_cell_key *lhs, struct dm_cell_key *rhs) +{ + return (lhs->virtual == rhs->virtual) && + (lhs->dev == rhs->dev) && + (lhs->block == rhs->block); +} + +static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket, + struct dm_cell_key *key) +{ + struct dm_bio_prison_cell *cell; + struct hlist_node *tmp; + + hlist_for_each_entry(cell, tmp, bucket, list) + if (keys_equal(&cell->key, key)) + return cell; + + return NULL; +} + +/* + * This may block if a new cell needs allocating. You must ensure that + * cells will be unlocked even if the calling thread is blocked. + * + * Returns 1 if the cell was already held, 0 if @inmate is the new holder. 
+ */ +int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key, + struct bio *inmate, struct dm_bio_prison_cell **ref) +{ + int r = 1; + unsigned long flags; + uint32_t hash = hash_key(prison, key); + struct dm_bio_prison_cell *cell, *cell2; + + BUG_ON(hash > prison->nr_buckets); + + spin_lock_irqsave(&prison->lock, flags); + + cell = __search_bucket(prison->cells + hash, key); + if (cell) { + bio_list_add(&cell->bios, inmate); + goto out; + } + + /* + * Allocate a new cell + */ + spin_unlock_irqrestore(&prison->lock, flags); + cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO); + spin_lock_irqsave(&prison->lock, flags); + + /* + * We've been unlocked, so we have to double check that + * nobody else has inserted this cell in the meantime. + */ + cell = __search_bucket(prison->cells + hash, key); + if (cell) { + mempool_free(cell2, prison->cell_pool); + bio_list_add(&cell->bios, inmate); + goto out; + } + + /* + * Use new cell. + */ + cell = cell2; + + cell->prison = prison; + memcpy(&cell->key, key, sizeof(cell->key)); + cell->holder = inmate; + bio_list_init(&cell->bios); + hlist_add_head(&cell->list, prison->cells + hash); + + r = 0; + +out: + spin_unlock_irqrestore(&prison->lock, flags); + + *ref = cell; + + return r; +} +EXPORT_SYMBOL_GPL(dm_bio_detain); + +/* + * @inmates must have been initialised prior to this call + */ +static void __cell_release(struct dm_bio_prison_cell *cell, struct bio_list *inmates) +{ + struct dm_bio_prison *prison = cell->prison; + + hlist_del(&cell->list); + + if (inmates) { + bio_list_add(inmates, cell->holder); + bio_list_merge(inmates, &cell->bios); + } + + mempool_free(cell, prison->cell_pool); +} + +void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios) +{ + unsigned long flags; + struct dm_bio_prison *prison = cell->prison; + + spin_lock_irqsave(&prison->lock, flags); + __cell_release(cell, bios); + spin_unlock_irqrestore(&prison->lock, flags); +} +EXPORT_SYMBOL_GPL(dm_cell_release); + +/* 
+ * There are a couple of places where we put a bio into a cell briefly + * before taking it out again. In these situations we know that no other + * bio may be in the cell. This function releases the cell, and also does + * a sanity check. + */ +static void __cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio) +{ + BUG_ON(cell->holder != bio); + BUG_ON(!bio_list_empty(&cell->bios)); + + __cell_release(cell, NULL); +} + +void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio) +{ + unsigned long flags; + struct dm_bio_prison *prison = cell->prison; + + spin_lock_irqsave(&prison->lock, flags); + __cell_release_singleton(cell, bio); + spin_unlock_irqrestore(&prison->lock, flags); +} +EXPORT_SYMBOL_GPL(dm_cell_release_singleton); + +/* + * Sometimes we don't want the holder, just the additional bios. + */ +static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates) +{ + struct dm_bio_prison *prison = cell->prison; + + hlist_del(&cell->list); + bio_list_merge(inmates, &cell->bios); + + mempool_free(cell, prison->cell_pool); +} + +void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates) +{ + unsigned long flags; + struct dm_bio_prison *prison = cell->prison; + + spin_lock_irqsave(&prison->lock, flags); + __cell_release_no_holder(cell, inmates); + spin_unlock_irqrestore(&prison->lock, flags); +} +EXPORT_SYMBOL_GPL(dm_cell_release_no_holder); + +void dm_cell_error(struct dm_bio_prison_cell *cell) +{ + struct dm_bio_prison *prison = cell->prison; + struct bio_list bios; + struct bio *bio; + unsigned long flags; + + bio_list_init(&bios); + + spin_lock_irqsave(&prison->lock, flags); + __cell_release(cell, &bios); + spin_unlock_irqrestore(&prison->lock, flags); + + while ((bio = bio_list_pop(&bios))) + bio_io_error(bio); +} +EXPORT_SYMBOL_GPL(dm_cell_error); + +/*----------------------------------------------------------------*/ + +#define DEFERRED_SET_SIZE 64 + 
+struct dm_deferred_entry { + struct dm_deferred_set *ds; + unsigned count; + struct list_head work_items; +}; + +struct dm_deferred_set { + spinlock_t lock; + unsigned current_entry; + unsigned sweeper; + struct dm_deferred_entry entries[DEFERRED_SET_SIZE]; +}; + +struct dm_deferred_set *dm_deferred_set_create(void) +{ + int i; + struct dm_deferred_set *ds; + + ds = kmalloc(sizeof(*ds), GFP_KERNEL); + if (!ds) + return NULL; + + spin_lock_init(&ds->lock); + ds->current_entry = 0; + ds->sweeper = 0; + for (i = 0; i < DEFERRED_SET_SIZE; i++) { + ds->entries[i].ds = ds; + ds->entries[i].count = 0; + INIT_LIST_HEAD(&ds->entries[i].work_items); + } + + return ds; +} +EXPORT_SYMBOL_GPL(dm_deferred_set_create); + +void dm_deferred_set_destroy(struct dm_deferred_set *ds) +{ + kfree(ds); +} +EXPORT_SYMBOL_GPL(dm_deferred_set_destroy); + +struct dm_deferred_entry *dm_deferred_entry_inc(struct dm_deferred_set *ds) +{ + unsigned long flags; + struct dm_deferred_entry *entry; + + spin_lock_irqsave(&ds->lock, flags); + entry = ds->entries + ds->current_entry; + entry->count++; + spin_unlock_irqrestore(&ds->lock, flags); + + return entry; +} +EXPORT_SYMBOL_GPL(dm_deferred_entry_inc); + +static unsigned ds_next(unsigned index) +{ + return (index + 1) % DEFERRED_SET_SIZE; +} + +static void __sweep(struct dm_deferred_set *ds, struct list_head *head) +{ + while ((ds->sweeper != ds->current_entry) && + !ds->entries[ds->sweeper].count) { + list_splice_init(&ds->entries[ds->sweeper].work_items, head); + ds->sweeper = ds_next(ds->sweeper); + } + + if ((ds->sweeper == ds->current_entry) && !ds->entries[ds->sweeper].count) + list_splice_init(&ds->entries[ds->sweeper].work_items, head); +} + +void dm_deferred_entry_dec(struct dm_deferred_entry *entry, struct list_head *head) +{ + unsigned long flags; + + spin_lock_irqsave(&entry->ds->lock, flags); + BUG_ON(!entry->count); + --entry->count; + __sweep(entry->ds, head); + spin_unlock_irqrestore(&entry->ds->lock, flags); +} 
+EXPORT_SYMBOL_GPL(dm_deferred_entry_dec); + +/* + * Returns 1 if deferred or 0 if no pending items to delay job. + */ +int dm_deferred_set_add_work(struct dm_deferred_set *ds, struct list_head *work) +{ + int r = 1; + unsigned long flags; + unsigned next_entry; + + spin_lock_irqsave(&ds->lock, flags); + if ((ds->sweeper == ds->current_entry) && + !ds->entries[ds->current_entry].count) + r = 0; + else { + list_add(work, &ds->entries[ds->current_entry].work_items); + next_entry = ds_next(ds->current_entry); + if (!ds->entries[next_entry].count) + ds->current_entry = next_entry; + } + spin_unlock_irqrestore(&ds->lock, flags); + + return r; +} +EXPORT_SYMBOL_GPL(dm_deferred_set_add_work); + +/*----------------------------------------------------------------*/ + +static int __init dm_bio_prison_init(void) +{ + _cell_cache = KMEM_CACHE(dm_bio_prison_cell, 0); + if (!_cell_cache) + return -ENOMEM; + + return 0; +} + +static void __exit dm_bio_prison_exit(void) +{ + kmem_cache_destroy(_cell_cache); + _cell_cache = NULL; +} + +/* + * module hooks + */ +module_init(dm_bio_prison_init); +module_exit(dm_bio_prison_exit); + +MODULE_DESCRIPTION(DM_NAME " bio prison"); +MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/md/dm-bio-prison.h b/drivers/md/dm-bio-prison.h new file mode 100644 index 000000000000..4e0ac376700a --- /dev/null +++ b/drivers/md/dm-bio-prison.h @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2011-2012 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#ifndef DM_BIO_PRISON_H +#define DM_BIO_PRISON_H + +#include "persistent-data/dm-block-manager.h" /* FIXME: for dm_block_t */ +#include "dm-thin-metadata.h" /* FIXME: for dm_thin_id */ + +#include <linux/list.h> +#include <linux/bio.h> + +/*----------------------------------------------------------------*/ + +/* + * Sometimes we can't deal with a bio straight away. We put them in prison + * where they can't cause any mischief.
Bios are put in a cell identified + * by a key, multiple bios can be in the same cell. When the cell is + * subsequently unlocked the bios become available. + */ +struct dm_bio_prison; +struct dm_bio_prison_cell; + +/* FIXME: this needs to be more abstract */ +struct dm_cell_key { + int virtual; + dm_thin_id dev; + dm_block_t block; +}; + +struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells); +void dm_bio_prison_destroy(struct dm_bio_prison *prison); + +/* + * This may block if a new cell needs allocating. You must ensure that + * cells will be unlocked even if the calling thread is blocked. + * + * Returns 1 if the cell was already held, 0 if @inmate is the new holder. + */ +int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key, + struct bio *inmate, struct dm_bio_prison_cell **ref); + +void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios); +void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio); // FIXME: bio arg not needed +void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates); +void dm_cell_error(struct dm_bio_prison_cell *cell); + +/*----------------------------------------------------------------*/ + +/* + * We use the deferred set to keep track of pending reads to shared blocks. + * We do this to ensure the new mapping caused by a write isn't performed + * until these prior reads have completed. Otherwise the insertion of the + * new mapping could free the old block that the read bios are mapped to. 
+ */ + +struct dm_deferred_set; +struct dm_deferred_entry; + +struct dm_deferred_set *dm_deferred_set_create(void); +void dm_deferred_set_destroy(struct dm_deferred_set *ds); + +struct dm_deferred_entry *dm_deferred_entry_inc(struct dm_deferred_set *ds); +void dm_deferred_entry_dec(struct dm_deferred_entry *entry, struct list_head *head); +int dm_deferred_set_add_work(struct dm_deferred_set *ds, struct list_head *work); + +/*----------------------------------------------------------------*/ + +#endif diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 22a22a701e16..058acf3a5ba7 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -5,6 +5,7 @@ */ #include "dm-thin-metadata.h" +#include "dm-bio-prison.h" #include "dm.h" #include <linux/device-mapper.h> @@ -21,7 +22,6 @@ * Tunable constants */ #define ENDIO_HOOK_POOL_SIZE 1024 -#define DEFERRED_SET_SIZE 64 #define MAPPING_POOL_SIZE 1024 #define PRISON_CELLS 1024 #define COMMIT_PERIOD HZ @@ -98,404 +98,6 @@ /*----------------------------------------------------------------*/ -/* - * Sometimes we can't deal with a bio straight away. We put them in prison - * where they can't cause any mischief. Bios are put in a cell identified - * by a key, multiple bios can be in the same cell. When the cell is - * subsequently unlocked the bios become available.
- */ -struct dm_bio_prison; - -struct dm_cell_key { - int virtual; - dm_thin_id dev; - dm_block_t block; -}; - -struct dm_bio_prison_cell { - struct hlist_node list; - struct dm_bio_prison *prison; - struct dm_cell_key key; - struct bio *holder; - struct bio_list bios; -}; - -struct dm_bio_prison { - spinlock_t lock; - mempool_t *cell_pool; - - unsigned nr_buckets; - unsigned hash_mask; - struct hlist_head *cells; -}; - -static uint32_t calc_nr_buckets(unsigned nr_cells) -{ - uint32_t n = 128; - - nr_cells /= 4; - nr_cells = min(nr_cells, 8192u); - - while (n < nr_cells) - n <<= 1; - - return n; -} - -static struct kmem_cache *_cell_cache; - -/* - * @nr_cells should be the number of cells you want in use _concurrently_. - * Don't confuse it with the number of distinct keys. - */ -static struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells) -{ - unsigned i; - uint32_t nr_buckets = calc_nr_buckets(nr_cells); - size_t len = sizeof(struct dm_bio_prison) + - (sizeof(struct hlist_head) * nr_buckets); - struct dm_bio_prison *prison = kmalloc(len, GFP_KERNEL); - - if (!prison) - return NULL; - - spin_lock_init(&prison->lock); - prison->cell_pool = mempool_create_slab_pool(nr_cells, _cell_cache); - if (!prison->cell_pool) { - kfree(prison); - return NULL; - } - - prison->nr_buckets = nr_buckets; - prison->hash_mask = nr_buckets - 1; - prison->cells = (struct hlist_head *) (prison + 1); - for (i = 0; i < nr_buckets; i++) - INIT_HLIST_HEAD(prison->cells + i); - - return prison; -} - -static void dm_bio_prison_destroy(struct dm_bio_prison *prison) -{ - mempool_destroy(prison->cell_pool); - kfree(prison); -} - -static uint32_t hash_key(struct dm_bio_prison *prison, struct dm_cell_key *key) -{ - const unsigned long BIG_PRIME = 4294967291UL; - uint64_t hash = key->block * BIG_PRIME; - - return (uint32_t) (hash & prison->hash_mask); -} - -static int keys_equal(struct dm_cell_key *lhs, struct dm_cell_key *rhs) -{ - return (lhs->virtual == rhs->virtual) && - (lhs->dev == 
rhs->dev) && - (lhs->block == rhs->block); -} - -static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket, - struct dm_cell_key *key) -{ - struct dm_bio_prison_cell *cell; - struct hlist_node *tmp; - - hlist_for_each_entry(cell, tmp, bucket, list) - if (keys_equal(&cell->key, key)) - return cell; - - return NULL; -} - -/* - * This may block if a new cell needs allocating. You must ensure that - * cells will be unlocked even if the calling thread is blocked. - * - * Returns 1 if the cell was already held, 0 if @inmate is the new holder. - */ -static int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key, - struct bio *inmate, struct dm_bio_prison_cell **ref) -{ - int r = 1; - unsigned long flags; - uint32_t hash = hash_key(prison, key); - struct dm_bio_prison_cell *cell, *cell2; - - BUG_ON(hash > prison->nr_buckets); - - spin_lock_irqsave(&prison->lock, flags); - - cell = __search_bucket(prison->cells + hash, key); - if (cell) { - bio_list_add(&cell->bios, inmate); - goto out; - } - - /* - * Allocate a new cell - */ - spin_unlock_irqrestore(&prison->lock, flags); - cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO); - spin_lock_irqsave(&prison->lock, flags); - - /* - * We've been unlocked, so we have to double check that - * nobody else has inserted this cell in the meantime. - */ - cell = __search_bucket(prison->cells + hash, key); - if (cell) { - mempool_free(cell2, prison->cell_pool); - bio_list_add(&cell->bios, inmate); - goto out; - } - - /* - * Use new cell. 
- */ - cell = cell2; - - cell->prison = prison; - memcpy(&cell->key, key, sizeof(cell->key)); - cell->holder = inmate; - bio_list_init(&cell->bios); - hlist_add_head(&cell->list, prison->cells + hash); - - r = 0; - -out: - spin_unlock_irqrestore(&prison->lock, flags); - - *ref = cell; - - return r; -} - -/* - * @inmates must have been initialised prior to this call - */ -static void __cell_release(struct dm_bio_prison_cell *cell, struct bio_list *inmates) -{ - struct dm_bio_prison *prison = cell->prison; - - hlist_del(&cell->list); - - if (inmates) { - bio_list_add(inmates, cell->holder); - bio_list_merge(inmates, &cell->bios); - } - - mempool_free(cell, prison->cell_pool); -} - -static void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios) -{ - unsigned long flags; - struct dm_bio_prison *prison = cell->prison; - - spin_lock_irqsave(&prison->lock, flags); - __cell_release(cell, bios); - spin_unlock_irqrestore(&prison->lock, flags); -} - -/* - * There are a couple of places where we put a bio into a cell briefly - * before taking it out again. In these situations we know that no other - * bio may be in the cell. This function releases the cell, and also does - * a sanity check. - */ -static void __cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio) -{ - BUG_ON(cell->holder != bio); - BUG_ON(!bio_list_empty(&cell->bios)); - - __cell_release(cell, NULL); -} - -static void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio) -{ - unsigned long flags; - struct dm_bio_prison *prison = cell->prison; - - spin_lock_irqsave(&prison->lock, flags); - __cell_release_singleton(cell, bio); - spin_unlock_irqrestore(&prison->lock, flags); -} - -/* - * Sometimes we don't want the holder, just the additional bios. 
- */ -static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, - struct bio_list *inmates) -{ - struct dm_bio_prison *prison = cell->prison; - - hlist_del(&cell->list); - bio_list_merge(inmates, &cell->bios); - - mempool_free(cell, prison->cell_pool); -} - -static void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, - struct bio_list *inmates) -{ - unsigned long flags; - struct dm_bio_prison *prison = cell->prison; - - spin_lock_irqsave(&prison->lock, flags); - __cell_release_no_holder(cell, inmates); - spin_unlock_irqrestore(&prison->lock, flags); -} - -static void dm_cell_error(struct dm_bio_prison_cell *cell) -{ - struct dm_bio_prison *prison = cell->prison; - struct bio_list bios; - struct bio *bio; - unsigned long flags; - - bio_list_init(&bios); - - spin_lock_irqsave(&prison->lock, flags); - __cell_release(cell, &bios); - spin_unlock_irqrestore(&prison->lock, flags); - - while ((bio = bio_list_pop(&bios))) - bio_io_error(bio); -} - -/*----------------------------------------------------------------*/ - -/* - * We use the deferred set to keep track of pending reads to shared blocks. - * We do this to ensure the new mapping caused by a write isn't performed - * until these prior reads have completed. Otherwise the insertion of the - * new mapping could free the old block that the read bios are mapped to. 
- */ - -struct dm_deferred_set; -struct dm_deferred_entry { - struct dm_deferred_set *ds; - unsigned count; - struct list_head work_items; -}; - -struct dm_deferred_set { - spinlock_t lock; - unsigned current_entry; - unsigned sweeper; - struct dm_deferred_entry entries[DEFERRED_SET_SIZE]; -}; - -static struct dm_deferred_set *dm_deferred_set_create(void) -{ - int i; - struct dm_deferred_set *ds; - - ds = kmalloc(sizeof(*ds), GFP_KERNEL); - if (!ds) - return NULL; - - spin_lock_init(&ds->lock); - ds->current_entry = 0; - ds->sweeper = 0; - for (i = 0; i < DEFERRED_SET_SIZE; i++) { - ds->entries[i].ds = ds; - ds->entries[i].count = 0; - INIT_LIST_HEAD(&ds->entries[i].work_items); - } - - return ds; -} - -static void dm_deferred_set_destroy(struct dm_deferred_set *ds) -{ - kfree(ds); -} - -static struct dm_deferred_entry *dm_deferred_entry_inc(struct dm_deferred_set *ds) -{ - unsigned long flags; - struct dm_deferred_entry *entry; - - spin_lock_irqsave(&ds->lock, flags); - entry = ds->entries + ds->current_entry; - entry->count++; - spin_unlock_irqrestore(&ds->lock, flags); - - return entry; -} - -static unsigned ds_next(unsigned index) -{ - return (index + 1) % DEFERRED_SET_SIZE; -} - -static void __sweep(struct dm_deferred_set *ds, struct list_head *head) -{ - while ((ds->sweeper != ds->current_entry) && - !ds->entries[ds->sweeper].count) { - list_splice_init(&ds->entries[ds->sweeper].work_items, head); - ds->sweeper = ds_next(ds->sweeper); - } - - if ((ds->sweeper == ds->current_entry) && !ds->entries[ds->sweeper].count) - list_splice_init(&ds->entries[ds->sweeper].work_items, head); -} - -static void dm_deferred_entry_dec(struct dm_deferred_entry *entry, struct list_head *head) -{ - unsigned long flags; - - spin_lock_irqsave(&entry->ds->lock, flags); - BUG_ON(!entry->count); - --entry->count; - __sweep(entry->ds, head); - spin_unlock_irqrestore(&entry->ds->lock, flags); -} - -/* - * Returns 1 if deferred or 0 if no pending items to delay job. 
- */ -static int dm_deferred_set_add_work(struct dm_deferred_set *ds, struct list_head *work) -{ - int r = 1; - unsigned long flags; - unsigned next_entry; - - spin_lock_irqsave(&ds->lock, flags); - if ((ds->sweeper == ds->current_entry) && - !ds->entries[ds->current_entry].count) - r = 0; - else { - list_add(work, &ds->entries[ds->current_entry].work_items); - next_entry = ds_next(ds->current_entry); - if (!ds->entries[next_entry].count) - ds->current_entry = next_entry; - } - spin_unlock_irqrestore(&ds->lock, flags); - - return r; -} - -static int __init dm_bio_prison_init(void) -{ - _cell_cache = KMEM_CACHE(dm_bio_prison_cell, 0); - if (!_cell_cache) - return -ENOMEM; - - return 0; -} - -static void __exit dm_bio_prison_exit(void) -{ - kmem_cache_destroy(_cell_cache); - _cell_cache = NULL; -} - -/*----------------------------------------------------------------*/ - /* * Key building. */ @@ -2852,7 +2454,7 @@ static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 4, 0}, + .version = {1, 5, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -3143,7 +2745,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type thin_target = { .name = "thin", - .version = {1, 4, 0}, + .version = {1, 5, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, @@ -3173,8 +2775,6 @@ static int __init dm_thin_init(void) r = -ENOMEM; - dm_bio_prison_init(); - _new_mapping_cache = KMEM_CACHE(dm_thin_new_mapping, 0); if (!_new_mapping_cache) goto bad_new_mapping_cache; @@ -3200,7 +2800,6 @@ static void dm_thin_exit(void) dm_unregister_target(&thin_target); dm_unregister_target(&pool_target); - dm_bio_prison_exit(); kmem_cache_destroy(_new_mapping_cache); kmem_cache_destroy(_endio_hook_cache); } From dba141601d1327146c84b575bd581ea8730e901c Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 12 Oct 2012 
21:02:15 +0100 Subject: [PATCH 8/8] dm: store dm_target_io in bio front_pad Use the recently-added bio front_pad field to allocate struct dm_target_io. Prior to this patch, dm_target_io was allocated from a mempool. For each dm_target_io, there is exactly one bio allocated from a bioset. This patch merges these two allocations into one allocation: we create a bioset with front_pad equal to the size of dm_target_io so that every bio allocated from the bioset has sizeof(struct dm_target_io) bytes before it. We allocate a bio and use the bytes before the bio as dm_target_io. _tio_cache is removed and the tio_pool mempool is now only used for request-based devices. This idea was introduced by Kent Overstreet. Signed-off-by: Mikulas Patocka Cc: Kent Overstreet Cc: Jens Axboe Cc: tj@kernel.org Cc: Vivek Goyal Cc: Bill Pemberton Signed-off-by: Alasdair G Kergon --- drivers/md/dm.c | 108 ++++++++++++++++++++++-------------------------- 1 file changed, 49 insertions(+), 59 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 66ceaff6455c..02db9183ca01 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -71,6 +71,7 @@ struct dm_target_io { struct dm_io *io; struct dm_target *ti; union map_info info; + struct bio clone; }; /* @@ -214,7 +215,6 @@ struct dm_md_mempools { #define MIN_IOS 256 static struct kmem_cache *_io_cache; -static struct kmem_cache *_tio_cache; static struct kmem_cache *_rq_tio_cache; /* @@ -232,14 +232,9 @@ static int __init local_init(void) if (!_io_cache) return r; - /* allocate a slab for the target ios */ - _tio_cache = KMEM_CACHE(dm_target_io, 0); - if (!_tio_cache) - goto out_free_io_cache; - _rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0); if (!_rq_tio_cache) - goto out_free_tio_cache; + goto out_free_io_cache; _rq_bio_info_cache = KMEM_CACHE(dm_rq_clone_bio_info, 0); if (!_rq_bio_info_cache) @@ -265,8 +260,6 @@ static int __init local_init(void) kmem_cache_destroy(_rq_bio_info_cache); out_free_rq_tio_cache: 
kmem_cache_destroy(_rq_tio_cache); -out_free_tio_cache: - kmem_cache_destroy(_tio_cache); out_free_io_cache: kmem_cache_destroy(_io_cache); @@ -277,7 +270,6 @@ static void local_exit(void) { kmem_cache_destroy(_rq_bio_info_cache); kmem_cache_destroy(_rq_tio_cache); - kmem_cache_destroy(_tio_cache); kmem_cache_destroy(_io_cache); unregister_blkdev(_major, _name); dm_uevent_exit(); @@ -463,7 +455,7 @@ static void free_io(struct mapped_device *md, struct dm_io *io) static void free_tio(struct mapped_device *md, struct dm_target_io *tio) { - mempool_free(tio, md->tio_pool); + bio_put(&tio->clone); } static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md, @@ -682,7 +674,6 @@ static void clone_endio(struct bio *bio, int error) } free_tio(md, tio); - bio_put(bio); dec_pending(io, error); } @@ -1002,12 +993,12 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) } EXPORT_SYMBOL_GPL(dm_set_target_max_io_len); -static void __map_bio(struct dm_target *ti, struct bio *clone, - struct dm_target_io *tio) +static void __map_bio(struct dm_target *ti, struct dm_target_io *tio) { int r; sector_t sector; struct mapped_device *md; + struct bio *clone = &tio->clone; clone->bi_end_io = clone_endio; clone->bi_private = tio; @@ -1031,7 +1022,6 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, /* error the io and bail out, or requeue it if needed */ md = tio->io->md; dec_pending(tio->io, r); - bio_put(clone); free_tio(md, tio); } else if (r) { DMWARN("unimplemented target map return value: %d", r); @@ -1052,14 +1042,13 @@ struct clone_info { /* * Creates a little bio that just does part of a bvec. 
*/ -static struct bio *split_bvec(struct bio *bio, sector_t sector, - unsigned short idx, unsigned int offset, - unsigned int len, struct bio_set *bs) +static void split_bvec(struct dm_target_io *tio, struct bio *bio, + sector_t sector, unsigned short idx, unsigned int offset, + unsigned int len, struct bio_set *bs) { - struct bio *clone; + struct bio *clone = &tio->clone; struct bio_vec *bv = bio->bi_io_vec + idx; - clone = bio_alloc_bioset(GFP_NOIO, 1, bs); *clone->bi_io_vec = *bv; clone->bi_sector = sector; @@ -1076,20 +1065,18 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector, bio_integrity_trim(clone, bio_sector_offset(bio, idx, offset), len); } - - return clone; } /* * Creates a bio that consists of range of complete bvecs. */ -static struct bio *clone_bio(struct bio *bio, sector_t sector, - unsigned short idx, unsigned short bv_count, - unsigned int len, struct bio_set *bs) +static void clone_bio(struct dm_target_io *tio, struct bio *bio, + sector_t sector, unsigned short idx, + unsigned short bv_count, unsigned int len, + struct bio_set *bs) { - struct bio *clone; + struct bio *clone = &tio->clone; - clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); __bio_clone(clone, bio); clone->bi_sector = sector; clone->bi_idx = idx; @@ -1104,14 +1091,16 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, bio_integrity_trim(clone, bio_sector_offset(bio, idx, 0), len); } - - return clone; } static struct dm_target_io *alloc_tio(struct clone_info *ci, - struct dm_target *ti) + struct dm_target *ti, int nr_iovecs) { - struct dm_target_io *tio = mempool_alloc(ci->md->tio_pool, GFP_NOIO); + struct dm_target_io *tio; + struct bio *clone; + + clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, ci->md->bs); + tio = container_of(clone, struct dm_target_io, clone); tio->io = ci->io; tio->ti = ti; @@ -1123,8 +1112,8 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, static void __issue_target_request(struct clone_info *ci, struct 
dm_target *ti, unsigned request_nr, sector_t len) { - struct dm_target_io *tio = alloc_tio(ci, ti); - struct bio *clone; + struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs); + struct bio *clone = &tio->clone; tio->info.target_request_nr = request_nr; @@ -1133,14 +1122,14 @@ static void __issue_target_request(struct clone_info *ci, struct dm_target *ti, * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush * and discard, so no need for concern about wasted bvec allocations. */ - clone = bio_clone_bioset(ci->bio, GFP_NOIO, ci->md->bs); + __bio_clone(clone, ci->bio); if (len) { clone->bi_sector = ci->sector; clone->bi_size = to_bytes(len); } - __map_bio(ti, clone, tio); + __map_bio(ti, tio); } static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti, @@ -1169,14 +1158,13 @@ static int __clone_and_map_empty_flush(struct clone_info *ci) */ static void __clone_and_map_simple(struct clone_info *ci, struct dm_target *ti) { - struct bio *clone, *bio = ci->bio; + struct bio *bio = ci->bio; struct dm_target_io *tio; - tio = alloc_tio(ci, ti); - clone = clone_bio(bio, ci->sector, ci->idx, - bio->bi_vcnt - ci->idx, ci->sector_count, - ci->md->bs); - __map_bio(ti, clone, tio); + tio = alloc_tio(ci, ti, bio->bi_max_vecs); + clone_bio(tio, bio, ci->sector, ci->idx, bio->bi_vcnt - ci->idx, + ci->sector_count, ci->md->bs); + __map_bio(ti, tio); ci->sector_count = 0; } @@ -1214,7 +1202,7 @@ static int __clone_and_map_discard(struct clone_info *ci) static int __clone_and_map(struct clone_info *ci) { - struct bio *clone, *bio = ci->bio; + struct bio *bio = ci->bio; struct dm_target *ti; sector_t len = 0, max; struct dm_target_io *tio; @@ -1254,10 +1242,10 @@ static int __clone_and_map(struct clone_info *ci) len += bv_len; } - tio = alloc_tio(ci, ti); - clone = clone_bio(bio, ci->sector, ci->idx, i - ci->idx, len, - ci->md->bs); - __map_bio(ti, clone, tio); + tio = alloc_tio(ci, ti, bio->bi_max_vecs); + clone_bio(tio, bio, ci->sector, 
ci->idx, i - ci->idx, len, + ci->md->bs); + __map_bio(ti, tio); ci->sector += len; ci->sector_count -= len; @@ -1282,12 +1270,11 @@ static int __clone_and_map(struct clone_info *ci) len = min(remaining, max); - tio = alloc_tio(ci, ti); - clone = split_bvec(bio, ci->sector, ci->idx, - bv->bv_offset + offset, len, - ci->md->bs); + tio = alloc_tio(ci, ti, 1); + split_bvec(tio, bio, ci->sector, ci->idx, + bv->bv_offset + offset, len, ci->md->bs); - __map_bio(ti, clone, tio); + __map_bio(ti, tio); ci->sector += len; ci->sector_count -= len; @@ -1955,7 +1942,7 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t) { struct dm_md_mempools *p; - if (md->io_pool && md->tio_pool && md->bs) + if (md->io_pool && (md->tio_pool || dm_table_get_type(t) == DM_TYPE_BIO_BASED) && md->bs) /* the md already has necessary mempools */ goto out; @@ -2732,14 +2719,16 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity) if (!pools->io_pool) goto free_pools_and_out; - pools->tio_pool = (type == DM_TYPE_BIO_BASED) ? - mempool_create_slab_pool(MIN_IOS, _tio_cache) : - mempool_create_slab_pool(MIN_IOS, _rq_tio_cache); - if (!pools->tio_pool) - goto free_io_pool_and_out; + pools->tio_pool = NULL; + if (type == DM_TYPE_REQUEST_BASED) { + pools->tio_pool = mempool_create_slab_pool(MIN_IOS, _rq_tio_cache); + if (!pools->tio_pool) + goto free_io_pool_and_out; + } pools->bs = (type == DM_TYPE_BIO_BASED) ? - bioset_create(pool_size, 0) : + bioset_create(pool_size, + offsetof(struct dm_target_io, clone)) : bioset_create(pool_size, offsetof(struct dm_rq_clone_bio_info, clone)); if (!pools->bs) @@ -2754,7 +2743,8 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity) bioset_free(pools->bs); free_tio_pool_and_out: - mempool_destroy(pools->tio_pool); + if (pools->tio_pool) + mempool_destroy(pools->tio_pool); free_io_pool_and_out: mempool_destroy(pools->io_pool);