diff --git a/block/bio.c b/block/bio.c index fa01bef35bb1..1f2cc1fbe283 100644 --- a/block/bio.c +++ b/block/bio.c @@ -608,13 +608,13 @@ void bio_truncate(struct bio *bio, unsigned new_size) void guard_bio_eod(struct bio *bio) { sector_t maxsector; - struct hd_struct *part; + struct block_device *part; rcu_read_lock(); part = __disk_get_part(bio->bi_disk, bio->bi_partno); if (part) - maxsector = part_nr_sects_read(part); - else + maxsector = bdev_nr_sectors(part); + else maxsector = get_capacity(bio->bi_disk); rcu_read_unlock(); @@ -1212,8 +1212,8 @@ void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, flush_dcache_page(dst_bv.bv_page); - bio_advance_iter(src, src_iter, bytes); - bio_advance_iter(dst, dst_iter, bytes); + bio_advance_iter_single(src, src_iter, bytes); + bio_advance_iter_single(dst, dst_iter, bytes); } } EXPORT_SYMBOL(bio_copy_data_iter); diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 54fbe1e80cc4..031114d454a6 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -556,22 +556,22 @@ static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg, } /** - * blkg_conf_prep - parse and prepare for per-blkg config update + * blkcg_conf_open_bdev - parse and open bdev for per-blkg config update * @inputp: input string pointer * * Parse the device node prefix part, MAJ:MIN, of per-blkg config update - * from @input and get and return the matching gendisk. *@inputp is + * from @input and get and return the matching bdev. *@inputp is * updated to point past the device node prefix. Returns an ERR_PTR() * value on error. * * Use this function iff blkg_conf_prep() can't be used for some reason. 
*/ -struct gendisk *blkcg_conf_get_disk(char **inputp) +struct block_device *blkcg_conf_open_bdev(char **inputp) { char *input = *inputp; unsigned int major, minor; - struct gendisk *disk; - int key_len, part; + struct block_device *bdev; + int key_len; if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2) return ERR_PTR(-EINVAL); @@ -581,16 +581,16 @@ struct gendisk *blkcg_conf_get_disk(char **inputp) return ERR_PTR(-EINVAL); input = skip_spaces(input); - disk = get_gendisk(MKDEV(major, minor), &part); - if (!disk) + bdev = blkdev_get_no_open(MKDEV(major, minor)); + if (!bdev) return ERR_PTR(-ENODEV); - if (part) { - put_disk_and_module(disk); + if (bdev_is_partition(bdev)) { + blkdev_put_no_open(bdev); return ERR_PTR(-ENODEV); } *inputp = input; - return disk; + return bdev; } /** @@ -607,18 +607,18 @@ struct gendisk *blkcg_conf_get_disk(char **inputp) */ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, char *input, struct blkg_conf_ctx *ctx) - __acquires(rcu) __acquires(&disk->queue->queue_lock) + __acquires(rcu) __acquires(&bdev->bd_disk->queue->queue_lock) { - struct gendisk *disk; + struct block_device *bdev; struct request_queue *q; struct blkcg_gq *blkg; int ret; - disk = blkcg_conf_get_disk(&input); - if (IS_ERR(disk)) - return PTR_ERR(disk); + bdev = blkcg_conf_open_bdev(&input); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); - q = disk->queue; + q = bdev->bd_disk->queue; rcu_read_lock(); spin_lock_irq(&q->queue_lock); @@ -689,7 +689,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, goto success; } success: - ctx->disk = disk; + ctx->bdev = bdev; ctx->blkg = blkg; ctx->body = input; return 0; @@ -700,7 +700,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, spin_unlock_irq(&q->queue_lock); rcu_read_unlock(); fail: - put_disk_and_module(disk); + blkdev_put_no_open(bdev); /* * If queue was bypassing, we should retry. Do so after a * short msleep(). 
It isn't strictly necessary but queue @@ -723,11 +723,11 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep); * with blkg_conf_prep(). */ void blkg_conf_finish(struct blkg_conf_ctx *ctx) - __releases(&ctx->disk->queue->queue_lock) __releases(rcu) + __releases(&ctx->bdev->bd_disk->queue->queue_lock) __releases(rcu) { - spin_unlock_irq(&ctx->disk->queue->queue_lock); + spin_unlock_irq(&ctx->bdev->bd_disk->queue->queue_lock); rcu_read_unlock(); - put_disk_and_module(ctx->disk); + blkdev_put_no_open(ctx->bdev); } EXPORT_SYMBOL_GPL(blkg_conf_finish); @@ -820,9 +820,9 @@ static void blkcg_fill_root_iostats(void) class_dev_iter_init(&iter, &block_class, NULL, &disk_type); while ((dev = class_dev_iter_next(&iter))) { - struct gendisk *disk = dev_to_disk(dev); - struct hd_struct *part = disk_get_part(disk, 0); - struct blkcg_gq *blkg = blk_queue_root_blkg(disk->queue); + struct block_device *bdev = dev_to_bdev(dev); + struct blkcg_gq *blkg = + blk_queue_root_blkg(bdev->bd_disk->queue); struct blkg_iostat tmp; int cpu; @@ -830,7 +830,7 @@ static void blkcg_fill_root_iostats(void) for_each_possible_cpu(cpu) { struct disk_stats *cpu_dkstats; - cpu_dkstats = per_cpu_ptr(part->dkstats, cpu); + cpu_dkstats = per_cpu_ptr(bdev->bd_stats, cpu); tmp.ios[BLKG_IOSTAT_READ] += cpu_dkstats->ios[STAT_READ]; tmp.ios[BLKG_IOSTAT_WRITE] += @@ -849,7 +849,6 @@ static void blkcg_fill_root_iostats(void) blkg_iostat_set(&blkg->iostat.cur, &tmp); u64_stats_update_end(&blkg->iostat.sync); } - disk_put_part(part); } } diff --git a/block/blk-core.c b/block/blk-core.c index 2db8bda43b6e..96e5fcd7f071 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -666,9 +666,9 @@ static int __init setup_fail_make_request(char *str) } __setup("fail_make_request=", setup_fail_make_request); -static bool should_fail_request(struct hd_struct *part, unsigned int bytes) +static bool should_fail_request(struct block_device *part, unsigned int bytes) { - return part->make_it_fail && should_fail(&fail_make_request, bytes); + return 
part->bd_make_it_fail && should_fail(&fail_make_request, bytes); } static int __init fail_make_request_debugfs(void) @@ -683,7 +683,7 @@ late_initcall(fail_make_request_debugfs); #else /* CONFIG_FAIL_MAKE_REQUEST */ -static inline bool should_fail_request(struct hd_struct *part, +static inline bool should_fail_request(struct block_device *part, unsigned int bytes) { return false; @@ -691,11 +691,11 @@ static inline bool should_fail_request(struct hd_struct *part, #endif /* CONFIG_FAIL_MAKE_REQUEST */ -static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part) +static inline bool bio_check_ro(struct bio *bio, struct block_device *part) { const int op = bio_op(bio); - if (part->policy && op_is_write(op)) { + if (part->bd_read_only && op_is_write(op)) { char b[BDEVNAME_SIZE]; if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) @@ -703,7 +703,7 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part) WARN_ONCE(1, "Trying to write to read-only block-device %s (partno %d)\n", - bio_devname(bio, b), part->partno); + bio_devname(bio, b), part->bd_partno); /* Older lvm-tools actually trigger this */ return false; } @@ -713,7 +713,7 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part) static noinline int should_fail_bio(struct bio *bio) { - if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size)) + if (should_fail_request(bio->bi_disk->part0, bio->bi_iter.bi_size)) return -EIO; return 0; } @@ -742,7 +742,7 @@ static inline int bio_check_eod(struct bio *bio, sector_t maxsector) */ static inline int blk_partition_remap(struct bio *bio) { - struct hd_struct *p; + struct block_device *p; int ret = -EIO; rcu_read_lock(); @@ -755,11 +755,12 @@ static inline int blk_partition_remap(struct bio *bio) goto out; if (bio_sectors(bio)) { - if (bio_check_eod(bio, part_nr_sects_read(p))) + if (bio_check_eod(bio, bdev_nr_sectors(p))) goto out; - bio->bi_iter.bi_sector += p->start_sect; - 
trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p), - bio->bi_iter.bi_sector - p->start_sect); + bio->bi_iter.bi_sector += p->bd_start_sect; + trace_block_bio_remap(bio, p->bd_dev, + bio->bi_iter.bi_sector - + p->bd_start_sect); } bio->bi_partno = 0; ret = 0; @@ -829,7 +830,7 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio) if (unlikely(blk_partition_remap(bio))) goto end_io; } else { - if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0))) + if (unlikely(bio_check_ro(bio, bio->bi_disk->part0))) goto end_io; if (unlikely(bio_check_eod(bio, get_capacity(bio->bi_disk)))) goto end_io; @@ -906,7 +907,7 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio) blkcg_bio_issue_init(bio); if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) { - trace_block_bio_queue(q, bio); + trace_block_bio_queue(bio); /* Now that enqueuing has been traced, we need to trace * completion as well. */ @@ -1201,7 +1202,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request * return ret; if (rq->rq_disk && - should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq))) + should_fail_request(rq->rq_disk->part0, blk_rq_bytes(rq))) return BLK_STS_IOERR; if (blk_crypto_insert_cloned_request(rq)) @@ -1260,17 +1261,18 @@ unsigned int blk_rq_err_bytes(const struct request *rq) } EXPORT_SYMBOL_GPL(blk_rq_err_bytes); -static void update_io_ticks(struct hd_struct *part, unsigned long now, bool end) +static void update_io_ticks(struct block_device *part, unsigned long now, + bool end) { unsigned long stamp; again: - stamp = READ_ONCE(part->stamp); + stamp = READ_ONCE(part->bd_stamp); if (unlikely(stamp != now)) { - if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) + if (likely(cmpxchg(&part->bd_stamp, stamp, now) == stamp)) __part_stat_add(part, io_ticks, end ? 
now - stamp : 1); } - if (part->partno) { - part = &part_to_disk(part)->part0; + if (part->bd_partno) { + part = bdev_whole(part); goto again; } } @@ -1279,11 +1281,9 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes) { if (req->part && blk_do_io_stat(req)) { const int sgrp = op_stat_group(req_op(req)); - struct hd_struct *part; part_stat_lock(); - part = req->part; - part_stat_add(part, sectors[sgrp], bytes >> 9); + part_stat_add(req->part, sectors[sgrp], bytes >> 9); part_stat_unlock(); } } @@ -1298,17 +1298,12 @@ void blk_account_io_done(struct request *req, u64 now) if (req->part && blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) { const int sgrp = op_stat_group(req_op(req)); - struct hd_struct *part; part_stat_lock(); - part = req->part; - - update_io_ticks(part, jiffies, true); - part_stat_inc(part, ios[sgrp]); - part_stat_add(part, nsecs[sgrp], now - req->start_time_ns); + update_io_ticks(req->part, jiffies, true); + part_stat_inc(req->part, ios[sgrp]); + part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns); part_stat_unlock(); - - hd_struct_put(part); } } @@ -1324,7 +1319,7 @@ void blk_account_io_start(struct request *rq) part_stat_unlock(); } -static unsigned long __part_start_io_acct(struct hd_struct *part, +static unsigned long __part_start_io_acct(struct block_device *part, unsigned int sectors, unsigned int op) { const int sgrp = op_stat_group(op); @@ -1340,7 +1335,7 @@ static unsigned long __part_start_io_acct(struct hd_struct *part, return now; } -unsigned long part_start_io_acct(struct gendisk *disk, struct hd_struct **part, +unsigned long part_start_io_acct(struct gendisk *disk, struct block_device **part, struct bio *bio) { *part = disk_map_sector_rcu(disk, bio->bi_iter.bi_sector); @@ -1352,11 +1347,11 @@ EXPORT_SYMBOL_GPL(part_start_io_acct); unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, unsigned int op) { - return __part_start_io_acct(&disk->part0, sectors, op); + 
return __part_start_io_acct(disk->part0, sectors, op); } EXPORT_SYMBOL(disk_start_io_acct); -static void __part_end_io_acct(struct hd_struct *part, unsigned int op, +static void __part_end_io_acct(struct block_device *part, unsigned int op, unsigned long start_time) { const int sgrp = op_stat_group(op); @@ -1370,18 +1365,17 @@ static void __part_end_io_acct(struct hd_struct *part, unsigned int op, part_stat_unlock(); } -void part_end_io_acct(struct hd_struct *part, struct bio *bio, +void part_end_io_acct(struct block_device *part, struct bio *bio, unsigned long start_time) { __part_end_io_acct(part, bio_op(bio), start_time); - hd_struct_put(part); } EXPORT_SYMBOL_GPL(part_end_io_acct); void disk_end_io_acct(struct gendisk *disk, unsigned int op, unsigned long start_time) { - __part_end_io_acct(&disk->part0, op, start_time); + __part_end_io_acct(disk->part0, op, start_time); } EXPORT_SYMBOL(disk_end_io_acct); diff --git a/block/blk-flush.c b/block/blk-flush.c index fd5cee9f1a3b..76c1624cb06c 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -69,7 +69,6 @@ #include #include #include -#include #include "blk.h" #include "blk-mq.h" @@ -139,7 +138,7 @@ static void blk_flush_queue_rq(struct request *rq, bool add_front) static void blk_account_io_flush(struct request *rq) { - struct hd_struct *part = &rq->rq_disk->part0; + struct block_device *part = rq->rq_disk->part0; part_stat_lock(); part_stat_inc(part, ios[STAT_FLUSH]); @@ -474,9 +473,6 @@ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size, INIT_LIST_HEAD(&fq->flush_queue[1]); INIT_LIST_HEAD(&fq->flush_data_in_flight); - lockdep_register_key(&fq->key); - lockdep_set_class(&fq->mq_flush_lock, &fq->key); - return fq; fail_rq: @@ -491,7 +487,31 @@ void blk_free_flush_queue(struct blk_flush_queue *fq) if (!fq) return; - lockdep_unregister_key(&fq->key); kfree(fq->flush_rq); kfree(fq); } + +/* + * Allow driver to set its own lock class to fq->mq_flush_lock for + * avoiding lockdep complaint. 
+ * + * flush_end_io() may be called recursively from some driver, such as + * nvme-loop, so lockdep may complain 'possible recursive locking' because + * all 'struct blk_flush_queue' instance share same mq_flush_lock lock class + * key. We need to assign different lock class for these driver's + * fq->mq_flush_lock for avoiding the lockdep warning. + * + * Using a dynamically allocated lock class key for each 'blk_flush_queue' + * instance is overkill and, worse, it introduces a horrible boot delay + * issue because synchronize_rcu() is implied in lockdep_unregister_key which + * is called for each hctx release. SCSI probing may synchronously create and + * destroy lots of MQ request_queues for non-existent devices, and some robot + * test kernel always enable lockdep option. It is observed that more than half + * an hour is taken during SCSI MQ probe with per-fq lock class. + */ +void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx, + struct lock_class_key *key) +{ + lockdep_set_class(&hctx->fq->mq_flush_lock, key); +} +EXPORT_SYMBOL_GPL(blk_mq_hctx_set_fq_lock_class); diff --git a/block/blk-iocost.c b/block/blk-iocost.c index bbe86d1199dc..ffa418c0dcb1 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -39,7 +39,7 @@ * On top of that, a size cost proportional to the length of the IO is * added. While simple, this model captures the operational * characteristics of a wide varienty of devices well enough. Default - * paramters for several different classes of devices are provided and the + * parameters for several different classes of devices are provided and the * parameters can be configured from userspace via * /sys/fs/cgroup/io.cost.model. * @@ -77,7 +77,7 @@ * * This constitutes the basis of IO capacity distribution. Each cgroup's * vtime is running at a rate determined by its hweight. 
A cgroup tracks - * the vtime consumed by past IOs and can issue a new IO iff doing so + * the vtime consumed by past IOs and can issue a new IO if doing so * wouldn't outrun the current device vtime. Otherwise, the IO is * suspended until the vtime has progressed enough to cover it. * @@ -155,7 +155,7 @@ * Instead of debugfs or other clumsy monitoring mechanisms, this * controller uses a drgn based monitoring script - * tools/cgroup/iocost_monitor.py. For details on drgn, please see - * https://github.com/osandov/drgn. The ouput looks like the following. + * https://github.com/osandov/drgn. The output looks like the following. * * sdb RUN per=300ms cur_per=234.218:v203.695 busy= +1 vrate= 62.12% * active weight hweight% inflt% dbt delay usages% @@ -370,8 +370,6 @@ enum { AUTOP_SSD_FAST, }; -struct ioc_gq; - struct ioc_params { u32 qos[NR_QOS_PARAMS]; u64 i_lcoefs[NR_I_LCOEFS]; @@ -492,7 +490,7 @@ struct ioc_gq { /* * `vtime` is this iocg's vtime cursor which progresses as IOs are * issued. If lagging behind device vtime, the delta represents - * the currently available IO budget. If runnning ahead, the + * the currently available IO budget. If running ahead, the * overage. 
* * `vtime_done` is the same but progressed on completion rather @@ -973,6 +971,58 @@ static void ioc_refresh_vrate(struct ioc *ioc, struct ioc_now *now) ioc->vtime_err = clamp(ioc->vtime_err, -vperiod, vperiod); } +static void ioc_adjust_base_vrate(struct ioc *ioc, u32 rq_wait_pct, + int nr_lagging, int nr_shortages, + int prev_busy_level, u32 *missed_ppm) +{ + u64 vrate = ioc->vtime_base_rate; + u64 vrate_min = ioc->vrate_min, vrate_max = ioc->vrate_max; + + if (!ioc->busy_level || (ioc->busy_level < 0 && nr_lagging)) { + if (ioc->busy_level != prev_busy_level || nr_lagging) + trace_iocost_ioc_vrate_adj(ioc, atomic64_read(&ioc->vtime_rate), + missed_ppm, rq_wait_pct, + nr_lagging, nr_shortages); + + return; + } + + /* rq_wait signal is always reliable, ignore user vrate_min */ + if (rq_wait_pct > RQ_WAIT_BUSY_PCT) + vrate_min = VRATE_MIN; + + /* + * If vrate is out of bounds, apply clamp gradually as the + * bounds can change abruptly. Otherwise, apply busy_level + * based adjustment. 
+ */ + if (vrate < vrate_min) { + vrate = div64_u64(vrate * (100 + VRATE_CLAMP_ADJ_PCT), 100); + vrate = min(vrate, vrate_min); + } else if (vrate > vrate_max) { + vrate = div64_u64(vrate * (100 - VRATE_CLAMP_ADJ_PCT), 100); + vrate = max(vrate, vrate_max); + } else { + int idx = min_t(int, abs(ioc->busy_level), + ARRAY_SIZE(vrate_adj_pct) - 1); + u32 adj_pct = vrate_adj_pct[idx]; + + if (ioc->busy_level > 0) + adj_pct = 100 - adj_pct; + else + adj_pct = 100 + adj_pct; + + vrate = clamp(DIV64_U64_ROUND_UP(vrate * adj_pct, 100), + vrate_min, vrate_max); + } + + trace_iocost_ioc_vrate_adj(ioc, vrate, missed_ppm, rq_wait_pct, + nr_lagging, nr_shortages); + + ioc->vtime_base_rate = vrate; + ioc_refresh_margins(ioc); +} + /* take a snapshot of the current [v]time and vrate */ static void ioc_now(struct ioc *ioc, struct ioc_now *now) { @@ -1046,7 +1096,7 @@ static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse, /* * The delta between inuse and active sums indicates that - * that much of weight is being given away. Parent's inuse + * much of weight is being given away. Parent's inuse * and active should reflect the ratio. */ if (parent->child_active_sum) { @@ -2071,13 +2121,88 @@ static void ioc_forgive_debts(struct ioc *ioc, u64 usage_us_sum, int nr_debtors, } } +/* + * Check the active iocgs' state to avoid oversleeping and deactivate + * idle iocgs. + * + * Since waiters determine the sleep durations based on the vrate + * they saw at the time of sleep, if vrate has increased, some + * waiters could be sleeping for too long. Wake up tardy waiters + * which should have woken up in the last period and expire idle + * iocgs. 
+ */ +static int ioc_check_iocgs(struct ioc *ioc, struct ioc_now *now) +{ + int nr_debtors = 0; + struct ioc_gq *iocg, *tiocg; + + list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) { + if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt && + !iocg->delay && !iocg_is_idle(iocg)) + continue; + + spin_lock(&iocg->waitq.lock); + + /* flush wait and indebt stat deltas */ + if (iocg->wait_since) { + iocg->local_stat.wait_us += now->now - iocg->wait_since; + iocg->wait_since = now->now; + } + if (iocg->indebt_since) { + iocg->local_stat.indebt_us += + now->now - iocg->indebt_since; + iocg->indebt_since = now->now; + } + if (iocg->indelay_since) { + iocg->local_stat.indelay_us += + now->now - iocg->indelay_since; + iocg->indelay_since = now->now; + } + + if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt || + iocg->delay) { + /* might be oversleeping vtime / hweight changes, kick */ + iocg_kick_waitq(iocg, true, now); + if (iocg->abs_vdebt || iocg->delay) + nr_debtors++; + } else if (iocg_is_idle(iocg)) { + /* no waiter and idle, deactivate */ + u64 vtime = atomic64_read(&iocg->vtime); + s64 excess; + + /* + * @iocg has been inactive for a full duration and will + * have a high budget. Account anything above target as + * error and throw away. On reactivation, it'll start + * with the target budget. 
+ */ + excess = now->vnow - vtime - ioc->margins.target; + if (excess > 0) { + u32 old_hwi; + + current_hweight(iocg, NULL, &old_hwi); + ioc->vtime_err -= div64_u64(excess * old_hwi, + WEIGHT_ONE); + } + + __propagate_weights(iocg, 0, 0, false, now); + list_del_init(&iocg->active_list); + } + + spin_unlock(&iocg->waitq.lock); + } + + commit_weights(ioc); + return nr_debtors; +} + static void ioc_timer_fn(struct timer_list *timer) { struct ioc *ioc = container_of(timer, struct ioc, timer); struct ioc_gq *iocg, *tiocg; struct ioc_now now; LIST_HEAD(surpluses); - int nr_debtors = 0, nr_shortages = 0, nr_lagging = 0; + int nr_debtors, nr_shortages = 0, nr_lagging = 0; u64 usage_us_sum = 0; u32 ppm_rthr = MILLION - ioc->params.qos[QOS_RPPM]; u32 ppm_wthr = MILLION - ioc->params.qos[QOS_WPPM]; @@ -2099,68 +2224,7 @@ static void ioc_timer_fn(struct timer_list *timer) return; } - /* - * Waiters determine the sleep durations based on the vrate they - * saw at the time of sleep. If vrate has increased, some waiters - * could be sleeping for too long. Wake up tardy waiters which - * should have woken up in the last period and expire idle iocgs. 
- */ - list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) { - if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt && - !iocg->delay && !iocg_is_idle(iocg)) - continue; - - spin_lock(&iocg->waitq.lock); - - /* flush wait and indebt stat deltas */ - if (iocg->wait_since) { - iocg->local_stat.wait_us += now.now - iocg->wait_since; - iocg->wait_since = now.now; - } - if (iocg->indebt_since) { - iocg->local_stat.indebt_us += - now.now - iocg->indebt_since; - iocg->indebt_since = now.now; - } - if (iocg->indelay_since) { - iocg->local_stat.indelay_us += - now.now - iocg->indelay_since; - iocg->indelay_since = now.now; - } - - if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt || - iocg->delay) { - /* might be oversleeping vtime / hweight changes, kick */ - iocg_kick_waitq(iocg, true, &now); - if (iocg->abs_vdebt || iocg->delay) - nr_debtors++; - } else if (iocg_is_idle(iocg)) { - /* no waiter and idle, deactivate */ - u64 vtime = atomic64_read(&iocg->vtime); - s64 excess; - - /* - * @iocg has been inactive for a full duration and will - * have a high budget. Account anything above target as - * error and throw away. On reactivation, it'll start - * with the target budget. 
- */ - excess = now.vnow - vtime - ioc->margins.target; - if (excess > 0) { - u32 old_hwi; - - current_hweight(iocg, NULL, &old_hwi); - ioc->vtime_err -= div64_u64(excess * old_hwi, - WEIGHT_ONE); - } - - __propagate_weights(iocg, 0, 0, false, &now); - list_del_init(&iocg->active_list); - } - - spin_unlock(&iocg->waitq.lock); - } - commit_weights(ioc); + nr_debtors = ioc_check_iocgs(ioc, &now); /* * Wait and indebt stat are flushed above and the donation calculation @@ -2170,8 +2234,8 @@ static void ioc_timer_fn(struct timer_list *timer) /* calc usage and see whether some weights need to be moved around */ list_for_each_entry(iocg, &ioc->active_iocgs, active_list) { - u64 vdone, vtime, usage_us, usage_dur; - u32 usage, hw_active, hw_inuse; + u64 vdone, vtime, usage_us; + u32 hw_active, hw_inuse; /* * Collect unused and wind vtime closer to vnow to prevent @@ -2202,30 +2266,32 @@ static void ioc_timer_fn(struct timer_list *timer) usage_us = iocg->usage_delta_us; usage_us_sum += usage_us; - if (vdone != vtime) { - u64 inflight_us = DIV64_U64_ROUND_UP( - cost_to_abs_cost(vtime - vdone, hw_inuse), - ioc->vtime_base_rate); - usage_us = max(usage_us, inflight_us); - } - - /* convert to hweight based usage ratio */ - if (time_after64(iocg->activated_at, ioc->period_at)) - usage_dur = max_t(u64, now.now - iocg->activated_at, 1); - else - usage_dur = max_t(u64, now.now - ioc->period_at, 1); - - usage = clamp_t(u32, - DIV64_U64_ROUND_UP(usage_us * WEIGHT_ONE, - usage_dur), - 1, WEIGHT_ONE); - /* see whether there's surplus vtime */ WARN_ON_ONCE(!list_empty(&iocg->surplus_list)); if (hw_inuse < hw_active || (!waitqueue_active(&iocg->waitq) && time_before64(vtime, now.vnow - ioc->margins.low))) { - u32 hwa, old_hwi, hwm, new_hwi; + u32 hwa, old_hwi, hwm, new_hwi, usage; + u64 usage_dur; + + if (vdone != vtime) { + u64 inflight_us = DIV64_U64_ROUND_UP( + cost_to_abs_cost(vtime - vdone, hw_inuse), + ioc->vtime_base_rate); + + usage_us = max(usage_us, inflight_us); + } + + /* 
convert to hweight based usage ratio */ + if (time_after64(iocg->activated_at, ioc->period_at)) + usage_dur = max_t(u64, now.now - iocg->activated_at, 1); + else + usage_dur = max_t(u64, now.now - ioc->period_at, 1); + + usage = clamp_t(u32, + DIV64_U64_ROUND_UP(usage_us * WEIGHT_ONE, + usage_dur), + 1, WEIGHT_ONE); /* * Already donating or accumulated enough to start. @@ -2309,51 +2375,8 @@ static void ioc_timer_fn(struct timer_list *timer) ioc->busy_level = clamp(ioc->busy_level, -1000, 1000); - if (ioc->busy_level > 0 || (ioc->busy_level < 0 && !nr_lagging)) { - u64 vrate = ioc->vtime_base_rate; - u64 vrate_min = ioc->vrate_min, vrate_max = ioc->vrate_max; - - /* rq_wait signal is always reliable, ignore user vrate_min */ - if (rq_wait_pct > RQ_WAIT_BUSY_PCT) - vrate_min = VRATE_MIN; - - /* - * If vrate is out of bounds, apply clamp gradually as the - * bounds can change abruptly. Otherwise, apply busy_level - * based adjustment. - */ - if (vrate < vrate_min) { - vrate = div64_u64(vrate * (100 + VRATE_CLAMP_ADJ_PCT), - 100); - vrate = min(vrate, vrate_min); - } else if (vrate > vrate_max) { - vrate = div64_u64(vrate * (100 - VRATE_CLAMP_ADJ_PCT), - 100); - vrate = max(vrate, vrate_max); - } else { - int idx = min_t(int, abs(ioc->busy_level), - ARRAY_SIZE(vrate_adj_pct) - 1); - u32 adj_pct = vrate_adj_pct[idx]; - - if (ioc->busy_level > 0) - adj_pct = 100 - adj_pct; - else - adj_pct = 100 + adj_pct; - - vrate = clamp(DIV64_U64_ROUND_UP(vrate * adj_pct, 100), - vrate_min, vrate_max); - } - - trace_iocost_ioc_vrate_adj(ioc, vrate, missed_ppm, rq_wait_pct, - nr_lagging, nr_shortages); - - ioc->vtime_base_rate = vrate; - ioc_refresh_margins(ioc); - } else if (ioc->busy_level != prev_busy_level || nr_lagging) { - trace_iocost_ioc_vrate_adj(ioc, atomic64_read(&ioc->vtime_rate), - missed_ppm, rq_wait_pct, nr_lagging, - nr_shortages); - } + ioc_adjust_base_vrate(ioc, rq_wait_pct, nr_lagging, nr_shortages, + prev_busy_level, missed_ppm); ioc_refresh_params(ioc, false); @@ 
-2400,7 +2423,7 @@ static u64 adjust_inuse_and_calc_cost(struct ioc_gq *iocg, u64 vtime, return cost; /* - * We only increase inuse during period and do so iff the margin has + * We only increase inuse during period and do so if the margin has * deteriorated since the previous adjustment. */ if (margin >= iocg->saved_margin || margin >= margins->low || @@ -3120,23 +3143,23 @@ static const match_table_t qos_tokens = { static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, size_t nbytes, loff_t off) { - struct gendisk *disk; + struct block_device *bdev; struct ioc *ioc; u32 qos[NR_QOS_PARAMS]; bool enable, user; char *p; int ret; - disk = blkcg_conf_get_disk(&input); - if (IS_ERR(disk)) - return PTR_ERR(disk); + bdev = blkcg_conf_open_bdev(&input); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); - ioc = q_to_ioc(disk->queue); + ioc = q_to_ioc(bdev->bd_disk->queue); if (!ioc) { - ret = blk_iocost_init(disk->queue); + ret = blk_iocost_init(bdev->bd_disk->queue); if (ret) goto err; - ioc = q_to_ioc(disk->queue); + ioc = q_to_ioc(bdev->bd_disk->queue); } spin_lock_irq(&ioc->lock); @@ -3231,12 +3254,12 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, ioc_refresh_params(ioc, true); spin_unlock_irq(&ioc->lock); - put_disk_and_module(disk); + blkdev_put_no_open(bdev); return nbytes; einval: ret = -EINVAL; err: - put_disk_and_module(disk); + blkdev_put_no_open(bdev); return ret; } @@ -3287,23 +3310,23 @@ static const match_table_t i_lcoef_tokens = { static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input, size_t nbytes, loff_t off) { - struct gendisk *disk; + struct block_device *bdev; struct ioc *ioc; u64 u[NR_I_LCOEFS]; bool user; char *p; int ret; - disk = blkcg_conf_get_disk(&input); - if (IS_ERR(disk)) - return PTR_ERR(disk); + bdev = blkcg_conf_open_bdev(&input); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); - ioc = q_to_ioc(disk->queue); + ioc = q_to_ioc(bdev->bd_disk->queue); if (!ioc) { - ret = 
blk_iocost_init(disk->queue); + ret = blk_iocost_init(bdev->bd_disk->queue); if (ret) goto err; - ioc = q_to_ioc(disk->queue); + ioc = q_to_ioc(bdev->bd_disk->queue); } spin_lock_irq(&ioc->lock); @@ -3356,13 +3379,13 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input, ioc_refresh_params(ioc, true); spin_unlock_irq(&ioc->lock); - put_disk_and_module(disk); + blkdev_put_no_open(bdev); return nbytes; einval: ret = -EINVAL; err: - put_disk_and_module(disk); + blkdev_put_no_open(bdev); return ret; } diff --git a/block/blk-lib.c b/block/blk-lib.c index e90614fd8d6a..752f9c722062 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -65,7 +65,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, /* In case the discard request is in a partition */ if (bdev_is_partition(bdev)) - part_offset = bdev->bd_part->start_sect; + part_offset = bdev->bd_start_sect; while (nr_sects) { sector_t granularity_aligned_lba, req_sects; diff --git a/block/blk-merge.c b/block/blk-merge.c index 97b7c2821565..808768f6b174 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -279,6 +279,14 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, return NULL; split: *segs = nsegs; + + /* + * Bio splitting may cause subtle trouble such as hang when doing sync + * iopoll in direct IO routine. Given performance gain of iopoll for + * big IO can be trivial, disable iopoll when split needed. 
+ */ + bio->bi_opf &= ~REQ_HIPRI; + return bio_split(bio, sectors, GFP_NOIO, bs); } @@ -338,7 +346,7 @@ void __blk_queue_split(struct bio **bio, unsigned int *nr_segs) split->bi_opf |= REQ_NOMERGE; bio_chain(split, *bio); - trace_block_split(q, split, (*bio)->bi_iter.bi_sector); + trace_block_split(split, (*bio)->bi_iter.bi_sector); submit_bio_noacct(*bio); *bio = split; } @@ -683,8 +691,6 @@ static void blk_account_io_merge_request(struct request *req) part_stat_lock(); part_stat_inc(req->part, merges[op_stat_group(req_op(req))]); part_stat_unlock(); - - hd_struct_put(req->part); } } @@ -801,7 +807,7 @@ static struct request *attempt_merge(struct request_queue *q, */ blk_account_io_merge_request(next); - trace_block_rq_merge(q, next); + trace_block_rq_merge(next); /* * ownership of bio passed from next to req, return 'next' for @@ -924,7 +930,7 @@ static enum bio_merge_status bio_attempt_back_merge(struct request *req, if (!ll_back_merge_fn(req, bio, nr_segs)) return BIO_MERGE_FAILED; - trace_block_bio_backmerge(req->q, req, bio); + trace_block_bio_backmerge(bio); rq_qos_merge(req->q, req, bio); if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) @@ -948,7 +954,7 @@ static enum bio_merge_status bio_attempt_front_merge(struct request *req, if (!ll_front_merge_fn(req, bio, nr_segs)) return BIO_MERGE_FAILED; - trace_block_bio_frontmerge(req->q, req, bio); + trace_block_bio_frontmerge(bio); rq_qos_merge(req->q, req, bio); if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index d1eafe2c045c..deff4e826e23 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -386,7 +386,7 @@ EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge); void blk_mq_sched_request_inserted(struct request *rq) { - trace_block_rq_insert(rq->q, rq); + trace_block_rq_insert(rq); } EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted); diff --git a/block/blk-mq.c b/block/blk-mq.c index d35b3c0c876a..14a44699e9b6 100644 --- a/block/blk-mq.c +++ 
b/block/blk-mq.c @@ -95,7 +95,7 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx, } struct mq_inflight { - struct hd_struct *part; + struct block_device *part; unsigned int inflight[2]; }; @@ -105,13 +105,15 @@ static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx, { struct mq_inflight *mi = priv; - if (rq->part == mi->part && blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) + if ((!mi->part->bd_partno || rq->part == mi->part) && + blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) mi->inflight[rq_data_dir(rq)]++; return true; } -unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part) +unsigned int blk_mq_in_flight(struct request_queue *q, + struct block_device *part) { struct mq_inflight mi = { .part = part }; @@ -120,8 +122,8 @@ unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part) return mi.inflight[0] + mi.inflight[1]; } -void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part, - unsigned int inflight[2]) +void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part, + unsigned int inflight[2]) { struct mq_inflight mi = { .part = part }; @@ -729,7 +731,7 @@ void blk_mq_start_request(struct request *rq) { struct request_queue *q = rq->q; - trace_block_rq_issue(q, rq); + trace_block_rq_issue(rq); if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) { rq->io_start_time_ns = ktime_get_ns(); @@ -756,7 +758,7 @@ static void __blk_mq_requeue_request(struct request *rq) blk_mq_put_driver_tag(rq); - trace_block_rq_requeue(q, rq); + trace_block_rq_requeue(rq); rq_qos_requeue(q, rq); if (blk_mq_request_started(rq)) { @@ -1590,7 +1592,7 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx) * __blk_mq_delay_run_hw_queue - Run (or schedule to run) a hardware queue. * @hctx: Pointer to the hardware queue to run. * @async: If we want to run the queue asynchronously. - * @msecs: Microseconds of delay to wait before running the queue. 
+ * @msecs: Milliseconds of delay to wait before running the queue. * * If !@async, try to run the queue now. Else, run the queue asynchronously and * with a delay of @msecs. @@ -1619,7 +1621,7 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async, /** * blk_mq_delay_run_hw_queue - Run a hardware queue asynchronously. * @hctx: Pointer to the hardware queue to run. - * @msecs: Microseconds of delay to wait before running the queue. + * @msecs: Milliseconds of delay to wait before running the queue. * * Run a hardware queue asynchronously with a delay of @msecs. */ @@ -1683,7 +1685,7 @@ EXPORT_SYMBOL(blk_mq_run_hw_queues); /** * blk_mq_delay_run_hw_queues - Run all hardware queues asynchronously. * @q: Pointer to the request queue to run. - * @msecs: Microseconds of delay to wait before running the queues. + * @msecs: Milliseconds of delay to wait before running the queues. */ void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs) { @@ -1817,7 +1819,7 @@ static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx, lockdep_assert_held(&ctx->lock); - trace_block_rq_insert(hctx->queue, rq); + trace_block_rq_insert(rq); if (at_head) list_add(&rq->queuelist, &ctx->rq_lists[type]); @@ -1874,7 +1876,7 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, */ list_for_each_entry(rq, list, queuelist) { BUG_ON(rq->mq_ctx != ctx); - trace_block_rq_insert(hctx->queue, rq); + trace_block_rq_insert(rq); } spin_lock(&ctx->lock); @@ -2155,6 +2157,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) unsigned int nr_segs; blk_qc_t cookie; blk_status_t ret; + bool hipri; blk_queue_bounce(q, &bio); __blk_queue_split(&bio, &nr_segs); @@ -2171,6 +2174,8 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) rq_qos_throttle(q, bio); + hipri = bio->bi_opf & REQ_HIPRI; + data.cmd_flags = bio->bi_opf; rq = __blk_mq_alloc_request(&data); if (unlikely(!rq)) { @@ -2180,7 +2185,7 @@ blk_qc_t blk_mq_submit_bio(struct bio 
*bio) goto queue_exit; } - trace_block_getrq(q, bio, bio->bi_opf); + trace_block_getrq(bio); rq_qos_track(q, rq, bio); @@ -2263,6 +2268,8 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) blk_mq_sched_insert_request(rq, false, true, true); } + if (!hipri) + return BLK_QC_T_NONE; return cookie; queue_exit: blk_queue_exit(q); @@ -3373,6 +3380,12 @@ static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set, return 0; } +static int blk_mq_alloc_tag_set_tags(struct blk_mq_tag_set *set, + int new_nr_hw_queues) +{ + return blk_mq_realloc_tag_set_tags(set, 0, new_nr_hw_queues); +} + /* * Alloc a tag set to be associated with one or more request queues. * May fail with EINVAL for various error conditions. May adjust the @@ -3426,7 +3439,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids) set->nr_hw_queues = nr_cpu_ids; - if (blk_mq_realloc_tag_set_tags(set, 0, set->nr_hw_queues) < 0) + if (blk_mq_alloc_tag_set_tags(set, set->nr_hw_queues) < 0) return -ENOMEM; ret = -ENOMEM; @@ -3861,9 +3874,10 @@ int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin) * the state. Like for the other success return cases, the * caller is responsible for checking if the IO completed. If * the IO isn't complete, we'll get called again and will go - * straight to the busy poll loop. + * straight to the busy poll loop. If specified not to spin, + * we also should not sleep. 
*/ - if (blk_mq_poll_hybrid(q, hctx, cookie)) + if (spin && blk_mq_poll_hybrid(q, hctx, cookie)) return 1; hctx->poll_considered++; diff --git a/block/blk-mq.h b/block/blk-mq.h index a52703c98b77..c1458d9502f1 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -99,7 +99,7 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue * * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue * @q: request queue * @flags: request command flags - * @cpu: cpu ctx + * @ctx: software queue cpu ctx */ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, unsigned int flags, @@ -182,9 +182,10 @@ static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx) return hctx->nr_ctx && hctx->tags; } -unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part); -void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part, - unsigned int inflight[2]); +unsigned int blk_mq_in_flight(struct request_queue *q, + struct block_device *part); +void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part, + unsigned int inflight[2]); static inline void blk_mq_put_dispatch_budget(struct request_queue *q) { diff --git a/block/blk-throttle.c b/block/blk-throttle.c index b771c4299982..d52cac9f3a7c 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -587,6 +587,7 @@ static void throtl_pd_online(struct blkg_policy_data *pd) tg_update_has_rules(tg); } +#ifdef CONFIG_BLK_DEV_THROTTLING_LOW static void blk_throtl_update_limit_valid(struct throtl_data *td) { struct cgroup_subsys_state *pos_css; @@ -607,6 +608,11 @@ static void blk_throtl_update_limit_valid(struct throtl_data *td) td->limit_valid[LIMIT_LOW] = low_valid; } +#else +static inline void blk_throtl_update_limit_valid(struct throtl_data *td) +{ +} +#endif static void throtl_upgrade_state(struct throtl_data *td); static void throtl_pd_offline(struct blkg_policy_data *pd) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 
fd410086fe1d..0321ca83e73f 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -835,7 +835,6 @@ int wbt_init(struct request_queue *q) rwb->enable_state = WBT_STATE_ON_DEFAULT; rwb->wc = 1; rwb->rq_depth.default_depth = RWB_DEF_DEPTH; - wbt_update_limits(rwb); /* * Assign rwb and add the stats callback. diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 6817a673e5ce..7a68b6e4300c 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -508,15 +508,29 @@ int blk_revalidate_disk_zones(struct gendisk *disk, noio_flag = memalloc_noio_save(); ret = disk->fops->report_zones(disk, 0, UINT_MAX, blk_revalidate_zone_cb, &args); + if (!ret) { + pr_warn("%s: No zones reported\n", disk->disk_name); + ret = -ENODEV; + } memalloc_noio_restore(noio_flag); + /* + * If zones were reported, make sure that the entire disk capacity + * has been checked. + */ + if (ret > 0 && args.sector != get_capacity(disk)) { + pr_warn("%s: Missing zones from sector %llu\n", + disk->disk_name, args.sector); + ret = -ENODEV; + } + /* * Install the new bitmaps and update nr_zones only once the queue is * stopped and all I/Os are completed (i.e. a scheduler is not * referencing the bitmaps).
*/ blk_mq_freeze_queue(q); - if (ret >= 0) { + if (ret > 0) { blk_queue_chunk_sectors(q, args.zone_sectors); q->nr_zones = args.nr_zones; swap(q->seq_zones_wlock, args.seq_zones_wlock); diff --git a/block/blk.h b/block/blk.h index dfab98465db9..d23d018fd2cd 100644 --- a/block/blk.h +++ b/block/blk.h @@ -25,7 +25,6 @@ struct blk_flush_queue { struct list_head flush_data_in_flight; struct request *flush_rq; - struct lock_class_key key; spinlock_t mq_flush_lock; }; @@ -215,7 +214,7 @@ static inline void elevator_exit(struct request_queue *q, __elevator_exit(q, e); } -struct hd_struct *__disk_get_part(struct gendisk *disk, int partno); +struct block_device *__disk_get_part(struct gendisk *disk, int partno); ssize_t part_size_show(struct device *dev, struct device_attribute *attr, char *buf); @@ -348,97 +347,21 @@ void blk_queue_free_zone_bitmaps(struct request_queue *q); static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {} #endif -struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector); +struct block_device *disk_map_sector_rcu(struct gendisk *disk, sector_t sector); -int blk_alloc_devt(struct hd_struct *part, dev_t *devt); +int blk_alloc_devt(struct block_device *part, dev_t *devt); void blk_free_devt(dev_t devt); -void blk_invalidate_devt(dev_t devt); char *disk_name(struct gendisk *hd, int partno, char *buf); #define ADDPART_FLAG_NONE 0 #define ADDPART_FLAG_RAID 1 #define ADDPART_FLAG_WHOLEDISK 2 -void delete_partition(struct hd_struct *part); +void delete_partition(struct block_device *part); int bdev_add_partition(struct block_device *bdev, int partno, sector_t start, sector_t length); int bdev_del_partition(struct block_device *bdev, int partno); int bdev_resize_partition(struct block_device *bdev, int partno, sector_t start, sector_t length); int disk_expand_part_tbl(struct gendisk *disk, int target); -int hd_ref_init(struct hd_struct *part); - -/* no need to get/put refcount of part0 */ -static inline int 
hd_struct_try_get(struct hd_struct *part) -{ - if (part->partno) - return percpu_ref_tryget_live(&part->ref); - return 1; -} - -static inline void hd_struct_put(struct hd_struct *part) -{ - if (part->partno) - percpu_ref_put(&part->ref); -} - -static inline void hd_free_part(struct hd_struct *part) -{ - free_percpu(part->dkstats); - kfree(part->info); - percpu_ref_exit(&part->ref); -} - -/* - * Any access of part->nr_sects which is not protected by partition - * bd_mutex or gendisk bdev bd_mutex, should be done using this - * accessor function. - * - * Code written along the lines of i_size_read() and i_size_write(). - * CONFIG_PREEMPTION case optimizes the case of UP kernel with preemption - * on. - */ -static inline sector_t part_nr_sects_read(struct hd_struct *part) -{ -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - sector_t nr_sects; - unsigned seq; - do { - seq = read_seqcount_begin(&part->nr_sects_seq); - nr_sects = part->nr_sects; - } while (read_seqcount_retry(&part->nr_sects_seq, seq)); - return nr_sects; -#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) - sector_t nr_sects; - - preempt_disable(); - nr_sects = part->nr_sects; - preempt_enable(); - return nr_sects; -#else - return part->nr_sects; -#endif -} - -/* - * Should be called with mutex lock held (typically bd_mutex) of partition - * to provide mutual exlusion among writers otherwise seqcount might be - * left in wrong state leaving the readers spinning infinitely. 
- */ -static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) -{ -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - preempt_disable(); - write_seqcount_begin(&part->nr_sects_seq); - part->nr_sects = size; - write_seqcount_end(&part->nr_sects_seq); - preempt_enable(); -#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) - preempt_disable(); - part->nr_sects = size; - preempt_enable(); -#else - part->nr_sects = size; -#endif -} int bio_add_hw_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset, diff --git a/block/bounce.c b/block/bounce.c index 162a6eee8999..d3f51acd6e3b 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -340,7 +340,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, } } - trace_block_bio_bounce(q, *bio_orig); + trace_block_bio_bounce(*bio_orig); bio->bi_flags |= (1 << BIO_BOUNCED); diff --git a/block/genhd.c b/block/genhd.c index 9387f050c248..b84b8671e627 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -26,17 +25,13 @@ #include "blk.h" -static DEFINE_MUTEX(block_class_lock); static struct kobject *block_depr; +DECLARE_RWSEM(bdev_lookup_sem); + /* for extended dynamic devt allocation, currently only one major is used */ #define NR_EXT_DEVT (1 << MINORBITS) - -/* For extended devt allocation. ext_devt_lock prevents look up - * results from going away underneath its user. 
- */ -static DEFINE_SPINLOCK(ext_devt_lock); -static DEFINE_IDR(ext_devt_idr); +static DEFINE_IDA(ext_devt_ida); static void disk_check_events(struct disk_events *ev, unsigned int *clearing_ptr); @@ -45,30 +40,49 @@ static void disk_add_events(struct gendisk *disk); static void disk_del_events(struct gendisk *disk); static void disk_release_events(struct gendisk *disk); +void set_capacity(struct gendisk *disk, sector_t sectors) +{ + struct block_device *bdev = disk->part0; + + spin_lock(&bdev->bd_size_lock); + i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); + spin_unlock(&bdev->bd_size_lock); +} +EXPORT_SYMBOL(set_capacity); + /* - * Set disk capacity and notify if the size is not currently - * zero and will not be set to zero + * Set disk capacity and notify if the size is not currently zero and will not + * be set to zero. Returns true if a uevent was sent, otherwise false. */ -bool set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, - bool update_bdev) +bool set_capacity_and_notify(struct gendisk *disk, sector_t size) { sector_t capacity = get_capacity(disk); + char *envp[] = { "RESIZE=1", NULL }; set_capacity(disk, size); - if (update_bdev) - revalidate_disk_size(disk, true); - if (capacity != size && capacity != 0 && size != 0) { - char *envp[] = { "RESIZE=1", NULL }; + /* + * Only print a message and send a uevent if the gendisk is user visible + * and alive. This avoids spamming the log and udev when setting the + * initial capacity during probing. + */ + if (size == capacity || + (disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP) + return false; - kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp); - return true; - } + pr_info("%s: detected capacity change from %lld to %lld\n", + disk->disk_name, size, capacity); - return false; + /* + * Historically we did not send a uevent for changes to/from an empty + * device. 
+ */ + if (!capacity || !size) + return false; + kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp); + return true; } - -EXPORT_SYMBOL_GPL(set_capacity_revalidate_and_notify); +EXPORT_SYMBOL_GPL(set_capacity_and_notify); /* * Format the device name of the indicated disk into the supplied buffer and @@ -92,13 +106,14 @@ const char *bdevname(struct block_device *bdev, char *buf) } EXPORT_SYMBOL(bdevname); -static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat) +static void part_stat_read_all(struct block_device *part, + struct disk_stats *stat) { int cpu; memset(stat, 0, sizeof(struct disk_stats)); for_each_possible_cpu(cpu) { - struct disk_stats *ptr = per_cpu_ptr(part->dkstats, cpu); + struct disk_stats *ptr = per_cpu_ptr(part->bd_stats, cpu); int group; for (group = 0; group < NR_STAT_GROUPS; group++) { @@ -112,7 +127,7 @@ static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat) } } -static unsigned int part_in_flight(struct hd_struct *part) +static unsigned int part_in_flight(struct block_device *part) { unsigned int inflight = 0; int cpu; @@ -127,7 +142,8 @@ static unsigned int part_in_flight(struct hd_struct *part) return inflight; } -static void part_in_flight_rw(struct hd_struct *part, unsigned int inflight[2]) +static void part_in_flight_rw(struct block_device *part, + unsigned int inflight[2]) { int cpu; @@ -143,7 +159,7 @@ static void part_in_flight_rw(struct hd_struct *part, unsigned int inflight[2]) inflight[1] = 0; } -struct hd_struct *__disk_get_part(struct gendisk *disk, int partno) +struct block_device *__disk_get_part(struct gendisk *disk, int partno) { struct disk_part_tbl *ptbl = rcu_dereference(disk->part_tbl); @@ -152,33 +168,6 @@ struct hd_struct *__disk_get_part(struct gendisk *disk, int partno) return rcu_dereference(ptbl->part[partno]); } -/** - * disk_get_part - get partition - * @disk: disk to look partition from - * @partno: partition number - * - * Look for partition @partno 
from @disk. If found, increment - * reference count and return it. - * - * CONTEXT: - * Don't care. - * - * RETURNS: - * Pointer to the found partition on success, NULL if not found. - */ -struct hd_struct *disk_get_part(struct gendisk *disk, int partno) -{ - struct hd_struct *part; - - rcu_read_lock(); - part = __disk_get_part(disk, partno); - if (part) - get_device(part_to_dev(part)); - rcu_read_unlock(); - - return part; -} - /** * disk_part_iter_init - initialize partition iterator * @piter: iterator to initialize @@ -223,14 +212,13 @@ EXPORT_SYMBOL_GPL(disk_part_iter_init); * CONTEXT: * Don't care. */ -struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) +struct block_device *disk_part_iter_next(struct disk_part_iter *piter) { struct disk_part_tbl *ptbl; int inc, end; /* put the last partition */ - disk_put_part(piter->part); - piter->part = NULL; + disk_part_iter_exit(piter); /* get part_tbl */ rcu_read_lock(); @@ -251,19 +239,20 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) /* iterate to the next partition */ for (; piter->idx != end; piter->idx += inc) { - struct hd_struct *part; + struct block_device *part; part = rcu_dereference(ptbl->part[piter->idx]); if (!part) continue; - if (!part_nr_sects_read(part) && + if (!bdev_nr_sectors(part) && !(piter->flags & DISK_PITER_INCL_EMPTY) && !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 && piter->idx == 0)) continue; - get_device(part_to_dev(part)); - piter->part = part; + piter->part = bdgrab(part); + if (!piter->part) + continue; piter->idx += inc; break; } @@ -285,15 +274,16 @@ EXPORT_SYMBOL_GPL(disk_part_iter_next); */ void disk_part_iter_exit(struct disk_part_iter *piter) { - disk_put_part(piter->part); + if (piter->part) + bdput(piter->part); piter->part = NULL; } EXPORT_SYMBOL_GPL(disk_part_iter_exit); -static inline int sector_in_part(struct hd_struct *part, sector_t sector) +static inline int sector_in_part(struct block_device *part, sector_t sector) { - return 
part->start_sect <= sector && - sector < part->start_sect + part_nr_sects_read(part); + return part->bd_start_sect <= sector && + sector < part->bd_start_sect + bdev_nr_sectors(part); } /** @@ -305,44 +295,34 @@ static inline int sector_in_part(struct hd_struct *part, sector_t sector) * primarily used for stats accounting. * * CONTEXT: - * RCU read locked. The returned partition pointer is always valid - * because its refcount is grabbed except for part0, which lifetime - * is same with the disk. + * RCU read locked. * * RETURNS: * Found partition on success, part0 is returned if no partition matches * or the matched partition is being deleted. */ -struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) +struct block_device *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) { struct disk_part_tbl *ptbl; - struct hd_struct *part; + struct block_device *part; int i; rcu_read_lock(); ptbl = rcu_dereference(disk->part_tbl); part = rcu_dereference(ptbl->last_lookup); - if (part && sector_in_part(part, sector) && hd_struct_try_get(part)) + if (part && sector_in_part(part, sector)) goto out_unlock; for (i = 1; i < ptbl->len; i++) { part = rcu_dereference(ptbl->part[i]); - if (part && sector_in_part(part, sector)) { - /* - * only live partition can be cached for lookup, - * so use-after-free on cached & deleting partition - * can be avoided - */ - if (!hd_struct_try_get(part)) - break; rcu_assign_pointer(ptbl->last_lookup, part); goto out_unlock; } } - part = &disk->part0; + part = disk->part0; out_unlock: rcu_read_unlock(); return part; @@ -393,7 +373,9 @@ static struct blk_major_name { struct blk_major_name *next; int major; char name[16]; + void (*probe)(dev_t devt); } *major_names[BLKDEV_MAJOR_HASH_SIZE]; +static DEFINE_MUTEX(major_names_lock); /* index in the above - for now: assume no multimajor ranges */ static inline int major_to_index(unsigned major) @@ -406,20 +388,21 @@ void blkdev_show(struct seq_file *seqf, off_t offset) { struct 
blk_major_name *dp; - mutex_lock(&block_class_lock); + mutex_lock(&major_names_lock); for (dp = major_names[major_to_index(offset)]; dp; dp = dp->next) if (dp->major == offset) seq_printf(seqf, "%3d %s\n", dp->major, dp->name); - mutex_unlock(&block_class_lock); + mutex_unlock(&major_names_lock); } #endif /* CONFIG_PROC_FS */ /** - * register_blkdev - register a new block device + * __register_blkdev - register a new block device * * @major: the requested major device number [1..BLKDEV_MAJOR_MAX-1]. If * @major = 0, try to allocate any unused major number. * @name: the name of the new block device as a zero terminated string + * @probe: callback that is called on access to any minor number of @major * * The @name must be unique within the system. * @@ -433,13 +416,16 @@ void blkdev_show(struct seq_file *seqf, off_t offset) * * See Documentation/admin-guide/devices.txt for the list of allocated * major numbers. + * + * Use register_blkdev instead for any new code. */ -int register_blkdev(unsigned int major, const char *name) +int __register_blkdev(unsigned int major, const char *name, + void (*probe)(dev_t devt)) { struct blk_major_name **n, *p; int index, ret = 0; - mutex_lock(&block_class_lock); + mutex_lock(&major_names_lock); /* temporary */ if (major == 0) { @@ -473,6 +459,7 @@ int register_blkdev(unsigned int major, const char *name) } p->major = major; + p->probe = probe; strlcpy(p->name, name, sizeof(p->name)); p->next = NULL; index = major_to_index(major); @@ -492,11 +479,10 @@ int register_blkdev(unsigned int major, const char *name) kfree(p); } out: - mutex_unlock(&block_class_lock); + mutex_unlock(&major_names_lock); return ret; } - -EXPORT_SYMBOL(register_blkdev); +EXPORT_SYMBOL(__register_blkdev); void unregister_blkdev(unsigned int major, const char *name) { @@ -504,7 +490,7 @@ void unregister_blkdev(unsigned int major, const char *name) struct blk_major_name *p = NULL; int index = major_to_index(major); - mutex_lock(&block_class_lock); +
mutex_lock(&major_names_lock); for (n = &major_names[index]; *n; n = &(*n)->next) if ((*n)->major == major) break; @@ -514,14 +500,12 @@ void unregister_blkdev(unsigned int major, const char *name) p = *n; *n = p->next; } - mutex_unlock(&block_class_lock); + mutex_unlock(&major_names_lock); kfree(p); } EXPORT_SYMBOL(unregister_blkdev); -static struct kobj_map *bdev_map; - /** * blk_mangle_minor - scatter minor numbers apart * @minor: minor number to mangle @@ -555,8 +539,8 @@ static int blk_mangle_minor(int minor) } /** - * blk_alloc_devt - allocate a dev_t for a partition - * @part: partition to allocate dev_t for + * blk_alloc_devt - allocate a dev_t for a block device + * @bdev: block device to allocate dev_t for * @devt: out parameter for resulting dev_t * * Allocate a dev_t for block device. @@ -568,25 +552,18 @@ static int blk_mangle_minor(int minor) * CONTEXT: * Might sleep. */ -int blk_alloc_devt(struct hd_struct *part, dev_t *devt) +int blk_alloc_devt(struct block_device *bdev, dev_t *devt) { - struct gendisk *disk = part_to_disk(part); + struct gendisk *disk = bdev->bd_disk; int idx; /* in consecutive minor range? */ - if (part->partno < disk->minors) { - *devt = MKDEV(disk->major, disk->first_minor + part->partno); + if (bdev->bd_partno < disk->minors) { + *devt = MKDEV(disk->major, disk->first_minor + bdev->bd_partno); return 0; } - /* allocate ext devt */ - idr_preload(GFP_KERNEL); - - spin_lock_bh(&ext_devt_lock); - idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT); - spin_unlock_bh(&ext_devt_lock); - - idr_preload_end(); + idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT, GFP_KERNEL); if (idx < 0) return idx == -ENOSPC ? 
-EBUSY : idx; @@ -605,26 +582,8 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt) */ void blk_free_devt(dev_t devt) { - if (devt == MKDEV(0, 0)) - return; - - if (MAJOR(devt) == BLOCK_EXT_MAJOR) { - spin_lock_bh(&ext_devt_lock); - idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); - spin_unlock_bh(&ext_devt_lock); - } -} - -/* - * We invalidate devt by assigning NULL pointer for devt in idr. - */ -void blk_invalidate_devt(dev_t devt) -{ - if (MAJOR(devt) == BLOCK_EXT_MAJOR) { - spin_lock_bh(&ext_devt_lock); - idr_replace(&ext_devt_idr, NULL, blk_mangle_minor(MINOR(devt))); - spin_unlock_bh(&ext_devt_lock); - } + if (MAJOR(devt) == BLOCK_EXT_MAJOR) + ida_free(&ext_devt_ida, blk_mangle_minor(MINOR(devt))); } static char *bdevt_str(dev_t devt, char *buf) @@ -639,43 +598,6 @@ static char *bdevt_str(dev_t devt, char *buf) return buf; } -/* - * Register device numbers dev..(dev+range-1) - * range must be nonzero - * The hash chain is sorted on range, so that subranges can override. 
- */ -void blk_register_region(dev_t devt, unsigned long range, struct module *module, - struct kobject *(*probe)(dev_t, int *, void *), - int (*lock)(dev_t, void *), void *data) -{ - kobj_map(bdev_map, devt, range, module, probe, lock, data); -} - -EXPORT_SYMBOL(blk_register_region); - -void blk_unregister_region(dev_t devt, unsigned long range) -{ - kobj_unmap(bdev_map, devt, range); -} - -EXPORT_SYMBOL(blk_unregister_region); - -static struct kobject *exact_match(dev_t devt, int *partno, void *data) -{ - struct gendisk *p = data; - - return &disk_to_dev(p)->kobj; -} - -static int exact_lock(dev_t devt, void *data) -{ - struct gendisk *p = data; - - if (!get_disk_and_module(p)) - return -1; - return 0; -} - static void disk_scan_partitions(struct gendisk *disk) { struct block_device *bdev; @@ -694,7 +616,7 @@ static void register_disk(struct device *parent, struct gendisk *disk, { struct device *ddev = disk_to_dev(disk); struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; int err; ddev->parent = parent; @@ -726,7 +648,8 @@ static void register_disk(struct device *parent, struct gendisk *disk, */ pm_runtime_set_memalloc_noio(ddev, true); - disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj); + disk->part0->bd_holder_dir = + kobject_create_and_add("holders", &ddev->kobj); disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); if (disk->flags & GENHD_FL_HIDDEN) { @@ -743,7 +666,7 @@ static void register_disk(struct device *parent, struct gendisk *disk, /* announce possible partitions */ disk_part_iter_init(&piter, disk, 0); while ((part = disk_part_iter_next(&piter))) - kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD); + kobject_uevent(bdev_kobj(part), KOBJ_ADD); disk_part_iter_exit(&piter); if (disk->queue->backing_dev_info->dev) { @@ -792,7 +715,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk, disk->flags |= GENHD_FL_UP; - retval = blk_alloc_devt(&disk->part0, &devt); + 
retval = blk_alloc_devt(disk->part0, &devt); if (retval) { WARN_ON(1); return; @@ -819,8 +742,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk, ret = bdi_register(bdi, "%u:%u", MAJOR(devt), MINOR(devt)); WARN_ON(ret); bdi_set_owner(bdi, dev); - blk_register_region(disk_devt(disk), disk->minors, NULL, - exact_match, exact_lock, disk); + bdev_add(disk->part0, devt); } register_disk(parent, disk, groups); if (register_queue) @@ -850,23 +772,16 @@ void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk) } EXPORT_SYMBOL(device_add_disk_no_queue_reg); -static void invalidate_partition(struct gendisk *disk, int partno) +static void invalidate_partition(struct block_device *bdev) { - struct block_device *bdev; - - bdev = bdget_disk(disk, partno); - if (!bdev) - return; - fsync_bdev(bdev); __invalidate_device(bdev, true); /* - * Unhash the bdev inode for this device so that it gets evicted as soon - * as last inode reference is dropped. + * Unhash the bdev inode for this device so that it can't be looked + * up any more even if openers still hold references to it. */ remove_inode_hash(bdev->bd_inode); - bdput(bdev); } /** @@ -891,10 +806,13 @@ static void invalidate_partition(struct gendisk *disk, int partno) void del_gendisk(struct gendisk *disk) { struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; might_sleep(); + if (WARN_ON_ONCE(!disk->queue)) + return; + blk_integrity_del(disk); disk_del_events(disk); @@ -902,50 +820,39 @@ void del_gendisk(struct gendisk *disk) * Block lookups of the disk until all bdevs are unhashed and the * disk is marked as dead (GENHD_FL_UP cleared). 
*/ - down_write(&disk->lookup_sem); + down_write(&bdev_lookup_sem); + /* invalidate stuff */ disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); while ((part = disk_part_iter_next(&piter))) { - invalidate_partition(disk, part->partno); + invalidate_partition(part); delete_partition(part); } disk_part_iter_exit(&piter); - invalidate_partition(disk, 0); + invalidate_partition(disk->part0); set_capacity(disk, 0); disk->flags &= ~GENHD_FL_UP; - up_write(&disk->lookup_sem); + up_write(&bdev_lookup_sem); - if (!(disk->flags & GENHD_FL_HIDDEN)) + if (!(disk->flags & GENHD_FL_HIDDEN)) { sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); - if (disk->queue) { + /* * Unregister bdi before releasing device numbers (as they can * get reused and we'd get clashes in sysfs). */ - if (!(disk->flags & GENHD_FL_HIDDEN)) - bdi_unregister(disk->queue->backing_dev_info); - blk_unregister_queue(disk); - } else { - WARN_ON(1); + bdi_unregister(disk->queue->backing_dev_info); } - if (!(disk->flags & GENHD_FL_HIDDEN)) - blk_unregister_region(disk_devt(disk), disk->minors); - /* - * Remove gendisk pointer from idr so that it cannot be looked up - * while RCU period before freeing gendisk is running to prevent - * use-after-free issues. Note that the device number stays - * "in-use" until we really free the gendisk. 
- */ - blk_invalidate_devt(disk_devt(disk)); + blk_unregister_queue(disk); - kobject_put(disk->part0.holder_dir); + kobject_put(disk->part0->bd_holder_dir); kobject_put(disk->slave_dir); - part_stat_set_all(&disk->part0, 0); - disk->part0.stamp = 0; + part_stat_set_all(disk->part0, 0); + disk->part0->bd_stamp = 0; if (!sysfs_deprecated) sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); @@ -978,57 +885,24 @@ static ssize_t disk_badblocks_store(struct device *dev, return badblocks_store(disk->bb, page, len, 0); } -/** - * get_gendisk - get partitioning information for a given device - * @devt: device to get partitioning information for - * @partno: returned partition index - * - * This function gets the structure containing partitioning - * information for the given device @devt. - * - * Context: can sleep - */ -struct gendisk *get_gendisk(dev_t devt, int *partno) +void blk_request_module(dev_t devt) { - struct gendisk *disk = NULL; + unsigned int major = MAJOR(devt); + struct blk_major_name **n; - might_sleep(); - - if (MAJOR(devt) != BLOCK_EXT_MAJOR) { - struct kobject *kobj; - - kobj = kobj_lookup(bdev_map, devt, partno); - if (kobj) - disk = dev_to_disk(kobj_to_dev(kobj)); - } else { - struct hd_struct *part; - - spin_lock_bh(&ext_devt_lock); - part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); - if (part && get_disk_and_module(part_to_disk(part))) { - *partno = part->partno; - disk = part_to_disk(part); + mutex_lock(&major_names_lock); + for (n = &major_names[major_to_index(major)]; *n; n = &(*n)->next) { + if ((*n)->major == major && (*n)->probe) { + (*n)->probe(devt); + mutex_unlock(&major_names_lock); + return; } - spin_unlock_bh(&ext_devt_lock); } + mutex_unlock(&major_names_lock); - if (!disk) - return NULL; - - /* - * Synchronize with del_gendisk() to not return disk that is being - * destroyed. 
- */ - down_read(&disk->lookup_sem); - if (unlikely((disk->flags & GENHD_FL_HIDDEN) || - !(disk->flags & GENHD_FL_UP))) { - up_read(&disk->lookup_sem); - put_disk_and_module(disk); - disk = NULL; - } else { - up_read(&disk->lookup_sem); - } - return disk; + if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0) + /* Make old-style 2.4 aliases work */ + request_module("block-major-%d", MAJOR(devt)); } /** @@ -1046,17 +920,16 @@ struct gendisk *get_gendisk(dev_t devt, int *partno) */ struct block_device *bdget_disk(struct gendisk *disk, int partno) { - struct hd_struct *part; struct block_device *bdev = NULL; - part = disk_get_part(disk, partno); - if (part) - bdev = bdget_part(part); - disk_put_part(part); + rcu_read_lock(); + bdev = __disk_get_part(disk, partno); + if (bdev && !bdgrab(bdev)) + bdev = NULL; + rcu_read_unlock(); return bdev; } -EXPORT_SYMBOL(bdget_disk); /* * print a full list of all partitions - intended for places where the root @@ -1072,7 +945,7 @@ void __init printk_all_partitions(void) while ((dev = class_dev_iter_next(&iter))) { struct gendisk *disk = dev_to_disk(dev); struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; char name_buf[BDEVNAME_SIZE]; char devt_buf[BDEVT_SIZE]; @@ -1091,13 +964,14 @@ void __init printk_all_partitions(void) */ disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); while ((part = disk_part_iter_next(&piter))) { - bool is_part0 = part == &disk->part0; + bool is_part0 = part == disk->part0; printk("%s%s %10llu %s %s", is_part0 ? "" : " ", - bdevt_str(part_devt(part), devt_buf), - (unsigned long long)part_nr_sects_read(part) >> 1 - , disk_name(disk, part->partno, name_buf), - part->info ? part->info->uuid : ""); + bdevt_str(part->bd_dev, devt_buf), + bdev_nr_sectors(part) >> 1, + disk_name(disk, part->bd_partno, name_buf), + part->bd_meta_info ? 
+ part->bd_meta_info->uuid : ""); if (is_part0) { if (dev->parent && dev->parent->driver) printk(" driver: %s\n", @@ -1173,7 +1047,7 @@ static int show_partition(struct seq_file *seqf, void *v) { struct gendisk *sgp = v; struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; char buf[BDEVNAME_SIZE]; /* Don't show non-partitionable removeable devices or empty devices */ @@ -1187,9 +1061,9 @@ static int show_partition(struct seq_file *seqf, void *v) disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0); while ((part = disk_part_iter_next(&piter))) seq_printf(seqf, "%4d %7d %10llu %s\n", - MAJOR(part_devt(part)), MINOR(part_devt(part)), - (unsigned long long)part_nr_sects_read(part) >> 1, - disk_name(sgp, part->partno, buf)); + MAJOR(part->bd_dev), MINOR(part->bd_dev), + bdev_nr_sectors(part) >> 1, + disk_name(sgp, part->bd_partno, buf)); disk_part_iter_exit(&piter); return 0; @@ -1203,15 +1077,6 @@ static const struct seq_operations partitions_op = { }; #endif - -static struct kobject *base_probe(dev_t devt, int *partno, void *data) -{ - if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0) - /* Make old-style 2.4 aliases work */ - request_module("block-major-%d", MAJOR(devt)); - return NULL; -} - static int __init genhd_device_init(void) { int error; @@ -1220,7 +1085,6 @@ static int __init genhd_device_init(void) error = class_register(&block_class); if (unlikely(error)) return error; - bdev_map = kobj_map_init(base_probe, &block_class_lock); blk_dev_init(); register_blkdev(BLOCK_EXT_MAJOR, "blkext"); @@ -1278,25 +1142,22 @@ static ssize_t disk_ro_show(struct device *dev, ssize_t part_size_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); - - return sprintf(buf, "%llu\n", - (unsigned long long)part_nr_sects_read(p)); + return sprintf(buf, "%llu\n", bdev_nr_sectors(dev_to_bdev(dev))); } ssize_t part_stat_show(struct device *dev, struct device_attribute *attr, 
char *buf) { - struct hd_struct *p = dev_to_part(dev); - struct request_queue *q = part_to_disk(p)->queue; + struct block_device *bdev = dev_to_bdev(dev); + struct request_queue *q = bdev->bd_disk->queue; struct disk_stats stat; unsigned int inflight; - part_stat_read_all(p, &stat); + part_stat_read_all(bdev, &stat); if (queue_is_mq(q)) - inflight = blk_mq_in_flight(q, p); + inflight = blk_mq_in_flight(q, bdev); else - inflight = part_in_flight(p); + inflight = part_in_flight(bdev); return sprintf(buf, "%8lu %8lu %8llu %8u " @@ -1331,14 +1192,14 @@ ssize_t part_stat_show(struct device *dev, ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); - struct request_queue *q = part_to_disk(p)->queue; + struct block_device *bdev = dev_to_bdev(dev); + struct request_queue *q = bdev->bd_disk->queue; unsigned int inflight[2]; if (queue_is_mq(q)) - blk_mq_in_flight_rw(q, p, inflight); + blk_mq_in_flight_rw(q, bdev, inflight); else - part_in_flight_rw(p, inflight); + part_in_flight_rw(bdev, inflight); return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]); } @@ -1386,20 +1247,17 @@ static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store); ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); - - return sprintf(buf, "%d\n", p->make_it_fail); + return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_make_it_fail); } ssize_t part_fail_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct hd_struct *p = dev_to_part(dev); int i; if (count > 0 && sscanf(buf, "%d", &i) > 0) - p->make_it_fail = (i == 0) ? 0 : 1; + dev_to_bdev(dev)->bd_make_it_fail = i; return count; } @@ -1538,11 +1396,6 @@ int disk_expand_part_tbl(struct gendisk *disk, int partno) * * This function releases all allocated resources of the gendisk. 
* - * The struct gendisk refcount is incremented with get_gendisk() or - * get_disk_and_module(), and its refcount is decremented with - * put_disk_and_module() or put_disk(). Once the refcount reaches 0 this - * function is called. - * * Drivers which used __device_add_disk() have a gendisk with a request_queue * assigned. Since the request_queue sits on top of the gendisk for these * drivers we also call blk_put_queue() for them, and we expect the @@ -1561,7 +1414,7 @@ static void disk_release(struct device *dev) disk_release_events(disk); kfree(disk->random); disk_replace_part_tbl(disk, NULL); - hd_free_part(&disk->part0); + bdput(disk->part0); if (disk->queue) blk_put_queue(disk->queue); kfree(disk); @@ -1599,7 +1452,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) { struct gendisk *gp = v; struct disk_part_iter piter; - struct hd_struct *hd; + struct block_device *hd; char buf[BDEVNAME_SIZE]; unsigned int inflight; struct disk_stats stat; @@ -1627,8 +1480,8 @@ static int diskstats_show(struct seq_file *seqf, void *v) "%lu %lu %lu %u " "%lu %u" "\n", - MAJOR(part_devt(hd)), MINOR(part_devt(hd)), - disk_name(gp, hd->partno, buf), + MAJOR(hd->bd_dev), MINOR(hd->bd_dev), + disk_name(gp, hd->bd_partno, buf), stat.ios[STAT_READ], stat.merges[STAT_READ], stat.sectors[STAT_READ], @@ -1686,7 +1539,7 @@ dev_t blk_lookup_devt(const char *name, int partno) class_dev_iter_init(&iter, &block_class, NULL, &disk_type); while ((dev = class_dev_iter_next(&iter))) { struct gendisk *disk = dev_to_disk(dev); - struct hd_struct *part; + struct block_device *part; if (strcmp(dev_name(dev), name)) continue; @@ -1699,13 +1552,12 @@ dev_t blk_lookup_devt(const char *name, int partno) MINOR(dev->devt) + partno); break; } - part = disk_get_part(disk, partno); + part = bdget_disk(disk, partno); if (part) { - devt = part_devt(part); - disk_put_part(part); + devt = part->bd_dev; + bdput(part); break; } - disk_put_part(part); } class_dev_iter_exit(&iter); return devt; @@ 
-1727,32 +1579,16 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) if (!disk) return NULL; - disk->part0.dkstats = alloc_percpu(struct disk_stats); - if (!disk->part0.dkstats) + disk->part0 = bdev_alloc(disk, 0); + if (!disk->part0) goto out_free_disk; - init_rwsem(&disk->lookup_sem); disk->node_id = node_id; - if (disk_expand_part_tbl(disk, 0)) { - free_percpu(disk->part0.dkstats); - goto out_free_disk; - } + if (disk_expand_part_tbl(disk, 0)) + goto out_bdput; ptbl = rcu_dereference_protected(disk->part_tbl, 1); - rcu_assign_pointer(ptbl->part[0], &disk->part0); - - /* - * set_capacity() and get_capacity() currently don't use - * seqcounter to read/update the part0->nr_sects. Still init - * the counter as we can read the sectors in IO submission - * patch using seqence counters. - * - * TODO: Ideally set_capacity() and get_capacity() should be - * converted to make use of bd_mutex and sequence counters. - */ - hd_sects_seq_init(&disk->part0); - if (hd_ref_init(&disk->part0)) - goto out_free_part0; + rcu_assign_pointer(ptbl->part[0], disk->part0); disk->minors = minors; rand_initialize_disk(disk); @@ -1761,43 +1597,14 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) device_initialize(disk_to_dev(disk)); return disk; -out_free_part0: - hd_free_part(&disk->part0); +out_bdput: + bdput(disk->part0); out_free_disk: kfree(disk); return NULL; } EXPORT_SYMBOL(__alloc_disk_node); -/** - * get_disk_and_module - increments the gendisk and gendisk fops module refcount - * @disk: the struct gendisk to increment the refcount for - * - * This increments the refcount for the struct gendisk, and the gendisk's - * fops module owner. - * - * Context: Any context. 
- */ -struct kobject *get_disk_and_module(struct gendisk *disk) -{ - struct module *owner; - struct kobject *kobj; - - if (!disk->fops) - return NULL; - owner = disk->fops->owner; - if (owner && !try_module_get(owner)) - return NULL; - kobj = kobject_get_unless_zero(&disk_to_dev(disk)->kobj); - if (kobj == NULL) { - module_put(owner); - return NULL; - } - return kobj; - -} -EXPORT_SYMBOL(get_disk_and_module); - /** * put_disk - decrements the gendisk refcount * @disk: the struct gendisk to decrement the refcount for @@ -1811,31 +1618,10 @@ EXPORT_SYMBOL(get_disk_and_module); void put_disk(struct gendisk *disk) { if (disk) - kobject_put(&disk_to_dev(disk)->kobj); + put_device(disk_to_dev(disk)); } EXPORT_SYMBOL(put_disk); -/** - * put_disk_and_module - decrements the module and gendisk refcount - * @disk: the struct gendisk to decrement the refcount for - * - * This is a counterpart of get_disk_and_module() and thus also of - * get_gendisk(). - * - * Context: Any context, but the last reference must not be dropped from - * atomic context. 
- */ -void put_disk_and_module(struct gendisk *disk) -{ - if (disk) { - struct module *owner = disk->fops->owner; - - put_disk(disk); - module_put(owner); - } -} -EXPORT_SYMBOL(put_disk_and_module); - static void set_disk_ro_uevent(struct gendisk *gd, int ro) { char event[] = "DISK_RO=1"; @@ -1846,26 +1632,19 @@ static void set_disk_ro_uevent(struct gendisk *gd, int ro) kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); } -void set_device_ro(struct block_device *bdev, int flag) -{ - bdev->bd_part->policy = flag; -} - -EXPORT_SYMBOL(set_device_ro); - void set_disk_ro(struct gendisk *disk, int flag) { struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; - if (disk->part0.policy != flag) { + if (disk->part0->bd_read_only != flag) { set_disk_ro_uevent(disk, flag); - disk->part0.policy = flag; + disk->part0->bd_read_only = flag; } disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); while ((part = disk_part_iter_next(&piter))) - part->policy = flag; + part->bd_read_only = flag; disk_part_iter_exit(&piter); } @@ -1875,7 +1654,7 @@ int bdev_read_only(struct block_device *bdev) { if (!bdev) return 0; - return bdev->bd_part->policy; + return bdev->bd_read_only; } EXPORT_SYMBOL(bdev_read_only); diff --git a/block/ioctl.c b/block/ioctl.c index 3fbc382eb926..d61d652078f4 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -35,15 +35,6 @@ static int blkpg_do_ioctl(struct block_device *bdev, start = p.start >> SECTOR_SHIFT; length = p.length >> SECTOR_SHIFT; - /* check for fit in a hd_struct */ - if (sizeof(sector_t) < sizeof(long long)) { - long pstart = start, plength = length; - - if (pstart != start || plength != length || pstart < 0 || - plength < 0 || p.pno > 65535) - return -EINVAL; - } - switch (op) { case BLKPG_ADD_PARTITION: /* check if partition is aligned to blocksize */ @@ -219,23 +210,6 @@ static int compat_put_ulong(compat_ulong_t __user *argp, compat_ulong_t val) } #endif -int __blkdev_driver_ioctl(struct block_device 
*bdev, fmode_t mode, - unsigned cmd, unsigned long arg) -{ - struct gendisk *disk = bdev->bd_disk; - - if (disk->fops->ioctl) - return disk->fops->ioctl(bdev, mode, cmd, arg); - - return -ENOTTY; -} -/* - * For the record: _GPL here is only because somebody decided to slap it - * on the previous export. Sheer idiocy, since it wasn't copyrightable - * at all and could be open-coded without any exports by anybody who cares. - */ -EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl); - #ifdef CONFIG_COMPAT /* * This is the equivalent of compat_ptr_ioctl(), to be used by block @@ -346,38 +320,11 @@ static int blkdev_pr_clear(struct block_device *bdev, return ops->pr_clear(bdev, c.key); } -/* - * Is it an unrecognized ioctl? The correct returns are either - * ENOTTY (final) or ENOIOCTLCMD ("I don't know this one, try a - * fallback"). ENOIOCTLCMD gets turned into ENOTTY by the ioctl - * code before returning. - * - * Confused drivers sometimes return EINVAL, which is wrong. It - * means "I understood the ioctl command, but the parameters to - * it were wrong". - * - * We should aim to just fix the broken drivers, the EINVAL case - * should go away. 
- */ -static inline int is_unrecognized_ioctl(int ret) -{ - return ret == -EINVAL || - ret == -ENOTTY || - ret == -ENOIOCTLCMD; -} - static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode, unsigned cmd, unsigned long arg) { - int ret; - if (!capable(CAP_SYS_ADMIN)) return -EACCES; - - ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); - if (!is_unrecognized_ioctl(ret)) - return ret; - fsync_bdev(bdev); invalidate_bdev(bdev); return 0; @@ -391,12 +338,14 @@ static int blkdev_roset(struct block_device *bdev, fmode_t mode, if (!capable(CAP_SYS_ADMIN)) return -EACCES; - ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); - if (!is_unrecognized_ioctl(ret)) - return ret; if (get_user(n, (int __user *)arg)) return -EFAULT; - set_device_ro(bdev, n); + if (bdev->bd_disk->fops->set_read_only) { + ret = bdev->bd_disk->fops->set_read_only(bdev, n); + if (ret) + return ret; + } + bdev->bd_read_only = n; return 0; } @@ -619,10 +568,12 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, } ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); - if (ret == -ENOIOCTLCMD) - return __blkdev_driver_ioctl(bdev, mode, cmd, arg); + if (ret != -ENOIOCTLCMD) + return ret; - return ret; + if (!bdev->bd_disk->fops->ioctl) + return -ENOTTY; + return bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); } EXPORT_SYMBOL_GPL(blkdev_ioctl); /* for /dev/raw */ @@ -639,8 +590,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) { int ret; void __user *argp = compat_ptr(arg); - struct inode *inode = file->f_mapping->host; - struct block_device *bdev = inode->i_bdev; + struct block_device *bdev = I_BDEV(file->f_mapping->host); struct gendisk *disk = bdev->bd_disk; fmode_t mode = file->f_mode; loff_t size; diff --git a/block/partitions/core.c b/block/partitions/core.c index a02e22411594..deca253583bd 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -85,6 +85,13 @@ static int (*check_part[])(struct parsed_partitions *) = { 
NULL }; +static void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors) +{ + spin_lock(&bdev->bd_size_lock); + i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); + spin_unlock(&bdev->bd_size_lock); +} + static struct parsed_partitions *allocate_partitions(struct gendisk *hd) { struct parsed_partitions *state; @@ -175,44 +182,39 @@ static struct parsed_partitions *check_partition(struct gendisk *hd, static ssize_t part_partition_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); - - return sprintf(buf, "%d\n", p->partno); + return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_partno); } static ssize_t part_start_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); - - return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); + return sprintf(buf, "%llu\n", dev_to_bdev(dev)->bd_start_sect); } static ssize_t part_ro_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); - return sprintf(buf, "%d\n", p->policy ? 
1 : 0); + return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_read_only); } static ssize_t part_alignment_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); + struct block_device *bdev = dev_to_bdev(dev); return sprintf(buf, "%u\n", - queue_limit_alignment_offset(&part_to_disk(p)->queue->limits, - p->start_sect)); + queue_limit_alignment_offset(&bdev->bd_disk->queue->limits, + bdev->bd_start_sect)); } static ssize_t part_discard_alignment_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); + struct block_device *bdev = dev_to_bdev(dev); return sprintf(buf, "%u\n", - queue_limit_discard_alignment(&part_to_disk(p)->queue->limits, - p->start_sect)); + queue_limit_discard_alignment(&bdev->bd_disk->queue->limits, + bdev->bd_start_sect)); } static DEVICE_ATTR(partition, 0444, part_partition_show, NULL); @@ -257,19 +259,17 @@ static const struct attribute_group *part_attr_groups[] = { static void part_release(struct device *dev) { - struct hd_struct *p = dev_to_part(dev); blk_free_devt(dev->devt); - hd_free_part(p); - kfree(p); + bdput(dev_to_bdev(dev)); } static int part_uevent(struct device *dev, struct kobj_uevent_env *env) { - struct hd_struct *part = dev_to_part(dev); + struct block_device *part = dev_to_bdev(dev); - add_uevent_var(env, "PARTN=%u", part->partno); - if (part->info && part->info->volname[0]) - add_uevent_var(env, "PARTNAME=%s", part->info->volname); + add_uevent_var(env, "PARTN=%u", part->bd_partno); + if (part->bd_meta_info && part->bd_meta_info->volname[0]) + add_uevent_var(env, "PARTNAME=%s", part->bd_meta_info->volname); return 0; } @@ -280,73 +280,29 @@ struct device_type part_type = { .uevent = part_uevent, }; -static void hd_struct_free_work(struct work_struct *work) -{ - struct hd_struct *part = - container_of(to_rcu_work(work), struct hd_struct, rcu_work); - struct gendisk *disk = part_to_disk(part); - - /* - * Release the 
disk reference acquired in delete_partition here. - * We can't release it in hd_struct_free because the final put_device - * needs process context and thus can't be run directly from a - * percpu_ref ->release handler. - */ - put_device(disk_to_dev(disk)); - - part->start_sect = 0; - part->nr_sects = 0; - part_stat_set_all(part, 0); - put_device(part_to_dev(part)); -} - -static void hd_struct_free(struct percpu_ref *ref) -{ - struct hd_struct *part = container_of(ref, struct hd_struct, ref); - struct gendisk *disk = part_to_disk(part); - struct disk_part_tbl *ptbl = - rcu_dereference_protected(disk->part_tbl, 1); - - rcu_assign_pointer(ptbl->last_lookup, NULL); - - INIT_RCU_WORK(&part->rcu_work, hd_struct_free_work); - queue_rcu_work(system_wq, &part->rcu_work); -} - -int hd_ref_init(struct hd_struct *part) -{ - if (percpu_ref_init(&part->ref, hd_struct_free, 0, GFP_KERNEL)) - return -ENOMEM; - return 0; -} - /* * Must be called either with bd_mutex held, before a disk can be opened or * after all disk users are gone. */ -void delete_partition(struct hd_struct *part) +void delete_partition(struct block_device *part) { - struct gendisk *disk = part_to_disk(part); + struct gendisk *disk = part->bd_disk; struct disk_part_tbl *ptbl = rcu_dereference_protected(disk->part_tbl, 1); - /* - * ->part_tbl is referenced in this part's release handler, so - * we have to hold the disk device - */ - get_device(disk_to_dev(disk)); - rcu_assign_pointer(ptbl->part[part->partno], NULL); - kobject_put(part->holder_dir); - device_del(part_to_dev(part)); + rcu_assign_pointer(ptbl->part[part->bd_partno], NULL); + rcu_assign_pointer(ptbl->last_lookup, NULL); + + kobject_put(part->bd_holder_dir); + device_del(&part->bd_device); /* - * Remove gendisk pointer from idr so that it cannot be looked up - * while RCU period before freeing gendisk is running to prevent - * use-after-free issues. Note that the device number stays - * "in-use" until we really free the gendisk. 
+ * Remove the block device from the inode hash, so that it cannot be + * looked up any more even when openers still hold references. */ - blk_invalidate_devt(part_devt(part)); - percpu_ref_kill(&part->ref); + remove_inode_hash(part->bd_inode); + + put_device(&part->bd_device); } static ssize_t whole_disk_show(struct device *dev, @@ -360,14 +316,14 @@ static DEVICE_ATTR(whole_disk, 0444, whole_disk_show, NULL); * Must be called either with bd_mutex held, before a disk can be opened or * after all disk users are gone. */ -static struct hd_struct *add_partition(struct gendisk *disk, int partno, +static struct block_device *add_partition(struct gendisk *disk, int partno, sector_t start, sector_t len, int flags, struct partition_meta_info *info) { - struct hd_struct *p; dev_t devt = MKDEV(0, 0); struct device *ddev = disk_to_dev(disk); struct device *pdev; + struct block_device *bdev; struct disk_part_tbl *ptbl; const char *dname; int err; @@ -398,36 +354,22 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, if (ptbl->part[partno]) return ERR_PTR(-EBUSY); - p = kzalloc(sizeof(*p), GFP_KERNEL); - if (!p) - return ERR_PTR(-EBUSY); + bdev = bdev_alloc(disk, partno); + if (!bdev) + return ERR_PTR(-ENOMEM); - p->dkstats = alloc_percpu(struct disk_stats); - if (!p->dkstats) { - err = -ENOMEM; - goto out_free; - } - - hd_sects_seq_init(p); - pdev = part_to_dev(p); - - p->start_sect = start; - p->nr_sects = len; - p->partno = partno; - p->policy = get_disk_ro(disk); + bdev->bd_start_sect = start; + bdev_set_nr_sectors(bdev, len); + bdev->bd_read_only = get_disk_ro(disk); if (info) { - struct partition_meta_info *pinfo; - - pinfo = kzalloc_node(sizeof(*pinfo), GFP_KERNEL, disk->node_id); - if (!pinfo) { - err = -ENOMEM; - goto out_free_stats; - } - memcpy(pinfo, info, sizeof(*info)); - p->info = pinfo; + err = -ENOMEM; + bdev->bd_meta_info = kmemdup(info, sizeof(*info), GFP_KERNEL); + if (!bdev->bd_meta_info) + goto out_bdput; } + pdev = 
&bdev->bd_device; dname = dev_name(ddev); if (isdigit(dname[strlen(dname) - 1])) dev_set_name(pdev, "%sp%d", dname, partno); @@ -439,9 +381,9 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, pdev->type = &part_type; pdev->parent = ddev; - err = blk_alloc_devt(p, &devt); + err = blk_alloc_devt(bdev, &devt); if (err) - goto out_free_info; + goto out_bdput; pdev->devt = devt; /* delay uevent until 'holders' subdir is created */ @@ -451,8 +393,8 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, goto out_put; err = -ENOMEM; - p->holder_dir = kobject_create_and_add("holders", &pdev->kobj); - if (!p->holder_dir) + bdev->bd_holder_dir = kobject_create_and_add("holders", &pdev->kobj); + if (!bdev->bd_holder_dir) goto out_del; dev_set_uevent_suppress(pdev, 0); @@ -462,32 +404,20 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, goto out_del; } - err = hd_ref_init(p); - if (err) { - if (flags & ADDPART_FLAG_WHOLEDISK) - goto out_remove_file; - goto out_del; - } - /* everything is up and running, commence */ - rcu_assign_pointer(ptbl->part[partno], p); + bdev_add(bdev, devt); + rcu_assign_pointer(ptbl->part[partno], bdev); /* suppress uevent if the disk suppresses it */ if (!dev_get_uevent_suppress(ddev)) kobject_uevent(&pdev->kobj, KOBJ_ADD); - return p; + return bdev; -out_free_info: - kfree(p->info); -out_free_stats: - free_percpu(p->dkstats); -out_free: - kfree(p); +out_bdput: + bdput(bdev); return ERR_PTR(err); -out_remove_file: - device_remove_file(pdev, &dev_attr_whole_disk); out_del: - kobject_put(p->holder_dir); + kobject_put(bdev->bd_holder_dir); device_del(pdev); out_put: put_device(pdev); @@ -498,14 +428,14 @@ static bool partition_overlaps(struct gendisk *disk, sector_t start, sector_t length, int skip_partno) { struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; bool overlap = false; disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); while ((part 
= disk_part_iter_next(&piter))) { - if (part->partno == skip_partno || - start >= part->start_sect + part->nr_sects || - start + length <= part->start_sect) + if (part->bd_partno == skip_partno || + start >= part->bd_start_sect + bdev_nr_sectors(part) || + start + length <= part->bd_start_sect) continue; overlap = true; break; @@ -518,7 +448,7 @@ static bool partition_overlaps(struct gendisk *disk, sector_t start, int bdev_add_partition(struct block_device *bdev, int partno, sector_t start, sector_t length) { - struct hd_struct *part; + struct block_device *part; mutex_lock(&bdev->bd_mutex); if (partition_overlaps(bdev->bd_disk, start, length, -1)) { @@ -534,77 +464,59 @@ int bdev_add_partition(struct block_device *bdev, int partno, int bdev_del_partition(struct block_device *bdev, int partno) { - struct block_device *bdevp; - struct hd_struct *part = NULL; + struct block_device *part; int ret; - bdevp = bdget_disk(bdev->bd_disk, partno); - if (!bdevp) + part = bdget_disk(bdev->bd_disk, partno); + if (!part) return -ENXIO; - mutex_lock(&bdevp->bd_mutex); + mutex_lock(&part->bd_mutex); mutex_lock_nested(&bdev->bd_mutex, 1); - ret = -ENXIO; - part = disk_get_part(bdev->bd_disk, partno); - if (!part) - goto out_unlock; - ret = -EBUSY; - if (bdevp->bd_openers) + if (part->bd_openers) goto out_unlock; - sync_blockdev(bdevp); - invalidate_bdev(bdevp); + sync_blockdev(part); + invalidate_bdev(part); delete_partition(part); ret = 0; out_unlock: mutex_unlock(&bdev->bd_mutex); - mutex_unlock(&bdevp->bd_mutex); - bdput(bdevp); - if (part) - disk_put_part(part); + mutex_unlock(&part->bd_mutex); + bdput(part); return ret; } int bdev_resize_partition(struct block_device *bdev, int partno, sector_t start, sector_t length) { - struct block_device *bdevp; - struct hd_struct *part; + struct block_device *part; int ret = 0; - part = disk_get_part(bdev->bd_disk, partno); + part = bdget_disk(bdev->bd_disk, partno); if (!part) return -ENXIO; - ret = -ENOMEM; - bdevp = bdget_part(part); 
- if (!bdevp) - goto out_put_part; - - mutex_lock(&bdevp->bd_mutex); + mutex_lock(&part->bd_mutex); mutex_lock_nested(&bdev->bd_mutex, 1); - ret = -EINVAL; - if (start != part->start_sect) + if (start != part->bd_start_sect) goto out_unlock; ret = -EBUSY; if (partition_overlaps(bdev->bd_disk, start, length, partno)) goto out_unlock; - part_nr_sects_write(part, length); - bd_set_nr_sectors(bdevp, length); + bdev_set_nr_sectors(part, length); ret = 0; out_unlock: - mutex_unlock(&bdevp->bd_mutex); + mutex_unlock(&part->bd_mutex); mutex_unlock(&bdev->bd_mutex); - bdput(bdevp); -out_put_part: - disk_put_part(part); + bdput(part); return ret; } @@ -627,7 +539,7 @@ static bool disk_unlock_native_capacity(struct gendisk *disk) int blk_drop_partitions(struct block_device *bdev) { struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; if (bdev->bd_part_count) return -EBUSY; @@ -652,7 +564,7 @@ static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev, { sector_t size = state->parts[p].size; sector_t from = state->parts[p].from; - struct hd_struct *part; + struct block_device *part; if (!size) return true; @@ -692,7 +604,7 @@ static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev, if (IS_BUILTIN(CONFIG_BLK_DEV_MD) && (state->parts[p].flags & ADDPART_FLAG_RAID)) - md_autodetect_dev(part_to_dev(part)->devt); + md_autodetect_dev(part->bd_dev); return true; } diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 71c2b1564558..9e2d0c6a3877 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -201,7 +201,7 @@ struct amiga_floppy_struct { int busy; /* true when drive is active */ int dirty; /* true when trackbuf is not on disk */ int status; /* current error code for unit */ - struct gendisk *gendisk; + struct gendisk *gendisk[2]; struct blk_mq_tag_set tag_set; }; @@ -1669,6 +1669,11 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) return -EBUSY; } + if 
(unit[drive].type->code == FD_NODRIVE) { + mutex_unlock(&amiflop_mutex); + return -ENXIO; + } + if (mode & (FMODE_READ|FMODE_WRITE)) { bdev_check_media_change(bdev); if (mode & FMODE_WRITE) { @@ -1695,7 +1700,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) unit[drive].dtype=&data_types[system]; unit[drive].blocks=unit[drive].type->heads*unit[drive].type->tracks* data_types[system].sects*unit[drive].type->sect_mult; - set_capacity(unit[drive].gendisk, unit[drive].blocks); + set_capacity(unit[drive].gendisk[system], unit[drive].blocks); printk(KERN_INFO "fd%d: accessing %s-disk with %s-layout\n",drive, unit[drive].type->name, data_types[system].name); @@ -1772,36 +1777,68 @@ static const struct blk_mq_ops amiflop_mq_ops = { .queue_rq = amiflop_queue_rq, }; -static struct gendisk *fd_alloc_disk(int drive) +static int fd_alloc_disk(int drive, int system) { struct gendisk *disk; disk = alloc_disk(1); if (!disk) goto out; - - disk->queue = blk_mq_init_sq_queue(&unit[drive].tag_set, &amiflop_mq_ops, - 2, BLK_MQ_F_SHOULD_MERGE); - if (IS_ERR(disk->queue)) { - disk->queue = NULL; + disk->queue = blk_mq_init_queue(&unit[drive].tag_set); + if (IS_ERR(disk->queue)) goto out_put_disk; - } - unit[drive].trackbuf = kmalloc(FLOPPY_MAX_SECTORS * 512, GFP_KERNEL); - if (!unit[drive].trackbuf) - goto out_cleanup_queue; + disk->major = FLOPPY_MAJOR; + disk->first_minor = drive + system; + disk->fops = &floppy_fops; + disk->events = DISK_EVENT_MEDIA_CHANGE; + if (system) + sprintf(disk->disk_name, "fd%d_msdos", drive); + else + sprintf(disk->disk_name, "fd%d", drive); + disk->private_data = &unit[drive]; + set_capacity(disk, 880 * 2); - return disk; + unit[drive].gendisk[system] = disk; + add_disk(disk); + return 0; -out_cleanup_queue: - blk_cleanup_queue(disk->queue); - disk->queue = NULL; - blk_mq_free_tag_set(&unit[drive].tag_set); out_put_disk: + disk->queue = NULL; put_disk(disk); +out: + return -ENOMEM; +} + +static int fd_alloc_drive(int drive) +{ + 
unit[drive].trackbuf = kmalloc(FLOPPY_MAX_SECTORS * 512, GFP_KERNEL); + if (!unit[drive].trackbuf) + goto out; + + memset(&unit[drive].tag_set, 0, sizeof(unit[drive].tag_set)); + unit[drive].tag_set.ops = &amiflop_mq_ops; + unit[drive].tag_set.nr_hw_queues = 1; + unit[drive].tag_set.nr_maps = 1; + unit[drive].tag_set.queue_depth = 2; + unit[drive].tag_set.numa_node = NUMA_NO_NODE; + unit[drive].tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + if (blk_mq_alloc_tag_set(&unit[drive].tag_set)) + goto out_cleanup_trackbuf; + + pr_cont(" fd%d", drive); + + if (fd_alloc_disk(drive, 0) || fd_alloc_disk(drive, 1)) + goto out_cleanup_tagset; + return 0; + +out_cleanup_tagset: + blk_mq_free_tag_set(&unit[drive].tag_set); +out_cleanup_trackbuf: + kfree(unit[drive].trackbuf); out: unit[drive].type->code = FD_NODRIVE; - return NULL; + return -ENOMEM; } static int __init fd_probe_drives(void) @@ -1812,29 +1849,16 @@ static int __init fd_probe_drives(void) drives=0; nomem=0; for(drive=0;drive<FD_MAX_UNITS;drive++) { - struct gendisk *disk; fd_probe(drive); if (unit[drive].type->code == FD_NODRIVE) continue; - disk = fd_alloc_disk(drive); - if (!disk) { + if (fd_alloc_drive(drive) < 0) { pr_cont(" no mem for fd%d", drive); nomem = 1; continue; } - unit[drive].gendisk = disk; drives++; - - pr_cont(" fd%d",drive); - disk->major = FLOPPY_MAJOR; - disk->first_minor = drive; - disk->fops = &floppy_fops; - disk->events = DISK_EVENT_MEDIA_CHANGE; - sprintf(disk->disk_name, "fd%d", drive); - disk->private_data = &unit[drive]; - set_capacity(disk, 880*2); - add_disk(disk); } if ((drives > 0) || (nomem == 0)) { if (drives == 0) @@ -1846,15 +1870,6 @@ static int __init fd_probe_drives(void) return -ENOMEM; } -static struct kobject *floppy_find(dev_t dev, int *part, void *data) -{ - int drive = *part & 3; - if (unit[drive].type->code == FD_NODRIVE) - return NULL; - *part = 0; - return get_disk_and_module(unit[drive].gendisk); -} - static int __init amiga_floppy_probe(struct platform_device *pdev) { int i, ret; @@ -1884,9 +1899,6 @@ static int __init amiga_floppy_probe(struct
platform_device *pdev) if (fd_probe_drives() < 1) /* No usable drives */ goto out_probe; - blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE, - floppy_find, NULL, NULL); - /* initialize variables */ timer_setup(&motor_on_timer, motor_on_callback, 0); motor_on_timer.expires = 0; diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 313f0b946fe2..ac720bdcd983 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -890,19 +890,13 @@ void aoecmd_sleepwork(struct work_struct *work) { struct aoedev *d = container_of(work, struct aoedev, work); - struct block_device *bd; - u64 ssize; if (d->flags & DEVFL_GDALLOC) aoeblk_gdalloc(d); if (d->flags & DEVFL_NEWSIZE) { - ssize = get_capacity(d->gd); - bd = bdget_disk(d->gd, 0); - if (bd) { - bd_set_nr_sectors(bd, ssize); - bdput(bd); - } + set_capacity_and_notify(d->gd, d->ssize); + spin_lock_irq(&d->lock); d->flags |= DEVFL_UP; d->flags &= ~DEVFL_NEWSIZE; @@ -971,10 +965,9 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id) d->geo.start = 0; if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE)) return; - if (d->gd != NULL) { - set_capacity(d->gd, ssize); + if (d->gd != NULL) d->flags |= DEVFL_NEWSIZE; - } else + else d->flags |= DEVFL_GDALLOC; schedule_work(&d->work); } diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 3e881fdb06e0..104b713f4055 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -297,7 +297,7 @@ static struct atari_floppy_struct { unsigned int wpstat; /* current state of WP signal (for disk change detection) */ int flags; /* flags */ - struct gendisk *disk; + struct gendisk *disk[NUM_DISK_MINORS]; int ref; int type; struct blk_mq_tag_set tag_set; @@ -723,12 +723,16 @@ static void fd_error( void ) static int do_format(int drive, int type, struct atari_format_descr *desc) { - struct request_queue *q = unit[drive].disk->queue; + struct request_queue *q; unsigned char *p; int sect, nsect; unsigned long flags; 
int ret; + if (type) + type--; + + q = unit[drive].disk[type]->queue; blk_mq_freeze_queue(q); blk_mq_quiesce_queue(q); @@ -738,7 +742,7 @@ static int do_format(int drive, int type, struct atari_format_descr *desc) local_irq_restore(flags); if (type) { - if (--type >= NUM_DISK_MINORS || + if (type >= NUM_DISK_MINORS || minor2disktype[type].drive_types > DriveType) { ret = -EINVAL; goto out; @@ -1154,7 +1158,7 @@ static void fd_rwsec_done1(int status) if (SUDT[-1].blocks > ReqBlock) { /* try another disk type */ SUDT--; - set_capacity(unit[SelectedDrive].disk, + set_capacity(unit[SelectedDrive].disk[0], SUDT->blocks); } else Probing = 0; @@ -1169,7 +1173,7 @@ static void fd_rwsec_done1(int status) /* record not found, but not probing. Maybe stretch wrong ? Restart probing */ if (SUD.autoprobe) { SUDT = atari_disk_type + StartDiskType[DriveType]; - set_capacity(unit[SelectedDrive].disk, + set_capacity(unit[SelectedDrive].disk[0], SUDT->blocks); Probing = 1; } @@ -1515,7 +1519,7 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx, if (!UDT) { Probing = 1; UDT = atari_disk_type + StartDiskType[DriveType]; - set_capacity(floppy->disk, UDT->blocks); + set_capacity(bd->rq->rq_disk, UDT->blocks); UD.autoprobe = 1; } } @@ -1533,7 +1537,7 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx, } type = minor2disktype[type].index; UDT = &atari_disk_type[type]; - set_capacity(floppy->disk, UDT->blocks); + set_capacity(bd->rq->rq_disk, UDT->blocks); UD.autoprobe = 0; } @@ -1658,7 +1662,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, printk (KERN_INFO "floppy%d: setting %s %p!\n", drive, dtp->name, dtp); UDT = dtp; - set_capacity(floppy->disk, UDT->blocks); + set_capacity(disk, UDT->blocks); if (cmd == FDDEFPRM) { /* save settings as permanent default type */ @@ -1702,7 +1706,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, return -EINVAL; UDT = dtp; - set_capacity(floppy->disk, UDT->blocks); + 
set_capacity(disk, UDT->blocks); return 0; case FDMSGON: @@ -1725,7 +1729,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, UDT = NULL; /* MSch: invalidate default_params */ default_params[drive].blocks = 0; - set_capacity(floppy->disk, MAX_DISK_SIZE * 2); + set_capacity(disk, MAX_DISK_SIZE * 2); fallthrough; case FDFMTEND: case FDFLUSH: @@ -1962,14 +1966,50 @@ static const struct blk_mq_ops ataflop_mq_ops = { .commit_rqs = ataflop_commit_rqs, }; -static struct kobject *floppy_find(dev_t dev, int *part, void *data) +static int ataflop_alloc_disk(unsigned int drive, unsigned int type) { - int drive = *part & 3; - int type = *part >> 2; + struct gendisk *disk; + int ret; + + disk = alloc_disk(1); + if (!disk) + return -ENOMEM; + + disk->queue = blk_mq_init_queue(&unit[drive].tag_set); + if (IS_ERR(disk->queue)) { + ret = PTR_ERR(disk->queue); + disk->queue = NULL; + put_disk(disk); + return ret; + } + + disk->major = FLOPPY_MAJOR; + disk->first_minor = drive + (type << 2); + sprintf(disk->disk_name, "fd%d", drive); + disk->fops = &floppy_fops; + disk->events = DISK_EVENT_MEDIA_CHANGE; + disk->private_data = &unit[drive]; + set_capacity(disk, MAX_DISK_SIZE * 2); + + unit[drive].disk[type] = disk; + return 0; +} + +static DEFINE_MUTEX(ataflop_probe_lock); + +static void ataflop_probe(dev_t dev) +{ + int drive = MINOR(dev) & 3; + int type = MINOR(dev) >> 2; + if (drive >= FD_MAX_UNITS || type > NUM_DISK_MINORS) - return NULL; - *part = 0; - return get_disk_and_module(unit[drive].disk); + return; + mutex_lock(&ataflop_probe_lock); + if (!unit[drive].disk[type]) { + if (ataflop_alloc_disk(drive, type) == 0) + add_disk(unit[drive].disk[type]); + } + mutex_unlock(&ataflop_probe_lock); } static int __init atari_floppy_init (void) @@ -1981,23 +2021,26 @@ static int __init atari_floppy_init (void) /* Amiga, Mac, ... 
don't have Atari-compatible floppy :-) */ return -ENODEV; - if (register_blkdev(FLOPPY_MAJOR,"fd")) - return -EBUSY; + mutex_lock(&ataflop_probe_lock); + ret = __register_blkdev(FLOPPY_MAJOR, "fd", ataflop_probe); + if (ret) + goto out_unlock; for (i = 0; i < FD_MAX_UNITS; i++) { - unit[i].disk = alloc_disk(1); - if (!unit[i].disk) { - ret = -ENOMEM; + memset(&unit[i].tag_set, 0, sizeof(unit[i].tag_set)); + unit[i].tag_set.ops = &ataflop_mq_ops; + unit[i].tag_set.nr_hw_queues = 1; + unit[i].tag_set.nr_maps = 1; + unit[i].tag_set.queue_depth = 2; + unit[i].tag_set.numa_node = NUMA_NO_NODE; + unit[i].tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + ret = blk_mq_alloc_tag_set(&unit[i].tag_set); + if (ret) goto err; - } - unit[i].disk->queue = blk_mq_init_sq_queue(&unit[i].tag_set, - &ataflop_mq_ops, 2, - BLK_MQ_F_SHOULD_MERGE); - if (IS_ERR(unit[i].disk->queue)) { - put_disk(unit[i].disk); - ret = PTR_ERR(unit[i].disk->queue); - unit[i].disk->queue = NULL; + ret = ataflop_alloc_disk(i, 0); + if (ret) { + blk_mq_free_tag_set(&unit[i].tag_set); goto err; } } @@ -2027,19 +2070,9 @@ static int __init atari_floppy_init (void) for (i = 0; i < FD_MAX_UNITS; i++) { unit[i].track = -1; unit[i].flags = 0; - unit[i].disk->major = FLOPPY_MAJOR; - unit[i].disk->first_minor = i; - sprintf(unit[i].disk->disk_name, "fd%d", i); - unit[i].disk->fops = &floppy_fops; - unit[i].disk->events = DISK_EVENT_MEDIA_CHANGE; - unit[i].disk->private_data = &unit[i]; - set_capacity(unit[i].disk, MAX_DISK_SIZE * 2); - add_disk(unit[i].disk); + add_disk(unit[i].disk[0]); } - blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE, - floppy_find, NULL, NULL); - printk(KERN_INFO "Atari floppy driver: max. %cD, %strack buffering\n", DriveType == 0 ? 'D' : DriveType == 1 ? 'H' : 'E', UseTrackbuffer ? 
"" : "no "); @@ -2049,14 +2082,14 @@ static int __init atari_floppy_init (void) err: while (--i >= 0) { - struct gendisk *disk = unit[i].disk; - - blk_cleanup_queue(disk->queue); + blk_cleanup_queue(unit[i].disk[0]->queue); + put_disk(unit[i].disk[0]); blk_mq_free_tag_set(&unit[i].tag_set); - put_disk(unit[i].disk); } unregister_blkdev(FLOPPY_MAJOR, "fd"); +out_unlock: + mutex_unlock(&ataflop_probe_lock); return ret; } @@ -2101,13 +2134,17 @@ __setup("floppy=", atari_floppy_setup); static void __exit atari_floppy_exit(void) { - int i; - blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); + int i, type; + for (i = 0; i < FD_MAX_UNITS; i++) { - del_gendisk(unit[i].disk); - blk_cleanup_queue(unit[i].disk->queue); + for (type = 0; type < NUM_DISK_MINORS; type++) { + if (!unit[i].disk[type]) + continue; + del_gendisk(unit[i].disk[type]); + blk_cleanup_queue(unit[i].disk[type]->queue); + put_disk(unit[i].disk[type]); + } blk_mq_free_tag_set(&unit[i].tag_set); - put_disk(unit[i].disk); } unregister_blkdev(FLOPPY_MAJOR, "fd"); diff --git a/drivers/block/brd.c b/drivers/block/brd.c index cc49a921339f..c43a6ab4b1f3 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -426,14 +426,15 @@ static void brd_free(struct brd_device *brd) kfree(brd); } -static struct brd_device *brd_init_one(int i, bool *new) +static void brd_probe(dev_t dev) { struct brd_device *brd; + int i = MINOR(dev) / max_part; - *new = false; + mutex_lock(&brd_devices_mutex); list_for_each_entry(brd, &brd_devices, brd_list) { if (brd->brd_number == i) - goto out; + goto out_unlock; } brd = brd_alloc(i); @@ -442,9 +443,9 @@ static struct brd_device *brd_init_one(int i, bool *new) add_disk(brd->brd_disk); list_add_tail(&brd->brd_list, &brd_devices); } - *new = true; -out: - return brd; + +out_unlock: + mutex_unlock(&brd_devices_mutex); } static void brd_del_one(struct brd_device *brd) @@ -454,23 +455,6 @@ static void brd_del_one(struct brd_device *brd) brd_free(brd); } -static struct kobject 
*brd_probe(dev_t dev, int *part, void *data) -{ - struct brd_device *brd; - struct kobject *kobj; - bool new; - - mutex_lock(&brd_devices_mutex); - brd = brd_init_one(MINOR(dev) / max_part, &new); - kobj = brd ? get_disk_and_module(brd->brd_disk) : NULL; - mutex_unlock(&brd_devices_mutex); - - if (new) - *part = 0; - - return kobj; -} - static inline void brd_check_and_reset_par(void) { if (unlikely(!max_part)) @@ -510,11 +494,12 @@ static int __init brd_init(void) * dynamically. */ - if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) + if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe)) return -EIO; brd_check_and_reset_par(); + mutex_lock(&brd_devices_mutex); for (i = 0; i < rd_nr; i++) { brd = brd_alloc(i); if (!brd) @@ -532,9 +517,7 @@ static int __init brd_init(void) brd->brd_disk->queue = brd->brd_queue; add_disk(brd->brd_disk); } - - blk_register_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS, - THIS_MODULE, brd_probe, NULL, NULL); + mutex_unlock(&brd_devices_mutex); pr_info("brd: module loaded\n"); return 0; @@ -544,6 +527,7 @@ static int __init brd_init(void) list_del(&brd->brd_list); brd_free(brd); } + mutex_unlock(&brd_devices_mutex); unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); pr_info("brd: module NOT loaded !!!\n"); @@ -557,7 +541,6 @@ static void __exit brd_exit(void) list_for_each_entry_safe(brd, next, &brd_devices, brd_list) brd_del_one(brd); - blk_unregister_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS); unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); pr_info("brd: module unloaded\n"); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 65b95aef8dbc..1c8c18b2a25f 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2036,8 +2036,7 @@ void drbd_set_my_capacity(struct drbd_device *device, sector_t size) { char ppb[10]; - set_capacity(device->vdisk, size); - revalidate_disk_size(device->vdisk, false); + set_capacity_and_notify(device->vdisk, size); drbd_info(device, "size = %s (%llu 
KB)\n", ppsize(ppb, size>>1), (unsigned long long)size>>1); @@ -2068,8 +2067,7 @@ void drbd_device_cleanup(struct drbd_device *device) } D_ASSERT(device, first_peer_device(device)->connection->net_conf == NULL); - set_capacity(device->vdisk, 0); - revalidate_disk_size(device->vdisk, false); + set_capacity_and_notify(device->vdisk, 0); if (device->bitmap) { /* maybe never allocated. */ drbd_bm_resize(device, 0, 1); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index dc333dbe5232..09c86ef3f0fd 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2802,7 +2802,7 @@ bool drbd_rs_c_min_rate_throttle(struct drbd_device *device) if (c_min_rate == 0) return false; - curr_events = (int)part_stat_read_accum(&disk->part0, sectors) - + curr_events = (int)part_stat_read_accum(disk->part0, sectors) - atomic_read(&device->rs_sect_ev); if (atomic_read(&device->ap_actlog_cnt) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index ba56f3f05312..02044ab7f767 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1678,7 +1678,8 @@ void drbd_rs_controller_reset(struct drbd_device *device) atomic_set(&device->rs_sect_in, 0); atomic_set(&device->rs_sect_ev, 0); device->rs_in_flight = 0; - device->rs_last_events = (int)part_stat_read_accum(&disk->part0, sectors); + device->rs_last_events = + (int)part_stat_read_accum(disk->part0, sectors); /* Updating the RCU protected object in place is necessary since this function gets called from atomic context. 
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 7df79ae6b0a1..dfe1dfc901cc 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -402,7 +402,6 @@ static struct floppy_drive_params drive_params[N_DRIVE]; static struct floppy_drive_struct drive_state[N_DRIVE]; static struct floppy_write_errors write_errors[N_DRIVE]; static struct timer_list motor_off_timer[N_DRIVE]; -static struct gendisk *disks[N_DRIVE]; static struct blk_mq_tag_set tag_sets[N_DRIVE]; static struct block_device *opened_bdev[N_DRIVE]; static DEFINE_MUTEX(open_lock); @@ -477,6 +476,8 @@ static struct floppy_struct floppy_type[32] = { { 3200,20,2,80,0,0x1C,0x00,0xCF,0x2C,"H1600" }, /* 31 1.6MB 3.5" */ }; +static struct gendisk *disks[N_DRIVE][ARRAY_SIZE(floppy_type)]; + #define SECTSIZE (_FD_SECTSIZE(*floppy)) /* Auto-detection: Disk type used until the next media change occurs. */ @@ -4111,7 +4112,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) new_dev = MINOR(bdev->bd_dev); drive_state[drive].fd_device = new_dev; - set_capacity(disks[drive], floppy_sizes[new_dev]); + set_capacity(disks[drive][ITYPE(new_dev)], floppy_sizes[new_dev]); if (old_dev != -1 && old_dev != new_dev) { if (buffer_drive == drive) buffer_track = -1; @@ -4579,15 +4580,58 @@ static bool floppy_available(int drive) return true; } -static struct kobject *floppy_find(dev_t dev, int *part, void *data) +static int floppy_alloc_disk(unsigned int drive, unsigned int type) { - int drive = (*part & 3) | ((*part & 0x80) >> 5); - if (drive >= N_DRIVE || !floppy_available(drive)) - return NULL; - if (((*part >> 2) & 0x1f) >= ARRAY_SIZE(floppy_type)) - return NULL; - *part = 0; - return get_disk_and_module(disks[drive]); + struct gendisk *disk; + int err; + + disk = alloc_disk(1); + if (!disk) + return -ENOMEM; + + disk->queue = blk_mq_init_queue(&tag_sets[drive]); + if (IS_ERR(disk->queue)) { + err = PTR_ERR(disk->queue); + disk->queue = NULL; + put_disk(disk); + return err; + } + + 
blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); + blk_queue_max_hw_sectors(disk->queue, 64); + disk->major = FLOPPY_MAJOR; + disk->first_minor = TOMINOR(drive) | (type << 2); + disk->fops = &floppy_fops; + disk->events = DISK_EVENT_MEDIA_CHANGE; + if (type) + sprintf(disk->disk_name, "fd%d_type%d", drive, type); + else + sprintf(disk->disk_name, "fd%d", drive); + /* to be cleaned up... */ + disk->private_data = (void *)(long)drive; + disk->flags |= GENHD_FL_REMOVABLE; + + disks[drive][type] = disk; + return 0; +} + +static DEFINE_MUTEX(floppy_probe_lock); + +static void floppy_probe(dev_t dev) +{ + unsigned int drive = (MINOR(dev) & 3) | ((MINOR(dev) & 0x80) >> 5); + unsigned int type = (MINOR(dev) >> 2) & 0x1f; + + if (drive >= N_DRIVE || !floppy_available(drive) || + type >= ARRAY_SIZE(floppy_type)) + return; + + mutex_lock(&floppy_probe_lock); + if (!disks[drive][type]) { + if (floppy_alloc_disk(drive, type) == 0) + add_disk(disks[drive][type]); + } + mutex_unlock(&floppy_probe_lock); } static int __init do_floppy_init(void) @@ -4609,33 +4653,25 @@ static int __init do_floppy_init(void) return -ENOMEM; for (drive = 0; drive < N_DRIVE; drive++) { - disks[drive] = alloc_disk(1); - if (!disks[drive]) { - err = -ENOMEM; + memset(&tag_sets[drive], 0, sizeof(tag_sets[drive])); + tag_sets[drive].ops = &floppy_mq_ops; + tag_sets[drive].nr_hw_queues = 1; + tag_sets[drive].nr_maps = 1; + tag_sets[drive].queue_depth = 2; + tag_sets[drive].numa_node = NUMA_NO_NODE; + tag_sets[drive].flags = BLK_MQ_F_SHOULD_MERGE; + err = blk_mq_alloc_tag_set(&tag_sets[drive]); + if (err) goto out_put_disk; - } - disks[drive]->queue = blk_mq_init_sq_queue(&tag_sets[drive], - &floppy_mq_ops, 2, - BLK_MQ_F_SHOULD_MERGE); - if (IS_ERR(disks[drive]->queue)) { - err = PTR_ERR(disks[drive]->queue); - disks[drive]->queue = NULL; + err = floppy_alloc_disk(drive, 0); + if (err) goto out_put_disk; - } - - blk_queue_bounce_limit(disks[drive]->queue, BLK_BOUNCE_HIGH); - 
blk_queue_max_hw_sectors(disks[drive]->queue, 64); - disks[drive]->major = FLOPPY_MAJOR; - disks[drive]->first_minor = TOMINOR(drive); - disks[drive]->fops = &floppy_fops; - disks[drive]->events = DISK_EVENT_MEDIA_CHANGE; - sprintf(disks[drive]->disk_name, "fd%d", drive); timer_setup(&motor_off_timer[drive], motor_off_callback, 0); } - err = register_blkdev(FLOPPY_MAJOR, "fd"); + err = __register_blkdev(FLOPPY_MAJOR, "fd", floppy_probe); if (err) goto out_put_disk; @@ -4643,9 +4679,6 @@ static int __init do_floppy_init(void) if (err) goto out_unreg_blkdev; - blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE, - floppy_find, NULL, NULL); - for (i = 0; i < 256; i++) if (ITYPE(i)) floppy_sizes[i] = floppy_type[ITYPE(i)].size; @@ -4673,7 +4706,7 @@ static int __init do_floppy_init(void) if (fdc_state[0].address == -1) { cancel_delayed_work(&fd_timeout); err = -ENODEV; - goto out_unreg_region; + goto out_unreg_driver; } #if N_FDC > 1 fdc_state[1].address = FDC2; @@ -4684,7 +4717,7 @@ static int __init do_floppy_init(void) if (err) { cancel_delayed_work(&fd_timeout); err = -EBUSY; - goto out_unreg_region; + goto out_unreg_driver; } /* initialise drive state */ @@ -4761,10 +4794,8 @@ static int __init do_floppy_init(void) if (err) goto out_remove_drives; - /* to be cleaned up... 
*/ - disks[drive]->private_data = (void *)(long)drive; - disks[drive]->flags |= GENHD_FL_REMOVABLE; - device_add_disk(&floppy_device[drive].dev, disks[drive], NULL); + device_add_disk(&floppy_device[drive].dev, disks[drive][0], + NULL); } return 0; @@ -4772,30 +4803,27 @@ static int __init do_floppy_init(void) out_remove_drives: while (drive--) { if (floppy_available(drive)) { - del_gendisk(disks[drive]); + del_gendisk(disks[drive][0]); platform_device_unregister(&floppy_device[drive]); } } out_release_dma: if (atomic_read(&usage_count)) floppy_release_irq_and_dma(); -out_unreg_region: - blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); +out_unreg_driver: platform_driver_unregister(&floppy_driver); out_unreg_blkdev: unregister_blkdev(FLOPPY_MAJOR, "fd"); out_put_disk: destroy_workqueue(floppy_wq); for (drive = 0; drive < N_DRIVE; drive++) { - if (!disks[drive]) + if (!disks[drive][0]) break; - if (disks[drive]->queue) { - del_timer_sync(&motor_off_timer[drive]); - blk_cleanup_queue(disks[drive]->queue); - disks[drive]->queue = NULL; - blk_mq_free_tag_set(&tag_sets[drive]); - } - put_disk(disks[drive]); + del_timer_sync(&motor_off_timer[drive]); + blk_cleanup_queue(disks[drive][0]->queue); + disks[drive][0]->queue = NULL; + blk_mq_free_tag_set(&tag_sets[drive]); + put_disk(disks[drive][0]); } return err; } @@ -5006,9 +5034,8 @@ module_init(floppy_module_init); static void __exit floppy_module_exit(void) { - int drive; + int drive, i; - blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); unregister_blkdev(FLOPPY_MAJOR, "fd"); platform_driver_unregister(&floppy_driver); @@ -5018,10 +5045,16 @@ static void __exit floppy_module_exit(void) del_timer_sync(&motor_off_timer[drive]); if (floppy_available(drive)) { - del_gendisk(disks[drive]); + for (i = 0; i < ARRAY_SIZE(floppy_type); i++) { + if (disks[drive][i]) + del_gendisk(disks[drive][i]); + } platform_device_unregister(&floppy_device[drive]); } - blk_cleanup_queue(disks[drive]->queue); + for (i = 0; i < 
ARRAY_SIZE(floppy_type); i++) { + if (disks[drive][i]) + blk_cleanup_queue(disks[drive][i]->queue); + } blk_mq_free_tag_set(&tag_sets[drive]); /* @@ -5029,10 +5062,17 @@ static void __exit floppy_module_exit(void) * queue reference in put_disk(). */ if (!(allowed_drive_mask & (1 << drive)) || - fdc_state[FDC(drive)].version == FDC_NONE) - disks[drive]->queue = NULL; + fdc_state[FDC(drive)].version == FDC_NONE) { + for (i = 0; i < ARRAY_SIZE(floppy_type); i++) { + if (disks[drive][i]) + disks[drive][i]->queue = NULL; + } + } - put_disk(disks[drive]); + for (i = 0; i < ARRAY_SIZE(floppy_type); i++) { + if (disks[drive][i]) + put_disk(disks[drive][i]); + } } cancel_delayed_work_sync(&fd_timeout); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index a58084c2ed7c..d2ce1ddc192d 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -251,12 +251,8 @@ loop_validate_block_size(unsigned short bsize) */ static void loop_set_size(struct loop_device *lo, loff_t size) { - struct block_device *bdev = lo->lo_device; - - bd_set_nr_sectors(bdev, size); - - if (!set_capacity_revalidate_and_notify(lo->lo_disk, size, false)) - kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); + if (!set_capacity_and_notify(lo->lo_disk, size)) + kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE); } static inline int @@ -679,10 +675,10 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) while (is_loop_device(f)) { struct loop_device *l; - if (f->f_mapping->host->i_bdev == bdev) + if (f->f_mapping->host->i_rdev == bdev->bd_dev) return -EBADF; - l = f->f_mapping->host->i_bdev->bd_disk->private_data; + l = I_BDEV(f->f_mapping->host)->bd_disk->private_data; if (l->lo_state != Lo_bound) { return -EINVAL; } @@ -889,9 +885,7 @@ static void loop_config_discard(struct loop_device *lo) * file-backed loop devices: discarded regions read back as zero. 
*/ if (S_ISBLK(inode->i_mode) && !lo->lo_encrypt_key_size) { - struct request_queue *backingq; - - backingq = bdev_get_queue(inode->i_bdev); + struct request_queue *backingq = bdev_get_queue(I_BDEV(inode)); max_discard_sectors = backingq->limits.max_write_zeroes_sectors; granularity = backingq->limits.discard_granularity ?: @@ -1075,7 +1069,6 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, struct file *file; struct inode *inode; struct address_space *mapping; - struct block_device *claimed_bdev = NULL; int error; loff_t size; bool partscan; @@ -1094,8 +1087,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, * here to avoid changing device under exclusive owner. */ if (!(mode & FMODE_EXCL)) { - claimed_bdev = bdev->bd_contains; - error = bd_prepare_to_claim(bdev, claimed_bdev, loop_configure); + error = bd_prepare_to_claim(bdev, loop_configure); if (error) goto out_putf; } @@ -1138,7 +1130,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, if (error) goto out_unlock; - set_device_ro(bdev, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0); + set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0); lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO; lo->lo_device = bdev; @@ -1168,9 +1160,6 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, size = get_loop_size(lo, file); loop_set_size(lo, size); - set_blocksize(bdev, S_ISBLK(inode->i_mode) ? 
- block_size(inode->i_bdev) : PAGE_SIZE); - lo->lo_state = Lo_bound; if (part_shift) lo->lo_flags |= LO_FLAGS_PARTSCAN; @@ -1185,15 +1174,15 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, mutex_unlock(&loop_ctl_mutex); if (partscan) loop_reread_partitions(lo, bdev); - if (claimed_bdev) - bd_abort_claiming(bdev, claimed_bdev, loop_configure); + if (!(mode & FMODE_EXCL)) + bd_abort_claiming(bdev, loop_configure); return 0; out_unlock: mutex_unlock(&loop_ctl_mutex); out_bdev: - if (claimed_bdev) - bd_abort_claiming(bdev, claimed_bdev, loop_configure); + if (!(mode & FMODE_EXCL)) + bd_abort_claiming(bdev, loop_configure); out_putf: fput(file); out: @@ -1252,7 +1241,6 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) set_capacity(lo->lo_disk, 0); loop_sysfs_exit(lo); if (bdev) { - bd_set_nr_sectors(bdev, 0); /* let user-space know about this change */ kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); } @@ -2235,24 +2223,18 @@ static int loop_lookup(struct loop_device **l, int i) return ret; } -static struct kobject *loop_probe(dev_t dev, int *part, void *data) +static void loop_probe(dev_t dev) { + int idx = MINOR(dev) >> part_shift; struct loop_device *lo; - struct kobject *kobj; - int err; + + if (max_loop && idx >= max_loop) + return; mutex_lock(&loop_ctl_mutex); - err = loop_lookup(&lo, MINOR(dev) >> part_shift); - if (err < 0) - err = loop_add(&lo, MINOR(dev) >> part_shift); - if (err < 0) - kobj = NULL; - else - kobj = get_disk_and_module(lo->lo_disk); + if (loop_lookup(&lo, idx) < 0) + loop_add(&lo, idx); mutex_unlock(&loop_ctl_mutex); - - *part = 0; - return kobj; } static long loop_control_ioctl(struct file *file, unsigned int cmd, @@ -2372,14 +2354,11 @@ static int __init loop_init(void) goto err_out; - if (register_blkdev(LOOP_MAJOR, "loop")) { + if (__register_blkdev(LOOP_MAJOR, "loop", loop_probe)) { err = -EIO; goto misc_out; } - blk_register_region(MKDEV(LOOP_MAJOR, 0), range, - THIS_MODULE, loop_probe, 
NULL, NULL); - /* pre-create number of devices given by config or max_loop */ mutex_lock(&loop_ctl_mutex); for (i = 0; i < nr; i++) @@ -2405,16 +2384,11 @@ static int loop_exit_cb(int id, void *ptr, void *data) static void __exit loop_exit(void) { - unsigned long range; - - range = max_loop ? max_loop << part_shift : 1UL << MINORBITS; - mutex_lock(&loop_ctl_mutex); idr_for_each(&loop_index_idr, &loop_exit_cb, NULL); idr_destroy(&loop_index_idr); - blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); unregister_blkdev(LOOP_MAJOR, "loop"); misc_deregister(&loop_misc); diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 153e2cdecb4d..53ac59d19ae5 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3687,7 +3687,6 @@ static int mtip_block_initialize(struct driver_data *dd) /* Enable the block device and add it to /dev */ device_add_disk(&dd->pdev->dev, dd->disk, NULL); - dd->bdev = bdget_disk(dd->disk, 0); /* * Now that the disk is active, initialize any sysfs attributes * managed by the protocol layer. @@ -3721,9 +3720,6 @@ static int mtip_block_initialize(struct driver_data *dd) return rv; kthread_run_error: - bdput(dd->bdev); - dd->bdev = NULL; - /* Delete our gendisk. This also removes the device from /dev */ del_gendisk(dd->disk); @@ -3804,14 +3800,6 @@ static int mtip_block_remove(struct driver_data *dd) blk_mq_tagset_busy_iter(&dd->tags, mtip_no_dev_cleanup, dd); blk_mq_unquiesce_queue(dd->queue); - /* - * Delete our gendisk structure. 
This also removes the device - * from /dev - */ - if (dd->bdev) { - bdput(dd->bdev); - dd->bdev = NULL; - } if (dd->disk) { if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) del_gendisk(dd->disk); @@ -4206,9 +4194,6 @@ static void mtip_pci_remove(struct pci_dev *pdev) } while (atomic_read(&dd->irq_workers_active) != 0 && time_before(jiffies, to)); - if (!dd->sr) - fsync_bdev(dd->bdev); - if (atomic_read(&dd->irq_workers_active) != 0) { dev_warn(&dd->pdev->dev, "Completion workers still active!\n"); diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index e22a7f0523bf..88f4206310e4 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -463,8 +463,6 @@ struct driver_data { int isr_binding; - struct block_device *bdev; - struct list_head online_list; /* linkage for online list */ struct list_head remove_list; /* linkage for removing list */ diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index aaae9220f3a0..92f84ed0ba9e 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -296,40 +296,32 @@ static void nbd_size_clear(struct nbd_device *nbd) } } -static void nbd_size_update(struct nbd_device *nbd, bool start) +static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize, + loff_t blksize) { - struct nbd_config *config = nbd->config; - struct block_device *bdev = bdget_disk(nbd->disk, 0); - sector_t nr_sectors = config->bytesize >> 9; + if (!blksize) + blksize = NBD_DEF_BLKSIZE; + if (blksize < 512 || blksize > PAGE_SIZE || !is_power_of_2(blksize)) + return -EINVAL; - if (config->flags & NBD_FLAG_SEND_TRIM) { - nbd->disk->queue->limits.discard_granularity = config->blksize; - nbd->disk->queue->limits.discard_alignment = config->blksize; + nbd->config->bytesize = bytesize; + nbd->config->blksize = blksize; + + if (!nbd->task_recv) + return 0; + + if (nbd->config->flags & NBD_FLAG_SEND_TRIM) { + nbd->disk->queue->limits.discard_granularity = blksize; + 
nbd->disk->queue->limits.discard_alignment = blksize; blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX); } - blk_queue_logical_block_size(nbd->disk->queue, config->blksize); - blk_queue_physical_block_size(nbd->disk->queue, config->blksize); - set_capacity(nbd->disk, nr_sectors); - if (bdev) { - if (bdev->bd_disk) { - bd_set_nr_sectors(bdev, nr_sectors); - if (start) - set_blocksize(bdev, config->blksize); - } else - set_bit(GD_NEED_PART_SCAN, &nbd->disk->state); - bdput(bdev); - } - kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); -} + blk_queue_logical_block_size(nbd->disk->queue, blksize); + blk_queue_physical_block_size(nbd->disk->queue, blksize); -static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize, - loff_t nr_blocks) -{ - struct nbd_config *config = nbd->config; - config->blksize = blocksize; - config->bytesize = blocksize * nr_blocks; - if (nbd->task_recv != NULL) - nbd_size_update(nbd, false); + set_bit(GD_NEED_PART_SCAN, &nbd->disk->state); + if (!set_capacity_and_notify(nbd->disk, bytesize >> 9)) + kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); + return 0; } static void nbd_complete_rq(struct request *req) @@ -1140,7 +1132,7 @@ static void nbd_bdev_reset(struct block_device *bdev) { if (bdev->bd_openers > 1) return; - bd_set_nr_sectors(bdev, 0); + set_capacity(bdev->bd_disk, 0); } static void nbd_parse_flags(struct nbd_device *nbd) @@ -1309,8 +1301,7 @@ static int nbd_start_device(struct nbd_device *nbd) args->index = i; queue_work(nbd->recv_workq, &args->work); } - nbd_size_update(nbd, true); - return error; + return nbd_set_size(nbd, config->bytesize, config->blksize); } static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev) @@ -1352,14 +1343,6 @@ static void nbd_clear_sock_ioctl(struct nbd_device *nbd, nbd_config_put(nbd); } -static bool nbd_is_valid_blksize(unsigned long blksize) -{ - if (!blksize || !is_power_of_2(blksize) || blksize < 512 || - blksize > PAGE_SIZE) - return false; - 
return true; -} - static void nbd_set_cmd_timeout(struct nbd_device *nbd, u64 timeout) { nbd->tag_set.timeout = timeout * HZ; @@ -1384,20 +1367,12 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, case NBD_SET_SOCK: return nbd_add_socket(nbd, arg, false); case NBD_SET_BLKSIZE: - if (!arg) - arg = NBD_DEF_BLKSIZE; - if (!nbd_is_valid_blksize(arg)) - return -EINVAL; - nbd_size_set(nbd, arg, - div_s64(config->bytesize, arg)); - return 0; + return nbd_set_size(nbd, config->bytesize, arg); case NBD_SET_SIZE: - nbd_size_set(nbd, config->blksize, - div_s64(arg, config->blksize)); - return 0; + return nbd_set_size(nbd, arg, config->blksize); case NBD_SET_SIZE_BLOCKS: - nbd_size_set(nbd, config->blksize, arg); - return 0; + return nbd_set_size(nbd, arg * config->blksize, + config->blksize); case NBD_SET_TIMEOUT: nbd_set_cmd_timeout(nbd, arg); return 0; @@ -1513,12 +1488,10 @@ static int nbd_open(struct block_device *bdev, fmode_t mode) static void nbd_release(struct gendisk *disk, fmode_t mode) { struct nbd_device *nbd = disk->private_data; - struct block_device *bdev = bdget_disk(disk, 0); if (test_bit(NBD_RT_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) && - bdev->bd_openers == 0) + disk->part0->bd_openers == 0) nbd_disconnect_and_put(nbd); - bdput(bdev); nbd_config_put(nbd); nbd_put(nbd); @@ -1815,18 +1788,11 @@ static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd) if (info->attrs[NBD_ATTR_SIZE_BYTES]) bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]); - if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) { + if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) bsize = nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]); - if (!bsize) - bsize = NBD_DEF_BLKSIZE; - if (!nbd_is_valid_blksize(bsize)) { - printk(KERN_ERR "Invalid block size %llu\n", bsize); - return -EINVAL; - } - } if (bytes != config->bytesize || bsize != config->blksize) - nbd_size_set(nbd, bsize, div64_u64(bytes, bsize)); + return nbd_set_size(nbd, bytes, bsize); 
return 0; } diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 467dbd06b7cd..b8bb8ec7538d 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2130,8 +2130,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) } set_capacity(pd->disk, lba << 2); - set_capacity(pd->bdev->bd_disk, lba << 2); - bd_set_nr_sectors(pd->bdev, lba << 2); + set_capacity_and_notify(pd->bdev->bd_disk, lba << 2); q = bdev_get_queue(pd->bdev); if (write) { @@ -2584,9 +2583,11 @@ static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, case CDROM_LAST_WRITTEN: case CDROM_SEND_PACKET: case SCSI_IOCTL_SEND_COMMAND: - ret = __blkdev_driver_ioctl(pd->bdev, mode, cmd, arg); + if (!bdev->bd_disk->fops->ioctl) + ret = -ENOTTY; + else + ret = bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); break; - default: pkt_dbg(2, pd, "Unknown ioctl (%x)\n", cmd); ret = -ENOTTY; diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index f84128abade3..2ed79b09439a 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -692,12 +692,9 @@ static void rbd_release(struct gendisk *disk, fmode_t mode) put_device(&rbd_dev->dev); } -static int rbd_ioctl_set_ro(struct rbd_device *rbd_dev, unsigned long arg) +static int rbd_set_read_only(struct block_device *bdev, bool ro) { - int ro; - - if (get_user(ro, (int __user *)arg)) - return -EFAULT; + struct rbd_device *rbd_dev = bdev->bd_disk->private_data; /* * Both images mapped read-only and snapshots can't be marked @@ -710,43 +707,14 @@ static int rbd_ioctl_set_ro(struct rbd_device *rbd_dev, unsigned long arg) rbd_assert(!rbd_is_snap(rbd_dev)); } - /* Let blkdev_roset() handle it */ - return -ENOTTY; + return 0; } -static int rbd_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - struct rbd_device *rbd_dev = bdev->bd_disk->private_data; - int ret; - - switch (cmd) { - case BLKROSET: - ret = rbd_ioctl_set_ro(rbd_dev, arg); - break; - default: - ret = 
-ENOTTY; - } - - return ret; -} - -#ifdef CONFIG_COMPAT -static int rbd_compat_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - return rbd_ioctl(bdev, mode, cmd, arg); -} -#endif /* CONFIG_COMPAT */ - static const struct block_device_operations rbd_bd_ops = { .owner = THIS_MODULE, .open = rbd_open, .release = rbd_release, - .ioctl = rbd_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = rbd_compat_ioctl, -#endif + .set_read_only = rbd_set_read_only, }; /* @@ -4920,8 +4888,7 @@ static void rbd_dev_update_size(struct rbd_device *rbd_dev) !test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags)) { size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE; dout("setting size to %llu sectors", (unsigned long long)size); - set_capacity(rbd_dev->disk, size); - revalidate_disk_size(rbd_dev->disk, true); + set_capacity_and_notify(rbd_dev->disk, size); } } diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 8b2411ccbda9..bb13d7dd195a 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -100,8 +100,7 @@ static int rnbd_clt_change_capacity(struct rnbd_clt_dev *dev, rnbd_clt_info(dev, "Device size changed from %zu to %zu sectors\n", dev->nsectors, new_nsectors); dev->nsectors = new_nsectors; - set_capacity(dev->gd, dev->nsectors); - revalidate_disk_size(dev->gd, true); + set_capacity_and_notify(dev->gd, dev->nsectors); return 0; } diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 52dd1efa00f9..cc6a0bc6c005 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -745,18 +745,6 @@ static const struct block_device_operations floppy_fops = { .check_events = floppy_check_events, }; -static struct kobject *floppy_find(dev_t dev, int *part, void *data) -{ - struct swim_priv *swd = data; - int drive = (*part & 3); - - if (drive >= swd->floppy_count) - return NULL; - - *part = 0; - return get_disk_and_module(swd->unit[drive].disk); -} - static int swim_add_floppy(struct swim_priv 
*swd, enum drive_location location) { struct floppy_state *fs = &swd->unit[swd->floppy_count]; @@ -846,9 +834,6 @@ static int swim_floppy_init(struct swim_priv *swd) add_disk(swd->unit[drive].disk); } - blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE, - floppy_find, NULL, swd); - return 0; exit_put_disks: @@ -932,8 +917,6 @@ static int swim_remove(struct platform_device *dev) int drive; struct resource *res; - blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); - for (drive = 0; drive < swd->floppy_count; drive++) { del_gendisk(swd->unit[drive].disk); blk_cleanup_queue(swd->unit[drive].disk->queue); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index a314b9382442..145606dc52db 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -470,7 +470,7 @@ static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize) cap_str_10, cap_str_2); - set_capacity_revalidate_and_notify(vblk->disk, capacity, true); + set_capacity_and_notify(vblk->disk, capacity); } static void virtblk_config_changed_work(struct work_struct *work) @@ -598,7 +598,6 @@ static void virtblk_update_cache_mode(struct virtio_device *vdev) struct virtio_blk *vblk = vdev->priv; blk_queue_write_cache(vblk->disk->queue, writeback, false); - revalidate_disk_size(vblk->disk, true); } static const char *const virtblk_cache_types[] = { diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index a1b9df2c4ef1..b0c71d3a81a0 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -356,9 +356,7 @@ struct pending_req { }; -#define vbd_sz(_v) ((_v)->bdev->bd_part ? 
\ - (_v)->bdev->bd_part->nr_sects : \ - get_capacity((_v)->bdev->bd_disk)) +#define vbd_sz(_v) bdev_nr_sectors((_v)->bdev) #define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt)) #define xen_blkif_put(_b) \ diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 48629d3433b4..188e0b47534b 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2153,7 +2153,7 @@ static void blkfront_closing(struct blkfront_info *info) } if (info->gd) - bdev = bdget_disk(info->gd, 0); + bdev = bdgrab(info->gd->part0); mutex_unlock(&info->mutex); @@ -2370,7 +2370,7 @@ static void blkfront_connect(struct blkfront_info *info) return; printk(KERN_INFO "Setting capacity to %Lu\n", sectors); - set_capacity_revalidate_and_notify(info->gd, sectors, true); + set_capacity_and_notify(info->gd, sectors); return; case BLKIF_STATE_SUSPENDED: @@ -2518,7 +2518,7 @@ static int blkfront_remove(struct xenbus_device *xbdev) disk = info->gd; if (disk) - bdev = bdget_disk(disk, 0); + bdev = bdgrab(disk->part0); info->xbdev = NULL; mutex_unlock(&info->mutex); @@ -2595,19 +2595,11 @@ static int blkif_open(struct block_device *bdev, fmode_t mode) static void blkif_release(struct gendisk *disk, fmode_t mode) { struct blkfront_info *info = disk->private_data; - struct block_device *bdev; struct xenbus_device *xbdev; mutex_lock(&blkfront_mutex); - - bdev = bdget_disk(disk, 0); - - if (!bdev) { - WARN(1, "Block device %s yanked out from us!\n", disk->disk_name); + if (disk->part0->bd_openers) goto out_mutex; - } - if (bdev->bd_openers) - goto out; /* * Check if we have been instructed to close. 
We will have @@ -2619,7 +2611,7 @@ static void blkif_release(struct gendisk *disk, fmode_t mode) if (xbdev && xbdev->state == XenbusStateClosing) { /* pending switch to state closed */ - dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); + dev_info(disk_to_dev(disk), "releasing disk\n"); xlvbd_release_gendisk(info); xenbus_frontend_closed(info->xbdev); } @@ -2628,14 +2620,12 @@ static void blkif_release(struct gendisk *disk, fmode_t mode) if (!xbdev) { /* sudden device removal */ - dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); + dev_info(disk_to_dev(disk), "releasing disk\n"); xlvbd_release_gendisk(info); disk->private_data = NULL; free_info(info); } -out: - bdput(bdev); out_mutex: mutex_unlock(&blkfront_mutex); } diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index 0e734802ee7c..c1d20818e649 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -42,7 +42,6 @@ #include - #define Z2MINOR_COMBINED (0) #define Z2MINOR_Z2ONLY (1) #define Z2MINOR_CHIPONLY (2) @@ -50,28 +49,28 @@ #define Z2MINOR_MEMLIST2 (5) #define Z2MINOR_MEMLIST3 (6) #define Z2MINOR_MEMLIST4 (7) -#define Z2MINOR_COUNT (8) /* Move this down when adding a new minor */ +#define Z2MINOR_COUNT (8) /* Move this down when adding a new minor */ #define Z2RAM_CHUNK1024 ( Z2RAM_CHUNKSIZE >> 10 ) static DEFINE_MUTEX(z2ram_mutex); -static u_long *z2ram_map = NULL; -static u_long z2ram_size = 0; -static int z2_count = 0; -static int chip_count = 0; -static int list_count = 0; -static int current_device = -1; +static u_long *z2ram_map = NULL; +static u_long z2ram_size = 0; +static int z2_count = 0; +static int chip_count = 0; +static int list_count = 0; +static int current_device = -1; static DEFINE_SPINLOCK(z2ram_lock); -static struct gendisk *z2ram_gendisk; +static struct gendisk *z2ram_gendisk[Z2MINOR_COUNT]; static blk_status_t z2_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct request *req = bd->rq; unsigned long start = 
blk_rq_pos(req) << 9; - unsigned long len = blk_rq_cur_bytes(req); + unsigned long len = blk_rq_cur_bytes(req); blk_mq_start_request(req); @@ -92,7 +91,7 @@ static blk_status_t z2_queue_rq(struct blk_mq_hw_ctx *hctx, if (len < size) size = len; - addr += z2ram_map[ start >> Z2RAM_CHUNKSHIFT ]; + addr += z2ram_map[start >> Z2RAM_CHUNKSHIFT]; if (rq_data_dir(req) == READ) memcpy(buffer, (char *)addr, size); else @@ -106,323 +105,319 @@ static blk_status_t z2_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_OK; } -static void -get_z2ram( void ) +static void get_z2ram(void) { - int i; + int i; - for ( i = 0; i < Z2RAM_SIZE / Z2RAM_CHUNKSIZE; i++ ) - { - if ( test_bit( i, zorro_unused_z2ram ) ) - { - z2_count++; - z2ram_map[z2ram_size++] = (unsigned long)ZTWO_VADDR(Z2RAM_START) + - (i << Z2RAM_CHUNKSHIFT); - clear_bit( i, zorro_unused_z2ram ); + for (i = 0; i < Z2RAM_SIZE / Z2RAM_CHUNKSIZE; i++) { + if (test_bit(i, zorro_unused_z2ram)) { + z2_count++; + z2ram_map[z2ram_size++] = + (unsigned long)ZTWO_VADDR(Z2RAM_START) + + (i << Z2RAM_CHUNKSHIFT); + clear_bit(i, zorro_unused_z2ram); + } } - } - return; + return; } -static void -get_chipram( void ) +static void get_chipram(void) { - while ( amiga_chip_avail() > ( Z2RAM_CHUNKSIZE * 4 ) ) - { - chip_count++; - z2ram_map[ z2ram_size ] = - (u_long)amiga_chip_alloc( Z2RAM_CHUNKSIZE, "z2ram" ); + while (amiga_chip_avail() > (Z2RAM_CHUNKSIZE * 4)) { + chip_count++; + z2ram_map[z2ram_size] = + (u_long) amiga_chip_alloc(Z2RAM_CHUNKSIZE, "z2ram"); - if ( z2ram_map[ z2ram_size ] == 0 ) - { - break; + if (z2ram_map[z2ram_size] == 0) { + break; + } + + z2ram_size++; } - z2ram_size++; - } - - return; + return; } static int z2_open(struct block_device *bdev, fmode_t mode) { - int device; - int max_z2_map = ( Z2RAM_SIZE / Z2RAM_CHUNKSIZE ) * - sizeof( z2ram_map[0] ); - int max_chip_map = ( amiga_chip_size / Z2RAM_CHUNKSIZE ) * - sizeof( z2ram_map[0] ); - int rc = -ENOMEM; + int device; + int max_z2_map = (Z2RAM_SIZE / 
Z2RAM_CHUNKSIZE) * sizeof(z2ram_map[0]); + int max_chip_map = (amiga_chip_size / Z2RAM_CHUNKSIZE) * + sizeof(z2ram_map[0]); + int rc = -ENOMEM; - device = MINOR(bdev->bd_dev); + device = MINOR(bdev->bd_dev); - mutex_lock(&z2ram_mutex); - if ( current_device != -1 && current_device != device ) - { - rc = -EBUSY; - goto err_out; - } + mutex_lock(&z2ram_mutex); + if (current_device != -1 && current_device != device) { + rc = -EBUSY; + goto err_out; + } - if ( current_device == -1 ) - { - z2_count = 0; - chip_count = 0; - list_count = 0; - z2ram_size = 0; + if (current_device == -1) { + z2_count = 0; + chip_count = 0; + list_count = 0; + z2ram_size = 0; - /* Use a specific list entry. */ - if (device >= Z2MINOR_MEMLIST1 && device <= Z2MINOR_MEMLIST4) { - int index = device - Z2MINOR_MEMLIST1 + 1; - unsigned long size, paddr, vaddr; + /* Use a specific list entry. */ + if (device >= Z2MINOR_MEMLIST1 && device <= Z2MINOR_MEMLIST4) { + int index = device - Z2MINOR_MEMLIST1 + 1; + unsigned long size, paddr, vaddr; - if (index >= m68k_realnum_memory) { - printk( KERN_ERR DEVICE_NAME - ": no such entry in z2ram_map\n" ); - goto err_out; - } + if (index >= m68k_realnum_memory) { + printk(KERN_ERR DEVICE_NAME + ": no such entry in z2ram_map\n"); + goto err_out; + } - paddr = m68k_memory[index].addr; - size = m68k_memory[index].size & ~(Z2RAM_CHUNKSIZE-1); + paddr = m68k_memory[index].addr; + size = m68k_memory[index].size & ~(Z2RAM_CHUNKSIZE - 1); #ifdef __powerpc__ - /* FIXME: ioremap doesn't build correct memory tables. */ - { - vfree(vmalloc (size)); - } + /* FIXME: ioremap doesn't build correct memory tables. 
*/ + { + vfree(vmalloc(size)); + } - vaddr = (unsigned long)ioremap_wt(paddr, size); + vaddr = (unsigned long)ioremap_wt(paddr, size); #else - vaddr = (unsigned long)z_remap_nocache_nonser(paddr, size); + vaddr = + (unsigned long)z_remap_nocache_nonser(paddr, size); #endif - z2ram_map = - kmalloc_array(size / Z2RAM_CHUNKSIZE, - sizeof(z2ram_map[0]), - GFP_KERNEL); - if ( z2ram_map == NULL ) - { - printk( KERN_ERR DEVICE_NAME - ": cannot get mem for z2ram_map\n" ); - goto err_out; + z2ram_map = + kmalloc_array(size / Z2RAM_CHUNKSIZE, + sizeof(z2ram_map[0]), GFP_KERNEL); + if (z2ram_map == NULL) { + printk(KERN_ERR DEVICE_NAME + ": cannot get mem for z2ram_map\n"); + goto err_out; + } + + while (size) { + z2ram_map[z2ram_size++] = vaddr; + size -= Z2RAM_CHUNKSIZE; + vaddr += Z2RAM_CHUNKSIZE; + list_count++; + } + + if (z2ram_size != 0) + printk(KERN_INFO DEVICE_NAME + ": using %iK List Entry %d Memory\n", + list_count * Z2RAM_CHUNK1024, index); + } else + switch (device) { + case Z2MINOR_COMBINED: + + z2ram_map = + kmalloc(max_z2_map + max_chip_map, + GFP_KERNEL); + if (z2ram_map == NULL) { + printk(KERN_ERR DEVICE_NAME + ": cannot get mem for z2ram_map\n"); + goto err_out; + } + + get_z2ram(); + get_chipram(); + + if (z2ram_size != 0) + printk(KERN_INFO DEVICE_NAME + ": using %iK Zorro II RAM and %iK Chip RAM (Total %dK)\n", + z2_count * Z2RAM_CHUNK1024, + chip_count * Z2RAM_CHUNK1024, + (z2_count + + chip_count) * Z2RAM_CHUNK1024); + + break; + + case Z2MINOR_Z2ONLY: + z2ram_map = kmalloc(max_z2_map, GFP_KERNEL); + if (z2ram_map == NULL) { + printk(KERN_ERR DEVICE_NAME + ": cannot get mem for z2ram_map\n"); + goto err_out; + } + + get_z2ram(); + + if (z2ram_size != 0) + printk(KERN_INFO DEVICE_NAME + ": using %iK of Zorro II RAM\n", + z2_count * Z2RAM_CHUNK1024); + + break; + + case Z2MINOR_CHIPONLY: + z2ram_map = kmalloc(max_chip_map, GFP_KERNEL); + if (z2ram_map == NULL) { + printk(KERN_ERR DEVICE_NAME + ": cannot get mem for z2ram_map\n"); + goto err_out; + } + 
+ get_chipram(); + + if (z2ram_size != 0) + printk(KERN_INFO DEVICE_NAME + ": using %iK Chip RAM\n", + chip_count * Z2RAM_CHUNK1024); + + break; + + default: + rc = -ENODEV; + goto err_out; + + break; + } + + if (z2ram_size == 0) { + printk(KERN_NOTICE DEVICE_NAME + ": no unused ZII/Chip RAM found\n"); + goto err_out_kfree; } - while (size) { - z2ram_map[ z2ram_size++ ] = vaddr; - size -= Z2RAM_CHUNKSIZE; - vaddr += Z2RAM_CHUNKSIZE; - list_count++; - } - - if ( z2ram_size != 0 ) - printk( KERN_INFO DEVICE_NAME - ": using %iK List Entry %d Memory\n", - list_count * Z2RAM_CHUNK1024, index ); - } else - - switch ( device ) - { - case Z2MINOR_COMBINED: - - z2ram_map = kmalloc( max_z2_map + max_chip_map, GFP_KERNEL ); - if ( z2ram_map == NULL ) - { - printk( KERN_ERR DEVICE_NAME - ": cannot get mem for z2ram_map\n" ); - goto err_out; - } - - get_z2ram(); - get_chipram(); - - if ( z2ram_size != 0 ) - printk( KERN_INFO DEVICE_NAME - ": using %iK Zorro II RAM and %iK Chip RAM (Total %dK)\n", - z2_count * Z2RAM_CHUNK1024, - chip_count * Z2RAM_CHUNK1024, - ( z2_count + chip_count ) * Z2RAM_CHUNK1024 ); - - break; - - case Z2MINOR_Z2ONLY: - z2ram_map = kmalloc( max_z2_map, GFP_KERNEL ); - if ( z2ram_map == NULL ) - { - printk( KERN_ERR DEVICE_NAME - ": cannot get mem for z2ram_map\n" ); - goto err_out; - } - - get_z2ram(); - - if ( z2ram_size != 0 ) - printk( KERN_INFO DEVICE_NAME - ": using %iK of Zorro II RAM\n", - z2_count * Z2RAM_CHUNK1024 ); - - break; - - case Z2MINOR_CHIPONLY: - z2ram_map = kmalloc( max_chip_map, GFP_KERNEL ); - if ( z2ram_map == NULL ) - { - printk( KERN_ERR DEVICE_NAME - ": cannot get mem for z2ram_map\n" ); - goto err_out; - } - - get_chipram(); - - if ( z2ram_size != 0 ) - printk( KERN_INFO DEVICE_NAME - ": using %iK Chip RAM\n", - chip_count * Z2RAM_CHUNK1024 ); - - break; - - default: - rc = -ENODEV; - goto err_out; - - break; + current_device = device; + z2ram_size <<= Z2RAM_CHUNKSHIFT; + set_capacity(z2ram_gendisk[device], z2ram_size >> 9); } - 
if ( z2ram_size == 0 ) - { - printk( KERN_NOTICE DEVICE_NAME - ": no unused ZII/Chip RAM found\n" ); - goto err_out_kfree; - } - - current_device = device; - z2ram_size <<= Z2RAM_CHUNKSHIFT; - set_capacity(z2ram_gendisk, z2ram_size >> 9); - } - - mutex_unlock(&z2ram_mutex); - return 0; + mutex_unlock(&z2ram_mutex); + return 0; err_out_kfree: - kfree(z2ram_map); + kfree(z2ram_map); err_out: - mutex_unlock(&z2ram_mutex); - return rc; + mutex_unlock(&z2ram_mutex); + return rc; } -static void -z2_release(struct gendisk *disk, fmode_t mode) +static void z2_release(struct gendisk *disk, fmode_t mode) { - mutex_lock(&z2ram_mutex); - if ( current_device == -1 ) { - mutex_unlock(&z2ram_mutex); - return; - } - mutex_unlock(&z2ram_mutex); - /* - * FIXME: unmap memory - */ + mutex_lock(&z2ram_mutex); + if (current_device == -1) { + mutex_unlock(&z2ram_mutex); + return; + } + mutex_unlock(&z2ram_mutex); + /* + * FIXME: unmap memory + */ } -static const struct block_device_operations z2_fops = -{ - .owner = THIS_MODULE, - .open = z2_open, - .release = z2_release, +static const struct block_device_operations z2_fops = { + .owner = THIS_MODULE, + .open = z2_open, + .release = z2_release, }; -static struct kobject *z2_find(dev_t dev, int *part, void *data) -{ - *part = 0; - return get_disk_and_module(z2ram_gendisk); -} - -static struct request_queue *z2_queue; static struct blk_mq_tag_set tag_set; static const struct blk_mq_ops z2_mq_ops = { - .queue_rq = z2_queue_rq, + .queue_rq = z2_queue_rq, }; -static int __init -z2_init(void) +static int z2ram_register_disk(int minor) { - int ret; + struct request_queue *q; + struct gendisk *disk; - if (!MACH_IS_AMIGA) - return -ENODEV; + disk = alloc_disk(1); + if (!disk) + return -ENOMEM; - ret = -EBUSY; - if (register_blkdev(Z2RAM_MAJOR, DEVICE_NAME)) - goto err; + q = blk_mq_init_queue(&tag_set); + if (IS_ERR(q)) { + put_disk(disk); + return PTR_ERR(q); + } - ret = -ENOMEM; - z2ram_gendisk = alloc_disk(1); - if (!z2ram_gendisk) - goto 
out_disk; + disk->major = Z2RAM_MAJOR; + disk->first_minor = minor; + disk->fops = &z2_fops; + if (minor) + sprintf(disk->disk_name, "z2ram%d", minor); + else + sprintf(disk->disk_name, "z2ram"); + disk->queue = q; - z2_queue = blk_mq_init_sq_queue(&tag_set, &z2_mq_ops, 16, - BLK_MQ_F_SHOULD_MERGE); - if (IS_ERR(z2_queue)) { - ret = PTR_ERR(z2_queue); - z2_queue = NULL; - goto out_queue; - } + z2ram_gendisk[minor] = disk; + add_disk(disk); + return 0; +} - z2ram_gendisk->major = Z2RAM_MAJOR; - z2ram_gendisk->first_minor = 0; - z2ram_gendisk->fops = &z2_fops; - sprintf(z2ram_gendisk->disk_name, "z2ram"); +static int __init z2_init(void) +{ + int ret, i; - z2ram_gendisk->queue = z2_queue; - add_disk(z2ram_gendisk); - blk_register_region(MKDEV(Z2RAM_MAJOR, 0), Z2MINOR_COUNT, THIS_MODULE, - z2_find, NULL, NULL); + if (!MACH_IS_AMIGA) + return -ENODEV; - return 0; + if (register_blkdev(Z2RAM_MAJOR, DEVICE_NAME)) + return -EBUSY; -out_queue: - put_disk(z2ram_gendisk); -out_disk: - unregister_blkdev(Z2RAM_MAJOR, DEVICE_NAME); -err: - return ret; + tag_set.ops = &z2_mq_ops; + tag_set.nr_hw_queues = 1; + tag_set.nr_maps = 1; + tag_set.queue_depth = 16; + tag_set.numa_node = NUMA_NO_NODE; + tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + ret = blk_mq_alloc_tag_set(&tag_set); + if (ret) + goto out_unregister_blkdev; + + for (i = 0; i < Z2MINOR_COUNT; i++) { + ret = z2ram_register_disk(i); + if (ret && i == 0) + goto out_free_tagset; + } + + return 0; + +out_free_tagset: + blk_mq_free_tag_set(&tag_set); +out_unregister_blkdev: + unregister_blkdev(Z2RAM_MAJOR, DEVICE_NAME); + return ret; } static void __exit z2_exit(void) { - int i, j; - blk_unregister_region(MKDEV(Z2RAM_MAJOR, 0), Z2MINOR_COUNT); - unregister_blkdev(Z2RAM_MAJOR, DEVICE_NAME); - del_gendisk(z2ram_gendisk); - put_disk(z2ram_gendisk); - blk_cleanup_queue(z2_queue); - blk_mq_free_tag_set(&tag_set); + int i, j; - if ( current_device != -1 ) - { - i = 0; + unregister_blkdev(Z2RAM_MAJOR, DEVICE_NAME); - for ( j = 0 ; j < 
z2_count; j++ ) - { - set_bit( i++, zorro_unused_z2ram ); + for (i = 0; i < Z2MINOR_COUNT; i++) { + del_gendisk(z2ram_gendisk[i]); + blk_cleanup_queue(z2ram_gendisk[i]->queue); + put_disk(z2ram_gendisk[i]); + } + blk_mq_free_tag_set(&tag_set); + + if (current_device != -1) { + i = 0; + + for (j = 0; j < z2_count; j++) { + set_bit(i++, zorro_unused_z2ram); + } + + for (j = 0; j < chip_count; j++) { + if (z2ram_map[i]) { + amiga_chip_free((void *)z2ram_map[i++]); + } + } + + if (z2ram_map != NULL) { + kfree(z2ram_map); + } } - for ( j = 0 ; j < chip_count; j++ ) - { - if ( z2ram_map[ i ] ) - { - amiga_chip_free( (void *) z2ram_map[ i++ ] ); - } - } - - if ( z2ram_map != NULL ) - { - kfree( z2ram_map ); - } - } - - return; -} + return; +} module_init(z2_init); module_exit(z2_exit); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 66a33e418940..e2933cb7a82a 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -403,13 +403,10 @@ static void reset_bdev(struct zram *zram) return; bdev = zram->bdev; - if (zram->old_block_size) - set_blocksize(bdev, zram->old_block_size); blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); /* hope filp_close flush all of IO */ filp_close(zram->backing_dev, NULL); zram->backing_dev = NULL; - zram->old_block_size = 0; zram->bdev = NULL; zram->disk->fops = &zram_devops; kvfree(zram->bitmap); @@ -454,7 +451,7 @@ static ssize_t backing_dev_store(struct device *dev, struct file *backing_dev = NULL; struct inode *inode; struct address_space *mapping; - unsigned int bitmap_sz, old_block_size = 0; + unsigned int bitmap_sz; unsigned long nr_pages, *bitmap = NULL; struct block_device *bdev = NULL; int err; @@ -509,14 +506,8 @@ static ssize_t backing_dev_store(struct device *dev, goto out; } - old_block_size = block_size(bdev); - err = set_blocksize(bdev, PAGE_SIZE); - if (err) - goto out; - reset_bdev(zram); - zram->old_block_size = old_block_size; zram->bdev = bdev; zram->backing_dev = 
backing_dev; zram->bitmap = bitmap; @@ -1710,8 +1701,8 @@ static void zram_reset_device(struct zram *zram) disksize = zram->disksize; zram->disksize = 0; - set_capacity(zram->disk, 0); - part_stat_set_all(&zram->disk->part0, 0); + set_capacity_and_notify(zram->disk, 0); + part_stat_set_all(zram->disk->part0, 0); up_write(&zram->init_lock); /* I/O operation under all of CPU are done so let's free */ @@ -1756,9 +1747,7 @@ static ssize_t disksize_store(struct device *dev, zram->comp = comp; zram->disksize = disksize; - set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); - - revalidate_disk_size(zram->disk, true); + set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT); up_write(&zram->init_lock); return len; @@ -1786,15 +1775,12 @@ static ssize_t reset_store(struct device *dev, return -EINVAL; zram = dev_to_zram(dev); - bdev = bdget_disk(zram->disk, 0); - if (!bdev) - return -ENOMEM; + bdev = zram->disk->part0; mutex_lock(&bdev->bd_mutex); /* Do not reset an active device or claimed device */ if (bdev->bd_openers || zram->claim) { mutex_unlock(&bdev->bd_mutex); - bdput(bdev); return -EBUSY; } @@ -1805,8 +1791,6 @@ static ssize_t reset_store(struct device *dev, /* Make sure all the pending I/O are finished */ fsync_bdev(bdev); zram_reset_device(zram); - revalidate_disk_size(zram->disk, true); - bdput(bdev); mutex_lock(&bdev->bd_mutex); zram->claim = false; @@ -1992,16 +1976,11 @@ static int zram_add(void) static int zram_remove(struct zram *zram) { - struct block_device *bdev; - - bdev = bdget_disk(zram->disk, 0); - if (!bdev) - return -ENOMEM; + struct block_device *bdev = zram->disk->part0; mutex_lock(&bdev->bd_mutex); if (bdev->bd_openers || zram->claim) { mutex_unlock(&bdev->bd_mutex); - bdput(bdev); return -EBUSY; } @@ -2013,7 +1992,6 @@ static int zram_remove(struct zram *zram) /* Make sure all the pending I/O are finished */ fsync_bdev(bdev); zram_reset_device(zram); - bdput(bdev); pr_info("Removed device: %s\n", zram->disk->disk_name); diff 
--git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 9cabcbb13fd9..419a7e8281ee 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -119,7 +119,6 @@ struct zram { bool wb_limit_enable; u64 bd_wb_limit; struct block_device *bdev; - unsigned int old_block_size; unsigned long *bitmap; unsigned long nr_pages; #endif diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 430b29e0abdb..aefd74c0d862 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -902,65 +902,14 @@ static int init_irq (ide_hwif_t *hwif) return 1; } -static int ata_lock(dev_t dev, void *data) +static void ata_probe(dev_t dev) { - /* FIXME: we want to pin hwif down */ - return 0; + request_module("ide-disk"); + request_module("ide-cd"); + request_module("ide-tape"); + request_module("ide-floppy"); } -static struct kobject *ata_probe(dev_t dev, int *part, void *data) -{ - ide_hwif_t *hwif = data; - int unit = *part >> PARTN_BITS; - ide_drive_t *drive = hwif->devices[unit]; - - if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0) - return NULL; - - if (drive->media == ide_disk) - request_module("ide-disk"); - if (drive->media == ide_cdrom || drive->media == ide_optical) - request_module("ide-cd"); - if (drive->media == ide_tape) - request_module("ide-tape"); - if (drive->media == ide_floppy) - request_module("ide-floppy"); - - return NULL; -} - -static struct kobject *exact_match(dev_t dev, int *part, void *data) -{ - struct gendisk *p = data; - *part &= (1 << PARTN_BITS) - 1; - return &disk_to_dev(p)->kobj; -} - -static int exact_lock(dev_t dev, void *data) -{ - struct gendisk *p = data; - - if (!get_disk_and_module(p)) - return -1; - return 0; -} - -void ide_register_region(struct gendisk *disk) -{ - blk_register_region(MKDEV(disk->major, disk->first_minor), - disk->minors, NULL, exact_match, exact_lock, disk); -} - -EXPORT_SYMBOL_GPL(ide_register_region); - -void ide_unregister_region(struct gendisk *disk) -{ - 
blk_unregister_region(MKDEV(disk->major, disk->first_minor), - disk->minors); -} - -EXPORT_SYMBOL_GPL(ide_unregister_region); - void ide_init_disk(struct gendisk *disk, ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; @@ -999,7 +948,7 @@ static int hwif_init(ide_hwif_t *hwif) return 0; } - if (register_blkdev(hwif->major, hwif->name)) + if (__register_blkdev(hwif->major, hwif->name, ata_probe)) return 0; if (!hwif->sg_max_nents) @@ -1021,8 +970,6 @@ static int hwif_init(ide_hwif_t *hwif) goto out; } - blk_register_region(MKDEV(hwif->major, 0), MAX_DRIVES << PARTN_BITS, - THIS_MODULE, ata_probe, ata_lock, hwif); return 1; out: @@ -1611,7 +1558,6 @@ static void ide_unregister(ide_hwif_t *hwif) /* * Remove us from the kernel's knowledge */ - blk_unregister_region(MKDEV(hwif->major, 0), MAX_DRIVES<sg_table); unregister_blkdev(hwif->major, hwif->name); diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 6f26634b22bb..88b96437b22e 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -1822,7 +1822,6 @@ static void ide_tape_remove(ide_drive_t *drive) ide_proc_unregister_driver(drive, tape->driver); device_del(&tape->dev); - ide_unregister_region(tape->disk); mutex_lock(&idetape_ref_mutex); put_device(&tape->dev); @@ -2026,7 +2025,6 @@ static int ide_tape_probe(ide_drive_t *drive) "n%s", tape->name); g->fops = &idetape_block_ops; - ide_register_region(g); return 0; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 214326383145..85b1f2a9b72d 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -475,7 +475,7 @@ struct search { unsigned int read_dirty_data:1; unsigned int cache_missed:1; - struct hd_struct *part; + struct block_device *part; unsigned long start_time; struct btree_op op; @@ -1073,7 +1073,7 @@ struct detached_dev_io_private { unsigned long start_time; bio_end_io_t *bi_end_io; void *bi_private; - struct hd_struct *part; + struct block_device *part; }; static void 
detached_dev_end_io(struct bio *bio) @@ -1230,8 +1230,9 @@ static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode, if (dc->io_disable) return -EIO; - - return __blkdev_driver_ioctl(dc->bdev, mode, cmd, arg); + if (!dc->bdev->bd_disk->fops->ioctl) + return -ENOTTY; + return dc->bdev->bd_disk->fops->ioctl(dc->bdev, mode, cmd, arg); } void bch_cached_dev_request_init(struct cached_dev *dc) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 46a00134a36a..04fa40868fbe 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1408,7 +1408,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size) q->limits.raid_partial_stripes_expensive; ret = bcache_device_init(&dc->disk, block_size, - dc->bdev->bd_part->nr_sects - dc->sb.data_offset, + bdev_nr_sectors(dc->bdev) - dc->sb.data_offset, dc->bdev, &bcache_cached_ops); if (ret) return ret; @@ -1447,8 +1447,7 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk, goto err; err = "error creating kobject"; - if (kobject_add(&dc->disk.kobj, &part_to_dev(bdev->bd_part)->kobj, - "bcache")) + if (kobject_add(&dc->disk.kobj, bdev_kobj(bdev), "bcache")) goto err; if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj)) goto err; @@ -2342,9 +2341,7 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, goto err; } - if (kobject_add(&ca->kobj, - &part_to_dev(bdev->bd_part)->kobj, - "bcache")) { + if (kobject_add(&ca->kobj, bdev_kobj(bdev), "bcache")) { err = "error calling kobject_add"; ret = -ENOMEM; goto out; @@ -2383,38 +2380,38 @@ kobj_attribute_write(register, register_bcache); kobj_attribute_write(register_quiet, register_bcache); kobj_attribute_write(pendings_cleanup, bch_pending_bdevs_cleanup); -static bool bch_is_open_backing(struct block_device *bdev) +static bool bch_is_open_backing(dev_t dev) { struct cache_set *c, *tc; struct cached_dev *dc, *t; list_for_each_entry_safe(c, tc, 
&bch_cache_sets, list) list_for_each_entry_safe(dc, t, &c->cached_devs, list) - if (dc->bdev == bdev) + if (dc->bdev->bd_dev == dev) return true; list_for_each_entry_safe(dc, t, &uncached_devices, list) - if (dc->bdev == bdev) + if (dc->bdev->bd_dev == dev) return true; return false; } -static bool bch_is_open_cache(struct block_device *bdev) +static bool bch_is_open_cache(dev_t dev) { struct cache_set *c, *tc; list_for_each_entry_safe(c, tc, &bch_cache_sets, list) { struct cache *ca = c->cache; - if (ca->bdev == bdev) + if (ca->bdev->bd_dev == dev) return true; } return false; } -static bool bch_is_open(struct block_device *bdev) +static bool bch_is_open(dev_t dev) { - return bch_is_open_cache(bdev) || bch_is_open_backing(bdev); + return bch_is_open_cache(dev) || bch_is_open_backing(dev); } struct async_reg_args { @@ -2538,9 +2535,11 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, sb); if (IS_ERR(bdev)) { if (bdev == ERR_PTR(-EBUSY)) { - bdev = lookup_bdev(strim(path)); + dev_t dev; + mutex_lock(&bch_register_lock); - if (!IS_ERR(bdev) && bch_is_open(bdev)) + if (lookup_bdev(strim(path), &dev) == 0 && + bch_is_open(dev)) err = "device already registered"; else err = "device busy"; diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index d522093cb39d..086d293c2b03 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -96,19 +96,12 @@ struct mapped_device { */ struct workqueue_struct *wq; - /* - * freeze/thaw support require holding onto a super block - */ - struct super_block *frozen_sb; - /* forced geometry settings */ struct hd_geometry geometry; /* kobject and completion */ struct dm_kobject_holder kobj_holder; - struct block_device *bdev; - struct dm_stats stats; /* for blk-mq request-based DM support */ diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 56b723d012ac..23c38777e8f6 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -700,8 +700,7 @@ static void rs_set_capacity(struct 
raid_set *rs) { struct gendisk *gendisk = dm_disk(dm_table_get_md(rs->ti->table)); - set_capacity(gendisk, rs->md.array_sectors); - revalidate_disk_size(gendisk, true); + set_capacity_and_notify(gendisk, rs->md.array_sectors); } /* diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 729a72ec30cc..13b4385f4d5a 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -397,7 +397,7 @@ static int map_request(struct dm_rq_target_io *tio) } /* The target has remapped the I/O so dispatch it */ - trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)), + trace_block_rq_remap(clone, disk_devt(dm_disk(md)), blk_rq_pos(rq)); ret = dm_dispatch_clone_request(clone, rq); if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) { diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 7eeb7c4169c9..188f41287f18 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -347,16 +347,9 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, dev_t dm_get_dev_t(const char *path) { dev_t dev; - struct block_device *bdev; - bdev = lookup_bdev(path); - if (IS_ERR(bdev)) + if (lookup_bdev(path, &dev)) dev = name_to_dev_t(path); - else { - dev = bdev->bd_dev; - bdput(bdev); - } - return dev; } EXPORT_SYMBOL_GPL(dm_get_dev_t); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 4e0cbfe3f14d..5b2f371ec4bb 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -570,7 +570,10 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, } } - r = __blkdev_driver_ioctl(bdev, mode, cmd, arg); + if (!bdev->bd_disk->fops->ioctl) + r = -ENOTTY; + else + r = bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); out: dm_unprepare_ioctl(md, srcu_idx); return r; @@ -1274,8 +1277,7 @@ static blk_qc_t __map_bio(struct dm_target_io *tio) break; case DM_MAPIO_REMAPPED: /* the bio has been remapped so dispatch it */ - trace_block_bio_remap(clone->bi_disk->queue, clone, - bio_dev(io->orig_bio), sector); + trace_block_bio_remap(clone, 
bio_dev(io->orig_bio), sector); ret = submit_bio_noacct(clone); break; case DM_MAPIO_KILL: @@ -1420,18 +1422,12 @@ static int __send_empty_flush(struct clone_info *ci) */ bio_init(&flush_bio, NULL, 0); flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC; + flush_bio.bi_disk = ci->io->md->disk; + bio_associate_blkg(&flush_bio); + ci->bio = &flush_bio; ci->sector_count = 0; - /* - * Empty flush uses a statically initialized bio, as the base for - * cloning. However, blkg association requires that a bdev is - * associated with a gendisk, which doesn't happen until the bdev is - * opened. So, blkg association is done at issue time of the flush - * rather than when the device is created in alloc_dev(). - */ - bio_set_dev(ci->bio, ci->io->md->bdev); - BUG_ON(bio_has_data(ci->bio)); while ((ti = dm_table_get_target(ci->map, target_nr++))) __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL); @@ -1611,12 +1607,12 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, * (by eliminating DM's splitting and just using bio_split) */ part_stat_lock(); - __dm_part_stat_sub(&dm_disk(md)->part0, + __dm_part_stat_sub(dm_disk(md)->part0, sectors[op_stat_group(bio_op(bio))], ci.sector_count); part_stat_unlock(); bio_chain(b, bio); - trace_block_split(md->queue, b, bio->bi_iter.bi_sector); + trace_block_split(b, bio->bi_iter.bi_sector); ret = submit_bio_noacct(bio); break; } @@ -1748,11 +1744,6 @@ static void cleanup_mapped_device(struct mapped_device *md) cleanup_srcu_struct(&md->io_barrier); - if (md->bdev) { - bdput(md->bdev); - md->bdev = NULL; - } - mutex_destroy(&md->suspend_lock); mutex_destroy(&md->type_lock); mutex_destroy(&md->table_devices_lock); @@ -1844,10 +1835,6 @@ static struct mapped_device *alloc_dev(int minor) if (!md->wq) goto bad; - md->bdev = bdget_disk(md->disk, 0); - if (!md->bdev) - goto bad; - dm_stats_init(&md->stats); /* Populate the mapping, nobody knows we exist yet */ @@ -1972,8 +1959,7 @@ static struct dm_table *__bind(struct 
mapped_device *md, struct dm_table *t, if (size != dm_get_size(md)) memset(&md->geometry, 0, sizeof(md->geometry)); - set_capacity(md->disk, size); - bd_set_nr_sectors(md->bdev, size); + set_capacity_and_notify(md->disk, size); dm_table_event_callback(t, event_callback, md); @@ -2256,7 +2242,7 @@ EXPORT_SYMBOL_GPL(dm_put); static bool md_in_flight_bios(struct mapped_device *md) { int cpu; - struct hd_struct *part = &dm_disk(md)->part0; + struct block_device *part = dm_disk(md)->part0; long sum = 0; for_each_possible_cpu(cpu) { @@ -2391,27 +2377,19 @@ static int lock_fs(struct mapped_device *md) { int r; - WARN_ON(md->frozen_sb); + WARN_ON(test_bit(DMF_FROZEN, &md->flags)); - md->frozen_sb = freeze_bdev(md->bdev); - if (IS_ERR(md->frozen_sb)) { - r = PTR_ERR(md->frozen_sb); - md->frozen_sb = NULL; - return r; - } - - set_bit(DMF_FROZEN, &md->flags); - - return 0; + r = freeze_bdev(md->disk->part0); + if (!r) + set_bit(DMF_FROZEN, &md->flags); + return r; } static void unlock_fs(struct mapped_device *md) { if (!test_bit(DMF_FROZEN, &md->flags)) return; - - thaw_bdev(md->bdev, md->frozen_sb); - md->frozen_sb = NULL; + thaw_bdev(md->disk->part0); clear_bit(DMF_FROZEN, &md->flags); } diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 4aaf4820b6f6..35e2690c1803 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -581,8 +581,7 @@ static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg) process_metadata_update(mddev, msg); break; case CHANGE_CAPACITY: - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk_size(mddev->gendisk, true); + set_capacity_and_notify(mddev->gendisk, mddev->array_sectors); break; case RESYNCING: set_bit(MD_RESYNCING_REMOTE, &mddev->recovery); @@ -1296,13 +1295,10 @@ static void update_size(struct mddev *mddev, sector_t old_dev_sectors) if (ret) pr_err("%s:%d: failed to send CHANGE_CAPACITY msg\n", __func__, __LINE__); - set_capacity(mddev->gendisk, mddev->array_sectors); - 
revalidate_disk_size(mddev->gendisk, true); + set_capacity_and_notify(mddev->gendisk, mddev->array_sectors); } else { /* revert to previous sectors */ ret = mddev->pers->resize(mddev, old_dev_sectors); - if (!ret) - revalidate_disk_size(mddev->gendisk, true); ret = __sendmsg(cinfo, &cmsg); if (ret) pr_err("%s:%d: failed to send METADATA_UPDATED msg\n", diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c index 5ab22069b5be..68cac7d19278 100644 --- a/drivers/md/md-linear.c +++ b/drivers/md/md-linear.c @@ -200,9 +200,8 @@ static int linear_add(struct mddev *mddev, struct md_rdev *rdev) "copied raid_disks doesn't match mddev->raid_disks"); rcu_assign_pointer(mddev->private, newconf); md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); - set_capacity(mddev->gendisk, mddev->array_sectors); + set_capacity_and_notify(mddev->gendisk, mddev->array_sectors); mddev_resume(mddev); - revalidate_disk_size(mddev->gendisk, true); kfree_rcu(oldconf, rcu); return 0; } @@ -258,8 +257,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio) bio_endio(bio); } else { if (mddev->gendisk) - trace_block_bio_remap(bio->bi_disk->queue, - bio, disk_devt(mddev->gendisk), + trace_block_bio_remap(bio, disk_devt(mddev->gendisk), bio_sector); mddev_check_writesame(mddev, bio); mddev_check_write_zeroes(mddev, bio); diff --git a/drivers/md/md.c b/drivers/md/md.c index 0037c6ecab65..0445f44ae635 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -464,7 +464,7 @@ struct md_io { bio_end_io_t *orig_bi_end_io; void *orig_bi_private; unsigned long start_time; - struct hd_struct *part; + struct block_device *part; }; static void md_end_io(struct bio *bio) @@ -2414,7 +2414,6 @@ EXPORT_SYMBOL(md_integrity_add_rdev); static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) { char b[BDEVNAME_SIZE]; - struct kobject *ko; int err; /* prevent duplicates */ @@ -2477,9 +2476,8 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) if ((err = 
kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b))) goto fail; - ko = &part_to_dev(rdev->bdev->bd_part)->kobj; /* failure here is OK */ - err = sysfs_create_link(&rdev->kobj, ko, "block"); + err = sysfs_create_link(&rdev->kobj, bdev_kobj(rdev->bdev), "block"); rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state"); rdev->sysfs_unack_badblocks = sysfs_get_dirent_safe(rdev->kobj.sd, "unacknowledged_bad_blocks"); @@ -5355,10 +5353,9 @@ array_size_store(struct mddev *mddev, const char *buf, size_t len) if (!err) { mddev->array_sectors = sectors; - if (mddev->pers) { - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk_size(mddev->gendisk, true); - } + if (mddev->pers) + set_capacity_and_notify(mddev->gendisk, + mddev->array_sectors); } mddev_unlock(mddev); return err ?: len; @@ -5765,11 +5762,12 @@ static int md_alloc(dev_t dev, char *name) return error; } -static struct kobject *md_probe(dev_t dev, int *part, void *data) +static void md_probe(dev_t dev) { + if (MAJOR(dev) == MD_MAJOR && MINOR(dev) >= 512) + return; if (create_on_open) md_alloc(dev, NULL); - return NULL; } static int add_named_array(const char *val, const struct kernel_param *kp) @@ -6107,8 +6105,7 @@ int do_md_run(struct mddev *mddev) md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk_size(mddev->gendisk, true); + set_capacity_and_notify(mddev->gendisk, mddev->array_sectors); clear_bit(MD_NOT_READY, &mddev->flags); mddev->changed = 1; kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); @@ -6423,10 +6420,9 @@ static int do_md_stop(struct mddev *mddev, int mode, if (rdev->raid_disk >= 0) sysfs_unlink_rdev(mddev, rdev); - set_capacity(disk, 0); + set_capacity_and_notify(disk, 0); mutex_unlock(&mddev->open_mutex); mddev->changed = 1; - revalidate_disk_size(disk, true); if (mddev->ro) mddev->ro = 0; @@ -6535,7 +6531,7 @@ static void 
autorun_devices(int part) break; } - md_probe(dev, NULL, NULL); + md_probe(dev); mddev = mddev_find(dev); if (!mddev || !mddev->gendisk) { if (mddev) @@ -7257,8 +7253,8 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) if (mddev_is_clustered(mddev)) md_cluster_ops->update_size(mddev, old_dev_sectors); else if (mddev->queue) { - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk_size(mddev->gendisk, true); + set_capacity_and_notify(mddev->gendisk, + mddev->array_sectors); } } return rv; @@ -7480,7 +7476,6 @@ static inline bool md_ioctl_valid(unsigned int cmd) { switch (cmd) { case ADD_NEW_DISK: - case BLKROSET: case GET_ARRAY_INFO: case GET_BITMAP_FILE: case GET_DISK_INFO: @@ -7507,7 +7502,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, int err = 0; void __user *argp = (void __user *)arg; struct mddev *mddev = NULL; - int ro; bool did_set_md_closing = false; if (!md_ioctl_valid(cmd)) @@ -7687,35 +7681,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, goto unlock; } break; - - case BLKROSET: - if (get_user(ro, (int __user *)(arg))) { - err = -EFAULT; - goto unlock; - } - err = -EINVAL; - - /* if the bdev is going readonly the value of mddev->ro - * does not matter, no writes are coming - */ - if (ro) - goto unlock; - - /* are we are already prepared for writes? 
*/ - if (mddev->ro != 1) - goto unlock; - - /* transitioning to readauto need only happen for - * arrays that call md_write_start - */ - if (mddev->pers) { - err = restart_array(mddev); - if (err == 0) { - mddev->ro = 2; - set_disk_ro(mddev->gendisk, 0); - } - } - goto unlock; } /* @@ -7809,6 +7774,36 @@ static int md_compat_ioctl(struct block_device *bdev, fmode_t mode, } #endif /* CONFIG_COMPAT */ +static int md_set_read_only(struct block_device *bdev, bool ro) +{ + struct mddev *mddev = bdev->bd_disk->private_data; + int err; + + err = mddev_lock(mddev); + if (err) + return err; + + if (!mddev->raid_disks && !mddev->external) { + err = -ENODEV; + goto out_unlock; + } + + /* + * Transitioning to read-auto need only happen for arrays that call + * md_write_start and which are not ready for writes yet. + */ + if (!ro && mddev->ro == 1 && mddev->pers) { + err = restart_array(mddev); + if (err) + goto out_unlock; + mddev->ro = 2; + } + +out_unlock: + mddev_unlock(mddev); + return err; +} + static int md_open(struct block_device *bdev, fmode_t mode) { /* @@ -7886,6 +7881,7 @@ const struct block_device_operations md_fops = #endif .getgeo = md_getgeo, .check_events = md_check_events, + .set_read_only = md_set_read_only, }; static int md_thread(void *arg) @@ -8445,7 +8441,7 @@ static int is_mddev_idle(struct mddev *mddev, int init) rcu_read_lock(); rdev_for_each_rcu(rdev, mddev) { struct gendisk *disk = rdev->bdev->bd_disk; - curr_events = (int)part_stat_read_accum(&disk->part0, sectors) - + curr_events = (int)part_stat_read_accum(disk->part0, sectors) - atomic_read(&disk->sync_io); /* sync IO will cause sync_io to increase before the disk_stats * as sync_io is counted when a request starts, and @@ -9015,10 +9011,9 @@ void md_do_sync(struct md_thread *thread) mddev_lock_nointr(mddev); md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0)); mddev_unlock(mddev); - if (!mddev_is_clustered(mddev)) { - set_capacity(mddev->gendisk, mddev->array_sectors); - 
revalidate_disk_size(mddev->gendisk, true); - } + if (!mddev_is_clustered(mddev)) + set_capacity_and_notify(mddev->gendisk, + mddev->array_sectors); } spin_lock(&mddev->lock); @@ -9547,18 +9542,15 @@ static int __init md_init(void) if (!md_rdev_misc_wq) goto err_rdev_misc_wq; - if ((ret = register_blkdev(MD_MAJOR, "md")) < 0) + ret = __register_blkdev(MD_MAJOR, "md", md_probe); + if (ret < 0) goto err_md; - if ((ret = register_blkdev(0, "mdp")) < 0) + ret = __register_blkdev(0, "mdp", md_probe); + if (ret < 0) goto err_mdp; mdp_major = ret; - blk_register_region(MKDEV(MD_MAJOR, 0), 512, THIS_MODULE, - md_probe, NULL, NULL); - blk_register_region(MKDEV(mdp_major, 0), 1UL<gendisk) - trace_block_bio_remap(bdev_get_queue(rdev->bdev), - discard_bio, disk_devt(mddev->gendisk), + trace_block_bio_remap(discard_bio, + disk_devt(mddev->gendisk), bio->bi_iter.bi_sector); submit_bio_noacct(discard_bio); } @@ -581,8 +581,8 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio) tmp_dev->data_offset; if (mddev->gendisk) - trace_block_bio_remap(bio->bi_disk->queue, bio, - disk_devt(mddev->gendisk), bio_sector); + trace_block_bio_remap(bio, disk_devt(mddev->gendisk), + bio_sector); mddev_check_writesame(mddev, bio); mddev_check_write_zeroes(mddev, bio); submit_bio_noacct(bio); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 960d854c07f8..c0347997f6ff 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1305,8 +1305,8 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, read_bio->bi_private = r1_bio; if (mddev->gendisk) - trace_block_bio_remap(read_bio->bi_disk->queue, read_bio, - disk_devt(mddev->gendisk), r1_bio->sector); + trace_block_bio_remap(read_bio, disk_devt(mddev->gendisk), + r1_bio->sector); submit_bio_noacct(read_bio); } @@ -1517,8 +1517,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, atomic_inc(&r1_bio->remaining); if (mddev->gendisk) - trace_block_bio_remap(mbio->bi_disk->queue, - mbio, 
disk_devt(mddev->gendisk), + trace_block_bio_remap(mbio, disk_devt(mddev->gendisk), r1_bio->sector); /* flush_pending_writes() needs access to the rdev so...*/ mbio->bi_disk = (void *)conf->mirrors[i].rdev; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 3b598a3cb462..800fe0628d25 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1201,8 +1201,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, read_bio->bi_private = r10_bio; if (mddev->gendisk) - trace_block_bio_remap(read_bio->bi_disk->queue, - read_bio, disk_devt(mddev->gendisk), + trace_block_bio_remap(read_bio, disk_devt(mddev->gendisk), r10_bio->sector); submit_bio_noacct(read_bio); return; @@ -1251,8 +1250,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, mbio->bi_private = r10_bio; if (conf->mddev->gendisk) - trace_block_bio_remap(mbio->bi_disk->queue, - mbio, disk_devt(conf->mddev->gendisk), + trace_block_bio_remap(mbio, disk_devt(conf->mddev->gendisk), r10_bio->sector); /* flush_pending_writes() needs access to the rdev so...*/ mbio->bi_disk = (void *)rdev; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 39343479ac2a..3a90cc0e43ca 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1222,9 +1222,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); if (conf->mddev->gendisk) - trace_block_bio_remap(bi->bi_disk->queue, - bi, disk_devt(conf->mddev->gendisk), - sh->dev[i].sector); + trace_block_bio_remap(bi, + disk_devt(conf->mddev->gendisk), + sh->dev[i].sector); if (should_defer && op_is_write(op)) bio_list_add(&pending_bios, bi); else @@ -1272,9 +1272,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) if (op == REQ_OP_DISCARD) rbi->bi_vcnt = 0; if (conf->mddev->gendisk) - trace_block_bio_remap(rbi->bi_disk->queue, - rbi, disk_devt(conf->mddev->gendisk), - sh->dev[i].sector); + trace_block_bio_remap(rbi, + 
disk_devt(conf->mddev->gendisk), + sh->dev[i].sector); if (should_defer && op_is_write(op)) bio_list_add(&pending_bios, rbi); else @@ -5468,8 +5468,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio) spin_unlock_irq(&conf->device_lock); if (mddev->gendisk) - trace_block_bio_remap(align_bi->bi_disk->queue, - align_bi, disk_devt(mddev->gendisk), + trace_block_bio_remap(align_bi, disk_devt(mddev->gendisk), raid_bio->bi_iter.bi_sector); submit_bio_noacct(align_bi); return 1; diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 0c05f77f9b21..fb8e12d590a1 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -298,38 +298,10 @@ static int blktrans_getgeo(struct block_device *bdev, struct hd_geometry *geo) return ret; } -static int blktrans_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - struct mtd_blktrans_dev *dev = blktrans_dev_get(bdev->bd_disk); - int ret = -ENXIO; - - if (!dev) - return ret; - - mutex_lock(&dev->lock); - - if (!dev->mtd) - goto unlock; - - switch (cmd) { - case BLKFLSBUF: - ret = dev->tr->flush ? 
dev->tr->flush(dev) : 0; - break; - default: - ret = -ENOTTY; - } -unlock: - mutex_unlock(&dev->lock); - blktrans_dev_put(dev); - return ret; -} - static const struct block_device_operations mtd_block_ops = { .owner = THIS_MODULE, .open = blktrans_open, .release = blktrans_release, - .ioctl = blktrans_ioctl, .getgeo = blktrans_getgeo, }; diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c index c3e2098372f2..38b6aa849c63 100644 --- a/drivers/mtd/mtdsuper.c +++ b/drivers/mtd/mtdsuper.c @@ -120,8 +120,8 @@ int get_tree_mtd(struct fs_context *fc, struct fs_context *fc)) { #ifdef CONFIG_BLOCK - struct block_device *bdev; - int ret, major; + dev_t dev; + int ret; #endif int mtdnr; @@ -169,20 +169,15 @@ int get_tree_mtd(struct fs_context *fc, /* try the old way - the hack where we allowed users to mount * /dev/mtdblock$(n) but didn't actually _use_ the blockdev */ - bdev = lookup_bdev(fc->source); - if (IS_ERR(bdev)) { - ret = PTR_ERR(bdev); + ret = lookup_bdev(fc->source, &dev); + if (ret) { errorf(fc, "MTD: Couldn't look up '%s': %d", fc->source, ret); return ret; } pr_debug("MTDSB: lookup_bdev() returned 0\n"); - major = MAJOR(bdev->bd_dev); - mtdnr = MINOR(bdev->bd_dev); - bdput(bdev); - - if (major == MTD_BLOCK_MAJOR) - return mtd_get_sb_by_nr(fc, mtdnr, fill_super); + if (MAJOR(dev) == MTD_BLOCK_MAJOR) + return mtd_get_sb_by_nr(fc, MINOR(dev), fill_super); #endif /* CONFIG_BLOCK */ diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9a270e49df17..9b6ebeb29cca 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -93,16 +93,6 @@ static void nvme_put_subsystem(struct nvme_subsystem *subsys); static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, unsigned nsid); -static void nvme_update_bdev_size(struct gendisk *disk) -{ - struct block_device *bdev = bdget_disk(disk, 0); - - if (bdev) { - bd_set_nr_sectors(bdev, get_capacity(disk)); - bdput(bdev); - } -} - /* * Prepare a queue for teardown. 
* @@ -119,8 +109,7 @@ static void nvme_set_queue_dying(struct nvme_ns *ns) blk_set_queue_dying(ns->queue); blk_mq_unquiesce_queue(ns->queue); - set_capacity(ns->disk, 0); - nvme_update_bdev_size(ns->disk); + set_capacity_and_notify(ns->disk, 0); } static void nvme_queue_scan(struct nvme_ctrl *ctrl) @@ -2053,7 +2042,7 @@ static void nvme_update_disk_info(struct gendisk *disk, capacity = 0; } - set_capacity_revalidate_and_notify(disk, capacity, false); + set_capacity_and_notify(disk, capacity); nvme_config_discard(disk, ns); nvme_config_write_zeroes(disk, ns); @@ -2134,7 +2123,6 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) blk_stack_limits(&ns->head->disk->queue->limits, &ns->queue->limits, 0); blk_queue_update_readahead(ns->head->disk->queue); - nvme_update_bdev_size(ns->head->disk); blk_mq_unfreeze_queue(ns->head->disk->queue); } #endif @@ -3962,8 +3950,6 @@ static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_ids *ids) */ if (ret && ret != -ENOMEM && !(ret > 0 && !(ret & NVME_SC_DNR))) nvme_ns_remove(ns); - else - revalidate_disk_size(ns->disk, true); } static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 74896be40c17..106cf5c44ee7 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -312,8 +312,7 @@ blk_qc_t nvme_ns_head_submit_bio(struct bio *bio) if (likely(ns)) { bio->bi_disk = ns->disk; bio->bi_opf |= REQ_NVME_MPATH; - trace_block_bio_remap(bio->bi_disk->queue, bio, - disk_devt(ns->head->disk), + trace_block_bio_remap(bio, disk_devt(ns->head->disk), bio->bi_iter.bi_sector); ret = submit_bio_noacct(bio); } else if (nvme_available_path(head)) { diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index dca34489a1dc..8d90235e4fcc 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -89,12 +89,12 @@ static u16 
nvmet_get_smart_log_nsid(struct nvmet_req *req, if (!ns->bdev) goto out; - host_reads = part_stat_read(ns->bdev->bd_part, ios[READ]); - data_units_read = DIV_ROUND_UP(part_stat_read(ns->bdev->bd_part, - sectors[READ]), 1000); - host_writes = part_stat_read(ns->bdev->bd_part, ios[WRITE]); - data_units_written = DIV_ROUND_UP(part_stat_read(ns->bdev->bd_part, - sectors[WRITE]), 1000); + host_reads = part_stat_read(ns->bdev, ios[READ]); + data_units_read = + DIV_ROUND_UP(part_stat_read(ns->bdev, sectors[READ]), 1000); + host_writes = part_stat_read(ns->bdev, ios[WRITE]); + data_units_written = + DIV_ROUND_UP(part_stat_read(ns->bdev, sectors[WRITE]), 1000); put_unaligned_le64(host_reads, &slog->host_reads[0]); put_unaligned_le64(data_units_read, &slog->data_units_read[0]); @@ -120,12 +120,12 @@ static u16 nvmet_get_smart_log_all(struct nvmet_req *req, /* we don't have the right data for file backed ns */ if (!ns->bdev) continue; - host_reads += part_stat_read(ns->bdev->bd_part, ios[READ]); + host_reads += part_stat_read(ns->bdev, ios[READ]); data_units_read += DIV_ROUND_UP( - part_stat_read(ns->bdev->bd_part, sectors[READ]), 1000); - host_writes += part_stat_read(ns->bdev->bd_part, ios[WRITE]); + part_stat_read(ns->bdev, sectors[READ]), 1000); + host_writes += part_stat_read(ns->bdev, ios[WRITE]); data_units_written += DIV_ROUND_UP( - part_stat_read(ns->bdev->bd_part, sectors[WRITE]), 1000); + part_stat_read(ns->bdev, sectors[WRITE]), 1000); } put_unaligned_le64(host_reads, &slog->host_reads[0]); diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index f6d81239be21..07806016c09d 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -211,6 +211,8 @@ static int nvme_loop_init_request(struct blk_mq_tag_set *set, (set == &ctrl->tag_set) ? 
hctx_idx + 1 : 0); } +static struct lock_class_key loop_hctx_fq_lock_key; + static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { @@ -219,6 +221,14 @@ static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, BUG_ON(hctx_idx >= ctrl->ctrl.queue_count); + /* + * flush_end_io() can be called recursively for us, so use our own + * lock class key for avoiding lockdep possible recursive locking, + * then we can remove the dynamically allocated lock class for each + * flush queue, that way may cause horrible boot delay. + */ + blk_mq_hctx_set_fq_lock_class(hctx, &loop_hctx_fq_lock_key); + hctx->driver_data = queue; return 0; } diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index fd568248fd26..6efacadc8fcd 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -430,7 +430,7 @@ dasd_state_ready_to_online(struct dasd_device * device) { struct gendisk *disk; struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; device->state = DASD_STATE_ONLINE; if (device->block) { @@ -443,7 +443,7 @@ dasd_state_ready_to_online(struct dasd_device * device) disk = device->block->bdev->bd_disk; disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); while ((part = disk_part_iter_next(&piter))) - kobject_uevent(&part_to_dev(part)->kobj, KOBJ_CHANGE); + kobject_uevent(bdev_kobj(part), KOBJ_CHANGE); disk_part_iter_exit(&piter); } return 0; @@ -457,7 +457,7 @@ static int dasd_state_online_to_ready(struct dasd_device *device) int rc; struct gendisk *disk; struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; if (device->discipline->online_to_ready) { rc = device->discipline->online_to_ready(device); @@ -470,7 +470,7 @@ static int dasd_state_online_to_ready(struct dasd_device *device) disk = device->block->bdev->bd_disk; disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); while ((part = disk_part_iter_next(&piter))) - 
kobject_uevent(&part_to_dev(part)->kobj, KOBJ_CHANGE); + kobject_uevent(bdev_kobj(part), KOBJ_CHANGE); disk_part_iter_exit(&piter); } return 0; @@ -3376,6 +3376,7 @@ dasd_device_operations = { .ioctl = dasd_ioctl, .compat_ioctl = dasd_ioctl, .getgeo = dasd_getgeo, + .set_read_only = dasd_set_read_only, }; /******************************************************************************* diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 7a34161ea5c6..8ca077fbdf4f 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -834,7 +834,8 @@ int dasd_scan_partitions(struct dasd_block *); void dasd_destroy_partitions(struct dasd_block *); /* externals in dasd_ioctl.c */ -int dasd_ioctl(struct block_device *, fmode_t, unsigned int, unsigned long); +int dasd_ioctl(struct block_device *, fmode_t, unsigned int, unsigned long); +int dasd_set_read_only(struct block_device *bdev, bool ro); /* externals in dasd_proc.c */ int dasd_proc_init(void); diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index cb6427fb9f3d..9f6424408946 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -54,8 +54,6 @@ dasd_ioctl_enable(struct block_device *bdev) return -ENODEV; dasd_enable_device(base); - /* Formatting the dasd device can change the capacity. */ - bd_set_nr_sectors(bdev, get_capacity(base->block->gdp)); dasd_put_device(base); return 0; } @@ -88,7 +86,7 @@ dasd_ioctl_disable(struct block_device *bdev) * Set i_size to zero, since read, write, etc. check against this * value. */ - bd_set_nr_sectors(bdev, 0); + set_capacity(bdev->bd_disk, 0); dasd_put_device(base); return 0; } @@ -222,9 +220,8 @@ dasd_format(struct dasd_block *block, struct format_data_t *fdata) * enabling the device later. 
*/ if (fdata->start_unit == 0) { - struct block_device *bdev = bdget_disk(block->gdp, 0); - bdev->bd_inode->i_blkbits = blksize_bits(fdata->blksize); - bdput(bdev); + block->gdp->part0->bd_inode->i_blkbits = + blksize_bits(fdata->blksize); } rc = base->discipline->format_device(base, fdata, 1); @@ -532,28 +529,22 @@ static int dasd_ioctl_information(struct dasd_block *block, void __user *argp, /* * Set read only */ -static int -dasd_ioctl_set_ro(struct block_device *bdev, void __user *argp) +int dasd_set_read_only(struct block_device *bdev, bool ro) { struct dasd_device *base; - int intval, rc; + int rc; - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; + /* do not manipulate hardware state for partitions */ if (bdev_is_partition(bdev)) - // ro setting is not allowed for partitions - return -EINVAL; - if (get_user(intval, (int __user *)argp)) - return -EFAULT; + return 0; + base = dasd_device_from_gendisk(bdev->bd_disk); if (!base) return -ENODEV; - if (!intval && test_bit(DASD_FLAG_DEVICE_RO, &base->flags)) { - dasd_put_device(base); - return -EROFS; - } - set_disk_ro(bdev->bd_disk, intval); - rc = dasd_set_feature(base->cdev, DASD_FEATURE_READONLY, intval); + if (!ro && test_bit(DASD_FLAG_DEVICE_RO, &base->flags)) + rc = -EROFS; + else + rc = dasd_set_feature(base->cdev, DASD_FEATURE_READONLY, ro); dasd_put_device(base); return rc; } @@ -633,9 +624,6 @@ int dasd_ioctl(struct block_device *bdev, fmode_t mode, case BIODASDPRRST: rc = dasd_ioctl_reset_profile(block); break; - case BLKROSET: - rc = dasd_ioctl_set_ro(bdev, argp); - break; case DASDAPIVER: rc = dasd_ioctl_api_version(argp); break; diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 6cb963a06777..37d450f46952 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -2359,8 +2359,7 @@ static void zfcp_fsf_req_trace(struct zfcp_fsf_req *req, struct scsi_cmnd *scsi) } } - blk_add_driver_data(scsi->request->q, scsi->request, &blktrc, - sizeof(blktrc)); + 
blk_add_driver_data(scsi->request, &blktrc, sizeof(blktrc)); } /** diff --git a/drivers/scsi/scsicam.c b/drivers/scsi/scsicam.c index 682cf08ab041..f1553a453616 100644 --- a/drivers/scsi/scsicam.c +++ b/drivers/scsi/scsicam.c @@ -32,7 +32,7 @@ */ unsigned char *scsi_bios_ptable(struct block_device *dev) { - struct address_space *mapping = dev->bd_contains->bd_inode->i_mapping; + struct address_space *mapping = bdev_whole(dev)->bd_inode->i_mapping; unsigned char *res = NULL; struct page *page; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 656bcf4940d6..679c2c025047 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -630,13 +630,11 @@ static struct scsi_driver sd_template = { }; /* - * Dummy kobj_map->probe function. - * The default ->probe function will call modprobe, which is - * pointless as this module is already loaded. + * Don't request a new module, as that could deadlock in multipath + * environment. */ -static struct kobject *sd_default_probe(dev_t devt, int *partno, void *data) +static void sd_default_probe(dev_t devt) { - return NULL; } /* @@ -1750,10 +1748,8 @@ static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr) static void sd_rescan(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); - int ret; - ret = sd_revalidate_disk(sdkp->disk); - revalidate_disk_size(sdkp->disk, ret == 0); + sd_revalidate_disk(sdkp->disk); } static int sd_ioctl(struct block_device *bdev, fmode_t mode, @@ -3265,8 +3261,7 @@ static int sd_revalidate_disk(struct gendisk *disk) sdkp->first_scan = 0; - set_capacity_revalidate_and_notify(disk, - logical_to_sectors(sdp, sdkp->capacity), false); + set_capacity_and_notify(disk, logical_to_sectors(sdp, sdkp->capacity)); sd_config_write_same(sdkp); kfree(buffer); @@ -3276,7 +3271,7 @@ static int sd_revalidate_disk(struct gendisk *disk) * capacity to 0. 
*/ if (sd_zbc_revalidate_zones(sdkp)) - set_capacity_revalidate_and_notify(disk, 0, false); + set_capacity_and_notify(disk, 0); out: return 0; @@ -3528,9 +3523,6 @@ static int sd_remove(struct device *dev) free_opal_dev(sdkp->opal_dev); - blk_register_region(devt, SD_MINORS, NULL, - sd_default_probe, NULL, NULL); - mutex_lock(&sd_ref_mutex); dev_set_drvdata(dev, NULL); put_device(&sdkp->dev); @@ -3720,11 +3712,9 @@ static int __init init_sd(void) SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n")); for (i = 0; i < SD_MAJORS; i++) { - if (register_blkdev(sd_major(i), "sd") != 0) + if (__register_blkdev(sd_major(i), "sd", sd_default_probe)) continue; majors++; - blk_register_region(sd_major(i), SD_MINORS, NULL, - sd_default_probe, NULL, NULL); } if (!majors) @@ -3797,10 +3787,8 @@ static void __exit exit_sd(void) class_unregister(&sd_disk_class); - for (i = 0; i < SD_MAJORS; i++) { - blk_unregister_region(sd_major(i), SD_MINORS); + for (i = 0; i < SD_MAJORS; i++) unregister_blkdev(sd_major(i), "sd"); - } } module_init(init_sd); diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 7143d03f0e02..b0cb5b95e892 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -133,10 +133,10 @@ static int fd_configure_device(struct se_device *dev) */ inode = file->f_mapping->host; if (S_ISBLK(inode->i_mode)) { - struct request_queue *q = bdev_get_queue(inode->i_bdev); + struct request_queue *q = bdev_get_queue(I_BDEV(inode)); unsigned long long dev_size; - fd_dev->fd_block_size = bdev_logical_block_size(inode->i_bdev); + fd_dev->fd_block_size = bdev_logical_block_size(I_BDEV(inode)); /* * Determine the number of bytes from i_size_read() minus * one (1) logical sector from underlying struct block_device @@ -559,7 +559,7 @@ fd_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb) if (S_ISBLK(inode->i_mode)) { /* The backend is block device, use discard */ - struct block_device *bdev = 
inode->i_bdev; + struct block_device *bdev = I_BDEV(inode); struct se_device *dev = cmd->se_dev; ret = blkdev_issue_discard(bdev, diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 4e37fa9b409d..7994f27e4527 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -1029,9 +1029,8 @@ static sector_t pscsi_get_blocks(struct se_device *dev) { struct pscsi_dev_virt *pdv = PSCSI_DEV(dev); - if (pdv->pdv_bd && pdv->pdv_bd->bd_part) - return pdv->pdv_bd->bd_part->nr_sects; - + if (pdv->pdv_bd) + return bdev_nr_sectors(pdv->pdv_bd); return 0; } diff --git a/drivers/usb/gadget/function/storage_common.c b/drivers/usb/gadget/function/storage_common.c index f7e6c42558eb..b859a158a414 100644 --- a/drivers/usb/gadget/function/storage_common.c +++ b/drivers/usb/gadget/function/storage_common.c @@ -204,7 +204,7 @@ int fsg_lun_open(struct fsg_lun *curlun, const char *filename) if (!(filp->f_mode & FMODE_WRITE)) ro = 1; - inode = file_inode(filp); + inode = filp->f_mapping->host; if ((!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))) { LINFO(curlun, "invalid file type: %s\n", filename); goto out; @@ -221,7 +221,7 @@ int fsg_lun_open(struct fsg_lun *curlun, const char *filename) if (!(filp->f_mode & FMODE_CAN_WRITE)) ro = 1; - size = i_size_read(inode->i_mapping->host); + size = i_size_read(inode); if (size < 0) { LINFO(curlun, "unable to find file size: %s\n", filename); rc = (int) size; @@ -231,8 +231,8 @@ int fsg_lun_open(struct fsg_lun *curlun, const char *filename) if (curlun->cdrom) { blksize = 2048; blkbits = 11; - } else if (inode->i_bdev) { - blksize = bdev_logical_block_size(inode->i_bdev); + } else if (S_ISBLK(inode->i_mode)) { + blksize = bdev_logical_block_size(I_BDEV(inode)); blkbits = blksize_bits(blksize); } else { blksize = 512; diff --git a/fs/block_dev.c b/fs/block_dev.c index 9e84b1928b94..9e56ee1f2652 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -32,6 +32,7 @@ #include #include 
#include +#include #include #include #include "internal.h" @@ -110,24 +111,20 @@ EXPORT_SYMBOL(invalidate_bdev); int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart, loff_t lend) { - struct block_device *claimed_bdev = NULL; - int err; - /* * If we don't hold exclusive handle for the device, upgrade to it * while we discard the buffer cache to avoid discarding buffers * under live filesystem. */ if (!(mode & FMODE_EXCL)) { - claimed_bdev = bdev->bd_contains; - err = bd_prepare_to_claim(bdev, claimed_bdev, - truncate_bdev_range); + int err = bd_prepare_to_claim(bdev, truncate_bdev_range); if (err) return err; } + truncate_inode_pages_range(bdev->bd_inode->i_mapping, lstart, lend); - if (claimed_bdev) - bd_abort_claiming(bdev, claimed_bdev, truncate_bdev_range); + if (!(mode & FMODE_EXCL)) + bd_abort_claiming(bdev, truncate_bdev_range); return 0; } EXPORT_SYMBOL(truncate_bdev_range); @@ -548,55 +545,47 @@ EXPORT_SYMBOL(fsync_bdev); * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze * actually. */ -struct super_block *freeze_bdev(struct block_device *bdev) +int freeze_bdev(struct block_device *bdev) { struct super_block *sb; int error = 0; mutex_lock(&bdev->bd_fsfreeze_mutex); - if (++bdev->bd_fsfreeze_count > 1) { - /* - * We don't even need to grab a reference - the first call - * to freeze_bdev grab an active reference and only the last - * thaw_bdev drops it. 
- */ - sb = get_super(bdev); - if (sb) - drop_super(sb); - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return sb; - } + if (++bdev->bd_fsfreeze_count > 1) + goto done; sb = get_active_super(bdev); if (!sb) - goto out; + goto sync; if (sb->s_op->freeze_super) error = sb->s_op->freeze_super(sb); else error = freeze_super(sb); - if (error) { - deactivate_super(sb); - bdev->bd_fsfreeze_count--; - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return ERR_PTR(error); - } deactivate_super(sb); - out: + + if (error) { + bdev->bd_fsfreeze_count--; + goto done; + } + bdev->bd_fsfreeze_sb = sb; + +sync: sync_blockdev(bdev); +done: mutex_unlock(&bdev->bd_fsfreeze_mutex); - return sb; /* thaw_bdev releases s->s_umount */ + return error; } EXPORT_SYMBOL(freeze_bdev); /** * thaw_bdev -- unlock filesystem * @bdev: blockdevice to unlock - * @sb: associated superblock * * Unlocks the filesystem and marks it writeable again after freeze_bdev(). */ -int thaw_bdev(struct block_device *bdev, struct super_block *sb) +int thaw_bdev(struct block_device *bdev) { + struct super_block *sb; int error = -EINVAL; mutex_lock(&bdev->bd_fsfreeze_mutex); @@ -607,6 +596,7 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb) if (--bdev->bd_fsfreeze_count > 0) goto out; + sb = bdev->bd_fsfreeze_sb; if (!sb) goto out; @@ -792,23 +782,19 @@ static struct inode *bdev_alloc_inode(struct super_block *sb) static void bdev_free_inode(struct inode *inode) { + struct block_device *bdev = I_BDEV(inode); + + free_percpu(bdev->bd_stats); + kfree(bdev->bd_meta_info); + kmem_cache_free(bdev_cachep, BDEV_I(inode)); } -static void init_once(void *foo) +static void init_once(void *data) { - struct bdev_inode *ei = (struct bdev_inode *) foo; - struct block_device *bdev = &ei->bdev; + struct bdev_inode *ei = data; - memset(bdev, 0, sizeof(*bdev)); - mutex_init(&bdev->bd_mutex); -#ifdef CONFIG_SYSFS - INIT_LIST_HEAD(&bdev->bd_holder_disks); -#endif - bdev->bd_bdi = &noop_backing_dev_info; 
inode_init_once(&ei->vfs_inode); - /* Initialize mutex for freeze. */ - mutex_init(&bdev->bd_fsfreeze_mutex); } static void bdev_evict_inode(struct inode *inode) @@ -870,72 +856,72 @@ void __init bdev_cache_init(void) blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ } -/* - * Most likely _very_ bad one - but then it's hardly critical for small - * /dev and can be fixed when somebody will need really large one. - * Keep in mind that it will be fed through icache hash function too. - */ -static inline unsigned long hash(dev_t dev) -{ - return MAJOR(dev)+MINOR(dev); -} - -static int bdev_test(struct inode *inode, void *data) -{ - return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data; -} - -static int bdev_set(struct inode *inode, void *data) -{ - BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data; - return 0; -} - -static struct block_device *bdget(dev_t dev) +struct block_device *bdev_alloc(struct gendisk *disk, u8 partno) { struct block_device *bdev; struct inode *inode; - inode = iget5_locked(blockdev_superblock, hash(dev), - bdev_test, bdev_set, &dev); - + inode = new_inode(blockdev_superblock); if (!inode) return NULL; + inode->i_mode = S_IFBLK; + inode->i_rdev = 0; + inode->i_data.a_ops = &def_blk_aops; + mapping_set_gfp_mask(&inode->i_data, GFP_USER); - bdev = &BDEV_I(inode)->bdev; - - if (inode->i_state & I_NEW) { - spin_lock_init(&bdev->bd_size_lock); - bdev->bd_contains = NULL; - bdev->bd_super = NULL; - bdev->bd_inode = inode; - bdev->bd_part_count = 0; - inode->i_mode = S_IFBLK; - inode->i_rdev = dev; - inode->i_bdev = bdev; - inode->i_data.a_ops = &def_blk_aops; - mapping_set_gfp_mask(&inode->i_data, GFP_USER); - unlock_new_inode(inode); + bdev = I_BDEV(inode); + memset(bdev, 0, sizeof(*bdev)); + mutex_init(&bdev->bd_mutex); + mutex_init(&bdev->bd_fsfreeze_mutex); + spin_lock_init(&bdev->bd_size_lock); + bdev->bd_disk = disk; + bdev->bd_partno = partno; + bdev->bd_inode = inode; + bdev->bd_bdi = &noop_backing_dev_info; +#ifdef CONFIG_SYSFS + 
INIT_LIST_HEAD(&bdev->bd_holder_disks); +#endif + bdev->bd_stats = alloc_percpu(struct disk_stats); + if (!bdev->bd_stats) { + iput(inode); + return NULL; } return bdev; } +void bdev_add(struct block_device *bdev, dev_t dev) +{ + bdev->bd_dev = dev; + bdev->bd_inode->i_rdev = dev; + bdev->bd_inode->i_ino = dev; + insert_inode_hash(bdev->bd_inode); +} + +static struct block_device *bdget(dev_t dev) +{ + struct inode *inode; + + inode = ilookup(blockdev_superblock, dev); + if (!inode) + return NULL; + return &BDEV_I(inode)->bdev; +} + /** * bdgrab -- Grab a reference to an already referenced block device * @bdev: Block device to grab a reference to. + * + * Returns the block_device with an additional reference when successful, + * or NULL if the inode is already being freed. */ struct block_device *bdgrab(struct block_device *bdev) { - ihold(bdev->bd_inode); + if (!igrab(bdev->bd_inode)) + return NULL; return bdev; } EXPORT_SYMBOL(bdgrab); -struct block_device *bdget_part(struct hd_struct *part) -{ - return bdget(part_devt(part)); -} - long nr_blockdev_pages(void) { struct inode *inode; @@ -953,67 +939,8 @@ void bdput(struct block_device *bdev) { iput(bdev->bd_inode); } - EXPORT_SYMBOL(bdput); -static struct block_device *bd_acquire(struct inode *inode) -{ - struct block_device *bdev; - - spin_lock(&bdev_lock); - bdev = inode->i_bdev; - if (bdev && !inode_unhashed(bdev->bd_inode)) { - bdgrab(bdev); - spin_unlock(&bdev_lock); - return bdev; - } - spin_unlock(&bdev_lock); - - /* - * i_bdev references block device inode that was already shut down - * (corresponding device got removed). Remove the reference and look - * up block device inode again just in case new device got - * reestablished under the same device number. - */ - if (bdev) - bd_forget(inode); - - bdev = bdget(inode->i_rdev); - if (bdev) { - spin_lock(&bdev_lock); - if (!inode->i_bdev) { - /* - * We take an additional reference to bd_inode, - * and it's released in clear_inode() of inode.
- * So, we can access it via ->i_mapping always - * without igrab(). - */ - bdgrab(bdev); - inode->i_bdev = bdev; - inode->i_mapping = bdev->bd_inode->i_mapping; - } - spin_unlock(&bdev_lock); - } - return bdev; -} - -/* Call when you free inode */ - -void bd_forget(struct inode *inode) -{ - struct block_device *bdev = NULL; - - spin_lock(&bdev_lock); - if (!sb_is_blkdev_sb(inode->i_sb)) - bdev = inode->i_bdev; - inode->i_bdev = NULL; - inode->i_mapping = &inode->i_data; - spin_unlock(&bdev_lock); - - if (bdev) - bdput(bdev); -} - /** * bd_may_claim - test whether a block device can be claimed * @bdev: block device of interest @@ -1049,7 +976,6 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole, /** * bd_prepare_to_claim - claim a block device * @bdev: block device of interest - * @whole: the whole device containing @bdev, may equal @bdev * @holder: holder trying to claim @bdev * * Claim @bdev. This function fails if @bdev is already claimed by another @@ -1059,9 +985,12 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole, * RETURNS: * 0 if @bdev can be claimed, -EBUSY otherwise. */ -int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole, - void *holder) +int bd_prepare_to_claim(struct block_device *bdev, void *holder) { + struct block_device *whole = bdev_whole(bdev); + + if (WARN_ON_ONCE(!holder)) + return -EINVAL; retry: spin_lock(&bdev_lock); /* if someone else claimed, fail */ @@ -1089,27 +1018,6 @@ int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole, } EXPORT_SYMBOL_GPL(bd_prepare_to_claim); /* only for the loop driver */ -static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno) -{ - struct gendisk *disk = get_gendisk(bdev->bd_dev, partno); - - if (!disk) - return NULL; - /* - * Now that we hold gendisk reference we make sure bdev we looked up is - * not stale. 
If it is, it means device got removed and created before - * we looked up gendisk and we fail open in such case. Associating - * unhashed bdev with newly created gendisk could lead to two bdevs - * (and thus two independent caches) being associated with one device - * which is bad. - */ - if (inode_unhashed(bdev->bd_inode)) { - put_disk_and_module(disk); - return NULL; - } - return disk; -} - static void bd_clear_claiming(struct block_device *whole, void *holder) { lockdep_assert_held(&bdev_lock); @@ -1122,15 +1030,15 @@ static void bd_clear_claiming(struct block_device *whole, void *holder) /** * bd_finish_claiming - finish claiming of a block device * @bdev: block device of interest - * @whole: whole block device * @holder: holder that has claimed @bdev * * Finish exclusive open of a block device. Mark the device as exlusively * open by the holder and wake up all waiters for exclusive open to finish. */ -static void bd_finish_claiming(struct block_device *bdev, - struct block_device *whole, void *holder) +static void bd_finish_claiming(struct block_device *bdev, void *holder) { + struct block_device *whole = bdev_whole(bdev); + spin_lock(&bdev_lock); BUG_ON(!bd_may_claim(bdev, whole, holder)); /* @@ -1155,11 +1063,10 @@ static void bd_finish_claiming(struct block_device *bdev, * also used when exclusive open is not actually desired and we just needed * to block other exclusive openers for a while. 
*/ -void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, - void *holder) +void bd_abort_claiming(struct block_device *bdev, void *holder) { spin_lock(&bdev_lock); - bd_clear_claiming(whole, holder); + bd_clear_claiming(bdev_whole(bdev), holder); spin_unlock(&bdev_lock); } EXPORT_SYMBOL(bd_abort_claiming); @@ -1230,7 +1137,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) WARN_ON_ONCE(!bdev->bd_holder); /* FIXME: remove the following once add_disk() handles errors */ - if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir)) + if (WARN_ON(!disk->slave_dir || !bdev->bd_holder_dir)) goto out_unlock; holder = bd_find_holder_disk(bdev, disk); @@ -1249,24 +1156,24 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) holder->disk = disk; holder->refcnt = 1; - ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); + ret = add_symlink(disk->slave_dir, bdev_kobj(bdev)); if (ret) goto out_free; - ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); + ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj); if (ret) goto out_del; /* * bdev could be deleted beneath us which would implicitly destroy * the holder directory. Hold on to it. 
*/ - kobject_get(bdev->bd_part->holder_dir); + kobject_get(bdev->bd_holder_dir); list_add(&holder->list, &bdev->bd_holder_disks); goto out_unlock; out_del: - del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); + del_symlink(disk->slave_dir, bdev_kobj(bdev)); out_free: kfree(holder); out_unlock: @@ -1294,10 +1201,9 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) holder = bd_find_holder_disk(bdev, disk); if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) { - del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); - del_symlink(bdev->bd_part->holder_dir, - &disk_to_dev(disk)->kobj); - kobject_put(bdev->bd_part->holder_dir); + del_symlink(disk->slave_dir, bdev_kobj(bdev)); + del_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj); + kobject_put(bdev->bd_holder_dir); list_del_init(&holder->list); kfree(holder); } @@ -1307,77 +1213,6 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) EXPORT_SYMBOL_GPL(bd_unlink_disk_holder); #endif -/** - * check_disk_size_change - checks for disk size change and adjusts bdev size. - * @disk: struct gendisk to check - * @bdev: struct bdev to adjust. - * @verbose: if %true log a message about a size change if there is any - * - * This routine checks to see if the bdev size does not match the disk size - * and adjusts it if it differs. When shrinking the bdev size, its all caches - * are freed. 
- */ -static void check_disk_size_change(struct gendisk *disk, - struct block_device *bdev, bool verbose) -{ - loff_t disk_size, bdev_size; - - spin_lock(&bdev->bd_size_lock); - disk_size = (loff_t)get_capacity(disk) << 9; - bdev_size = i_size_read(bdev->bd_inode); - if (disk_size != bdev_size) { - if (verbose) { - printk(KERN_INFO - "%s: detected capacity change from %lld to %lld\n", - disk->disk_name, bdev_size, disk_size); - } - i_size_write(bdev->bd_inode, disk_size); - } - spin_unlock(&bdev->bd_size_lock); - - if (bdev_size > disk_size) { - if (__invalidate_device(bdev, false)) - pr_warn("VFS: busy inodes on resized disk %s\n", - disk->disk_name); - } -} - -/** - * revalidate_disk_size - checks for disk size change and adjusts bdev size. - * @disk: struct gendisk to check - * @verbose: if %true log a message about a size change if there is any - * - * This routine checks to see if the bdev size does not match the disk size - * and adjusts it if it differs. When shrinking the bdev size, its all caches - * are freed. - */ -void revalidate_disk_size(struct gendisk *disk, bool verbose) -{ - struct block_device *bdev; - - /* - * Hidden disks don't have associated bdev so there's no point in - * revalidating them. 
- */ - if (disk->flags & GENHD_FL_HIDDEN) - return; - - bdev = bdget_disk(disk, 0); - if (bdev) { - check_disk_size_change(disk, bdev, verbose); - bdput(bdev); - } -} -EXPORT_SYMBOL(revalidate_disk_size); - -void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors) -{ - spin_lock(&bdev->bd_size_lock); - i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); - spin_unlock(&bdev->bd_size_lock); -} -EXPORT_SYMBOL(bd_set_nr_sectors); - static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); int bdev_disk_changed(struct block_device *bdev, bool invalidate) @@ -1411,8 +1246,6 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate) disk->fops->revalidate_disk(disk); } - check_disk_size_change(disk, bdev, !invalidate); - if (get_capacity(disk)) { ret = blk_add_partitions(disk, bdev); if (ret == -EAGAIN) @@ -1439,71 +1272,19 @@ EXPORT_SYMBOL_GPL(bdev_disk_changed); * mutex_lock(part->bd_mutex) * mutex_lock_nested(whole->bd_mutex, 1) */ - -static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, - int for_part) +static int __blkdev_get(struct block_device *bdev, fmode_t mode) { - struct block_device *whole = NULL, *claiming = NULL; - struct gendisk *disk; - int ret; - int partno; - bool first_open = false, unblock_events = true, need_restart; + struct gendisk *disk = bdev->bd_disk; + int ret = 0; - restart: - need_restart = false; - ret = -ENXIO; - disk = bdev_get_gendisk(bdev, &partno); - if (!disk) - goto out; - - if (partno) { - whole = bdget_disk(disk, 0); - if (!whole) { - ret = -ENOMEM; - goto out_put_disk; - } - } - - if (!for_part && (mode & FMODE_EXCL)) { - WARN_ON_ONCE(!holder); - if (whole) - claiming = whole; - else - claiming = bdev; - ret = bd_prepare_to_claim(bdev, claiming, holder); - if (ret) - goto out_put_whole; - } - - disk_block_events(disk); - mutex_lock_nested(&bdev->bd_mutex, for_part); if (!bdev->bd_openers) { - first_open = true; - bdev->bd_disk = disk; - bdev->bd_contains 
= bdev; - bdev->bd_partno = partno; - - if (!partno) { - ret = -ENXIO; - bdev->bd_part = disk_get_part(disk, partno); - if (!bdev->bd_part) - goto out_clear; - + if (!bdev_is_partition(bdev)) { ret = 0; - if (disk->fops->open) { + if (disk->fops->open) ret = disk->fops->open(bdev, mode); - /* - * If we lost a race with 'disk' being deleted, - * try again. See md.c - */ - if (ret == -ERESTARTSYS) - need_restart = true; - } - if (!ret) { - bd_set_nr_sectors(bdev, get_capacity(disk)); + if (!ret) set_init_blocksize(bdev); - } /* * If the device is invalidated, rescan partition @@ -1516,28 +1297,33 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, bdev_disk_changed(bdev, ret == -ENOMEDIUM); if (ret) - goto out_clear; + return ret; } else { - BUG_ON(for_part); - ret = __blkdev_get(whole, mode, NULL, 1); - if (ret) - goto out_clear; - bdev->bd_contains = bdgrab(whole); - bdev->bd_part = disk_get_part(disk, partno); - if (!(disk->flags & GENHD_FL_UP) || - !bdev->bd_part || !bdev->bd_part->nr_sects) { - ret = -ENXIO; - goto out_clear; + struct block_device *whole = bdgrab(disk->part0); + + mutex_lock_nested(&whole->bd_mutex, 1); + ret = __blkdev_get(whole, mode); + if (ret) { + mutex_unlock(&whole->bd_mutex); + bdput(whole); + return ret; + } + whole->bd_part_count++; + mutex_unlock(&whole->bd_mutex); + + if (!(disk->flags & GENHD_FL_UP) || + !bdev_nr_sectors(bdev)) { + __blkdev_put(whole, mode, 1); + bdput(whole); + return -ENXIO; } - bd_set_nr_sectors(bdev, bdev->bd_part->nr_sects); set_init_blocksize(bdev); } if (bdev->bd_bdi == &noop_backing_dev_info) bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info); } else { - if (bdev->bd_contains == bdev) { - ret = 0; + if (!bdev_is_partition(bdev)) { if (bdev->bd_disk->fops->open) ret = bdev->bd_disk->fops->open(bdev, mode); /* the same as first opener case, read comment there */ @@ -1545,141 +1331,54 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, (!ret || ret 
== -ENOMEDIUM)) bdev_disk_changed(bdev, ret == -ENOMEDIUM); if (ret) - goto out_unlock_bdev; + return ret; } } bdev->bd_openers++; - if (for_part) - bdev->bd_part_count++; - if (claiming) - bd_finish_claiming(bdev, claiming, holder); - - /* - * Block event polling for write claims if requested. Any write holder - * makes the write_holder state stick until all are released. This is - * good enough and tracking individual writeable reference is too - * fragile given the way @mode is used in blkdev_get/put(). - */ - if (claiming && (mode & FMODE_WRITE) && !bdev->bd_write_holder && - (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) { - bdev->bd_write_holder = true; - unblock_events = false; - } - mutex_unlock(&bdev->bd_mutex); - - if (unblock_events) - disk_unblock_events(disk); - - /* only one opener holds refs to the module and disk */ - if (!first_open) - put_disk_and_module(disk); - if (whole) - bdput(whole); return 0; - - out_clear: - disk_put_part(bdev->bd_part); - bdev->bd_disk = NULL; - bdev->bd_part = NULL; - if (bdev != bdev->bd_contains) - __blkdev_put(bdev->bd_contains, mode, 1); - bdev->bd_contains = NULL; - out_unlock_bdev: - if (claiming) - bd_abort_claiming(bdev, claiming, holder); - mutex_unlock(&bdev->bd_mutex); - disk_unblock_events(disk); - out_put_whole: - if (whole) - bdput(whole); - out_put_disk: - put_disk_and_module(disk); - if (need_restart) - goto restart; - out: - return ret; } -/** - * blkdev_get - open a block device - * @bdev: block_device to open - * @mode: FMODE_* mask - * @holder: exclusive holder identifier - * - * Open @bdev with @mode. If @mode includes %FMODE_EXCL, @bdev is - * open with exclusive access. Specifying %FMODE_EXCL with %NULL - * @holder is invalid. Exclusive opens may nest for the same @holder. - * - * On success, the reference count of @bdev is unchanged. On failure, - * @bdev is put. - * - * CONTEXT: - * Might sleep. - * - * RETURNS: - * 0 on success, -errno on failure. 
- */ -static int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) -{ - int ret, perm = 0; - - if (mode & FMODE_READ) - perm |= MAY_READ; - if (mode & FMODE_WRITE) - perm |= MAY_WRITE; - ret = devcgroup_inode_permission(bdev->bd_inode, perm); - if (ret) - goto bdput; - - ret =__blkdev_get(bdev, mode, holder, 0); - if (ret) - goto bdput; - return 0; - -bdput: - bdput(bdev); - return ret; -} - -/** - * blkdev_get_by_path - open a block device by name - * @path: path to the block device to open - * @mode: FMODE_* mask - * @holder: exclusive holder identifier - * - * Open the blockdevice described by the device file at @path. @mode - * and @holder are identical to blkdev_get(). - * - * On success, the returned block_device has reference count of one. - * - * CONTEXT: - * Might sleep. - * - * RETURNS: - * Pointer to block_device on success, ERR_PTR(-errno) on failure. - */ -struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, - void *holder) +struct block_device *blkdev_get_no_open(dev_t dev) { struct block_device *bdev; - int err; + struct gendisk *disk; - bdev = lookup_bdev(path); - if (IS_ERR(bdev)) - return bdev; + down_read(&bdev_lookup_sem); + bdev = bdget(dev); + if (!bdev) { + up_read(&bdev_lookup_sem); + blk_request_module(dev); + down_read(&bdev_lookup_sem); - err = blkdev_get(bdev, mode, holder); - if (err) - return ERR_PTR(err); - - if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) { - blkdev_put(bdev, mode); - return ERR_PTR(-EACCES); + bdev = bdget(dev); + if (!bdev) + goto unlock; } + disk = bdev->bd_disk; + if (!kobject_get_unless_zero(&disk_to_dev(disk)->kobj)) + goto bdput; + if ((disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP) + goto put_disk; + if (!try_module_get(bdev->bd_disk->fops->owner)) + goto put_disk; + up_read(&bdev_lookup_sem); return bdev; +put_disk: + put_disk(disk); +bdput: + bdput(bdev); +unlock: + up_read(&bdev_lookup_sem); + return NULL; +} + +void blkdev_put_no_open(struct 
block_device *bdev) +{ + module_put(bdev->bd_disk->fops->owner); + put_disk(bdev->bd_disk); + bdput(bdev); } -EXPORT_SYMBOL(blkdev_get_by_path); /** * blkdev_get_by_dev - open a block device by device number @@ -1687,38 +1386,128 @@ EXPORT_SYMBOL(blkdev_get_by_path); * @mode: FMODE_* mask * @holder: exclusive holder identifier * - * Open the blockdevice described by device number @dev. @mode and - * @holder are identical to blkdev_get(). + * Open the block device described by device number @dev. If @mode includes + * %FMODE_EXCL, the block device is opened with exclusive access. Specifying + * %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may nest for + * the same @holder. * - * Use it ONLY if you really do not have anything better - i.e. when - * you are behind a truly sucky interface and all you are given is a - * device number. _Never_ to be used for internal purposes. If you - * ever need it - reconsider your API. - * - * On success, the returned block_device has reference count of one. + * Use this interface ONLY if you really do not have anything better - i.e. when + * you are behind a truly sucky interface and all you are given is a device + * number. Everything else should use blkdev_get_by_path(). * * CONTEXT: * Might sleep. * * RETURNS: - * Pointer to block_device on success, ERR_PTR(-errno) on failure. + * Reference to the block_device on success, ERR_PTR(-errno) on failure. */ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder) { + bool unblock_events = true; struct block_device *bdev; - int err; + struct gendisk *disk; + int ret; - bdev = bdget(dev); + ret = devcgroup_check_permission(DEVCG_DEV_BLOCK, + MAJOR(dev), MINOR(dev), + ((mode & FMODE_READ) ? DEVCG_ACC_READ : 0) | + ((mode & FMODE_WRITE) ? DEVCG_ACC_WRITE : 0)); + if (ret) + return ERR_PTR(ret); + + /* + * If we lost a race with 'disk' being deleted, try again. See md.c. 
+ */ +retry: + bdev = blkdev_get_no_open(dev); if (!bdev) - return ERR_PTR(-ENOMEM); + return ERR_PTR(-ENXIO); + disk = bdev->bd_disk; - err = blkdev_get(bdev, mode, holder); - if (err) - return ERR_PTR(err); + if (mode & FMODE_EXCL) { + ret = bd_prepare_to_claim(bdev, holder); + if (ret) + goto put_blkdev; + } + + disk_block_events(disk); + + mutex_lock(&bdev->bd_mutex); + ret =__blkdev_get(bdev, mode); + if (ret) + goto abort_claiming; + if (mode & FMODE_EXCL) { + bd_finish_claiming(bdev, holder); + + /* + * Block event polling for write claims if requested. Any write + * holder makes the write_holder state stick until all are + * released. This is good enough and tracking individual + * writeable reference is too fragile given the way @mode is + * used in blkdev_get/put(). + */ + if ((mode & FMODE_WRITE) && !bdev->bd_write_holder && + (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) { + bdev->bd_write_holder = true; + unblock_events = false; + } + } + mutex_unlock(&bdev->bd_mutex); + + if (unblock_events) + disk_unblock_events(disk); + return bdev; + +abort_claiming: + if (mode & FMODE_EXCL) + bd_abort_claiming(bdev, holder); + mutex_unlock(&bdev->bd_mutex); + disk_unblock_events(disk); +put_blkdev: + blkdev_put_no_open(bdev); + if (ret == -ERESTARTSYS) + goto retry; + return ERR_PTR(ret); +} +EXPORT_SYMBOL(blkdev_get_by_dev); + +/** + * blkdev_get_by_path - open a block device by name + * @path: path to the block device to open + * @mode: FMODE_* mask + * @holder: exclusive holder identifier + * + * Open the block device described by the device file at @path. If @mode + * includes %FMODE_EXCL, the block device is opened with exclusive access. + * Specifying %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may + * nest for the same @holder. + * + * CONTEXT: + * Might sleep. + * + * RETURNS: + * Reference to the block_device on success, ERR_PTR(-errno) on failure. 
+ */ +struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, + void *holder) +{ + struct block_device *bdev; + dev_t dev; + int error; + + error = lookup_bdev(path, &dev); + if (error) + return ERR_PTR(error); + + bdev = blkdev_get_by_dev(dev, mode, holder); + if (!IS_ERR(bdev) && (mode & FMODE_WRITE) && bdev_read_only(bdev)) { + blkdev_put(bdev, mode); + return ERR_PTR(-EACCES); + } return bdev; } -EXPORT_SYMBOL(blkdev_get_by_dev); +EXPORT_SYMBOL(blkdev_get_by_path); static int blkdev_open(struct inode * inode, struct file * filp) { @@ -1741,14 +1530,12 @@ static int blkdev_open(struct inode * inode, struct file * filp) if ((filp->f_flags & O_ACCMODE) == 3) filp->f_mode |= FMODE_WRITE_IOCTL; - bdev = bd_acquire(inode); - if (bdev == NULL) - return -ENOMEM; - + bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); filp->f_mapping = bdev->bd_inode->i_mapping; filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping); - - return blkdev_get(bdev, filp->f_mode, filp); + return 0; } static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) @@ -1774,34 +1561,28 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) WARN_ON_ONCE(bdev->bd_holders); sync_blockdev(bdev); kill_bdev(bdev); - bdev_write_inode(bdev); + if (bdev_is_partition(bdev)) + victim = bdev_whole(bdev); } - if (bdev->bd_contains == bdev) { - if (disk->fops->release) - disk->fops->release(disk, mode); - } - if (!bdev->bd_openers) { - disk_put_part(bdev->bd_part); - bdev->bd_part = NULL; - bdev->bd_disk = NULL; - if (bdev != bdev->bd_contains) - victim = bdev->bd_contains; - bdev->bd_contains = NULL; - put_disk_and_module(disk); - } + if (!bdev_is_partition(bdev) && disk->fops->release) + disk->fops->release(disk, mode); mutex_unlock(&bdev->bd_mutex); - bdput(bdev); - if (victim) + if (victim) { __blkdev_put(victim, mode, 1); + bdput(victim); + } } void blkdev_put(struct block_device *bdev, 
fmode_t mode) { + struct gendisk *disk = bdev->bd_disk; + mutex_lock(&bdev->bd_mutex); if (mode & FMODE_EXCL) { + struct block_device *whole = bdev_whole(bdev); bool bdev_free; /* @@ -1812,13 +1593,12 @@ void blkdev_put(struct block_device *bdev, fmode_t mode) spin_lock(&bdev_lock); WARN_ON_ONCE(--bdev->bd_holders < 0); - WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0); + WARN_ON_ONCE(--whole->bd_holders < 0); - /* bd_contains might point to self, check in a separate step */ if ((bdev_free = !bdev->bd_holders)) bdev->bd_holder = NULL; - if (!bdev->bd_contains->bd_holders) - bdev->bd_contains->bd_holder = NULL; + if (!whole->bd_holders) + whole->bd_holder = NULL; spin_unlock(&bdev_lock); @@ -1827,7 +1607,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode) * unblock evpoll if it was a write holder. */ if (bdev_free && bdev->bd_write_holder) { - disk_unblock_events(bdev->bd_disk); + disk_unblock_events(disk); bdev->bd_write_holder = false; } } @@ -1837,11 +1617,11 @@ void blkdev_put(struct block_device *bdev, fmode_t mode) * event. This is to ensure detection of media removal commanded * from userland - e.g. eject(1). */ - disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE); - + disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE); mutex_unlock(&bdev->bd_mutex); __blkdev_put(bdev, mode, 0); + blkdev_put_no_open(bdev); } EXPORT_SYMBOL(blkdev_put); @@ -2054,37 +1834,32 @@ const struct file_operations def_blk_fops = { * namespace if possible and return it. Return ERR_PTR(error) * otherwise. 
*/ -struct block_device *lookup_bdev(const char *pathname) +int lookup_bdev(const char *pathname, dev_t *dev) { - struct block_device *bdev; struct inode *inode; struct path path; int error; if (!pathname || !*pathname) - return ERR_PTR(-EINVAL); + return -EINVAL; error = kern_path(pathname, LOOKUP_FOLLOW, &path); if (error) - return ERR_PTR(error); + return error; inode = d_backing_inode(path.dentry); error = -ENOTBLK; if (!S_ISBLK(inode->i_mode)) - goto fail; + goto out_path_put; error = -EACCES; if (!may_open_dev(&path)) - goto fail; - error = -ENOMEM; - bdev = bd_acquire(inode); - if (!bdev) - goto fail; -out: + goto out_path_put; + + *dev = inode->i_rdev; + error = 0; +out_path_put: path_put(&path); - return bdev; -fail: - bdev = ERR_PTR(error); - goto out; + return error; } EXPORT_SYMBOL(lookup_bdev); diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 4522a1c4cd08..19b9fffa2c9c 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -1343,8 +1343,6 @@ int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info, void btrfs_sysfs_remove_device(struct btrfs_device *device) { - struct hd_struct *disk; - struct kobject *disk_kobj; struct kobject *devices_kobj; /* @@ -1354,11 +1352,8 @@ void btrfs_sysfs_remove_device(struct btrfs_device *device) devices_kobj = device->fs_info->fs_devices->devices_kobj; ASSERT(devices_kobj); - if (device->bdev) { - disk = device->bdev->bd_part; - disk_kobj = &part_to_dev(disk)->kobj; - sysfs_remove_link(devices_kobj, disk_kobj->name); - } + if (device->bdev) + sysfs_remove_link(devices_kobj, bdev_kobj(device->bdev)->name); if (device->devid_kobj.state_initialized) { kobject_del(&device->devid_kobj); @@ -1464,11 +1459,7 @@ int btrfs_sysfs_add_device(struct btrfs_device *device) nofs_flag = memalloc_nofs_save(); if (device->bdev) { - struct hd_struct *disk; - struct kobject *disk_kobj; - - disk = device->bdev->bd_part; - disk_kobj = &part_to_dev(disk)->kobj; + struct kobject *disk_kobj = bdev_kobj(device->bdev); ret = 
sysfs_create_link(devices_kobj, disk_kobj, disk_kobj->name); if (ret) { diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 7930e1c78c45..ee086fc56c30 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -935,16 +935,16 @@ static noinline struct btrfs_device *device_list_add(const char *path, * make sure it's the same device if the device is mounted */ if (device->bdev) { - struct block_device *path_bdev; + int error; + dev_t path_dev; - path_bdev = lookup_bdev(path); - if (IS_ERR(path_bdev)) { + error = lookup_bdev(path, &path_dev); + if (error) { mutex_unlock(&fs_devices->device_list_mutex); - return ERR_CAST(path_bdev); + return ERR_PTR(error); } - if (device->bdev != path_bdev) { - bdput(path_bdev); + if (device->bdev->bd_dev != path_dev) { mutex_unlock(&fs_devices->device_list_mutex); /* * device->fs_info may not be reliable here, so @@ -959,7 +959,6 @@ static noinline struct btrfs_device *device_list_add(const char *path, task_pid_nr(current)); return ERR_PTR(-EEXIST); } - bdput(path_bdev); btrfs_info_in_rcu(device->fs_info, "devid %llu device path %s changed to %s scanned by %s (%d)", devid, rcu_str_deref(device->name), diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 155545180046..c38846659019 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -165,7 +165,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device) if (!zone_info) return -ENOMEM; - nr_sectors = bdev->bd_part->nr_sects; + nr_sectors = bdev_nr_sectors(bdev); zone_sectors = bdev_zone_sectors(bdev); /* Check if it's power of 2 (see is_power_of_2) */ ASSERT(zone_sectors != 0 && (zone_sectors & (zone_sectors - 1)) == 0); @@ -505,7 +505,7 @@ int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw, return -EINVAL; zone_size = zone_sectors << SECTOR_SHIFT; zone_sectors_shift = ilog2(zone_sectors); - nr_sectors = bdev->bd_part->nr_sects; + nr_sectors = bdev_nr_sectors(bdev); nr_zones = nr_sectors >> zone_sectors_shift; sb_zone = 
sb_zone_number(zone_sectors_shift + SECTOR_SHIFT, mirror); @@ -603,7 +603,7 @@ int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror) zone_sectors = bdev_zone_sectors(bdev); zone_sectors_shift = ilog2(zone_sectors); - nr_sectors = bdev->bd_part->nr_sects; + nr_sectors = bdev_nr_sectors(bdev); nr_zones = nr_sectors >> zone_sectors_shift; sb_zone = sb_zone_number(zone_sectors_shift + SECTOR_SHIFT, mirror); diff --git a/fs/buffer.c b/fs/buffer.c index b56f99f82b5b..32647d2011df 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -523,7 +523,7 @@ static int osync_buffers_list(spinlock_t *lock, struct list_head *list) void emergency_thaw_bdev(struct super_block *sb) { - while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb)) + while (sb->s_bdev && !thaw_bdev(sb->s_bdev)) printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev); } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index f0381876a7e5..524e13432447 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -624,7 +624,7 @@ static int ext4_shutdown(struct super_block *sb, unsigned long arg) case EXT4_GOING_FLAGS_DEFAULT: freeze_bdev(sb->s_bdev); set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags); - thaw_bdev(sb->s_bdev, sb); + thaw_bdev(sb->s_bdev); break; case EXT4_GOING_FLAGS_LOGFLUSH: set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 94472044f4c1..a2ec60fa8811 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4044,9 +4044,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_sb = sb; sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; sbi->s_sb_block = sb_block; - if (sb->s_bdev->bd_part) - sbi->s_sectors_written_start = - part_stat_read(sb->s_bdev->bd_part, sectors[STAT_WRITE]); + sbi->s_sectors_written_start = + part_stat_read(sb->s_bdev, sectors[STAT_WRITE]); /* Cleanup superblock name */ strreplace(sb->s_id, '/', '!'); @@ -5505,15 +5504,10 @@ static int ext4_commit_super(struct super_block *sb, int sync) */ if 
(!(sb->s_flags & SB_RDONLY)) ext4_update_tstamp(es, s_wtime); - if (sb->s_bdev->bd_part) - es->s_kbytes_written = - cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + - ((part_stat_read(sb->s_bdev->bd_part, - sectors[STAT_WRITE]) - - EXT4_SB(sb)->s_sectors_written_start) >> 1)); - else - es->s_kbytes_written = - cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); + es->s_kbytes_written = + cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + + ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) - + EXT4_SB(sb)->s_sectors_written_start) >> 1)); if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter)) ext4_free_blocks_count_set(es, EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive( diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 4e27fe6ed3ae..075aa3a19ff5 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -62,11 +62,8 @@ static ssize_t session_write_kbytes_show(struct ext4_sb_info *sbi, char *buf) { struct super_block *sb = sbi->s_buddy_cache->i_sb; - if (!sb->s_bdev->bd_part) - return snprintf(buf, PAGE_SIZE, "0\n"); return snprintf(buf, PAGE_SIZE, "%lu\n", - (part_stat_read(sb->s_bdev->bd_part, - sectors[STAT_WRITE]) - + (part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) - sbi->s_sectors_written_start) >> 1); } @@ -74,12 +71,9 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_sb_info *sbi, char *buf) { struct super_block *sb = sbi->s_buddy_cache->i_sb; - if (!sb->s_bdev->bd_part) - return snprintf(buf, PAGE_SIZE, "0\n"); return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)(sbi->s_kbytes_written + - ((part_stat_read(sb->s_bdev->bd_part, - sectors[STAT_WRITE]) - + ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) - EXT4_SB(sb)->s_sectors_written_start) >> 1))); } diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 023462e80e58..54a1905af052 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1395,7 +1395,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) __u32 crc32 = 0; int i; int cp_payload_blks = 
__cp_payload(sbi); - struct super_block *sb = sbi->sb; struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); u64 kbytes_written; int err; @@ -1489,9 +1488,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) start_blk += data_sum_blocks; /* Record write statistics in the hot node summary */ - kbytes_written = sbi->kbytes_written; - if (sb->s_bdev->bd_part) - kbytes_written += BD_PART_WRITTEN(sbi); + kbytes_written = sbi->kbytes_written + BD_PART_WRITTEN(sbi); seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9a321c52face..9f793923dabe 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1675,7 +1675,7 @@ static inline bool f2fs_is_multi_device(struct f2fs_sb_info *sbi) * and the return value is in kbytes. s is of struct f2fs_sb_info. */ #define BD_PART_WRITTEN(s) \ -(((u64)part_stat_read((s)->sb->s_bdev->bd_part, sectors[STAT_WRITE]) - \ + (((u64)part_stat_read((s)->sb->s_bdev, sectors[STAT_WRITE]) - \ (s)->sectors_written_start) >> 1) static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ee861c6d9ff0..a9fc482a0e60 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2230,16 +2230,12 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) switch (in) { case F2FS_GOING_DOWN_FULLSYNC: - sb = freeze_bdev(sb->s_bdev); - if (IS_ERR(sb)) { - ret = PTR_ERR(sb); + ret = freeze_bdev(sb->s_bdev); + if (ret) goto out; - } - if (sb) { - f2fs_stop_checkpoint(sbi, false); - set_sbi_flag(sbi, SBI_IS_SHUTDOWN); - thaw_bdev(sb->s_bdev, sb); - } + f2fs_stop_checkpoint(sbi, false); + set_sbi_flag(sbi, SBI_IS_SHUTDOWN); + thaw_bdev(sb->s_bdev); break; case F2FS_GOING_DOWN_METASYNC: /* do checkpoint only */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 00eff2f51807..af9f449da64b 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3151,7 +3151,7 @@ static int f2fs_report_zone_cb(struct blk_zone 
*zone, unsigned int idx, static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) { struct block_device *bdev = FDEV(devi).bdev; - sector_t nr_sectors = bdev->bd_part->nr_sects; + sector_t nr_sectors = bdev_nr_sectors(bdev); struct f2fs_report_zones_args rep_zone_arg; int ret; @@ -3700,10 +3700,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) } /* For write statistics */ - if (sb->s_bdev->bd_part) - sbi->sectors_written_start = - (u64)part_stat_read(sb->s_bdev->bd_part, - sectors[STAT_WRITE]); + sbi->sectors_written_start = + (u64)part_stat_read(sb->s_bdev, sectors[STAT_WRITE]); /* Read accumulated write IO statistics if exists */ seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index ec77ccfea923..24e876e849c5 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -90,11 +90,6 @@ static ssize_t free_segments_show(struct f2fs_attr *a, static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - struct super_block *sb = sbi->sb; - - if (!sb->s_bdev->bd_part) - return sprintf(buf, "0\n"); - return sprintf(buf, "%llu\n", (unsigned long long)(sbi->kbytes_written + BD_PART_WRITTEN(sbi))); @@ -103,12 +98,8 @@ static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, static ssize_t features_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - struct super_block *sb = sbi->sb; int len = 0; - if (!sb->s_bdev->bd_part) - return sprintf(buf, "0\n"); - if (f2fs_sb_has_encrypt(sbi)) len += scnprintf(buf, PAGE_SIZE - len, "%s", "encryption"); diff --git a/fs/inode.c b/fs/inode.c index 9d78c37b00b8..cb008acf0efd 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -155,7 +155,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_bytes = 0; inode->i_generation = 0; inode->i_pipe = NULL; - inode->i_bdev = NULL; inode->i_cdev = NULL; inode->i_link = NULL; inode->i_dir_seq = 0; @@ -580,8 +579,6 @@ static void evict(struct inode 
*inode) truncate_inode_pages_final(&inode->i_data); clear_inode(inode); } - if (S_ISBLK(inode->i_mode) && inode->i_bdev) - bd_forget(inode); if (S_ISCHR(inode->i_mode) && inode->i_cdev) cd_forget(inode); diff --git a/fs/internal.h b/fs/internal.h index 6fd14ea213c3..77c50befbfbe 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -25,7 +25,6 @@ extern void __init bdev_cache_init(void); extern int __sync_blockdev(struct block_device *bdev, int wait); void iterate_bdevs(void (*)(struct block_device *, void *), void *); void emergency_thaw_bdev(struct super_block *sb); -void bd_forget(struct inode *inode); #else static inline void bdev_cache_init(void) { @@ -43,9 +42,6 @@ static inline int emergency_thaw_bdev(struct super_block *sb) { return 0; } -static inline void bd_forget(struct inode *inode) -{ -} #endif /* CONFIG_BLOCK */ /* @@ -116,7 +112,8 @@ extern struct file *alloc_empty_file_noaccount(int, const struct cred *); */ extern int reconfigure_super(struct fs_context *); extern bool trylock_super(struct super_block *sb); -extern struct super_block *user_get_super(dev_t); +struct super_block *user_get_super(dev_t, bool excl); +void put_super(struct super_block *sb); extern bool mount_capable(struct fs_context *); /* diff --git a/fs/io_uring.c b/fs/io_uring.c index 22e31050f33e..6f9392c35eef 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2802,11 +2802,7 @@ static struct file *__io_file_get(struct io_submit_state *state, int fd) static bool io_bdev_nowait(struct block_device *bdev) { -#ifdef CONFIG_BLOCK return !bdev || blk_queue_nowait(bdev_get_queue(bdev)); -#else - return true; -#endif } /* @@ -2819,14 +2815,16 @@ static bool io_file_supports_async(struct file *file, int rw) umode_t mode = file_inode(file)->i_mode; if (S_ISBLK(mode)) { - if (io_bdev_nowait(file->f_inode->i_bdev)) + if (IS_ENABLED(CONFIG_BLOCK) && + io_bdev_nowait(I_BDEV(file->f_mapping->host))) return true; return false; } if (S_ISCHR(mode) || S_ISSOCK(mode)) return true; if (S_ISREG(mode)) { - 
if (io_bdev_nowait(file->f_inode->i_sb->s_bdev) && + if (IS_ENABLED(CONFIG_BLOCK) && + io_bdev_nowait(file->f_inode->i_sb->s_bdev) && file->f_op != &io_uring_fops) return true; return false; diff --git a/fs/pipe.c b/fs/pipe.c index 0ac197658a2d..c5989cfd564d 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1342,9 +1342,8 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) } /* - * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same - * location, so checking ->i_pipe is not enough to verify that this is a - * pipe. + * Note that i_pipe and i_cdev share the same location, so checking ->i_pipe is + * not enough to verify that this is a pipe. */ struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice) { diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c index 882d0bc0cfd7..4bb8a344957a 100644 --- a/fs/pstore/blk.c +++ b/fs/pstore/blk.c @@ -244,7 +244,7 @@ static struct block_device *psblk_get_bdev(void *holder, return bdev; } - nr_sects = part_nr_sects_read(bdev->bd_part); + nr_sects = bdev_nr_sectors(bdev); if (!nr_sects) { pr_err("not enough space for '%s'\n", blkdev); blkdev_put(bdev, mode); diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 9af95c7a0bbe..6d16b2be5ac4 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -20,6 +20,7 @@ #include #include #include "compat.h" +#include "../internal.h" static int check_quotactl_permission(struct super_block *sb, int type, int cmd, qid_t id) @@ -865,27 +866,42 @@ static bool quotactl_cmd_onoff(int cmd) static struct super_block *quotactl_block(const char __user *special, int cmd) { #ifdef CONFIG_BLOCK - struct block_device *bdev; struct super_block *sb; struct filename *tmp = getname(special); + bool excl = false, thawed = false; + int error; + dev_t dev; if (IS_ERR(tmp)) return ERR_CAST(tmp); - bdev = lookup_bdev(tmp->name); + error = lookup_bdev(tmp->name, &dev); putname(tmp); - if (IS_ERR(bdev)) - return ERR_CAST(bdev); - if (quotactl_cmd_onoff(cmd)) - sb = 
get_super_exclusive_thawed(bdev); - else if (quotactl_cmd_write(cmd)) - sb = get_super_thawed(bdev); - else - sb = get_super(bdev); - bdput(bdev); + if (error) + return ERR_PTR(error); + + if (quotactl_cmd_onoff(cmd)) { + excl = true; + thawed = true; + } else if (quotactl_cmd_write(cmd)) { + thawed = true; + } + +retry: + sb = user_get_super(dev, excl); if (!sb) return ERR_PTR(-ENODEV); - + if (thawed && sb->s_writers.frozen != SB_UNFROZEN) { + if (excl) + up_write(&sb->s_umount); + else + up_read(&sb->s_umount); + wait_event(sb->s_writers.wait_unfrozen, + sb->s_writers.frozen == SB_UNFROZEN); + put_super(sb); + goto retry; + } return sb; + #else return ERR_PTR(-ENODEV); #endif diff --git a/fs/statfs.c b/fs/statfs.c index 59f33752c131..68cb07788750 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -235,7 +235,7 @@ SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user static int vfs_ustat(dev_t dev, struct kstatfs *sbuf) { - struct super_block *s = user_get_super(dev); + struct super_block *s = user_get_super(dev, false); int err; if (!s) return -EINVAL; diff --git a/fs/super.c b/fs/super.c index 98bb0629ee10..2c6cdea2ab2d 100644 --- a/fs/super.c +++ b/fs/super.c @@ -307,7 +307,7 @@ static void __put_super(struct super_block *s) * Drops a temporary reference, frees superblock if there's no * references left. */ -static void put_super(struct super_block *sb) +void put_super(struct super_block *sb) { spin_lock(&sb_lock); __put_super(sb); @@ -740,7 +740,14 @@ void iterate_supers_type(struct file_system_type *type, EXPORT_SYMBOL(iterate_supers_type); -static struct super_block *__get_super(struct block_device *bdev, bool excl) +/** + * get_super - get the superblock of a device + * @bdev: device to get the superblock for + * + * Scans the superblock list and finds the superblock of the file system + * mounted on the device given. %NULL is returned if no match is found. 
+ */ +struct super_block *get_super(struct block_device *bdev) { struct super_block *sb; @@ -755,17 +762,11 @@ static struct super_block *__get_super(struct block_device *bdev, bool excl) if (sb->s_bdev == bdev) { sb->s_count++; spin_unlock(&sb_lock); - if (!excl) - down_read(&sb->s_umount); - else - down_write(&sb->s_umount); + down_read(&sb->s_umount); /* still alive? */ if (sb->s_root && (sb->s_flags & SB_BORN)) return sb; - if (!excl) - up_read(&sb->s_umount); - else - up_write(&sb->s_umount); + up_read(&sb->s_umount); /* nope, got unmounted */ spin_lock(&sb_lock); __put_super(sb); @@ -776,66 +777,6 @@ static struct super_block *__get_super(struct block_device *bdev, bool excl) return NULL; } -/** - * get_super - get the superblock of a device - * @bdev: device to get the superblock for - * - * Scans the superblock list and finds the superblock of the file system - * mounted on the device given. %NULL is returned if no match is found. - */ -struct super_block *get_super(struct block_device *bdev) -{ - return __get_super(bdev, false); -} -EXPORT_SYMBOL(get_super); - -static struct super_block *__get_super_thawed(struct block_device *bdev, - bool excl) -{ - while (1) { - struct super_block *s = __get_super(bdev, excl); - if (!s || s->s_writers.frozen == SB_UNFROZEN) - return s; - if (!excl) - up_read(&s->s_umount); - else - up_write(&s->s_umount); - wait_event(s->s_writers.wait_unfrozen, - s->s_writers.frozen == SB_UNFROZEN); - put_super(s); - } -} - -/** - * get_super_thawed - get thawed superblock of a device - * @bdev: device to get the superblock for - * - * Scans the superblock list and finds the superblock of the file system - * mounted on the device. The superblock is returned once it is thawed - * (or immediately if it was not frozen). %NULL is returned if no match - * is found. 
- */ -struct super_block *get_super_thawed(struct block_device *bdev) -{ - return __get_super_thawed(bdev, false); -} -EXPORT_SYMBOL(get_super_thawed); - -/** - * get_super_exclusive_thawed - get thawed superblock of a device - * @bdev: device to get the superblock for - * - * Scans the superblock list and finds the superblock of the file system - * mounted on the device. The superblock is returned once it is thawed - * (or immediately if it was not frozen) and s_umount semaphore is held - * in exclusive mode. %NULL is returned if no match is found. - */ -struct super_block *get_super_exclusive_thawed(struct block_device *bdev) -{ - return __get_super_thawed(bdev, true); -} -EXPORT_SYMBOL(get_super_exclusive_thawed); - /** * get_active_super - get an active reference to the superblock of a device * @bdev: device to get the superblock for @@ -867,7 +808,7 @@ struct super_block *get_active_super(struct block_device *bdev) return NULL; } -struct super_block *user_get_super(dev_t dev) +struct super_block *user_get_super(dev_t dev, bool excl) { struct super_block *sb; @@ -879,11 +820,17 @@ struct super_block *user_get_super(dev_t dev) if (sb->s_dev == dev) { sb->s_count++; spin_unlock(&sb_lock); - down_read(&sb->s_umount); + if (excl) + down_write(&sb->s_umount); + else + down_read(&sb->s_umount); /* still alive? 
*/ if (sb->s_root && (sb->s_flags & SB_BORN)) return sb; - up_read(&sb->s_umount); + if (excl) + up_write(&sb->s_umount); + else + up_read(&sb->s_umount); /* nope, got unmounted */ spin_lock(&sb_lock); __put_super(sb); diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index ef1d5bb88b93..b7c5783a031c 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -433,13 +433,10 @@ xfs_fs_goingdown( { switch (inflags) { case XFS_FSOP_GOING_FLAGS_DEFAULT: { - struct super_block *sb = freeze_bdev(mp->m_super->s_bdev); - - if (sb && !IS_ERR(sb)) { + if (!freeze_bdev(mp->m_super->s_bdev)) { xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT); - thaw_bdev(sb->s_bdev, sb); + thaw_bdev(mp->m_super->s_bdev); } - break; } case XFS_FSOP_GOING_FLAGS_LOGFLUSH: diff --git a/include/linux/bio.h b/include/linux/bio.h index c6d765382926..1edda614f7ce 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -148,11 +148,24 @@ static inline void bio_advance_iter(const struct bio *bio, /* TODO: It is reasonable to complete bio with error here. 
*/ } +/* @bytes should be less or equal to bvec[i->bi_idx].bv_len */ +static inline void bio_advance_iter_single(const struct bio *bio, + struct bvec_iter *iter, + unsigned int bytes) +{ + iter->bi_sector += bytes >> 9; + + if (bio_no_advance_iter(bio)) + iter->bi_size -= bytes; + else + bvec_iter_advance_single(bio->bi_io_vec, iter, bytes); +} + #define __bio_for_each_segment(bvl, bio, iter, start) \ for (iter = (start); \ (iter).bi_size && \ ((bvl = bio_iter_iovec((bio), (iter))), 1); \ - bio_advance_iter((bio), &(iter), (bvl).bv_len)) + bio_advance_iter_single((bio), &(iter), (bvl).bv_len)) #define bio_for_each_segment(bvl, bio, iter) \ __bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter) @@ -161,7 +174,7 @@ static inline void bio_advance_iter(const struct bio *bio, for (iter = (start); \ (iter).bi_size && \ ((bvl = mp_bvec_iter_bvec((bio)->bi_io_vec, (iter))), 1); \ - bio_advance_iter((bio), &(iter), (bvl).bv_len)) + bio_advance_iter_single((bio), &(iter), (bvl).bv_len)) /* iterate over multi-page bvec */ #define bio_for_each_bvec(bvl, bio, iter) \ @@ -711,12 +724,6 @@ static inline bool bioset_initialized(struct bio_set *bs) return bs->bio_slab != NULL; } -/* - * a small number of entries is fine, not going to be performance critical. 
- * basically we just need to survive - */ -#define BIO_SPLIT_ENTRIES 2 - #if defined(CONFIG_BLK_DEV_INTEGRITY) #define bip_for_each_vec(bvl, bip, iter) \ diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index c8fc9792ac77..b9f3c246c3c9 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -197,12 +197,12 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); struct blkg_conf_ctx { - struct gendisk *disk; + struct block_device *bdev; struct blkcg_gq *blkg; char *body; }; -struct gendisk *blkcg_conf_get_disk(char **inputp); +struct block_device *blkcg_conf_open_bdev(char **inputp); int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, char *input, struct blkg_conf_ctx *ctx); void blkg_conf_finish(struct blkg_conf_ctx *ctx); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 794b2a33a2c3..5f639240760e 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -5,6 +5,7 @@ #include #include #include +#include struct blk_mq_tags; struct blk_flush_queue; @@ -594,5 +595,7 @@ static inline void blk_mq_cleanup_rq(struct request *rq) } blk_qc_t blk_mq_submit_bio(struct bio *bio); +void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx, + struct lock_class_key *key); #endif diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index d9b69bbde5cc..866f74261b3b 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -8,6 +8,7 @@ #include #include +#include #include struct bio_set; @@ -20,21 +21,25 @@ typedef void (bio_end_io_t) (struct bio *); struct bio_crypt_ctx; struct block_device { + sector_t bd_start_sect; + struct disk_stats __percpu *bd_stats; + unsigned long bd_stamp; + bool bd_read_only; /* read-only policy */ dev_t bd_dev; int bd_openers; struct inode * bd_inode; /* will die */ struct super_block * bd_super; struct mutex bd_mutex; /* open/close mutex */ 
void * bd_claiming; + struct device bd_device; void * bd_holder; int bd_holders; bool bd_write_holder; #ifdef CONFIG_SYSFS struct list_head bd_holder_disks; #endif - struct block_device * bd_contains; + struct kobject *bd_holder_dir; u8 bd_partno; - struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ unsigned bd_part_count; @@ -46,8 +51,23 @@ struct block_device { int bd_fsfreeze_count; /* Mutex for freeze */ struct mutex bd_fsfreeze_mutex; + struct super_block *bd_fsfreeze_sb; + + struct partition_meta_info *bd_meta_info; +#ifdef CONFIG_FAIL_MAKE_REQUEST + bool bd_make_it_fail; +#endif } __randomize_layout; +#define bdev_whole(_bdev) \ + ((_bdev)->bd_disk->part0) + +#define dev_to_bdev(device) \ + container_of((device), struct block_device, bd_device) + +#define bdev_kobj(_bdev) \ + (&((_bdev)->bd_device.kobj)) + /* * Block error status values. See block/blk-core:blk_errors for the details. * Alpha cannot write a byte atomically, so we need to use 32-bit value. diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 033eb5f73b65..070de09425ad 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -191,7 +191,7 @@ struct request { }; struct gendisk *rq_disk; - struct hd_struct *part; + struct block_device *part; #ifdef CONFIG_BLK_RQ_ALLOC_TIME /* Time that the first bio started allocating this request. 
*/ u64 alloc_time_ns; @@ -1491,7 +1491,7 @@ static inline int bdev_alignment_offset(struct block_device *bdev) return -1; if (bdev_is_partition(bdev)) return queue_limit_alignment_offset(&q->limits, - bdev->bd_part->start_sect); + bdev->bd_start_sect); return q->limits.alignment_offset; } @@ -1532,7 +1532,7 @@ static inline int bdev_discard_alignment(struct block_device *bdev) if (bdev_is_partition(bdev)) return queue_limit_discard_alignment(&q->limits, - bdev->bd_part->start_sect); + bdev->bd_start_sect); return q->limits.discard_alignment; } @@ -1853,6 +1853,7 @@ struct block_device_operations { void (*unlock_native_capacity) (struct gendisk *); int (*revalidate_disk) (struct gendisk *); int (*getgeo)(struct block_device *, struct hd_geometry *); + int (*set_read_only)(struct block_device *bdev, bool ro); /* this callback is with swap_lock and sometimes page table lock held */ void (*swap_slot_free_notify) (struct block_device *, unsigned long); int (*report_zones)(struct gendisk *, sector_t sector, @@ -1869,8 +1870,6 @@ extern int blkdev_compat_ptr_ioctl(struct block_device *, fmode_t, #define blkdev_compat_ptr_ioctl NULL #endif -extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, - unsigned long); extern int bdev_read_page(struct block_device *, sector_t, struct page *); extern int bdev_write_page(struct block_device *, sector_t, struct page *, struct writeback_control *); @@ -1947,9 +1946,9 @@ unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, void disk_end_io_acct(struct gendisk *disk, unsigned int op, unsigned long start_time); -unsigned long part_start_io_acct(struct gendisk *disk, struct hd_struct **part, - struct bio *bio); -void part_end_io_acct(struct hd_struct *part, struct bio *bio, +unsigned long part_start_io_acct(struct gendisk *disk, + struct block_device **part, struct bio *bio); +void part_end_io_acct(struct block_device *part, struct bio *bio, unsigned long start_time); /** @@ -1977,7 +1976,7 
@@ int bdev_read_only(struct block_device *bdev); int set_blocksize(struct block_device *bdev, int size); const char *bdevname(struct block_device *bdev, char *buffer); -struct block_device *lookup_bdev(const char *); +int lookup_bdev(const char *pathname, dev_t *dev); void blkdev_show(struct seq_file *seqf, off_t offset); @@ -1992,14 +1991,17 @@ void blkdev_show(struct seq_file *seqf, off_t offset); struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, void *holder); struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder); -int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole, - void *holder); -void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, - void *holder); +int bd_prepare_to_claim(struct block_device *bdev, void *holder); +void bd_abort_claiming(struct block_device *bdev, void *holder); void blkdev_put(struct block_device *bdev, fmode_t mode); +/* just for blk-cgroup, don't use elsewhere */ +struct block_device *blkdev_get_no_open(dev_t dev); +void blkdev_put_no_open(struct block_device *bdev); + +struct block_device *bdev_alloc(struct gendisk *disk, u8 partno); +void bdev_add(struct block_device *bdev, dev_t dev); struct block_device *I_BDEV(struct inode *inode); -struct block_device *bdget_part(struct hd_struct *part); struct block_device *bdgrab(struct block_device *bdev); void bdput(struct block_device *); @@ -2024,7 +2026,7 @@ static inline int sync_blockdev(struct block_device *bdev) #endif int fsync_bdev(struct block_device *bdev); -struct super_block *freeze_bdev(struct block_device *bdev); -int thaw_bdev(struct block_device *bdev, struct super_block *sb); +int freeze_bdev(struct block_device *bdev); +int thaw_bdev(struct block_device *bdev); #endif /* _LINUX_BLKDEV_H */ diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 3b6ff5902edc..05556573b896 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -75,8 
+75,7 @@ static inline bool blk_trace_note_message_enabled(struct request_queue *q) return ret; } -extern void blk_add_driver_data(struct request_queue *q, struct request *rq, - void *data, size_t len); +extern void blk_add_driver_data(struct request *rq, void *data, size_t len); extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct block_device *bdev, char __user *arg); @@ -90,7 +89,7 @@ extern struct attribute_group blk_trace_attr_group; #else /* !CONFIG_BLK_DEV_IO_TRACE */ # define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) # define blk_trace_shutdown(q) do { } while (0) -# define blk_add_driver_data(q, rq, data, len) do {} while (0) +# define blk_add_driver_data(rq, data, len) do {} while (0) # define blk_trace_setup(q, name, dev, bdev, arg) (-ENOTTY) # define blk_trace_startstop(q, start) (-ENOTTY) # define blk_trace_remove(q) (-ENOTTY) diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 2efec10bf792..ff832e698efb 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -121,18 +121,28 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv, return true; } -static inline void bvec_iter_skip_zero_bvec(struct bvec_iter *iter) +/* + * A simpler version of bvec_iter_advance(), @bytes should not span + * across multiple bvec entries, i.e. bytes <= bv[i->bi_idx].bv_len + */ +static inline void bvec_iter_advance_single(const struct bio_vec *bv, + struct bvec_iter *iter, unsigned int bytes) { - iter->bi_bvec_done = 0; - iter->bi_idx++; + unsigned int done = iter->bi_bvec_done + bytes; + + if (done == bv[iter->bi_idx].bv_len) { + done = 0; + iter->bi_idx++; + } + iter->bi_bvec_done = done; + iter->bi_size -= bytes; } #define for_each_bvec(bvl, bio_vec, iter, start) \ for (iter = (start); \ (iter).bi_size && \ ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \ - (bvl).bv_len ? 
(void)bvec_iter_advance((bio_vec), &(iter), \ - (bvl).bv_len) : bvec_iter_skip_zero_bvec(&(iter))) + bvec_iter_advance_single((bio_vec), &(iter), (bvl).bv_len)) /* for iterating one bio from start to end */ #define BVEC_ITER_ALL_INIT (struct bvec_iter) \ diff --git a/include/linux/fs.h b/include/linux/fs.h index afb42d1bd64d..59bba648147f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -696,7 +696,6 @@ struct inode { struct list_head i_devices; union { struct pipe_inode_info *i_pipe; - struct block_device *i_bdev; struct cdev *i_cdev; char *i_link; unsigned i_dir_seq; @@ -1408,7 +1407,7 @@ enum { struct sb_writers { int frozen; /* Is sb frozen? */ - wait_queue_head_t wait_unfrozen; /* for get_super_thawed() */ + wait_queue_head_t wait_unfrozen; /* wait for thaw */ struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; }; @@ -3131,8 +3130,6 @@ extern struct file_system_type *get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern struct super_block *get_super(struct block_device *); -extern struct super_block *get_super_thawed(struct block_device *); -extern struct super_block *get_super_exclusive_thawed(struct block_device *bdev); extern struct super_block *get_active_super(struct block_device *bdev); extern void drop_super(struct super_block *sb); extern void drop_super_exclusive(struct super_block *sb); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 03da3f603d30..809aaa32d53c 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -19,11 +19,6 @@ #include #include -#define dev_to_disk(device) container_of((device), struct gendisk, part0.__dev) -#define dev_to_part(device) container_of((device), struct hd_struct, __dev) -#define disk_to_dev(disk) (&(disk)->part0.__dev) -#define part_to_dev(part) (&((part)->__dev)) - extern const struct device_type disk_type; extern struct device_type part_type; extern struct 
class block_class; @@ -50,31 +45,6 @@ struct partition_meta_info { u8 volname[PARTITION_META_INFO_VOLNAMELTH]; }; -struct hd_struct { - sector_t start_sect; - /* - * nr_sects is protected by sequence counter. One might extend a - * partition while IO is happening to it and update of nr_sects - * can be non-atomic on 32bit machines with 64bit sector_t. - */ - sector_t nr_sects; -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - seqcount_t nr_sects_seq; -#endif - unsigned long stamp; - struct disk_stats __percpu *dkstats; - struct percpu_ref ref; - - struct device __dev; - struct kobject *holder_dir; - int policy, partno; - struct partition_meta_info *info; -#ifdef CONFIG_FAIL_MAKE_REQUEST - int make_it_fail; -#endif - struct rcu_work rcu_work; -}; - /** * DOC: genhd capability flags * @@ -149,8 +119,8 @@ enum { struct disk_part_tbl { struct rcu_head rcu_head; int len; - struct hd_struct __rcu *last_lookup; - struct hd_struct __rcu *part[]; + struct block_device __rcu *last_lookup; + struct block_device __rcu *part[]; }; struct disk_events; @@ -184,7 +154,7 @@ struct gendisk { * helpers. */ struct disk_part_tbl __rcu *part_tbl; - struct hd_struct part0; + struct block_device *part0; const struct block_device_operations *fops; struct request_queue *queue; @@ -193,7 +163,6 @@ struct gendisk { int flags; unsigned long state; #define GD_NEED_PART_SCAN 0 - struct rw_semaphore lookup_sem; struct kobject *slave_dir; struct timer_rand_state *random; @@ -210,23 +179,21 @@ struct gendisk { struct lockdep_map lockdep_map; }; +/* + * The gendisk is refcounted by the part0 block_device, and the bd_device + * therein is also used for device model presentation in sysfs. 
+ */ +#define dev_to_disk(device) \ + (dev_to_bdev(device)->bd_disk) +#define disk_to_dev(disk) \ + (&((disk)->part0->bd_device)) + #if IS_REACHABLE(CONFIG_CDROM) #define disk_to_cdi(disk) ((disk)->cdi) #else #define disk_to_cdi(disk) NULL #endif -static inline struct gendisk *part_to_disk(struct hd_struct *part) -{ - if (likely(part)) { - if (part->partno) - return dev_to_disk(part_to_dev(part)->parent); - else - return dev_to_disk(part_to_dev(part)); - } - return NULL; -} - static inline int disk_max_parts(struct gendisk *disk) { if (disk->flags & GENHD_FL_EXT_DEVT) @@ -245,27 +212,6 @@ static inline dev_t disk_devt(struct gendisk *disk) return MKDEV(disk->major, disk->first_minor); } -static inline dev_t part_devt(struct hd_struct *part) -{ - return part_to_dev(part)->devt; -} - -extern struct hd_struct *__disk_get_part(struct gendisk *disk, int partno); -extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); - -static inline void disk_put_part(struct hd_struct *part) -{ - if (likely(part)) - put_device(part_to_dev(part)); -} - -static inline void hd_sects_seq_init(struct hd_struct *p) -{ -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - seqcount_init(&p->nr_sects_seq); -#endif -} - /* * Smarter partition iterator without context limits. 
*/ @@ -276,14 +222,14 @@ static inline void hd_sects_seq_init(struct hd_struct *p) struct disk_part_iter { struct gendisk *disk; - struct hd_struct *part; + struct block_device *part; int idx; unsigned int flags; }; extern void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk, unsigned int flags); -extern struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter); +struct block_device *disk_part_iter_next(struct disk_part_iter *piter); extern void disk_part_iter_exit(struct disk_part_iter *piter); extern bool disk_has_partitions(struct gendisk *disk); @@ -301,22 +247,19 @@ static inline void add_disk_no_queue_reg(struct gendisk *disk) } extern void del_gendisk(struct gendisk *gp); -extern struct gendisk *get_gendisk(dev_t dev, int *partno); extern struct block_device *bdget_disk(struct gendisk *disk, int partno); -extern void set_device_ro(struct block_device *bdev, int flag); extern void set_disk_ro(struct gendisk *disk, int flag); static inline int get_disk_ro(struct gendisk *disk) { - return disk->part0.policy; + return disk->part0->bd_read_only; } extern void disk_block_events(struct gendisk *disk); extern void disk_unblock_events(struct gendisk *disk); extern void disk_flush_events(struct gendisk *disk, unsigned int mask); -bool set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, - bool update_bdev); +bool set_capacity_and_notify(struct gendisk *disk, sector_t size); /* drivers/char/random.c */ extern void add_disk_randomness(struct gendisk *disk) __latent_entropy; @@ -324,15 +267,17 @@ extern void rand_initialize_disk(struct gendisk *disk); static inline sector_t get_start_sect(struct block_device *bdev) { - return bdev->bd_part->start_sect; + return bdev->bd_start_sect; } + +static inline sector_t bdev_nr_sectors(struct block_device *bdev) +{ + return i_size_read(bdev->bd_inode) >> 9; +} + static inline sector_t get_capacity(struct gendisk *disk) { - return disk->part0.nr_sects; -} -static inline void 
set_capacity(struct gendisk *disk, sector_t size) -{ - disk->part0.nr_sects = size; + return bdev_nr_sectors(disk->part0); } int bdev_disk_changed(struct block_device *bdev, bool invalidate); @@ -340,15 +285,7 @@ int blk_add_partitions(struct gendisk *disk, struct block_device *bdev); int blk_drop_partitions(struct block_device *bdev); extern struct gendisk *__alloc_disk_node(int minors, int node_id); -extern struct kobject *get_disk_and_module(struct gendisk *disk); extern void put_disk(struct gendisk *disk); -extern void put_disk_and_module(struct gendisk *disk); -extern void blk_register_region(dev_t devt, unsigned long range, - struct module *module, - struct kobject *(*probe)(dev_t, int *, void *), - int (*lock)(dev_t, void *), - void *data); -extern void blk_unregister_region(dev_t devt, unsigned long range); #define alloc_disk_node(minors, node_id) \ ({ \ @@ -368,13 +305,15 @@ extern void blk_unregister_region(dev_t devt, unsigned long range); #define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE) -int register_blkdev(unsigned int major, const char *name); +int __register_blkdev(unsigned int major, const char *name, + void (*probe)(dev_t devt)); +#define register_blkdev(major, name) \ + __register_blkdev(major, name, NULL) void unregister_blkdev(unsigned int major, const char *name); -void revalidate_disk_size(struct gendisk *disk, bool verbose); bool bdev_check_media_change(struct block_device *bdev); int __invalidate_device(struct block_device *bdev, bool kill_dirty); -void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors); +void set_capacity(struct gendisk *disk, sector_t size); /* for drivers/char/raw.c: */ int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); @@ -395,18 +334,16 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev, } #endif /* CONFIG_SYSFS */ +extern struct rw_semaphore bdev_lookup_sem; + +dev_t blk_lookup_devt(const char *name, int partno); +void blk_request_module(dev_t 
devt); #ifdef CONFIG_BLOCK void printk_all_partitions(void); -dev_t blk_lookup_devt(const char *name, int partno); #else /* CONFIG_BLOCK */ static inline void printk_all_partitions(void) { } -static inline dev_t blk_lookup_devt(const char *name, int partno) -{ - dev_t devt = MKDEV(0, 0); - return devt; -} #endif /* CONFIG_BLOCK */ #endif /* _LINUX_GENHD_H */ diff --git a/include/linux/ide.h b/include/linux/ide.h index 62653769509f..2c300689a51a 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1493,9 +1493,6 @@ static inline void ide_acpi_port_init_devices(ide_hwif_t *hwif) { ; } static inline void ide_acpi_set_state(ide_hwif_t *hwif, int on) {} #endif -void ide_register_region(struct gendisk *); -void ide_unregister_region(struct gendisk *); - void ide_check_nien_quirk_list(ide_drive_t *); void ide_undecoded_slave(ide_drive_t *); diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index 24125778ef3e..d2558121d48c 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -25,26 +25,26 @@ struct disk_stats { #define part_stat_unlock() preempt_enable() #define part_stat_get_cpu(part, field, cpu) \ - (per_cpu_ptr((part)->dkstats, (cpu))->field) + (per_cpu_ptr((part)->bd_stats, (cpu))->field) #define part_stat_get(part, field) \ part_stat_get_cpu(part, field, smp_processor_id()) #define part_stat_read(part, field) \ ({ \ - typeof((part)->dkstats->field) res = 0; \ + typeof((part)->bd_stats->field) res = 0; \ unsigned int _cpu; \ for_each_possible_cpu(_cpu) \ - res += per_cpu_ptr((part)->dkstats, _cpu)->field; \ + res += per_cpu_ptr((part)->bd_stats, _cpu)->field; \ res; \ }) -static inline void part_stat_set_all(struct hd_struct *part, int value) +static inline void part_stat_set_all(struct block_device *part, int value) { int i; for_each_possible_cpu(i) - memset(per_cpu_ptr(part->dkstats, i), value, + memset(per_cpu_ptr(part->bd_stats, i), value, sizeof(struct disk_stats)); } @@ -54,29 +54,28 @@ static inline void 
part_stat_set_all(struct hd_struct *part, int value) part_stat_read(part, field[STAT_DISCARD])) #define __part_stat_add(part, field, addnd) \ - __this_cpu_add((part)->dkstats->field, addnd) + __this_cpu_add((part)->bd_stats->field, addnd) #define part_stat_add(part, field, addnd) do { \ __part_stat_add((part), field, addnd); \ - if ((part)->partno) \ - __part_stat_add(&part_to_disk((part))->part0, \ - field, addnd); \ + if ((part)->bd_partno) \ + __part_stat_add(bdev_whole(part), field, addnd); \ } while (0) -#define part_stat_dec(gendiskp, field) \ - part_stat_add(gendiskp, field, -1) -#define part_stat_inc(gendiskp, field) \ - part_stat_add(gendiskp, field, 1) -#define part_stat_sub(gendiskp, field, subnd) \ - part_stat_add(gendiskp, field, -subnd) +#define part_stat_dec(part, field) \ + part_stat_add(part, field, -1) +#define part_stat_inc(part, field) \ + part_stat_add(part, field, 1) +#define part_stat_sub(part, field, subnd) \ + part_stat_add(part, field, -subnd) -#define part_stat_local_dec(gendiskp, field) \ - local_dec(&(part_stat_get(gendiskp, field))) -#define part_stat_local_inc(gendiskp, field) \ - local_inc(&(part_stat_get(gendiskp, field))) -#define part_stat_local_read(gendiskp, field) \ - local_read(&(part_stat_get(gendiskp, field))) -#define part_stat_local_read_cpu(gendiskp, field, cpu) \ - local_read(&(part_stat_get_cpu(gendiskp, field, cpu))) +#define part_stat_local_dec(part, field) \ + local_dec(&(part_stat_get(part, field))) +#define part_stat_local_inc(part, field) \ + local_inc(&(part_stat_get(part, field))) +#define part_stat_local_read(part, field) \ + local_read(&(part_stat_get(part, field))) +#define part_stat_local_read_cpu(part, field, cpu) \ + local_read(&(part_stat_get_cpu(part, field, cpu))) #endif /* _LINUX_PART_STAT_H */ diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index e40d019c3d9d..74cc6384715e 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -32,11 +32,6 @@ struct sbitmap_word { * 
@cleared: word holding cleared bits */ unsigned long cleared ____cacheline_aligned_in_smp; - - /** - * @swap_lock: Held while swapping word <-> cleared - */ - spinlock_t swap_lock; } ____cacheline_aligned_in_smp; /** diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 34d64ca306b1..0d782663a005 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -64,7 +64,6 @@ DEFINE_EVENT(block_buffer, block_dirty_buffer, /** * block_rq_requeue - place block IO request back on a queue - * @q: queue holding operation * @rq: block IO operation request * * The block operation request @rq is being placed back into queue @@ -73,9 +72,9 @@ DEFINE_EVENT(block_buffer, block_dirty_buffer, */ TRACE_EVENT(block_rq_requeue, - TP_PROTO(struct request_queue *q, struct request *rq), + TP_PROTO(struct request *rq), - TP_ARGS(q, rq), + TP_ARGS(rq), TP_STRUCT__entry( __field( dev_t, dev ) @@ -147,9 +146,9 @@ TRACE_EVENT(block_rq_complete, DECLARE_EVENT_CLASS(block_rq, - TP_PROTO(struct request_queue *q, struct request *rq), + TP_PROTO(struct request *rq), - TP_ARGS(q, rq), + TP_ARGS(rq), TP_STRUCT__entry( __field( dev_t, dev ) @@ -181,7 +180,6 @@ DECLARE_EVENT_CLASS(block_rq, /** * block_rq_insert - insert block operation request into queue - * @q: target queue * @rq: block IO operation request * * Called immediately before block operation request @rq is inserted @@ -191,14 +189,13 @@ DECLARE_EVENT_CLASS(block_rq, */ DEFINE_EVENT(block_rq, block_rq_insert, - TP_PROTO(struct request_queue *q, struct request *rq), + TP_PROTO(struct request *rq), - TP_ARGS(q, rq) + TP_ARGS(rq) ); /** * block_rq_issue - issue pending block IO request operation to device driver - * @q: queue holding operation * @rq: block IO operation operation request * * Called when block operation request @rq from queue @q is sent to a @@ -206,14 +203,13 @@ DEFINE_EVENT(block_rq, block_rq_insert, */ DEFINE_EVENT(block_rq, block_rq_issue, - TP_PROTO(struct request_queue *q, struct 
request *rq), + TP_PROTO(struct request *rq), - TP_ARGS(q, rq) + TP_ARGS(rq) ); /** * block_rq_merge - merge request with another one in the elevator - * @q: queue holding operation * @rq: block IO operation operation request * * Called when block operation request @rq from queue @q is merged to another @@ -221,48 +217,9 @@ DEFINE_EVENT(block_rq, block_rq_issue, */ DEFINE_EVENT(block_rq, block_rq_merge, - TP_PROTO(struct request_queue *q, struct request *rq), + TP_PROTO(struct request *rq), - TP_ARGS(q, rq) -); - -/** - * block_bio_bounce - used bounce buffer when processing block operation - * @q: queue holding the block operation - * @bio: block operation - * - * A bounce buffer was used to handle the block operation @bio in @q. - * This occurs when hardware limitations prevent a direct transfer of - * data between the @bio data memory area and the IO device. Use of a - * bounce buffer requires extra copying of data and decreases - * performance. - */ -TRACE_EVENT(block_bio_bounce, - - TP_PROTO(struct request_queue *q, struct bio *bio), - - TP_ARGS(q, bio), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __array( char, rwbs, RWBS_LEN ) - __array( char, comm, TASK_COMM_LEN ) - ), - - TP_fast_assign( - __entry->dev = bio_dev(bio); - __entry->sector = bio->bi_iter.bi_sector; - __entry->nr_sector = bio_sectors(bio); - blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), - - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) + TP_ARGS(rq) ); /** @@ -301,11 +258,11 @@ TRACE_EVENT(block_bio_complete, __entry->nr_sector, __entry->error) ); -DECLARE_EVENT_CLASS(block_bio_merge, +DECLARE_EVENT_CLASS(block_bio, - TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), + TP_PROTO(struct bio *bio), - 
TP_ARGS(q, rq, bio), + TP_ARGS(bio), TP_STRUCT__entry( __field( dev_t, dev ) @@ -329,134 +286,63 @@ DECLARE_EVENT_CLASS(block_bio_merge, __entry->nr_sector, __entry->comm) ); +/** + * block_bio_bounce - used bounce buffer when processing block operation + * @bio: block operation + * + * A bounce buffer was used to handle the block operation @bio. + * This occurs when hardware limitations prevent a direct transfer of + * data between the @bio data memory area and the IO device. Use of a + * bounce buffer requires extra copying of data and decreases + * performance. + */ +DEFINE_EVENT(block_bio, block_bio_bounce, + TP_PROTO(struct bio *bio), + TP_ARGS(bio) +); + /** * block_bio_backmerge - merging block operation to the end of an existing operation - * @q: queue holding operation - * @rq: request bio is being merged into * @bio: new block operation to merge * - * Merging block request @bio to the end of an existing block request - * in queue @q. + * Merging block request @bio to the end of an existing block request. */ -DEFINE_EVENT(block_bio_merge, block_bio_backmerge, - - TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), - - TP_ARGS(q, rq, bio) +DEFINE_EVENT(block_bio, block_bio_backmerge, + TP_PROTO(struct bio *bio), + TP_ARGS(bio) ); /** * block_bio_frontmerge - merging block operation to the beginning of an existing operation - * @q: queue holding operation - * @rq: request bio is being merged into * @bio: new block operation to merge * - * Merging block IO operation @bio to the beginning of an existing block - * operation in queue @q. + * Merging block IO operation @bio to the beginning of an existing block request. 
*/ -DEFINE_EVENT(block_bio_merge, block_bio_frontmerge, - - TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), - - TP_ARGS(q, rq, bio) +DEFINE_EVENT(block_bio, block_bio_frontmerge, + TP_PROTO(struct bio *bio), + TP_ARGS(bio) ); /** * block_bio_queue - putting new block IO operation in queue - * @q: queue holding operation * @bio: new block operation * * About to place the block IO operation @bio into queue @q. */ -TRACE_EVENT(block_bio_queue, - - TP_PROTO(struct request_queue *q, struct bio *bio), - - TP_ARGS(q, bio), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __array( char, rwbs, RWBS_LEN ) - __array( char, comm, TASK_COMM_LEN ) - ), - - TP_fast_assign( - __entry->dev = bio_dev(bio); - __entry->sector = bio->bi_iter.bi_sector; - __entry->nr_sector = bio_sectors(bio); - blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), - - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) -); - -DECLARE_EVENT_CLASS(block_get_rq, - - TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - - TP_ARGS(q, bio, rw), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __array( char, rwbs, RWBS_LEN ) - __array( char, comm, TASK_COMM_LEN ) - ), - - TP_fast_assign( - __entry->dev = bio ? bio_dev(bio) : 0; - __entry->sector = bio ? bio->bi_iter.bi_sector : 0; - __entry->nr_sector = bio ? bio_sectors(bio) : 0; - blk_fill_rwbs(__entry->rwbs, - bio ? 
bio->bi_opf : 0, __entry->nr_sector); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), - - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) +DEFINE_EVENT(block_bio, block_bio_queue, + TP_PROTO(struct bio *bio), + TP_ARGS(bio) ); /** * block_getrq - get a free request entry in queue for block IO operations - * @q: queue for operations * @bio: pending block IO operation (can be %NULL) - * @rw: low bit indicates a read (%0) or a write (%1) * - * A request struct for queue @q has been allocated to handle the - * block IO operation @bio. + * A request struct has been allocated to handle the block IO operation @bio. */ -DEFINE_EVENT(block_get_rq, block_getrq, - - TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - - TP_ARGS(q, bio, rw) -); - -/** - * block_sleeprq - waiting to get a free request entry in queue for block IO operation - * @q: queue for operation - * @bio: pending block IO operation (can be %NULL) - * @rw: low bit indicates a read (%0) or a write (%1) - * - * In the case where a request struct cannot be provided for queue @q - * the process needs to wait for an request struct to become - * available. This tracepoint event is generated each time the - * process goes to sleep waiting for request struct become available. - */ -DEFINE_EVENT(block_get_rq, block_sleeprq, - - TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - - TP_ARGS(q, bio, rw) +DEFINE_EVENT(block_bio, block_getrq, + TP_PROTO(struct bio *bio), + TP_ARGS(bio) ); /** @@ -521,21 +407,19 @@ DEFINE_EVENT(block_unplug, block_unplug, /** * block_split - split a single bio struct into two bio structs - * @q: queue containing the bio * @bio: block operation being split * @new_sector: The starting sector for the new bio * - * The bio request @bio in request queue @q needs to be split into two - * bio requests. 
The newly created @bio request starts at - * @new_sector. This split may be required due to hardware limitation - * such as operation crossing device boundaries in a RAID system. + * The bio request @bio needs to be split into two bio requests. The newly + * created @bio request starts at @new_sector. This split may be required due to + * hardware limitations such as operation crossing device boundaries in a RAID + * system. */ TRACE_EVENT(block_split, - TP_PROTO(struct request_queue *q, struct bio *bio, - unsigned int new_sector), + TP_PROTO(struct bio *bio, unsigned int new_sector), - TP_ARGS(q, bio, new_sector), + TP_ARGS(bio, new_sector), TP_STRUCT__entry( __field( dev_t, dev ) @@ -562,9 +446,8 @@ TRACE_EVENT(block_split, /** * block_bio_remap - map request for a logical device to the raw device - * @q: queue holding the operation * @bio: revised operation - * @dev: device for the operation + * @dev: original device for the operation * @from: original sector for the operation * * An operation for a logical device has been mapped to the @@ -572,10 +455,9 @@ TRACE_EVENT(block_split, */ TRACE_EVENT(block_bio_remap, - TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, - sector_t from), + TP_PROTO(struct bio *bio, dev_t dev, sector_t from), - TP_ARGS(q, bio, dev, from), + TP_ARGS(bio, dev, from), TP_STRUCT__entry( __field( dev_t, dev ) @@ -605,7 +487,6 @@ TRACE_EVENT(block_bio_remap, /** * block_rq_remap - map request for a block operation request - * @q: queue holding the operation * @rq: block IO operation request * @dev: device for the operation * @from: original sector for the operation @@ -616,10 +497,9 @@ TRACE_EVENT(block_bio_remap, */ TRACE_EVENT(block_rq_remap, - TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev, - sector_t from), + TP_PROTO(struct request *rq, dev_t dev, sector_t from), - TP_ARGS(q, rq, dev, from), + TP_ARGS(rq, dev, from), TP_STRUCT__entry( __field( dev_t, dev ) diff --git a/init/do_mounts.c b/init/do_mounts.c 
index b5f9604d0c98..a78e44ee6adb 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -76,21 +76,15 @@ struct uuidcmp { */ static int match_dev_by_uuid(struct device *dev, const void *data) { + struct block_device *bdev = dev_to_bdev(dev); const struct uuidcmp *cmp = data; - struct hd_struct *part = dev_to_part(dev); - - if (!part->info) - goto no_match; - - if (strncasecmp(cmp->uuid, part->info->uuid, cmp->len)) - goto no_match; + if (!bdev->bd_meta_info || + strncasecmp(cmp->uuid, bdev->bd_meta_info->uuid, cmp->len)) + return 0; return 1; -no_match: - return 0; } - /** * devt_from_partuuid - looks up the dev_t of a partition by its UUID * @uuid_str: char array containing ascii UUID @@ -106,13 +100,10 @@ static int match_dev_by_uuid(struct device *dev, const void *data) */ static dev_t devt_from_partuuid(const char *uuid_str) { - dev_t res = 0; struct uuidcmp cmp; struct device *dev = NULL; - struct gendisk *disk; - struct hd_struct *part; + dev_t devt = 0; int offset = 0; - bool clear_root_wait = false; char *slash; cmp.uuid = uuid_str; @@ -121,52 +112,49 @@ static dev_t devt_from_partuuid(const char *uuid_str) /* Check for optional partition number offset attributes. */ if (slash) { char c = 0; + /* Explicitly fail on poor PARTUUID syntax. */ - if (sscanf(slash + 1, - "PARTNROFF=%d%c", &offset, &c) != 1) { - clear_root_wait = true; - goto done; - } + if (sscanf(slash + 1, "PARTNROFF=%d%c", &offset, &c) != 1) + goto clear_root_wait; cmp.len = slash - uuid_str; } else { cmp.len = strlen(uuid_str); } - if (!cmp.len) { - clear_root_wait = true; - goto done; - } + if (!cmp.len) + goto clear_root_wait; - dev = class_find_device(&block_class, NULL, &cmp, - &match_dev_by_uuid); + dev = class_find_device(&block_class, NULL, &cmp, &match_dev_by_uuid); if (!dev) - goto done; + return 0; - res = dev->devt; + if (offset) { + /* + * Attempt to find the requested partition by adding an offset + * to the partition number found by UUID. 
+ */ + struct block_device *part; - /* Attempt to find the partition by offset. */ - if (!offset) - goto no_offset; - - res = 0; - disk = part_to_disk(dev_to_part(dev)); - part = disk_get_part(disk, dev_to_part(dev)->partno + offset); - if (part) { - res = part_devt(part); - put_device(part_to_dev(part)); + part = bdget_disk(dev_to_disk(dev), + dev_to_bdev(dev)->bd_partno + offset); + if (part) { + devt = part->bd_dev; + bdput(part); + } + } else { + devt = dev->devt; } -no_offset: put_device(dev); -done: - if (clear_root_wait) { - pr_err("VFS: PARTUUID= is invalid.\n" - "Expected PARTUUID=[/PARTNROFF=%%d]\n"); - if (root_wait) - pr_err("Disabling rootwait; root= is invalid.\n"); - root_wait = 0; - } - return res; + return devt; + +clear_root_wait: + pr_err("VFS: PARTUUID= is invalid.\n" + "Expected PARTUUID=[/PARTNROFF=%%d]\n"); + if (root_wait) + pr_err("Disabling rootwait; root= is invalid.\n"); + root_wait = 0; + return 0; } /** @@ -178,15 +166,90 @@ static dev_t devt_from_partuuid(const char *uuid_str) */ static int match_dev_by_label(struct device *dev, const void *data) { + struct block_device *bdev = dev_to_bdev(dev); const char *label = data; - struct hd_struct *part = dev_to_part(dev); - if (part->info && !strcmp(label, part->info->volname)) - return 1; - - return 0; + if (!bdev->bd_meta_info || strcmp(label, bdev->bd_meta_info->volname)) + return 0; + return 1; +} + +static dev_t devt_from_partlabel(const char *label) +{ + struct device *dev; + dev_t devt = 0; + + dev = class_find_device(&block_class, NULL, label, &match_dev_by_label); + if (dev) { + devt = dev->devt; + put_device(dev); + } + + return devt; +} + +static dev_t devt_from_devname(const char *name) +{ + dev_t devt = 0; + int part; + char s[32]; + char *p; + + if (strlen(name) > 31) + return 0; + strcpy(s, name); + for (p = s; *p; p++) { + if (*p == '/') + *p = '!'; + } + + devt = blk_lookup_devt(s, 0); + if (devt) + return devt; + + /* + * Try non-existent, but valid partition, which may 
only exist after + * opening the device, like partitioned md devices. + */ + while (p > s && isdigit(p[-1])) + p--; + if (p == s || !*p || *p == '0') + return 0; + + /* try disk name without the partition number */ + part = simple_strtoul(p, NULL, 10); + *p = '\0'; + devt = blk_lookup_devt(s, part); + if (devt) + return devt; + + /* try disk name without the 'p' separator and partition number */ + if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p') + return 0; + p[-1] = '\0'; + return blk_lookup_devt(s, part); +} +#endif /* CONFIG_BLOCK */ + +static dev_t devt_from_devnum(const char *name) +{ + unsigned maj, min, offset; + dev_t devt = 0; + char *p, dummy; + + if (sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2 || + sscanf(name, "%u:%u:%u:%c", &maj, &min, &offset, &dummy) == 3) { + devt = MKDEV(maj, min); + if (maj != MAJOR(devt) || min != MINOR(devt)) + return 0; + } else { + devt = new_decode_dev(simple_strtoul(name, &p, 16)); + if (*p) + return 0; + } + + return devt; } -#endif /* * Convert a name into device number. We accept the following variants: @@ -218,101 +281,23 @@ static int match_dev_by_label(struct device *dev, const void *data) * name contains slashes, the device name has them replaced with * bangs. 
*/ - dev_t name_to_dev_t(const char *name) { - char s[32]; - char *p; - dev_t res = 0; - int part; - + if (strcmp(name, "/dev/nfs") == 0) + return Root_NFS; + if (strcmp(name, "/dev/cifs") == 0) + return Root_CIFS; + if (strcmp(name, "/dev/ram") == 0) + return Root_RAM0; #ifdef CONFIG_BLOCK - if (strncmp(name, "PARTUUID=", 9) == 0) { - name += 9; - res = devt_from_partuuid(name); - if (!res) - goto fail; - goto done; - } else if (strncmp(name, "PARTLABEL=", 10) == 0) { - struct device *dev; - - dev = class_find_device(&block_class, NULL, name + 10, - &match_dev_by_label); - if (!dev) - goto fail; - - res = dev->devt; - put_device(dev); - goto done; - } + if (strncmp(name, "PARTUUID=", 9) == 0) + return devt_from_partuuid(name + 9); + if (strncmp(name, "PARTLABEL=", 10) == 0) + return devt_from_partlabel(name + 10); + if (strncmp(name, "/dev/", 5) == 0) + return devt_from_devname(name + 5); #endif - - if (strncmp(name, "/dev/", 5) != 0) { - unsigned maj, min, offset; - char dummy; - - if ((sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2) || - (sscanf(name, "%u:%u:%u:%c", &maj, &min, &offset, &dummy) == 3)) { - res = MKDEV(maj, min); - if (maj != MAJOR(res) || min != MINOR(res)) - goto fail; - } else { - res = new_decode_dev(simple_strtoul(name, &p, 16)); - if (*p) - goto fail; - } - goto done; - } - - name += 5; - res = Root_NFS; - if (strcmp(name, "nfs") == 0) - goto done; - res = Root_CIFS; - if (strcmp(name, "cifs") == 0) - goto done; - res = Root_RAM0; - if (strcmp(name, "ram") == 0) - goto done; - - if (strlen(name) > 31) - goto fail; - strcpy(s, name); - for (p = s; *p; p++) - if (*p == '/') - *p = '!'; - res = blk_lookup_devt(s, 0); - if (res) - goto done; - - /* - * try non-existent, but valid partition, which may only exist - * after revalidating the disk, like partitioned md devices - */ - while (p > s && isdigit(p[-1])) - p--; - if (p == s || !*p || *p == '0') - goto fail; - - /* try disk name without */ - part = simple_strtoul(p, NULL, 10); - *p = '\0'; 
- res = blk_lookup_devt(s, part); - if (res) - goto done; - - /* try disk name without p */ - if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p') - goto fail; - p[-1] = '\0'; - res = blk_lookup_devt(s, part); - if (res) - goto done; - -fail: - return 0; -done: - return res; + return devt_from_devnum(name); } EXPORT_SYMBOL_GPL(name_to_dev_t); diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index b5c4b9ade960..456fe4ce6942 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -458,14 +458,9 @@ static const struct rchan_callbacks blk_relay_callbacks = { static void blk_trace_setup_lba(struct blk_trace *bt, struct block_device *bdev) { - struct hd_struct *part = NULL; - - if (bdev) - part = bdev->bd_part; - - if (part) { - bt->start_lba = part->start_sect; - bt->end_lba = part->start_sect + part->nr_sects; + if (bdev) { + bt->start_lba = bdev->bd_start_sect; + bt->end_lba = bdev->bd_start_sect + bdev_nr_sectors(bdev); } else { bt->start_lba = 0; bt->end_lba = -1ULL; @@ -800,12 +795,12 @@ static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) #endif static u64 -blk_trace_request_get_cgid(struct request_queue *q, struct request *rq) +blk_trace_request_get_cgid(struct request *rq) { if (!rq->bio) return 0; /* Use the first bio */ - return blk_trace_bio_get_cgid(q, rq->bio); + return blk_trace_bio_get_cgid(rq->q, rq->bio); } /* @@ -846,40 +841,35 @@ static void blk_add_trace_rq(struct request *rq, int error, rcu_read_unlock(); } -static void blk_add_trace_rq_insert(void *ignore, - struct request_queue *q, struct request *rq) +static void blk_add_trace_rq_insert(void *ignore, struct request *rq) { blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_INSERT, - blk_trace_request_get_cgid(q, rq)); + blk_trace_request_get_cgid(rq)); } -static void blk_add_trace_rq_issue(void *ignore, - struct request_queue *q, struct request *rq) +static void blk_add_trace_rq_issue(void *ignore, struct request *rq) { blk_add_trace_rq(rq, 0, 
blk_rq_bytes(rq), BLK_TA_ISSUE, - blk_trace_request_get_cgid(q, rq)); + blk_trace_request_get_cgid(rq)); } -static void blk_add_trace_rq_merge(void *ignore, - struct request_queue *q, struct request *rq) +static void blk_add_trace_rq_merge(void *ignore, struct request *rq) { blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_BACKMERGE, - blk_trace_request_get_cgid(q, rq)); + blk_trace_request_get_cgid(rq)); } -static void blk_add_trace_rq_requeue(void *ignore, - struct request_queue *q, - struct request *rq) +static void blk_add_trace_rq_requeue(void *ignore, struct request *rq) { blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_REQUEUE, - blk_trace_request_get_cgid(q, rq)); + blk_trace_request_get_cgid(rq)); } static void blk_add_trace_rq_complete(void *ignore, struct request *rq, int error, unsigned int nr_bytes) { blk_add_trace_rq(rq, error, nr_bytes, BLK_TA_COMPLETE, - blk_trace_request_get_cgid(rq->q, rq)); + blk_trace_request_get_cgid(rq)); } /** @@ -911,10 +901,9 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, rcu_read_unlock(); } -static void blk_add_trace_bio_bounce(void *ignore, - struct request_queue *q, struct bio *bio) +static void blk_add_trace_bio_bounce(void *ignore, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); + blk_add_trace_bio(bio->bi_disk->queue, bio, BLK_TA_BOUNCE, 0); } static void blk_add_trace_bio_complete(void *ignore, @@ -924,63 +913,24 @@ static void blk_add_trace_bio_complete(void *ignore, blk_status_to_errno(bio->bi_status)); } -static void blk_add_trace_bio_backmerge(void *ignore, - struct request_queue *q, - struct request *rq, - struct bio *bio) +static void blk_add_trace_bio_backmerge(void *ignore, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0); + blk_add_trace_bio(bio->bi_disk->queue, bio, BLK_TA_BACKMERGE, 0); } -static void blk_add_trace_bio_frontmerge(void *ignore, - struct request_queue *q, - struct request *rq, - struct bio *bio) +static void 
blk_add_trace_bio_frontmerge(void *ignore, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0); + blk_add_trace_bio(bio->bi_disk->queue, bio, BLK_TA_FRONTMERGE, 0); } -static void blk_add_trace_bio_queue(void *ignore, - struct request_queue *q, struct bio *bio) +static void blk_add_trace_bio_queue(void *ignore, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0); + blk_add_trace_bio(bio->bi_disk->queue, bio, BLK_TA_QUEUE, 0); } -static void blk_add_trace_getrq(void *ignore, - struct request_queue *q, - struct bio *bio, int rw) +static void blk_add_trace_getrq(void *ignore, struct bio *bio) { - if (bio) - blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0); - else { - struct blk_trace *bt; - - rcu_read_lock(); - bt = rcu_dereference(q->blk_trace); - if (bt) - __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0, - NULL, 0); - rcu_read_unlock(); - } -} - - -static void blk_add_trace_sleeprq(void *ignore, - struct request_queue *q, - struct bio *bio, int rw) -{ - if (bio) - blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0); - else { - struct blk_trace *bt; - - rcu_read_lock(); - bt = rcu_dereference(q->blk_trace); - if (bt) - __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ, - 0, 0, NULL, 0); - rcu_read_unlock(); - } + blk_add_trace_bio(bio->bi_disk->queue, bio, BLK_TA_GETRQ, 0); } static void blk_add_trace_plug(void *ignore, struct request_queue *q) @@ -1015,10 +965,9 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q, rcu_read_unlock(); } -static void blk_add_trace_split(void *ignore, - struct request_queue *q, struct bio *bio, - unsigned int pdu) +static void blk_add_trace_split(void *ignore, struct bio *bio, unsigned int pdu) { + struct request_queue *q = bio->bi_disk->queue; struct blk_trace *bt; rcu_read_lock(); @@ -1039,20 +988,16 @@ static void blk_add_trace_split(void *ignore, /** * blk_add_trace_bio_remap - Add a trace for a bio-remap operation * @ignore: trace callback data parameter (not used) - * @q: queue the io is for 
* @bio: the source bio - * @dev: target device + * @dev: source device * @from: source sector * - * Description: - * Device mapper or raid target sometimes need to split a bio because - * it spans a stripe (or similar). Add a trace for that action. - * + * Called after a bio is remapped to a different device and/or sector. **/ -static void blk_add_trace_bio_remap(void *ignore, - struct request_queue *q, struct bio *bio, - dev_t dev, sector_t from) +static void blk_add_trace_bio_remap(void *ignore, struct bio *bio, dev_t dev, + sector_t from) { + struct request_queue *q = bio->bi_disk->queue; struct blk_trace *bt; struct blk_io_trace_remap r; @@ -1077,7 +1022,6 @@ static void blk_add_trace_bio_remap(void *ignore, /** * blk_add_trace_rq_remap - Add a trace for a request-remap operation * @ignore: trace callback data parameter (not used) - * @q: queue the io is for * @rq: the source request * @dev: target device * @from: source sector @@ -1087,16 +1031,14 @@ static void blk_add_trace_bio_remap(void *ignore, * Add a trace for that action. 
* **/ -static void blk_add_trace_rq_remap(void *ignore, - struct request_queue *q, - struct request *rq, dev_t dev, +static void blk_add_trace_rq_remap(void *ignore, struct request *rq, dev_t dev, sector_t from) { struct blk_trace *bt; struct blk_io_trace_remap r; rcu_read_lock(); - bt = rcu_dereference(q->blk_trace); + bt = rcu_dereference(rq->q->blk_trace); if (likely(!bt)) { rcu_read_unlock(); return; @@ -1108,13 +1050,12 @@ static void blk_add_trace_rq_remap(void *ignore, __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rq_data_dir(rq), 0, BLK_TA_REMAP, 0, - sizeof(r), &r, blk_trace_request_get_cgid(q, rq)); + sizeof(r), &r, blk_trace_request_get_cgid(rq)); rcu_read_unlock(); } /** * blk_add_driver_data - Add binary message with driver-specific data - * @q: queue the io is for * @rq: io request * @data: driver-specific data * @len: length of driver-specific data @@ -1123,14 +1064,12 @@ static void blk_add_trace_rq_remap(void *ignore, * Some drivers might want to write driver-specific data per request. 
* **/ -void blk_add_driver_data(struct request_queue *q, - struct request *rq, - void *data, size_t len) +void blk_add_driver_data(struct request *rq, void *data, size_t len) { struct blk_trace *bt; rcu_read_lock(); - bt = rcu_dereference(q->blk_trace); + bt = rcu_dereference(rq->q->blk_trace); if (likely(!bt)) { rcu_read_unlock(); return; @@ -1138,7 +1077,7 @@ void blk_add_driver_data(struct request_queue *q, __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0, BLK_TA_DRV_DATA, 0, len, data, - blk_trace_request_get_cgid(q, rq)); + blk_trace_request_get_cgid(rq)); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(blk_add_driver_data); @@ -1169,8 +1108,6 @@ static void blk_register_tracepoints(void) WARN_ON(ret); ret = register_trace_block_getrq(blk_add_trace_getrq, NULL); WARN_ON(ret); - ret = register_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); - WARN_ON(ret); ret = register_trace_block_plug(blk_add_trace_plug, NULL); WARN_ON(ret); ret = register_trace_block_unplug(blk_add_trace_unplug, NULL); @@ -1190,7 +1127,6 @@ static void blk_unregister_tracepoints(void) unregister_trace_block_split(blk_add_trace_split, NULL); unregister_trace_block_unplug(blk_add_trace_unplug, NULL); unregister_trace_block_plug(blk_add_trace_plug, NULL); - unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); unregister_trace_block_getrq(blk_add_trace_getrq, NULL); unregister_trace_block_bio_queue(blk_add_trace_bio_queue, NULL); unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL); @@ -1815,30 +1751,15 @@ static ssize_t blk_trace_mask2str(char *buf, int mask) return p - buf; } -static struct request_queue *blk_trace_get_queue(struct block_device *bdev) -{ - if (bdev->bd_disk == NULL) - return NULL; - - return bdev_get_queue(bdev); -} - static ssize_t sysfs_blk_trace_attr_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct block_device *bdev = bdget_part(dev_to_part(dev)); - struct request_queue *q; + struct block_device *bdev = 
dev_to_bdev(dev); + struct request_queue *q = bdev_get_queue(bdev); struct blk_trace *bt; ssize_t ret = -ENXIO; - if (bdev == NULL) - goto out; - - q = blk_trace_get_queue(bdev); - if (q == NULL) - goto out_bdput; - mutex_lock(&q->debugfs_mutex); bt = rcu_dereference_protected(q->blk_trace, @@ -1861,9 +1782,6 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, out_unlock_bdev: mutex_unlock(&q->debugfs_mutex); -out_bdput: - bdput(bdev); -out: return ret; } @@ -1871,8 +1789,8 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct block_device *bdev; - struct request_queue *q; + struct block_device *bdev = dev_to_bdev(dev); + struct request_queue *q = bdev_get_queue(bdev); struct blk_trace *bt; u64 value; ssize_t ret = -EINVAL; @@ -1888,17 +1806,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, goto out; value = ret; } - } else if (kstrtoull(buf, 0, &value)) - goto out; - - ret = -ENXIO; - bdev = bdget_part(dev_to_part(dev)); - if (bdev == NULL) - goto out; - - q = blk_trace_get_queue(bdev); - if (q == NULL) - goto out_bdput; + } else { + if (kstrtoull(buf, 0, &value)) + goto out; + } mutex_lock(&q->debugfs_mutex); @@ -1936,8 +1847,6 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, out_unlock_bdev: mutex_unlock(&q->debugfs_mutex); -out_bdput: - bdput(bdev); out: return ret ? 
ret : count; } diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 267aa7709416..d693d9213ceb 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -12,33 +12,24 @@ /* * See if we have deferred clears that we can batch move */ -static inline bool sbitmap_deferred_clear(struct sbitmap *sb, int index) +static inline bool sbitmap_deferred_clear(struct sbitmap_word *map) { - unsigned long mask, val; - bool ret = false; - unsigned long flags; + unsigned long mask; - spin_lock_irqsave(&sb->map[index].swap_lock, flags); - - if (!sb->map[index].cleared) - goto out_unlock; + if (!READ_ONCE(map->cleared)) + return false; /* * First get a stable cleared mask, setting the old mask to 0. */ - mask = xchg(&sb->map[index].cleared, 0); + mask = xchg(&map->cleared, 0); /* * Now clear the masked bits in our free word */ - do { - val = sb->map[index].word; - } while (cmpxchg(&sb->map[index].word, val, val & ~mask) != val); - - ret = true; -out_unlock: - spin_unlock_irqrestore(&sb->map[index].swap_lock, flags); - return ret; + atomic_long_andnot(mask, (atomic_long_t *)&map->word); + BUILD_BUG_ON(sizeof(atomic_long_t) != sizeof(map->word)); + return true; } int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, @@ -80,7 +71,6 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, for (i = 0; i < sb->map_nr; i++) { sb->map[i].depth = min(depth, bits_per_word); depth -= sb->map[i].depth; - spin_lock_init(&sb->map[i].swap_lock); } return 0; } @@ -92,7 +82,7 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth) unsigned int i; for (i = 0; i < sb->map_nr; i++) - sbitmap_deferred_clear(sb, i); + sbitmap_deferred_clear(&sb->map[i]); sb->depth = depth; sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); @@ -107,9 +97,11 @@ EXPORT_SYMBOL_GPL(sbitmap_resize); static int __sbitmap_get_word(unsigned long *word, unsigned long depth, unsigned int hint, bool wrap) { - unsigned int orig_hint = hint; int nr; + /* don't wrap if starting from 0 */ + wrap = wrap 
&& hint; + while (1) { nr = find_next_zero_bit(word, depth, hint); if (unlikely(nr >= depth)) { @@ -118,8 +110,8 @@ static int __sbitmap_get_word(unsigned long *word, unsigned long depth, * offset to 0 in a failure case, so start from 0 to * exhaust the map. */ - if (orig_hint && hint && wrap) { - hint = orig_hint = 0; + if (hint && wrap) { + hint = 0; continue; } return -1; @@ -139,15 +131,15 @@ static int __sbitmap_get_word(unsigned long *word, unsigned long depth, static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index, unsigned int alloc_hint, bool round_robin) { + struct sbitmap_word *map = &sb->map[index]; int nr; do { - nr = __sbitmap_get_word(&sb->map[index].word, - sb->map[index].depth, alloc_hint, + nr = __sbitmap_get_word(&map->word, map->depth, alloc_hint, !round_robin); if (nr != -1) break; - if (!sbitmap_deferred_clear(sb, index)) + if (!sbitmap_deferred_clear(map)) break; } while (1); @@ -207,7 +199,7 @@ int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint, break; } - if (sbitmap_deferred_clear(sb, index)) + if (sbitmap_deferred_clear(&sb->map[index])) goto again; /* Jump to next index. 
*/ diff --git a/mm/filemap.c b/mm/filemap.c index 2e16daf98bf9..7a49bac48aea 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2981,14 +2981,14 @@ EXPORT_SYMBOL(filemap_map_pages); vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf) { + struct address_space *mapping = vmf->vma->vm_file->f_mapping; struct page *page = vmf->page; - struct inode *inode = file_inode(vmf->vma->vm_file); vm_fault_t ret = VM_FAULT_LOCKED; - sb_start_pagefault(inode->i_sb); + sb_start_pagefault(mapping->host->i_sb); file_update_time(vmf->vma->vm_file); lock_page(page); - if (page->mapping != inode->i_mapping) { + if (page->mapping != mapping) { unlock_page(page); ret = VM_FAULT_NOPAGE; goto out; @@ -3001,7 +3001,7 @@ vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf) set_page_dirty(page); wait_for_stable_page(page); out: - sb_end_pagefault(inode->i_sb); + sb_end_pagefault(mapping->host->i_sb); return ret; } @@ -3244,10 +3244,9 @@ void dio_warn_stale_pagecache(struct file *filp) { static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST); char pathname[128]; - struct inode *inode = file_inode(filp); char *path; - errseq_set(&inode->i_mapping->wb_err, -EIO); + errseq_set(&filp->f_mapping->wb_err, -EIO); if (__ratelimit(&_rs)) { path = file_path(filp, pathname, sizeof(pathname)); if (IS_ERR(path)) @@ -3274,7 +3273,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from) if (iocb->ki_flags & IOCB_NOWAIT) { /* If there are pages to writeback, return */ - if (filemap_range_has_page(inode->i_mapping, pos, + if (filemap_range_has_page(file->f_mapping, pos, pos + write_len - 1)) return -EAGAIN; } else {