linux/drivers/mtd/mtd_blkdevs.c
Linus Torvalds 1081230b74 Merge branch 'for-4.3/core' of git://git.kernel.dk/linux-block
Pull core block updates from Jens Axboe:
 "This first core part of the block IO changes contains:

   - Cleanup of the bio IO error signaling from Christoph.  We used to
     rely on the uptodate bit and passing around of an error, now we
     store the error in the bio itself.

   - Improvement of the above from myself, by shrinking the bio size
     down again to fit in two cachelines on x86-64.

   - Revert of the max_hw_sectors cap removal from a revision again,
     from Jeff Moyer.  This caused performance regressions in various
     tests.  Reinstate the limit, bump it to a more reasonable size
     instead.

   - Make /sys/block/<dev>/queue/discard_max_bytes writeable, by me.
     Most devices have huge trim limits, which can cause nasty latencies
     when deleting files.  Enable the admin to configure the size down.
     We will look into having a more sane default instead of UINT_MAX
     sectors.

   - Improvement of the SGP gaps logic from Keith Busch.

   - Enable the block core to handle arbitrarily sized bios, which
     enables a nice simplification of bio_add_page() (which is an IO hot
     path).  From Kent.

   - Improvements to the partition io stats accounting, making it
     faster.  From Ming Lei.

   - Also from Ming Lei, a basic fixup for overflow of the sysfs pending
     file in blk-mq, as well as a fix for a blk-mq timeout race
     condition.

   - Ming Lin has been carrying Kents above mentioned patches forward
     for a while, and testing them.  Ming also did a few fixes around
     that.

   - Sasha Levin found and fixed a use-after-free problem introduced by
     the bio->bi_error changes from Christoph.

   - Small blk cgroup cleanup from Viresh Kumar"

* 'for-4.3/core' of git://git.kernel.dk/linux-block: (26 commits)
  blk: Fix bio_io_vec index when checking bvec gaps
  block: Replace SG_GAPS with new queue limits mask
  block: bump BLK_DEF_MAX_SECTORS to 2560
  Revert "block: remove artifical max_hw_sectors cap"
  blk-mq: fix race between timeout and freeing request
  blk-mq: fix buffer overflow when reading sysfs file of 'pending'
  Documentation: update notes in biovecs about arbitrarily sized bios
  block: remove bio_get_nr_vecs()
  fs: use helper bio_add_page() instead of open coding on bi_io_vec
  block: kill merge_bvec_fn() completely
  md/raid5: get rid of bio_fits_rdev()
  md/raid5: split bio for chunk_aligned_read
  block: remove split code in blkdev_issue_{discard,write_same}
  btrfs: remove bio splitting and merge_bvec_fn() calls
  bcache: remove driver private bio splitting code
  block: simplify bio_add_page()
  block: make generic_make_request handle arbitrarily sized bios
  blk-cgroup: Drop unlikely before IS_ERR(_OR_NULL)
  block: don't access bio->bi_error after bio_put()
  block: shrink struct bio down to 2 cache lines again
  ...
2015-09-02 13:10:25 -07:00

600 lines
13 KiB
C

/*
* Interface to Linux block layer for MTD 'translation layers'.
*
* Copyright © 2003-2010 David Woodhouse <dwmw2@infradead.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/fs.h>
#include <linux/mtd/blktrans.h>
#include <linux/mtd/mtd.h>
#include <linux/blkdev.h>
#include <linux/blkpg.h>
#include <linux/spinlock.h>
#include <linux/hdreg.h>
#include <linux/mutex.h>
#include <asm/uaccess.h>
#include "mtdcore.h"
static LIST_HEAD(blktrans_majors);
static DEFINE_MUTEX(blktrans_ref_mutex);
static void blktrans_dev_release(struct kref *kref)
{
struct mtd_blktrans_dev *dev =
container_of(kref, struct mtd_blktrans_dev, ref);
dev->disk->private_data = NULL;
blk_cleanup_queue(dev->rq);
put_disk(dev->disk);
list_del(&dev->list);
kfree(dev);
}
static struct mtd_blktrans_dev *blktrans_dev_get(struct gendisk *disk)
{
struct mtd_blktrans_dev *dev;
mutex_lock(&blktrans_ref_mutex);
dev = disk->private_data;
if (!dev)
goto unlock;
kref_get(&dev->ref);
unlock:
mutex_unlock(&blktrans_ref_mutex);
return dev;
}
static void blktrans_dev_put(struct mtd_blktrans_dev *dev)
{
mutex_lock(&blktrans_ref_mutex);
kref_put(&dev->ref, blktrans_dev_release);
mutex_unlock(&blktrans_ref_mutex);
}
static int do_blktrans_request(struct mtd_blktrans_ops *tr,
struct mtd_blktrans_dev *dev,
struct request *req)
{
unsigned long block, nsect;
char *buf;
block = blk_rq_pos(req) << 9 >> tr->blkshift;
nsect = blk_rq_cur_bytes(req) >> tr->blkshift;
buf = bio_data(req->bio);
if (req->cmd_type != REQ_TYPE_FS)
return -EIO;
if (req->cmd_flags & REQ_FLUSH)
return tr->flush(dev);
if (blk_rq_pos(req) + blk_rq_cur_sectors(req) >
get_capacity(req->rq_disk))
return -EIO;
if (req->cmd_flags & REQ_DISCARD)
return tr->discard(dev, block, nsect);
if (rq_data_dir(req) == READ) {
for (; nsect > 0; nsect--, block++, buf += tr->blksize)
if (tr->readsect(dev, block, buf))
return -EIO;
rq_flush_dcache_pages(req);
return 0;
} else {
if (!tr->writesect)
return -EIO;
rq_flush_dcache_pages(req);
for (; nsect > 0; nsect--, block++, buf += tr->blksize)
if (tr->writesect(dev, block, buf))
return -EIO;
return 0;
}
}
int mtd_blktrans_cease_background(struct mtd_blktrans_dev *dev)
{
return dev->bg_stop;
}
EXPORT_SYMBOL_GPL(mtd_blktrans_cease_background);
static void mtd_blktrans_work(struct work_struct *work)
{
struct mtd_blktrans_dev *dev =
container_of(work, struct mtd_blktrans_dev, work);
struct mtd_blktrans_ops *tr = dev->tr;
struct request_queue *rq = dev->rq;
struct request *req = NULL;
int background_done = 0;
spin_lock_irq(rq->queue_lock);
while (1) {
int res;
dev->bg_stop = false;
if (!req && !(req = blk_fetch_request(rq))) {
if (tr->background && !background_done) {
spin_unlock_irq(rq->queue_lock);
mutex_lock(&dev->lock);
tr->background(dev);
mutex_unlock(&dev->lock);
spin_lock_irq(rq->queue_lock);
/*
* Do background processing just once per idle
* period.
*/
background_done = !dev->bg_stop;
continue;
}
break;
}
spin_unlock_irq(rq->queue_lock);
mutex_lock(&dev->lock);
res = do_blktrans_request(dev->tr, dev, req);
mutex_unlock(&dev->lock);
spin_lock_irq(rq->queue_lock);
if (!__blk_end_request_cur(req, res))
req = NULL;
background_done = 0;
}
spin_unlock_irq(rq->queue_lock);
}
static void mtd_blktrans_request(struct request_queue *rq)
{
struct mtd_blktrans_dev *dev;
struct request *req = NULL;
dev = rq->queuedata;
if (!dev)
while ((req = blk_fetch_request(rq)) != NULL)
__blk_end_request_all(req, -ENODEV);
else
queue_work(dev->wq, &dev->work);
}
static int blktrans_open(struct block_device *bdev, fmode_t mode)
{
struct mtd_blktrans_dev *dev = blktrans_dev_get(bdev->bd_disk);
int ret = 0;
if (!dev)
return -ERESTARTSYS; /* FIXME: busy loop! -arnd*/
mutex_lock(&dev->lock);
mutex_lock(&mtd_table_mutex);
if (dev->open)
goto unlock;
kref_get(&dev->ref);
__module_get(dev->tr->owner);
if (!dev->mtd)
goto unlock;
if (dev->tr->open) {
ret = dev->tr->open(dev);
if (ret)
goto error_put;
}
ret = __get_mtd_device(dev->mtd);
if (ret)
goto error_release;
dev->file_mode = mode;
unlock:
dev->open++;
mutex_unlock(&mtd_table_mutex);
mutex_unlock(&dev->lock);
blktrans_dev_put(dev);
return ret;
error_release:
if (dev->tr->release)
dev->tr->release(dev);
error_put:
module_put(dev->tr->owner);
kref_put(&dev->ref, blktrans_dev_release);
mutex_unlock(&mtd_table_mutex);
mutex_unlock(&dev->lock);
blktrans_dev_put(dev);
return ret;
}
static void blktrans_release(struct gendisk *disk, fmode_t mode)
{
struct mtd_blktrans_dev *dev = blktrans_dev_get(disk);
if (!dev)
return;
mutex_lock(&dev->lock);
mutex_lock(&mtd_table_mutex);
if (--dev->open)
goto unlock;
kref_put(&dev->ref, blktrans_dev_release);
module_put(dev->tr->owner);
if (dev->mtd) {
if (dev->tr->release)
dev->tr->release(dev);
__put_mtd_device(dev->mtd);
}
unlock:
mutex_unlock(&mtd_table_mutex);
mutex_unlock(&dev->lock);
blktrans_dev_put(dev);
}
static int blktrans_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
struct mtd_blktrans_dev *dev = blktrans_dev_get(bdev->bd_disk);
int ret = -ENXIO;
if (!dev)
return ret;
mutex_lock(&dev->lock);
if (!dev->mtd)
goto unlock;
ret = dev->tr->getgeo ? dev->tr->getgeo(dev, geo) : -ENOTTY;
unlock:
mutex_unlock(&dev->lock);
blktrans_dev_put(dev);
return ret;
}
static int blktrans_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
struct mtd_blktrans_dev *dev = blktrans_dev_get(bdev->bd_disk);
int ret = -ENXIO;
if (!dev)
return ret;
mutex_lock(&dev->lock);
if (!dev->mtd)
goto unlock;
switch (cmd) {
case BLKFLSBUF:
ret = dev->tr->flush ? dev->tr->flush(dev) : 0;
break;
default:
ret = -ENOTTY;
}
unlock:
mutex_unlock(&dev->lock);
blktrans_dev_put(dev);
return ret;
}
static const struct block_device_operations mtd_block_ops = {
.owner = THIS_MODULE,
.open = blktrans_open,
.release = blktrans_release,
.ioctl = blktrans_ioctl,
.getgeo = blktrans_getgeo,
};
int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
{
struct mtd_blktrans_ops *tr = new->tr;
struct mtd_blktrans_dev *d;
int last_devnum = -1;
struct gendisk *gd;
int ret;
if (mutex_trylock(&mtd_table_mutex)) {
mutex_unlock(&mtd_table_mutex);
BUG();
}
mutex_lock(&blktrans_ref_mutex);
list_for_each_entry(d, &tr->devs, list) {
if (new->devnum == -1) {
/* Use first free number */
if (d->devnum != last_devnum+1) {
/* Found a free devnum. Plug it in here */
new->devnum = last_devnum+1;
list_add_tail(&new->list, &d->list);
goto added;
}
} else if (d->devnum == new->devnum) {
/* Required number taken */
mutex_unlock(&blktrans_ref_mutex);
return -EBUSY;
} else if (d->devnum > new->devnum) {
/* Required number was free */
list_add_tail(&new->list, &d->list);
goto added;
}
last_devnum = d->devnum;
}
ret = -EBUSY;
if (new->devnum == -1)
new->devnum = last_devnum+1;
/* Check that the device and any partitions will get valid
* minor numbers and that the disk naming code below can cope
* with this number. */
if (new->devnum > (MINORMASK >> tr->part_bits) ||
(tr->part_bits && new->devnum >= 27 * 26)) {
mutex_unlock(&blktrans_ref_mutex);
goto error1;
}
list_add_tail(&new->list, &tr->devs);
added:
mutex_unlock(&blktrans_ref_mutex);
mutex_init(&new->lock);
kref_init(&new->ref);
if (!tr->writesect)
new->readonly = 1;
/* Create gendisk */
ret = -ENOMEM;
gd = alloc_disk(1 << tr->part_bits);
if (!gd)
goto error2;
new->disk = gd;
gd->private_data = new;
gd->major = tr->major;
gd->first_minor = (new->devnum) << tr->part_bits;
gd->fops = &mtd_block_ops;
if (tr->part_bits)
if (new->devnum < 26)
snprintf(gd->disk_name, sizeof(gd->disk_name),
"%s%c", tr->name, 'a' + new->devnum);
else
snprintf(gd->disk_name, sizeof(gd->disk_name),
"%s%c%c", tr->name,
'a' - 1 + new->devnum / 26,
'a' + new->devnum % 26);
else
snprintf(gd->disk_name, sizeof(gd->disk_name),
"%s%d", tr->name, new->devnum);
set_capacity(gd, (new->size * tr->blksize) >> 9);
/* Create the request queue */
spin_lock_init(&new->queue_lock);
new->rq = blk_init_queue(mtd_blktrans_request, &new->queue_lock);
if (!new->rq)
goto error3;
if (tr->flush)
blk_queue_flush(new->rq, REQ_FLUSH);
new->rq->queuedata = new;
blk_queue_logical_block_size(new->rq, tr->blksize);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, new->rq);
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, new->rq);
if (tr->discard) {
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, new->rq);
blk_queue_max_discard_sectors(new->rq, UINT_MAX);
}
gd->queue = new->rq;
/* Create processing workqueue */
new->wq = alloc_workqueue("%s%d", 0, 0,
tr->name, new->mtd->index);
if (!new->wq)
goto error4;
INIT_WORK(&new->work, mtd_blktrans_work);
gd->driverfs_dev = &new->mtd->dev;
if (new->readonly)
set_disk_ro(gd, 1);
add_disk(gd);
if (new->disk_attributes) {
ret = sysfs_create_group(&disk_to_dev(gd)->kobj,
new->disk_attributes);
WARN_ON(ret);
}
return 0;
error4:
blk_cleanup_queue(new->rq);
error3:
put_disk(new->disk);
error2:
list_del(&new->list);
error1:
return ret;
}
int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old)
{
unsigned long flags;
if (mutex_trylock(&mtd_table_mutex)) {
mutex_unlock(&mtd_table_mutex);
BUG();
}
if (old->disk_attributes)
sysfs_remove_group(&disk_to_dev(old->disk)->kobj,
old->disk_attributes);
/* Stop new requests to arrive */
del_gendisk(old->disk);
/* Stop workqueue. This will perform any pending request. */
destroy_workqueue(old->wq);
/* Kill current requests */
spin_lock_irqsave(&old->queue_lock, flags);
old->rq->queuedata = NULL;
blk_start_queue(old->rq);
spin_unlock_irqrestore(&old->queue_lock, flags);
/* If the device is currently open, tell trans driver to close it,
then put mtd device, and don't touch it again */
mutex_lock(&old->lock);
if (old->open) {
if (old->tr->release)
old->tr->release(old);
__put_mtd_device(old->mtd);
}
old->mtd = NULL;
mutex_unlock(&old->lock);
blktrans_dev_put(old);
return 0;
}
static void blktrans_notify_remove(struct mtd_info *mtd)
{
struct mtd_blktrans_ops *tr;
struct mtd_blktrans_dev *dev, *next;
list_for_each_entry(tr, &blktrans_majors, list)
list_for_each_entry_safe(dev, next, &tr->devs, list)
if (dev->mtd == mtd)
tr->remove_dev(dev);
}
static void blktrans_notify_add(struct mtd_info *mtd)
{
struct mtd_blktrans_ops *tr;
if (mtd->type == MTD_ABSENT)
return;
list_for_each_entry(tr, &blktrans_majors, list)
tr->add_mtd(tr, mtd);
}
static struct mtd_notifier blktrans_notifier = {
.add = blktrans_notify_add,
.remove = blktrans_notify_remove,
};
int register_mtd_blktrans(struct mtd_blktrans_ops *tr)
{
struct mtd_info *mtd;
int ret;
/* Register the notifier if/when the first device type is
registered, to prevent the link/init ordering from fucking
us over. */
if (!blktrans_notifier.list.next)
register_mtd_user(&blktrans_notifier);
mutex_lock(&mtd_table_mutex);
ret = register_blkdev(tr->major, tr->name);
if (ret < 0) {
printk(KERN_WARNING "Unable to register %s block device on major %d: %d\n",
tr->name, tr->major, ret);
mutex_unlock(&mtd_table_mutex);
return ret;
}
if (ret)
tr->major = ret;
tr->blkshift = ffs(tr->blksize) - 1;
INIT_LIST_HEAD(&tr->devs);
list_add(&tr->list, &blktrans_majors);
mtd_for_each_device(mtd)
if (mtd->type != MTD_ABSENT)
tr->add_mtd(tr, mtd);
mutex_unlock(&mtd_table_mutex);
return 0;
}
int deregister_mtd_blktrans(struct mtd_blktrans_ops *tr)
{
struct mtd_blktrans_dev *dev, *next;
mutex_lock(&mtd_table_mutex);
/* Remove it from the list of active majors */
list_del(&tr->list);
list_for_each_entry_safe(dev, next, &tr->devs, list)
tr->remove_dev(dev);
unregister_blkdev(tr->major, tr->name);
mutex_unlock(&mtd_table_mutex);
BUG_ON(!list_empty(&tr->devs));
return 0;
}
static void __exit mtd_blktrans_exit(void)
{
/* No race here -- if someone's currently in register_mtd_blktrans
we're screwed anyway. */
if (blktrans_notifier.list.next)
unregister_mtd_user(&blktrans_notifier);
}
module_exit(mtd_blktrans_exit);
EXPORT_SYMBOL_GPL(register_mtd_blktrans);
EXPORT_SYMBOL_GPL(deregister_mtd_blktrans);
EXPORT_SYMBOL_GPL(add_mtd_blktrans_dev);
EXPORT_SYMBOL_GPL(del_mtd_blktrans_dev);
MODULE_AUTHOR("David Woodhouse <dwmw2@infradead.org>");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Common interface to block layer for MTD 'translation layers'");