linux/fs/char_dev.c
Linus Torvalds 0910c0bdf7 Merge branch 'for-3.13/core' of git://git.kernel.dk/linux-block
Pull block IO core updates from Jens Axboe:
 "This is the pull request for the core changes in the block layer for
  3.13.  It contains:

   - The new blk-mq request interface.

     This is a new and more scalable queueing model that marries the
     best part of the request based interface we currently have (which
     is fully featured, but scales poorly) and the bio based "interface"
     which the new drivers for high IOPS devices end up using because
     it's much faster than the request based one.

     The bio interface has no block layer support, since it taps into
     the stack much earlier.  This means that drivers end up having to
     implement a lot of functionality on their own, like tagging,
     timeout handling, requeue, etc.  The blk-mq interface provides all
     these.  Some drivers even provide a switch to select bio or rq and
     has code to handle both, since things like merging only works in
     the rq model and hence is faster for some workloads.  This is a
     huge mess.  Conversion of these drivers nets us a substantial code
     reduction.  Initial results on converting SCSI to this model even
     shows an 8x improvement on single queue devices.  So while the
     model was intended to work on the newer multiqueue devices, it has
     substantial improvements for "classic" hardware as well.  This code
     has gone through extensive testing and development, it's now ready
     to go.  A pull request is coming to convert virtio-blk to this
     model will be will be coming as well, with more drivers scheduled
     for 3.14 conversion.

   - Two blktrace fixes from Jan and Chen Gang.

   - A plug merge fix from Alireza Haghdoost.

   - Conversion of __get_cpu_var() from Christoph Lameter.

   - Fix for sector_div() with 64-bit divider from Geert Uytterhoeven.

   - A fix for a race between request completion and the timeout
     handling from Jeff Moyer.  This is what caused the merge conflict
     with blk-mq/core, in case you are looking at that.

   - A dm stacking fix from Mike Snitzer.

   - A code consolidation fix and duplicated code removal from Kent
     Overstreet.

   - A handful of block bug fixes from Mikulas Patocka, fixing a loop
     crash and memory corruption on blk cg.

   - Elevator switch bug fix from Tomoki Sekiyama.

  A heads-up that I had to rebase this branch.  Initially the immutable
  bio_vecs had been queued up for inclusion, but a week later, it became
  clear that it wasn't fully cooked yet.  So the decision was made to
  pull this out and postpone it until 3.14.  It was a straight forward
  rebase, just pruning out the immutable series and the later fixes of
  problems with it.  The rest of the patches applied directly and no
  further changes were made"

* 'for-3.13/core' of git://git.kernel.dk/linux-block: (31 commits)
  block: replace IS_ERR and PTR_ERR with PTR_ERR_OR_ZERO
  block: replace IS_ERR and PTR_ERR with PTR_ERR_OR_ZERO
  block: Do not call sector_div() with a 64-bit divisor
  kernel: trace: blktrace: remove redundent memcpy() in compat_blk_trace_setup()
  block: Consolidate duplicated bio_trim() implementations
  block: Use rw_copy_check_uvector()
  block: Enable sysfs nomerge control for I/O requests in the plug list
  block: properly stack underlying max_segment_size to DM device
  elevator: acquire q->sysfs_lock in elevator_change()
  elevator: Fix a race in elevator switching and md device initialization
  block: Replace __get_cpu_var uses
  bdi: test bdi_init failure
  block: fix a probe argument to blk_register_region
  loop: fix crash if blk_alloc_queue fails
  blk-core: Fix memory corruption if blkcg_init_queue fails
  block: fix race between request completion and timeout handling
  blktrace: Send BLK_TN_PROCESS events to all running traces
  blk-mq: don't disallow request merges for req->special being set
  blk-mq: mq plug list breakage
  blk-mq: fix for flush deadlock
  ...
2013-11-14 12:08:14 +09:00

594 lines
14 KiB
C

/*
* linux/fs/char_dev.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*/
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/kdev_t.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/major.h>
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/kobject.h>
#include <linux/kobj_map.h>
#include <linux/cdev.h>
#include <linux/mutex.h>
#include <linux/backing-dev.h>
#include <linux/tty.h>
#include "internal.h"
/*
* capabilities for /dev/mem, /dev/kmem and similar directly mappable character
* devices
* - permits shared-mmap for read, write and/or exec
* - does not permit private mmap in NOMMU mode (can't do COW)
* - no readahead or I/O queue unplugging required
*/
struct backing_dev_info directly_mappable_cdev_bdi = {
.name = "char",
.capabilities = (
#ifdef CONFIG_MMU
/* permit private copies of the data to be taken */
BDI_CAP_MAP_COPY |
#endif
/* permit direct mmap, for read, write or exec */
BDI_CAP_MAP_DIRECT |
BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP |
/* no writeback happens */
BDI_CAP_NO_ACCT_AND_WRITEBACK),
};
static struct kobj_map *cdev_map;
static DEFINE_MUTEX(chrdevs_lock);
static struct char_device_struct {
struct char_device_struct *next;
unsigned int major;
unsigned int baseminor;
int minorct;
char name[64];
struct cdev *cdev; /* will die */
} *chrdevs[CHRDEV_MAJOR_HASH_SIZE];
/* index in the above */
static inline int major_to_index(unsigned major)
{
return major % CHRDEV_MAJOR_HASH_SIZE;
}
#ifdef CONFIG_PROC_FS
void chrdev_show(struct seq_file *f, off_t offset)
{
struct char_device_struct *cd;
if (offset < CHRDEV_MAJOR_HASH_SIZE) {
mutex_lock(&chrdevs_lock);
for (cd = chrdevs[offset]; cd; cd = cd->next)
seq_printf(f, "%3d %s\n", cd->major, cd->name);
mutex_unlock(&chrdevs_lock);
}
}
#endif /* CONFIG_PROC_FS */
/*
* Register a single major with a specified minor range.
*
* If major == 0 this functions will dynamically allocate a major and return
* its number.
*
* If major > 0 this function will attempt to reserve the passed range of
* minors and will return zero on success.
*
* Returns a -ve errno on failure.
*/
static struct char_device_struct *
__register_chrdev_region(unsigned int major, unsigned int baseminor,
int minorct, const char *name)
{
struct char_device_struct *cd, **cp;
int ret = 0;
int i;
cd = kzalloc(sizeof(struct char_device_struct), GFP_KERNEL);
if (cd == NULL)
return ERR_PTR(-ENOMEM);
mutex_lock(&chrdevs_lock);
/* temporary */
if (major == 0) {
for (i = ARRAY_SIZE(chrdevs)-1; i > 0; i--) {
if (chrdevs[i] == NULL)
break;
}
if (i == 0) {
ret = -EBUSY;
goto out;
}
major = i;
ret = major;
}
cd->major = major;
cd->baseminor = baseminor;
cd->minorct = minorct;
strlcpy(cd->name, name, sizeof(cd->name));
i = major_to_index(major);
for (cp = &chrdevs[i]; *cp; cp = &(*cp)->next)
if ((*cp)->major > major ||
((*cp)->major == major &&
(((*cp)->baseminor >= baseminor) ||
((*cp)->baseminor + (*cp)->minorct > baseminor))))
break;
/* Check for overlapping minor ranges. */
if (*cp && (*cp)->major == major) {
int old_min = (*cp)->baseminor;
int old_max = (*cp)->baseminor + (*cp)->minorct - 1;
int new_min = baseminor;
int new_max = baseminor + minorct - 1;
/* New driver overlaps from the left. */
if (new_max >= old_min && new_max <= old_max) {
ret = -EBUSY;
goto out;
}
/* New driver overlaps from the right. */
if (new_min <= old_max && new_min >= old_min) {
ret = -EBUSY;
goto out;
}
}
cd->next = *cp;
*cp = cd;
mutex_unlock(&chrdevs_lock);
return cd;
out:
mutex_unlock(&chrdevs_lock);
kfree(cd);
return ERR_PTR(ret);
}
static struct char_device_struct *
__unregister_chrdev_region(unsigned major, unsigned baseminor, int minorct)
{
struct char_device_struct *cd = NULL, **cp;
int i = major_to_index(major);
mutex_lock(&chrdevs_lock);
for (cp = &chrdevs[i]; *cp; cp = &(*cp)->next)
if ((*cp)->major == major &&
(*cp)->baseminor == baseminor &&
(*cp)->minorct == minorct)
break;
if (*cp) {
cd = *cp;
*cp = cd->next;
}
mutex_unlock(&chrdevs_lock);
return cd;
}
/**
* register_chrdev_region() - register a range of device numbers
* @from: the first in the desired range of device numbers; must include
* the major number.
* @count: the number of consecutive device numbers required
* @name: the name of the device or driver.
*
* Return value is zero on success, a negative error code on failure.
*/
int register_chrdev_region(dev_t from, unsigned count, const char *name)
{
struct char_device_struct *cd;
dev_t to = from + count;
dev_t n, next;
for (n = from; n < to; n = next) {
next = MKDEV(MAJOR(n)+1, 0);
if (next > to)
next = to;
cd = __register_chrdev_region(MAJOR(n), MINOR(n),
next - n, name);
if (IS_ERR(cd))
goto fail;
}
return 0;
fail:
to = n;
for (n = from; n < to; n = next) {
next = MKDEV(MAJOR(n)+1, 0);
kfree(__unregister_chrdev_region(MAJOR(n), MINOR(n), next - n));
}
return PTR_ERR(cd);
}
/**
* alloc_chrdev_region() - register a range of char device numbers
* @dev: output parameter for first assigned number
* @baseminor: first of the requested range of minor numbers
* @count: the number of minor numbers required
* @name: the name of the associated device or driver
*
* Allocates a range of char device numbers. The major number will be
* chosen dynamically, and returned (along with the first minor number)
* in @dev. Returns zero or a negative error code.
*/
int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count,
const char *name)
{
struct char_device_struct *cd;
cd = __register_chrdev_region(0, baseminor, count, name);
if (IS_ERR(cd))
return PTR_ERR(cd);
*dev = MKDEV(cd->major, cd->baseminor);
return 0;
}
/**
* __register_chrdev() - create and register a cdev occupying a range of minors
* @major: major device number or 0 for dynamic allocation
* @baseminor: first of the requested range of minor numbers
* @count: the number of minor numbers required
* @name: name of this range of devices
* @fops: file operations associated with this devices
*
* If @major == 0 this functions will dynamically allocate a major and return
* its number.
*
* If @major > 0 this function will attempt to reserve a device with the given
* major number and will return zero on success.
*
* Returns a -ve errno on failure.
*
* The name of this device has nothing to do with the name of the device in
* /dev. It only helps to keep track of the different owners of devices. If
* your module name has only one type of devices it's ok to use e.g. the name
* of the module here.
*/
int __register_chrdev(unsigned int major, unsigned int baseminor,
unsigned int count, const char *name,
const struct file_operations *fops)
{
struct char_device_struct *cd;
struct cdev *cdev;
int err = -ENOMEM;
cd = __register_chrdev_region(major, baseminor, count, name);
if (IS_ERR(cd))
return PTR_ERR(cd);
cdev = cdev_alloc();
if (!cdev)
goto out2;
cdev->owner = fops->owner;
cdev->ops = fops;
kobject_set_name(&cdev->kobj, "%s", name);
err = cdev_add(cdev, MKDEV(cd->major, baseminor), count);
if (err)
goto out;
cd->cdev = cdev;
return major ? 0 : cd->major;
out:
kobject_put(&cdev->kobj);
out2:
kfree(__unregister_chrdev_region(cd->major, baseminor, count));
return err;
}
/**
* unregister_chrdev_region() - return a range of device numbers
* @from: the first in the range of numbers to unregister
* @count: the number of device numbers to unregister
*
* This function will unregister a range of @count device numbers,
* starting with @from. The caller should normally be the one who
* allocated those numbers in the first place...
*/
void unregister_chrdev_region(dev_t from, unsigned count)
{
dev_t to = from + count;
dev_t n, next;
for (n = from; n < to; n = next) {
next = MKDEV(MAJOR(n)+1, 0);
if (next > to)
next = to;
kfree(__unregister_chrdev_region(MAJOR(n), MINOR(n), next - n));
}
}
/**
* __unregister_chrdev - unregister and destroy a cdev
* @major: major device number
* @baseminor: first of the range of minor numbers
* @count: the number of minor numbers this cdev is occupying
* @name: name of this range of devices
*
* Unregister and destroy the cdev occupying the region described by
* @major, @baseminor and @count. This function undoes what
* __register_chrdev() did.
*/
void __unregister_chrdev(unsigned int major, unsigned int baseminor,
unsigned int count, const char *name)
{
struct char_device_struct *cd;
cd = __unregister_chrdev_region(major, baseminor, count);
if (cd && cd->cdev)
cdev_del(cd->cdev);
kfree(cd);
}
static DEFINE_SPINLOCK(cdev_lock);
static struct kobject *cdev_get(struct cdev *p)
{
struct module *owner = p->owner;
struct kobject *kobj;
if (owner && !try_module_get(owner))
return NULL;
kobj = kobject_get(&p->kobj);
if (!kobj)
module_put(owner);
return kobj;
}
void cdev_put(struct cdev *p)
{
if (p) {
struct module *owner = p->owner;
kobject_put(&p->kobj);
module_put(owner);
}
}
/*
* Called every time a character special file is opened
*/
static int chrdev_open(struct inode *inode, struct file *filp)
{
const struct file_operations *fops;
struct cdev *p;
struct cdev *new = NULL;
int ret = 0;
spin_lock(&cdev_lock);
p = inode->i_cdev;
if (!p) {
struct kobject *kobj;
int idx;
spin_unlock(&cdev_lock);
kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
if (!kobj)
return -ENXIO;
new = container_of(kobj, struct cdev, kobj);
spin_lock(&cdev_lock);
/* Check i_cdev again in case somebody beat us to it while
we dropped the lock. */
p = inode->i_cdev;
if (!p) {
inode->i_cdev = p = new;
list_add(&inode->i_devices, &p->list);
new = NULL;
} else if (!cdev_get(p))
ret = -ENXIO;
} else if (!cdev_get(p))
ret = -ENXIO;
spin_unlock(&cdev_lock);
cdev_put(new);
if (ret)
return ret;
ret = -ENXIO;
fops = fops_get(p->ops);
if (!fops)
goto out_cdev_put;
replace_fops(filp, fops);
if (filp->f_op->open) {
ret = filp->f_op->open(inode, filp);
if (ret)
goto out_cdev_put;
}
return 0;
out_cdev_put:
cdev_put(p);
return ret;
}
void cd_forget(struct inode *inode)
{
spin_lock(&cdev_lock);
list_del_init(&inode->i_devices);
inode->i_cdev = NULL;
spin_unlock(&cdev_lock);
}
static void cdev_purge(struct cdev *cdev)
{
spin_lock(&cdev_lock);
while (!list_empty(&cdev->list)) {
struct inode *inode;
inode = container_of(cdev->list.next, struct inode, i_devices);
list_del_init(&inode->i_devices);
inode->i_cdev = NULL;
}
spin_unlock(&cdev_lock);
}
/*
* Dummy default file-operations: the only thing this does
* is contain the open that then fills in the correct operations
* depending on the special file...
*/
const struct file_operations def_chr_fops = {
.open = chrdev_open,
.llseek = noop_llseek,
};
static struct kobject *exact_match(dev_t dev, int *part, void *data)
{
struct cdev *p = data;
return &p->kobj;
}
static int exact_lock(dev_t dev, void *data)
{
struct cdev *p = data;
return cdev_get(p) ? 0 : -1;
}
/**
* cdev_add() - add a char device to the system
* @p: the cdev structure for the device
* @dev: the first device number for which this device is responsible
* @count: the number of consecutive minor numbers corresponding to this
* device
*
* cdev_add() adds the device represented by @p to the system, making it
* live immediately. A negative error code is returned on failure.
*/
int cdev_add(struct cdev *p, dev_t dev, unsigned count)
{
int error;
p->dev = dev;
p->count = count;
error = kobj_map(cdev_map, dev, count, NULL,
exact_match, exact_lock, p);
if (error)
return error;
kobject_get(p->kobj.parent);
return 0;
}
static void cdev_unmap(dev_t dev, unsigned count)
{
kobj_unmap(cdev_map, dev, count);
}
/**
* cdev_del() - remove a cdev from the system
* @p: the cdev structure to be removed
*
* cdev_del() removes @p from the system, possibly freeing the structure
* itself.
*/
void cdev_del(struct cdev *p)
{
cdev_unmap(p->dev, p->count);
kobject_put(&p->kobj);
}
static void cdev_default_release(struct kobject *kobj)
{
struct cdev *p = container_of(kobj, struct cdev, kobj);
struct kobject *parent = kobj->parent;
cdev_purge(p);
kobject_put(parent);
}
static void cdev_dynamic_release(struct kobject *kobj)
{
struct cdev *p = container_of(kobj, struct cdev, kobj);
struct kobject *parent = kobj->parent;
cdev_purge(p);
kfree(p);
kobject_put(parent);
}
static struct kobj_type ktype_cdev_default = {
.release = cdev_default_release,
};
static struct kobj_type ktype_cdev_dynamic = {
.release = cdev_dynamic_release,
};
/**
* cdev_alloc() - allocate a cdev structure
*
* Allocates and returns a cdev structure, or NULL on failure.
*/
struct cdev *cdev_alloc(void)
{
struct cdev *p = kzalloc(sizeof(struct cdev), GFP_KERNEL);
if (p) {
INIT_LIST_HEAD(&p->list);
kobject_init(&p->kobj, &ktype_cdev_dynamic);
}
return p;
}
/**
* cdev_init() - initialize a cdev structure
* @cdev: the structure to initialize
* @fops: the file_operations for this device
*
* Initializes @cdev, remembering @fops, making it ready to add to the
* system with cdev_add().
*/
void cdev_init(struct cdev *cdev, const struct file_operations *fops)
{
memset(cdev, 0, sizeof *cdev);
INIT_LIST_HEAD(&cdev->list);
kobject_init(&cdev->kobj, &ktype_cdev_default);
cdev->ops = fops;
}
static struct kobject *base_probe(dev_t dev, int *part, void *data)
{
if (request_module("char-major-%d-%d", MAJOR(dev), MINOR(dev)) > 0)
/* Make old-style 2.4 aliases work */
request_module("char-major-%d", MAJOR(dev));
return NULL;
}
void __init chrdev_init(void)
{
cdev_map = kobj_map_init(base_probe, &chrdevs_lock);
if (bdi_init(&directly_mappable_cdev_bdi))
panic("Failed to init directly mappable cdev bdi");
}
/* Let modules do char dev stuff */
EXPORT_SYMBOL(register_chrdev_region);
EXPORT_SYMBOL(unregister_chrdev_region);
EXPORT_SYMBOL(alloc_chrdev_region);
EXPORT_SYMBOL(cdev_init);
EXPORT_SYMBOL(cdev_alloc);
EXPORT_SYMBOL(cdev_del);
EXPORT_SYMBOL(cdev_add);
EXPORT_SYMBOL(__register_chrdev);
EXPORT_SYMBOL(__unregister_chrdev);
EXPORT_SYMBOL(directly_mappable_cdev_bdi);