Merge branch 'for-4.4/lightnvm' of git://git.kernel.dk/linux-block

Pull lightnvm support from Jens Axboe:
 "This adds support for lightnvm, and adds support to NVMe as well.
  This is pretty exciting, in that it enables new and interesting use
  cases for compatible flash devices.  There's a LWN writeup about an
  earlier posting here:

      https://lwn.net/Articles/641247/

  This has been underway for a while, and should be ready for merging at
  this point"

* 'for-4.4/lightnvm' of git://git.kernel.dk/linux-block:
  nvme: lightnvm: clean up a data type
  lightnvm: refactor phys addrs type to u64
  nvme: LightNVM support
  rrpc: Round-robin sector target with cost-based gc
  gennvm: Generic NVM manager
  lightnvm: Support for Open-Channel SSDs
This commit is contained in:
Linus Torvalds 2015-11-04 20:46:08 -08:00
commit effa04cc5a
17 changed files with 4197 additions and 12 deletions

View file

@ -149,6 +149,7 @@ Code Seq#(hex) Include File Comments
'K' all linux/kd.h
'L' 00-1F linux/loop.h conflict!
'L' 10-1F drivers/scsi/mpt2sas/mpt2sas_ctl.h conflict!
'L' 20-2F linux/lightnvm.h
'L' E0-FF linux/ppdd.h encrypted disk device driver
<http://linux01.gwdg.de/~alatham/ppdd.html>
'M' all linux/soundcard.h conflict!

View file

@ -6279,6 +6279,14 @@ F: drivers/nvdimm/pmem.c
F: include/linux/pmem.h
F: arch/*/include/asm/pmem.h
LIGHTNVM PLATFORM SUPPORT
M: Matias Bjorling <mb@lightnvm.io>
W: http://github/OpenChannelSSD
S: Maintained
F: drivers/lightnvm/
F: include/linux/lightnvm.h
F: include/uapi/linux/lightnvm.h
LINUX FOR IBM pSERIES (RS/6000)
M: Paul Mackerras <paulus@au.ibm.com>
W: http://www.ibm.com/linux/ltc/projects/ppc

View file

@ -44,6 +44,8 @@ source "drivers/net/Kconfig"
source "drivers/isdn/Kconfig"
source "drivers/lightnvm/Kconfig"
# input before char - char/joystick depends on it. As does USB.
source "drivers/input/Kconfig"

View file

@ -70,6 +70,7 @@ obj-$(CONFIG_NUBUS) += nubus/
obj-y += macintosh/
obj-$(CONFIG_IDE) += ide/
obj-$(CONFIG_SCSI) += scsi/
obj-$(CONFIG_NVM) += lightnvm/
obj-y += nvme/
obj-$(CONFIG_ATA) += ata/
obj-$(CONFIG_TARGET_CORE) += target/

42
drivers/lightnvm/Kconfig Normal file
View file

@ -0,0 +1,42 @@
#
# Open-Channel SSD NVM configuration
#
menuconfig NVM
bool "Open-Channel SSD target support"
depends on BLOCK
help
Say Y here to get to enable Open-channel SSDs.
Open-Channel SSDs implement a set of extension to SSDs, that
exposes direct access to the underlying non-volatile memory.
If you say N, all options in this submenu will be skipped and disabled
only do this if you know what you are doing.
if NVM
config NVM_DEBUG
bool "Open-Channel SSD debugging support"
---help---
Exposes a debug management interface to create/remove targets at:
/sys/module/lnvm/parameters/configure_debug
It is required to create/remove targets without IOCTLs.
config NVM_GENNVM
tristate "Generic NVM manager for Open-Channel SSDs"
---help---
NVM media manager for Open-Channel SSDs that offload management
functionality to device, while keeping data placement and garbage
collection decisions on the host.
config NVM_RRPC
tristate "Round-robin Hybrid Open-Channel SSD target"
---help---
Allows an open-channel SSD to be exposed as a block device to the
host. The target is implemented using a linear mapping table and
cost-based garbage collection. It is optimized for 4K IO sizes.
endif # NVM

View file

@ -0,0 +1,7 @@
#
# Makefile for Open-Channel SSDs.
#
obj-$(CONFIG_NVM) := core.o
obj-$(CONFIG_NVM_GENNVM) += gennvm.o
obj-$(CONFIG_NVM_RRPC) += rrpc.o

826
drivers/lightnvm/core.c Normal file
View file

@ -0,0 +1,826 @@
/*
* Copyright (C) 2015 IT University of Copenhagen. All rights reserved.
* Initial release: Matias Bjorling <m@bjorling.me>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
* USA.
*
*/
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/sem.h>
#include <linux/bitmap.h>
#include <linux/module.h>
#include <linux/miscdevice.h>
#include <linux/lightnvm.h>
#include <uapi/linux/lightnvm.h>
static LIST_HEAD(nvm_targets);
static LIST_HEAD(nvm_mgrs);
static LIST_HEAD(nvm_devices);
static DECLARE_RWSEM(nvm_lock);
static struct nvm_tgt_type *nvm_find_target_type(const char *name)
{
struct nvm_tgt_type *tt;
list_for_each_entry(tt, &nvm_targets, list)
if (!strcmp(name, tt->name))
return tt;
return NULL;
}
int nvm_register_target(struct nvm_tgt_type *tt)
{
int ret = 0;
down_write(&nvm_lock);
if (nvm_find_target_type(tt->name))
ret = -EEXIST;
else
list_add(&tt->list, &nvm_targets);
up_write(&nvm_lock);
return ret;
}
EXPORT_SYMBOL(nvm_register_target);
void nvm_unregister_target(struct nvm_tgt_type *tt)
{
if (!tt)
return;
down_write(&nvm_lock);
list_del(&tt->list);
up_write(&nvm_lock);
}
EXPORT_SYMBOL(nvm_unregister_target);
void *nvm_dev_dma_alloc(struct nvm_dev *dev, gfp_t mem_flags,
dma_addr_t *dma_handler)
{
return dev->ops->dev_dma_alloc(dev->q, dev->ppalist_pool, mem_flags,
dma_handler);
}
EXPORT_SYMBOL(nvm_dev_dma_alloc);
void nvm_dev_dma_free(struct nvm_dev *dev, void *ppa_list,
dma_addr_t dma_handler)
{
dev->ops->dev_dma_free(dev->ppalist_pool, ppa_list, dma_handler);
}
EXPORT_SYMBOL(nvm_dev_dma_free);
static struct nvmm_type *nvm_find_mgr_type(const char *name)
{
struct nvmm_type *mt;
list_for_each_entry(mt, &nvm_mgrs, list)
if (!strcmp(name, mt->name))
return mt;
return NULL;
}
int nvm_register_mgr(struct nvmm_type *mt)
{
int ret = 0;
down_write(&nvm_lock);
if (nvm_find_mgr_type(mt->name))
ret = -EEXIST;
else
list_add(&mt->list, &nvm_mgrs);
up_write(&nvm_lock);
return ret;
}
EXPORT_SYMBOL(nvm_register_mgr);
void nvm_unregister_mgr(struct nvmm_type *mt)
{
if (!mt)
return;
down_write(&nvm_lock);
list_del(&mt->list);
up_write(&nvm_lock);
}
EXPORT_SYMBOL(nvm_unregister_mgr);
static struct nvm_dev *nvm_find_nvm_dev(const char *name)
{
struct nvm_dev *dev;
list_for_each_entry(dev, &nvm_devices, devices)
if (!strcmp(name, dev->name))
return dev;
return NULL;
}
struct nvm_block *nvm_get_blk(struct nvm_dev *dev, struct nvm_lun *lun,
unsigned long flags)
{
return dev->mt->get_blk(dev, lun, flags);
}
EXPORT_SYMBOL(nvm_get_blk);
/* Assumes that all valid pages have already been moved on release to bm */
void nvm_put_blk(struct nvm_dev *dev, struct nvm_block *blk)
{
return dev->mt->put_blk(dev, blk);
}
EXPORT_SYMBOL(nvm_put_blk);
int nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
{
return dev->mt->submit_io(dev, rqd);
}
EXPORT_SYMBOL(nvm_submit_io);
int nvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk)
{
return dev->mt->erase_blk(dev, blk, 0);
}
EXPORT_SYMBOL(nvm_erase_blk);
static void nvm_core_free(struct nvm_dev *dev)
{
kfree(dev);
}
static int nvm_core_init(struct nvm_dev *dev)
{
struct nvm_id *id = &dev->identity;
struct nvm_id_group *grp = &id->groups[0];
/* device values */
dev->nr_chnls = grp->num_ch;
dev->luns_per_chnl = grp->num_lun;
dev->pgs_per_blk = grp->num_pg;
dev->blks_per_lun = grp->num_blk;
dev->nr_planes = grp->num_pln;
dev->sec_size = grp->csecs;
dev->oob_size = grp->sos;
dev->sec_per_pg = grp->fpg_sz / grp->csecs;
dev->addr_mode = id->ppat;
dev->addr_format = id->ppaf;
dev->plane_mode = NVM_PLANE_SINGLE;
dev->max_rq_size = dev->ops->max_phys_sect * dev->sec_size;
if (grp->mpos & 0x020202)
dev->plane_mode = NVM_PLANE_DOUBLE;
if (grp->mpos & 0x040404)
dev->plane_mode = NVM_PLANE_QUAD;
/* calculated values */
dev->sec_per_pl = dev->sec_per_pg * dev->nr_planes;
dev->sec_per_blk = dev->sec_per_pl * dev->pgs_per_blk;
dev->sec_per_lun = dev->sec_per_blk * dev->blks_per_lun;
dev->nr_luns = dev->luns_per_chnl * dev->nr_chnls;
dev->total_blocks = dev->nr_planes *
dev->blks_per_lun *
dev->luns_per_chnl *
dev->nr_chnls;
dev->total_pages = dev->total_blocks * dev->pgs_per_blk;
INIT_LIST_HEAD(&dev->online_targets);
return 0;
}
static void nvm_free(struct nvm_dev *dev)
{
if (!dev)
return;
if (dev->mt)
dev->mt->unregister_mgr(dev);
nvm_core_free(dev);
}
static int nvm_init(struct nvm_dev *dev)
{
struct nvmm_type *mt;
int ret = 0;
if (!dev->q || !dev->ops)
return -EINVAL;
if (dev->ops->identity(dev->q, &dev->identity)) {
pr_err("nvm: device could not be identified\n");
ret = -EINVAL;
goto err;
}
pr_debug("nvm: ver:%x nvm_vendor:%x groups:%u\n",
dev->identity.ver_id, dev->identity.vmnt,
dev->identity.cgrps);
if (dev->identity.ver_id != 1) {
pr_err("nvm: device not supported by kernel.");
goto err;
}
if (dev->identity.cgrps != 1) {
pr_err("nvm: only one group configuration supported.");
goto err;
}
ret = nvm_core_init(dev);
if (ret) {
pr_err("nvm: could not initialize core structures.\n");
goto err;
}
/* register with device with a supported manager */
list_for_each_entry(mt, &nvm_mgrs, list) {
ret = mt->register_mgr(dev);
if (ret < 0)
goto err; /* initialization failed */
if (ret > 0) {
dev->mt = mt;
break; /* successfully initialized */
}
}
if (!ret) {
pr_info("nvm: no compatible manager found.\n");
return 0;
}
pr_info("nvm: registered %s [%u/%u/%u/%u/%u/%u]\n",
dev->name, dev->sec_per_pg, dev->nr_planes,
dev->pgs_per_blk, dev->blks_per_lun, dev->nr_luns,
dev->nr_chnls);
return 0;
err:
nvm_free(dev);
pr_err("nvm: failed to initialize nvm\n");
return ret;
}
static void nvm_exit(struct nvm_dev *dev)
{
if (dev->ppalist_pool)
dev->ops->destroy_dma_pool(dev->ppalist_pool);
nvm_free(dev);
pr_info("nvm: successfully unloaded\n");
}
int nvm_register(struct request_queue *q, char *disk_name,
struct nvm_dev_ops *ops)
{
struct nvm_dev *dev;
int ret;
if (!ops->identity)
return -EINVAL;
dev = kzalloc(sizeof(struct nvm_dev), GFP_KERNEL);
if (!dev)
return -ENOMEM;
dev->q = q;
dev->ops = ops;
strncpy(dev->name, disk_name, DISK_NAME_LEN);
ret = nvm_init(dev);
if (ret)
goto err_init;
down_write(&nvm_lock);
list_add(&dev->devices, &nvm_devices);
up_write(&nvm_lock);
if (dev->ops->max_phys_sect > 1) {
dev->ppalist_pool = dev->ops->create_dma_pool(dev->q,
"ppalist");
if (!dev->ppalist_pool) {
pr_err("nvm: could not create ppa pool\n");
return -ENOMEM;
}
} else if (dev->ops->max_phys_sect > 256) {
pr_info("nvm: max sectors supported is 256.\n");
return -EINVAL;
}
return 0;
err_init:
kfree(dev);
return ret;
}
EXPORT_SYMBOL(nvm_register);
void nvm_unregister(char *disk_name)
{
struct nvm_dev *dev = nvm_find_nvm_dev(disk_name);
if (!dev) {
pr_err("nvm: could not find device %s to unregister\n",
disk_name);
return;
}
nvm_exit(dev);
down_write(&nvm_lock);
list_del(&dev->devices);
up_write(&nvm_lock);
}
EXPORT_SYMBOL(nvm_unregister);
static const struct block_device_operations nvm_fops = {
.owner = THIS_MODULE,
};
static int nvm_create_target(struct nvm_dev *dev,
struct nvm_ioctl_create *create)
{
struct nvm_ioctl_create_simple *s = &create->conf.s;
struct request_queue *tqueue;
struct nvmm_type *mt;
struct gendisk *tdisk;
struct nvm_tgt_type *tt;
struct nvm_target *t;
void *targetdata;
int ret = 0;
if (!dev->mt) {
/* register with device with a supported NVM manager */
list_for_each_entry(mt, &nvm_mgrs, list) {
ret = mt->register_mgr(dev);
if (ret < 0)
return ret; /* initialization failed */
if (ret > 0) {
dev->mt = mt;
break; /* successfully initialized */
}
}
if (!ret) {
pr_info("nvm: no compatible nvm manager found.\n");
return -ENODEV;
}
}
tt = nvm_find_target_type(create->tgttype);
if (!tt) {
pr_err("nvm: target type %s not found\n", create->tgttype);
return -EINVAL;
}
down_write(&nvm_lock);
list_for_each_entry(t, &dev->online_targets, list) {
if (!strcmp(create->tgtname, t->disk->disk_name)) {
pr_err("nvm: target name already exists.\n");
up_write(&nvm_lock);
return -EINVAL;
}
}
up_write(&nvm_lock);
t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL);
if (!t)
return -ENOMEM;
tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node);
if (!tqueue)
goto err_t;
blk_queue_make_request(tqueue, tt->make_rq);
tdisk = alloc_disk(0);
if (!tdisk)
goto err_queue;
sprintf(tdisk->disk_name, "%s", create->tgtname);
tdisk->flags = GENHD_FL_EXT_DEVT;
tdisk->major = 0;
tdisk->first_minor = 0;
tdisk->fops = &nvm_fops;
tdisk->queue = tqueue;
targetdata = tt->init(dev, tdisk, s->lun_begin, s->lun_end);
if (IS_ERR(targetdata))
goto err_init;
tdisk->private_data = targetdata;
tqueue->queuedata = targetdata;
blk_queue_max_hw_sectors(tqueue, 8 * dev->ops->max_phys_sect);
set_capacity(tdisk, tt->capacity(targetdata));
add_disk(tdisk);
t->type = tt;
t->disk = tdisk;
down_write(&nvm_lock);
list_add_tail(&t->list, &dev->online_targets);
up_write(&nvm_lock);
return 0;
err_init:
put_disk(tdisk);
err_queue:
blk_cleanup_queue(tqueue);
err_t:
kfree(t);
return -ENOMEM;
}
static void nvm_remove_target(struct nvm_target *t)
{
struct nvm_tgt_type *tt = t->type;
struct gendisk *tdisk = t->disk;
struct request_queue *q = tdisk->queue;
lockdep_assert_held(&nvm_lock);
del_gendisk(tdisk);
if (tt->exit)
tt->exit(tdisk->private_data);
blk_cleanup_queue(q);
put_disk(tdisk);
list_del(&t->list);
kfree(t);
}
static int __nvm_configure_create(struct nvm_ioctl_create *create)
{
struct nvm_dev *dev;
struct nvm_ioctl_create_simple *s;
dev = nvm_find_nvm_dev(create->dev);
if (!dev) {
pr_err("nvm: device not found\n");
return -EINVAL;
}
if (create->conf.type != NVM_CONFIG_TYPE_SIMPLE) {
pr_err("nvm: config type not valid\n");
return -EINVAL;
}
s = &create->conf.s;
if (s->lun_begin > s->lun_end || s->lun_end > dev->nr_luns) {
pr_err("nvm: lun out of bound (%u:%u > %u)\n",
s->lun_begin, s->lun_end, dev->nr_luns);
return -EINVAL;
}
return nvm_create_target(dev, create);
}
static int __nvm_configure_remove(struct nvm_ioctl_remove *remove)
{
struct nvm_target *t = NULL;
struct nvm_dev *dev;
int ret = -1;
down_write(&nvm_lock);
list_for_each_entry(dev, &nvm_devices, devices)
list_for_each_entry(t, &dev->online_targets, list) {
if (!strcmp(remove->tgtname, t->disk->disk_name)) {
nvm_remove_target(t);
ret = 0;
break;
}
}
up_write(&nvm_lock);
if (ret) {
pr_err("nvm: target \"%s\" doesn't exist.\n", remove->tgtname);
return -EINVAL;
}
return 0;
}
#ifdef CONFIG_NVM_DEBUG
static int nvm_configure_show(const char *val)
{
struct nvm_dev *dev;
char opcode, devname[DISK_NAME_LEN];
int ret;
ret = sscanf(val, "%c %32s", &opcode, devname);
if (ret != 2) {
pr_err("nvm: invalid command. Use \"opcode devicename\".\n");
return -EINVAL;
}
dev = nvm_find_nvm_dev(devname);
if (!dev) {
pr_err("nvm: device not found\n");
return -EINVAL;
}
if (!dev->mt)
return 0;
dev->mt->free_blocks_print(dev);
return 0;
}
static int nvm_configure_remove(const char *val)
{
struct nvm_ioctl_remove remove;
char opcode;
int ret;
ret = sscanf(val, "%c %256s", &opcode, remove.tgtname);
if (ret != 2) {
pr_err("nvm: invalid command. Use \"d targetname\".\n");
return -EINVAL;
}
remove.flags = 0;
return __nvm_configure_remove(&remove);
}
static int nvm_configure_create(const char *val)
{
struct nvm_ioctl_create create;
char opcode;
int lun_begin, lun_end, ret;
ret = sscanf(val, "%c %256s %256s %48s %u:%u", &opcode, create.dev,
create.tgtname, create.tgttype,
&lun_begin, &lun_end);
if (ret != 6) {
pr_err("nvm: invalid command. Use \"opcode device name tgttype lun_begin:lun_end\".\n");
return -EINVAL;
}
create.flags = 0;
create.conf.type = NVM_CONFIG_TYPE_SIMPLE;
create.conf.s.lun_begin = lun_begin;
create.conf.s.lun_end = lun_end;
return __nvm_configure_create(&create);
}
/* Exposes administrative interface through /sys/module/lnvm/configure_by_str */
static int nvm_configure_by_str_event(const char *val,
const struct kernel_param *kp)
{
char opcode;
int ret;
ret = sscanf(val, "%c", &opcode);
if (ret != 1) {
pr_err("nvm: string must have the format of \"cmd ...\"\n");
return -EINVAL;
}
switch (opcode) {
case 'a':
return nvm_configure_create(val);
case 'd':
return nvm_configure_remove(val);
case 's':
return nvm_configure_show(val);
default:
pr_err("nvm: invalid command\n");
return -EINVAL;
}
return 0;
}
static int nvm_configure_get(char *buf, const struct kernel_param *kp)
{
int sz = 0;
char *buf_start = buf;
struct nvm_dev *dev;
buf += sprintf(buf, "available devices:\n");
down_write(&nvm_lock);
list_for_each_entry(dev, &nvm_devices, devices) {
if (sz > 4095 - DISK_NAME_LEN)
break;
buf += sprintf(buf, " %32s\n", dev->name);
}
up_write(&nvm_lock);
return buf - buf_start - 1;
}
static const struct kernel_param_ops nvm_configure_by_str_event_param_ops = {
.set = nvm_configure_by_str_event,
.get = nvm_configure_get,
};
#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX "lnvm."
module_param_cb(configure_debug, &nvm_configure_by_str_event_param_ops, NULL,
0644);
#endif /* CONFIG_NVM_DEBUG */
static long nvm_ioctl_info(struct file *file, void __user *arg)
{
struct nvm_ioctl_info *info;
struct nvm_tgt_type *tt;
int tgt_iter = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
info = memdup_user(arg, sizeof(struct nvm_ioctl_info));
if (IS_ERR(info))
return -EFAULT;
info->version[0] = NVM_VERSION_MAJOR;
info->version[1] = NVM_VERSION_MINOR;
info->version[2] = NVM_VERSION_PATCH;
down_write(&nvm_lock);
list_for_each_entry(tt, &nvm_targets, list) {
struct nvm_ioctl_info_tgt *tgt = &info->tgts[tgt_iter];
tgt->version[0] = tt->version[0];
tgt->version[1] = tt->version[1];
tgt->version[2] = tt->version[2];
strncpy(tgt->tgtname, tt->name, NVM_TTYPE_NAME_MAX);
tgt_iter++;
}
info->tgtsize = tgt_iter;
up_write(&nvm_lock);
if (copy_to_user(arg, info, sizeof(struct nvm_ioctl_info)))
return -EFAULT;
kfree(info);
return 0;
}
static long nvm_ioctl_get_devices(struct file *file, void __user *arg)
{
struct nvm_ioctl_get_devices *devices;
struct nvm_dev *dev;
int i = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
devices = kzalloc(sizeof(struct nvm_ioctl_get_devices), GFP_KERNEL);
if (!devices)
return -ENOMEM;
down_write(&nvm_lock);
list_for_each_entry(dev, &nvm_devices, devices) {
struct nvm_ioctl_device_info *info = &devices->info[i];
sprintf(info->devname, "%s", dev->name);
if (dev->mt) {
info->bmversion[0] = dev->mt->version[0];
info->bmversion[1] = dev->mt->version[1];
info->bmversion[2] = dev->mt->version[2];
sprintf(info->bmname, "%s", dev->mt->name);
} else {
sprintf(info->bmname, "none");
}
i++;
if (i > 31) {
pr_err("nvm: max 31 devices can be reported.\n");
break;
}
}
up_write(&nvm_lock);
devices->nr_devices = i;
if (copy_to_user(arg, devices, sizeof(struct nvm_ioctl_get_devices)))
return -EFAULT;
kfree(devices);
return 0;
}
static long nvm_ioctl_dev_create(struct file *file, void __user *arg)
{
struct nvm_ioctl_create create;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (copy_from_user(&create, arg, sizeof(struct nvm_ioctl_create)))
return -EFAULT;
create.dev[DISK_NAME_LEN - 1] = '\0';
create.tgttype[NVM_TTYPE_NAME_MAX - 1] = '\0';
create.tgtname[DISK_NAME_LEN - 1] = '\0';
if (create.flags != 0) {
pr_err("nvm: no flags supported\n");
return -EINVAL;
}
return __nvm_configure_create(&create);
}
static long nvm_ioctl_dev_remove(struct file *file, void __user *arg)
{
struct nvm_ioctl_remove remove;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (copy_from_user(&remove, arg, sizeof(struct nvm_ioctl_remove)))
return -EFAULT;
remove.tgtname[DISK_NAME_LEN - 1] = '\0';
if (remove.flags != 0) {
pr_err("nvm: no flags supported\n");
return -EINVAL;
}
return __nvm_configure_remove(&remove);
}
static long nvm_ctl_ioctl(struct file *file, uint cmd, unsigned long arg)
{
void __user *argp = (void __user *)arg;
switch (cmd) {
case NVM_INFO:
return nvm_ioctl_info(file, argp);
case NVM_GET_DEVICES:
return nvm_ioctl_get_devices(file, argp);
case NVM_DEV_CREATE:
return nvm_ioctl_dev_create(file, argp);
case NVM_DEV_REMOVE:
return nvm_ioctl_dev_remove(file, argp);
}
return 0;
}
static const struct file_operations _ctl_fops = {
.open = nonseekable_open,
.unlocked_ioctl = nvm_ctl_ioctl,
.owner = THIS_MODULE,
.llseek = noop_llseek,
};
static struct miscdevice _nvm_misc = {
.minor = MISC_DYNAMIC_MINOR,
.name = "lightnvm",
.nodename = "lightnvm/control",
.fops = &_ctl_fops,
};
MODULE_ALIAS_MISCDEV(MISC_DYNAMIC_MINOR);
static int __init nvm_mod_init(void)
{
int ret;
ret = misc_register(&_nvm_misc);
if (ret)
pr_err("nvm: misc_register failed for control device");
return ret;
}
static void __exit nvm_mod_exit(void)
{
misc_deregister(&_nvm_misc);
}
MODULE_AUTHOR("Matias Bjorling <m@bjorling.me>");
MODULE_LICENSE("GPL v2");
MODULE_VERSION("0.1");
module_init(nvm_mod_init);
module_exit(nvm_mod_exit);

485
drivers/lightnvm/gennvm.c Normal file
View file

@ -0,0 +1,485 @@
/*
* Copyright (C) 2015 Matias Bjorling <m@bjorling.me>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
* USA.
*
* Implementation of a generic nvm manager for Open-Channel SSDs.
*/
#include "gennvm.h"
static void gennvm_blocks_free(struct nvm_dev *dev)
{
struct gen_nvm *gn = dev->mp;
struct gen_lun *lun;
int i;
gennvm_for_each_lun(gn, lun, i) {
if (!lun->vlun.blocks)
break;
vfree(lun->vlun.blocks);
}
}
static void gennvm_luns_free(struct nvm_dev *dev)
{
struct gen_nvm *gn = dev->mp;
kfree(gn->luns);
}
static int gennvm_luns_init(struct nvm_dev *dev, struct gen_nvm *gn)
{
struct gen_lun *lun;
int i;
gn->luns = kcalloc(dev->nr_luns, sizeof(struct gen_lun), GFP_KERNEL);
if (!gn->luns)
return -ENOMEM;
gennvm_for_each_lun(gn, lun, i) {
spin_lock_init(&lun->vlun.lock);
INIT_LIST_HEAD(&lun->free_list);
INIT_LIST_HEAD(&lun->used_list);
INIT_LIST_HEAD(&lun->bb_list);
lun->reserved_blocks = 2; /* for GC only */
lun->vlun.id = i;
lun->vlun.lun_id = i % dev->luns_per_chnl;
lun->vlun.chnl_id = i / dev->luns_per_chnl;
lun->vlun.nr_free_blocks = dev->blks_per_lun;
}
return 0;
}
static int gennvm_block_bb(u32 lun_id, void *bb_bitmap, unsigned int nr_blocks,
void *private)
{
struct gen_nvm *gn = private;
struct gen_lun *lun = &gn->luns[lun_id];
struct nvm_block *blk;
int i;
if (unlikely(bitmap_empty(bb_bitmap, nr_blocks)))
return 0;
i = -1;
while ((i = find_next_bit(bb_bitmap, nr_blocks, i + 1)) < nr_blocks) {
blk = &lun->vlun.blocks[i];
if (!blk) {
pr_err("gennvm: BB data is out of bounds.\n");
return -EINVAL;
}
list_move_tail(&blk->list, &lun->bb_list);
}
return 0;
}
static int gennvm_block_map(u64 slba, u32 nlb, __le64 *entries, void *private)
{
struct nvm_dev *dev = private;
struct gen_nvm *gn = dev->mp;
sector_t max_pages = dev->total_pages * (dev->sec_size >> 9);
u64 elba = slba + nlb;
struct gen_lun *lun;
struct nvm_block *blk;
u64 i;
int lun_id;
if (unlikely(elba > dev->total_pages)) {
pr_err("gennvm: L2P data from device is out of bounds!\n");
return -EINVAL;
}
for (i = 0; i < nlb; i++) {
u64 pba = le64_to_cpu(entries[i]);
if (unlikely(pba >= max_pages && pba != U64_MAX)) {
pr_err("gennvm: L2P data entry is out of bounds!\n");
return -EINVAL;
}
/* Address zero is a special one. The first page on a disk is
* protected. It often holds internal device boot
* information.
*/
if (!pba)
continue;
/* resolve block from physical address */
lun_id = div_u64(pba, dev->sec_per_lun);
lun = &gn->luns[lun_id];
/* Calculate block offset into lun */
pba = pba - (dev->sec_per_lun * lun_id);
blk = &lun->vlun.blocks[div_u64(pba, dev->sec_per_blk)];
if (!blk->type) {
/* at this point, we don't know anything about the
* block. It's up to the FTL on top to re-etablish the
* block state
*/
list_move_tail(&blk->list, &lun->used_list);
blk->type = 1;
lun->vlun.nr_free_blocks--;
}
}
return 0;
}
static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn)
{
struct gen_lun *lun;
struct nvm_block *block;
sector_t lun_iter, blk_iter, cur_block_id = 0;
int ret;
gennvm_for_each_lun(gn, lun, lun_iter) {
lun->vlun.blocks = vzalloc(sizeof(struct nvm_block) *
dev->blks_per_lun);
if (!lun->vlun.blocks)
return -ENOMEM;
for (blk_iter = 0; blk_iter < dev->blks_per_lun; blk_iter++) {
block = &lun->vlun.blocks[blk_iter];
INIT_LIST_HEAD(&block->list);
block->lun = &lun->vlun;
block->id = cur_block_id++;
/* First block is reserved for device */
if (unlikely(lun_iter == 0 && blk_iter == 0))
continue;
list_add_tail(&block->list, &lun->free_list);
}
if (dev->ops->get_bb_tbl) {
ret = dev->ops->get_bb_tbl(dev->q, lun->vlun.id,
dev->blks_per_lun, gennvm_block_bb, gn);
if (ret)
pr_err("gennvm: could not read BB table\n");
}
}
if (dev->ops->get_l2p_tbl) {
ret = dev->ops->get_l2p_tbl(dev->q, 0, dev->total_pages,
gennvm_block_map, dev);
if (ret) {
pr_err("gennvm: could not read L2P table.\n");
pr_warn("gennvm: default block initialization");
}
}
return 0;
}
static int gennvm_register(struct nvm_dev *dev)
{
struct gen_nvm *gn;
int ret;
gn = kzalloc(sizeof(struct gen_nvm), GFP_KERNEL);
if (!gn)
return -ENOMEM;
gn->nr_luns = dev->nr_luns;
dev->mp = gn;
ret = gennvm_luns_init(dev, gn);
if (ret) {
pr_err("gennvm: could not initialize luns\n");
goto err;
}
ret = gennvm_blocks_init(dev, gn);
if (ret) {
pr_err("gennvm: could not initialize blocks\n");
goto err;
}
return 1;
err:
kfree(gn);
return ret;
}
static void gennvm_unregister(struct nvm_dev *dev)
{
gennvm_blocks_free(dev);
gennvm_luns_free(dev);
kfree(dev->mp);
dev->mp = NULL;
}
static struct nvm_block *gennvm_get_blk(struct nvm_dev *dev,
struct nvm_lun *vlun, unsigned long flags)
{
struct gen_lun *lun = container_of(vlun, struct gen_lun, vlun);
struct nvm_block *blk = NULL;
int is_gc = flags & NVM_IOTYPE_GC;
spin_lock(&vlun->lock);
if (list_empty(&lun->free_list)) {
pr_err_ratelimited("gennvm: lun %u have no free pages available",
lun->vlun.id);
spin_unlock(&vlun->lock);
goto out;
}
while (!is_gc && lun->vlun.nr_free_blocks < lun->reserved_blocks) {
spin_unlock(&vlun->lock);
goto out;
}
blk = list_first_entry(&lun->free_list, struct nvm_block, list);
list_move_tail(&blk->list, &lun->used_list);
blk->type = 1;
lun->vlun.nr_free_blocks--;
spin_unlock(&vlun->lock);
out:
return blk;
}
static void gennvm_put_blk(struct nvm_dev *dev, struct nvm_block *blk)
{
struct nvm_lun *vlun = blk->lun;
struct gen_lun *lun = container_of(vlun, struct gen_lun, vlun);
spin_lock(&vlun->lock);
switch (blk->type) {
case 1:
list_move_tail(&blk->list, &lun->free_list);
lun->vlun.nr_free_blocks++;
blk->type = 0;
break;
case 2:
list_move_tail(&blk->list, &lun->bb_list);
break;
default:
WARN_ON_ONCE(1);
pr_err("gennvm: erroneous block type (%lu -> %u)\n",
blk->id, blk->type);
list_move_tail(&blk->list, &lun->bb_list);
}
spin_unlock(&vlun->lock);
}
static void gennvm_addr_to_generic_mode(struct nvm_dev *dev, struct nvm_rq *rqd)
{
int i;
if (rqd->nr_pages > 1) {
for (i = 0; i < rqd->nr_pages; i++)
rqd->ppa_list[i] = addr_to_generic_mode(dev,
rqd->ppa_list[i]);
} else {
rqd->ppa_addr = addr_to_generic_mode(dev, rqd->ppa_addr);
}
}
static void gennvm_generic_to_addr_mode(struct nvm_dev *dev, struct nvm_rq *rqd)
{
int i;
if (rqd->nr_pages > 1) {
for (i = 0; i < rqd->nr_pages; i++)
rqd->ppa_list[i] = generic_to_addr_mode(dev,
rqd->ppa_list[i]);
} else {
rqd->ppa_addr = generic_to_addr_mode(dev, rqd->ppa_addr);
}
}
static int gennvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
{
if (!dev->ops->submit_io)
return 0;
/* Convert address space */
gennvm_generic_to_addr_mode(dev, rqd);
rqd->dev = dev;
return dev->ops->submit_io(dev->q, rqd);
}
static void gennvm_blk_set_type(struct nvm_dev *dev, struct ppa_addr *ppa,
int type)
{
struct gen_nvm *gn = dev->mp;
struct gen_lun *lun;
struct nvm_block *blk;
if (unlikely(ppa->g.ch > dev->nr_chnls ||
ppa->g.lun > dev->luns_per_chnl ||
ppa->g.blk > dev->blks_per_lun)) {
WARN_ON_ONCE(1);
pr_err("gennvm: ppa broken (ch: %u > %u lun: %u > %u blk: %u > %u",
ppa->g.ch, dev->nr_chnls,
ppa->g.lun, dev->luns_per_chnl,
ppa->g.blk, dev->blks_per_lun);
return;
}
lun = &gn->luns[ppa->g.lun * ppa->g.ch];
blk = &lun->vlun.blocks[ppa->g.blk];
/* will be moved to bb list on put_blk from target */
blk->type = type;
}
/* mark block bad. It is expected the target recover from the error. */
static void gennvm_mark_blk_bad(struct nvm_dev *dev, struct nvm_rq *rqd)
{
int i;
if (!dev->ops->set_bb)
return;
if (dev->ops->set_bb(dev->q, rqd, 1))
return;
gennvm_addr_to_generic_mode(dev, rqd);
/* look up blocks and mark them as bad */
if (rqd->nr_pages > 1)
for (i = 0; i < rqd->nr_pages; i++)
gennvm_blk_set_type(dev, &rqd->ppa_list[i], 2);
else
gennvm_blk_set_type(dev, &rqd->ppa_addr, 2);
}
static int gennvm_end_io(struct nvm_rq *rqd, int error)
{
struct nvm_tgt_instance *ins = rqd->ins;
int ret = 0;
switch (error) {
case NVM_RSP_SUCCESS:
break;
case NVM_RSP_ERR_EMPTYPAGE:
break;
case NVM_RSP_ERR_FAILWRITE:
gennvm_mark_blk_bad(rqd->dev, rqd);
default:
ret++;
}
ret += ins->tt->end_io(rqd, error);
return ret;
}
static int gennvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk,
unsigned long flags)
{
int plane_cnt = 0, pl_idx, ret;
struct ppa_addr addr;
struct nvm_rq rqd;
if (!dev->ops->erase_block)
return 0;
addr = block_to_ppa(dev, blk);
if (dev->plane_mode == NVM_PLANE_SINGLE) {
rqd.nr_pages = 1;
rqd.ppa_addr = addr;
} else {
plane_cnt = (1 << dev->plane_mode);
rqd.nr_pages = plane_cnt;
rqd.ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL,
&rqd.dma_ppa_list);
if (!rqd.ppa_list) {
pr_err("gennvm: failed to allocate dma memory\n");
return -ENOMEM;
}
for (pl_idx = 0; pl_idx < plane_cnt; pl_idx++) {
addr.g.pl = pl_idx;
rqd.ppa_list[pl_idx] = addr;
}
}
gennvm_generic_to_addr_mode(dev, &rqd);
ret = dev->ops->erase_block(dev->q, &rqd);
if (plane_cnt)
nvm_dev_dma_free(dev, rqd.ppa_list, rqd.dma_ppa_list);
return ret;
}
static struct nvm_lun *gennvm_get_lun(struct nvm_dev *dev, int lunid)
{
struct gen_nvm *gn = dev->mp;
return &gn->luns[lunid].vlun;
}
static void gennvm_free_blocks_print(struct nvm_dev *dev)
{
struct gen_nvm *gn = dev->mp;
struct gen_lun *lun;
unsigned int i;
gennvm_for_each_lun(gn, lun, i)
pr_info("%s: lun%8u\t%u\n",
dev->name, i, lun->vlun.nr_free_blocks);
}
static struct nvmm_type gennvm = {
.name = "gennvm",
.version = {0, 1, 0},
.register_mgr = gennvm_register,
.unregister_mgr = gennvm_unregister,
.get_blk = gennvm_get_blk,
.put_blk = gennvm_put_blk,
.submit_io = gennvm_submit_io,
.end_io = gennvm_end_io,
.erase_blk = gennvm_erase_blk,
.get_lun = gennvm_get_lun,
.free_blocks_print = gennvm_free_blocks_print,
};
static int __init gennvm_module_init(void)
{
return nvm_register_mgr(&gennvm);
}
static void gennvm_module_exit(void)
{
nvm_unregister_mgr(&gennvm);
}
module_init(gennvm_module_init);
module_exit(gennvm_module_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Generic media manager for Open-Channel SSDs");

46
drivers/lightnvm/gennvm.h Normal file
View file

@ -0,0 +1,46 @@
/*
* Copyright: Matias Bjorling <mb@bjorling.me>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
*/
#ifndef GENNVM_H_
#define GENNVM_H_
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/lightnvm.h>
struct gen_lun {
struct nvm_lun vlun;
int reserved_blocks;
/* lun block lists */
struct list_head used_list; /* In-use blocks */
struct list_head free_list; /* Not used blocks i.e. released
* and ready for use
*/
struct list_head bb_list; /* Bad blocks. Mutually exclusive with
* free_list and used_list
*/
};
struct gen_nvm {
int nr_luns;
struct gen_lun *luns;
};
#define gennvm_for_each_lun(bm, lun, i) \
for ((i) = 0, lun = &(bm)->luns[0]; \
(i) < (bm)->nr_luns; (i)++, lun = &(bm)->luns[(i)])
#endif /* GENNVM_H_ */

1323
drivers/lightnvm/rrpc.c Normal file

File diff suppressed because it is too large Load diff

239
drivers/lightnvm/rrpc.h Normal file
View file

@ -0,0 +1,239 @@
/*
* Copyright (C) 2015 IT University of Copenhagen
* Initial release: Matias Bjorling <m@bjorling.me>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* Implementation of a Round-robin page-based Hybrid FTL for Open-channel SSDs.
*/
#ifndef RRPC_H_
#define RRPC_H_
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/vmalloc.h>
#include <linux/lightnvm.h>
/* Run only GC if less than 1/X blocks are free */
#define GC_LIMIT_INVERSE 10
#define GC_TIME_SECS 100
#define RRPC_SECTOR (512)
#define RRPC_EXPOSED_PAGE_SIZE (4096)
#define NR_PHY_IN_LOG (RRPC_EXPOSED_PAGE_SIZE / RRPC_SECTOR)
struct rrpc_inflight {
struct list_head reqs;
spinlock_t lock;
};
struct rrpc_inflight_rq {
struct list_head list;
sector_t l_start;
sector_t l_end;
};
struct rrpc_rq {
struct rrpc_inflight_rq inflight_rq;
struct rrpc_addr *addr;
unsigned long flags;
};
struct rrpc_block {
struct nvm_block *parent;
struct list_head prio;
#define MAX_INVALID_PAGES_STORAGE 8
/* Bitmap for invalid page intries */
unsigned long invalid_pages[MAX_INVALID_PAGES_STORAGE];
/* points to the next writable page within a block */
unsigned int next_page;
/* number of pages that are invalid, wrt host page size */
unsigned int nr_invalid_pages;
spinlock_t lock;
atomic_t data_cmnt_size; /* data pages committed to stable storage */
};
struct rrpc_lun {
struct rrpc *rrpc;
struct nvm_lun *parent;
struct rrpc_block *cur, *gc_cur;
struct rrpc_block *blocks; /* Reference to block allocation */
struct list_head prio_list; /* Blocks that may be GC'ed */
struct work_struct ws_gc;
spinlock_t lock;
};
struct rrpc {
/* instance must be kept in top to resolve rrpc in unprep */
struct nvm_tgt_instance instance;
struct nvm_dev *dev;
struct gendisk *disk;
u64 poffset; /* physical page offset */
int lun_offset;
int nr_luns;
struct rrpc_lun *luns;
/* calculated values */
unsigned long long nr_pages;
unsigned long total_blocks;
/* Write strategy variables. Move these into each for structure for each
* strategy
*/
atomic_t next_lun; /* Whenever a page is written, this is updated
* to point to the next write lun
*/
spinlock_t bio_lock;
struct bio_list requeue_bios;
struct work_struct ws_requeue;
/* Simple translation map of logical addresses to physical addresses.
* The logical addresses is known by the host system, while the physical
* addresses are used when writing to the disk block device.
*/
struct rrpc_addr *trans_map;
/* also store a reverse map for garbage collection */
struct rrpc_rev_addr *rev_trans_map;
spinlock_t rev_lock;
struct rrpc_inflight inflights;
mempool_t *addr_pool;
mempool_t *page_pool;
mempool_t *gcb_pool;
mempool_t *rq_pool;
struct timer_list gc_timer;
struct workqueue_struct *krqd_wq;
struct workqueue_struct *kgc_wq;
};
struct rrpc_block_gc {
struct rrpc *rrpc;
struct rrpc_block *rblk;
struct work_struct ws_gc;
};
/* Logical to physical mapping */
struct rrpc_addr {
u64 addr;
struct rrpc_block *rblk;
};
/* Physical to logical mapping */
struct rrpc_rev_addr {
u64 addr;
};
static inline sector_t rrpc_get_laddr(struct bio *bio)
{
return bio->bi_iter.bi_sector / NR_PHY_IN_LOG;
}
static inline unsigned int rrpc_get_pages(struct bio *bio)
{
return bio->bi_iter.bi_size / RRPC_EXPOSED_PAGE_SIZE;
}
static inline sector_t rrpc_get_sector(sector_t laddr)
{
return laddr * NR_PHY_IN_LOG;
}
static inline int request_intersects(struct rrpc_inflight_rq *r,
sector_t laddr_start, sector_t laddr_end)
{
return (laddr_end >= r->l_start && laddr_end <= r->l_end) &&
(laddr_start >= r->l_start && laddr_start <= r->l_end);
}
static int __rrpc_lock_laddr(struct rrpc *rrpc, sector_t laddr,
unsigned pages, struct rrpc_inflight_rq *r)
{
sector_t laddr_end = laddr + pages - 1;
struct rrpc_inflight_rq *rtmp;
spin_lock_irq(&rrpc->inflights.lock);
list_for_each_entry(rtmp, &rrpc->inflights.reqs, list) {
if (unlikely(request_intersects(rtmp, laddr, laddr_end))) {
/* existing, overlapping request, come back later */
spin_unlock_irq(&rrpc->inflights.lock);
return 1;
}
}
r->l_start = laddr;
r->l_end = laddr_end;
list_add_tail(&r->list, &rrpc->inflights.reqs);
spin_unlock_irq(&rrpc->inflights.lock);
return 0;
}
static inline int rrpc_lock_laddr(struct rrpc *rrpc, sector_t laddr,
unsigned pages,
struct rrpc_inflight_rq *r)
{
BUG_ON((laddr + pages) > rrpc->nr_pages);
return __rrpc_lock_laddr(rrpc, laddr, pages, r);
}
static inline struct rrpc_inflight_rq *rrpc_get_inflight_rq(struct nvm_rq *rqd)
{
struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
return &rrqd->inflight_rq;
}
static inline int rrpc_lock_rq(struct rrpc *rrpc, struct bio *bio,
struct nvm_rq *rqd)
{
sector_t laddr = rrpc_get_laddr(bio);
unsigned int pages = rrpc_get_pages(bio);
struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rqd);
return rrpc_lock_laddr(rrpc, laddr, pages, r);
}
static inline void rrpc_unlock_laddr(struct rrpc *rrpc,
struct rrpc_inflight_rq *r)
{
unsigned long flags;
spin_lock_irqsave(&rrpc->inflights.lock, flags);
list_del_init(&r->list);
spin_unlock_irqrestore(&rrpc->inflights.lock, flags);
}
static inline void rrpc_unlock_rq(struct rrpc *rrpc, struct nvm_rq *rqd)
{
struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rqd);
uint8_t pages = rqd->nr_pages;
BUG_ON((r->l_start + pages) > rrpc->nr_pages);
rrpc_unlock_laddr(rrpc, r);
}
#endif /* RRPC_H_ */

View file

@ -1,4 +1,4 @@
obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
nvme-y += pci.o scsi.o
nvme-y += pci.o scsi.o lightnvm.o

View file

@ -0,0 +1,526 @@
/*
* nvme-lightnvm.c - LightNVM NVMe device
*
* Copyright (C) 2014-2015 IT University of Copenhagen
* Initial release: Matias Bjorling <mb@lightnvm.io>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
* USA.
*
*/
#include "nvme.h"
#ifdef CONFIG_NVM
#include <linux/nvme.h>
#include <linux/bitops.h>
#include <linux/lightnvm.h>
#include <linux/vmalloc.h>
enum nvme_nvm_admin_opcode {
nvme_nvm_admin_identity = 0xe2,
nvme_nvm_admin_get_l2p_tbl = 0xea,
nvme_nvm_admin_get_bb_tbl = 0xf2,
nvme_nvm_admin_set_bb_tbl = 0xf1,
};
struct nvme_nvm_hb_rw {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd2;
__le64 metadata;
__le64 prp1;
__le64 prp2;
__le64 spba;
__le16 length;
__le16 control;
__le32 dsmgmt;
__le64 slba;
};
struct nvme_nvm_ph_rw {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd2;
__le64 metadata;
__le64 prp1;
__le64 prp2;
__le64 spba;
__le16 length;
__le16 control;
__le32 dsmgmt;
__le64 resv;
};
struct nvme_nvm_identity {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd[2];
__le64 prp1;
__le64 prp2;
__le32 chnl_off;
__u32 rsvd11[5];
};
struct nvme_nvm_l2ptbl {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__le32 cdw2[4];
__le64 prp1;
__le64 prp2;
__le64 slba;
__le32 nlb;
__le16 cdw14[6];
};
struct nvme_nvm_bbtbl {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd[2];
__le64 prp1;
__le64 prp2;
__le32 prp1_len;
__le32 prp2_len;
__le32 lbb;
__u32 rsvd11[3];
};
struct nvme_nvm_erase_blk {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd[2];
__le64 prp1;
__le64 prp2;
__le64 spba;
__le16 length;
__le16 control;
__le32 dsmgmt;
__le64 resv;
};
struct nvme_nvm_command {
union {
struct nvme_common_command common;
struct nvme_nvm_identity identity;
struct nvme_nvm_hb_rw hb_rw;
struct nvme_nvm_ph_rw ph_rw;
struct nvme_nvm_l2ptbl l2p;
struct nvme_nvm_bbtbl get_bb;
struct nvme_nvm_bbtbl set_bb;
struct nvme_nvm_erase_blk erase;
};
};
struct nvme_nvm_id_group {
__u8 mtype;
__u8 fmtype;
__le16 res16;
__u8 num_ch;
__u8 num_lun;
__u8 num_pln;
__le16 num_blk;
__le16 num_pg;
__le16 fpg_sz;
__le16 csecs;
__le16 sos;
__le32 trdt;
__le32 trdm;
__le32 tprt;
__le32 tprm;
__le32 tbet;
__le32 tbem;
__le32 mpos;
__le16 cpar;
__u8 reserved[913];
} __packed;
struct nvme_nvm_addr_format {
__u8 ch_offset;
__u8 ch_len;
__u8 lun_offset;
__u8 lun_len;
__u8 pln_offset;
__u8 pln_len;
__u8 blk_offset;
__u8 blk_len;
__u8 pg_offset;
__u8 pg_len;
__u8 sect_offset;
__u8 sect_len;
__u8 res[4];
} __packed;
struct nvme_nvm_id {
__u8 ver_id;
__u8 vmnt;
__u8 cgrps;
__u8 res[5];
__le32 cap;
__le32 dom;
struct nvme_nvm_addr_format ppaf;
__u8 ppat;
__u8 resv[223];
struct nvme_nvm_id_group groups[4];
} __packed;
/*
* Check we didn't inadvertently grow the command struct
*/
static inline void _nvme_nvm_check_size(void)
{
BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_hb_rw) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_bbtbl) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_l2ptbl) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960);
BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 128);
BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096);
}
static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
{
struct nvme_nvm_id_group *src;
struct nvm_id_group *dst;
int i, end;
end = min_t(u32, 4, nvm_id->cgrps);
for (i = 0; i < end; i++) {
src = &nvme_nvm_id->groups[i];
dst = &nvm_id->groups[i];
dst->mtype = src->mtype;
dst->fmtype = src->fmtype;
dst->num_ch = src->num_ch;
dst->num_lun = src->num_lun;
dst->num_pln = src->num_pln;
dst->num_pg = le16_to_cpu(src->num_pg);
dst->num_blk = le16_to_cpu(src->num_blk);
dst->fpg_sz = le16_to_cpu(src->fpg_sz);
dst->csecs = le16_to_cpu(src->csecs);
dst->sos = le16_to_cpu(src->sos);
dst->trdt = le32_to_cpu(src->trdt);
dst->trdm = le32_to_cpu(src->trdm);
dst->tprt = le32_to_cpu(src->tprt);
dst->tprm = le32_to_cpu(src->tprm);
dst->tbet = le32_to_cpu(src->tbet);
dst->tbem = le32_to_cpu(src->tbem);
dst->mpos = le32_to_cpu(src->mpos);
dst->cpar = le16_to_cpu(src->cpar);
}
return 0;
}
static int nvme_nvm_identity(struct request_queue *q, struct nvm_id *nvm_id)
{
struct nvme_ns *ns = q->queuedata;
struct nvme_nvm_id *nvme_nvm_id;
struct nvme_nvm_command c = {};
int ret;
c.identity.opcode = nvme_nvm_admin_identity;
c.identity.nsid = cpu_to_le32(ns->ns_id);
c.identity.chnl_off = 0;
nvme_nvm_id = kmalloc(sizeof(struct nvme_nvm_id), GFP_KERNEL);
if (!nvme_nvm_id)
return -ENOMEM;
ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c, nvme_nvm_id,
sizeof(struct nvme_nvm_id));
if (ret) {
ret = -EIO;
goto out;
}
nvm_id->ver_id = nvme_nvm_id->ver_id;
nvm_id->vmnt = nvme_nvm_id->vmnt;
nvm_id->cgrps = nvme_nvm_id->cgrps;
nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap);
nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom);
ret = init_grps(nvm_id, nvme_nvm_id);
out:
kfree(nvme_nvm_id);
return ret;
}
static int nvme_nvm_get_l2p_tbl(struct request_queue *q, u64 slba, u32 nlb,
nvm_l2p_update_fn *update_l2p, void *priv)
{
struct nvme_ns *ns = q->queuedata;
struct nvme_dev *dev = ns->dev;
struct nvme_nvm_command c = {};
u32 len = queue_max_hw_sectors(q) << 9;
u32 nlb_pr_rq = len / sizeof(u64);
u64 cmd_slba = slba;
void *entries;
int ret = 0;
c.l2p.opcode = nvme_nvm_admin_get_l2p_tbl;
c.l2p.nsid = cpu_to_le32(ns->ns_id);
entries = kmalloc(len, GFP_KERNEL);
if (!entries)
return -ENOMEM;
while (nlb) {
u32 cmd_nlb = min(nlb_pr_rq, nlb);
c.l2p.slba = cpu_to_le64(cmd_slba);
c.l2p.nlb = cpu_to_le32(cmd_nlb);
ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c,
entries, len);
if (ret) {
dev_err(dev->dev, "L2P table transfer failed (%d)\n",
ret);
ret = -EIO;
goto out;
}
if (update_l2p(cmd_slba, cmd_nlb, entries, priv)) {
ret = -EINTR;
goto out;
}
cmd_slba += cmd_nlb;
nlb -= cmd_nlb;
}
out:
kfree(entries);
return ret;
}
static int nvme_nvm_get_bb_tbl(struct request_queue *q, int lunid,
unsigned int nr_blocks,
nvm_bb_update_fn *update_bbtbl, void *priv)
{
struct nvme_ns *ns = q->queuedata;
struct nvme_dev *dev = ns->dev;
struct nvme_nvm_command c = {};
void *bb_bitmap;
u16 bb_bitmap_size;
int ret = 0;
c.get_bb.opcode = nvme_nvm_admin_get_bb_tbl;
c.get_bb.nsid = cpu_to_le32(ns->ns_id);
c.get_bb.lbb = cpu_to_le32(lunid);
bb_bitmap_size = ((nr_blocks >> 15) + 1) * PAGE_SIZE;
bb_bitmap = kmalloc(bb_bitmap_size, GFP_KERNEL);
if (!bb_bitmap)
return -ENOMEM;
bitmap_zero(bb_bitmap, nr_blocks);
ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c, bb_bitmap,
bb_bitmap_size);
if (ret) {
dev_err(dev->dev, "get bad block table failed (%d)\n", ret);
ret = -EIO;
goto out;
}
ret = update_bbtbl(lunid, bb_bitmap, nr_blocks, priv);
if (ret) {
ret = -EINTR;
goto out;
}
out:
kfree(bb_bitmap);
return ret;
}
static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd,
struct nvme_ns *ns, struct nvme_nvm_command *c)
{
c->ph_rw.opcode = rqd->opcode;
c->ph_rw.nsid = cpu_to_le32(ns->ns_id);
c->ph_rw.spba = cpu_to_le64(rqd->ppa_addr.ppa);
c->ph_rw.control = cpu_to_le16(rqd->flags);
c->ph_rw.length = cpu_to_le16(rqd->nr_pages - 1);
if (rqd->opcode == NVM_OP_HBWRITE || rqd->opcode == NVM_OP_HBREAD)
c->hb_rw.slba = cpu_to_le64(nvme_block_nr(ns,
rqd->bio->bi_iter.bi_sector));
}
static void nvme_nvm_end_io(struct request *rq, int error)
{
struct nvm_rq *rqd = rq->end_io_data;
struct nvm_dev *dev = rqd->dev;
if (dev->mt->end_io(rqd, error))
pr_err("nvme: err status: %x result: %lx\n",
rq->errors, (unsigned long)rq->special);
kfree(rq->cmd);
blk_mq_free_request(rq);
}
static int nvme_nvm_submit_io(struct request_queue *q, struct nvm_rq *rqd)
{
struct nvme_ns *ns = q->queuedata;
struct request *rq;
struct bio *bio = rqd->bio;
struct nvme_nvm_command *cmd;
rq = blk_mq_alloc_request(q, bio_rw(bio), GFP_KERNEL, 0);
if (IS_ERR(rq))
return -ENOMEM;
cmd = kzalloc(sizeof(struct nvme_nvm_command), GFP_KERNEL);
if (!cmd) {
blk_mq_free_request(rq);
return -ENOMEM;
}
rq->cmd_type = REQ_TYPE_DRV_PRIV;
rq->ioprio = bio_prio(bio);
if (bio_has_data(bio))
rq->nr_phys_segments = bio_phys_segments(q, bio);
rq->__data_len = bio->bi_iter.bi_size;
rq->bio = rq->biotail = bio;
nvme_nvm_rqtocmd(rq, rqd, ns, cmd);
rq->cmd = (unsigned char *)cmd;
rq->cmd_len = sizeof(struct nvme_nvm_command);
rq->special = (void *)0;
rq->end_io_data = rqd;
blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_io);
return 0;
}
static int nvme_nvm_erase_block(struct request_queue *q, struct nvm_rq *rqd)
{
struct nvme_ns *ns = q->queuedata;
struct nvme_nvm_command c = {};
c.erase.opcode = NVM_OP_ERASE;
c.erase.nsid = cpu_to_le32(ns->ns_id);
c.erase.spba = cpu_to_le64(rqd->ppa_addr.ppa);
c.erase.length = cpu_to_le16(rqd->nr_pages - 1);
return nvme_submit_sync_cmd(q, (struct nvme_command *)&c, NULL, 0);
}
static void *nvme_nvm_create_dma_pool(struct request_queue *q, char *name)
{
struct nvme_ns *ns = q->queuedata;
struct nvme_dev *dev = ns->dev;
return dma_pool_create(name, dev->dev, PAGE_SIZE, PAGE_SIZE, 0);
}
static void nvme_nvm_destroy_dma_pool(void *pool)
{
struct dma_pool *dma_pool = pool;
dma_pool_destroy(dma_pool);
}
static void *nvme_nvm_dev_dma_alloc(struct request_queue *q, void *pool,
gfp_t mem_flags, dma_addr_t *dma_handler)
{
return dma_pool_alloc(pool, mem_flags, dma_handler);
}
static void nvme_nvm_dev_dma_free(void *pool, void *ppa_list,
dma_addr_t dma_handler)
{
dma_pool_free(pool, ppa_list, dma_handler);
}
static struct nvm_dev_ops nvme_nvm_dev_ops = {
.identity = nvme_nvm_identity,
.get_l2p_tbl = nvme_nvm_get_l2p_tbl,
.get_bb_tbl = nvme_nvm_get_bb_tbl,
.submit_io = nvme_nvm_submit_io,
.erase_block = nvme_nvm_erase_block,
.create_dma_pool = nvme_nvm_create_dma_pool,
.destroy_dma_pool = nvme_nvm_destroy_dma_pool,
.dev_dma_alloc = nvme_nvm_dev_dma_alloc,
.dev_dma_free = nvme_nvm_dev_dma_free,
.max_phys_sect = 64,
};
int nvme_nvm_register(struct request_queue *q, char *disk_name)
{
return nvm_register(q, disk_name, &nvme_nvm_dev_ops);
}
void nvme_nvm_unregister(struct request_queue *q, char *disk_name)
{
nvm_unregister(disk_name);
}
int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
{
struct nvme_dev *dev = ns->dev;
struct pci_dev *pdev = to_pci_dev(dev->dev);
/* QEMU NVMe simulator - PCI ID + Vendor specific bit */
if (pdev->vendor == PCI_VENDOR_ID_INTEL && pdev->device == 0x5845 &&
id->vs[0] == 0x1)
return 1;
/* CNEX Labs - PCI ID + Vendor specific bit */
if (pdev->vendor == 0x1d1d && pdev->device == 0x2807 &&
id->vs[0] == 0x1)
return 1;
return 0;
}
#else
int nvme_nvm_register(struct request_queue *q, char *disk_name)
{
return 0;
}
void nvme_nvm_unregister(struct request_queue *q, char *disk_name) {};
int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
{
return 0;
}
#endif /* CONFIG_NVM */

View file

@ -22,6 +22,11 @@
extern unsigned char nvme_io_timeout;
#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ)
enum {
NVME_NS_LBA = 0,
NVME_NS_LIGHTNVM = 1,
};
/*
* Represents an NVM Express device. Each nvme_dev is a PCI function.
*/
@ -84,6 +89,7 @@ struct nvme_ns {
u16 ms;
bool ext;
u8 pi_type;
int type;
u64 mode_select_num_blocks;
u32 mode_select_block_len;
};
@ -130,4 +136,8 @@ int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr);
int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg);
int nvme_sg_get_version_num(int __user *ip);
int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id);
int nvme_nvm_register(struct request_queue *q, char *disk_name);
void nvme_nvm_unregister(struct request_queue *q, char *disk_name);
#endif /* _NVME_H */

View file

@ -1952,6 +1952,9 @@ static void nvme_free_ns(struct kref *kref)
{
struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
if (ns->type == NVME_NS_LIGHTNVM)
nvme_nvm_unregister(ns->queue, ns->disk->disk_name);
spin_lock(&dev_list_lock);
ns->disk->private_data = NULL;
spin_unlock(&dev_list_lock);
@ -2021,6 +2024,16 @@ static int nvme_revalidate_disk(struct gendisk *disk)
return -ENODEV;
}
if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) {
if (nvme_nvm_register(ns->queue, disk->disk_name)) {
dev_warn(dev->dev,
"%s: LightNVM init failure\n", __func__);
kfree(id);
return -ENODEV;
}
ns->type = NVME_NS_LIGHTNVM;
}
old_ms = ns->ms;
lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
ns->lba_shift = id->lbaf[lbaf].ds;
@ -2052,7 +2065,9 @@ static int nvme_revalidate_disk(struct gendisk *disk)
!ns->ext)
nvme_init_integrity(ns);
if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
if ((ns->ms && !(ns->ms == 8 && ns->pi_type) &&
!blk_get_integrity(disk)) ||
ns->type == NVME_NS_LIGHTNVM)
set_capacity(disk, 0);
else
set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
@ -2175,17 +2190,19 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
goto out_free_disk;
kref_get(&dev->kref);
add_disk(ns->disk);
if (ns->ms) {
struct block_device *bd = bdget_disk(ns->disk, 0);
if (!bd)
return;
if (blkdev_get(bd, FMODE_READ, NULL)) {
bdput(bd);
return;
if (ns->type != NVME_NS_LIGHTNVM) {
add_disk(ns->disk);
if (ns->ms) {
struct block_device *bd = bdget_disk(ns->disk, 0);
if (!bd)
return;
if (blkdev_get(bd, FMODE_READ, NULL)) {
bdput(bd);
return;
}
blkdev_reread_part(bd);
blkdev_put(bd, FMODE_READ);
}
blkdev_reread_part(bd);
blkdev_put(bd, FMODE_READ);
}
return;
out_free_disk:

522
include/linux/lightnvm.h Normal file
View file

@ -0,0 +1,522 @@
#ifndef NVM_H
#define NVM_H
enum {
NVM_IO_OK = 0,
NVM_IO_REQUEUE = 1,
NVM_IO_DONE = 2,
NVM_IO_ERR = 3,
NVM_IOTYPE_NONE = 0,
NVM_IOTYPE_GC = 1,
};
#ifdef CONFIG_NVM
#include <linux/blkdev.h>
#include <linux/types.h>
#include <linux/file.h>
#include <linux/dmapool.h>
enum {
/* HW Responsibilities */
NVM_RSP_L2P = 1 << 0,
NVM_RSP_ECC = 1 << 1,
/* Physical Adressing Mode */
NVM_ADDRMODE_LINEAR = 0,
NVM_ADDRMODE_CHANNEL = 1,
/* Plane programming mode for LUN */
NVM_PLANE_SINGLE = 0,
NVM_PLANE_DOUBLE = 1,
NVM_PLANE_QUAD = 2,
/* Status codes */
NVM_RSP_SUCCESS = 0x0,
NVM_RSP_NOT_CHANGEABLE = 0x1,
NVM_RSP_ERR_FAILWRITE = 0x40ff,
NVM_RSP_ERR_EMPTYPAGE = 0x42ff,
/* Device opcodes */
NVM_OP_HBREAD = 0x02,
NVM_OP_HBWRITE = 0x81,
NVM_OP_PWRITE = 0x91,
NVM_OP_PREAD = 0x92,
NVM_OP_ERASE = 0x90,
/* PPA Command Flags */
NVM_IO_SNGL_ACCESS = 0x0,
NVM_IO_DUAL_ACCESS = 0x1,
NVM_IO_QUAD_ACCESS = 0x2,
NVM_IO_SUSPEND = 0x80,
NVM_IO_SLC_MODE = 0x100,
NVM_IO_SCRAMBLE_DISABLE = 0x200,
};
struct nvm_id_group {
u8 mtype;
u8 fmtype;
u16 res16;
u8 num_ch;
u8 num_lun;
u8 num_pln;
u16 num_blk;
u16 num_pg;
u16 fpg_sz;
u16 csecs;
u16 sos;
u32 trdt;
u32 trdm;
u32 tprt;
u32 tprm;
u32 tbet;
u32 tbem;
u32 mpos;
u16 cpar;
u8 res[913];
} __packed;
struct nvm_addr_format {
u8 ch_offset;
u8 ch_len;
u8 lun_offset;
u8 lun_len;
u8 pln_offset;
u8 pln_len;
u8 blk_offset;
u8 blk_len;
u8 pg_offset;
u8 pg_len;
u8 sect_offset;
u8 sect_len;
u8 res[4];
};
struct nvm_id {
u8 ver_id;
u8 vmnt;
u8 cgrps;
u8 res[5];
u32 cap;
u32 dom;
struct nvm_addr_format ppaf;
u8 ppat;
u8 resv[224];
struct nvm_id_group groups[4];
} __packed;
struct nvm_target {
struct list_head list;
struct nvm_tgt_type *type;
struct gendisk *disk;
};
struct nvm_tgt_instance {
struct nvm_tgt_type *tt;
};
#define ADDR_EMPTY (~0ULL)
#define NVM_VERSION_MAJOR 1
#define NVM_VERSION_MINOR 0
#define NVM_VERSION_PATCH 0
#define NVM_SEC_BITS (8)
#define NVM_PL_BITS (6)
#define NVM_PG_BITS (16)
#define NVM_BLK_BITS (16)
#define NVM_LUN_BITS (10)
#define NVM_CH_BITS (8)
struct ppa_addr {
union {
/* Channel-based PPA format in nand 4x2x2x2x8x10 */
struct {
u64 ch : 4;
u64 sec : 2; /* 4 sectors per page */
u64 pl : 2; /* 4 planes per LUN */
u64 lun : 2; /* 4 LUNs per channel */
u64 pg : 8; /* 256 pages per block */
u64 blk : 10;/* 1024 blocks per plane */
u64 resved : 36;
} chnl;
/* Generic structure for all addresses */
struct {
u64 sec : NVM_SEC_BITS;
u64 pl : NVM_PL_BITS;
u64 pg : NVM_PG_BITS;
u64 blk : NVM_BLK_BITS;
u64 lun : NVM_LUN_BITS;
u64 ch : NVM_CH_BITS;
} g;
u64 ppa;
};
} __packed;
struct nvm_rq {
struct nvm_tgt_instance *ins;
struct nvm_dev *dev;
struct bio *bio;
union {
struct ppa_addr ppa_addr;
dma_addr_t dma_ppa_list;
};
struct ppa_addr *ppa_list;
void *metadata;
dma_addr_t dma_metadata;
uint8_t opcode;
uint16_t nr_pages;
uint16_t flags;
};
static inline struct nvm_rq *nvm_rq_from_pdu(void *pdu)
{
return pdu - sizeof(struct nvm_rq);
}
static inline void *nvm_rq_to_pdu(struct nvm_rq *rqdata)
{
return rqdata + 1;
}
struct nvm_block;
typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *);
typedef int (nvm_bb_update_fn)(u32, void *, unsigned int, void *);
typedef int (nvm_id_fn)(struct request_queue *, struct nvm_id *);
typedef int (nvm_get_l2p_tbl_fn)(struct request_queue *, u64, u32,
nvm_l2p_update_fn *, void *);
typedef int (nvm_op_bb_tbl_fn)(struct request_queue *, int, unsigned int,
nvm_bb_update_fn *, void *);
typedef int (nvm_op_set_bb_fn)(struct request_queue *, struct nvm_rq *, int);
typedef int (nvm_submit_io_fn)(struct request_queue *, struct nvm_rq *);
typedef int (nvm_erase_blk_fn)(struct request_queue *, struct nvm_rq *);
typedef void *(nvm_create_dma_pool_fn)(struct request_queue *, char *);
typedef void (nvm_destroy_dma_pool_fn)(void *);
typedef void *(nvm_dev_dma_alloc_fn)(struct request_queue *, void *, gfp_t,
dma_addr_t *);
typedef void (nvm_dev_dma_free_fn)(void *, void*, dma_addr_t);
struct nvm_dev_ops {
nvm_id_fn *identity;
nvm_get_l2p_tbl_fn *get_l2p_tbl;
nvm_op_bb_tbl_fn *get_bb_tbl;
nvm_op_set_bb_fn *set_bb;
nvm_submit_io_fn *submit_io;
nvm_erase_blk_fn *erase_block;
nvm_create_dma_pool_fn *create_dma_pool;
nvm_destroy_dma_pool_fn *destroy_dma_pool;
nvm_dev_dma_alloc_fn *dev_dma_alloc;
nvm_dev_dma_free_fn *dev_dma_free;
uint8_t max_phys_sect;
};
struct nvm_lun {
int id;
int lun_id;
int chnl_id;
unsigned int nr_free_blocks; /* Number of unused blocks */
struct nvm_block *blocks;
spinlock_t lock;
};
struct nvm_block {
struct list_head list;
struct nvm_lun *lun;
unsigned long id;
void *priv;
int type;
};
struct nvm_dev {
struct nvm_dev_ops *ops;
struct list_head devices;
struct list_head online_targets;
/* Media manager */
struct nvmm_type *mt;
void *mp;
/* Device information */
int nr_chnls;
int nr_planes;
int luns_per_chnl;
int sec_per_pg; /* only sectors for a single page */
int pgs_per_blk;
int blks_per_lun;
int sec_size;
int oob_size;
int addr_mode;
struct nvm_addr_format addr_format;
/* Calculated/Cached values. These do not reflect the actual usable
* blocks at run-time.
*/
int max_rq_size;
int plane_mode; /* drive device in single, double or quad mode */
int sec_per_pl; /* all sectors across planes */
int sec_per_blk;
int sec_per_lun;
unsigned long total_pages;
unsigned long total_blocks;
int nr_luns;
unsigned max_pages_per_blk;
void *ppalist_pool;
struct nvm_id identity;
/* Backend device */
struct request_queue *q;
char name[DISK_NAME_LEN];
};
/* fallback conversion */
static struct ppa_addr __generic_to_linear_addr(struct nvm_dev *dev,
struct ppa_addr r)
{
struct ppa_addr l;
l.ppa = r.g.sec +
r.g.pg * dev->sec_per_pg +
r.g.blk * (dev->pgs_per_blk *
dev->sec_per_pg) +
r.g.lun * (dev->blks_per_lun *
dev->pgs_per_blk *
dev->sec_per_pg) +
r.g.ch * (dev->blks_per_lun *
dev->pgs_per_blk *
dev->luns_per_chnl *
dev->sec_per_pg);
return l;
}
/* fallback conversion */
static struct ppa_addr __linear_to_generic_addr(struct nvm_dev *dev,
struct ppa_addr r)
{
struct ppa_addr l;
int secs, pgs, blks, luns;
sector_t ppa = r.ppa;
l.ppa = 0;
div_u64_rem(ppa, dev->sec_per_pg, &secs);
l.g.sec = secs;
sector_div(ppa, dev->sec_per_pg);
div_u64_rem(ppa, dev->sec_per_blk, &pgs);
l.g.pg = pgs;
sector_div(ppa, dev->pgs_per_blk);
div_u64_rem(ppa, dev->blks_per_lun, &blks);
l.g.blk = blks;
sector_div(ppa, dev->blks_per_lun);
div_u64_rem(ppa, dev->luns_per_chnl, &luns);
l.g.lun = luns;
sector_div(ppa, dev->luns_per_chnl);
l.g.ch = ppa;
return l;
}
static struct ppa_addr __generic_to_chnl_addr(struct ppa_addr r)
{
struct ppa_addr l;
l.ppa = 0;
l.chnl.sec = r.g.sec;
l.chnl.pl = r.g.pl;
l.chnl.pg = r.g.pg;
l.chnl.blk = r.g.blk;
l.chnl.lun = r.g.lun;
l.chnl.ch = r.g.ch;
return l;
}
static struct ppa_addr __chnl_to_generic_addr(struct ppa_addr r)
{
struct ppa_addr l;
l.ppa = 0;
l.g.sec = r.chnl.sec;
l.g.pl = r.chnl.pl;
l.g.pg = r.chnl.pg;
l.g.blk = r.chnl.blk;
l.g.lun = r.chnl.lun;
l.g.ch = r.chnl.ch;
return l;
}
static inline struct ppa_addr addr_to_generic_mode(struct nvm_dev *dev,
struct ppa_addr gppa)
{
switch (dev->addr_mode) {
case NVM_ADDRMODE_LINEAR:
return __linear_to_generic_addr(dev, gppa);
case NVM_ADDRMODE_CHANNEL:
return __chnl_to_generic_addr(gppa);
default:
BUG();
}
return gppa;
}
static inline struct ppa_addr generic_to_addr_mode(struct nvm_dev *dev,
struct ppa_addr gppa)
{
switch (dev->addr_mode) {
case NVM_ADDRMODE_LINEAR:
return __generic_to_linear_addr(dev, gppa);
case NVM_ADDRMODE_CHANNEL:
return __generic_to_chnl_addr(gppa);
default:
BUG();
}
return gppa;
}
static inline int ppa_empty(struct ppa_addr ppa_addr)
{
return (ppa_addr.ppa == ADDR_EMPTY);
}
static inline void ppa_set_empty(struct ppa_addr *ppa_addr)
{
ppa_addr->ppa = ADDR_EMPTY;
}
static inline struct ppa_addr block_to_ppa(struct nvm_dev *dev,
struct nvm_block *blk)
{
struct ppa_addr ppa;
struct nvm_lun *lun = blk->lun;
ppa.ppa = 0;
ppa.g.blk = blk->id % dev->blks_per_lun;
ppa.g.lun = lun->lun_id;
ppa.g.ch = lun->chnl_id;
return ppa;
}
typedef void (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
typedef sector_t (nvm_tgt_capacity_fn)(void *);
typedef int (nvm_tgt_end_io_fn)(struct nvm_rq *, int);
typedef void *(nvm_tgt_init_fn)(struct nvm_dev *, struct gendisk *, int, int);
typedef void (nvm_tgt_exit_fn)(void *);
struct nvm_tgt_type {
const char *name;
unsigned int version[3];
/* target entry points */
nvm_tgt_make_rq_fn *make_rq;
nvm_tgt_capacity_fn *capacity;
nvm_tgt_end_io_fn *end_io;
/* module-specific init/teardown */
nvm_tgt_init_fn *init;
nvm_tgt_exit_fn *exit;
/* For internal use */
struct list_head list;
};
extern int nvm_register_target(struct nvm_tgt_type *);
extern void nvm_unregister_target(struct nvm_tgt_type *);
extern void *nvm_dev_dma_alloc(struct nvm_dev *, gfp_t, dma_addr_t *);
extern void nvm_dev_dma_free(struct nvm_dev *, void *, dma_addr_t);
typedef int (nvmm_register_fn)(struct nvm_dev *);
typedef void (nvmm_unregister_fn)(struct nvm_dev *);
typedef struct nvm_block *(nvmm_get_blk_fn)(struct nvm_dev *,
struct nvm_lun *, unsigned long);
typedef void (nvmm_put_blk_fn)(struct nvm_dev *, struct nvm_block *);
typedef int (nvmm_open_blk_fn)(struct nvm_dev *, struct nvm_block *);
typedef int (nvmm_close_blk_fn)(struct nvm_dev *, struct nvm_block *);
typedef void (nvmm_flush_blk_fn)(struct nvm_dev *, struct nvm_block *);
typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
typedef int (nvmm_end_io_fn)(struct nvm_rq *, int);
typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *,
unsigned long);
typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int);
typedef void (nvmm_free_blocks_print_fn)(struct nvm_dev *);
struct nvmm_type {
const char *name;
unsigned int version[3];
nvmm_register_fn *register_mgr;
nvmm_unregister_fn *unregister_mgr;
/* Block administration callbacks */
nvmm_get_blk_fn *get_blk;
nvmm_put_blk_fn *put_blk;
nvmm_open_blk_fn *open_blk;
nvmm_close_blk_fn *close_blk;
nvmm_flush_blk_fn *flush_blk;
nvmm_submit_io_fn *submit_io;
nvmm_end_io_fn *end_io;
nvmm_erase_blk_fn *erase_blk;
/* Configuration management */
nvmm_get_lun_fn *get_lun;
/* Statistics */
nvmm_free_blocks_print_fn *free_blocks_print;
struct list_head list;
};
extern int nvm_register_mgr(struct nvmm_type *);
extern void nvm_unregister_mgr(struct nvmm_type *);
extern struct nvm_block *nvm_get_blk(struct nvm_dev *, struct nvm_lun *,
unsigned long);
extern void nvm_put_blk(struct nvm_dev *, struct nvm_block *);
extern int nvm_register(struct request_queue *, char *,
struct nvm_dev_ops *);
extern void nvm_unregister(char *);
extern int nvm_submit_io(struct nvm_dev *, struct nvm_rq *);
extern int nvm_erase_blk(struct nvm_dev *, struct nvm_block *);
#else /* CONFIG_NVM */
struct nvm_dev_ops;
static inline int nvm_register(struct request_queue *q, char *disk_name,
struct nvm_dev_ops *ops)
{
return -EINVAL;
}
static inline void nvm_unregister(char *disk_name) {}
#endif /* CONFIG_NVM */
#endif /* LIGHTNVM.H */

View file

@ -0,0 +1,130 @@
/*
* Copyright (C) 2015 CNEX Labs. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
* USA.
*/
#ifndef _UAPI_LINUX_LIGHTNVM_H
#define _UAPI_LINUX_LIGHTNVM_H
#ifdef __KERNEL__
#include <linux/kernel.h>
#include <linux/ioctl.h>
#else /* __KERNEL__ */
#include <stdio.h>
#include <sys/ioctl.h>
#define DISK_NAME_LEN 32
#endif /* __KERNEL__ */
#include <linux/types.h>
#include <linux/ioctl.h>
#define NVM_TTYPE_NAME_MAX 48
#define NVM_TTYPE_MAX 63
#define NVM_CTRL_FILE "/dev/lightnvm/control"
struct nvm_ioctl_info_tgt {
__u32 version[3];
__u32 reserved;
char tgtname[NVM_TTYPE_NAME_MAX];
};
struct nvm_ioctl_info {
__u32 version[3]; /* in/out - major, minor, patch */
__u16 tgtsize; /* number of targets */
__u16 reserved16; /* pad to 4K page */
__u32 reserved[12];
struct nvm_ioctl_info_tgt tgts[NVM_TTYPE_MAX];
};
enum {
NVM_DEVICE_ACTIVE = 1 << 0,
};
struct nvm_ioctl_device_info {
char devname[DISK_NAME_LEN];
char bmname[NVM_TTYPE_NAME_MAX];
__u32 bmversion[3];
__u32 flags;
__u32 reserved[8];
};
struct nvm_ioctl_get_devices {
__u32 nr_devices;
__u32 reserved[31];
struct nvm_ioctl_device_info info[31];
};
struct nvm_ioctl_create_simple {
__u32 lun_begin;
__u32 lun_end;
};
enum {
NVM_CONFIG_TYPE_SIMPLE = 0,
};
struct nvm_ioctl_create_conf {
__u32 type;
union {
struct nvm_ioctl_create_simple s;
};
};
struct nvm_ioctl_create {
char dev[DISK_NAME_LEN]; /* open-channel SSD device */
char tgttype[NVM_TTYPE_NAME_MAX]; /* target type name */
char tgtname[DISK_NAME_LEN]; /* dev to expose target as */
__u32 flags;
struct nvm_ioctl_create_conf conf;
};
struct nvm_ioctl_remove {
char tgtname[DISK_NAME_LEN];
__u32 flags;
};
/* The ioctl type, 'L', 0x20 - 0x2F documented in ioctl-number.txt */
enum {
/* top level cmds */
NVM_INFO_CMD = 0x20,
NVM_GET_DEVICES_CMD,
/* device level cmds */
NVM_DEV_CREATE_CMD,
NVM_DEV_REMOVE_CMD,
};
#define NVM_IOCTL 'L' /* 0x4c */
#define NVM_INFO _IOWR(NVM_IOCTL, NVM_INFO_CMD, \
struct nvm_ioctl_info)
#define NVM_GET_DEVICES _IOR(NVM_IOCTL, NVM_GET_DEVICES_CMD, \
struct nvm_ioctl_get_devices)
#define NVM_DEV_CREATE _IOW(NVM_IOCTL, NVM_DEV_CREATE_CMD, \
struct nvm_ioctl_create)
#define NVM_DEV_REMOVE _IOW(NVM_IOCTL, NVM_DEV_REMOVE_CMD, \
struct nvm_ioctl_remove)
#define NVM_VERSION_MAJOR 1
#define NVM_VERSION_MINOR 0
#define NVM_VERSION_PATCHLEVEL 0
#endif