mirror of
https://github.com/freebsd/freebsd-src
synced 2024-07-21 10:19:04 +00:00
nvmf: The in-kernel NVMe over Fabrics host
This is the client (initiator in SCSI terms) for NVMe over Fabrics. Userland is responsible for creating a set of queue pairs and then handing them off via an ioctl to this driver, e.g. via the 'connect' command from nvmecontrol(8). An nvmeX new-bus device is created at the top-level to represent the remote controller similar to PCI nvmeX devices for PCI-express controllers. As with nvme(4), namespace devices named /dev/nvmeXnsY are created and pass through commands can be submitted to either the namespace devices or the controller device. For example, 'nvmecontrol identify nvmeX' works for a remote Fabrics controller the same as for a PCI-express controller. nvmf exports remote namespaces via nda(4) devices using the new NVMF CAM transport. nvmf does not support nvd(4), only nda(4). Sponsored by: Chelsio Communications Differential Revision: https://reviews.freebsd.org/D44714
This commit is contained in:
parent
07c6a62bab
commit
a1eda74167
|
@ -408,6 +408,7 @@ MAN= aac.4 \
|
|||
nvd.4 \
|
||||
${_nvdimm.4} \
|
||||
nvme.4 \
|
||||
nvmf.4 \
|
||||
nvmf_tcp.4 \
|
||||
${_nvram.4} \
|
||||
oce.4 \
|
||||
|
|
87
share/man/man4/nvmf.4
Normal file
87
share/man/man4/nvmf.4
Normal file
|
@ -0,0 +1,87 @@
|
|||
.\"
|
||||
.\" SPDX-License-Identifier: BSD-2-Clause
|
||||
.\"
|
||||
.\" Copyright (c) 2024 Chelsio Communications, Inc.
|
||||
.\"
|
||||
.Dd May 2, 2024
|
||||
.Dt NVMF 4
|
||||
.Os
|
||||
.Sh NAME
|
||||
.Nm nvmf
|
||||
.Nd "NVM Express over Fabrics host driver"
|
||||
.Sh SYNOPSIS
|
||||
To compile the driver into the kernel,
|
||||
place the following line in the
|
||||
kernel configuration file:
|
||||
.Bd -ragged -offset indent
|
||||
.Cd "device nvmf"
|
||||
.Ed
|
||||
.Pp
|
||||
Alternatively, to load the driver as a
|
||||
module at boot time, place the following line in
|
||||
.Xr loader.conf 5 :
|
||||
.Bd -literal -offset indent
|
||||
nvmf_load="YES"
|
||||
.Ed
|
||||
.Sh DESCRIPTION
|
||||
The
|
||||
.Nm
|
||||
driver provides the kernel component of an NVM Express over Fabrics
|
||||
host.
|
||||
The NVMeoF host is the client which provides local access to
|
||||
namespaces exported by a remote controller.
|
||||
.Pp
|
||||
Associations between the local host and remote controllers are managed
|
||||
using
|
||||
.Xr nvmecontrol 8 .
|
||||
New associations are created via the
|
||||
.Cm connect
|
||||
command and destroyed via the
|
||||
.Cm disconnect
|
||||
command.
|
||||
If an association's connection is interrupted,
|
||||
the
|
||||
.Cm reconnect
|
||||
command creates a new association to replace the interrupted association.
|
||||
.Pp
|
||||
Similar to
|
||||
.Xr nvme 4 ,
|
||||
.Nm
|
||||
creates controller device nodes using the format
|
||||
.Pa /dev/nvmeX
|
||||
and namespace device nodes using the format
|
||||
.Pa /dev/nvmeXnsY .
|
||||
.Nm
|
||||
also exports remote namespaces via the CAM
|
||||
.Xr nda 4
|
||||
peripheral driver.
|
||||
Unlike
|
||||
.Xr nvme 4 ,
|
||||
.Nm
|
||||
does not support the
|
||||
.Xr nvd 4
|
||||
disk driver.
|
||||
.Pp
|
||||
Associations require a supported transport such as
|
||||
.Xr nvmf_tcp 4
|
||||
for associations using TCP/IP.
|
||||
.Sh SEE ALSO
|
||||
.Xr nda 4 ,
|
||||
.Xr nvme 4 ,
|
||||
.Xr nvmf_tcp 4 ,
|
||||
.Xr nvmft 4 ,
|
||||
.Xr nvmecontrol 8
|
||||
.Sh HISTORY
|
||||
The
|
||||
.Nm
|
||||
module first appeared in
|
||||
.Fx 15.0 .
|
||||
.Sh AUTHORS
|
||||
The
|
||||
.Nm
|
||||
driver was developed by
|
||||
.An John Baldwin Aq Mt jhb@FreeBSD.org
|
||||
under sponsorship from Chelsio Communications, Inc.
|
||||
.Sh BUGS
|
||||
.Nm
|
||||
only supports a single I/O queue pair per association.
|
|
@ -1676,12 +1676,14 @@ device mrsas # LSI/Avago MegaRAID SAS/SATA, 6Gb/s and 12Gb/s
|
|||
# NVM Express
|
||||
#
|
||||
# nvme: PCI-express NVM Express host controllers
|
||||
# nvmf: NVM Express over Fabrics host
|
||||
# nvmf_tcp: TCP transport for NVM Express over Fabrics
|
||||
# nda: CAM NVMe disk driver
|
||||
# nvd: non-CAM NVMe disk driver
|
||||
|
||||
device nvme # base NVMe driver
|
||||
device nvme # PCI-express NVMe host driver
|
||||
options NVME_USE_NVD=1 # Use nvd(4) instead of the CAM nda(4) driver
|
||||
device nvmf # NVMeoF host driver
|
||||
device nvmf_tcp # NVMeoF TCP transport
|
||||
device nda # NVMe direct access devices (aka disks)
|
||||
device nvd # expose NVMe namespaces as disks, depends on nvme
|
||||
|
|
|
@ -2533,7 +2533,15 @@ dev/nvme/nvme_test.c optional nvme
|
|||
dev/nvme/nvme_util.c optional nvme
|
||||
dev/nvmem/nvmem.c optional nvmem fdt
|
||||
dev/nvmem/nvmem_if.m optional nvmem
|
||||
dev/nvmf/host/nvmf.c optional nvmf
|
||||
dev/nvmf/host/nvmf_aer.c optional nvmf
|
||||
dev/nvmf/host/nvmf_cmd.c optional nvmf
|
||||
dev/nvmf/host/nvmf_ctldev.c optional nvmf
|
||||
dev/nvmf/host/nvmf_ns.c optional nvmf
|
||||
dev/nvmf/host/nvmf_qpair.c optional nvmf
|
||||
dev/nvmf/host/nvmf_sim.c optional nvmf
|
||||
dev/nvmf/nvmf_tcp.c optional nvmf_tcp
|
||||
dev/nvmf/nvmf_transport.c optional nvmf
|
||||
dev/oce/oce_hw.c optional oce pci
|
||||
dev/oce/oce_if.c optional oce pci
|
||||
dev/oce/oce_mbox.c optional oce pci
|
||||
|
|
939
sys/dev/nvmf/host/nvmf.c
Normal file
939
sys/dev/nvmf/host/nvmf.c
Normal file
|
@ -0,0 +1,939 @@
|
|||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2023-2024 Chelsio Communications, Inc.
|
||||
* Written by: John Baldwin <jhb@FreeBSD.org>
|
||||
*/
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/bus.h>
|
||||
#include <sys/conf.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/memdesc.h>
|
||||
#include <sys/module.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/sx.h>
|
||||
#include <sys/taskqueue.h>
|
||||
#include <dev/nvme/nvme.h>
|
||||
#include <dev/nvmf/nvmf.h>
|
||||
#include <dev/nvmf/nvmf_transport.h>
|
||||
#include <dev/nvmf/host/nvmf_var.h>
|
||||
|
||||
static struct cdevsw nvmf_cdevsw;
|
||||
|
||||
MALLOC_DEFINE(M_NVMF, "nvmf", "NVMe over Fabrics host");
|
||||
|
||||
static void nvmf_disconnect_task(void *arg, int pending);
|
||||
|
||||
/*
 * Completion callback for admin/fabrics commands: record the completion
 * entry in the caller's status tracker and wake any thread blocked in
 * nvmf_wait_for_reply().
 */
void
nvmf_complete(void *arg, const struct nvme_completion *cqe)
{
	struct nvmf_completion_status *status = arg;
	struct mtx *mtx;

	status->cqe = *cqe;
	/*
	 * Use a pool mutex keyed on the status address so the store to
	 * status->done is ordered with the sleeper's check in
	 * nvmf_wait_for_reply().
	 */
	mtx = mtx_pool_find(mtxpool_sleep, status);
	mtx_lock(mtx);
	status->done = true;
	mtx_unlock(mtx);
	wakeup(status);
}
|
||||
|
||||
/*
 * Completion callback for the data-transfer portion of a command: record
 * the I/O error (0 on success) and wake any thread blocked in
 * nvmf_wait_for_reply().
 */
void
nvmf_io_complete(void *arg, size_t xfered, int error)
{
	struct nvmf_completion_status *status = arg;
	struct mtx *mtx;

	status->io_error = error;
	/* Same pool-mutex handshake as nvmf_complete(). */
	mtx = mtx_pool_find(mtxpool_sleep, status);
	mtx_lock(mtx);
	status->io_done = true;
	mtx_unlock(mtx);
	wakeup(status);
}
|
||||
|
||||
/*
 * Sleep until both the command completion (status->done) and any
 * associated data transfer (status->io_done) have finished.  Callers
 * that have no data phase rely on io_done already being true.
 */
void
nvmf_wait_for_reply(struct nvmf_completion_status *status)
{
	struct mtx *mtx;

	mtx = mtx_pool_find(mtxpool_sleep, status);
	mtx_lock(mtx);
	while (!status->done || !status->io_done)
		mtx_sleep(status, mtx, 0, "nvmfcmd", 0);
	mtx_unlock(mtx);
}
|
||||
|
||||
/*
 * Synchronously read a fabrics property (e.g. CAP, VS, CC) of 'size'
 * bytes (4 or 8) at 'offset' via a PROPERTY_GET fabrics command.
 *
 * Returns 0 on success with the value in *value, ECONNABORTED if the
 * command could not be sent, or EIO if the controller reported an error.
 */
static int
nvmf_read_property(struct nvmf_softc *sc, uint32_t offset, uint8_t size,
    uint64_t *value)
{
	const struct nvmf_fabric_prop_get_rsp *rsp;
	struct nvmf_completion_status status;

	nvmf_status_init(&status);
	if (!nvmf_cmd_get_property(sc, offset, size, nvmf_complete, &status,
	    M_WAITOK))
		return (ECONNABORTED);
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev, "PROPERTY_GET failed, status %#x\n",
		    le16toh(status.cqe.status));
		return (EIO);
	}

	/* The property value is carried in the completion entry itself. */
	rsp = (const struct nvmf_fabric_prop_get_rsp *)&status.cqe;
	if (size == 8)
		*value = le64toh(rsp->value.u64);
	else
		*value = le32toh(rsp->value.u32.low);
	return (0);
}
|
||||
|
||||
/*
 * Synchronously write a fabrics property of 'size' bytes at 'offset'
 * via a PROPERTY_SET fabrics command.
 *
 * Returns 0 on success, ECONNABORTED if the command could not be sent,
 * or EIO if the controller reported an error.
 */
static int
nvmf_write_property(struct nvmf_softc *sc, uint32_t offset, uint8_t size,
    uint64_t value)
{
	struct nvmf_completion_status status;

	nvmf_status_init(&status);
	if (!nvmf_cmd_set_property(sc, offset, size, value, nvmf_complete, &status,
	    M_WAITOK))
		return (ECONNABORTED);
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev, "PROPERTY_SET failed, status %#x\n",
		    le16toh(status.cqe.status));
		return (EIO);
	}
	return (0);
}
|
||||
|
||||
/*
 * Request a normal controller shutdown by setting CC.SHN, mirroring the
 * shutdown sequence used for PCIe controllers.  Errors are logged but
 * not propagated since this is only called on teardown paths.
 */
static void
nvmf_shutdown_controller(struct nvmf_softc *sc)
{
	uint64_t cc;
	int error;

	error = nvmf_read_property(sc, NVMF_PROP_CC, 4, &cc);
	if (error != 0) {
		device_printf(sc->dev, "Failed to fetch CC for shutdown\n");
		return;
	}

	cc |= NVMEF(NVME_CC_REG_SHN, NVME_SHN_NORMAL);

	error = nvmf_write_property(sc, NVMF_PROP_CC, 4, cc);
	if (error != 0)
		device_printf(sc->dev,
		    "Failed to set CC to trigger shutdown\n");
}
|
||||
|
||||
/*
 * KeepAlive receive-side callout: if no traffic arrived from the
 * controller during the last interval, treat the association as dead
 * and kick off a disconnect; otherwise re-arm the timer.
 */
static void
nvmf_check_keep_alive(void *arg)
{
	struct nvmf_softc *sc = arg;
	int traffic;

	/* Read-and-clear so the next interval starts fresh. */
	traffic = atomic_readandclear_int(&sc->ka_active_rx_traffic);
	if (traffic == 0) {
		device_printf(sc->dev,
		    "disconnecting due to KeepAlive timeout\n");
		nvmf_disconnect(sc);
		return;
	}

	callout_schedule_sbt(&sc->ka_rx_timer, sc->ka_rx_sbt, 0, C_HARDCLOCK);
}
|
||||
|
||||
/*
 * Completion handler for a KeepAlive command: any reply counts as
 * receive-side traffic; a non-zero status is logged but otherwise
 * ignored.
 */
static void
nvmf_keep_alive_complete(void *arg, const struct nvme_completion *cqe)
{
	struct nvmf_softc *sc = arg;

	atomic_store_int(&sc->ka_active_rx_traffic, 1);
	if (cqe->status != 0) {
		device_printf(sc->dev,
		    "KeepAlive response reported status %#x\n",
		    le16toh(cqe->status));
	}
}
|
||||
|
||||
/*
 * KeepAlive transmit-side callout: send a KeepAlive command unless other
 * commands were already sent during this interval (TBKAS semantics),
 * then re-arm the timer.
 */
static void
nvmf_send_keep_alive(void *arg)
{
	struct nvmf_softc *sc = arg;
	int traffic;

	/*
	 * Don't bother sending a KeepAlive command if TKAS is active
	 * and another command has been sent during the interval.
	 */
	traffic = atomic_load_int(&sc->ka_active_tx_traffic);
	if (traffic == 0 && !nvmf_cmd_keep_alive(sc, nvmf_keep_alive_complete,
	    sc, M_NOWAIT))
		device_printf(sc->dev,
		    "Failed to allocate KeepAlive command\n");

	/* Clear ka_active_tx_traffic after sending the keep alive command. */
	atomic_store_int(&sc->ka_active_tx_traffic, 0);

	callout_schedule_sbt(&sc->ka_tx_timer, sc->ka_tx_sbt, 0, C_HARDCLOCK);
}
|
||||
|
||||
/*
 * Copy in and validate the userland handoff data (controller data plus
 * per-queue parameters) into 'ivars' for use by probe/attach.
 *
 * Returns 0 on success (ownership of the allocations moves to ivars,
 * released later via nvmf_free_ivars), EINVAL on malformed handoff
 * data, or a copyin error.
 */
int
nvmf_init_ivars(struct nvmf_ivars *ivars, struct nvmf_handoff_host *hh)
{
	size_t len;
	u_int i;
	int error;

	memset(ivars, 0, sizeof(*ivars));

	/* Must have an admin queue and at least one I/O queue. */
	if (!hh->admin.admin || hh->num_io_queues < 1)
		return (EINVAL);

	ivars->cdata = malloc(sizeof(*ivars->cdata), M_NVMF, M_WAITOK);
	error = copyin(hh->cdata, ivars->cdata, sizeof(*ivars->cdata));
	if (error != 0)
		goto out;
	nvme_controller_data_swapbytes(ivars->cdata);

	/*
	 * NOTE(review): num_io_queues comes from userland; an enormous
	 * value makes this multiplication wrap and/or this M_WAITOK
	 * allocation huge.  A mallocarray()-style overflow check or an
	 * upper bound on num_io_queues may be warranted — confirm against
	 * the ioctl caller's validation.
	 */
	len = hh->num_io_queues * sizeof(*ivars->io_params);
	ivars->io_params = malloc(len, M_NVMF, M_WAITOK);
	error = copyin(hh->io, ivars->io_params, len);
	if (error != 0)
		goto out;
	for (i = 0; i < hh->num_io_queues; i++) {
		if (ivars->io_params[i].admin) {
			error = EINVAL;
			goto out;
		}

		/* Require all I/O queues to be the same size. */
		if (ivars->io_params[i].qsize != ivars->io_params[0].qsize) {
			error = EINVAL;
			goto out;
		}
	}

	ivars->hh = hh;
	return (0);

out:
	free(ivars->io_params, M_NVMF);
	free(ivars->cdata, M_NVMF);
	return (error);
}
|
||||
|
||||
/* Release allocations made by nvmf_init_ivars(); free(NULL) is a no-op. */
void
nvmf_free_ivars(struct nvmf_ivars *ivars)
{
	free(ivars->io_params, M_NVMF);
	free(ivars->cdata, M_NVMF);
}
|
||||
|
||||
/*
 * New-bus probe: describe the device using the remote subsystem NQN
 * from the copied-in controller data.
 */
static int
nvmf_probe(device_t dev)
{
	struct nvmf_ivars *ivars = device_get_ivars(dev);
	char desc[260];

	if (ivars == NULL)
		return (ENXIO);

	/* subnqn is at most 256 bytes and may not be NUL-terminated. */
	snprintf(desc, sizeof(desc), "Fabrics: %.256s", ivars->cdata->subnqn);
	device_set_desc_copy(dev, desc);
	return (BUS_PROBE_DEFAULT);
}
|
||||
|
||||
/*
 * Adopt the queue pairs handed off from userland: initialize the admin
 * queue and all I/O queues, and start the KeepAlive timers when a
 * KeepAlive timeout (kato) was negotiated.
 *
 * On failure the partially initialized queues are left in the softc for
 * the caller's error path to destroy.
 */
static int
nvmf_establish_connection(struct nvmf_softc *sc, struct nvmf_ivars *ivars)
{
	char name[16];

	/* Setup the admin queue. */
	sc->admin = nvmf_init_qp(sc, ivars->hh->trtype, &ivars->hh->admin,
	    "admin queue");
	if (sc->admin == NULL) {
		device_printf(sc->dev, "Failed to setup admin queue\n");
		return (ENXIO);
	}

	/* Setup I/O queues. */
	sc->io = malloc(ivars->hh->num_io_queues * sizeof(*sc->io), M_NVMF,
	    M_WAITOK | M_ZERO);
	sc->num_io_queues = ivars->hh->num_io_queues;
	for (u_int i = 0; i < sc->num_io_queues; i++) {
		snprintf(name, sizeof(name), "I/O queue %u", i);
		sc->io[i] = nvmf_init_qp(sc, ivars->hh->trtype,
		    &ivars->io_params[i], name);
		if (sc->io[i] == NULL) {
			/*
			 * NOTE(review): the queue name above is numbered
			 * from 0 but this message prints i + 1 — confirm
			 * which numbering is intended.
			 */
			device_printf(sc->dev, "Failed to setup I/O queue %u\n",
			    i + 1);
			return (ENXIO);
		}
	}

	/* Start KeepAlive timers. */
	if (ivars->hh->kato != 0) {
		/* TBKAS: controller counts any traffic as keep-alive. */
		sc->ka_traffic = NVMEV(NVME_CTRLR_DATA_CTRATT_TBKAS,
		    sc->cdata->ctratt) != 0;
		sc->ka_rx_sbt = mstosbt(ivars->hh->kato);
		/* Transmit twice per timeout interval for safety margin. */
		sc->ka_tx_sbt = sc->ka_rx_sbt / 2;
		callout_reset_sbt(&sc->ka_rx_timer, sc->ka_rx_sbt, 0,
		    nvmf_check_keep_alive, sc, C_HARDCLOCK);
		callout_reset_sbt(&sc->ka_tx_timer, sc->ka_tx_sbt, 0,
		    nvmf_send_keep_alive, sc, C_HARDCLOCK);
	}

	return (0);
}
|
||||
|
||||
/*
 * Fetch one page of the active namespace list starting at *nsidp and
 * create a namespace object for each valid entry.
 *
 * On return, *nsidp is 0 when the scan is complete or holds the next
 * NSID to continue from when the page was full.  Returns false on any
 * command or I/O failure (or a duplicate NSID), true otherwise.
 */
static bool
nvmf_scan_nslist(struct nvmf_softc *sc, struct nvme_ns_list *nslist,
    struct nvme_namespace_data *data, uint32_t *nsidp)
{
	struct nvmf_completion_status status;
	uint32_t nsid;

	nvmf_status_init(&status);
	nvmf_status_wait_io(&status);
	if (!nvmf_cmd_identify_active_namespaces(sc, *nsidp, nslist,
	    nvmf_complete, &status, nvmf_io_complete, &status, M_WAITOK)) {
		device_printf(sc->dev,
		    "failed to send IDENTIFY active namespaces command\n");
		return (false);
	}
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev,
		    "IDENTIFY active namespaces failed, status %#x\n",
		    le16toh(status.cqe.status));
		return (false);
	}

	if (status.io_error != 0) {
		device_printf(sc->dev,
		    "IDENTIFY active namespaces failed with I/O error %d\n",
		    status.io_error);
		return (false);
	}

	for (u_int i = 0; i < nitems(nslist->ns); i++) {
		nsid = nslist->ns[i];
		/* A zero entry terminates the list: scan is complete. */
		if (nsid == 0) {
			*nsidp = 0;
			return (true);
		}

		if (sc->ns[nsid - 1] != NULL) {
			device_printf(sc->dev,
			    "duplicate namespace %u in active namespace list\n",
			    nsid);
			return (false);
		}

		/* Identify each namespace to fetch its size and format. */
		nvmf_status_init(&status);
		nvmf_status_wait_io(&status);
		if (!nvmf_cmd_identify_namespace(sc, nsid, data, nvmf_complete,
		    &status, nvmf_io_complete, &status, M_WAITOK)) {
			device_printf(sc->dev,
			    "failed to send IDENTIFY namespace %u command\n",
			    nsid);
			return (false);
		}
		nvmf_wait_for_reply(&status);

		if (status.cqe.status != 0) {
			device_printf(sc->dev,
			    "IDENTIFY namespace %u failed, status %#x\n", nsid,
			    le16toh(status.cqe.status));
			return (false);
		}

		if (status.io_error != 0) {
			device_printf(sc->dev,
			    "IDENTIFY namespace %u failed with I/O error %d\n",
			    nsid, status.io_error);
			return (false);
		}

		/*
		 * As in nvme_ns_construct, a size of zero indicates an
		 * invalid namespace.
		 */
		nvme_namespace_data_swapbytes(data);
		if (data->nsze == 0) {
			device_printf(sc->dev,
			    "ignoring active namespace %u with zero size\n",
			    nsid);
			continue;
		}

		sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data);

		nvmf_sim_rescan_ns(sc, nsid);
	}

	/* Full page consumed: nsid is the last (non-zero) entry. */
	MPASS(nsid == nslist->ns[nitems(nslist->ns) - 1] && nsid != 0);

	/* NSIDs at or above 0xfffffffd are reserved; stop there. */
	if (nsid >= 0xfffffffd)
		*nsidp = 0;
	else
		*nsidp = nsid + 1;
	return (true);
}
|
||||
|
||||
/*
 * Enumerate all active namespaces on the controller by paging through
 * the active namespace list via nvmf_scan_nslist().
 *
 * Allocates sc->ns (indexed by NSID - 1, sized by the controller's
 * reported namespace count sc->cdata->nn).  Returns true on success.
 */
static bool
nvmf_add_namespaces(struct nvmf_softc *sc)
{
	struct nvme_namespace_data *data;
	struct nvme_ns_list *nslist;
	uint32_t nsid;
	bool retval;

	sc->ns = mallocarray(sc->cdata->nn, sizeof(*sc->ns), M_NVMF,
	    M_WAITOK | M_ZERO);
	/* Scratch buffers reused across all nvmf_scan_nslist() calls. */
	nslist = malloc(sizeof(*nslist), M_NVMF, M_WAITOK);
	data = malloc(sizeof(*data), M_NVMF, M_WAITOK);

	nsid = 0;
	retval = true;
	for (;;) {
		if (!nvmf_scan_nslist(sc, nslist, data, &nsid)) {
			retval = false;
			break;
		}
		/* nsid == 0 means the scan reached the end of the list. */
		if (nsid == 0)
			break;
	}

	free(data, M_NVMF);
	free(nslist, M_NVMF);
	return (retval);
}
|
||||
|
||||
static int
|
||||
nvmf_attach(device_t dev)
|
||||
{
|
||||
struct make_dev_args mda;
|
||||
struct nvmf_softc *sc = device_get_softc(dev);
|
||||
struct nvmf_ivars *ivars = device_get_ivars(dev);
|
||||
uint64_t val;
|
||||
u_int i;
|
||||
int error;
|
||||
|
||||
if (ivars == NULL)
|
||||
return (ENXIO);
|
||||
|
||||
sc->dev = dev;
|
||||
sc->trtype = ivars->hh->trtype;
|
||||
callout_init(&sc->ka_rx_timer, 1);
|
||||
callout_init(&sc->ka_tx_timer, 1);
|
||||
sx_init(&sc->connection_lock, "nvmf connection");
|
||||
TASK_INIT(&sc->disconnect_task, 0, nvmf_disconnect_task, sc);
|
||||
|
||||
/* Claim the cdata pointer from ivars. */
|
||||
sc->cdata = ivars->cdata;
|
||||
ivars->cdata = NULL;
|
||||
|
||||
nvmf_init_aer(sc);
|
||||
|
||||
/* TODO: Multiqueue support. */
|
||||
sc->max_pending_io = ivars->io_params[0].qsize /* * sc->num_io_queues */;
|
||||
|
||||
error = nvmf_establish_connection(sc, ivars);
|
||||
if (error != 0)
|
||||
goto out;
|
||||
|
||||
error = nvmf_read_property(sc, NVMF_PROP_CAP, 8, &sc->cap);
|
||||
if (error != 0) {
|
||||
device_printf(sc->dev, "Failed to fetch CAP\n");
|
||||
error = ENXIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
error = nvmf_read_property(sc, NVMF_PROP_VS, 4, &val);
|
||||
if (error != 0) {
|
||||
device_printf(sc->dev, "Failed to fetch VS\n");
|
||||
error = ENXIO;
|
||||
goto out;
|
||||
}
|
||||
sc->vs = val;
|
||||
|
||||
/* Honor MDTS if it is set. */
|
||||
sc->max_xfer_size = maxphys;
|
||||
if (sc->cdata->mdts != 0) {
|
||||
sc->max_xfer_size = ulmin(sc->max_xfer_size,
|
||||
1 << (sc->cdata->mdts + NVME_MPS_SHIFT +
|
||||
NVME_CAP_HI_MPSMIN(sc->cap >> 32)));
|
||||
}
|
||||
|
||||
error = nvmf_init_sim(sc);
|
||||
if (error != 0)
|
||||
goto out;
|
||||
|
||||
error = nvmf_start_aer(sc);
|
||||
if (error != 0) {
|
||||
nvmf_destroy_sim(sc);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!nvmf_add_namespaces(sc)) {
|
||||
nvmf_destroy_sim(sc);
|
||||
goto out;
|
||||
}
|
||||
|
||||
make_dev_args_init(&mda);
|
||||
mda.mda_devsw = &nvmf_cdevsw;
|
||||
mda.mda_uid = UID_ROOT;
|
||||
mda.mda_gid = GID_WHEEL;
|
||||
mda.mda_mode = 0600;
|
||||
mda.mda_si_drv1 = sc;
|
||||
error = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev));
|
||||
if (error != 0) {
|
||||
nvmf_destroy_sim(sc);
|
||||
goto out;
|
||||
}
|
||||
|
||||
return (0);
|
||||
out:
|
||||
if (sc->ns != NULL) {
|
||||
for (i = 0; i < sc->cdata->nn; i++) {
|
||||
if (sc->ns[i] != NULL)
|
||||
nvmf_destroy_ns(sc->ns[i]);
|
||||
}
|
||||
free(sc->ns, M_NVMF);
|
||||
}
|
||||
|
||||
callout_drain(&sc->ka_tx_timer);
|
||||
callout_drain(&sc->ka_rx_timer);
|
||||
|
||||
if (sc->admin != NULL)
|
||||
nvmf_shutdown_controller(sc);
|
||||
|
||||
for (i = 0; i < sc->num_io_queues; i++) {
|
||||
if (sc->io[i] != NULL)
|
||||
nvmf_destroy_qp(sc->io[i]);
|
||||
}
|
||||
free(sc->io, M_NVMF);
|
||||
if (sc->admin != NULL)
|
||||
nvmf_destroy_qp(sc->admin);
|
||||
|
||||
nvmf_destroy_aer(sc);
|
||||
|
||||
taskqueue_drain(taskqueue_thread, &sc->disconnect_task);
|
||||
sx_destroy(&sc->connection_lock);
|
||||
free(sc->cdata, M_NVMF);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
 * Queue the disconnect task; safe to call from completion and callout
 * contexts where the teardown work cannot be done directly.
 */
void
nvmf_disconnect(struct nvmf_softc *sc)
{
	taskqueue_enqueue(taskqueue_thread, &sc->disconnect_task);
}
|
||||
|
||||
/*
 * Taskqueue handler that tears down the current association after a
 * transport error or KeepAlive timeout: stop the KeepAlive timers,
 * quiesce namespace consumers, and destroy all queue pairs.  Leaves
 * sc->admin NULL so a later NVMF_RECONNECT_HOST ioctl can build a new
 * association.  Special-cases in-progress detach and attach.
 */
static void
nvmf_disconnect_task(void *arg, int pending __unused)
{
	struct nvmf_softc *sc = arg;
	u_int i;

	sx_xlock(&sc->connection_lock);
	if (sc->admin == NULL) {
		/*
		 * Ignore transport errors if there is no active
		 * association.
		 */
		sx_xunlock(&sc->connection_lock);
		return;
	}

	if (sc->detaching) {
		if (sc->admin != NULL) {
			/*
			 * This unsticks the detach process if a
			 * transport error occurs during detach.
			 */
			nvmf_shutdown_qp(sc->admin);
		}
		sx_xunlock(&sc->connection_lock);
		return;
	}

	if (sc->cdev == NULL) {
		/*
		 * Transport error occurred during attach (nvmf_add_namespaces).
		 * Shutdown the admin queue.
		 */
		nvmf_shutdown_qp(sc->admin);
		sx_xunlock(&sc->connection_lock);
		return;
	}

	callout_drain(&sc->ka_tx_timer);
	callout_drain(&sc->ka_rx_timer);
	sc->ka_traffic = false;

	/* Quiesce namespace consumers. */
	nvmf_disconnect_sim(sc);
	for (i = 0; i < sc->cdata->nn; i++) {
		if (sc->ns[i] != NULL)
			nvmf_disconnect_ns(sc->ns[i]);
	}

	/* Shutdown the existing qpairs. */
	for (i = 0; i < sc->num_io_queues; i++) {
		nvmf_destroy_qp(sc->io[i]);
	}
	free(sc->io, M_NVMF);
	sc->io = NULL;
	sc->num_io_queues = 0;
	/* admin == NULL marks the association as down for reconnect. */
	nvmf_destroy_qp(sc->admin);
	sc->admin = NULL;

	sx_xunlock(&sc->connection_lock);
}
|
||||
|
||||
/*
 * NVMF_RECONNECT_HOST ioctl backend: adopt a fresh set of queue pairs
 * from userland to replace a disconnected association.
 *
 * Returns EINVAL on transport-type or subsystem-NQN mismatch, EBUSY if
 * an association is still active or a detach is in progress, or the
 * error from validating/establishing the new connection.
 */
static int
nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_handoff_host *hh)
{
	struct nvmf_ivars ivars;
	u_int i;
	int error;

	/* XXX: Should we permit changing the transport type? */
	if (sc->trtype != hh->trtype) {
		device_printf(sc->dev,
		    "transport type mismatch on reconnect\n");
		return (EINVAL);
	}

	error = nvmf_init_ivars(&ivars, hh);
	if (error != 0)
		return (error);

	sx_xlock(&sc->connection_lock);
	if (sc->admin != NULL || sc->detaching) {
		error = EBUSY;
		goto out;
	}

	/*
	 * Ensure this is for the same controller.  Note that the
	 * controller ID can vary across associations if the remote
	 * system is using the dynamic controller model.  This merely
	 * ensures the new association is connected to the same NVMe
	 * subsystem.
	 */
	if (memcmp(sc->cdata->subnqn, ivars.cdata->subnqn,
	    sizeof(ivars.cdata->subnqn)) != 0) {
		device_printf(sc->dev,
		    "controller subsystem NQN mismatch on reconnect\n");
		error = EINVAL;
		goto out;
	}

	/*
	 * XXX: Require same number and size of I/O queues so that
	 * max_pending_io is still correct?
	 */

	error = nvmf_establish_connection(sc, &ivars);
	if (error != 0)
		goto out;

	error = nvmf_start_aer(sc);
	if (error != 0)
		goto out;

	device_printf(sc->dev,
	    "established new association with %u I/O queues\n",
	    sc->num_io_queues);

	/* Restart namespace consumers. */
	for (i = 0; i < sc->cdata->nn; i++) {
		if (sc->ns[i] != NULL)
			nvmf_reconnect_ns(sc->ns[i]);
	}
	nvmf_reconnect_sim(sc);
out:
	sx_xunlock(&sc->connection_lock);
	nvmf_free_ivars(&ivars);
	return (error);
}
|
||||
|
||||
/*
 * New-bus detach: remove the control device, mark the softc as
 * detaching (so the disconnect task backs off), then destroy the SIM,
 * namespaces, timers, and queue pairs, requesting a controller shutdown
 * if the association is still up.
 */
static int
nvmf_detach(device_t dev)
{
	struct nvmf_softc *sc = device_get_softc(dev);
	u_int i;

	destroy_dev(sc->cdev);

	/* Under the lock so nvmf_disconnect_task observes it atomically. */
	sx_xlock(&sc->connection_lock);
	sc->detaching = true;
	sx_xunlock(&sc->connection_lock);

	nvmf_destroy_sim(sc);
	for (i = 0; i < sc->cdata->nn; i++) {
		if (sc->ns[i] != NULL)
			nvmf_destroy_ns(sc->ns[i]);
	}
	free(sc->ns, M_NVMF);

	callout_drain(&sc->ka_tx_timer);
	callout_drain(&sc->ka_rx_timer);

	if (sc->admin != NULL)
		nvmf_shutdown_controller(sc);

	for (i = 0; i < sc->num_io_queues; i++) {
		nvmf_destroy_qp(sc->io[i]);
	}
	free(sc->io, M_NVMF);

	/* Drain before destroying the admin queue the task may touch. */
	taskqueue_drain(taskqueue_thread, &sc->disconnect_task);

	if (sc->admin != NULL)
		nvmf_destroy_qp(sc->admin);

	nvmf_destroy_aer(sc);

	sx_destroy(&sc->connection_lock);
	free(sc->cdata, M_NVMF);
	return (0);
}
|
||||
|
||||
/*
 * Re-identify a single namespace (e.g. after an AER namespace-changed
 * notification) and create, update, or destroy the corresponding
 * namespace object, then ask CAM to rescan it.  Errors are logged and
 * the rescan is abandoned.
 */
void
nvmf_rescan_ns(struct nvmf_softc *sc, uint32_t nsid)
{
	struct nvmf_completion_status status;
	struct nvme_namespace_data *data;
	struct nvmf_namespace *ns;

	data = malloc(sizeof(*data), M_NVMF, M_WAITOK);

	nvmf_status_init(&status);
	nvmf_status_wait_io(&status);
	if (!nvmf_cmd_identify_namespace(sc, nsid, data, nvmf_complete,
	    &status, nvmf_io_complete, &status, M_WAITOK)) {
		device_printf(sc->dev,
		    "failed to send IDENTIFY namespace %u command\n", nsid);
		free(data, M_NVMF);
		return;
	}
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev,
		    "IDENTIFY namespace %u failed, status %#x\n", nsid,
		    le16toh(status.cqe.status));
		free(data, M_NVMF);
		return;
	}

	if (status.io_error != 0) {
		device_printf(sc->dev,
		    "IDENTIFY namespace %u failed with I/O error %d\n",
		    nsid, status.io_error);
		free(data, M_NVMF);
		return;
	}

	nvme_namespace_data_swapbytes(data);

	/* XXX: Needs locking around sc->ns[]. */
	ns = sc->ns[nsid - 1];
	if (data->nsze == 0) {
		/* Zero size means the namespace no longer exists. */
		/* XXX: Needs locking */
		if (ns != NULL) {
			nvmf_destroy_ns(ns);
			sc->ns[nsid - 1] = NULL;
		}
	} else {
		/* XXX: Needs locking */
		if (ns == NULL) {
			sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data);
		} else {
			/* In-place update failed: drop the namespace. */
			if (!nvmf_update_ns(ns, data)) {
				nvmf_destroy_ns(ns);
				sc->ns[nsid - 1] = NULL;
			}
		}
	}

	free(data, M_NVMF);

	nvmf_sim_rescan_ns(sc, nsid);
}
|
||||
|
||||
/*
 * Execute a userland NVMe passthrough command (NVME_PASSTHROUGH_CMD
 * ioctl) on the admin queue or an I/O queue.
 *
 * Bounces user data through a kernel buffer (copyin for writes,
 * copyout for reads), forwards the caller's CDWs, and copies the
 * completion status back into pt->cpl.  Returns EINVAL for oversized
 * transfers, ECONNABORTED if no request could be allocated, a copy
 * error, or the transport I/O error.
 */
int
nvmf_passthrough_cmd(struct nvmf_softc *sc, struct nvme_pt_command *pt,
    bool admin)
{
	struct nvmf_completion_status status;
	struct nvme_command cmd;
	struct memdesc mem;
	struct nvmf_host_qpair *qp;
	struct nvmf_request *req;
	void *buf;
	int error;

	if (pt->len > sc->max_xfer_size)
		return (EINVAL);

	buf = NULL;
	if (pt->len != 0) {
		/*
		 * XXX: Depending on the size we may want to pin the
		 * user pages and use a memdesc with vm_page_t's
		 * instead.
		 */
		buf = malloc(pt->len, M_NVMF, M_WAITOK);
		if (pt->is_read == 0) {
			error = copyin(pt->buf, buf, pt->len);
			if (error != 0) {
				free(buf, M_NVMF);
				return (error);
			}
		} else {
			/* Ensure no kernel data is leaked to userland. */
			memset(buf, 0, pt->len);
		}
	}

	/* Copy only the fields userland is allowed to control. */
	memset(&cmd, 0, sizeof(cmd));
	cmd.opc = pt->cmd.opc;
	cmd.fuse = pt->cmd.fuse;
	cmd.nsid = pt->cmd.nsid;
	cmd.cdw10 = pt->cmd.cdw10;
	cmd.cdw11 = pt->cmd.cdw11;
	cmd.cdw12 = pt->cmd.cdw12;
	cmd.cdw13 = pt->cmd.cdw13;
	cmd.cdw14 = pt->cmd.cdw14;
	cmd.cdw15 = pt->cmd.cdw15;

	if (admin)
		qp = sc->admin;
	else
		qp = nvmf_select_io_queue(sc);
	nvmf_status_init(&status);
	req = nvmf_allocate_request(qp, &cmd, nvmf_complete, &status, M_WAITOK);
	if (req == NULL) {
		device_printf(sc->dev, "failed to send passthrough command\n");
		error = ECONNABORTED;
		goto error;
	}

	if (pt->len != 0) {
		mem = memdesc_vaddr(buf, pt->len);
		nvmf_capsule_append_data(req->nc, &mem, pt->len,
		    pt->is_read == 0, nvmf_io_complete, &status);
		nvmf_status_wait_io(&status);
	}

	nvmf_submit_request(req);
	nvmf_wait_for_reply(&status);

	memset(&pt->cpl, 0, sizeof(pt->cpl));
	pt->cpl.cdw0 = status.cqe.cdw0;
	pt->cpl.status = status.cqe.status;

	error = status.io_error;
	if (error == 0 && pt->len != 0 && pt->is_read != 0)
		error = copyout(buf, pt->buf, pt->len);
error:
	/* free(NULL) is a no-op for the zero-length case. */
	free(buf, M_NVMF);
	return (error);
}
|
||||
|
||||
/*
 * ioctl handler for the /dev/nvmeX control device: NVMe passthrough,
 * nvmecontrol support queries, and the reconnect interface.
 */
static int
nvmf_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
    struct thread *td)
{
	struct nvmf_softc *sc = cdev->si_drv1;
	struct nvme_get_nsid *gnsid;
	struct nvme_pt_command *pt;
	struct nvmf_reconnect_params *rp;
	struct nvmf_handoff_host *hh;

	switch (cmd) {
	case NVME_PASSTHROUGH_CMD:
		pt = (struct nvme_pt_command *)arg;
		return (nvmf_passthrough_cmd(sc, pt, true));
	case NVME_GET_NSID:
		gnsid = (struct nvme_get_nsid *)arg;
		/* Explicitly terminate since strncpy may not. */
		strncpy(gnsid->cdev, device_get_nameunit(sc->dev),
		    sizeof(gnsid->cdev));
		gnsid->cdev[sizeof(gnsid->cdev) - 1] = '\0';
		/* NSID 0: this is the controller node, not a namespace. */
		gnsid->nsid = 0;
		return (0);
	case NVME_GET_MAX_XFER_SIZE:
		*(uint64_t *)arg = sc->max_xfer_size;
		return (0);
	case NVMF_RECONNECT_PARAMS:
		rp = (struct nvmf_reconnect_params *)arg;
		/* fcatt bit 0 clear: dynamic controller model. */
		if ((sc->cdata->fcatt & 1) == 0)
			rp->cntlid = NVMF_CNTLID_DYNAMIC;
		else
			rp->cntlid = sc->cdata->ctrlr_id;
		memcpy(rp->subnqn, sc->cdata->subnqn, sizeof(rp->subnqn));
		return (0);
	case NVMF_RECONNECT_HOST:
		hh = (struct nvmf_handoff_host *)arg;
		return (nvmf_reconnect_host(sc, hh));
	default:
		return (ENOTTY);
	}
}
|
||||
|
||||
/* Character device switch for /dev/nvmeX; only ioctl is supported. */
static struct cdevsw nvmf_cdevsw = {
	.d_version = D_VERSION,
	.d_ioctl = nvmf_ioctl
};
|
||||
|
||||
/*
 * Module event handler: register/unregister the connect ioctl handler
 * and drain outstanding cdev operations on unload.
 */
static int
nvmf_modevent(module_t mod, int what, void *arg)
{
	switch (what) {
	case MOD_LOAD:
		return (nvmf_ctl_load());
	case MOD_QUIESCE:
		return (0);
	case MOD_UNLOAD:
		nvmf_ctl_unload();
		destroy_dev_drain(&nvmf_cdevsw);
		return (0);
	default:
		return (EOPNOTSUPP);
	}
}
|
||||
|
||||
/*
 * New-bus glue.  The driver is named "nvme" so Fabrics controllers get
 * nvmeX device names matching PCIe controllers, and attaches at root.
 */
static device_method_t nvmf_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, nvmf_probe),
	DEVMETHOD(device_attach, nvmf_attach),
	DEVMETHOD(device_detach, nvmf_detach),
#if 0
	DEVMETHOD(device_shutdown, nvmf_shutdown),
#endif
	DEVMETHOD_END
};

driver_t nvme_nvmf_driver = {
	"nvme",
	nvmf_methods,
	sizeof(struct nvmf_softc),
};

DRIVER_MODULE(nvme, root, nvme_nvmf_driver, nvmf_modevent, NULL);
MODULE_DEPEND(nvmf, nvmf_transport, 1, 1, 1);
|
290
sys/dev/nvmf/host/nvmf_aer.c
Normal file
290
sys/dev/nvmf/host/nvmf_aer.c
Normal file
|
@ -0,0 +1,290 @@
|
|||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2024 Chelsio Communications, Inc.
|
||||
* Written by: John Baldwin <jhb@FreeBSD.org>
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/bus.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/taskqueue.h>
|
||||
#include <dev/nvmf/host/nvmf_var.h>
|
||||
|
||||
/*
 * State for one outstanding Asynchronous Event Request slot.  Each
 * slot is resubmitted after its event (and any associated log page)
 * has been processed.
 */
struct nvmf_aer {
	struct nvmf_softc *sc;
	uint8_t log_page_id;	/* from CDW0 of the AER completion */
	uint8_t info;
	uint8_t type;

	u_int page_len;		/* bytes of 'page' to fetch */
	void *page;		/* MAX_LOG_PAGE_SIZE buffer for the log page */

	int error;		/* transfer error from the log page I/O */
	uint16_t status;	/* CQE status of the GET_LOG_PAGE command */
	int pending;		/* outstanding completions (CQE + data) */
	struct mtx *lock;	/* protects error/status/pending */
	struct task complete_task;	/* defers AER handling to a taskqueue */
	struct task finish_page_task;	/* defers log page processing */
};
|
||||
|
||||
/* Upper bound on the log page buffer allocated per AER slot. */
#define MAX_LOG_PAGE_SIZE 4096
|
||||
|
||||
static void nvmf_complete_aer(void *arg, const struct nvme_completion *cqe);
|
||||
|
||||
/*
 * Submit (or rearm) one Asynchronous Event Request on the admin queue.
 * If the queue pair is gone the request allocation fails and the slot
 * is quietly dropped.
 */
static void
nvmf_submit_aer(struct nvmf_softc *sc, struct nvmf_aer *aer)
{
	struct nvmf_request *req;
	struct nvme_command cmd;

	memset(&cmd, 0, sizeof(cmd));
	cmd.opc = NVME_OPC_ASYNC_EVENT_REQUEST;

	req = nvmf_allocate_request(sc->admin, &cmd, nvmf_complete_aer, aer,
	    M_WAITOK);
	if (req == NULL)
		return;
	/* Mark as an AER so it is excluded from normal timeout handling —
	 * presumably; confirm against nvmf_qpair handling of req->aer. */
	req->aer = true;
	nvmf_submit_request(req);
}
|
||||
|
||||
static void
|
||||
nvmf_handle_changed_namespaces(struct nvmf_softc *sc,
|
||||
struct nvme_ns_list *ns_list)
|
||||
{
|
||||
uint32_t nsid;
|
||||
|
||||
/*
|
||||
* If more than 1024 namespaces have changed, we should
|
||||
* probably just rescan the entire set of namespaces.
|
||||
*/
|
||||
if (ns_list->ns[0] == 0xffffffff) {
|
||||
device_printf(sc->dev, "too many changed namespaces\n");
|
||||
return;
|
||||
}
|
||||
|
||||
for (u_int i = 0; i < nitems(ns_list->ns); i++) {
|
||||
if (ns_list->ns[i] == 0)
|
||||
break;
|
||||
|
||||
nsid = le32toh(ns_list->ns[i]);
|
||||
nvmf_rescan_ns(sc, nsid);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nvmf_finish_aer_page(struct nvmf_softc *sc, struct nvmf_aer *aer)
|
||||
{
|
||||
/* If an error occurred fetching the page, just bail. */
|
||||
if (aer->error != 0 || aer->status != 0)
|
||||
return;
|
||||
|
||||
taskqueue_enqueue(taskqueue_thread, &aer->finish_page_task);
|
||||
}
|
||||
|
||||
/*
 * Taskqueue handler: act on a successfully fetched AER log page and
 * then rearm the AER command slot.
 */
static void
nvmf_finish_aer_page_task(void *arg, int pending)
{
	struct nvmf_aer *aer = arg;
	struct nvmf_softc *sc = aer->sc;

	switch (aer->log_page_id) {
	case NVME_LOG_ERROR:
		/* TODO: Should we log these? */
		break;
	case NVME_LOG_CHANGED_NAMESPACE:
		nvmf_handle_changed_namespaces(sc, aer->page);
		break;
	}

	/* Resubmit this AER command. */
	nvmf_submit_aer(sc, aer);
}
|
||||
|
||||
static void
|
||||
nvmf_io_complete_aer_page(void *arg, size_t xfered, int error)
|
||||
{
|
||||
struct nvmf_aer *aer = arg;
|
||||
struct nvmf_softc *sc = aer->sc;
|
||||
|
||||
mtx_lock(aer->lock);
|
||||
aer->error = error;
|
||||
aer->pending--;
|
||||
if (aer->pending == 0) {
|
||||
mtx_unlock(aer->lock);
|
||||
nvmf_finish_aer_page(sc, aer);
|
||||
} else
|
||||
mtx_unlock(aer->lock);
|
||||
}
|
||||
|
||||
static void
|
||||
nvmf_complete_aer_page(void *arg, const struct nvme_completion *cqe)
|
||||
{
|
||||
struct nvmf_aer *aer = arg;
|
||||
struct nvmf_softc *sc = aer->sc;
|
||||
|
||||
mtx_lock(aer->lock);
|
||||
aer->status = cqe->status;
|
||||
aer->pending--;
|
||||
if (aer->pending == 0) {
|
||||
mtx_unlock(aer->lock);
|
||||
nvmf_finish_aer_page(sc, aer);
|
||||
} else
|
||||
mtx_unlock(aer->lock);
|
||||
}
|
||||
|
||||
static u_int
|
||||
nvmf_log_page_size(struct nvmf_softc *sc, uint8_t log_page_id)
|
||||
{
|
||||
switch (log_page_id) {
|
||||
case NVME_LOG_ERROR:
|
||||
return ((sc->cdata->elpe + 1) *
|
||||
sizeof(struct nvme_error_information_entry));
|
||||
case NVME_LOG_CHANGED_NAMESPACE:
|
||||
return (sizeof(struct nvme_ns_list));
|
||||
default:
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Completion handler for an outstanding AER command.  Decodes the
 * event from CDW0 and defers further handling (which may sleep to
 * fetch a log page) to a taskqueue.
 */
static void
nvmf_complete_aer(void *arg, const struct nvme_completion *cqe)
{
	struct nvmf_aer *aer = arg;
	struct nvmf_softc *sc = aer->sc;
	uint32_t cdw0;

	/*
	 * The only error defined for AER is an abort due to
	 * submitting too many AER commands.  Just discard this AER
	 * without resubmitting if we get an error.
	 *
	 * NB: Pending AER commands are aborted during controller
	 * shutdown, so discard aborted commands silently.
	 */
	if (cqe->status != 0) {
		if (!nvmf_cqe_aborted(cqe))
			device_printf(sc->dev, "Ignoring error %#x for AER\n",
			    le16toh(cqe->status));
		return;
	}

	/* Event type, info, and associated log page are packed into CDW0. */
	cdw0 = le32toh(cqe->cdw0);
	aer->log_page_id = NVMEV(NVME_ASYNC_EVENT_LOG_PAGE_ID, cdw0);
	aer->info = NVMEV(NVME_ASYNC_EVENT_INFO, cdw0);
	aer->type = NVMEV(NVME_ASYNC_EVENT_TYPE, cdw0);

	device_printf(sc->dev, "AER type %u, info %#x, page %#x\n",
	    aer->type, aer->info, aer->log_page_id);

	aer->page_len = nvmf_log_page_size(sc, aer->log_page_id);
	taskqueue_enqueue(taskqueue_thread, &aer->complete_task);
}
|
||||
|
||||
/*
 * Taskqueue handler run after an AER completes: fetch the associated
 * log page if the event has one, otherwise rearm the AER slot
 * immediately.
 */
static void
nvmf_complete_aer_task(void *arg, int pending)
{
	struct nvmf_aer *aer = arg;
	struct nvmf_softc *sc = aer->sc;

	if (aer->page_len != 0) {
		/* Read the associated log page. */
		aer->page_len = MIN(aer->page_len, MAX_LOG_PAGE_SIZE);
		/* Two completions: the CQE callback and the data callback. */
		aer->pending = 2;
		(void) nvmf_cmd_get_log_page(sc, NVME_GLOBAL_NAMESPACE_TAG,
		    aer->log_page_id, 0, aer->page, aer->page_len,
		    nvmf_complete_aer_page, aer, nvmf_io_complete_aer_page,
		    aer, M_WAITOK);
	} else {
		/* Resubmit this AER command. */
		nvmf_submit_aer(sc, aer);
	}
}
|
||||
|
||||
/*
 * Synchronously issue SET_FEATURES (Asynchronous Event Configuration)
 * to select which events the controller reports.
 *
 * Returns 0 on success, ECONNABORTED if the admin queue is gone, or
 * EIO if the controller rejects the command.
 */
static int
nvmf_set_async_event_config(struct nvmf_softc *sc, uint32_t config)
{
	struct nvme_command cmd;
	struct nvmf_completion_status status;
	struct nvmf_request *req;

	memset(&cmd, 0, sizeof(cmd));
	cmd.opc = NVME_OPC_SET_FEATURES;
	cmd.cdw10 = htole32(NVME_FEAT_ASYNC_EVENT_CONFIGURATION);
	cmd.cdw11 = htole32(config);

	nvmf_status_init(&status);
	req = nvmf_allocate_request(sc->admin, &cmd, nvmf_complete, &status,
	    M_WAITOK);
	if (req == NULL) {
		device_printf(sc->dev,
		    "failed to allocate SET_FEATURES (ASYNC_EVENT_CONFIGURATION) command\n");
		return (ECONNABORTED);
	}
	nvmf_submit_request(req);
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev,
		    "SET_FEATURES (ASYNC_EVENT_CONFIGURATION) failed, status %#x\n",
		    le16toh(status.cqe.status));
		return (EIO);
	}

	return (0);
}
|
||||
|
||||
/*
 * Allocate the AER slots (bounded by the controller's AERL plus one)
 * along with their log page buffers, locks, and deferred tasks.
 */
void
nvmf_init_aer(struct nvmf_softc *sc)
{
	/* 8 matches NVME_MAX_ASYNC_EVENTS */
	sc->num_aer = min(8, sc->cdata->aerl + 1);
	sc->aer = mallocarray(sc->num_aer, sizeof(*sc->aer), M_NVMF,
	    M_WAITOK | M_ZERO);
	for (u_int i = 0; i < sc->num_aer; i++) {
		sc->aer[i].sc = sc;
		sc->aer[i].page = malloc(MAX_LOG_PAGE_SIZE, M_NVMF, M_WAITOK);
		/* Borrow a pool mutex rather than embedding one per slot. */
		sc->aer[i].lock = mtx_pool_find(mtxpool_sleep, &sc->aer[i]);
		TASK_INIT(&sc->aer[i].complete_task, 0, nvmf_complete_aer_task,
		    &sc->aer[i]);
		TASK_INIT(&sc->aer[i].finish_page_task, 0,
		    nvmf_finish_aer_page_task, &sc->aer[i]);
	}
}
|
||||
|
||||
/*
 * Enable the desired async events on the controller and submit one
 * AER command per slot.  Returns 0 on success or an errno.
 */
int
nvmf_start_aer(struct nvmf_softc *sc)
{
	uint32_t async_event_config;
	int error;

	async_event_config = NVME_CRIT_WARN_ST_AVAILABLE_SPARE |
	    NVME_CRIT_WARN_ST_DEVICE_RELIABILITY |
	    NVME_CRIT_WARN_ST_READ_ONLY |
	    NVME_CRIT_WARN_ST_VOLATILE_MEMORY_BACKUP;
	/* Namespace attribute notices were added in NVMe 1.2. */
	if (sc->cdata->ver >= NVME_REV(1, 2))
		async_event_config |=
		    sc->cdata->oaes & NVME_ASYNC_EVENT_NS_ATTRIBUTE;
	error = nvmf_set_async_event_config(sc, async_event_config);
	if (error != 0)
		return (error);

	for (u_int i = 0; i < sc->num_aer; i++)
		nvmf_submit_aer(sc, &sc->aer[i]);

	return (0);
}
|
||||
|
||||
/*
 * Tear down the AER slots: wait for any deferred tasks to finish,
 * then free the log page buffers and the slot array.
 */
void
nvmf_destroy_aer(struct nvmf_softc *sc)
{
	for (u_int i = 0; i < sc->num_aer; i++) {
		taskqueue_drain(taskqueue_thread, &sc->aer[i].complete_task);
		taskqueue_drain(taskqueue_thread, &sc->aer[i].finish_page_task);
		free(sc->aer[i].page, M_NVMF);
	}
	free(sc->aer, M_NVMF);
}
|
171
sys/dev/nvmf/host/nvmf_cmd.c
Normal file
171
sys/dev/nvmf/host/nvmf_cmd.c
Normal file
|
@ -0,0 +1,171 @@
|
|||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2023-2024 Chelsio Communications, Inc.
|
||||
* Written by: John Baldwin <jhb@FreeBSD.org>
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/memdesc.h>
|
||||
#include <sys/systm.h>
|
||||
#include <dev/nvme/nvme.h>
|
||||
#include <dev/nvmf/nvmf.h>
|
||||
#include <dev/nvmf/nvmf_proto.h>
|
||||
#include <dev/nvmf/host/nvmf_var.h>
|
||||
|
||||
/*
 * Issue a Fabrics Property Get for a 4- or 8-byte property at
 * 'offset'.  Returns true if the request was submitted; the result is
 * delivered to 'cb'.
 */
bool
nvmf_cmd_get_property(struct nvmf_softc *sc, uint32_t offset, uint8_t size,
    nvmf_request_complete_t *cb, void *cb_arg, int how)
{
	struct nvmf_fabric_prop_get_cmd cmd;
	struct nvmf_request *req;

	memset(&cmd, 0, sizeof(cmd));
	cmd.opcode = NVME_OPC_FABRICS_COMMANDS;
	cmd.fctype = NVMF_FABRIC_COMMAND_PROPERTY_GET;
	switch (size) {
	case 4:
		cmd.attrib.size = NVMF_PROP_SIZE_4;
		break;
	case 8:
		cmd.attrib.size = NVMF_PROP_SIZE_8;
		break;
	default:
		/* Only 4 and 8 byte properties exist; caller bug otherwise. */
		panic("Invalid property size");
	}
	cmd.ofst = htole32(offset);

	req = nvmf_allocate_request(sc->admin, &cmd, cb, cb_arg, how);
	if (req != NULL)
		nvmf_submit_request(req);
	return (req != NULL);
}
|
||||
|
||||
/*
 * Issue a Fabrics Property Set writing 'value' to a 4- or 8-byte
 * property at 'offset'.  Returns true if the request was submitted.
 */
bool
nvmf_cmd_set_property(struct nvmf_softc *sc, uint32_t offset, uint8_t size,
    uint64_t value, nvmf_request_complete_t *cb, void *cb_arg, int how)
{
	struct nvmf_fabric_prop_set_cmd cmd;
	struct nvmf_request *req;

	memset(&cmd, 0, sizeof(cmd));
	cmd.opcode = NVME_OPC_FABRICS_COMMANDS;
	cmd.fctype = NVMF_FABRIC_COMMAND_PROPERTY_SET;
	switch (size) {
	case 4:
		cmd.attrib.size = NVMF_PROP_SIZE_4;
		cmd.value.u32.low = htole32(value);
		break;
	case 8:
		cmd.attrib.size = NVMF_PROP_SIZE_8;
		cmd.value.u64 = htole64(value);
		break;
	default:
		/* Only 4 and 8 byte properties exist; caller bug otherwise. */
		panic("Invalid property size");
	}
	cmd.ofst = htole32(offset);

	req = nvmf_allocate_request(sc->admin, &cmd, cb, cb_arg, how);
	if (req != NULL)
		nvmf_submit_request(req);
	return (req != NULL);
}
|
||||
|
||||
bool
|
||||
nvmf_cmd_keep_alive(struct nvmf_softc *sc, nvmf_request_complete_t *cb,
|
||||
void *cb_arg, int how)
|
||||
{
|
||||
struct nvme_command cmd;
|
||||
struct nvmf_request *req;
|
||||
|
||||
memset(&cmd, 0, sizeof(cmd));
|
||||
cmd.opc = NVME_OPC_KEEP_ALIVE;
|
||||
|
||||
req = nvmf_allocate_request(sc->admin, &cmd, cb, cb_arg, how);
|
||||
if (req != NULL)
|
||||
nvmf_submit_request(req);
|
||||
return (req != NULL);
|
||||
}
|
||||
|
||||
/*
 * Issue IDENTIFY (CNS 0x02) to fetch the list of active namespace IDs
 * greater than 'id'.  The list lands in 'nslist'; req_cb fires on the
 * CQE and io_cb on the data transfer.  Returns true if submitted.
 */
bool
nvmf_cmd_identify_active_namespaces(struct nvmf_softc *sc, uint32_t id,
    struct nvme_ns_list *nslist, nvmf_request_complete_t *req_cb,
    void *req_cb_arg, nvmf_io_complete_t *io_cb, void *io_cb_arg, int how)
{
	struct nvme_command cmd;
	struct memdesc mem;
	struct nvmf_request *req;

	memset(&cmd, 0, sizeof(cmd));
	cmd.opc = NVME_OPC_IDENTIFY;

	/* 5.15.1: CNS 0x02 returns the active namespace ID list. */
	cmd.cdw10 = htole32(2);
	cmd.nsid = htole32(id);

	req = nvmf_allocate_request(sc->admin, &cmd, req_cb, req_cb_arg, how);
	if (req == NULL)
		return (false);
	mem = memdesc_vaddr(nslist, sizeof(*nslist));
	nvmf_capsule_append_data(req->nc, &mem, sizeof(*nslist), false,
	    io_cb, io_cb_arg);
	nvmf_submit_request(req);
	return (true);
}
|
||||
|
||||
/*
 * Issue IDENTIFY (CNS 0x00) to fetch the Identify Namespace data
 * structure for namespace 'id' into 'nsdata'.  Returns true if
 * submitted.
 */
bool
nvmf_cmd_identify_namespace(struct nvmf_softc *sc, uint32_t id,
    struct nvme_namespace_data *nsdata, nvmf_request_complete_t *req_cb,
    void *req_cb_arg, nvmf_io_complete_t *io_cb, void *io_cb_arg, int how)
{
	struct nvme_command cmd;
	struct memdesc mem;
	struct nvmf_request *req;

	memset(&cmd, 0, sizeof(cmd));
	cmd.opc = NVME_OPC_IDENTIFY;

	/* 5.15.1 Use CNS of 0x00 for namespace data. */
	cmd.cdw10 = htole32(0);
	cmd.nsid = htole32(id);

	req = nvmf_allocate_request(sc->admin, &cmd, req_cb, req_cb_arg, how);
	if (req == NULL)
		return (false);
	mem = memdesc_vaddr(nsdata, sizeof(*nsdata));
	nvmf_capsule_append_data(req->nc, &mem, sizeof(*nsdata), false,
	    io_cb, io_cb_arg);
	nvmf_submit_request(req);
	return (true);
}
|
||||
|
||||
/*
 * Issue GET_LOG_PAGE for log 'lid' reading 'len' bytes starting at
 * 'offset' into 'buf'.  Length and offset must be dword-aligned.
 * Returns true if submitted.
 */
bool
nvmf_cmd_get_log_page(struct nvmf_softc *sc, uint32_t nsid, uint8_t lid,
    uint64_t offset, void *buf, size_t len, nvmf_request_complete_t *req_cb,
    void *req_cb_arg, nvmf_io_complete_t *io_cb, void *io_cb_arg, int how)
{
	struct nvme_command cmd;
	struct memdesc mem;
	struct nvmf_request *req;
	size_t numd;

	MPASS(len != 0 && len % 4 == 0);
	MPASS(offset % 4 == 0);

	/* NUMD is a 0-based count of dwords split across CDW10/CDW11. */
	numd = (len / 4) - 1;
	memset(&cmd, 0, sizeof(cmd));
	cmd.opc = NVME_OPC_GET_LOG_PAGE;
	cmd.nsid = htole32(nsid);
	cmd.cdw10 = htole32(numd << 16 | lid);
	cmd.cdw11 = htole32(numd >> 16);
	/* 64-bit log page offset split across CDW12/CDW13. */
	cmd.cdw12 = htole32(offset);
	cmd.cdw13 = htole32(offset >> 32);

	req = nvmf_allocate_request(sc->admin, &cmd, req_cb, req_cb_arg, how);
	if (req == NULL)
		return (false);
	mem = memdesc_vaddr(buf, len);
	nvmf_capsule_append_data(req->nc, &mem, len, false, io_cb, io_cb_arg);
	nvmf_submit_request(req);
	return (true);
}
|
159
sys/dev/nvmf/host/nvmf_ctldev.c
Normal file
159
sys/dev/nvmf/host/nvmf_ctldev.c
Normal file
|
@ -0,0 +1,159 @@
|
|||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2023 Chelsio Communications, Inc.
|
||||
* Written by: John Baldwin <jhb@FreeBSD.org>
|
||||
*/
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/bus.h>
|
||||
#include <sys/conf.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <dev/nvme/nvme.h>
|
||||
#include <dev/nvmf/nvmf.h>
|
||||
#include <dev/nvmf/nvmf_transport.h>
|
||||
#include <dev/nvmf/host/nvmf_var.h>
|
||||
|
||||
/* The /dev/nvmf control device, created at module load. */
static struct cdev *nvmf_cdev;
|
||||
|
||||
/*
 * Accept a set of queue pairs handed off from userland (nvmecontrol
 * connect) and create a new nvmeX device on root_bus for the remote
 * controller.  Returns 0 on success or an errno.
 */
static int
nvmf_handoff_host(struct nvmf_handoff_host *hh)
{
	struct nvmf_ivars ivars;
	device_t dev;
	int error;

	error = nvmf_init_ivars(&ivars, hh);
	if (error != 0)
		return (error);

	bus_topo_lock();
	dev = device_add_child(root_bus, "nvme", -1);
	if (dev == NULL) {
		bus_topo_unlock();
		error = ENXIO;
		goto out;
	}

	/* ivars are only valid for the duration of probe/attach. */
	device_set_ivars(dev, &ivars);
	error = device_probe_and_attach(dev);
	device_set_ivars(dev, NULL);
	if (error != 0)
		device_delete_child(root_bus, dev);
	bus_topo_unlock();

out:
	nvmf_free_ivars(&ivars);
	return (error);
}
|
||||
|
||||
static bool
|
||||
nvmf_matches(device_t dev, char *name)
|
||||
{
|
||||
struct nvmf_softc *sc = device_get_softc(dev);
|
||||
|
||||
if (strcmp(device_get_nameunit(dev), name) == 0)
|
||||
return (true);
|
||||
if (strcmp(sc->cdata->subnqn, name) == 0)
|
||||
return (true);
|
||||
return (false);
|
||||
}
|
||||
|
||||
/*
 * Detach and delete every Fabrics controller whose device name or
 * subsystem NQN matches 'name'; a NULL name matches all Fabrics
 * controllers.  Returns ENOENT if nothing matched, else the first
 * deletion error or 0.
 */
static int
nvmf_disconnect_by_name(char *name)
{
	devclass_t dc;
	device_t dev;
	int error, unit;
	bool found;

	found = false;
	error = 0;
	bus_topo_lock();
	dc = devclass_find("nvme");
	if (dc == NULL)
		goto out;

	for (unit = 0; unit < devclass_get_maxunit(dc); unit++) {
		dev = devclass_get_device(dc, unit);
		if (dev == NULL)
			continue;
		/*
		 * The "nvme" devclass is shared with PCI controllers;
		 * only touch devices bound to this driver on root_bus.
		 */
		if (device_get_driver(dev) != &nvme_nvmf_driver)
			continue;
		if (device_get_parent(dev) != root_bus)
			continue;
		if (name != NULL && !nvmf_matches(dev, name))
			continue;

		error = device_delete_child(root_bus, dev);
		if (error != 0)
			break;
		found = true;
	}
out:
	bus_topo_unlock();
	if (error == 0 && !found)
		error = ENOENT;
	return (error);
}
|
||||
|
||||
/*
 * Handle NVMF_DISCONNECT_HOST: copy in the user-supplied name string
 * and disconnect the matching controller(s).
 */
static int
nvmf_disconnect_host(const char **namep)
{
	char *name;
	int error;

	name = malloc(PATH_MAX, M_NVMF, M_WAITOK);
	/* *namep is a user pointer embedded in the ioctl argument. */
	error = copyinstr(*namep, name, PATH_MAX, NULL);
	if (error == 0)
		error = nvmf_disconnect_by_name(name);
	free(name, M_NVMF);
	return (error);
}
|
||||
|
||||
/*
 * ioctl handler for /dev/nvmf: connect (handoff) and disconnect
 * operations driven by nvmecontrol(8).
 */
static int
nvmf_ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int flag,
    struct thread *td)
{
	switch (cmd) {
	case NVMF_HANDOFF_HOST:
		return (nvmf_handoff_host((struct nvmf_handoff_host *)arg));
	case NVMF_DISCONNECT_HOST:
		return (nvmf_disconnect_host((const char **)arg));
	case NVMF_DISCONNECT_ALL:
		return (nvmf_disconnect_by_name(NULL));
	default:
		return (ENOTTY);
	}
}
|
||||
|
||||
/* Character device switch for the /dev/nvmf control node. */
static struct cdevsw nvmf_ctl_cdevsw = {
	.d_version = D_VERSION,
	.d_ioctl = nvmf_ctl_ioctl
};
|
||||
|
||||
/*
 * Create the root-only /dev/nvmf control device.  Called from the
 * module load handler; returns 0 or a make_dev_s(9) error.
 */
int
nvmf_ctl_load(void)
{
	struct make_dev_args mda;
	int error;

	make_dev_args_init(&mda);
	mda.mda_devsw = &nvmf_ctl_cdevsw;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	error = make_dev_s(&mda, &nvmf_cdev, "nvmf");
	if (error != 0)
		nvmf_cdev = NULL;
	return (error);
}
|
||||
|
||||
void
|
||||
nvmf_ctl_unload(void)
|
||||
{
|
||||
if (nvmf_cdev != NULL) {
|
||||
destroy_dev(nvmf_cdev);
|
||||
nvmf_cdev = NULL;
|
||||
}
|
||||
}
|
483
sys/dev/nvmf/host/nvmf_ns.c
Normal file
483
sys/dev/nvmf/host/nvmf_ns.c
Normal file
|
@ -0,0 +1,483 @@
|
|||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2023-2024 Chelsio Communications, Inc.
|
||||
* Written by: John Baldwin <jhb@FreeBSD.org>
|
||||
*/
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/bio.h>
|
||||
#include <sys/bus.h>
|
||||
#include <sys/conf.h>
|
||||
#include <sys/disk.h>
|
||||
#include <sys/fcntl.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/memdesc.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/sbuf.h>
|
||||
#include <machine/stdarg.h>
|
||||
#include <dev/nvme/nvme.h>
|
||||
#include <dev/nvmf/host/nvmf_var.h>
|
||||
|
||||
/*
 * Per-namespace state backing a /dev/nvmeXnsY character device.
 */
struct nvmf_namespace {
	struct nvmf_softc *sc;
	uint64_t size;		/* namespace size in bytes */
	uint32_t id;		/* NSID */
	u_int	flags;		/* NVME_NS_*_SUPPORTED capability flags */
	uint32_t lba_size;	/* bytes per LBA */
	bool disconnected;	/* queue new I/O instead of submitting */

	TAILQ_HEAD(, bio) pending_bios;	/* bios held while disconnected */
	struct mtx lock;		/* protects disconnected/pending_bios */
	volatile u_int active_bios;	/* in-flight count + 1 dummy ref */

	struct cdev *cdev;
};
|
||||
|
||||
static void nvmf_ns_strategy(struct bio *bio);
|
||||
|
||||
static void
|
||||
ns_printf(struct nvmf_namespace *ns, const char *fmt, ...)
|
||||
{
|
||||
char buf[128];
|
||||
struct sbuf sb;
|
||||
va_list ap;
|
||||
|
||||
sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
|
||||
sbuf_set_drain(&sb, sbuf_printf_drain, NULL);
|
||||
|
||||
sbuf_printf(&sb, "%sns%u: ", device_get_nameunit(ns->sc->dev),
|
||||
ns->id);
|
||||
|
||||
va_start(ap, fmt);
|
||||
sbuf_vprintf(&sb, fmt, ap);
|
||||
va_end(ap);
|
||||
|
||||
sbuf_finish(&sb);
|
||||
sbuf_delete(&sb);
|
||||
}
|
||||
|
||||
/*
|
||||
* The I/O completion may trigger after the received CQE if the I/O
|
||||
* used a zero-copy mbuf that isn't harvested until after the NIC
|
||||
* driver processes TX completions. Abuse bio_driver1 as a refcount.
|
||||
* Store I/O errors in bio_driver2.
|
||||
*/
|
||||
/* Accessor for the refcount stashed in bio_driver1 (see comment above). */
static __inline u_int *
bio_refs(struct bio *bio)
{
	return ((u_int *)&bio->bio_driver1);
}
|
||||
|
||||
static void
|
||||
nvmf_ns_biodone(struct bio *bio)
|
||||
{
|
||||
struct nvmf_namespace *ns;
|
||||
int error;
|
||||
|
||||
if (!refcount_release(bio_refs(bio)))
|
||||
return;
|
||||
|
||||
ns = bio->bio_dev->si_drv1;
|
||||
|
||||
/* If a request is aborted, resubmit or queue it for resubmission. */
|
||||
if (bio->bio_error == ECONNABORTED) {
|
||||
bio->bio_error = 0;
|
||||
bio->bio_driver2 = 0;
|
||||
mtx_lock(&ns->lock);
|
||||
if (ns->disconnected) {
|
||||
TAILQ_INSERT_TAIL(&ns->pending_bios, bio, bio_queue);
|
||||
mtx_unlock(&ns->lock);
|
||||
} else {
|
||||
mtx_unlock(&ns->lock);
|
||||
nvmf_ns_strategy(bio);
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* I/O errors take precedence over generic EIO from
|
||||
* CQE errors.
|
||||
*/
|
||||
error = (intptr_t)bio->bio_driver2;
|
||||
if (error != 0)
|
||||
bio->bio_error = error;
|
||||
if (bio->bio_error != 0)
|
||||
bio->bio_flags |= BIO_ERROR;
|
||||
biodone(bio);
|
||||
}
|
||||
|
||||
if (refcount_release(&ns->active_bios))
|
||||
wakeup(ns);
|
||||
}
|
||||
|
||||
/*
 * Data-transfer completion for a READ/WRITE bio.  Records the error
 * and residual, then drops one of the bio's two references.
 */
static void
nvmf_ns_io_complete(void *arg, size_t xfered, int error)
{
	struct bio *bio = arg;

	KASSERT(xfered <= bio->bio_bcount,
	    ("%s: xfered > bio_bcount", __func__));

	/* Stash the transfer error for nvmf_ns_biodone() to apply. */
	bio->bio_driver2 = (void *)(intptr_t)error;
	bio->bio_resid = bio->bio_bcount - xfered;

	nvmf_ns_biodone(bio);
}
|
||||
|
||||
/*
 * Data-transfer completion for a BIO_DELETE bio: free the DSM range
 * buffer, record the error, and drop one of the bio's two references.
 */
static void
nvmf_ns_delete_complete(void *arg, size_t xfered, int error)
{
	struct bio *bio = arg;

	if (error != 0)
		bio->bio_resid = bio->bio_bcount;
	else
		bio->bio_resid = 0;

	/* bio_driver2 held the DSM range; it is repurposed for the error. */
	free(bio->bio_driver2, M_NVMF);
	bio->bio_driver2 = (void *)(intptr_t)error;

	nvmf_ns_biodone(bio);
}
|
||||
|
||||
static void
|
||||
nvmf_ns_bio_complete(void *arg, const struct nvme_completion *cqe)
|
||||
{
|
||||
struct bio *bio = arg;
|
||||
|
||||
if (nvmf_cqe_aborted(cqe))
|
||||
bio->bio_error = ECONNABORTED;
|
||||
else if (cqe->status != 0)
|
||||
bio->bio_error = EIO;
|
||||
|
||||
nvmf_ns_biodone(bio);
|
||||
}
|
||||
|
||||
/*
 * Translate a bio into an NVMe command and submit it on an I/O queue.
 * While the controller is disconnected, bios are queued on the
 * namespace for resubmission after reconnect.  Returns 0 or an errno
 * (the caller completes the bio on error).
 */
static int
nvmf_ns_submit_bio(struct nvmf_namespace *ns, struct bio *bio)
{
	struct nvme_command cmd;
	struct nvmf_request *req;
	struct nvme_dsm_range *dsm_range;
	struct memdesc mem;
	uint64_t lba, lba_count;

	dsm_range = NULL;
	memset(&cmd, 0, sizeof(cmd));
	switch (bio->bio_cmd) {
	case BIO_READ:
		lba = bio->bio_offset / ns->lba_size;
		lba_count = bio->bio_bcount / ns->lba_size;
		nvme_ns_read_cmd(&cmd, ns->id, lba, lba_count);
		break;
	case BIO_WRITE:
		lba = bio->bio_offset / ns->lba_size;
		lba_count = bio->bio_bcount / ns->lba_size;
		nvme_ns_write_cmd(&cmd, ns->id, lba, lba_count);
		break;
	case BIO_FLUSH:
		nvme_ns_flush_cmd(&cmd, ns->id);
		break;
	case BIO_DELETE:
		/* One DSM range covering the whole request. */
		dsm_range = malloc(sizeof(*dsm_range), M_NVMF, M_NOWAIT |
		    M_ZERO);
		if (dsm_range == NULL)
			return (ENOMEM);
		lba = bio->bio_offset / ns->lba_size;
		lba_count = bio->bio_bcount / ns->lba_size;
		dsm_range->starting_lba = htole64(lba);
		dsm_range->length = htole32(lba_count);

		cmd.opc = NVME_OPC_DATASET_MANAGEMENT;
		cmd.nsid = htole32(ns->id);
		cmd.cdw10 = htole32(0);	/* 1 range (0-based count) */
		cmd.cdw11 = htole32(NVME_DSM_ATTR_DEALLOCATE);
		break;
	default:
		return (EOPNOTSUPP);
	}

	mtx_lock(&ns->lock);
	if (ns->disconnected) {
		/* Hold the bio until nvmf_reconnect_ns() resubmits it. */
		TAILQ_INSERT_TAIL(&ns->pending_bios, bio, bio_queue);
		mtx_unlock(&ns->lock);
		free(dsm_range, M_NVMF);
		return (0);
	}

	req = nvmf_allocate_request(nvmf_select_io_queue(ns->sc), &cmd,
	    nvmf_ns_bio_complete, bio, M_NOWAIT);
	if (req == NULL) {
		mtx_unlock(&ns->lock);
		free(dsm_range, M_NVMF);
		return (ENOMEM);
	}

	/*
	 * Commands carrying data take two references (CQE + data
	 * transfer completion); others take one.
	 */
	switch (bio->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
		refcount_init(bio_refs(bio), 2);
		mem = memdesc_bio(bio);
		nvmf_capsule_append_data(req->nc, &mem, bio->bio_bcount,
		    bio->bio_cmd == BIO_WRITE, nvmf_ns_io_complete, bio);
		break;
	case BIO_DELETE:
		refcount_init(bio_refs(bio), 2);
		mem = memdesc_vaddr(dsm_range, sizeof(*dsm_range));
		nvmf_capsule_append_data(req->nc, &mem, sizeof(*dsm_range),
		    true, nvmf_ns_delete_complete, bio);
		bio->bio_driver2 = dsm_range;
		break;
	default:
		refcount_init(bio_refs(bio), 1);
		KASSERT(bio->bio_resid == 0,
		    ("%s: input bio_resid != 0", __func__));
		break;
	}

	refcount_acquire(&ns->active_bios);
	/* Submitted under ns->lock so disconnect cannot race the submit. */
	nvmf_submit_request(req);
	mtx_unlock(&ns->lock);
	return (0);
}
|
||||
|
||||
/*
 * ioctl handler for /dev/nvmeXnsY: pass-through commands, NSID query,
 * and the disk(9)-style media size/sector size queries.
 */
static int
nvmf_ns_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int flag,
    struct thread *td)
{
	struct nvmf_namespace *ns = dev->si_drv1;
	struct nvme_get_nsid *gnsid;
	struct nvme_pt_command *pt;

	switch (cmd) {
	case NVME_PASSTHROUGH_CMD:
		pt = (struct nvme_pt_command *)arg;
		/* Force the command to target this namespace. */
		pt->cmd.nsid = htole32(ns->id);
		return (nvmf_passthrough_cmd(ns->sc, pt, false));
	case NVME_GET_NSID:
		gnsid = (struct nvme_get_nsid *)arg;
		strncpy(gnsid->cdev, device_get_nameunit(ns->sc->dev),
		    sizeof(gnsid->cdev));
		/* strncpy does not guarantee termination; do it by hand. */
		gnsid->cdev[sizeof(gnsid->cdev) - 1] = '\0';
		gnsid->nsid = ns->id;
		return (0);
	case DIOCGMEDIASIZE:
		*(off_t *)arg = ns->size;
		return (0);
	case DIOCGSECTORSIZE:
		*(u_int *)arg = ns->lba_size;
		return (0);
	default:
		return (ENOTTY);
	}
}
|
||||
|
||||
static int
|
||||
nvmf_ns_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
|
||||
{
|
||||
int error;
|
||||
|
||||
error = 0;
|
||||
if ((oflags & FWRITE) != 0)
|
||||
error = securelevel_gt(td->td_ucred, 0);
|
||||
return (error);
|
||||
}
|
||||
|
||||
void
|
||||
nvmf_ns_strategy(struct bio *bio)
|
||||
{
|
||||
struct nvmf_namespace *ns;
|
||||
int error;
|
||||
|
||||
ns = bio->bio_dev->si_drv1;
|
||||
|
||||
error = nvmf_ns_submit_bio(ns, bio);
|
||||
if (error != 0) {
|
||||
bio->bio_error = error;
|
||||
bio->bio_flags |= BIO_ERROR;
|
||||
bio->bio_resid = bio->bio_bcount;
|
||||
biodone(bio);
|
||||
}
|
||||
}
|
||||
|
||||
/* Character device switch for /dev/nvmeXnsY namespace nodes. */
static struct cdevsw nvmf_ns_cdevsw = {
	.d_version = D_VERSION,
	.d_flags = D_DISK,
	.d_open = nvmf_ns_open,
	.d_read = physread,
	.d_write = physwrite,
	.d_strategy = nvmf_ns_strategy,
	.d_ioctl = nvmf_ns_ioctl
};
|
||||
|
||||
/*
 * Create the state and /dev/nvmeXnsY device for namespace 'id' from
 * its Identify Namespace data.  Returns NULL if the namespace uses an
 * unsupported format (protection info, metadata, bad LBA format).
 */
struct nvmf_namespace *
nvmf_init_ns(struct nvmf_softc *sc, uint32_t id,
    struct nvme_namespace_data *data)
{
	struct make_dev_args mda;
	struct nvmf_namespace *ns;
	int error;
	uint8_t lbads, lbaf;

	ns = malloc(sizeof(*ns), M_NVMF, M_WAITOK | M_ZERO);
	ns->sc = sc;
	ns->id = id;
	TAILQ_INIT(&ns->pending_bios);
	mtx_init(&ns->lock, "nvmf ns", NULL, MTX_DEF);

	/* One dummy bio avoids dropping to 0 until destroy. */
	refcount_init(&ns->active_bios, 1);

	if (NVMEV(NVME_NS_DATA_DPS_PIT, data->dps) != 0) {
		ns_printf(ns, "End-to-end data protection not supported\n");
		goto fail;
	}

	lbaf = NVMEV(NVME_NS_DATA_FLBAS_FORMAT, data->flbas);
	if (lbaf > data->nlbaf) {
		ns_printf(ns, "Invalid LBA format index\n");
		goto fail;
	}

	if (NVMEV(NVME_NS_DATA_LBAF_MS, data->lbaf[lbaf]) != 0) {
		ns_printf(ns, "Namespaces with metadata are not supported\n");
		goto fail;
	}

	lbads = NVMEV(NVME_NS_DATA_LBAF_LBADS, data->lbaf[lbaf]);
	if (lbads == 0) {
		ns_printf(ns, "Invalid LBA format index\n");
		goto fail;
	}

	/* LBADS is log2 of the LBA size. */
	ns->lba_size = 1 << lbads;
	/* Assumes 'data' has already been byte-swapped to host order —
	 * TODO confirm against the identify path in the attach code. */
	ns->size = data->nsze * ns->lba_size;

	if (nvme_ctrlr_has_dataset_mgmt(sc->cdata))
		ns->flags |= NVME_NS_DEALLOCATE_SUPPORTED;

	if (NVMEV(NVME_CTRLR_DATA_VWC_PRESENT, sc->cdata->vwc) != 0)
		ns->flags |= NVME_NS_FLUSH_SUPPORTED;

	/*
	 * XXX: Does any of the boundary splitting for NOIOB make any
	 * sense for Fabrics?
	 */

	make_dev_args_init(&mda);
	mda.mda_devsw = &nvmf_ns_cdevsw;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = ns;
	error = make_dev_s(&mda, &ns->cdev, "%sns%u",
	    device_get_nameunit(sc->dev), id);
	if (error != 0)
		goto fail;

	ns->cdev->si_flags |= SI_UNMAPPED;

	return (ns);
fail:
	mtx_destroy(&ns->lock);
	free(ns, M_NVMF);
	return (NULL);
}
|
||||
|
||||
/*
 * Mark the namespace disconnected; subsequent and aborted bios are
 * queued until nvmf_reconnect_ns() runs.
 */
void
nvmf_disconnect_ns(struct nvmf_namespace *ns)
{
	mtx_lock(&ns->lock);
	ns->disconnected = true;
	mtx_unlock(&ns->lock);
}
|
||||
|
||||
void
|
||||
nvmf_reconnect_ns(struct nvmf_namespace *ns)
|
||||
{
|
||||
TAILQ_HEAD(, bio) bios;
|
||||
struct bio *bio;
|
||||
|
||||
mtx_lock(&ns->lock);
|
||||
ns->disconnected = false;
|
||||
TAILQ_INIT(&bios);
|
||||
TAILQ_CONCAT(&bios, &ns->pending_bios, bio_queue);
|
||||
mtx_unlock(&ns->lock);
|
||||
|
||||
while (!TAILQ_EMPTY(&bios)) {
|
||||
bio = TAILQ_FIRST(&bios);
|
||||
TAILQ_REMOVE(&bios, bio, bio_queue);
|
||||
nvmf_ns_strategy(bio);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Destroy a namespace: remove its cdev, wait for in-flight I/O to
 * drain, fail any bios still queued for reconnect, and free the state.
 */
void
nvmf_destroy_ns(struct nvmf_namespace *ns)
{
	TAILQ_HEAD(, bio) bios;
	struct bio *bio;

	destroy_dev(ns->cdev);

	/*
	 * Wait for active I/O requests to drain.  The release drops
	 * the reference on the "dummy bio" when the namespace is
	 * created.
	 */
	mtx_lock(&ns->lock);
	if (!refcount_release(&ns->active_bios)) {
		while (ns->active_bios != 0)
			mtx_sleep(ns, &ns->lock, 0, "nvmfrmns", 0);
	}

	/* Abort any pending I/O requests. */
	TAILQ_INIT(&bios);
	TAILQ_CONCAT(&bios, &ns->pending_bios, bio_queue);
	mtx_unlock(&ns->lock);

	while (!TAILQ_EMPTY(&bios)) {
		bio = TAILQ_FIRST(&bios);
		TAILQ_REMOVE(&bios, bio, bio_queue);
		bio->bio_error = ECONNABORTED;
		bio->bio_flags |= BIO_ERROR;
		bio->bio_resid = bio->bio_bcount;
		biodone(bio);
	}

	mtx_destroy(&ns->lock);
	free(ns, M_NVMF);
}
|
||||
|
||||
/*
 * Refresh a namespace's geometry from new Identify Namespace data
 * (e.g. after a Changed Namespace AER).  Returns false if the new
 * format is unsupported; validation mirrors nvmf_init_ns().
 */
bool
nvmf_update_ns(struct nvmf_namespace *ns, struct nvme_namespace_data *data)
{
	uint8_t lbads, lbaf;

	if (NVMEV(NVME_NS_DATA_DPS_PIT, data->dps) != 0) {
		ns_printf(ns, "End-to-end data protection not supported\n");
		return (false);
	}

	lbaf = NVMEV(NVME_NS_DATA_FLBAS_FORMAT, data->flbas);
	if (lbaf > data->nlbaf) {
		ns_printf(ns, "Invalid LBA format index\n");
		return (false);
	}

	if (NVMEV(NVME_NS_DATA_LBAF_MS, data->lbaf[lbaf]) != 0) {
		ns_printf(ns, "Namespaces with metadata are not supported\n");
		return (false);
	}

	lbads = NVMEV(NVME_NS_DATA_LBAF_LBADS, data->lbaf[lbaf]);
	if (lbads == 0) {
		ns_printf(ns, "Invalid LBA format index\n");
		return (false);
	}

	/* LBADS is log2 of the LBA size. */
	ns->lba_size = 1 << lbads;
	ns->size = data->nsze * ns->lba_size;
	return (true);
}
|
386
sys/dev/nvmf/host/nvmf_qpair.c
Normal file
386
sys/dev/nvmf/host/nvmf_qpair.c
Normal file
|
@ -0,0 +1,386 @@
|
|||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2023-2024 Chelsio Communications, Inc.
|
||||
* Written by: John Baldwin <jhb@FreeBSD.org>
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/bus.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <dev/nvme/nvme.h>
|
||||
#include <dev/nvmf/nvmf.h>
|
||||
#include <dev/nvmf/nvmf_transport.h>
|
||||
#include <dev/nvmf/host/nvmf_var.h>
|
||||
|
||||
struct nvmf_host_command {
|
||||
struct nvmf_request *req;
|
||||
TAILQ_ENTRY(nvmf_host_command) link;
|
||||
uint16_t cid;
|
||||
};
|
||||
|
||||
struct nvmf_host_qpair {
|
||||
struct nvmf_softc *sc;
|
||||
struct nvmf_qpair *qp;
|
||||
|
||||
bool sq_flow_control;
|
||||
bool shutting_down;
|
||||
u_int allocating;
|
||||
u_int num_commands;
|
||||
uint16_t sqhd;
|
||||
uint16_t sqtail;
|
||||
|
||||
struct mtx lock;
|
||||
|
||||
TAILQ_HEAD(, nvmf_host_command) free_commands;
|
||||
STAILQ_HEAD(, nvmf_request) pending_requests;
|
||||
|
||||
/* Indexed by cid. */
|
||||
struct nvmf_host_command **active_commands;
|
||||
|
||||
char name[16];
|
||||
};
|
||||
|
||||
struct nvmf_request *
|
||||
nvmf_allocate_request(struct nvmf_host_qpair *qp, void *sqe,
|
||||
nvmf_request_complete_t *cb, void *cb_arg, int how)
|
||||
{
|
||||
struct nvmf_request *req;
|
||||
struct nvmf_qpair *nq;
|
||||
|
||||
KASSERT(how == M_WAITOK || how == M_NOWAIT,
|
||||
("%s: invalid how", __func__));
|
||||
|
||||
req = malloc(sizeof(*req), M_NVMF, how | M_ZERO);
|
||||
if (req == NULL)
|
||||
return (NULL);
|
||||
|
||||
mtx_lock(&qp->lock);
|
||||
nq = qp->qp;
|
||||
if (nq == NULL) {
|
||||
mtx_unlock(&qp->lock);
|
||||
free(req, M_NVMF);
|
||||
return (NULL);
|
||||
}
|
||||
qp->allocating++;
|
||||
MPASS(qp->allocating != 0);
|
||||
mtx_unlock(&qp->lock);
|
||||
|
||||
req->qp = qp;
|
||||
req->cb = cb;
|
||||
req->cb_arg = cb_arg;
|
||||
req->nc = nvmf_allocate_command(nq, sqe, how);
|
||||
if (req->nc == NULL) {
|
||||
free(req, M_NVMF);
|
||||
req = NULL;
|
||||
}
|
||||
|
||||
mtx_lock(&qp->lock);
|
||||
qp->allocating--;
|
||||
if (qp->allocating == 0 && qp->shutting_down)
|
||||
wakeup(qp);
|
||||
mtx_unlock(&qp->lock);
|
||||
|
||||
return (req);
|
||||
}
|
||||
|
||||
static void
|
||||
nvmf_abort_request(struct nvmf_request *req, uint16_t cid)
|
||||
{
|
||||
struct nvme_completion cqe;
|
||||
|
||||
memset(&cqe, 0, sizeof(cqe));
|
||||
cqe.cid = cid;
|
||||
cqe.status = htole16(NVMEF(NVME_STATUS_SCT, NVME_SCT_PATH_RELATED) |
|
||||
NVMEF(NVME_STATUS_SC, NVME_SC_COMMAND_ABORTED_BY_HOST));
|
||||
req->cb(req->cb_arg, &cqe);
|
||||
}
|
||||
|
||||
void
|
||||
nvmf_free_request(struct nvmf_request *req)
|
||||
{
|
||||
if (req->nc != NULL)
|
||||
nvmf_free_capsule(req->nc);
|
||||
free(req, M_NVMF);
|
||||
}
|
||||
|
||||
static void
|
||||
nvmf_dispatch_command(struct nvmf_host_qpair *qp, struct nvmf_host_command *cmd)
|
||||
{
|
||||
struct nvmf_softc *sc = qp->sc;
|
||||
struct nvme_command *sqe;
|
||||
struct nvmf_capsule *nc;
|
||||
int error;
|
||||
|
||||
nc = cmd->req->nc;
|
||||
sqe = nvmf_capsule_sqe(nc);
|
||||
|
||||
/*
|
||||
* NB: Don't bother byte-swapping the cid so that receive
|
||||
* doesn't have to swap.
|
||||
*/
|
||||
sqe->cid = cmd->cid;
|
||||
|
||||
error = nvmf_transmit_capsule(nc);
|
||||
if (error != 0) {
|
||||
device_printf(sc->dev,
|
||||
"failed to transmit capsule: %d, disconnecting\n", error);
|
||||
nvmf_disconnect(sc);
|
||||
return;
|
||||
}
|
||||
|
||||
if (sc->ka_traffic)
|
||||
atomic_store_int(&sc->ka_active_tx_traffic, 1);
|
||||
}
|
||||
|
||||
static void
|
||||
nvmf_qp_error(void *arg, int error)
|
||||
{
|
||||
struct nvmf_host_qpair *qp = arg;
|
||||
struct nvmf_softc *sc = qp->sc;
|
||||
|
||||
/* Ignore simple close of queue pairs during shutdown. */
|
||||
if (!(sc->detaching && error == 0))
|
||||
device_printf(sc->dev, "error %d on %s, disconnecting\n", error,
|
||||
qp->name);
|
||||
nvmf_disconnect(sc);
|
||||
}
|
||||
|
||||
static void
|
||||
nvmf_receive_capsule(void *arg, struct nvmf_capsule *nc)
|
||||
{
|
||||
struct nvmf_host_qpair *qp = arg;
|
||||
struct nvmf_softc *sc = qp->sc;
|
||||
struct nvmf_host_command *cmd;
|
||||
struct nvmf_request *req;
|
||||
const struct nvme_completion *cqe;
|
||||
uint16_t cid;
|
||||
|
||||
cqe = nvmf_capsule_cqe(nc);
|
||||
|
||||
if (sc->ka_traffic)
|
||||
atomic_store_int(&sc->ka_active_rx_traffic, 1);
|
||||
|
||||
/*
|
||||
* NB: Don't bother byte-swapping the cid as transmit doesn't
|
||||
* swap either.
|
||||
*/
|
||||
cid = cqe->cid;
|
||||
|
||||
if (cid > qp->num_commands) {
|
||||
device_printf(sc->dev,
|
||||
"received invalid CID %u, disconnecting\n", cid);
|
||||
nvmf_disconnect(sc);
|
||||
nvmf_free_capsule(nc);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the queue has been shutdown due to an error, silently
|
||||
* drop the response.
|
||||
*/
|
||||
mtx_lock(&qp->lock);
|
||||
if (qp->qp == NULL) {
|
||||
device_printf(sc->dev,
|
||||
"received completion for CID %u on shutdown %s\n", cid,
|
||||
qp->name);
|
||||
mtx_unlock(&qp->lock);
|
||||
nvmf_free_capsule(nc);
|
||||
return;
|
||||
}
|
||||
|
||||
cmd = qp->active_commands[cid];
|
||||
if (cmd == NULL) {
|
||||
mtx_unlock(&qp->lock);
|
||||
device_printf(sc->dev,
|
||||
"received completion for inactive CID %u, disconnecting\n",
|
||||
cid);
|
||||
nvmf_disconnect(sc);
|
||||
nvmf_free_capsule(nc);
|
||||
return;
|
||||
}
|
||||
|
||||
KASSERT(cmd->cid == cid, ("%s: CID mismatch", __func__));
|
||||
req = cmd->req;
|
||||
cmd->req = NULL;
|
||||
if (STAILQ_EMPTY(&qp->pending_requests)) {
|
||||
qp->active_commands[cid] = NULL;
|
||||
TAILQ_INSERT_TAIL(&qp->free_commands, cmd, link);
|
||||
mtx_unlock(&qp->lock);
|
||||
} else {
|
||||
cmd->req = STAILQ_FIRST(&qp->pending_requests);
|
||||
STAILQ_REMOVE_HEAD(&qp->pending_requests, link);
|
||||
mtx_unlock(&qp->lock);
|
||||
nvmf_dispatch_command(qp, cmd);
|
||||
}
|
||||
|
||||
req->cb(req->cb_arg, cqe);
|
||||
nvmf_free_capsule(nc);
|
||||
nvmf_free_request(req);
|
||||
}
|
||||
|
||||
struct nvmf_host_qpair *
|
||||
nvmf_init_qp(struct nvmf_softc *sc, enum nvmf_trtype trtype,
|
||||
struct nvmf_handoff_qpair_params *handoff, const char *name)
|
||||
{
|
||||
struct nvmf_host_command *cmd, *ncmd;
|
||||
struct nvmf_host_qpair *qp;
|
||||
u_int i;
|
||||
|
||||
qp = malloc(sizeof(*qp), M_NVMF, M_WAITOK | M_ZERO);
|
||||
qp->sc = sc;
|
||||
qp->sq_flow_control = handoff->sq_flow_control;
|
||||
qp->sqhd = handoff->sqhd;
|
||||
qp->sqtail = handoff->sqtail;
|
||||
strlcpy(qp->name, name, sizeof(qp->name));
|
||||
mtx_init(&qp->lock, "nvmf qp", NULL, MTX_DEF);
|
||||
|
||||
/*
|
||||
* Allocate a spare command slot for each pending AER command
|
||||
* on the admin queue.
|
||||
*/
|
||||
qp->num_commands = handoff->qsize - 1;
|
||||
if (handoff->admin)
|
||||
qp->num_commands += sc->num_aer;
|
||||
|
||||
qp->active_commands = malloc(sizeof(*qp->active_commands) *
|
||||
qp->num_commands, M_NVMF, M_WAITOK | M_ZERO);
|
||||
TAILQ_INIT(&qp->free_commands);
|
||||
for (i = 0; i < qp->num_commands; i++) {
|
||||
cmd = malloc(sizeof(*cmd), M_NVMF, M_WAITOK | M_ZERO);
|
||||
cmd->cid = i;
|
||||
TAILQ_INSERT_TAIL(&qp->free_commands, cmd, link);
|
||||
}
|
||||
STAILQ_INIT(&qp->pending_requests);
|
||||
|
||||
qp->qp = nvmf_allocate_qpair(trtype, false, handoff, nvmf_qp_error,
|
||||
qp, nvmf_receive_capsule, qp);
|
||||
if (qp->qp == NULL) {
|
||||
TAILQ_FOREACH_SAFE(cmd, &qp->free_commands, link, ncmd) {
|
||||
TAILQ_REMOVE(&qp->free_commands, cmd, link);
|
||||
free(cmd, M_NVMF);
|
||||
}
|
||||
free(qp->active_commands, M_NVMF);
|
||||
mtx_destroy(&qp->lock);
|
||||
free(qp, M_NVMF);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
return (qp);
|
||||
}
|
||||
|
||||
void
|
||||
nvmf_shutdown_qp(struct nvmf_host_qpair *qp)
|
||||
{
|
||||
struct nvmf_host_command *cmd;
|
||||
struct nvmf_request *req;
|
||||
struct nvmf_qpair *nq;
|
||||
|
||||
mtx_lock(&qp->lock);
|
||||
nq = qp->qp;
|
||||
qp->qp = NULL;
|
||||
|
||||
if (nq == NULL) {
|
||||
while (qp->shutting_down)
|
||||
mtx_sleep(qp, &qp->lock, 0, "nvmfqpsh", 0);
|
||||
mtx_unlock(&qp->lock);
|
||||
return;
|
||||
}
|
||||
qp->shutting_down = true;
|
||||
while (qp->allocating != 0)
|
||||
mtx_sleep(qp, &qp->lock, 0, "nvmfqpqu", 0);
|
||||
mtx_unlock(&qp->lock);
|
||||
|
||||
nvmf_free_qpair(nq);
|
||||
|
||||
/*
|
||||
* Abort outstanding requests. Active requests will have
|
||||
* their I/O completions invoked and associated capsules freed
|
||||
* by the transport layer via nvmf_free_qpair. Pending
|
||||
* requests must have their I/O completion invoked via
|
||||
* nvmf_abort_capsule_data.
|
||||
*/
|
||||
for (u_int i = 0; i < qp->num_commands; i++) {
|
||||
cmd = qp->active_commands[i];
|
||||
if (cmd != NULL) {
|
||||
if (!cmd->req->aer)
|
||||
printf("%s: aborted active command %p (CID %u)\n",
|
||||
__func__, cmd->req, cmd->cid);
|
||||
|
||||
/* This was freed by nvmf_free_qpair. */
|
||||
cmd->req->nc = NULL;
|
||||
nvmf_abort_request(cmd->req, cmd->cid);
|
||||
nvmf_free_request(cmd->req);
|
||||
free(cmd, M_NVMF);
|
||||
}
|
||||
}
|
||||
while (!STAILQ_EMPTY(&qp->pending_requests)) {
|
||||
req = STAILQ_FIRST(&qp->pending_requests);
|
||||
STAILQ_REMOVE_HEAD(&qp->pending_requests, link);
|
||||
if (!req->aer)
|
||||
printf("%s: aborted pending command %p\n", __func__,
|
||||
req);
|
||||
nvmf_abort_capsule_data(req->nc, ECONNABORTED);
|
||||
nvmf_abort_request(req, 0);
|
||||
nvmf_free_request(req);
|
||||
}
|
||||
|
||||
mtx_lock(&qp->lock);
|
||||
qp->shutting_down = false;
|
||||
mtx_unlock(&qp->lock);
|
||||
wakeup(qp);
|
||||
}
|
||||
|
||||
void
|
||||
nvmf_destroy_qp(struct nvmf_host_qpair *qp)
|
||||
{
|
||||
struct nvmf_host_command *cmd, *ncmd;
|
||||
|
||||
nvmf_shutdown_qp(qp);
|
||||
|
||||
TAILQ_FOREACH_SAFE(cmd, &qp->free_commands, link, ncmd) {
|
||||
TAILQ_REMOVE(&qp->free_commands, cmd, link);
|
||||
free(cmd, M_NVMF);
|
||||
}
|
||||
free(qp->active_commands, M_NVMF);
|
||||
mtx_destroy(&qp->lock);
|
||||
free(qp, M_NVMF);
|
||||
}
|
||||
|
||||
void
|
||||
nvmf_submit_request(struct nvmf_request *req)
|
||||
{
|
||||
struct nvmf_host_qpair *qp;
|
||||
struct nvmf_host_command *cmd;
|
||||
|
||||
qp = req->qp;
|
||||
mtx_lock(&qp->lock);
|
||||
if (qp->qp == NULL) {
|
||||
mtx_unlock(&qp->lock);
|
||||
printf("%s: aborted pending command %p\n", __func__, req);
|
||||
nvmf_abort_capsule_data(req->nc, ECONNABORTED);
|
||||
nvmf_abort_request(req, 0);
|
||||
nvmf_free_request(req);
|
||||
return;
|
||||
}
|
||||
cmd = TAILQ_FIRST(&qp->free_commands);
|
||||
if (cmd == NULL) {
|
||||
/*
|
||||
* Queue this request. Will be sent after enough
|
||||
* in-flight requests have completed.
|
||||
*/
|
||||
STAILQ_INSERT_TAIL(&qp->pending_requests, req, link);
|
||||
mtx_unlock(&qp->lock);
|
||||
return;
|
||||
}
|
||||
|
||||
TAILQ_REMOVE(&qp->free_commands, cmd, link);
|
||||
KASSERT(qp->active_commands[cmd->cid] == NULL,
|
||||
("%s: CID already busy", __func__));
|
||||
qp->active_commands[cmd->cid] = cmd;
|
||||
cmd->req = req;
|
||||
mtx_unlock(&qp->lock);
|
||||
nvmf_dispatch_command(qp, cmd);
|
||||
}
|
332
sys/dev/nvmf/host/nvmf_sim.c
Normal file
332
sys/dev/nvmf/host/nvmf_sim.c
Normal file
|
@ -0,0 +1,332 @@
|
|||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2023-2024 Chelsio Communications, Inc.
|
||||
* Written by: John Baldwin <jhb@FreeBSD.org>
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/memdesc.h>
|
||||
#include <sys/refcount.h>
|
||||
|
||||
#include <cam/cam.h>
|
||||
#include <cam/cam_ccb.h>
|
||||
#include <cam/cam_sim.h>
|
||||
#include <cam/cam_xpt_sim.h>
|
||||
#include <cam/cam_debug.h>
|
||||
|
||||
#include <dev/nvmf/host/nvmf_var.h>
|
||||
|
||||
/*
|
||||
* The I/O completion may trigger after the received CQE if the I/O
|
||||
* used a zero-copy mbuf that isn't harvested until after the NIC
|
||||
* driver processes TX completions. Use spriv_field0 to as a refcount.
|
||||
*
|
||||
* Store any I/O error returned in spriv_field1.
|
||||
*/
|
||||
static __inline u_int *
|
||||
ccb_refs(union ccb *ccb)
|
||||
{
|
||||
return ((u_int *)&ccb->ccb_h.spriv_field0);
|
||||
}
|
||||
|
||||
#define spriv_ioerror spriv_field1
|
||||
|
||||
static void
|
||||
nvmf_ccb_done(union ccb *ccb)
|
||||
{
|
||||
if (!refcount_release(ccb_refs(ccb)))
|
||||
return;
|
||||
|
||||
if (nvmf_cqe_aborted(&ccb->nvmeio.cpl)) {
|
||||
ccb->ccb_h.status = CAM_REQUEUE_REQ;
|
||||
xpt_done(ccb);
|
||||
} else if (ccb->nvmeio.cpl.status != 0) {
|
||||
ccb->ccb_h.status = CAM_NVME_STATUS_ERROR;
|
||||
xpt_done(ccb);
|
||||
} else if (ccb->ccb_h.spriv_ioerror != 0) {
|
||||
KASSERT(ccb->ccb_h.spriv_ioerror != EJUSTRETURN,
|
||||
("%s: zero sized transfer without CQE error", __func__));
|
||||
ccb->ccb_h.status = CAM_REQ_CMP_ERR;
|
||||
xpt_done(ccb);
|
||||
} else {
|
||||
ccb->ccb_h.status = CAM_REQ_CMP;
|
||||
xpt_done_direct(ccb);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nvmf_ccb_io_complete(void *arg, size_t xfered, int error)
|
||||
{
|
||||
union ccb *ccb = arg;
|
||||
|
||||
/*
|
||||
* TODO: Reporting partial completions requires extending
|
||||
* nvmeio to support resid and updating nda to handle partial
|
||||
* reads, either by returning partial success (or an error) to
|
||||
* the caller, or retrying all or part of the request.
|
||||
*/
|
||||
ccb->ccb_h.spriv_ioerror = error;
|
||||
if (error == 0) {
|
||||
if (xfered == 0) {
|
||||
#ifdef INVARIANTS
|
||||
/*
|
||||
* If the request fails with an error in the CQE
|
||||
* there will be no data transferred but also no
|
||||
* I/O error.
|
||||
*/
|
||||
ccb->ccb_h.spriv_ioerror = EJUSTRETURN;
|
||||
#endif
|
||||
} else
|
||||
KASSERT(xfered == ccb->nvmeio.dxfer_len,
|
||||
("%s: partial CCB completion", __func__));
|
||||
}
|
||||
|
||||
nvmf_ccb_done(ccb);
|
||||
}
|
||||
|
||||
static void
|
||||
nvmf_ccb_complete(void *arg, const struct nvme_completion *cqe)
|
||||
{
|
||||
union ccb *ccb = arg;
|
||||
|
||||
ccb->nvmeio.cpl = *cqe;
|
||||
nvmf_ccb_done(ccb);
|
||||
}
|
||||
|
||||
static void
|
||||
nvmf_sim_io(struct nvmf_softc *sc, union ccb *ccb)
|
||||
{
|
||||
struct ccb_nvmeio *nvmeio = &ccb->nvmeio;
|
||||
struct memdesc mem;
|
||||
struct nvmf_request *req;
|
||||
struct nvmf_host_qpair *qp;
|
||||
|
||||
mtx_lock(&sc->sim_mtx);
|
||||
if (sc->sim_disconnected) {
|
||||
mtx_unlock(&sc->sim_mtx);
|
||||
nvmeio->ccb_h.status = CAM_REQUEUE_REQ;
|
||||
xpt_done(ccb);
|
||||
return;
|
||||
}
|
||||
if (nvmeio->ccb_h.func_code == XPT_NVME_IO)
|
||||
qp = nvmf_select_io_queue(sc);
|
||||
else
|
||||
qp = sc->admin;
|
||||
req = nvmf_allocate_request(qp, &nvmeio->cmd, nvmf_ccb_complete,
|
||||
ccb, M_NOWAIT);
|
||||
if (req == NULL) {
|
||||
mtx_unlock(&sc->sim_mtx);
|
||||
nvmeio->ccb_h.status = CAM_RESRC_UNAVAIL;
|
||||
xpt_done(ccb);
|
||||
return;
|
||||
}
|
||||
|
||||
if (nvmeio->dxfer_len != 0) {
|
||||
refcount_init(ccb_refs(ccb), 2);
|
||||
mem = memdesc_ccb(ccb);
|
||||
nvmf_capsule_append_data(req->nc, &mem, nvmeio->dxfer_len,
|
||||
(ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_OUT,
|
||||
nvmf_ccb_io_complete, ccb);
|
||||
} else
|
||||
refcount_init(ccb_refs(ccb), 1);
|
||||
|
||||
/*
|
||||
* Clear spriv_ioerror as it can hold an earlier error if this
|
||||
* CCB was aborted and has been retried.
|
||||
*/
|
||||
ccb->ccb_h.spriv_ioerror = 0;
|
||||
KASSERT(ccb->ccb_h.status == CAM_REQ_INPROG,
|
||||
("%s: incoming CCB is not in-progress", __func__));
|
||||
ccb->ccb_h.status |= CAM_SIM_QUEUED;
|
||||
nvmf_submit_request(req);
|
||||
mtx_unlock(&sc->sim_mtx);
|
||||
}
|
||||
|
||||
static void
|
||||
nvmf_sim_action(struct cam_sim *sim, union ccb *ccb)
|
||||
{
|
||||
struct nvmf_softc *sc = cam_sim_softc(sim);
|
||||
|
||||
CAM_DEBUG(ccb->ccb_h.path, CAM_DEBUG_TRACE,
|
||||
("nvmf_sim_action: func= %#x\n",
|
||||
ccb->ccb_h.func_code));
|
||||
|
||||
switch (ccb->ccb_h.func_code) {
|
||||
case XPT_PATH_INQ: /* Path routing inquiry */
|
||||
{
|
||||
struct ccb_pathinq *cpi = &ccb->cpi;
|
||||
|
||||
cpi->version_num = 1;
|
||||
cpi->hba_inquiry = 0;
|
||||
cpi->target_sprt = 0;
|
||||
cpi->hba_misc = PIM_UNMAPPED | PIM_NOSCAN;
|
||||
cpi->hba_eng_cnt = 0;
|
||||
cpi->max_target = 0;
|
||||
cpi->max_lun = sc->cdata->nn;
|
||||
cpi->async_flags = 0;
|
||||
cpi->hpath_id = 0;
|
||||
cpi->initiator_id = 0;
|
||||
strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
|
||||
strlcpy(cpi->hba_vid, "NVMeoF", HBA_IDLEN);
|
||||
strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
|
||||
cpi->unit_number = cam_sim_unit(sim);
|
||||
cpi->bus_id = 0;
|
||||
|
||||
/* XXX: Same as iSCSI. */
|
||||
cpi->base_transfer_speed = 150000;
|
||||
cpi->protocol = PROTO_NVME;
|
||||
cpi->protocol_version = sc->vs;
|
||||
cpi->transport = XPORT_NVMF;
|
||||
cpi->transport_version = sc->vs;
|
||||
cpi->xport_specific.nvmf.nsid =
|
||||
xpt_path_lun_id(ccb->ccb_h.path);
|
||||
cpi->xport_specific.nvmf.trtype = sc->trtype;
|
||||
strncpy(cpi->xport_specific.nvmf.dev_name,
|
||||
device_get_nameunit(sc->dev),
|
||||
sizeof(cpi->xport_specific.nvmf.dev_name));
|
||||
cpi->maxio = sc->max_xfer_size;
|
||||
cpi->hba_vendor = 0;
|
||||
cpi->hba_device = 0;
|
||||
cpi->hba_subvendor = 0;
|
||||
cpi->hba_subdevice = 0;
|
||||
cpi->ccb_h.status = CAM_REQ_CMP;
|
||||
break;
|
||||
}
|
||||
case XPT_GET_TRAN_SETTINGS: /* Get transport settings */
|
||||
{
|
||||
struct ccb_trans_settings *cts = &ccb->cts;
|
||||
struct ccb_trans_settings_nvme *nvme;
|
||||
struct ccb_trans_settings_nvmf *nvmf;
|
||||
|
||||
cts->protocol = PROTO_NVME;
|
||||
cts->protocol_version = sc->vs;
|
||||
cts->transport = XPORT_NVMF;
|
||||
cts->transport_version = sc->vs;
|
||||
|
||||
nvme = &cts->proto_specific.nvme;
|
||||
nvme->valid = CTS_NVME_VALID_SPEC;
|
||||
nvme->spec = sc->vs;
|
||||
|
||||
nvmf = &cts->xport_specific.nvmf;
|
||||
nvmf->valid = CTS_NVMF_VALID_TRTYPE;
|
||||
nvmf->trtype = sc->trtype;
|
||||
cts->ccb_h.status = CAM_REQ_CMP;
|
||||
break;
|
||||
}
|
||||
case XPT_SET_TRAN_SETTINGS: /* Set transport settings */
|
||||
/*
|
||||
* No transfer settings can be set, but nvme_xpt sends
|
||||
* this anyway.
|
||||
*/
|
||||
ccb->ccb_h.status = CAM_REQ_CMP;
|
||||
break;
|
||||
case XPT_NVME_IO: /* Execute the requested I/O */
|
||||
case XPT_NVME_ADMIN: /* or Admin operation */
|
||||
nvmf_sim_io(sc, ccb);
|
||||
return;
|
||||
default:
|
||||
/* XXX */
|
||||
device_printf(sc->dev, "unhandled sim function %#x\n",
|
||||
ccb->ccb_h.func_code);
|
||||
ccb->ccb_h.status = CAM_REQ_INVALID;
|
||||
break;
|
||||
}
|
||||
xpt_done(ccb);
|
||||
}
|
||||
|
||||
int
|
||||
nvmf_init_sim(struct nvmf_softc *sc)
|
||||
{
|
||||
struct cam_devq *devq;
|
||||
int max_trans;
|
||||
|
||||
max_trans = sc->max_pending_io * 3 / 4;
|
||||
devq = cam_simq_alloc(max_trans);
|
||||
if (devq == NULL) {
|
||||
device_printf(sc->dev, "Failed to allocate CAM simq\n");
|
||||
return (ENOMEM);
|
||||
}
|
||||
|
||||
mtx_init(&sc->sim_mtx, "nvmf sim", NULL, MTX_DEF);
|
||||
sc->sim = cam_sim_alloc(nvmf_sim_action, NULL, "nvme", sc,
|
||||
device_get_unit(sc->dev), NULL, max_trans, max_trans, devq);
|
||||
if (sc->sim == NULL) {
|
||||
device_printf(sc->dev, "Failed to allocate CAM sim\n");
|
||||
cam_simq_free(devq);
|
||||
mtx_destroy(&sc->sim_mtx);
|
||||
return (ENXIO);
|
||||
}
|
||||
if (xpt_bus_register(sc->sim, sc->dev, 0) != CAM_SUCCESS) {
|
||||
device_printf(sc->dev, "Failed to create CAM bus\n");
|
||||
cam_sim_free(sc->sim, TRUE);
|
||||
mtx_destroy(&sc->sim_mtx);
|
||||
return (ENXIO);
|
||||
}
|
||||
if (xpt_create_path(&sc->path, NULL, cam_sim_path(sc->sim),
|
||||
CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
|
||||
device_printf(sc->dev, "Failed to create CAM path\n");
|
||||
xpt_bus_deregister(cam_sim_path(sc->sim));
|
||||
cam_sim_free(sc->sim, TRUE);
|
||||
mtx_destroy(&sc->sim_mtx);
|
||||
return (ENXIO);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
nvmf_sim_rescan_ns(struct nvmf_softc *sc, uint32_t id)
|
||||
{
|
||||
union ccb *ccb;
|
||||
|
||||
ccb = xpt_alloc_ccb_nowait();
|
||||
if (ccb == NULL) {
|
||||
device_printf(sc->dev,
|
||||
"unable to alloc CCB for rescan of namespace %u\n", id);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* As with nvme_sim, map NVMe namespace IDs onto CAM unit
|
||||
* LUNs.
|
||||
*/
|
||||
if (xpt_create_path(&ccb->ccb_h.path, NULL, cam_sim_path(sc->sim), 0,
|
||||
id) != CAM_REQ_CMP) {
|
||||
device_printf(sc->dev,
|
||||
"Unable to create path for rescan of namespace %u\n", id);
|
||||
xpt_free_ccb(ccb);
|
||||
return;
|
||||
}
|
||||
xpt_rescan(ccb);
|
||||
}
|
||||
|
||||
void
|
||||
nvmf_disconnect_sim(struct nvmf_softc *sc)
|
||||
{
|
||||
mtx_lock(&sc->sim_mtx);
|
||||
sc->sim_disconnected = true;
|
||||
xpt_freeze_simq(sc->sim, 1);
|
||||
mtx_unlock(&sc->sim_mtx);
|
||||
}
|
||||
|
||||
void
|
||||
nvmf_reconnect_sim(struct nvmf_softc *sc)
|
||||
{
|
||||
mtx_lock(&sc->sim_mtx);
|
||||
sc->sim_disconnected = false;
|
||||
mtx_unlock(&sc->sim_mtx);
|
||||
xpt_release_simq(sc->sim, 1);
|
||||
}
|
||||
|
||||
void
|
||||
nvmf_destroy_sim(struct nvmf_softc *sc)
|
||||
{
|
||||
xpt_async(AC_LOST_DEVICE, sc->path, NULL);
|
||||
if (sc->sim_disconnected)
|
||||
xpt_release_simq(sc->sim, 1);
|
||||
xpt_free_path(sc->path);
|
||||
xpt_bus_deregister(cam_sim_path(sc->sim));
|
||||
cam_sim_free(sc->sim, TRUE);
|
||||
mtx_destroy(&sc->sim_mtx);
|
||||
}
|
208
sys/dev/nvmf/host/nvmf_var.h
Normal file
208
sys/dev/nvmf/host/nvmf_var.h
Normal file
|
@ -0,0 +1,208 @@
|
|||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2023-2024 Chelsio Communications, Inc.
|
||||
* Written by: John Baldwin <jhb@FreeBSD.org>
|
||||
*/
|
||||
|
||||
#ifndef __NVMF_VAR_H__
|
||||
#define __NVMF_VAR_H__
|
||||
|
||||
#include <sys/_callout.h>
|
||||
#include <sys/_lock.h>
|
||||
#include <sys/_mutex.h>
|
||||
#include <sys/_sx.h>
|
||||
#include <sys/_task.h>
|
||||
#include <sys/queue.h>
|
||||
#include <dev/nvme/nvme.h>
|
||||
#include <dev/nvmf/nvmf_transport.h>
|
||||
|
||||
struct nvmf_aer;
|
||||
struct nvmf_capsule;
|
||||
struct nvmf_host_qpair;
|
||||
struct nvmf_namespace;
|
||||
|
||||
typedef void nvmf_request_complete_t(void *, const struct nvme_completion *);
|
||||
|
||||
struct nvmf_ivars {
|
||||
struct nvmf_handoff_host *hh;
|
||||
struct nvmf_handoff_qpair_params *io_params;
|
||||
struct nvme_controller_data *cdata;
|
||||
};
|
||||
|
||||
struct nvmf_softc {
|
||||
device_t dev;
|
||||
|
||||
struct nvmf_host_qpair *admin;
|
||||
struct nvmf_host_qpair **io;
|
||||
u_int num_io_queues;
|
||||
enum nvmf_trtype trtype;
|
||||
|
||||
struct cam_sim *sim;
|
||||
struct cam_path *path;
|
||||
struct mtx sim_mtx;
|
||||
bool sim_disconnected;
|
||||
|
||||
struct nvmf_namespace **ns;
|
||||
|
||||
struct nvme_controller_data *cdata;
|
||||
uint64_t cap;
|
||||
uint32_t vs;
|
||||
u_int max_pending_io;
|
||||
u_long max_xfer_size;
|
||||
|
||||
struct cdev *cdev;
|
||||
|
||||
/*
|
||||
* Keep Alive support depends on two timers. The 'tx' timer
|
||||
* is responsible for sending KeepAlive commands and runs at
|
||||
* half the timeout interval. The 'rx' timer is responsible
|
||||
* for detecting an actual timeout.
|
||||
*
|
||||
* For efficient support of TKAS, the host does not reschedule
|
||||
* these timers every time new commands are scheduled.
|
||||
* Instead, the host sets the *_traffic flags when commands
|
||||
* are sent and received. The timeout handlers check and
|
||||
* clear these flags. This does mean it can take up to twice
|
||||
* the timeout time to detect an AWOL controller.
|
||||
*/
|
||||
bool ka_traffic; /* Using TKAS? */
|
||||
|
||||
volatile int ka_active_tx_traffic;
|
||||
struct callout ka_tx_timer;
|
||||
sbintime_t ka_tx_sbt;
|
||||
|
||||
volatile int ka_active_rx_traffic;
|
||||
struct callout ka_rx_timer;
|
||||
sbintime_t ka_rx_sbt;
|
||||
|
||||
struct sx connection_lock;
|
||||
struct task disconnect_task;
|
||||
bool detaching;
|
||||
|
||||
u_int num_aer;
|
||||
struct nvmf_aer *aer;
|
||||
};
|
||||
|
||||
struct nvmf_request {
|
||||
struct nvmf_host_qpair *qp;
|
||||
struct nvmf_capsule *nc;
|
||||
nvmf_request_complete_t *cb;
|
||||
void *cb_arg;
|
||||
bool aer;
|
||||
|
||||
STAILQ_ENTRY(nvmf_request) link;
|
||||
};
|
||||
|
||||
struct nvmf_completion_status {
|
||||
struct nvme_completion cqe;
|
||||
bool done;
|
||||
bool io_done;
|
||||
int io_error;
|
||||
};
|
||||
|
||||
static __inline struct nvmf_host_qpair *
|
||||
nvmf_select_io_queue(struct nvmf_softc *sc)
|
||||
{
|
||||
/* TODO: Support multiple queues? */
|
||||
return (sc->io[0]);
|
||||
}
|
||||
|
||||
static __inline bool
|
||||
nvmf_cqe_aborted(const struct nvme_completion *cqe)
|
||||
{
|
||||
uint16_t status;
|
||||
|
||||
status = le16toh(cqe->status);
|
||||
return (NVME_STATUS_GET_SCT(status) == NVME_SCT_PATH_RELATED &&
|
||||
NVME_STATUS_GET_SC(status) == NVME_SC_COMMAND_ABORTED_BY_HOST);
|
||||
}
|
||||
|
||||
static __inline void
|
||||
nvmf_status_init(struct nvmf_completion_status *status)
|
||||
{
|
||||
status->done = false;
|
||||
status->io_done = true;
|
||||
status->io_error = 0;
|
||||
}
|
||||
|
||||
static __inline void
|
||||
nvmf_status_wait_io(struct nvmf_completion_status *status)
|
||||
{
|
||||
status->io_done = false;
|
||||
}
|
||||
|
||||
#ifdef DRIVER_MODULE
|
||||
extern driver_t nvme_nvmf_driver;
|
||||
#endif
|
||||
|
||||
#ifdef MALLOC_DECLARE
|
||||
MALLOC_DECLARE(M_NVMF);
|
||||
#endif
|
||||
|
||||
/* nvmf.c */
|
||||
void nvmf_complete(void *arg, const struct nvme_completion *cqe);
|
||||
void nvmf_io_complete(void *arg, size_t xfered, int error);
|
||||
void nvmf_wait_for_reply(struct nvmf_completion_status *status);
|
||||
int nvmf_init_ivars(struct nvmf_ivars *ivars, struct nvmf_handoff_host *hh);
|
||||
void nvmf_free_ivars(struct nvmf_ivars *ivars);
|
||||
void nvmf_disconnect(struct nvmf_softc *sc);
|
||||
void nvmf_rescan_ns(struct nvmf_softc *sc, uint32_t nsid);
|
||||
int nvmf_passthrough_cmd(struct nvmf_softc *sc, struct nvme_pt_command *pt,
|
||||
bool admin);
|
||||
|
||||
/* nvmf_aer.c */
|
||||
void nvmf_init_aer(struct nvmf_softc *sc);
|
||||
int nvmf_start_aer(struct nvmf_softc *sc);
|
||||
void nvmf_destroy_aer(struct nvmf_softc *sc);
|
||||
|
||||
/* nvmf_cmd.c */
|
||||
bool nvmf_cmd_get_property(struct nvmf_softc *sc, uint32_t offset,
|
||||
uint8_t size, nvmf_request_complete_t *cb, void *cb_arg, int how);
|
||||
bool nvmf_cmd_set_property(struct nvmf_softc *sc, uint32_t offset,
|
||||
uint8_t size, uint64_t value, nvmf_request_complete_t *cb, void *cb_arg,
|
||||
int how);
|
||||
bool nvmf_cmd_keep_alive(struct nvmf_softc *sc, nvmf_request_complete_t *cb,
|
||||
void *cb_arg, int how);
|
||||
bool nvmf_cmd_identify_active_namespaces(struct nvmf_softc *sc, uint32_t id,
|
||||
struct nvme_ns_list *nslist, nvmf_request_complete_t *req_cb,
|
||||
void *req_cb_arg, nvmf_io_complete_t *io_cb, void *io_cb_arg, int how);
|
||||
bool nvmf_cmd_identify_namespace(struct nvmf_softc *sc, uint32_t id,
|
||||
struct nvme_namespace_data *nsdata, nvmf_request_complete_t *req_cb,
|
||||
void *req_cb_arg, nvmf_io_complete_t *io_cb, void *io_cb_arg, int how);
|
||||
bool nvmf_cmd_get_log_page(struct nvmf_softc *sc, uint32_t nsid, uint8_t lid,
|
||||
uint64_t offset, void *buf, size_t len, nvmf_request_complete_t *req_cb,
|
||||
void *req_cb_arg, nvmf_io_complete_t *io_cb, void *io_cb_arg, int how);
|
||||
|
||||
/* nvmf_ctldev.c */
|
||||
int nvmf_ctl_load(void);
|
||||
void nvmf_ctl_unload(void);
|
||||
|
||||
/* nvmf_ns.c */
|
||||
struct nvmf_namespace *nvmf_init_ns(struct nvmf_softc *sc, uint32_t id,
|
||||
struct nvme_namespace_data *data);
|
||||
void nvmf_disconnect_ns(struct nvmf_namespace *ns);
|
||||
void nvmf_reconnect_ns(struct nvmf_namespace *ns);
|
||||
void nvmf_destroy_ns(struct nvmf_namespace *ns);
|
||||
bool nvmf_update_ns(struct nvmf_namespace *ns,
|
||||
struct nvme_namespace_data *data);
|
||||
|
||||
/* nvmf_qpair.c */
|
||||
struct nvmf_host_qpair *nvmf_init_qp(struct nvmf_softc *sc,
|
||||
enum nvmf_trtype trtype, struct nvmf_handoff_qpair_params *handoff,
|
||||
const char *name);
|
||||
void nvmf_shutdown_qp(struct nvmf_host_qpair *qp);
|
||||
void nvmf_destroy_qp(struct nvmf_host_qpair *qp);
|
||||
struct nvmf_request *nvmf_allocate_request(struct nvmf_host_qpair *qp,
|
||||
void *sqe, nvmf_request_complete_t *cb, void *cb_arg, int how);
|
||||
void nvmf_submit_request(struct nvmf_request *req);
|
||||
void nvmf_free_request(struct nvmf_request *req);
|
||||
|
||||
/* nvmf_sim.c */
|
||||
int nvmf_init_sim(struct nvmf_softc *sc);
|
||||
void nvmf_disconnect_sim(struct nvmf_softc *sc);
|
||||
void nvmf_reconnect_sim(struct nvmf_softc *sc);
|
||||
void nvmf_destroy_sim(struct nvmf_softc *sc);
|
||||
void nvmf_sim_rescan_ns(struct nvmf_softc *sc, uint32_t id);
|
||||
|
||||
#endif /* !__NVMF_VAR_H__ */
|
|
@ -1,4 +1,5 @@
|
|||
SUBDIR= nvmf_tcp \
|
||||
SUBDIR= nvmf \
|
||||
nvmf_tcp \
|
||||
nvmf_transport
|
||||
|
||||
.include <bsd.subdir.mk>
|
||||
|
|
13
sys/modules/nvmf/nvmf/Makefile
Normal file
13
sys/modules/nvmf/nvmf/Makefile
Normal file
|
@ -0,0 +1,13 @@
|
|||
.PATH: ${SRCTOP}/sys/dev/nvmf/host
|
||||
|
||||
KMOD= nvmf
|
||||
|
||||
SRCS= nvmf.c \
|
||||
nvmf_aer.c \
|
||||
nvmf_cmd.c \
|
||||
nvmf_ctldev.c \
|
||||
nvmf_ns.c \
|
||||
nvmf_qpair.c \
|
||||
nvmf_sim.c
|
||||
|
||||
.include <bsd.kmod.mk>
|
Loading…
Reference in a new issue