nvme: Add Linux compatible ioctls

Add the NVME_IOCTL_ID, NVME_IOCTL_ADMIN_CMD, and NVME_IOCTL_IO_CMD Linux
compatible ioctls. These may be run on either an I/O (ns) dev or a nvme
(admin) dev. Linux allows both on either device, and programs use this
and aren't careful about having the right device open. Emulate this
feature, and implement these ioctls. The data is passed into the
kernel in host byte order (not converted to le). Results are returned in
host order.

The timeout field is ignored, and the metadata and metadata_len fields
must be zero.

The addr field can be null, even when the data_len is non zero (FreeBSD's
ioctl interface prohibits this, Linux's just ignores the inconsistency).

Only cdw0 of the completion is returned from the command: the status is not
returned in the 'result' field. XXX need to verify that this is what Linux does on an
error signaled from the drive.

No external include file is yet available for this: most programs that
call this interface either use a linux-specific path <linux/nvme.h> or
have their own private copy of the data. It's unclear what the best thing
to do is.

Also, create a /dev/nvmeXnY as an alias for /dev/nvmeXnsY.

These changes allow a native build of nvme-cli to work for everything
that doesn't depend on sysfs entries in /sys, calls that use metadata,
send / receive drive data and sed functionality not in our nvme driver.

Sponsored by:		Netflix
Co-Authored-by:		Chuck Tuffli <chuck@freebsd.org>
Reviewed by:		chuck
Differential Revision:	https://reviews.freebsd.org/D45415
This commit is contained in:
Warner Losh 2024-06-14 16:40:08 -06:00
parent 5198178f3e
commit 1bce7cd885
4 changed files with 190 additions and 2 deletions

View file

@ -1902,6 +1902,7 @@ struct thread;
struct nvme_namespace;
struct nvme_controller;
struct nvme_consumer;
struct nvme_passthru_cmd;
typedef void (*nvme_cb_fn_t)(void *, const struct nvme_completion *);
@ -1921,6 +1922,11 @@ int nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr,
uint32_t nsid, int is_user_buffer,
int is_admin_cmd);
/*
 * Linux-compatible passthrough: builds a request from the fields of npc,
 * submits it as an admin request (is_admin) or as an I/O request, and
 * sleeps until it completes.  nsid is the namespace ID stored in the
 * command; is_user indicates that npc->addr is a user-space address.
 * Returns 0 or an errno value.
 */
int nvme_ctrlr_linux_passthru_cmd(struct nvme_controller *ctrlr,
struct nvme_passthru_cmd *npc,
uint32_t nsid, bool is_user,
bool is_admin);
/* Admin functions */
void nvme_ctrlr_cmd_set_feature(struct nvme_controller *ctrlr,
uint8_t feature, uint32_t cdw11,

View file

@ -43,6 +43,7 @@
#include <vm/vm.h>
#include "nvme_private.h"
#include "nvme_linux.h"
#define B4_CHK_RDY_DELAY_MS 2300 /* work around controller bug */
@ -1269,7 +1270,7 @@ nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr,
ret = EFAULT;
goto err;
}
req = nvme_allocate_request_vaddr(buf->b_data, pt->len,
req = nvme_allocate_request_vaddr(buf->b_data, pt->len,
nvme_pt_done, pt);
} else
req = nvme_allocate_request_vaddr(pt->buf, pt->len,
@ -1314,6 +1315,103 @@ nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr,
return (ret);
}
/*
 * Completion callback for Linux-style passthru requests.
 *
 * arg is the struct nvme_passthru_cmd being serviced.  While the command
 * is in flight its metadata field holds the address of the mutex the
 * submitter sleeps on; clearing that field signals completion.
 */
static void
nvme_npc_done(void *arg, const struct nvme_completion *cpl)
{
	struct nvme_passthru_cmd *cmd;
	struct mtx *lock;

	cmd = arg;
	lock = (void *)(uintptr_t)cmd->metadata;

	/* cpl in host order by now */
	cmd->result = cpl->cdw0;

	/* Flip the "in flight" marker under the lock, then wake the sleeper. */
	mtx_lock(lock);
	cmd->metadata = 0;
	wakeup(cmd);
	mtx_unlock(lock);
}
/* XXX refactor? */
/*
 * Execute a Linux-style passthru command (struct nvme_passthru_cmd) and
 * sleep until it completes.
 *
 * ctrlr    - controller the request is submitted to
 * npc      - command block; on completion npc->result holds cdw0 of the
 *            completion.  npc->metadata is borrowed internally while the
 *            command is in flight and is zero again on return.
 * nsid     - namespace ID stored in the command
 * is_user  - npc->addr is a user-space address that must be mapped first
 * is_admin - submit as an admin request rather than an I/O request
 *
 * Returns 0 once the command has completed, EIO if metadata is requested
 * or data_len exceeds the controller's max_xfer_size, EINVAL if a data
 * buffer is supplied with an opcode whose low two bits are 00b or 11b, and
 * EFAULT if the user buffer cannot be mapped.  The completion status is
 * not examined here.
 */
int
nvme_ctrlr_linux_passthru_cmd(struct nvme_controller *ctrlr,
    struct nvme_passthru_cmd *npc, uint32_t nsid, bool is_user, bool is_admin)
{
	struct nvme_request	*req;
	struct mtx		*mtx;
	struct buf		*buf = NULL;
	int			ret = 0;

	/*
	 * We don't support metadata.
	 */
	if (npc->metadata != 0 || npc->metadata_len != 0)
		return (EIO);

	/*
	 * A zero addr with a non-zero data_len is tolerated and treated as
	 * "no data", so the command goes out without a payload.
	 */
	if (npc->data_len > 0 && npc->addr != 0) {
		if (npc->data_len > ctrlr->max_xfer_size) {
			nvme_printf(ctrlr,
			    "npc->data_len (%d) exceeds max_xfer_size (%d)\n",
			    npc->data_len, ctrlr->max_xfer_size);
			return (EIO);
		}
		/* We only support data out or data in commands, but not both at once. */
		if ((npc->opcode & 0x3) == 0 || (npc->opcode & 0x3) == 3)
			return (EINVAL);
		if (is_user) {
			/*
			 * Ensure the user buffer is wired for the duration of
			 * this pass-through command.
			 */
			PHOLD(curproc);
			buf = uma_zalloc(pbuf_zone, M_WAITOK);
			buf->b_iocmd = npc->opcode & 1 ? BIO_WRITE : BIO_READ;
			/*
			 * addr is a 64-bit integer; go through uintptr_t so the
			 * conversion to a pointer is well-formed on 32-bit
			 * platforms too.
			 */
			if (vmapbuf(buf, (void *)(uintptr_t)npc->addr,
			    npc->data_len, 1) < 0) {
				ret = EFAULT;
				goto err;
			}
			req = nvme_allocate_request_vaddr(buf->b_data,
			    npc->data_len, nvme_npc_done, npc);
		} else
			req = nvme_allocate_request_vaddr(
			    (void *)(uintptr_t)npc->addr, npc->data_len,
			    nvme_npc_done, npc);
	} else
		req = nvme_allocate_request_null(nvme_npc_done, npc);

	/* The caller supplies host-order values; the command is little-endian. */
	req->cmd.opc = npc->opcode;
	req->cmd.fuse = npc->flags;
	/* cdw2/cdw3 are full 32-bit dwords: a 16-bit swap would truncate them. */
	req->cmd.rsvd2 = htole32(npc->cdw2);
	req->cmd.rsvd3 = htole32(npc->cdw3);
	req->cmd.cdw10 = htole32(npc->cdw10);
	req->cmd.cdw11 = htole32(npc->cdw11);
	req->cmd.cdw12 = htole32(npc->cdw12);
	req->cmd.cdw13 = htole32(npc->cdw13);
	req->cmd.cdw14 = htole32(npc->cdw14);
	req->cmd.cdw15 = htole32(npc->cdw15);
	req->cmd.nsid = htole32(nsid);

	/*
	 * metadata was verified to be zero above, so reuse it to hand the
	 * sleep mutex to nvme_npc_done(); it doubles as the "still in
	 * flight" marker that the completion callback clears.
	 */
	mtx = mtx_pool_find(mtxpool_sleep, npc);
	npc->metadata = (uintptr_t) mtx;

	/* XXX no timeout passed down */
	if (is_admin)
		nvme_ctrlr_submit_admin_request(ctrlr, req);
	else
		nvme_ctrlr_submit_io_request(ctrlr, req);

	mtx_lock(mtx);
	while (npc->metadata != 0)
		mtx_sleep(npc, mtx, PRIBIO, "nvme_npc", 0);
	mtx_unlock(mtx);

	if (buf != NULL) {
		vunmapbuf(buf);
		/* The vmapbuf() failure path joins here: nothing to unmap. */
err:
		uma_zfree(pbuf_zone, buf);
		PRELE(curproc);
	}

	return (ret);
}
static int
nvme_ctrlr_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
struct thread *td)
@ -1324,6 +1422,7 @@ nvme_ctrlr_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
ctrlr = cdev->si_drv1;
switch (cmd) {
case NVME_IOCTL_RESET: /* Linux compat */
case NVME_RESET_CONTROLLER:
nvme_ctrlr_reset(ctrlr);
break;
@ -1342,6 +1441,19 @@ nvme_ctrlr_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
case NVME_GET_MAX_XFER_SIZE:
*(uint64_t *)arg = ctrlr->max_xfer_size;
break;
/* Linux Compatible (see nvme_linux.h) */
case NVME_IOCTL_ID:
td->td_retval[0] = 0xfffffffful;
return (0);
case NVME_IOCTL_ADMIN_CMD:
case NVME_IOCTL_IO_CMD: {
struct nvme_passthru_cmd *npc = (struct nvme_passthru_cmd *)arg;
return (nvme_ctrlr_linux_passthru_cmd(ctrlr, npc, npc->nsid, true,
cmd == NVME_IOCTL_ADMIN_CMD));
}
default:
return (ENOTTY);
}

58
sys/dev/nvme/nvme_linux.h Normal file
View file

@ -0,0 +1,58 @@
/*-
* Copyright (c) 2024, Netflix Inc.
* Written by Warner Losh
*
* SPDX-License-Identifier: BSD-2-Clause
*/
/*
* Linux compatible NVME ioctls. So far we just support ID, ADMIN_CMD and
* IO_CMD. The rest are not supported.
*/
#include <sys/ioccom.h>
#include <sys/_types.h>
/*
 * Command block exchanged with user space by NVME_IOCTL_ADMIN_CMD and
 * NVME_IOCTL_IO_CMD.  Values are in host byte order.  The field order and
 * sizes form the ioctl ABI (presumably mirroring Linux's structure of the
 * same name -- verify against the Linux header before changing anything).
 */
struct nvme_passthru_cmd {
/* Command opcode; its low two bits select the data transfer direction. */
__uint8_t opcode;
/* Copied into the command's fuse field. */
__uint8_t flags;
__uint16_t rsvd1;
/* Namespace ID; the namespace device substitutes its own ID instead. */
__uint32_t nsid;
/* Command dwords 2 and 3. */
__uint32_t cdw2;
__uint32_t cdw3;
/* Metadata is not supported: must be zero on entry. */
__uint64_t metadata;
/* Address of the data buffer; zero means no data is transferred. */
__uint64_t addr;
/* Must be zero (metadata is not supported). */
__uint32_t metadata_len;
/* Length of the data buffer in bytes. */
__uint32_t data_len;
/* Command dwords 10 through 15. */
__uint32_t cdw10;
__uint32_t cdw11;
__uint32_t cdw12;
__uint32_t cdw13;
__uint32_t cdw14;
__uint32_t cdw15;
/* Currently ignored: no timeout is passed down with the request. */
__uint32_t timeout_ms;
/* Output: dword 0 of the completion. */
__uint32_t result;
};
/*
 * NVME_IOCTL_ADMIN_CMD is declared in terms of struct nvme_admin_cmd; this
 * alias makes that the same type as struct nvme_passthru_cmd.
 */
#define nvme_admin_cmd nvme_passthru_cmd
/*
 * Linux nvme ioctls, commented out ones are not supported.
 * All of them use ioctl group 'N'.
 */
#define NVME_IOCTL_ID _IO('N', 0x40)
#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd)
/* #define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io) */
#define NVME_IOCTL_IO_CMD _IOWR('N', 0x43, struct nvme_passthru_cmd)
#define NVME_IOCTL_RESET _IO('N', 0x44)
/* #define NVME_IOCTL_SUBSYS_RESET _IO('N', 0x45) */
/* #define NVME_IOCTL_RESCAN _IO('N', 0x46) */
/* #define NVME_IOCTL_ADMIN64_CMD _IOWR('N', 0x47, struct nvme_passthru_cmd64) */
/* #define NVME_IOCTL_IO64_CMD _IOWR('N', 0x48, struct nvme_passthru_cmd64) */
/* #define NVME_IOCTL_IO64_CMD_VEC _IOWR('N', 0x49, struct nvme_passthru_cmd64) */
/* io_uring async commands: */
/* #define NVME_URING_CMD_IO _IOWR('N', 0x80, struct nvme_uring_cmd) */
/* #define NVME_URING_CMD_IO_VEC _IOWR('N', 0x81, struct nvme_uring_cmd) */
/* #define NVME_URING_CMD_ADMIN _IOWR('N', 0x82, struct nvme_uring_cmd) */
/* #define NVME_URING_CMD_ADMIN_VEC _IOWR('N', 0x83, struct nvme_uring_cmd) */

View file

@ -43,6 +43,7 @@
#include <geom/geom.h>
#include "nvme_private.h"
#include "nvme_linux.h"
static void nvme_bio_child_inbed(struct bio *parent, int bio_error);
static void nvme_bio_child_done(void *arg,
@ -93,6 +94,18 @@ nvme_ns_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
case DIOCGSECTORSIZE:
*(u_int *)arg = nvme_ns_get_sector_size(ns);
break;
/* Linux Compatible (see nvme_linux.h) */
case NVME_IOCTL_ID:
td->td_retval[0] = ns->id;
return (0);
case NVME_IOCTL_ADMIN_CMD:
case NVME_IOCTL_IO_CMD: {
struct nvme_passthru_cmd *npc = (struct nvme_passthru_cmd *)arg;
return (nvme_ctrlr_linux_passthru_cmd(ctrlr, npc, ns->id, true,
cmd == NVME_IOCTL_ADMIN_CMD));
}
default:
return (ENOTTY);
}
@ -610,7 +623,6 @@ nvme_ns_construct(struct nvme_namespace *ns, uint32_t id,
return (ENXIO);
ns->cdev->si_drv2 = make_dev_alias(ns->cdev, "%sns%d",
device_get_nameunit(ctrlr->dev), ns->id);
ns->cdev->si_flags |= SI_UNMAPPED;
return (0);