freebsd-src/usr.sbin/nvmfd/io.c


/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2023-2024 Chelsio Communications, Inc.
* Written by: John Baldwin <jhb@FreeBSD.org>
*/
#include <sys/sysctl.h>
#include <err.h>
#include <errno.h>
#include <libnvmf.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "internal.h"
struct io_controller {
struct controller *c;
u_int num_io_queues;
u_int active_io_queues;
struct nvmf_qpair **io_qpairs;
int *io_sockets;
struct nvme_firmware_page fp;
struct nvme_health_information_page hip;
uint16_t partial_dur;
uint16_t partial_duw;
uint16_t cntlid;
char hostid[16];
char hostnqn[NVME_NQN_FIELD_SIZE];
};
static struct nvmf_association *io_na;
static pthread_cond_t io_cond;
static pthread_mutex_t io_na_mutex;
static struct io_controller *io_controller;
static const char *nqn;
static char serial[NVME_SERIAL_NUMBER_LENGTH];
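/*
 * Build the association parameters for the I/O controller and derive
 * a serial number from the kern.hostid sysctl so that the serial is
 * stable across restarts of the daemon on the same host.
 */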
void
init_io(const char *subnqn)
{
struct nvmf_association_params aparams;
u_long hostid;
size_t len;
memset(&aparams, 0, sizeof(aparams));
aparams.sq_flow_control = !flow_control_disable;
aparams.dynamic_controller_model = true;
aparams.max_admin_qsize = NVME_MAX_ADMIN_ENTRIES;
aparams.max_io_qsize = NVMF_MAX_IO_ENTRIES;
aparams.tcp.pda = 0;
aparams.tcp.header_digests = header_digests;
aparams.tcp.data_digests = data_digests;
aparams.tcp.maxr2t = 1;
aparams.tcp.maxh2cdata = 256 * 1024;
io_na = nvmf_allocate_association(NVMF_TRTYPE_TCP, true,
&aparams);
if (io_na == NULL)
err(1, "Failed to create I/O controller association");
nqn = subnqn;
/* Generate a serial number from the kern.hostid node. */
len = sizeof(hostid);
if (sysctlbyname("kern.hostid", &hostid, &len, NULL, 0) == -1)
err(1, "sysctl: kern.hostid");
nvmf_controller_serial(serial, sizeof(serial), hostid);
pthread_cond_init(&io_cond, NULL);
pthread_mutex_init(&io_na_mutex, NULL);
if (kernel_io)
init_ctl_port(subnqn, &aparams);
}
void
shutdown_io(void)
{
if (kernel_io)
shutdown_ctl_port(nqn);
}
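/*
 * GET_LOG_PAGE: the dword count is split between NUMDL (CDW10[31:16])
 * and NUMDU (CDW11[15:0]) and is zero-based, so the transfer length in
 * bytes is (NUMD + 1) * 4, and the offset must be dword-aligned.  Only
 * the Error, Health/SMART, and Firmware Slot pages are supported; the
 * error log is returned as zeroes since this daemon records no errors.
 */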
static void
handle_get_log_page(struct io_controller *ioc, const struct nvmf_capsule *nc,
const struct nvme_command *cmd)
{
uint64_t offset;
uint32_t numd;
size_t len;
uint8_t lid;
lid = le32toh(cmd->cdw10) & 0xff;
numd = le32toh(cmd->cdw10) >> 16 | le32toh(cmd->cdw11) << 16;
offset = le32toh(cmd->cdw12) | (uint64_t)le32toh(cmd->cdw13) << 32;
if (offset % 4 != 0)
goto error;
len = (numd + 1) * 4;
switch (lid) {
case NVME_LOG_ERROR:
{
void *buf;
if (len % sizeof(struct nvme_error_information_entry) != 0)
goto error;
buf = calloc(1, len);
nvmf_send_controller_data(nc, buf, len);
free(buf);
return;
}
case NVME_LOG_HEALTH_INFORMATION:
if (len != sizeof(ioc->hip))
goto error;
nvmf_send_controller_data(nc, &ioc->hip, sizeof(ioc->hip));
return;
case NVME_LOG_FIRMWARE_SLOT:
if (len != sizeof(ioc->fp))
goto error;
nvmf_send_controller_data(nc, &ioc->fp, sizeof(ioc->fp));
return;
default:
warnx("Unsupported page %#x for GET_LOG_PAGE\n", lid);
goto error;
}
error:
nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
}
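/*
 * IDENTIFY with CNS 0 (namespace data), 2 (active namespace list), and
 * 3 (namespace identification descriptors).  Any other CNS value
 * returns false so the shared controller code can handle or reject it.
 */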
static bool
handle_io_identify_command(const struct nvmf_capsule *nc,
const struct nvme_command *cmd)
{
struct nvme_namespace_data nsdata;
struct nvme_ns_list nslist;
uint32_t nsid;
uint8_t cns;
cns = le32toh(cmd->cdw10) & 0xFF;
switch (cns) {
case 0: /* Namespace data. */
if (!device_namespace_data(le32toh(cmd->nsid), &nsdata)) {
nvmf_send_generic_error(nc,
NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
return (true);
}
nvmf_send_controller_data(nc, &nsdata, sizeof(nsdata));
return (true);
case 2: /* Active namespace list. */
nsid = le32toh(cmd->nsid);
if (nsid >= 0xfffffffe) {
nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
return (true);
}
device_active_nslist(nsid, &nslist);
nvmf_send_controller_data(nc, &nslist, sizeof(nslist));
return (true);
case 3: /* Namespace Identification Descriptor list. */
if (!device_identification_descriptor(le32toh(cmd->nsid),
&nsdata)) {
nvmf_send_generic_error(nc,
NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
return (true);
}
nvmf_send_controller_data(nc, &nsdata, sizeof(nsdata));
return (true);
default:
return (false);
}
}
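/*
 * SET_FEATURES: Number of Queues may only be set once per association
 * and before any I/O queues exist, and Fabrics requires the submission
 * and completion queue counts to match since queues are created in
 * pairs.  The completion echoes CDW11 so the host learns the granted
 * counts.
 */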
static void
handle_set_features(struct io_controller *ioc, const struct nvmf_capsule *nc,
const struct nvme_command *cmd)
{
struct nvme_completion cqe;
uint8_t fid;
fid = NVMEV(NVME_FEAT_SET_FID, le32toh(cmd->cdw10));
switch (fid) {
case NVME_FEAT_NUMBER_OF_QUEUES:
{
uint32_t num_queues;
if (ioc->num_io_queues != 0) {
nvmf_send_generic_error(nc,
NVME_SC_COMMAND_SEQUENCE_ERROR);
return;
}
num_queues = le32toh(cmd->cdw11) & 0xffff;
/* 5.12.1.7: 65535 is invalid. */
if (num_queues == 65535)
goto error;
/* Fabrics requires the same number of SQs and CQs. */
if (le32toh(cmd->cdw11) >> 16 != num_queues)
goto error;
/* Convert to 1's based */
num_queues++;
/* Lock to synchronize with handle_io_qpair. */
pthread_mutex_lock(&io_na_mutex);
ioc->num_io_queues = num_queues;
ioc->io_qpairs = calloc(num_queues, sizeof(*ioc->io_qpairs));
ioc->io_sockets = calloc(num_queues, sizeof(*ioc->io_sockets));
pthread_mutex_unlock(&io_na_mutex);
nvmf_init_cqe(&cqe, nc, 0);
cqe.cdw0 = cmd->cdw11;
nvmf_send_response(nc, &cqe);
return;
}
case NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
{
uint32_t aer_mask;
aer_mask = le32toh(cmd->cdw11);
/* Check for any reserved or unimplemented feature bits. */
if ((aer_mask & 0xffffc000) != 0)
goto error;
/* No AERs are generated by this daemon. */
nvmf_send_success(nc);
return;
}
default:
warnx("Unsupported feature ID %u for SET_FEATURES", fid);
goto error;
}
error:
nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
}
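/*
 * Admin command dispatch.  Returning false defers the command to the
 * generic handling in controller_handle_admin_commands().  AER
 * capsules are parked and never completed, which is legal since an
 * AER only completes when an event is posted and this daemon posts
 * none.
 */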
static bool
admin_command(const struct nvmf_capsule *nc, const struct nvme_command *cmd,
void *arg)
{
struct io_controller *ioc = arg;
switch (cmd->opc) {
case NVME_OPC_GET_LOG_PAGE:
handle_get_log_page(ioc, nc, cmd);
return (true);
case NVME_OPC_IDENTIFY:
return (handle_io_identify_command(nc, cmd));
case NVME_OPC_SET_FEATURES:
handle_set_features(ioc, nc, cmd);
return (true);
case NVME_OPC_ASYNC_EVENT_REQUEST:
/* Ignore and never complete. */
return (true);
case NVME_OPC_KEEP_ALIVE:
nvmf_send_success(nc);
return (true);
default:
return (false);
}
}
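/*
 * Runs on the thread that accepted the admin connection.  Once the
 * admin queue loop exits, force each I/O queue socket closed and wait
 * on io_cond for the I/O threads to drain before freeing the
 * controller.
 */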
static void
handle_admin_qpair(struct io_controller *ioc)
{
pthread_setname_np(pthread_self(), "admin queue");
controller_handle_admin_commands(ioc->c, admin_command, ioc);
pthread_mutex_lock(&io_na_mutex);
for (u_int i = 0; i < ioc->num_io_queues; i++) {
if (ioc->io_qpairs[i] == NULL || ioc->io_sockets[i] == -1)
continue;
close(ioc->io_sockets[i]);
ioc->io_sockets[i] = -1;
}
/* Wait for I/O threads to notice. */
while (ioc->active_io_queues > 0)
pthread_cond_wait(&io_cond, &io_na_mutex);
io_controller = NULL;
pthread_mutex_unlock(&io_na_mutex);
free_controller(ioc->c);
free(ioc);
}
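/*
 * Returns true only for a well-formed DISCONNECT, the one fabrics
 * command that cleanly terminates an established I/O queue.
 */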
static bool
handle_io_fabrics_command(const struct nvmf_capsule *nc,
const struct nvmf_fabric_cmd *fc)
{
switch (fc->fctype) {
case NVMF_FABRIC_COMMAND_CONNECT:
warnx("CONNECT command on connected queue");
nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
break;
case NVMF_FABRIC_COMMAND_DISCONNECT:
{
const struct nvmf_fabric_disconnect_cmd *dis =
(const struct nvmf_fabric_disconnect_cmd *)fc;
if (dis->recfmt != htole16(0)) {
nvmf_send_error(nc, NVME_SCT_COMMAND_SPECIFIC,
NVMF_FABRIC_SC_INCOMPATIBLE_FORMAT);
break;
}
nvmf_send_success(nc);
return (true);
}
default:
warnx("Unsupported fabrics command %#x", fc->fctype);
nvmf_send_generic_error(nc, NVME_SC_INVALID_OPCODE);
break;
}
return (false);
}
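/*
 * SMART counters are 128-bit little-endian values stored as two
 * uint64_t halves; carry into the high half when the low half wraps.
 */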
static void
hip_add(uint64_t pair[2], uint64_t addend)
{
uint64_t old, new;
old = le64toh(pair[0]);
new = old + addend;
pair[0] = htole64(new);
if (new < old)
pair[1] = htole64(le64toh(pair[1]) + 1);
}
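/*
 * For NVM Read/Write, the starting LBA spans CDW10 (low) and CDW11
 * (high), and CDW12[15:0] holds the zero-based number of logical
 * blocks.
 */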
static uint64_t
cmd_lba(const struct nvme_command *cmd)
{
return ((uint64_t)le32toh(cmd->cdw11) << 32 | le32toh(cmd->cdw10));
}
static u_int
cmd_nlb(const struct nvme_command *cmd)
{
return ((le32toh(cmd->cdw12) & 0xffff) + 1);
}
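/*
 * Data Units Read/Written count units of 1000 512-byte blocks, so the
 * remainder is carried across commands in partial_dur/partial_duw.
 */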
static void
handle_read(struct io_controller *ioc, const struct nvmf_capsule *nc,
const struct nvme_command *cmd)
{
size_t len;
len = nvmf_capsule_data_len(nc);
device_read(le32toh(cmd->nsid), cmd_lba(cmd), cmd_nlb(cmd), nc);
hip_add(ioc->hip.host_read_commands, 1);
len /= 512;
len += ioc->partial_dur;
if (len > 1000)
hip_add(ioc->hip.data_units_read, len / 1000);
ioc->partial_dur = len % 1000;
}
static void
handle_write(struct io_controller *ioc, const struct nvmf_capsule *nc,
const struct nvme_command *cmd)
{
size_t len;
len = nvmf_capsule_data_len(nc);
device_write(le32toh(cmd->nsid), cmd_lba(cmd), cmd_nlb(cmd), nc);
hip_add(ioc->hip.host_write_commands, 1);
len /= 512;
len += ioc->partial_duw;
if (len > 1000)
hip_add(ioc->hip.data_units_written, len / 1000);
ioc->partial_duw = len % 1000;
}
static void
handle_flush(const struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
device_flush(le32toh(cmd->nsid), nc);
}
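/*
 * Main loop for one I/O queue: receive command capsules and dispatch
 * FLUSH, WRITE, READ, and fabrics commands until the connection drops
 * or the host sends a DISCONNECT.
 */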
static bool
handle_io_commands(struct io_controller *ioc, struct nvmf_qpair *qp)
{
const struct nvme_command *cmd;
struct nvmf_capsule *nc;
int error;
bool disconnect;
disconnect = false;
while (!disconnect) {
error = nvmf_controller_receive_capsule(qp, &nc);
if (error != 0) {
if (error != ECONNRESET)
warnc(error, "Failed to read command capsule");
break;
}
cmd = nvmf_capsule_sqe(nc);
switch (cmd->opc) {
case NVME_OPC_FLUSH:
if (cmd->nsid == htole32(0xffffffff)) {
nvmf_send_generic_error(nc,
NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
break;
}
handle_flush(nc, cmd);
break;
case NVME_OPC_WRITE:
handle_write(ioc, nc, cmd);
break;
case NVME_OPC_READ:
handle_read(ioc, nc, cmd);
break;
case NVME_OPC_FABRICS_COMMANDS:
disconnect = handle_io_fabrics_command(nc,
(const struct nvmf_fabric_cmd *)cmd);
break;
default:
warnx("Unsupported NVM opcode %#x", cmd->opc);
nvmf_send_generic_error(nc, NVME_SC_INVALID_OPCODE);
break;
}
nvmf_free_capsule(nc);
}
return (disconnect);
}
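/*
 * Per-queue thread body: service the queue, then release this queue's
 * slot and wake the admin thread if it was the last active queue.
 */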
static void
handle_io_qpair(struct io_controller *ioc, struct nvmf_qpair *qp, int qid)
{
char name[64];
bool disconnect;
snprintf(name, sizeof(name), "I/O queue %d", qid);
pthread_setname_np(pthread_self(), name);
disconnect = handle_io_commands(ioc, qp);
pthread_mutex_lock(&io_na_mutex);
if (disconnect)
ioc->io_qpairs[qid - 1] = NULL;
if (ioc->io_sockets[qid - 1] != -1) {
close(ioc->io_sockets[qid - 1]);
ioc->io_sockets[qid - 1] = -1;
}
ioc->active_io_queues--;
if (ioc->active_io_queues == 0)
pthread_cond_broadcast(&io_cond);
pthread_mutex_unlock(&io_na_mutex);
}
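/*
 * Admin CONNECT: only one I/O controller may be active at a time.
 * The admin queue is serviced on this thread, so the socket is not
 * closed until the admin queue loop exits.
 */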
static void
connect_admin_qpair(int s, struct nvmf_qpair *qp, struct nvmf_capsule *nc,
const struct nvmf_fabric_connect_data *data)
{
struct nvme_controller_data cdata;
struct io_controller *ioc;
int error;
/* Can only have one active I/O controller at a time. */
pthread_mutex_lock(&io_na_mutex);
if (io_controller != NULL) {
pthread_mutex_unlock(&io_na_mutex);
nvmf_send_error(nc, NVME_SCT_COMMAND_SPECIFIC,
NVMF_FABRIC_SC_CONTROLLER_BUSY);
goto error;
}
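/* This daemon uses a fixed controller ID of 2 for its single dynamic controller. */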
error = nvmf_finish_accept(nc, 2);
if (error != 0) {
pthread_mutex_unlock(&io_na_mutex);
warnc(error, "Failed to send CONNECT response");
goto error;
}
ioc = calloc(1, sizeof(*ioc));
ioc->cntlid = 2;
memcpy(ioc->hostid, data->hostid, sizeof(ioc->hostid));
memcpy(ioc->hostnqn, data->hostnqn, sizeof(ioc->hostnqn));
nvmf_init_io_controller_data(qp, serial, nqn, device_count(),
NVMF_IOCCSZ, &cdata);
ioc->fp.afi = NVMEF(NVME_FIRMWARE_PAGE_AFI_SLOT, 1);
memcpy(ioc->fp.revision[0], cdata.fr, sizeof(cdata.fr));
ioc->hip.power_cycles[0] = htole64(1);
ioc->c = init_controller(qp, &cdata);
io_controller = ioc;
pthread_mutex_unlock(&io_na_mutex);
nvmf_free_capsule(nc);
handle_admin_qpair(ioc);
close(s);
return;
error:
nvmf_free_capsule(nc);
close(s);
}
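/*
 * I/O CONNECT: verify that the host matches the one that created the
 * admin queue (hostid, cntlid, hostnqn) and that qid lies within the
 * queue count granted by Set Features before binding the queue pair
 * to its slot.
 */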
static void
connect_io_qpair(int s, struct nvmf_qpair *qp, struct nvmf_capsule *nc,
const struct nvmf_fabric_connect_data *data, uint16_t qid)
{
struct io_controller *ioc;
int error;
pthread_mutex_lock(&io_na_mutex);
if (io_controller == NULL) {
pthread_mutex_unlock(&io_na_mutex);
warnx("Attempt to create I/O qpair without admin qpair");
nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
goto error;
}
if (memcmp(io_controller->hostid, data->hostid,
sizeof(data->hostid)) != 0) {
pthread_mutex_unlock(&io_na_mutex);
warnx("hostid mismatch for I/O qpair CONNECT");
nvmf_connect_invalid_parameters(nc, true,
offsetof(struct nvmf_fabric_connect_data, hostid));
goto error;
}
if (le16toh(data->cntlid) != io_controller->cntlid) {
pthread_mutex_unlock(&io_na_mutex);
warnx("cntlid mismatch for I/O qpair CONNECT");
nvmf_connect_invalid_parameters(nc, true,
offsetof(struct nvmf_fabric_connect_data, cntlid));
goto error;
}
if (memcmp(io_controller->hostnqn, data->hostnqn,
sizeof(data->hostnqn)) != 0) {
pthread_mutex_unlock(&io_na_mutex);
warnx("host NQN mismatch for I/O qpair CONNECT");
nvmf_connect_invalid_parameters(nc, true,
offsetof(struct nvmf_fabric_connect_data, hostnqn));
goto error;
}
if (io_controller->num_io_queues == 0) {
pthread_mutex_unlock(&io_na_mutex);
warnx("Attempt to create I/O qpair without enabled queues");
nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
goto error;
}
if (qid > io_controller->num_io_queues) {
pthread_mutex_unlock(&io_na_mutex);
warnx("Attempt to create invalid I/O qpair %u", qid);
nvmf_connect_invalid_parameters(nc, false,
offsetof(struct nvmf_fabric_connect_cmd, qid));
goto error;
}
if (io_controller->io_qpairs[qid - 1] != NULL) {
pthread_mutex_unlock(&io_na_mutex);
warnx("Attempt to re-create I/O qpair %u", qid);
nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
goto error;
}
error = nvmf_finish_accept(nc, io_controller->cntlid);
if (error != 0) {
pthread_mutex_unlock(&io_na_mutex);
warnc(error, "Failed to send CONNECT response");
goto error;
}
ioc = io_controller;
ioc->active_io_queues++;
ioc->io_qpairs[qid - 1] = qp;
ioc->io_sockets[qid - 1] = s;
pthread_mutex_unlock(&io_na_mutex);
nvmf_free_capsule(nc);
handle_io_qpair(ioc, qp, qid);
return;
error:
nvmf_free_capsule(nc);
close(s);
}
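/*
 * One detached thread per accepted connection: run the CONNECT
 * handshake, hand the queue off to the in-kernel CTL frontend when
 * kernel_io is set, and otherwise service the queue on this thread.
 */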
static void *
io_socket_thread(void *arg)
{
struct nvmf_fabric_connect_data data;
struct nvmf_qpair_params qparams;
const struct nvmf_fabric_connect_cmd *cmd;
struct nvmf_capsule *nc;
struct nvmf_qpair *qp;
int s;
pthread_detach(pthread_self());
s = (intptr_t)arg;
memset(&qparams, 0, sizeof(qparams));
qparams.tcp.fd = s;
nc = NULL;
qp = nvmf_accept(io_na, &qparams, &nc, &data);
if (qp == NULL) {
warnx("Failed to create I/O qpair: %s",
nvmf_association_error(io_na));
goto error;
}
if (kernel_io) {
ctl_handoff_qpair(qp, nvmf_capsule_sqe(nc), &data);
goto error;
}
if (strcmp(data.subnqn, nqn) != 0) {
warn("I/O qpair with invalid SubNQN: %.*s",
(int)sizeof(data.subnqn), data.subnqn);
nvmf_connect_invalid_parameters(nc, true,
offsetof(struct nvmf_fabric_connect_data, subnqn));
goto error;
}
/* Is this an admin or I/O queue pair? */
cmd = nvmf_capsule_sqe(nc);
if (cmd->qid == 0)
connect_admin_qpair(s, qp, nc, &data);
else
connect_io_qpair(s, qp, nc, &data, le16toh(cmd->qid));
nvmf_free_qpair(qp);
return (NULL);
error:
if (nc != NULL)
nvmf_free_capsule(nc);
if (qp != NULL)
nvmf_free_qpair(qp);
close(s);
return (NULL);
}
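/*
 * Called from the listener for each accepted I/O controller
 * connection.
 */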
void
handle_io_socket(int s)
{
pthread_t thr;
int error;
error = pthread_create(&thr, NULL, io_socket_thread,
(void *)(uintptr_t)s);
if (error != 0) {
warnc(error, "Failed to create I/O qpair thread");
close(s);
}
}