/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/sysctl.h>
#include <err.h>
#include <errno.h>
#include <libnvmf.h>
#include <pthread.h>
#include <pthread_np.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "internal.h"

struct io_controller {
	struct controller *c;

	u_int num_io_queues;
	u_int active_io_queues;
	struct nvmf_qpair **io_qpairs;
	int *io_sockets;

	struct nvme_firmware_page fp;
	struct nvme_health_information_page hip;
	uint16_t partial_dur;
	uint16_t partial_duw;

	uint16_t cntlid;
	char hostid[16];
	char hostnqn[NVME_NQN_FIELD_SIZE];
};

static struct nvmf_association *io_na;
static pthread_cond_t io_cond;
static pthread_mutex_t io_na_mutex;
static struct io_controller *io_controller;
static const char *nqn;
static char serial[NVME_SERIAL_NUMBER_LENGTH];

void
init_io(const char *subnqn)
{
	struct nvmf_association_params aparams;
	u_long hostid;
	size_t len;

	memset(&aparams, 0, sizeof(aparams));
	aparams.sq_flow_control = !flow_control_disable;
	aparams.dynamic_controller_model = true;
	aparams.max_admin_qsize = NVME_MAX_ADMIN_ENTRIES;
	aparams.max_io_qsize = NVMF_MAX_IO_ENTRIES;
	aparams.tcp.pda = 0;
	aparams.tcp.header_digests = header_digests;
	aparams.tcp.data_digests = data_digests;
	aparams.tcp.maxr2t = 1;
	aparams.tcp.maxh2cdata = 256 * 1024;
	io_na = nvmf_allocate_association(NVMF_TRTYPE_TCP, true, &aparams);
	if (io_na == NULL)
		err(1, "Failed to create I/O controller association");

	nqn = subnqn;

	/* Generate a serial number from the kern.hostid node. */
	len = sizeof(hostid);
	if (sysctlbyname("kern.hostid", &hostid, &len, NULL, 0) == -1)
		err(1, "sysctl: kern.hostid");
	nvmf_controller_serial(serial, sizeof(serial), hostid);

	pthread_cond_init(&io_cond, NULL);
	pthread_mutex_init(&io_na_mutex, NULL);

	if (kernel_io)
		init_ctl_port(subnqn, &aparams);
}

void
shutdown_io(void)
{
	if (kernel_io)
		shutdown_ctl_port(nqn);
}

static void
handle_get_log_page(struct io_controller *ioc, const struct nvmf_capsule *nc,
    const struct nvme_command *cmd)
{
	uint64_t offset;
	uint32_t numd;
	size_t len;
	uint8_t lid;

	lid = le32toh(cmd->cdw10) & 0xff;
	numd = le32toh(cmd->cdw10) >> 16 | le32toh(cmd->cdw11) << 16;
	offset = le32toh(cmd->cdw12) | (uint64_t)le32toh(cmd->cdw13) << 32;

	/* Log page offsets must be dword-aligned. */
	if (offset % 4 != 0)
		goto error;

	len = (numd + 1) * 4;

	switch (lid) {
	case NVME_LOG_ERROR:
	{
		void *buf;

		if (len % sizeof(struct nvme_error_information_entry) != 0)
			goto error;

		/* No errors are recorded, so return zeroed entries. */
		buf = calloc(1, len);
		nvmf_send_controller_data(nc, buf, len);
		free(buf);
		return;
	}
	case NVME_LOG_HEALTH_INFORMATION:
		if (len != sizeof(ioc->hip))
			goto error;

		nvmf_send_controller_data(nc, &ioc->hip, sizeof(ioc->hip));
		return;
	case NVME_LOG_FIRMWARE_SLOT:
		if (len != sizeof(ioc->fp))
			goto error;

		nvmf_send_controller_data(nc, &ioc->fp, sizeof(ioc->fp));
		return;
	default:
		warnx("Unsupported page %#x for GET_LOG_PAGE", lid);
		goto error;
	}

error:
	nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
}
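
/*
 * Worked example (illustrative, not from the host code): NUMD is a
 * zero-based dword count split across CDW10/CDW11.  A host fetching
 * the 512-byte health information page sends NUMD = 512 / 4 - 1 = 127,
 * and handle_get_log_page() recovers len = (127 + 1) * 4 = 512.
 */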

static bool
handle_io_identify_command(const struct nvmf_capsule *nc,
    const struct nvme_command *cmd)
{
	struct nvme_namespace_data nsdata;
	struct nvme_ns_list nslist;
	uint32_t nsid;
	uint8_t cns;

	cns = le32toh(cmd->cdw10) & 0xFF;
	switch (cns) {
	case 0:	/* Namespace data. */
		if (!device_namespace_data(le32toh(cmd->nsid), &nsdata)) {
			nvmf_send_generic_error(nc,
			    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
			return (true);
		}

		nvmf_send_controller_data(nc, &nsdata, sizeof(nsdata));
		return (true);
	case 2:	/* Active namespace list. */
		nsid = le32toh(cmd->nsid);
		if (nsid >= 0xfffffffe) {
			nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
			return (true);
		}

		device_active_nslist(nsid, &nslist);
		nvmf_send_controller_data(nc, &nslist, sizeof(nslist));
		return (true);
	case 3:	/* Namespace Identification Descriptor list. */
		/* The 4K nsdata buffer doubles as scratch space here. */
		if (!device_identification_descriptor(le32toh(cmd->nsid),
		    &nsdata)) {
			nvmf_send_generic_error(nc,
			    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
			return (true);
		}

		nvmf_send_controller_data(nc, &nsdata, sizeof(nsdata));
		return (true);
	default:
		return (false);
	}
}

static void
handle_set_features(struct io_controller *ioc, const struct nvmf_capsule *nc,
    const struct nvme_command *cmd)
{
	struct nvme_completion cqe;
	uint8_t fid;

	fid = NVMEV(NVME_FEAT_SET_FID, le32toh(cmd->cdw10));
	switch (fid) {
	case NVME_FEAT_NUMBER_OF_QUEUES:
	{
		uint32_t num_queues;

		if (ioc->num_io_queues != 0) {
			nvmf_send_generic_error(nc,
			    NVME_SC_COMMAND_SEQUENCE_ERROR);
			return;
		}

		num_queues = le32toh(cmd->cdw11) & 0xffff;

		/* 5.12.1.7: 65535 is invalid. */
		if (num_queues == 65535)
			goto error;

		/* Fabrics requires the same number of SQs and CQs. */
		if (le32toh(cmd->cdw11) >> 16 != num_queues)
			goto error;

		/* Convert to 1's based */
		num_queues++;

		/* Lock to synchronize with handle_io_qpair. */
		pthread_mutex_lock(&io_na_mutex);
		ioc->num_io_queues = num_queues;
		ioc->io_qpairs = calloc(num_queues, sizeof(*ioc->io_qpairs));
		ioc->io_sockets = calloc(num_queues, sizeof(*ioc->io_sockets));
		pthread_mutex_unlock(&io_na_mutex);

		nvmf_init_cqe(&cqe, nc, 0);
		cqe.cdw0 = cmd->cdw11;
		nvmf_send_response(nc, &cqe);
		return;
	}
	case NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
	{
		uint32_t aer_mask;

		aer_mask = le32toh(cmd->cdw11);

		/* Check for any reserved or unimplemented feature bits. */
		if ((aer_mask & 0xffffc000) != 0)
			goto error;

		/* No AERs are generated by this daemon. */
		nvmf_send_success(nc);
		return;
	}
	default:
		warnx("Unsupported feature ID %u for SET_FEATURES", fid);
		goto error;
	}

error:
	nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
}
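
/*
 * Encoding note (illustrative): CDW11 for the Number of Queues feature
 * carries zero-based counts, NCQR in the upper 16 bits and NSQR in the
 * lower 16 bits.  A host asking for four I/O submission and completion
 * queues thus sends cdw11 = 0x00030003, which the handler above
 * converts to num_queues = 4.
 */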

static bool
admin_command(const struct nvmf_capsule *nc, const struct nvme_command *cmd,
    void *arg)
{
	struct io_controller *ioc = arg;

	switch (cmd->opc) {
	case NVME_OPC_GET_LOG_PAGE:
		handle_get_log_page(ioc, nc, cmd);
		return (true);
	case NVME_OPC_IDENTIFY:
		return (handle_io_identify_command(nc, cmd));
	case NVME_OPC_SET_FEATURES:
		handle_set_features(ioc, nc, cmd);
		return (true);
	case NVME_OPC_ASYNC_EVENT_REQUEST:
		/* Ignore and never complete. */
		return (true);
	case NVME_OPC_KEEP_ALIVE:
		nvmf_send_success(nc);
		return (true);
	default:
		return (false);
	}
}

static void
handle_admin_qpair(struct io_controller *ioc)
{
	pthread_setname_np(pthread_self(), "admin queue");

	controller_handle_admin_commands(ioc->c, admin_command, ioc);

	/* Forcibly close any open I/O queue sockets. */
	pthread_mutex_lock(&io_na_mutex);
	for (u_int i = 0; i < ioc->num_io_queues; i++) {
		if (ioc->io_qpairs[i] == NULL || ioc->io_sockets[i] == -1)
			continue;
		close(ioc->io_sockets[i]);
		ioc->io_sockets[i] = -1;
	}

	/* Wait for I/O threads to notice. */
	while (ioc->active_io_queues > 0)
		pthread_cond_wait(&io_cond, &io_na_mutex);

	io_controller = NULL;
	pthread_mutex_unlock(&io_na_mutex);

	free_controller(ioc->c);
	free(ioc);
}

static bool
handle_io_fabrics_command(const struct nvmf_capsule *nc,
    const struct nvmf_fabric_cmd *fc)
{
	switch (fc->fctype) {
	case NVMF_FABRIC_COMMAND_CONNECT:
		warnx("CONNECT command on connected queue");
		nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
		break;
	case NVMF_FABRIC_COMMAND_DISCONNECT:
	{
		const struct nvmf_fabric_disconnect_cmd *dis =
		    (const struct nvmf_fabric_disconnect_cmd *)fc;

		if (dis->recfmt != htole16(0)) {
			nvmf_send_error(nc, NVME_SCT_COMMAND_SPECIFIC,
			    NVMF_FABRIC_SC_INCOMPATIBLE_FORMAT);
			break;
		}
		nvmf_send_success(nc);
		return (true);
	}
	default:
		warnx("Unsupported fabrics command %#x", fc->fctype);
		nvmf_send_generic_error(nc, NVME_SC_INVALID_OPCODE);
		break;
	}
	return (false);
}

/*
 * Health log counters are 16-byte little-endian values stored as two
 * 64-bit halves; carry into the upper half when the lower half wraps.
 */
static void
hip_add(uint64_t pair[2], uint64_t addend)
{
	uint64_t old, new;

	old = le64toh(pair[0]);
	new = old + addend;
	pair[0] = htole64(new);
	if (new < old)
		pair[1] += htole64(1);
}

static uint64_t
cmd_lba(const struct nvme_command *cmd)
{
	return ((uint64_t)le32toh(cmd->cdw11) << 32 | le32toh(cmd->cdw10));
}

static u_int
cmd_nlb(const struct nvme_command *cmd)
{
	return ((le32toh(cmd->cdw12) & 0xffff) + 1);
}

static void
handle_read(struct io_controller *ioc, const struct nvmf_capsule *nc,
    const struct nvme_command *cmd)
{
	size_t len;

	len = nvmf_capsule_data_len(nc);
	device_read(le32toh(cmd->nsid), cmd_lba(cmd), cmd_nlb(cmd), nc);

	hip_add(ioc->hip.host_read_commands, 1);

	len /= 512;
	len += ioc->partial_dur;
	if (len > 1000)
		hip_add(ioc->hip.data_units_read, len / 1000);
	ioc->partial_dur = len % 1000;
}

static void
handle_write(struct io_controller *ioc, const struct nvmf_capsule *nc,
    const struct nvme_command *cmd)
{
	size_t len;

	len = nvmf_capsule_data_len(nc);
	device_write(le32toh(cmd->nsid), cmd_lba(cmd), cmd_nlb(cmd), nc);

	hip_add(ioc->hip.host_write_commands, 1);

	len /= 512;
	len += ioc->partial_duw;
	if (len > 1000)
		hip_add(ioc->hip.data_units_written, len / 1000);
	ioc->partial_duw = len % 1000;
}
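
/*
 * Accounting note: the NVMe health log reports Data Units Read/Written
 * in units of 1000 512-byte blocks, so the handlers above divide the
 * transfer length by 512 and bank sub-unit remainders in partial_dur /
 * partial_duw.  For example, a 4 KiB read contributes 8 blocks toward
 * the next data unit.
 */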

static void
handle_flush(const struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	device_flush(le32toh(cmd->nsid), nc);
}

static bool
handle_io_commands(struct io_controller *ioc, struct nvmf_qpair *qp)
{
	const struct nvme_command *cmd;
	struct nvmf_capsule *nc;
	int error;
	bool disconnect;

	disconnect = false;

	while (!disconnect) {
		error = nvmf_controller_receive_capsule(qp, &nc);
		if (error != 0) {
			if (error != ECONNRESET)
				warnc(error, "Failed to read command capsule");
			break;
		}

		cmd = nvmf_capsule_sqe(nc);

		switch (cmd->opc) {
		case NVME_OPC_FLUSH:
			if (cmd->nsid == htole32(0xffffffff)) {
				nvmf_send_generic_error(nc,
				    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
				break;
			}
			handle_flush(nc, cmd);
			break;
		case NVME_OPC_WRITE:
			handle_write(ioc, nc, cmd);
			break;
		case NVME_OPC_READ:
			handle_read(ioc, nc, cmd);
			break;
		case NVME_OPC_FABRICS_COMMANDS:
			disconnect = handle_io_fabrics_command(nc,
			    (const struct nvmf_fabric_cmd *)cmd);
			break;
		default:
			warnx("Unsupported NVM opcode %#x", cmd->opc);
			nvmf_send_generic_error(nc, NVME_SC_INVALID_OPCODE);
			break;
		}
		nvmf_free_capsule(nc);
	}

	return (disconnect);
}

static void
handle_io_qpair(struct io_controller *ioc, struct nvmf_qpair *qp, int qid)
{
	char name[64];
	bool disconnect;

	snprintf(name, sizeof(name), "I/O queue %d", qid);
	pthread_setname_np(pthread_self(), name);

	disconnect = handle_io_commands(ioc, qp);

	pthread_mutex_lock(&io_na_mutex);
	if (disconnect)
		ioc->io_qpairs[qid - 1] = NULL;
	if (ioc->io_sockets[qid - 1] != -1) {
		close(ioc->io_sockets[qid - 1]);
		ioc->io_sockets[qid - 1] = -1;
	}
	ioc->active_io_queues--;
	if (ioc->active_io_queues == 0)
		pthread_cond_broadcast(&io_cond);
	pthread_mutex_unlock(&io_na_mutex);
}

static void
connect_admin_qpair(int s, struct nvmf_qpair *qp, struct nvmf_capsule *nc,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvme_controller_data cdata;
	struct io_controller *ioc;
	int error;

	/* Can only have one active I/O controller at a time. */
	pthread_mutex_lock(&io_na_mutex);
	if (io_controller != NULL) {
		pthread_mutex_unlock(&io_na_mutex);
		nvmf_send_error(nc, NVME_SCT_COMMAND_SPECIFIC,
		    NVMF_FABRIC_SC_CONTROLLER_BUSY);
		goto error;
	}

	error = nvmf_finish_accept(nc, 2);
	if (error != 0) {
		pthread_mutex_unlock(&io_na_mutex);
		warnc(error, "Failed to send CONNECT response");
		goto error;
	}

	ioc = calloc(1, sizeof(*ioc));
	ioc->cntlid = 2;
	memcpy(ioc->hostid, data->hostid, sizeof(ioc->hostid));
	memcpy(ioc->hostnqn, data->hostnqn, sizeof(ioc->hostnqn));

	nvmf_init_io_controller_data(qp, serial, nqn, device_count(),
	    NVMF_IOCCSZ, &cdata);

	ioc->fp.afi = NVMEF(NVME_FIRMWARE_PAGE_AFI_SLOT, 1);
	memcpy(ioc->fp.revision[0], cdata.fr, sizeof(cdata.fr));

	ioc->hip.power_cycles[0] = 1;

	ioc->c = init_controller(qp, &cdata);

	io_controller = ioc;
	pthread_mutex_unlock(&io_na_mutex);

	nvmf_free_capsule(nc);

	handle_admin_qpair(ioc);
	close(s);
	return;

error:
	nvmf_free_capsule(nc);
	close(s);
}
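
/*
 * Per the Fabrics spec, an I/O queue must attach to the controller
 * created by the admin queue's CONNECT, so the host ID, controller ID,
 * and host NQN presented below must match the values captured in
 * connect_admin_qpair() before the queue pair is accepted.
 */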

static void
connect_io_qpair(int s, struct nvmf_qpair *qp, struct nvmf_capsule *nc,
    const struct nvmf_fabric_connect_data *data, uint16_t qid)
{
	struct io_controller *ioc;
	int error;

	pthread_mutex_lock(&io_na_mutex);
	if (io_controller == NULL) {
		pthread_mutex_unlock(&io_na_mutex);
		warnx("Attempt to create I/O qpair without admin qpair");
		nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
		goto error;
	}

	if (memcmp(io_controller->hostid, data->hostid,
	    sizeof(data->hostid)) != 0) {
		pthread_mutex_unlock(&io_na_mutex);
		warnx("hostid mismatch for I/O qpair CONNECT");
		nvmf_connect_invalid_parameters(nc, true,
		    offsetof(struct nvmf_fabric_connect_data, hostid));
		goto error;
	}
	if (le16toh(data->cntlid) != io_controller->cntlid) {
		pthread_mutex_unlock(&io_na_mutex);
		warnx("cntlid mismatch for I/O qpair CONNECT");
		nvmf_connect_invalid_parameters(nc, true,
		    offsetof(struct nvmf_fabric_connect_data, cntlid));
		goto error;
	}
	if (memcmp(io_controller->hostnqn, data->hostnqn,
	    sizeof(data->hostnqn)) != 0) {
		pthread_mutex_unlock(&io_na_mutex);
		warnx("host NQN mismatch for I/O qpair CONNECT");
		nvmf_connect_invalid_parameters(nc, true,
		    offsetof(struct nvmf_fabric_connect_data, hostnqn));
		goto error;
	}

	if (io_controller->num_io_queues == 0) {
		pthread_mutex_unlock(&io_na_mutex);
		warnx("Attempt to create I/O qpair without enabled queues");
		nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
		goto error;
	}
	if (qid > io_controller->num_io_queues) {
		pthread_mutex_unlock(&io_na_mutex);
		warnx("Attempt to create invalid I/O qpair %u", qid);
		nvmf_connect_invalid_parameters(nc, false,
		    offsetof(struct nvmf_fabric_connect_cmd, qid));
		goto error;
	}
	if (io_controller->io_qpairs[qid - 1] != NULL) {
		pthread_mutex_unlock(&io_na_mutex);
		warnx("Attempt to re-create I/O qpair %u", qid);
		nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
		goto error;
	}

	error = nvmf_finish_accept(nc, io_controller->cntlid);
	if (error != 0) {
		pthread_mutex_unlock(&io_na_mutex);
		warnc(error, "Failed to send CONNECT response");
		goto error;
	}

	ioc = io_controller;
	ioc->active_io_queues++;
	ioc->io_qpairs[qid - 1] = qp;
	ioc->io_sockets[qid - 1] = s;
	pthread_mutex_unlock(&io_na_mutex);

	nvmf_free_capsule(nc);

	handle_io_qpair(ioc, qp, qid);
	return;

error:
	nvmf_free_capsule(nc);
	close(s);
}

static void *
io_socket_thread(void *arg)
{
	struct nvmf_fabric_connect_data data;
	struct nvmf_qpair_params qparams;
	const struct nvmf_fabric_connect_cmd *cmd;
	struct nvmf_capsule *nc;
	struct nvmf_qpair *qp;
	int s;

	pthread_detach(pthread_self());

	s = (intptr_t)arg;
	memset(&qparams, 0, sizeof(qparams));
	qparams.tcp.fd = s;

	nc = NULL;
	qp = nvmf_accept(io_na, &qparams, &nc, &data);
	if (qp == NULL) {
		warnx("Failed to create I/O qpair: %s",
		    nvmf_association_error(io_na));
		goto error;
	}

	if (kernel_io) {
		ctl_handoff_qpair(qp, nvmf_capsule_sqe(nc), &data);
		goto error;
	}

	if (strcmp(data.subnqn, nqn) != 0) {
		warnx("I/O qpair with invalid SubNQN: %.*s",
		    (int)sizeof(data.subnqn), data.subnqn);
		nvmf_connect_invalid_parameters(nc, true,
		    offsetof(struct nvmf_fabric_connect_data, subnqn));
		goto error;
	}

	/* Is this an admin or I/O queue pair? */
	cmd = nvmf_capsule_sqe(nc);
	if (cmd->qid == 0)
		connect_admin_qpair(s, qp, nc, &data);
	else
		connect_io_qpair(s, qp, nc, &data, le16toh(cmd->qid));
	nvmf_free_qpair(qp);
	return (NULL);

error:
	if (nc != NULL)
		nvmf_free_capsule(nc);
	if (qp != NULL)
		nvmf_free_qpair(qp);
	close(s);
	return (NULL);
}

void
handle_io_socket(int s)
{
	pthread_t thr;
	int error;

	error = pthread_create(&thr, NULL, io_socket_thread,
	    (void *)(uintptr_t)s);
	if (error != 0) {
		warnc(error, "Failed to create I/O qpair thread");
		close(s);
	}
}
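
/*
 * Usage sketch (hypothetical; the real accept loop lives elsewhere in
 * nvmfd): each accepted TCP connection is handed to handle_io_socket(),
 * which performs the NVMe/TCP CONNECT exchange on a detached thread.
 *
 *	for (;;) {
 *		int s = accept(lsock, NULL, NULL);
 *		if (s == -1)
 *			continue;
 *		handle_io_socket(s);
 *	}
 */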