libnvmf: Add internal library to support NVMe over Fabrics

libnvmf provides APIs for transmitting and receiving Command and
Response capsules along with data associated with NVMe commands.
Capsules are represented by 'struct nvmf_capsule' objects.

Capsules are transmitted and received on queue pairs represented by
'struct nvmf_qpair' objects.

Queue pairs belong to an association represented by a 'struct
nvmf_association' object.

libnvmf provides additional helper APIs to assist with constructing
command capsules for a host, response capsules for a controller,
connecting queue pairs to a remote controller and optionally
offloading connected queues to an in-kernel host, accepting queue pair
connections from remote hosts and optionally offloading connected
queues to an in-kernel controller, constructing controller data
structures for local controllers, etc.

libnvmf also includes an internal transport abstraction as well as an
implementation of a userspace TCP transport.

libnvmf is primarily intended for ease of use and low-traffic use cases
such as establishing connections that are handed off to the kernel.
As such, it uses a simple API built on blocking I/O.

For a host, a consumer first populates a 'struct
nvmf_association_params' with a set of parameters shared by all queue
pairs for a single association such as whether or not to use SQ flow
control and header and data digests and creates a 'struct
nvmf_association' object.  The consumer is responsible for
establishing a TCP socket for each queue pair.  This socket is
included in the 'struct nvmf_qpair_params' passed to 'nvmf_connect' to
complete transport-specific negotiation, send a Fabrics Connect
command, and wait for the Connect reply. Upon success, a new 'struct
nvmf_qpair' object is returned.  This queue pair can then be used to
send and receive capsules.  A command capsule is allocated, populated
with an SQE and optional data buffer, and transmitted via
nvmf_host_transmit_command.  The consumer can then wait for a reply
via nvmf_host_wait_for_response.  The library also provides some
wrapper functions such as nvmf_read_property and nvmf_write_property
which send a command and wait for a response synchronously.

For a controller, a consumer uses a single association for a set of
incoming connections.  A consumer can choose to use multiple
associations (e.g. a separate association for connections to a
discovery controller listening on a different port than I/O
controllers).  The consumer is responsible for accepting TCP sockets
directly, but once a socket has been accepted it is passed to
nvmf_accept to perform transport-specific negotiation and wait for the
Connect command.  Similar to nvmf_connect, nvmf_accept returns a newly
constructed nvmf_qpair.  However, in contrast to nvmf_connect,
nvmf_accept does not complete the Fabrics negotiation.  The consumer
must explicitly send a response capsule before waiting for additional
command capsules to arrive.  In particular, in the kernel offload
case, the Connect command and data are provided to the kernel
controller and the Connect response capsule is sent by the kernel once
it is ready to handle the new queue pair.

For userspace controller command handling, the consumer uses
nvmf_controller_receive_capsule to wait for a command capsule.
nvmf_receive_controller_data is used to retrieve any data from a
command (e.g. the data for a WRITE command).  It can be called
multiple times to split the data transfer into smaller sizes.
nvmf_send_controller_data is used to send data to a remote host in
response to a command.  It also sends a response capsule indicating
success, or an error if an internal error occurs.  nvmf_send_response
is used to send a response without associated data.  There are also
several convenience wrappers such as nvmf_send_success and
nvmf_send_generic_error.

Reviewed by:	imp
Sponsored by:	Chelsio Communications
Differential Revision:	https://reviews.freebsd.org/D44710
This commit is contained in:
John Baldwin 2024-05-02 16:28:16 -07:00
parent 2f7b0de1de
commit 2da066ef6d
9 changed files with 3623 additions and 0 deletions

View File

@ -78,6 +78,7 @@ SUBDIR= ${SUBDIR_BOOTSTRAP} \
libnetbsd \
libnetmap \
libnv \
libnvmf \
libopenbsd \
libpam \
libpathconv \

22
lib/libnvmf/Makefile Normal file
View File

@ -0,0 +1,22 @@
# Extra source search paths inside the kernel tree.  Presumably this is
# where nvmft_subr.c and gsb_crc32.c listed in SRCS are found.
.PATH: ${SRCTOP}/sys/dev/nvmf/controller
.PATH: ${SRCTOP}/sys/libkern
LIB= nvmf
# libnvmf is built as an internal library.
INTERNALLIB=
PACKAGE= nvmf
INCS= libnvmf.h
SRCS= gsb_crc32.c \
nvmf_controller.c \
nvmf_host.c \
nvmf_tcp.c \
nvmf_transport.c \
nvmft_subr.c
# Header search paths for the kernel NVMe over Fabrics sources.
CFLAGS+= -I${SRCTOP}/sys/dev/nvmf/controller
CFLAGS+= -I${SRCTOP}/sys/dev/nvmf
.include <bsd.lib.mk>
# Disable the cast-align warning for gsb_crc32.c only.
CWARNFLAGS.gsb_crc32.c= -Wno-cast-align

116
lib/libnvmf/internal.h Normal file
View File

@ -0,0 +1,116 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022-2024 Chelsio Communications, Inc.
* Written by: John Baldwin <jhb@FreeBSD.org>
*/
#ifndef __LIBNVMF_INTERNAL_H__
#define __LIBNVMF_INTERNAL_H__
#include <sys/queue.h>
/*
* Table of function pointers implemented by a transport. Each
* association carries a pointer to one of these tables (na_ops).
*/
struct nvmf_transport_ops {
/* Association management. */
struct nvmf_association *(*allocate_association)(bool controller,
const struct nvmf_association_params *params);
void (*update_association)(struct nvmf_association *na,
const struct nvme_controller_data *cdata);
void (*free_association)(struct nvmf_association *na);
/* Queue pair management. */
struct nvmf_qpair *(*allocate_qpair)(struct nvmf_association *na,
const struct nvmf_qpair_params *params);
void (*free_qpair)(struct nvmf_qpair *qp);
/* Create params for kernel handoff. */
int (*kernel_handoff_params)(struct nvmf_qpair *qp,
struct nvmf_handoff_qpair_params *qparams);
/* Capsule operations. */
struct nvmf_capsule *(*allocate_capsule)(struct nvmf_qpair *qp);
void (*free_capsule)(struct nvmf_capsule *nc);
int (*transmit_capsule)(struct nvmf_capsule *nc);
int (*receive_capsule)(struct nvmf_qpair *qp,
struct nvmf_capsule **ncp);
/* Returns a uint8_t status; NOTE(review): presumably an NVMe status code. */
uint8_t (*validate_command_capsule)(const struct nvmf_capsule *nc);
/* Transferring controller data. */
size_t (*capsule_data_len)(const struct nvmf_capsule *nc);
int (*receive_controller_data)(const struct nvmf_capsule *nc,
uint32_t data_offset, void *buf, size_t len);
int (*send_controller_data)(const struct nvmf_capsule *nc,
const void *buf, size_t len);
};
/*
* Transport-independent state for an association. Queue pairs point
* back at their association via nq_association.
*/
struct nvmf_association {
/* Transport operations table used by this association. */
struct nvmf_transport_ops *na_ops;
enum nvmf_trtype na_trtype;
/* True for a controller association, false for a host. */
bool na_controller;
struct nvmf_association_params na_params;
/* Each qpair holds a reference on an association. */
u_int na_refs;
/* NOTE(review): presumably managed by na_error/na_clear_error. */
char *na_last_error;
};
/* Transport-independent state for a single queue pair. */
struct nvmf_qpair {
struct nvmf_association *nq_association;
/* True for an Admin queue, false for an I/O queue. */
bool nq_admin;
uint16_t nq_cid; /* host only */
/*
* Queue sizes. This assumes the same size for both the
* completion and submission queues within a pair.
*/
u_int nq_qsize;
/* Flow control management for submission queues. */
bool nq_flow_control;
uint16_t nq_sqhd;
uint16_t nq_sqtail; /* host only */
/* Value in response to/from CONNECT. */
uint16_t nq_cntlid;
uint32_t nq_kato; /* valid on admin queue only */
/*
* Capsules linked through nc_link. NOTE(review): presumably
* received capsules that have not been consumed yet.
*/
TAILQ_HEAD(, nvmf_capsule) nq_rx_capsules;
};
/* A command or response capsule tied to a queue pair. */
struct nvmf_capsule {
/* Queue pair this capsule belongs to. */
struct nvmf_qpair *nc_qpair;
/* Either a SQE or CQE. */
union {
struct nvme_command nc_sqe;
struct nvme_completion nc_cqe;
};
/* NOTE(review): presumably the size of the SQE or CQE in use. */
int nc_qe_len;
/*
* Is SQHD in received capsule valid? False for locally-
* synthesized responses.
*/
bool nc_sqhd_valid;
/* Data buffer. */
/*
* NOTE(review): nc_send_data presumably mirrors the 'send'
* argument of nvmf_capsule_append_data -- confirm.
*/
bool nc_send_data;
void *nc_data;
size_t nc_data_len;
/* Linkage for a qpair's nq_rx_capsules list. */
TAILQ_ENTRY(nvmf_capsule) nc_link;
};
extern struct nvmf_transport_ops tcp_ops;
void na_clear_error(struct nvmf_association *na);
void na_error(struct nvmf_association *na, const char *fmt, ...);
int nvmf_kernel_handoff_params(struct nvmf_qpair *qp,
struct nvmf_handoff_qpair_params *qparams);
#endif /* !__LIBNVMF_INTERNAL_H__ */

363
lib/libnvmf/libnvmf.h Normal file
View File

@ -0,0 +1,363 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022-2024 Chelsio Communications, Inc.
* Written by: John Baldwin <jhb@FreeBSD.org>
*/
#ifndef __LIBNVMF_H__
#define __LIBNVMF_H__
#include <sys/uio.h>
#include <stdbool.h>
#include <stddef.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_proto.h>
struct nvmf_capsule;
struct nvmf_association;
struct nvmf_qpair;
/*
* Parameters shared by all queue-pairs of an association. Note that
* this contains the requested values used to initiate transport
* negotiation.
*/
struct nvmf_association_params {
bool sq_flow_control; /* SQ flow control required. */
bool dynamic_controller_model; /* Controller only */
uint16_t max_admin_qsize; /* Controller only */
uint32_t max_io_qsize; /* Controller only, 0 for discovery */
/* Transport-specific parameters; only TCP is defined here. */
union {
struct {
uint8_t pda; /* Tx-side PDA. */
bool header_digests;
bool data_digests;
uint32_t maxr2t; /* Host only */
uint32_t maxh2cdata; /* Controller only */
} tcp;
};
};
/* Parameters specific to a single queue pair of an association. */
struct nvmf_qpair_params {
bool admin; /* Host only */
/* Transport-specific parameters; only TCP is defined here. */
union {
struct {
/* Socket established by the consumer for this queue pair. */
int fd;
} tcp;
};
};
/* Transport-independent APIs. */
/*
* A host should allocate a new association for each association with
* a controller. After the admin queue has been allocated and the
* controller's data has been fetched, it should be passed to
* nvmf_update_association to update internal transport-specific
* parameters before allocating I/O queues.
*
* A controller uses a single association to manage all incoming
* queues since it is not known until after parsing the CONNECT
* command which transport queues are admin vs I/O and which
* controller they are created against.
*/
struct nvmf_association *nvmf_allocate_association(enum nvmf_trtype trtype,
bool controller, const struct nvmf_association_params *params);
void nvmf_update_assocation(struct nvmf_association *na,
const struct nvme_controller_data *cdata);
void nvmf_free_association(struct nvmf_association *na);
/* The most recent association-wide error message. */
const char *nvmf_association_error(const struct nvmf_association *na);
/*
* A queue pair represents either an Admin or I/O
* submission/completion queue pair.
*
* Each open qpair holds a reference on its association. Once queue
* pairs are allocated, callers can safely free the association to
* ease bookkeeping.
*
* If nvmf_allocate_qpair fails, a detailed error message can be obtained
* from nvmf_association_error.
*/
struct nvmf_qpair *nvmf_allocate_qpair(struct nvmf_association *na,
const struct nvmf_qpair_params *params);
void nvmf_free_qpair(struct nvmf_qpair *qp);
/*
* Capsules are either commands (host -> controller) or responses
* (controller -> host). A single data buffer segment may be
* associated with a command capsule. Transmitted data is not copied
* by this API but instead must be preserved until the capsule is
* transmitted and freed.
*/
struct nvmf_capsule *nvmf_allocate_command(struct nvmf_qpair *qp,
const void *sqe);
struct nvmf_capsule *nvmf_allocate_response(struct nvmf_qpair *qp,
const void *cqe);
void nvmf_free_capsule(struct nvmf_capsule *nc);
int nvmf_capsule_append_data(struct nvmf_capsule *nc,
void *buf, size_t len, bool send);
int nvmf_transmit_capsule(struct nvmf_capsule *nc);
int nvmf_receive_capsule(struct nvmf_qpair *qp, struct nvmf_capsule **ncp);
const void *nvmf_capsule_sqe(const struct nvmf_capsule *nc);
const void *nvmf_capsule_cqe(const struct nvmf_capsule *nc);
/* Return a string name for a transport type. */
const char *nvmf_transport_type(uint8_t trtype);
/* Validate a NVMe Qualified Name. */
bool nvmf_nqn_valid(const char *nqn);
/* Controller-specific APIs. */
/*
* A controller calls this function to check for any
* transport-specific errors (invalid fields) in a received command
* capsule. The callback returns a generic command status value:
* NVME_SC_SUCCESS if no error is found.
*/
uint8_t nvmf_validate_command_capsule(const struct nvmf_capsule *nc);
/*
* A controller calls this function to query the amount of data
* associated with a command capsule.
*/
size_t nvmf_capsule_data_len(const struct nvmf_capsule *cc);
/*
* A controller calls this function to receive data associated with a
* command capsule (e.g. the data for a WRITE command). This can
* either return in-capsule data or fetch data from the host
* (e.g. using a R2T PDU over TCP). The received command capsule
* should be passed in 'nc'. The received data is stored in '*buf'.
*/
int nvmf_receive_controller_data(const struct nvmf_capsule *nc,
uint32_t data_offset, void *buf, size_t len);
/*
* A controller calls this function to send data in response to a
* command along with a response capsule. If the data transfer
* succeeds, a success response is sent. If the data transfer fails,
* an appropriate error status capsule is sent. Regardless, a
* response capsule is always sent.
*/
int nvmf_send_controller_data(const struct nvmf_capsule *nc,
const void *buf, size_t len);
/*
* Construct a CQE for a reply to a command capsule in 'nc' with the
* completion status 'status'. This is useful when additional CQE
* info is required beyond the completion status.
*/
void nvmf_init_cqe(void *cqe, const struct nvmf_capsule *nc,
uint16_t status);
/*
* Construct and send a response capsule to a command capsule with
* the supplied CQE.
*/
int nvmf_send_response(const struct nvmf_capsule *nc, const void *cqe);
/*
* Wait for a single command capsule and return it in *ncp. This can
* fail if an invalid capsule is received or an I/O error occurs.
*/
int nvmf_controller_receive_capsule(struct nvmf_qpair *qp,
struct nvmf_capsule **ncp);
/* Send a response capsule from a controller. */
int nvmf_controller_transmit_response(struct nvmf_capsule *nc);
/* Construct and send an error response capsule. */
int nvmf_send_error(const struct nvmf_capsule *cc, uint8_t sc_type,
uint8_t sc_status);
/*
* Construct and send an error response capsule using a generic status
* code.
*/
int nvmf_send_generic_error(const struct nvmf_capsule *nc,
uint8_t sc_status);
/* Construct and send a simple success response capsule. */
int nvmf_send_success(const struct nvmf_capsule *nc);
/*
* Allocate a new queue pair and wait for the CONNECT command capsule.
* If this fails, a detailed error message can be obtained from
* nvmf_association_error. On success, the command capsule is saved
* in '*ccp' and the connect data is saved in 'data'. The caller
* must send an explicit response and free the command capsule.
*/
struct nvmf_qpair *nvmf_accept(struct nvmf_association *na,
const struct nvmf_qpair_params *params, struct nvmf_capsule **ccp,
struct nvmf_fabric_connect_data *data);
/*
* Construct and send a response capsule with the Fabrics CONNECT
* invalid parameters error status. If data is true the offset is
* relative to the CONNECT data structure, otherwise the offset is
* relative to the SQE.
*/
void nvmf_connect_invalid_parameters(const struct nvmf_capsule *cc,
bool data, uint16_t offset);
/* Construct and send a response capsule for a successful CONNECT. */
int nvmf_finish_accept(const struct nvmf_capsule *cc, uint16_t cntlid);
/* Compute the initial state of CAP for a controller. */
uint64_t nvmf_controller_cap(struct nvmf_qpair *qp);
/* Generate a serial number string from a host ID. */
void nvmf_controller_serial(char *buf, size_t len, u_long hostid);
/*
* Populate an Identify Controller data structure for a Discovery
* controller.
*/
void nvmf_init_discovery_controller_data(struct nvmf_qpair *qp,
struct nvme_controller_data *cdata);
/*
* Populate an Identify Controller data structure for an I/O
* controller.
*/
void nvmf_init_io_controller_data(struct nvmf_qpair *qp, const char *serial,
const char *subnqn, int nn, uint32_t ioccsz,
struct nvme_controller_data *cdata);
/*
* Validate if a new value for CC is legal given the existing values of
* CAP and CC.
*/
bool nvmf_validate_cc(struct nvmf_qpair *qp, uint64_t cap, uint32_t old_cc,
uint32_t new_cc);
/* Return the log page id (LID) of a GET_LOG_PAGE command. */
uint8_t nvmf_get_log_page_id(const struct nvme_command *cmd);
/* Return the requested data length of a GET_LOG_PAGE command. */
uint64_t nvmf_get_log_page_length(const struct nvme_command *cmd);
/* Return the requested data offset of a GET_LOG_PAGE command. */
uint64_t nvmf_get_log_page_offset(const struct nvme_command *cmd);
/* Prepare to handoff a controller qpair. */
int nvmf_handoff_controller_qpair(struct nvmf_qpair *qp,
struct nvmf_handoff_controller_qpair *h);
/* Host-specific APIs. */
/*
* Connect to an admin or I/O queue. If this fails, a detailed error
* message can be obtained from nvmf_association_error.
*/
struct nvmf_qpair *nvmf_connect(struct nvmf_association *na,
const struct nvmf_qpair_params *params, uint16_t qid, u_int queue_size,
const uint8_t hostid[16], uint16_t cntlid, const char *subnqn,
const char *hostnqn, uint32_t kato);
/* Return the CNTLID for a queue returned from CONNECT. */
uint16_t nvmf_cntlid(struct nvmf_qpair *qp);
/*
* Send a command to the controller. This can fail with EBUSY if the
* submission queue is full.
*/
int nvmf_host_transmit_command(struct nvmf_capsule *nc);
/*
* Wait for a response to a command. If there are no outstanding
* commands in the SQ, fails with EWOULDBLOCK.
*/
int nvmf_host_receive_response(struct nvmf_qpair *qp,
struct nvmf_capsule **rcp);
/*
* Wait for a response to a specific command. The command must have been
* successfully sent previously.
*/
int nvmf_host_wait_for_response(struct nvmf_capsule *cc,
struct nvmf_capsule **rcp);
/* Build a KeepAlive command. */
struct nvmf_capsule *nvmf_keepalive(struct nvmf_qpair *qp);
/* Read a controller property. */
int nvmf_read_property(struct nvmf_qpair *qp, uint32_t offset, uint8_t size,
uint64_t *value);
/* Write a controller property. */
int nvmf_write_property(struct nvmf_qpair *qp, uint32_t offset,
uint8_t size, uint64_t value);
/* Construct a 16-byte HostId from kern.hostuuid. */
int nvmf_hostid_from_hostuuid(uint8_t hostid[16]);
/* Construct a NQN from kern.hostuuid. */
int nvmf_nqn_from_hostuuid(char nqn[NVMF_NQN_MAX_LEN]);
/* Fetch controller data via IDENTIFY. */
int nvmf_host_identify_controller(struct nvmf_qpair *qp,
struct nvme_controller_data *data);
/* Fetch namespace data via IDENTIFY. */
int nvmf_host_identify_namespace(struct nvmf_qpair *qp, uint32_t nsid,
struct nvme_namespace_data *nsdata);
/*
* Fetch discovery log page. The memory for the log page is allocated
* by malloc() and returned in *logp. The caller must free the
* memory.
*/
int nvmf_host_fetch_discovery_log_page(struct nvmf_qpair *qp,
struct nvme_discovery_log **logp);
/*
* Request a desired number of I/O queues via SET_FEATURES. The
* number of actual I/O queues available is returned in *actual on
* success.
*/
int nvmf_host_request_queues(struct nvmf_qpair *qp, u_int requested,
u_int *actual);
/*
* Handoff active host association to the kernel. This frees the
* qpairs (even on error).
*/
int nvmf_handoff_host(struct nvmf_qpair *admin_qp, u_int num_queues,
struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata);
/*
* Disconnect an active host association previously handed off to the
* kernel. 'host' is either the name of the device (nvmeX) for this
* association or the remote subsystem NQN.
*/
int nvmf_disconnect_host(const char *host);
/*
* Disconnect all active host associations previously handed off to
* the kernel.
*/
int nvmf_disconnect_all(void);
/*
* Fetch reconnect parameters from an existing kernel host to use for
* establishing a new association.
*/
int nvmf_reconnect_params(int fd, struct nvmf_reconnect_params *rparams);
/*
* Handoff active host association to an existing host in the kernel.
* This frees the qpairs (even on error).
*/
int nvmf_reconnect_host(int fd, struct nvmf_qpair *admin_qp,
u_int num_queues, struct nvmf_qpair **io_queues,
const struct nvme_controller_data *cdata);
#endif /* !__LIBNVMF_H__ */

View File

@ -0,0 +1,463 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2024 Chelsio Communications, Inc.
* Written by: John Baldwin <jhb@FreeBSD.org>
*/
#include <sys/utsname.h>
#include <assert.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include "libnvmf.h"
#include "internal.h"
#include "nvmft_subr.h"
/*
 * Prepare the completion entry at 'cqe' as a reply to the command held
 * in 'nc': the entry is zeroed, 'status' is stored in little-endian
 * order, and the command identifier is copied over from the SQE.
 */
void
nvmf_init_cqe(void *cqe, const struct nvmf_capsule *nc, uint16_t status)
{
	const struct nvme_command *sqe;
	struct nvme_completion *entry;

	entry = cqe;
	sqe = nvmf_capsule_sqe(nc);

	memset(entry, 0, sizeof(*entry));
	entry->status = htole16(status);
	entry->cid = sqe->cid;
}
static struct nvmf_capsule *
nvmf_simple_response(const struct nvmf_capsule *nc, uint8_t sc_type,
uint8_t sc_status)
{
struct nvme_completion cpl;
uint16_t status;
status = NVMEF(NVME_STATUS_SCT, sc_type) |
NVMEF(NVME_STATUS_SC, sc_status);
nvmf_init_cqe(&cpl, nc, status);
return (nvmf_allocate_response(nc->nc_qpair, &cpl));
}
/*
 * Receive one command capsule on 'qp' and validate it.
 *
 * On success 0 is returned and the capsule is stored in '*ncp'.  If
 * receiving fails, that error is returned.  If validation fails, a
 * generic error response carrying the validation status is sent, the
 * capsule is freed, and EPROTO is returned.  '*ncp' is NULL on every
 * failure.
 */
int
nvmf_controller_receive_capsule(struct nvmf_qpair *qp,
    struct nvmf_capsule **ncp)
{
	struct nvmf_capsule *cmd;
	uint8_t sc;
	int rv;

	*ncp = NULL;

	rv = nvmf_receive_capsule(qp, &cmd);
	if (rv != 0)
		return (rv);

	sc = nvmf_validate_command_capsule(cmd);
	if (sc == NVME_SC_SUCCESS) {
		*ncp = cmd;
		return (0);
	}

	/* Reject the command; the result of the send is not checked. */
	nvmf_send_generic_error(cmd, sc);
	nvmf_free_capsule(cmd);
	return (EPROTO);
}
/*
 * Fill in SQHD of the response capsule 'nc' and transmit it.
 *
 * With flow control enabled the queue pair's SQ head is advanced by one
 * (wrapping at the queue size) and stored in the CQE; otherwise SQHD is
 * set to zero.  Returns the result of nvmf_transmit_capsule.
 */
int
nvmf_controller_transmit_response(struct nvmf_capsule *nc)
{
	struct nvmf_qpair *qp;
	uint16_t sqhd;

	qp = nc->nc_qpair;
	if (!qp->nq_flow_control)
		sqhd = 0;
	else {
		qp->nq_sqhd = (qp->nq_sqhd + 1) % qp->nq_qsize;
		sqhd = htole16(qp->nq_sqhd);
	}
	nc->nc_cqe.sqhd = sqhd;

	return (nvmf_transmit_capsule(nc));
}
/*
 * Build a response capsule from 'cqe' on the queue pair of the command
 * capsule 'cc', transmit it, and free it.
 *
 * Returns ENOMEM if the capsule cannot be allocated, otherwise the
 * result of nvmf_controller_transmit_response.
 */
int
nvmf_send_response(const struct nvmf_capsule *cc, const void *cqe)
{
	struct nvmf_capsule *resp;
	int rv;

	resp = nvmf_allocate_response(cc->nc_qpair, cqe);
	if (resp != NULL) {
		rv = nvmf_controller_transmit_response(resp);
		nvmf_free_capsule(resp);
	} else
		rv = ENOMEM;
	return (rv);
}
/*
 * Construct and send an error response to the command capsule 'cc'
 * using status code type 'sc_type' and status code 'sc_status'.
 *
 * Returns 0 on success, ENOMEM if the response capsule cannot be
 * allocated (matching nvmf_send_response), or the error returned while
 * transmitting the response.
 */
int
nvmf_send_error(const struct nvmf_capsule *cc, uint8_t sc_type,
    uint8_t sc_status)
{
	struct nvmf_capsule *rc;
	int error;

	rc = nvmf_simple_response(cc, sc_type, sc_status);

	/*
	 * Allocation can fail; transmitting would dereference the NULL
	 * capsule to find its queue pair.
	 */
	if (rc == NULL)
		return (ENOMEM);

	error = nvmf_controller_transmit_response(rc);
	nvmf_free_capsule(rc);
	return (error);
}
int
nvmf_send_generic_error(const struct nvmf_capsule *nc, uint8_t sc_status)
{
return (nvmf_send_error(nc, NVME_SCT_GENERIC, sc_status));
}
int
nvmf_send_success(const struct nvmf_capsule *nc)
{
return (nvmf_send_generic_error(nc, NVME_SC_SUCCESS));
}
/*
 * Construct and send a CONNECT response reporting the command-specific
 * "invalid parameter" status for the command capsule 'cc'.
 *
 * 'offset' is stored as the invalid parameter offset.  'data' selects
 * the attribute value: 1 when true, 0 when false.
 *
 * This is best effort: the function returns nothing, so a failure to
 * allocate or transmit the response is not reported to the caller.
 */
void
nvmf_connect_invalid_parameters(const struct nvmf_capsule *cc, bool data,
    uint16_t offset)
{
	struct nvmf_fabric_connect_rsp rsp;
	struct nvmf_capsule *rc;

	nvmf_init_cqe(&rsp, cc,
	    NVMEF(NVME_STATUS_SCT, NVME_SCT_COMMAND_SPECIFIC) |
	    NVMEF(NVME_STATUS_SC, NVMF_FABRIC_SC_INVALID_PARAM));
	rsp.status_code_specific.invalid.ipo = htole16(offset);
	rsp.status_code_specific.invalid.iattr = data ? 1 : 0;

	rc = nvmf_allocate_response(cc->nc_qpair, &rsp);

	/* Nothing can be sent without a capsule; avoid using NULL. */
	if (rc == NULL)
		return;

	nvmf_transmit_capsule(rc);
	nvmf_free_capsule(rc);
}
/*
* Allocate a queue pair on the controller association 'na', receive the
* first command capsule, and validate it as a Fabrics CONNECT command.
*
* On success the new queue pair is returned, the CONNECT command
* capsule is stored in '*ccp', and the CONNECT data is stored in
* 'data'. No response is sent on success.
*
* On failure NULL is returned and '*ccp' is NULL. Any received capsule
* and the queue pair are freed. Most failures record a message with
* na_error, and failures found after the capsule was received also
* attempt to send an error response first.
*/
struct nvmf_qpair *
nvmf_accept(struct nvmf_association *na, const struct nvmf_qpair_params *params,
struct nvmf_capsule **ccp, struct nvmf_fabric_connect_data *data)
{
/* Zero-initialized (static) buffer used to detect an all-zero hostid. */
static const char hostid_zero[sizeof(data->hostid)];
const struct nvmf_fabric_connect_cmd *cmd;
struct nvmf_qpair *qp;
struct nvmf_capsule *cc, *rc;
u_int qsize;
int error;
uint16_t cntlid;
uint8_t sc_status;
/* The error path below relies on these starting out as NULL. */
qp = NULL;
cc = NULL;
rc = NULL;
*ccp = NULL;
na_clear_error(na);
if (!na->na_controller) {
na_error(na, "Cannot accept on a host");
goto error;
}
qp = nvmf_allocate_qpair(na, params);
if (qp == NULL)
goto error;
/* Read the CONNECT capsule. */
error = nvmf_receive_capsule(qp, &cc);
if (error != 0) {
na_error(na, "Failed to receive CONNECT: %s", strerror(error));
goto error;
}
sc_status = nvmf_validate_command_capsule(cc);
if (sc_status != 0) {
na_error(na, "CONNECT command failed to validate: %u",
sc_status);
rc = nvmf_simple_response(cc, NVME_SCT_GENERIC, sc_status);
goto error;
}
cmd = nvmf_capsule_sqe(cc);
if (cmd->opcode != NVME_OPC_FABRICS_COMMANDS ||
cmd->fctype != NVMF_FABRIC_COMMAND_CONNECT) {
na_error(na, "Invalid opcode in CONNECT (%u,%u)", cmd->opcode,
cmd->fctype);
rc = nvmf_simple_response(cc, NVME_SCT_GENERIC,
NVME_SC_INVALID_OPCODE);
goto error;
}
if (cmd->recfmt != htole16(0)) {
na_error(na, "Unsupported CONNECT record format %u",
le16toh(cmd->recfmt));
rc = nvmf_simple_response(cc, NVME_SCT_COMMAND_SPECIFIC,
NVMF_FABRIC_SC_INCOMPATIBLE_FORMAT);
goto error;
}
/* The queue size used here is the command's sqsize plus one. */
qsize = le16toh(cmd->sqsize) + 1;
if (cmd->qid == 0) {
/* Admin queue limits. */
if (qsize < NVME_MIN_ADMIN_ENTRIES ||
qsize > NVME_MAX_ADMIN_ENTRIES ||
qsize > na->na_params.max_admin_qsize) {
na_error(na, "Invalid queue size %u", qsize);
nvmf_connect_invalid_parameters(cc, false,
offsetof(struct nvmf_fabric_connect_cmd, sqsize));
goto error;
}
qp->nq_admin = true;
} else {
/* I/O queues not allowed for discovery. */
if (na->na_params.max_io_qsize == 0) {
na_error(na, "I/O queue on discovery controller");
nvmf_connect_invalid_parameters(cc, false,
offsetof(struct nvmf_fabric_connect_cmd, qid));
goto error;
}
/* I/O queue limits. */
if (qsize < NVME_MIN_IO_ENTRIES ||
qsize > NVME_MAX_IO_ENTRIES ||
qsize > na->na_params.max_io_qsize) {
na_error(na, "Invalid queue size %u", qsize);
nvmf_connect_invalid_parameters(cc, false,
offsetof(struct nvmf_fabric_connect_cmd, sqsize));
goto error;
}
/* KATO is reserved for I/O queues. */
if (cmd->kato != 0) {
na_error(na,
"KeepAlive timeout specified for I/O queue");
nvmf_connect_invalid_parameters(cc, false,
offsetof(struct nvmf_fabric_connect_cmd, kato));
goto error;
}
qp->nq_admin = false;
}
qp->nq_qsize = qsize;
/* Fetch CONNECT data. */
if (nvmf_capsule_data_len(cc) != sizeof(*data)) {
na_error(na, "Invalid data payload length for CONNECT: %zu",
nvmf_capsule_data_len(cc));
nvmf_connect_invalid_parameters(cc, false,
offsetof(struct nvmf_fabric_connect_cmd, sgl1));
goto error;
}
error = nvmf_receive_controller_data(cc, 0, data, sizeof(*data));
if (error != 0) {
na_error(na, "Failed to read data for CONNECT: %s",
strerror(error));
rc = nvmf_simple_response(cc, NVME_SCT_GENERIC,
NVME_SC_DATA_TRANSFER_ERROR);
goto error;
}
/* The hostid must be non-zero. */
if (memcmp(data->hostid, hostid_zero, sizeof(hostid_zero)) == 0) {
na_error(na, "HostID in CONNECT data is zero");
nvmf_connect_invalid_parameters(cc, true,
offsetof(struct nvmf_fabric_connect_data, hostid));
goto error;
}
/*
* Controller ID checks. An Admin queue must use the dynamic wildcard
* under the dynamic controller model; otherwise it may use a static
* ID or the static wildcard.
*/
cntlid = le16toh(data->cntlid);
if (cmd->qid == 0) {
if (na->na_params.dynamic_controller_model) {
if (cntlid != NVMF_CNTLID_DYNAMIC) {
na_error(na, "Invalid controller ID %#x",
cntlid);
nvmf_connect_invalid_parameters(cc, true,
offsetof(struct nvmf_fabric_connect_data,
cntlid));
goto error;
}
} else {
if (cntlid > NVMF_CNTLID_STATIC_MAX &&
cntlid != NVMF_CNTLID_STATIC_ANY) {
na_error(na, "Invalid controller ID %#x",
cntlid);
nvmf_connect_invalid_parameters(cc, true,
offsetof(struct nvmf_fabric_connect_data,
cntlid));
goto error;
}
}
} else {
/* Wildcard Controller IDs are only valid on an Admin queue. */
if (cntlid > NVMF_CNTLID_STATIC_MAX) {
na_error(na, "Invalid controller ID %#x", cntlid);
nvmf_connect_invalid_parameters(cc, true,
offsetof(struct nvmf_fabric_connect_data, cntlid));
goto error;
}
}
/* Simple validation of each NQN. */
if (!nvmf_nqn_valid(data->subnqn)) {
na_error(na, "Invalid SubNQN %.*s", (int)sizeof(data->subnqn),
data->subnqn);
nvmf_connect_invalid_parameters(cc, true,
offsetof(struct nvmf_fabric_connect_data, subnqn));
goto error;
}
if (!nvmf_nqn_valid(data->hostnqn)) {
na_error(na, "Invalid HostNQN %.*s", (int)sizeof(data->hostnqn),
data->hostnqn);
nvmf_connect_invalid_parameters(cc, true,
offsetof(struct nvmf_fabric_connect_data, hostnqn));
goto error;
}
/*
* Flow control is used if the association requires it or if the
* host did not set the "disable SQ flow control" attribute.
*/
if (na->na_params.sq_flow_control ||
(cmd->cattr & NVMF_CONNECT_ATTR_DISABLE_SQ_FC) == 0)
qp->nq_flow_control = true;
else
qp->nq_flow_control = false;
qp->nq_sqhd = 0;
qp->nq_kato = le32toh(cmd->kato);
*ccp = cc;
return (qp);
/*
* Common failure path: transmit a pending error response (if one was
* allocated), then release the capsule and the queue pair.
*/
error:
if (rc != NULL) {
nvmf_transmit_capsule(rc);
nvmf_free_capsule(rc);
}
if (cc != NULL)
nvmf_free_capsule(cc);
if (qp != NULL)
nvmf_free_qpair(qp);
return (NULL);
}
/*
 * Send the successful CONNECT response for the command capsule 'cc',
 * reporting 'cntlid' as the controller ID.
 *
 * SQHD in the response is the queue pair's current SQ head when flow
 * control is enabled and 0xffff otherwise.  Returns ENOMEM if the
 * response cannot be allocated, or the transmit error.  The queue pair
 * records 'cntlid' only after the response was transmitted.
 */
int
nvmf_finish_accept(const struct nvmf_capsule *cc, uint16_t cntlid)
{
	struct nvmf_qpair *qp = cc->nc_qpair;
	struct nvmf_fabric_connect_rsp rsp;
	struct nvmf_capsule *resp;
	int rv;

	nvmf_init_cqe(&rsp, cc, 0);
	rsp.sqhd = htole16(qp->nq_flow_control ? qp->nq_sqhd : 0xffff);
	rsp.status_code_specific.success.cntlid = htole16(cntlid);

	resp = nvmf_allocate_response(qp, &rsp);
	if (resp == NULL)
		return (ENOMEM);

	rv = nvmf_transmit_capsule(resp);
	nvmf_free_capsule(resp);
	if (rv != 0)
		return (rv);

	qp->nq_cntlid = cntlid;
	return (0);
}
/*
 * Compute the CAP value for the controller that owns 'qp' by passing
 * the association's maximum I/O queue size and NVMF_CC_EN_TIMEOUT to
 * _nvmf_controller_cap.
 */
uint64_t
nvmf_controller_cap(struct nvmf_qpair *qp)
{
	uint32_t max_io_qsize;

	max_io_qsize = qp->nq_association->na_params.max_io_qsize;
	return (_nvmf_controller_cap(max_io_qsize, NVMF_CC_EN_TIMEOUT));
}
/*
 * Check whether 'new_cc' is a legal new value of CC given 'cap' and
 * the current value 'old_cc'.  The decision is delegated to
 * _nvmf_validate_cc together with the association's maximum I/O queue
 * size.
 */
bool
nvmf_validate_cc(struct nvmf_qpair *qp, uint64_t cap, uint32_t old_cc,
    uint32_t new_cc)
{
	uint32_t max_io_qsize;

	max_io_qsize = qp->nq_association->na_params.max_io_qsize;
	return (_nvmf_validate_cc(max_io_qsize, cap, old_cc, new_cc));
}
/*
 * Fill in '*cdata' with Identify Controller data for a Discovery
 * controller.  The structure is zeroed first; the model name and
 * firmware revision come from uname(3), and the controller ID and
 * maximum command count come from 'qp' and its association.
 */
void
nvmf_init_discovery_controller_data(struct nvmf_qpair *qp,
    struct nvme_controller_data *cdata)
{
	const struct nvmf_association *assoc;
	struct utsname un;
	char *dash;
	size_t tail;

	assoc = qp->nq_association;
	memset(cdata, 0, sizeof(*cdata));

	/*
	 * Model name and serial are reserved according to 5.2 Figure 37,
	 * yet Linux fills them in.  Only the model name is provided here;
	 * the serial is left alone.
	 */
	uname(&un);
	nvmf_strpad(cdata->mn, un.sysname, sizeof(cdata->mn));
	nvmf_strpad(cdata->fr, un.release, sizeof(cdata->fr));

	/* Blank out the release string from the first '-' onward. */
	dash = memchr(cdata->fr, '-', sizeof(cdata->fr));
	if (dash != NULL) {
		tail = sizeof(cdata->fr) - (dash - (char *)cdata->fr);
		memset(dash, ' ', tail);
	}

	cdata->ctrlr_id = htole16(qp->nq_cntlid);
	cdata->ver = htole32(NVME_REV(1, 4));
	cdata->cntrltype = 2;

	cdata->lpa = NVMEF(NVME_CTRLR_DATA_LPA_EXT_DATA, 1);
	cdata->elpe = 0;

	cdata->maxcmd = htole16(assoc->na_params.max_admin_qsize);

	/* Open question kept from the original: transport-specific? */
	cdata->sgls = htole32(
	    NVMEF(NVME_CTRLR_DATA_SGLS_TRANSPORT_DATA_BLOCK, 1) |
	    NVMEF(NVME_CTRLR_DATA_SGLS_ADDRESS_AS_OFFSET, 1) |
	    NVMEF(NVME_CTRLR_DATA_SGLS_NVM_COMMAND_SET, 1));

	strlcpy(cdata->subnqn, NVMF_DISCOVERY_NQN, sizeof(cdata->subnqn));
}
void
nvmf_init_io_controller_data(struct nvmf_qpair *qp, const char *serial,
const char *subnqn, int nn, uint32_t ioccsz,
struct nvme_controller_data *cdata)
{
const struct nvmf_association *na = qp->nq_association;
struct utsname utsname;
uname(&utsname);
_nvmf_init_io_controller_data(qp->nq_cntlid, na->na_params.max_io_qsize,
serial, utsname.sysname, utsname.release, subnqn, nn, ioccsz,
sizeof(struct nvme_completion), cdata);
}
/*
 * Return the log page identifier of a GET_LOG_PAGE command, taken from
 * the low 8 bits of CDW10.  Asserts that 'cmd' is a GET_LOG_PAGE
 * command.
 */
uint8_t
nvmf_get_log_page_id(const struct nvme_command *cmd)
{
	uint32_t dw10;

	assert(cmd->opc == NVME_OPC_GET_LOG_PAGE);

	dw10 = le32toh(cmd->cdw10);
	return (dw10 & 0xff);
}
/*
 * Return the requested data length in bytes of a GET_LOG_PAGE command.
 * Asserts that 'cmd' is a GET_LOG_PAGE command.
 *
 * The dword count is assembled from the upper 16 bits of CDW10 (low
 * half) and the lower 16 bits of CDW11 (high half); one is added to it
 * before scaling to bytes.  The byte count is computed in 64 bits so
 * that a large dword count does not wrap in 32-bit arithmetic before
 * being widened to the return type.
 */
uint64_t
nvmf_get_log_page_length(const struct nvme_command *cmd)
{
	uint32_t numd;

	assert(cmd->opc == NVME_OPC_GET_LOG_PAGE);

	numd = le32toh(cmd->cdw10) >> 16 |
	    (le32toh(cmd->cdw11) & 0xffff) << 16;
	return (((uint64_t)numd + 1) * 4);
}
/*
 * Return the requested data offset of a GET_LOG_PAGE command: CDW12
 * supplies the low 32 bits and CDW13 the high 32 bits.  Asserts that
 * 'cmd' is a GET_LOG_PAGE command.
 */
uint64_t
nvmf_get_log_page_offset(const struct nvme_command *cmd)
{
	uint64_t hi, lo;

	assert(cmd->opc == NVME_OPC_GET_LOG_PAGE);

	lo = le32toh(cmd->cdw12);
	hi = le32toh(cmd->cdw13);
	return (hi << 32 | lo);
}
int
nvmf_handoff_controller_qpair(struct nvmf_qpair *qp,
struct nvmf_handoff_controller_qpair *h)
{
h->trtype = qp->nq_association->na_trtype;
return (nvmf_kernel_handoff_params(qp, &h->params));
}

911
lib/libnvmf/nvmf_host.c Normal file
View File

@ -0,0 +1,911 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2024 Chelsio Communications, Inc.
* Written by: John Baldwin <jhb@FreeBSD.org>
*/
#include <sys/sysctl.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <uuid.h>
#include "libnvmf.h"
#include "internal.h"
/*
 * Zero the submission queue entry at 'sqe' and set its opcode to
 * 'opcode'.
 */
static void
nvmf_init_sqe(void *sqe, uint8_t opcode)
{
	struct nvme_command *entry;

	entry = sqe;
	memset(entry, 0, sizeof(struct nvme_command));
	entry->opc = opcode;
}
/*
 * Initialize the entry at 'sqe' as a Fabrics command: it is reset with
 * the NVME_OPC_FABRICS_COMMANDS opcode and then given the Fabrics
 * command type 'fctype'.
 */
static void
nvmf_init_fabrics_sqe(void *sqe, uint8_t fctype)
{
	struct nvmf_capsule_cmd *fabrics;

	nvmf_init_sqe(sqe, NVME_OPC_FABRICS_COMMANDS);

	fabrics = sqe;
	fabrics->fctype = fctype;
}
/*
 * Connect a new host queue pair to a remote controller.
 *
 * The arguments are validated, a queue pair is allocated from
 * 'params', and a Fabrics CONNECT command carrying the host ID,
 * controller ID, and subsystem/host NQNs is transmitted.  The CONNECT
 * response is then received and checked.
 *
 * Returns the new queue pair on success.  On failure NULL is returned
 * and, for failures detected here, a message is recorded on the
 * association via na_error().
 */
struct nvmf_qpair *
nvmf_connect(struct nvmf_association *na,
    const struct nvmf_qpair_params *params, uint16_t qid, u_int queue_size,
    const uint8_t hostid[16], uint16_t cntlid, const char *subnqn,
    const char *hostnqn, uint32_t kato)
{
	struct nvmf_fabric_connect_cmd cmd;
	struct nvmf_fabric_connect_data data;
	const struct nvmf_fabric_connect_rsp *rsp;
	struct nvmf_qpair *qp;
	struct nvmf_capsule *cc, *rc;
	int error;
	uint16_t sqhd, status;

	qp = NULL;
	cc = NULL;
	rc = NULL;
	na_clear_error(na);
	if (na->na_controller) {
		na_error(na, "Cannot connect on a controller");
		goto error;
	}

	if (params->admin != (qid == 0)) {
		na_error(na, "Admin queue must use Queue ID 0");
		goto error;
	}

	if (qid == 0) {
		if (queue_size < NVME_MIN_ADMIN_ENTRIES ||
		    queue_size > NVME_MAX_ADMIN_ENTRIES) {
			na_error(na, "Invalid queue size %u", queue_size);
			goto error;
		}
	} else {
		if (queue_size < NVME_MIN_IO_ENTRIES ||
		    queue_size > NVME_MAX_IO_ENTRIES) {
			na_error(na, "Invalid queue size %u", queue_size);
			goto error;
		}

		/* KATO is only for Admin queues. */
		if (kato != 0) {
			na_error(na, "Cannot set KATO on I/O queues");
			goto error;
		}
	}

	qp = nvmf_allocate_qpair(na, params);
	if (qp == NULL)
		goto error;

	nvmf_init_fabrics_sqe(&cmd, NVMF_FABRIC_COMMAND_CONNECT);
	cmd.recfmt = 0;
	cmd.qid = htole16(qid);

	/* N.B. sqsize is 0's based. */
	cmd.sqsize = htole16(queue_size - 1);
	if (!na->na_params.sq_flow_control)
		cmd.cattr |= NVMF_CONNECT_ATTR_DISABLE_SQ_FC;
	cmd.kato = htole32(kato);

	cc = nvmf_allocate_command(qp, &cmd);
	if (cc == NULL) {
		na_error(na, "Failed to allocate command capsule: %s",
		    strerror(errno));
		goto error;
	}

	memset(&data, 0, sizeof(data));
	memcpy(data.hostid, hostid, sizeof(data.hostid));
	data.cntlid = htole16(cntlid);
	strlcpy(data.subnqn, subnqn, sizeof(data.subnqn));
	strlcpy(data.hostnqn, hostnqn, sizeof(data.hostnqn));

	error = nvmf_capsule_append_data(cc, &data, sizeof(data), true);
	if (error != 0) {
		na_error(na, "Failed to append data to CONNECT capsule: %s",
		    strerror(error));
		goto error;
	}

	/* The failure reason is the returned error code, not errno. */
	error = nvmf_transmit_capsule(cc);
	if (error != 0) {
		na_error(na, "Failed to transmit CONNECT capsule: %s",
		    strerror(error));
		goto error;
	}

	error = nvmf_receive_capsule(qp, &rc);
	if (error != 0) {
		na_error(na, "Failed to receive CONNECT response: %s",
		    strerror(error));
		goto error;
	}

	rsp = (const struct nvmf_fabric_connect_rsp *)&rc->nc_cqe;
	status = le16toh(rc->nc_cqe.status);
	if (status != 0) {
		if (NVME_STATUS_GET_SC(status) == NVMF_FABRIC_SC_INVALID_PARAM)
			na_error(na,
			    "CONNECT invalid parameter IATTR: %#x IPO: %#x",
			    rsp->status_code_specific.invalid.iattr,
			    rsp->status_code_specific.invalid.ipo);
		else
			na_error(na, "CONNECT failed, status %#x", status);
		goto error;
	}

	if (rc->nc_cqe.cid != cmd.cid) {
		na_error(na, "Mismatched CID in CONNECT response");
		goto error;
	}

	if (!rc->nc_sqhd_valid) {
		na_error(na, "CONNECT response without valid SQHD");
		goto error;
	}

	/* An SQHD of 0xffff means the controller disabled SQ flow control. */
	sqhd = le16toh(rsp->sqhd);
	if (sqhd == 0xffff) {
		if (na->na_params.sq_flow_control) {
			na_error(na, "Controller disabled SQ flow control");
			goto error;
		}
		qp->nq_flow_control = false;
	} else {
		qp->nq_flow_control = true;
		qp->nq_sqhd = sqhd;
		qp->nq_sqtail = sqhd;
	}

	if (rsp->status_code_specific.success.authreq) {
		na_error(na, "CONNECT response requests authentication");
		goto error;
	}

	qp->nq_qsize = queue_size;
	qp->nq_cntlid = le16toh(rsp->status_code_specific.success.cntlid);
	qp->nq_kato = kato;
	/* XXX: Save qid in qp? */

	/*
	 * The capsules are no longer needed once the response has been
	 * parsed ('rsp' points into 'rc'); release them so that a
	 * successful connect does not leak them.
	 */
	nvmf_free_capsule(rc);
	nvmf_free_capsule(cc);
	return (qp);

error:
	if (rc != NULL)
		nvmf_free_capsule(rc);
	if (cc != NULL)
		nvmf_free_capsule(cc);
	if (qp != NULL)
		nvmf_free_qpair(qp);
	return (NULL);
}
/* Return the controller ID recorded on a queue pair. */
uint16_t
nvmf_cntlid(struct nvmf_qpair *qp)
{
	uint16_t cntlid;

	cntlid = qp->nq_cntlid;
	return (cntlid);
}
/*
 * Assign the next command ID to a command capsule and transmit it.
 *
 * Returns EBUSY without transmitting if advancing the submission
 * queue tail would make it equal to the head.  Otherwise returns the
 * result of nvmf_transmit_capsule(); the tail is only advanced when
 * the transmit succeeds.
 */
int
nvmf_host_transmit_command(struct nvmf_capsule *nc)
{
	struct nvmf_qpair *qp;
	uint16_t next_tail;
	int rv;

	qp = nc->nc_qpair;

	/* Fail if the queue is full. */
	next_tail = (qp->nq_sqtail + 1) % qp->nq_qsize;
	if (next_tail == qp->nq_sqhd)
		return (EBUSY);

	nc->nc_sqe.cid = htole16(qp->nq_cid);

	/* 4.2 Skip CID of 0xFFFF. */
	if (++qp->nq_cid == 0xFFFF)
		qp->nq_cid = 0;

	rv = nvmf_transmit_capsule(nc);
	if (rv == 0)
		qp->nq_sqtail = next_tail;
	return (rv);
}
/*
 * Receive one capsule from the transport and update the submission
 * queue head used for SQ flow control accounting.
 *
 * Returns EWOULDBLOCK if no commands are outstanding, otherwise the
 * result of nvmf_receive_capsule().
 */
static int
nvmf_host_receive_capsule(struct nvmf_qpair *qp, struct nvmf_capsule **ncp)
{
	struct nvmf_capsule *rx;
	int rv;

	/* If the SQ is empty, there is no response to wait for. */
	if (qp->nq_sqhd == qp->nq_sqtail)
		return (EWOULDBLOCK);

	rv = nvmf_receive_capsule(qp, &rx);
	if (rv != 0)
		return (rv);

	if (!qp->nq_flow_control) {
		/*
		 * Without SQ FC the controller does not report a head,
		 * so count each response as one completed command to
		 * keep track of how many commands are outstanding.
		 */
		qp->nq_sqhd = (qp->nq_sqhd + 1) % qp->nq_qsize;
	} else if (rx->nc_sqhd_valid) {
		qp->nq_sqhd = le16toh(rx->nc_cqe.sqhd);
	}

	*ncp = rx;
	return (0);
}
/*
 * Return the next response capsule for a queue pair.  Responses that
 * were received earlier and queued are returned first, oldest first;
 * otherwise a new capsule is received from the transport.
 */
int
nvmf_host_receive_response(struct nvmf_qpair *qp, struct nvmf_capsule **ncp)
{
	struct nvmf_capsule *queued;

	queued = TAILQ_FIRST(&qp->nq_rx_capsules);
	if (queued == NULL)
		return (nvmf_host_receive_capsule(qp, ncp));

	/* Return the oldest previously received response. */
	TAILQ_REMOVE(&qp->nq_rx_capsules, queued, nc_link);
	*ncp = queued;
	return (0);
}
int
nvmf_host_wait_for_response(struct nvmf_capsule *cc,
struct nvmf_capsule **rcp)
{
struct nvmf_qpair *qp = cc->nc_qpair;
struct nvmf_capsule *rc;
int error;
/* Check if a response was already received. */
TAILQ_FOREACH(rc, &qp->nq_rx_capsules, nc_link) {
if (rc->nc_cqe.cid == cc->nc_sqe.cid) {
TAILQ_REMOVE(&qp->nq_rx_capsules, rc, nc_link);
*rcp = rc;
return (0);
}
}
/* Wait for a response. */
for (;;) {
error = nvmf_host_receive_capsule(qp, &rc);
if (error != 0)
return (error);
if (rc->nc_cqe.cid != cc->nc_sqe.cid) {
TAILQ_INSERT_TAIL(&qp->nq_rx_capsules, rc, nc_link);
continue;
}
*rcp = rc;
return (0);
}
}
/*
 * Allocate a Keep Alive command capsule for an admin queue pair.
 * Returns NULL with errno set to EINVAL if 'qp' is not an admin queue.
 */
struct nvmf_capsule *
nvmf_keepalive(struct nvmf_qpair *qp)
{
	struct nvme_command cmd;

	if (qp->nq_admin) {
		nvmf_init_sqe(&cmd, NVME_OPC_KEEP_ALIVE);
		return (nvmf_allocate_command(qp, &cmd));
	}

	errno = EINVAL;
	return (NULL);
}
/*
 * Allocate a Fabrics Property Get command capsule for the property at
 * 'offset'.  'size' must be 4 or 8 bytes; any other value fails with
 * NULL and errno set to EINVAL.
 */
static struct nvmf_capsule *
nvmf_get_property(struct nvmf_qpair *qp, uint32_t offset, uint8_t size)
{
	struct nvmf_fabric_prop_get_cmd cmd;
	uint8_t prop_size;

	if (size == 4) {
		prop_size = NVMF_PROP_SIZE_4;
	} else if (size == 8) {
		prop_size = NVMF_PROP_SIZE_8;
	} else {
		errno = EINVAL;
		return (NULL);
	}

	nvmf_init_fabrics_sqe(&cmd, NVMF_FABRIC_COMMAND_PROPERTY_GET);
	cmd.attrib.size = prop_size;
	cmd.ofst = htole32(offset);
	return (nvmf_allocate_command(qp, &cmd));
}
/*
 * Read a controller property over an admin queue pair.
 *
 * Sends a Property Get command and waits for its response.  On
 * success the property value is stored in '*value' and 0 is returned.
 * Returns EINVAL for a non-admin queue pair, EIO if the controller
 * reports a non-zero status, or the error from building, sending, or
 * receiving the command.
 */
int
nvmf_read_property(struct nvmf_qpair *qp, uint32_t offset, uint8_t size,
    uint64_t *value)
{
	const struct nvmf_fabric_prop_get_rsp *rsp;
	struct nvmf_capsule *cc, *rc;
	uint16_t status;
	int rv;

	if (!qp->nq_admin)
		return (EINVAL);

	cc = nvmf_get_property(qp, offset, size);
	if (cc == NULL)
		return (errno);

	/* The command capsule is released whether or not this succeeds. */
	rv = nvmf_host_transmit_command(cc);
	if (rv == 0)
		rv = nvmf_host_wait_for_response(cc, &rc);
	nvmf_free_capsule(cc);
	if (rv != 0)
		return (rv);

	status = le16toh(rc->nc_cqe.status);
	if (status == 0) {
		rsp = (const struct nvmf_fabric_prop_get_rsp *)&rc->nc_cqe;
		if (size == 8)
			*value = le64toh(rsp->value.u64);
		else
			*value = le32toh(rsp->value.u32.low);
		rv = 0;
	} else {
		printf("NVMF: PROPERTY_GET failed, status %#x\n", status);
		rv = EIO;
	}
	nvmf_free_capsule(rc);
	return (rv);
}
/*
 * Allocate a Fabrics Property Set command capsule that writes 'value'
 * to the property at 'offset'.  'size' must be 4 or 8 bytes; any
 * other value fails with NULL and errno set to EINVAL.
 */
static struct nvmf_capsule *
nvmf_set_property(struct nvmf_qpair *qp, uint32_t offset, uint8_t size,
    uint64_t value)
{
	struct nvmf_fabric_prop_set_cmd cmd;

	nvmf_init_fabrics_sqe(&cmd, NVMF_FABRIC_COMMAND_PROPERTY_SET);
	cmd.ofst = htole32(offset);

	if (size == 4) {
		cmd.attrib.size = NVMF_PROP_SIZE_4;
		cmd.value.u32.low = htole32(value);
	} else if (size == 8) {
		cmd.attrib.size = NVMF_PROP_SIZE_8;
		cmd.value.u64 = htole64(value);
	} else {
		errno = EINVAL;
		return (NULL);
	}

	return (nvmf_allocate_command(qp, &cmd));
}
/*
 * Write a controller property over an admin queue pair.
 *
 * Sends a Property Set command and waits for its response.  Returns 0
 * on success, EINVAL for a non-admin queue pair, EIO if the
 * controller reports a non-zero status, or the error from building,
 * sending, or receiving the command.
 */
int
nvmf_write_property(struct nvmf_qpair *qp, uint32_t offset, uint8_t size,
    uint64_t value)
{
	struct nvmf_capsule *cc, *rc;
	uint16_t status;
	int rv;

	if (!qp->nq_admin)
		return (EINVAL);

	cc = nvmf_set_property(qp, offset, size, value);
	if (cc == NULL)
		return (errno);

	/* The command capsule is released whether or not this succeeds. */
	rv = nvmf_host_transmit_command(cc);
	if (rv == 0)
		rv = nvmf_host_wait_for_response(cc, &rc);
	nvmf_free_capsule(cc);
	if (rv != 0)
		return (rv);

	status = le16toh(rc->nc_cqe.status);
	if (status != 0) {
		printf("NVMF: PROPERTY_SET failed, status %#x\n", status);
		rv = EIO;
	}
	nvmf_free_capsule(rc);
	return (rv);
}
int
nvmf_hostid_from_hostuuid(uint8_t hostid[16])
{
char hostuuid_str[64];
uuid_t hostuuid;
size_t len;
uint32_t status;
len = sizeof(hostuuid_str);
if (sysctlbyname("kern.hostuuid", hostuuid_str, &len, NULL, 0) != 0)
return (errno);
uuid_from_string(hostuuid_str, &hostuuid, &status);
switch (status) {
case uuid_s_ok:
break;
case uuid_s_no_memory:
return (ENOMEM);
default:
return (EINVAL);
}
uuid_enc_le(hostid, &hostuuid);
return (0);
}
/*
 * Build a host NQN by appending the kern.hostuuid sysctl string to
 * NVMF_NQN_UUID_PRE.  Returns 0 on success or the errno from
 * sysctlbyname().
 */
int
nvmf_nqn_from_hostuuid(char nqn[NVMF_NQN_MAX_LEN])
{
	char uuid_str[64];
	size_t uuid_str_len = sizeof(uuid_str);

	if (sysctlbyname("kern.hostuuid", uuid_str, &uuid_str_len, NULL,
	    0) != 0)
		return (errno);

	/* Prefix first, then the UUID; both are bounded by the NQN size. */
	strlcpy(nqn, NVMF_NQN_UUID_PRE, NVMF_NQN_MAX_LEN);
	strlcat(nqn, uuid_str, NVMF_NQN_MAX_LEN);
	return (0);
}
/*
 * Fetch the Identify Controller data structure over an admin queue
 * pair into 'cdata'.
 *
 * Returns 0 on success, EINVAL for a non-admin queue pair, EIO if the
 * controller reports a non-zero status, or the error from building,
 * sending, or receiving the command.
 */
int
nvmf_host_identify_controller(struct nvmf_qpair *qp,
    struct nvme_controller_data *cdata)
{
	struct nvme_command cmd;
	struct nvmf_capsule *cc, *rc;
	uint16_t status;
	int rv;

	if (!qp->nq_admin)
		return (EINVAL);

	nvmf_init_sqe(&cmd, NVME_OPC_IDENTIFY);

	/* 5.15.1 Use CNS of 0x01 for controller data. */
	cmd.cdw10 = htole32(1);

	cc = nvmf_allocate_command(qp, &cmd);
	if (cc == NULL)
		return (errno);

	/* Each step runs only if the previous one succeeded. */
	rv = nvmf_capsule_append_data(cc, cdata, sizeof(*cdata), false);
	if (rv == 0)
		rv = nvmf_host_transmit_command(cc);
	if (rv == 0)
		rv = nvmf_host_wait_for_response(cc, &rc);
	nvmf_free_capsule(cc);
	if (rv != 0)
		return (rv);

	status = le16toh(rc->nc_cqe.status);
	if (status != 0) {
		printf("NVMF: IDENTIFY failed, status %#x\n", status);
		rv = EIO;
	}
	nvmf_free_capsule(rc);
	return (rv);
}
/*
 * Fetch the Identify Namespace data structure for 'nsid' over an
 * admin queue pair into 'nsdata'.
 *
 * Returns 0 on success, EINVAL for a non-admin queue pair, EIO if the
 * controller reports a non-zero status, or the error from building,
 * sending, or receiving the command.
 */
int
nvmf_host_identify_namespace(struct nvmf_qpair *qp, uint32_t nsid,
    struct nvme_namespace_data *nsdata)
{
	struct nvme_command cmd;
	struct nvmf_capsule *cc, *rc;
	uint16_t status;
	int rv;

	if (!qp->nq_admin)
		return (EINVAL);

	nvmf_init_sqe(&cmd, NVME_OPC_IDENTIFY);
	cmd.nsid = htole32(nsid);

	/* 5.15.1 Use CNS of 0x00 for namespace data. */
	cmd.cdw10 = htole32(0);

	cc = nvmf_allocate_command(qp, &cmd);
	if (cc == NULL)
		return (errno);

	/* Each step runs only if the previous one succeeded. */
	rv = nvmf_capsule_append_data(cc, nsdata, sizeof(*nsdata), false);
	if (rv == 0)
		rv = nvmf_host_transmit_command(cc);
	if (rv == 0)
		rv = nvmf_host_wait_for_response(cc, &rc);
	nvmf_free_capsule(cc);
	if (rv != 0)
		return (rv);

	status = le16toh(rc->nc_cqe.status);
	if (status != 0) {
		printf("NVMF: IDENTIFY failed, status %#x\n", status);
		rv = EIO;
	}
	nvmf_free_capsule(rc);
	return (rv);
}
/*
 * Read 'len' bytes of the discovery log page starting at 'offset'
 * into 'buf'.
 *
 * 'len' must be a non-zero multiple of 4 and 'offset' a multiple of
 * 4, otherwise EINVAL is returned.  Returns EAGAIN if the controller
 * responds with the "restart discovery" status code, EIO for any
 * other non-zero status, or the error from building, sending, or
 * receiving the command.
 */
static int
nvmf_get_discovery_log_page(struct nvmf_qpair *qp, uint64_t offset, void *buf,
    size_t len)
{
	struct nvme_command cmd;
	struct nvmf_capsule *cc, *rc;
	size_t numd;
	uint16_t status;
	int rv;

	if (len == 0 || len % 4 != 0 || offset % 4 != 0)
		return (EINVAL);

	/* The dword count is 0's based and split across CDW10/CDW11. */
	numd = (len / 4) - 1;
	nvmf_init_sqe(&cmd, NVME_OPC_GET_LOG_PAGE);
	cmd.cdw10 = htole32(numd << 16 | NVME_LOG_DISCOVERY);
	cmd.cdw11 = htole32(numd >> 16);
	cmd.cdw12 = htole32(offset);
	cmd.cdw13 = htole32(offset >> 32);

	cc = nvmf_allocate_command(qp, &cmd);
	if (cc == NULL)
		return (errno);

	/* Each step runs only if the previous one succeeded. */
	rv = nvmf_capsule_append_data(cc, buf, len, false);
	if (rv == 0)
		rv = nvmf_host_transmit_command(cc);
	if (rv == 0)
		rv = nvmf_host_wait_for_response(cc, &rc);
	nvmf_free_capsule(cc);
	if (rv != 0)
		return (rv);

	status = le16toh(rc->nc_cqe.status);
	if (NVMEV(NVME_STATUS_SC, status) ==
	    NVMF_FABRIC_SC_LOG_RESTART_DISCOVERY) {
		rv = EAGAIN;
	} else if (status != 0) {
		printf("NVMF: GET_LOG_PAGE failed, status %#x\n", status);
		rv = EIO;
	}
	nvmf_free_capsule(rc);
	return (rv);
}
/*
 * Fetch the complete discovery log page over an admin queue pair.
 *
 * The header is read first to learn the number of records, then the
 * records are read, and finally the header is re-read.  If the
 * generation counter changed in between, or the controller asked for
 * discovery to be restarted, the whole sequence is repeated.
 *
 * On success 0 is returned and '*logp' points to a log allocated with
 * reallocf() that the caller must free().  On failure an errno value
 * is returned and any partially built log is released, including one
 * left over from an earlier pass through the retry loop.
 */
int
nvmf_host_fetch_discovery_log_page(struct nvmf_qpair *qp,
    struct nvme_discovery_log **logp)
{
	struct nvme_discovery_log hdr, *log;
	size_t payload_len;
	int error;

	if (!qp->nq_admin)
		return (EINVAL);

	log = NULL;
	for (;;) {
		error = nvmf_get_discovery_log_page(qp, 0, &hdr, sizeof(hdr));
		if (error != 0)
			goto fail;
		nvme_discovery_log_swapbytes(&hdr);

		if (hdr.recfmt != 0) {
			printf("NVMF: Unsupported discovery log format: %d\n",
			    hdr.recfmt);
			error = EINVAL;
			goto fail;
		}

		if (hdr.numrec > 1024) {
			printf("NVMF: Too many discovery log entries: %ju\n",
			    (uintmax_t)hdr.numrec);
			error = EFBIG;
			goto fail;
		}

		/* reallocf() frees the old buffer itself on failure. */
		payload_len = sizeof(log->entries[0]) * hdr.numrec;
		log = reallocf(log, sizeof(*log) + payload_len);
		if (log == NULL)
			return (ENOMEM);
		*log = hdr;
		if (hdr.numrec == 0)
			break;

		error = nvmf_get_discovery_log_page(qp, sizeof(hdr),
		    log->entries, payload_len);
		if (error == EAGAIN)
			continue;
		if (error != 0)
			goto fail;

		/* Re-read the header and check the generation count. */
		error = nvmf_get_discovery_log_page(qp, 0, &hdr, sizeof(hdr));
		if (error != 0)
			goto fail;
		nvme_discovery_log_swapbytes(&hdr);

		if (log->genctr != hdr.genctr)
			continue;

		for (u_int i = 0; i < log->numrec; i++)
			nvme_discovery_log_entry_swapbytes(&log->entries[i]);
		break;
	}
	*logp = log;
	return (0);

fail:
	/* 'log' is NULL on the first pass; free(NULL) is a no-op. */
	free(log);
	return (error);
}
/*
 * Ask the controller for 'requested' I/O queues using the Number of
 * Queues feature, requesting the same count of submission and
 * completion queues.
 *
 * On success 0 is returned and '*actual' is set from the low 16 bits
 * of the completion's CDW0 plus one.  Returns EINVAL for a non-admin
 * queue pair or a count outside 1..65535, EIO if the controller
 * reports a non-zero status, or the error from building, sending, or
 * receiving the command.
 */
int
nvmf_host_request_queues(struct nvmf_qpair *qp, u_int requested, u_int *actual)
{
	struct nvme_command cmd;
	struct nvmf_capsule *cc, *rc;
	uint16_t status;
	u_int nq;
	int rv;

	if (requested == 0 || requested > 65535 || !qp->nq_admin)
		return (EINVAL);

	/* The number of queues is 0's based. */
	nq = requested - 1;

	nvmf_init_sqe(&cmd, NVME_OPC_SET_FEATURES);
	cmd.cdw10 = htole32(NVME_FEAT_NUMBER_OF_QUEUES);

	/* Same number of completion and submission queues. */
	cmd.cdw11 = htole32((nq << 16) | nq);

	cc = nvmf_allocate_command(qp, &cmd);
	if (cc == NULL)
		return (errno);

	/* The command capsule is released whether or not this succeeds. */
	rv = nvmf_host_transmit_command(cc);
	if (rv == 0)
		rv = nvmf_host_wait_for_response(cc, &rc);
	nvmf_free_capsule(cc);
	if (rv != 0)
		return (rv);

	status = le16toh(rc->nc_cqe.status);
	if (status == 0) {
		*actual = (le32toh(rc->nc_cqe.cdw0) & 0xffff) + 1;
	} else {
		printf("NVMF: SET_FEATURES failed, status %#x\n", status);
		rv = EIO;
	}
	nvmf_free_capsule(rc);
	return (rv);
}
/*
 * A queue pair is idle when no commands are outstanding (head equals
 * tail) and no received capsules are queued on it.
 */
static bool
is_queue_pair_idle(struct nvmf_qpair *qp)
{
	return (qp->nq_sqhd == qp->nq_sqtail &&
	    TAILQ_EMPTY(&qp->nq_rx_capsules));
}
/*
 * Populate 'hh' with the handoff parameters for the admin queue pair
 * and 'num_queues' I/O queue pairs.
 *
 * Returns 0 on success, in which case 'hh->io' is a calloc()ed array
 * the caller must free().  Returns EBUSY if any queue pair is not
 * idle, ENOMEM if the I/O parameter array cannot be allocated, or the
 * error from nvmf_kernel_handoff_params().  On failure 'hh->io' is
 * left NULL.
 */
static int
prepare_queues_for_handoff(struct nvmf_handoff_host *hh,
    struct nvmf_qpair *admin_qp, u_int num_queues,
    struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata)
{
	struct nvmf_handoff_qpair_params *io;
	u_int i;
	int error;

	memset(hh, 0, sizeof(*hh));

	/* All queue pairs must be idle. */
	if (!is_queue_pair_idle(admin_qp))
		return (EBUSY);
	for (i = 0; i < num_queues; i++) {
		if (!is_queue_pair_idle(io_queues[i]))
			return (EBUSY);
	}

	/* First, the admin queue. */
	hh->trtype = admin_qp->nq_association->na_trtype;
	hh->kato = admin_qp->nq_kato;
	error = nvmf_kernel_handoff_params(admin_qp, &hh->admin);
	if (error)
		return (error);

	/* Next, the I/O queues. */
	hh->num_io_queues = num_queues;
	io = calloc(num_queues, sizeof(*io));

	/*
	 * calloc() may legitimately return NULL for a zero-sized
	 * request, so only treat NULL as a failure when entries were
	 * actually requested.
	 */
	if (io == NULL && num_queues != 0)
		return (ENOMEM);
	for (i = 0; i < num_queues; i++) {
		error = nvmf_kernel_handoff_params(io_queues[i], &io[i]);
		if (error) {
			free(io);
			return (error);
		}
	}

	hh->io = io;
	hh->cdata = cdata;
	return (0);
}
/*
 * Hand a connected admin queue pair and its I/O queue pairs off to
 * the kernel via the NVMF_HANDOFF_HOST ioctl on /dev/nvmf.
 *
 * All of the queue pairs are freed before returning, whether or not
 * the handoff succeeded.  Returns 0 on success or an errno value.
 */
int
nvmf_handoff_host(struct nvmf_qpair *admin_qp, u_int num_queues,
    struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata)
{
	struct nvmf_handoff_host hh;
	u_int i;
	int error, fd;

	fd = open("/dev/nvmf", O_RDWR);
	if (fd == -1) {
		error = errno;
	} else {
		error = prepare_queues_for_handoff(&hh, admin_qp, num_queues,
		    io_queues, cdata);
		if (error == 0) {
			if (ioctl(fd, NVMF_HANDOFF_HOST, &hh) == -1)
				error = errno;
			free(hh.io);
		}
		close(fd);
	}

	/* The queue pairs are consumed regardless of the outcome. */
	for (i = 0; i < num_queues; i++)
		(void)nvmf_free_qpair(io_queues[i]);
	(void)nvmf_free_qpair(admin_qp);
	return (error);
}
/*
 * Issue the NVMF_DISCONNECT_HOST ioctl on /dev/nvmf for 'host'.
 * Returns 0 on success or the errno from open() or ioctl().
 */
int
nvmf_disconnect_host(const char *host)
{
	int error, fd;

	fd = open("/dev/nvmf", O_RDWR);
	if (fd == -1)
		return (errno);

	error = 0;
	if (ioctl(fd, NVMF_DISCONNECT_HOST, &host) == -1)
		error = errno;
	close(fd);
	return (error);
}
/*
 * Issue the NVMF_DISCONNECT_ALL ioctl on /dev/nvmf.  Returns 0 on
 * success or the errno from open() or ioctl().
 */
int
nvmf_disconnect_all(void)
{
	int error, fd;

	fd = open("/dev/nvmf", O_RDWR);
	if (fd == -1)
		return (errno);

	error = 0;
	if (ioctl(fd, NVMF_DISCONNECT_ALL) == -1)
		error = errno;
	close(fd);
	return (error);
}
/*
 * Issue the NVMF_RECONNECT_PARAMS ioctl on 'fd' with 'rparams' as its
 * argument.  Returns 0 on success or the errno from ioctl().
 */
int
nvmf_reconnect_params(int fd, struct nvmf_reconnect_params *rparams)
{
	return (ioctl(fd, NVMF_RECONNECT_PARAMS, rparams) == -1 ? errno : 0);
}
/*
 * Hand a freshly connected set of queue pairs to the kernel via the
 * NVMF_RECONNECT_HOST ioctl on 'fd'.
 *
 * All of the queue pairs are freed before returning, whether or not
 * the ioctl succeeded.  Returns 0 on success or an errno value.
 */
int
nvmf_reconnect_host(int fd, struct nvmf_qpair *admin_qp, u_int num_queues,
    struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata)
{
	struct nvmf_handoff_host hh;
	u_int i;
	int error;

	error = prepare_queues_for_handoff(&hh, admin_qp, num_queues, io_queues,
	    cdata);
	if (error == 0) {
		if (ioctl(fd, NVMF_RECONNECT_HOST, &hh) == -1)
			error = errno;
		free(hh.io);
	}

	/* The queue pairs are consumed regardless of the outcome. */
	for (i = 0; i < num_queues; i++)
		(void)nvmf_free_qpair(io_queues[i]);
	(void)nvmf_free_qpair(admin_qp);
	return (error);
}

1474
lib/libnvmf/nvmf_tcp.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,269 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022-2024 Chelsio Communications, Inc.
* Written by: John Baldwin <jhb@FreeBSD.org>
*/
#include <sys/refcount.h>
#include <assert.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "libnvmf.h"
#include "internal.h"
/*
 * Allocate an association for the given transport type.
 *
 * Only NVMF_TRTYPE_TCP is supported; any other type fails with NULL
 * and errno set to EINVAL.  The new association starts with a single
 * reference, a copy of 'params', and no recorded error.
 */
struct nvmf_association *
nvmf_allocate_association(enum nvmf_trtype trtype, bool controller,
    const struct nvmf_association_params *params)
{
	struct nvmf_transport_ops *ops;
	struct nvmf_association *na;

	if (trtype != NVMF_TRTYPE_TCP) {
		errno = EINVAL;
		return (NULL);
	}
	ops = &tcp_ops;

	na = ops->allocate_association(controller, params);
	if (na != NULL) {
		na->na_ops = ops;
		na->na_trtype = trtype;
		na->na_controller = controller;
		na->na_params = *params;
		na->na_last_error = NULL;
		refcount_init(&na->na_refs, 1);
	}
	return (na);
}
void
nvmf_update_assocation(struct nvmf_association *na,
const struct nvme_controller_data *cdata)
{
na->na_ops->update_association(na, cdata);
}
/*
 * Drop a reference on an association.  When the last reference is
 * released, the saved error string is freed and the transport's
 * free_association hook is called.
 */
void
nvmf_free_association(struct nvmf_association *na)
{
	if (!refcount_release(&na->na_refs))
		return;

	free(na->na_last_error);
	na->na_ops->free_association(na);
}
/*
 * Return the error message last recorded on the association, or NULL
 * if none is recorded.
 */
const char *
nvmf_association_error(const struct nvmf_association *na)
{
	const char *msg;

	msg = na->na_last_error;
	return (msg);
}
/* Discard any error message recorded on the association. */
void
na_clear_error(struct nvmf_association *na)
{
	char *old;

	old = na->na_last_error;
	free(old);
	na->na_last_error = NULL;
}
/*
 * Record a printf-style formatted error message on the association.
 *
 * Only the first error after na_clear_error() is kept; later calls
 * are ignored while a message is already recorded.  If the message
 * cannot be allocated, no message is recorded.
 */
void
na_error(struct nvmf_association *na, const char *fmt, ...)
{
	va_list ap;
	char *str;

	if (na->na_last_error != NULL)
		return;

	/*
	 * On failure the contents of 'str' are not portable to rely on,
	 * so force NULL rather than saving a possibly invalid pointer
	 * that would later be passed to free().
	 */
	va_start(ap, fmt);
	if (vasprintf(&str, fmt, ap) == -1)
		str = NULL;
	va_end(ap);
	na->na_last_error = str;
}
/*
 * Allocate a queue pair on an association using the transport's
 * allocate_qpair hook.
 *
 * Any previously recorded association error is cleared first.  On
 * success the queue pair holds a reference on the association and has
 * an empty list of received capsules.  Returns NULL if the transport
 * fails to allocate the queue pair.
 */
struct nvmf_qpair *
nvmf_allocate_qpair(struct nvmf_association *na,
    const struct nvmf_qpair_params *params)
{
	struct nvmf_qpair *qp;

	na_clear_error(na);

	qp = na->na_ops->allocate_qpair(na, params);
	if (qp != NULL) {
		refcount_acquire(&na->na_refs);
		qp->nq_association = na;
		qp->nq_admin = params->admin;
		TAILQ_INIT(&qp->nq_rx_capsules);
	}
	return (qp);
}
void
nvmf_free_qpair(struct nvmf_qpair *qp)
{
struct nvmf_association *na;
struct nvmf_capsule *nc, *tc;
TAILQ_FOREACH_SAFE(nc, &qp->nq_rx_capsules, nc_link, tc) {
TAILQ_REMOVE(&qp->nq_rx_capsules, nc, nc_link);
nvmf_free_capsule(nc);
}
na = qp->nq_association;
na->na_ops->free_qpair(qp);
nvmf_free_association(na);
}
struct nvmf_capsule *
nvmf_allocate_command(struct nvmf_qpair *qp, const void *sqe)
{
struct nvmf_capsule *nc;
nc = qp->nq_association->na_ops->allocate_capsule(qp);
if (nc == NULL)
return (NULL);
nc->nc_qpair = qp;
nc->nc_qe_len = sizeof(struct nvme_command);
memcpy(&nc->nc_sqe, sqe, nc->nc_qe_len);
/* 4.2 of NVMe base spec: Fabrics always uses SGL. */
nc->nc_sqe.fuse &= ~NVMEM(NVME_CMD_PSDT);
nc->nc_sqe.fuse |= NVMEF(NVME_CMD_PSDT, NVME_PSDT_SGL);
return (nc);
}
struct nvmf_capsule *
nvmf_allocate_response(struct nvmf_qpair *qp, const void *cqe)
{
struct nvmf_capsule *nc;
nc = qp->nq_association->na_ops->allocate_capsule(qp);
if (nc == NULL)
return (NULL);
nc->nc_qpair = qp;
nc->nc_qe_len = sizeof(struct nvme_completion);
memcpy(&nc->nc_cqe, cqe, nc->nc_qe_len);
return (nc);
}
/*
 * Attach a data buffer to a command capsule.
 *
 * 'send' is stored as the capsule's send-data flag.  Only the pointer
 * and length are recorded; the buffer is not copied.  Returns EINVAL
 * for a response capsule and EBUSY if the capsule already has a
 * non-zero data length.
 */
int
nvmf_capsule_append_data(struct nvmf_capsule *nc, void *buf, size_t len,
    bool send)
{
	bool is_response, has_data;

	is_response = nc->nc_qe_len == sizeof(struct nvme_completion);
	if (is_response)
		return (EINVAL);

	has_data = nc->nc_data_len != 0;
	if (has_data)
		return (EBUSY);

	nc->nc_send_data = send;
	nc->nc_data_len = len;
	nc->nc_data = buf;
	return (0);
}
void
nvmf_free_capsule(struct nvmf_capsule *nc)
{
nc->nc_qpair->nq_association->na_ops->free_capsule(nc);
}
int
nvmf_transmit_capsule(struct nvmf_capsule *nc)
{
return (nc->nc_qpair->nq_association->na_ops->transmit_capsule(nc));
}
int
nvmf_receive_capsule(struct nvmf_qpair *qp, struct nvmf_capsule **ncp)
{
return (qp->nq_association->na_ops->receive_capsule(qp, ncp));
}
/*
 * Return a pointer to the submission queue entry of a command
 * capsule.  Asserts that the capsule holds a command.
 */
const void *
nvmf_capsule_sqe(const struct nvmf_capsule *nc)
{
	const void *sqe;

	assert(nc->nc_qe_len == sizeof(struct nvme_command));

	sqe = &nc->nc_sqe;
	return (sqe);
}
/*
 * Return a pointer to the completion queue entry of a response
 * capsule.  Asserts that the capsule holds a completion.
 */
const void *
nvmf_capsule_cqe(const struct nvmf_capsule *nc)
{
	const void *cqe;

	assert(nc->nc_qe_len == sizeof(struct nvme_completion));

	cqe = &nc->nc_cqe;
	return (cqe);
}
uint8_t
nvmf_validate_command_capsule(const struct nvmf_capsule *nc)
{
assert(nc->nc_qe_len == sizeof(struct nvme_command));
if (NVMEV(NVME_CMD_PSDT, nc->nc_sqe.fuse) != NVME_PSDT_SGL)
return (NVME_SC_INVALID_FIELD);
return (nc->nc_qpair->nq_association->na_ops->validate_command_capsule(nc));
}
size_t
nvmf_capsule_data_len(const struct nvmf_capsule *nc)
{
return (nc->nc_qpair->nq_association->na_ops->capsule_data_len(nc));
}
int
nvmf_receive_controller_data(const struct nvmf_capsule *nc,
uint32_t data_offset, void *buf, size_t len)
{
return (nc->nc_qpair->nq_association->na_ops->receive_controller_data(nc,
data_offset, buf, len));
}
int
nvmf_send_controller_data(const struct nvmf_capsule *nc, const void *buf,
size_t len)
{
return (nc->nc_qpair->nq_association->na_ops->send_controller_data(nc,
buf, len));
}
int
nvmf_kernel_handoff_params(struct nvmf_qpair *qp,
struct nvmf_handoff_qpair_params *qparams)
{
memset(qparams, 0, sizeof(*qparams));
qparams->admin = qp->nq_admin;
qparams->sq_flow_control = qp->nq_flow_control;
qparams->qsize = qp->nq_qsize;
qparams->sqhd = qp->nq_sqhd;
qparams->sqtail = qp->nq_sqtail;
return (qp->nq_association->na_ops->kernel_handoff_params(qp, qparams));
}
/*
 * Return a human-readable name for a transport type.
 *
 * Known types map to string literals.  Unknown types are formatted as
 * a hexadecimal value into a per-thread static buffer, so that result
 * is overwritten by the next such call on the same thread.  The
 * formatted value carries no trailing newline, matching the names
 * returned for known types.
 */
const char *
nvmf_transport_type(uint8_t trtype)
{
	static _Thread_local char buf[8];

	switch (trtype) {
	case NVMF_TRTYPE_RDMA:
		return ("RDMA");
	case NVMF_TRTYPE_FC:
		return ("Fibre Channel");
	case NVMF_TRTYPE_TCP:
		return ("TCP");
	case NVMF_TRTYPE_INTRA_HOST:
		return ("Intra-host");
	default:
		snprintf(buf, sizeof(buf), "0x%02x", trtype);
		return (buf);
	}
}

View File

@ -56,6 +56,7 @@ _INTERNALLIBS= \
netbsd \
ntp \
ntpevent \
nvmf \
openbsd \
opts \
parse \
@ -599,6 +600,9 @@ LIBNV?= ${LIBNVDIR}/libnv${PIE_SUFFIX}.a
LIBISCSIUTILDIR= ${_LIB_OBJTOP}/lib/libiscsiutil
LIBISCSIUTIL?= ${LIBISCSIUTILDIR}/libiscsiutil${PIE_SUFFIX}.a
LIBNVMFDIR= ${_LIB_OBJTOP}/lib/libnvmf
LIBNVMF?= ${LIBNVMFDIR}/libnvmf${PIE_SUFFIX}.a
LIBTELNETDIR= ${_LIB_OBJTOP}/lib/libtelnet
LIBTELNET?= ${LIBTELNETDIR}/libtelnet${PIE_SUFFIX}.a