freebsd-src/lib/libnvmf/nvmf_transport.c
John Baldwin 2da066ef6d libnvmf: Add internal library to support NVMe over Fabrics
libnvmf provides APIs for transmitting and receiving Command and
Response capsules along with data associated with NVMe commands.
Capsules are represented by 'struct nvmf_capsule' objects.

Capsules are transmitted and received on queue pairs represented by
'struct nvmf_qpair' objects.

Queue pairs belong to an association represented by a 'struct
nvmf_association' object.

libnvmf provides additional helper APIs to assist with constructing
command capsules for a host, response capsules for a controller,
connecting queue pairs to a remote controller and optionally
offloading connected queues to an in-kernel host, accepting queue pair
connections from remote hosts and optionally offloading connected
queues to an in-kernel controller, constructing controller data
structures for local controllers, etc.

libnvmf also includes an internal transport abstraction as well as an
implementation of a userspace TCP transport.

libnvmf is primarily intended for ease of use and low-traffic use cases
such as establishing connections that are handed off to the kernel.
As such, it uses a simple API built on blocking I/O.

For a host, a consumer first populates a 'struct
nvmf_association_params' with a set of parameters shared by all queue
pairs for a single association such as whether or not to use SQ flow
control and header and data digests and creates a 'struct
nvmf_association' object.  The consumer is responsible for
establishing a TCP socket for each queue pair.  This socket is
included in the 'struct nvmf_qpair_params' passed to 'nvmf_connect' to
complete transport-specific negotiation, send a Fabrics Connect
command, and wait for the Connect reply. Upon success, a new 'struct
nvmf_qpair' object is returned.  This queue pair can then be used to
send and receive capsules.  A command capsule is allocated, populated
with an SQE and optional data buffer, and transmitted via
nvmf_host_transmit_command.  The consumer can then wait for a reply
via nvmf_host_wait_for_response.  The library also provides some
wrapper functions such as nvmf_read_property and nvmf_write_property
which send a command and wait for a response synchronously.

For a controller, a consumer uses a single association for a set of
incoming connections.  A consumer can choose to use multiple
associations (e.g. a separate association for connections to a
discovery controller listening on a different port than I/O
controllers).  The consumer is responsible for accepting TCP sockets
directly, but once a socket has been accepted it is passed to
nvmf_accept to perform transport-specific negotiation and wait for the
Connect command.  Similar to nvmf_connect, nvmf_accept returns a newly
constructed nvmf_qpair.  However, in contrast to nvmf_connect,
nvmf_accept does not complete the Fabrics negotiation.  The consumer
must explicitly send a response capsule before waiting for additional
command capsules to arrive.  In particular, in the kernel offload
case, the Connect command and data are provided to the kernel
controller and the Connect response capsule is sent by the kernel once
it is ready to handle the new queue pair.

For userspace controller command handling, the consumer uses
nvmf_controller_receive_capsule to wait for a command capsule.
nvmf_receive_controller_data is used to retrieve any data from a
command (e.g. the data for a WRITE command).  It can be called
multiple times to split the data transfer into smaller sizes.
nvmf_send_controller_data is used to send data to a remote host in
response to a command.  It also sends a response capsule indicating
success, or an error if an internal error occurs.  nvmf_send_response
is used to send a response without associated data.  There are also
several convenience wrappers such as nvmf_send_success and
nvmf_send_generic_error.

Reviewed by:	imp
Sponsored by:	Chelsio Communications
Differential Revision:	https://reviews.freebsd.org/D44710
2024-05-02 16:28:16 -07:00

270 lines
5.7 KiB
C

/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022-2024 Chelsio Communications, Inc.
* Written by: John Baldwin <jhb@FreeBSD.org>
*/
#include <sys/refcount.h>
#include <assert.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "libnvmf.h"
#include "internal.h"
/*
 * Create an association for the given transport type.  The caller's
 * parameters are copied into the new object, which starts with a
 * single reference.  Returns NULL (with errno set for an unsupported
 * transport) on failure.
 */
struct nvmf_association *
nvmf_allocate_association(enum nvmf_trtype trtype, bool controller,
    const struct nvmf_association_params *params)
{
	struct nvmf_association *na;
	struct nvmf_transport_ops *ops;

	/* Only the TCP transport is implemented in userspace. */
	if (trtype == NVMF_TRTYPE_TCP) {
		ops = &tcp_ops;
	} else {
		errno = EINVAL;
		return (NULL);
	}

	na = ops->allocate_association(controller, params);
	if (na == NULL)
		return (NULL);

	na->na_ops = ops;
	na->na_trtype = trtype;
	na->na_controller = controller;
	na->na_params = *params;
	na->na_last_error = NULL;
	refcount_init(&na->na_refs, 1);
	return (na);
}
void
nvmf_update_assocation(struct nvmf_association *na,
const struct nvme_controller_data *cdata)
{
na->na_ops->update_association(na, cdata);
}
/*
 * Drop one reference on the association; the last release frees the
 * saved error string and hands the object back to the transport.
 */
void
nvmf_free_association(struct nvmf_association *na)
{
	if (!refcount_release(&na->na_refs))
		return;

	free(na->na_last_error);
	na->na_ops->free_association(na);
}
/*
 * Return the most recently recorded error string for this
 * association, or NULL if no error is pending.
 */
const char *
nvmf_association_error(const struct nvmf_association *na)
{
	const char *msg;

	msg = na->na_last_error;
	return (msg);
}
/*
 * Discard any pending error string so a subsequent na_error() call
 * can record a new one.
 */
void
na_clear_error(struct nvmf_association *na)
{
	char *old;

	old = na->na_last_error;
	na->na_last_error = NULL;
	free(old);
}
/*
 * Record a formatted error message on the association.  Only the
 * first error is kept; later calls are ignored until the error is
 * cleared.  The string is heap-allocated and owned by the
 * association (freed in nvmf_free_association / na_clear_error).
 */
void
na_error(struct nvmf_association *na, const char *fmt, ...)
{
	va_list ap;
	char *str;

	if (na->na_last_error != NULL)
		return;
	va_start(ap, fmt);
	/*
	 * On vasprintf failure the contents of str are undefined
	 * (POSIX), so don't store a garbage pointer that would later
	 * be free()d; drop the message instead.
	 */
	if (vasprintf(&str, fmt, ap) < 0)
		str = NULL;
	va_end(ap);
	na->na_last_error = str;
}
/*
 * Create a queue pair on the association.  On success the queue pair
 * holds its own reference on the association (dropped in
 * nvmf_free_qpair).  Returns NULL on failure; any transport error is
 * retrievable via nvmf_association_error().
 */
struct nvmf_qpair *
nvmf_allocate_qpair(struct nvmf_association *na,
    const struct nvmf_qpair_params *params)
{
	struct nvmf_qpair *nq;

	na_clear_error(na);
	nq = na->na_ops->allocate_qpair(na, params);
	if (nq == NULL)
		return (NULL);

	refcount_acquire(&na->na_refs);
	TAILQ_INIT(&nq->nq_rx_capsules);
	nq->nq_association = na;
	nq->nq_admin = params->admin;
	return (nq);
}
void
nvmf_free_qpair(struct nvmf_qpair *qp)
{
struct nvmf_association *na;
struct nvmf_capsule *nc, *tc;
TAILQ_FOREACH_SAFE(nc, &qp->nq_rx_capsules, nc_link, tc) {
TAILQ_REMOVE(&qp->nq_rx_capsules, nc, nc_link);
nvmf_free_capsule(nc);
}
na = qp->nq_association;
na->na_ops->free_qpair(qp);
nvmf_free_association(na);
}
/*
 * Allocate a command capsule and copy the caller's SQE into it.  The
 * PSDT field is forced to SGL addressing since Fabrics always uses
 * SGLs (NVMe base spec, section 4.2).  Returns NULL on allocation
 * failure.
 */
struct nvmf_capsule *
nvmf_allocate_command(struct nvmf_qpair *qp, const void *sqe)
{
	struct nvmf_capsule *nc;

	nc = qp->nq_association->na_ops->allocate_capsule(qp);
	if (nc == NULL)
		return (NULL);

	nc->nc_qpair = qp;
	nc->nc_qe_len = sizeof(struct nvme_command);
	memcpy(&nc->nc_sqe, sqe, sizeof(struct nvme_command));

	/* Clear PSDT, then set it to SGL in a single expression. */
	nc->nc_sqe.fuse = (nc->nc_sqe.fuse & ~NVMEM(NVME_CMD_PSDT)) |
	    NVMEF(NVME_CMD_PSDT, NVME_PSDT_SGL);
	return (nc);
}
/*
 * Allocate a response capsule and copy the caller's CQE into it.
 * Returns NULL on allocation failure.
 */
struct nvmf_capsule *
nvmf_allocate_response(struct nvmf_qpair *qp, const void *cqe)
{
	struct nvmf_capsule *nc;

	nc = qp->nq_association->na_ops->allocate_capsule(qp);
	if (nc == NULL)
		return (NULL);

	nc->nc_qpair = qp;
	nc->nc_qe_len = sizeof(struct nvme_completion);
	memcpy(&nc->nc_cqe, cqe, sizeof(struct nvme_completion));
	return (nc);
}
/*
 * Attach a data buffer to a command capsule.  'send' selects the
 * transfer direction (true = host-to-controller).  The buffer is
 * borrowed, not copied; the caller must keep it valid for the life
 * of the capsule.  Returns EINVAL for a response capsule, EBUSY if a
 * buffer is already attached, 0 on success.
 */
int
nvmf_capsule_append_data(struct nvmf_capsule *nc, void *buf, size_t len,
    bool send)
{
	if (nc->nc_qe_len == sizeof(struct nvme_completion))
		return (EINVAL);
	if (nc->nc_data_len != 0)
		return (EBUSY);

	nc->nc_send_data = send;
	nc->nc_data = buf;
	nc->nc_data_len = len;
	return (0);
}
void
nvmf_free_capsule(struct nvmf_capsule *nc)
{
nc->nc_qpair->nq_association->na_ops->free_capsule(nc);
}
int
nvmf_transmit_capsule(struct nvmf_capsule *nc)
{
return (nc->nc_qpair->nq_association->na_ops->transmit_capsule(nc));
}
int
nvmf_receive_capsule(struct nvmf_qpair *qp, struct nvmf_capsule **ncp)
{
return (qp->nq_association->na_ops->receive_capsule(qp, ncp));
}
/*
 * Return a pointer to the capsule's submission queue entry.  The
 * capsule must be a command capsule.
 */
const void *
nvmf_capsule_sqe(const struct nvmf_capsule *nc)
{
	const void *sqe;

	assert(nc->nc_qe_len == sizeof(struct nvme_command));
	sqe = &nc->nc_sqe;
	return (sqe);
}
/*
 * Return a pointer to the capsule's completion queue entry.  The
 * capsule must be a response capsule.
 */
const void *
nvmf_capsule_cqe(const struct nvmf_capsule *nc)
{
	const void *cqe;

	assert(nc->nc_qe_len == sizeof(struct nvme_completion));
	cqe = &nc->nc_cqe;
	return (cqe);
}
uint8_t
nvmf_validate_command_capsule(const struct nvmf_capsule *nc)
{
assert(nc->nc_qe_len == sizeof(struct nvme_command));
if (NVMEV(NVME_CMD_PSDT, nc->nc_sqe.fuse) != NVME_PSDT_SGL)
return (NVME_SC_INVALID_FIELD);
return (nc->nc_qpair->nq_association->na_ops->validate_command_capsule(nc));
}
size_t
nvmf_capsule_data_len(const struct nvmf_capsule *nc)
{
return (nc->nc_qpair->nq_association->na_ops->capsule_data_len(nc));
}
int
nvmf_receive_controller_data(const struct nvmf_capsule *nc,
uint32_t data_offset, void *buf, size_t len)
{
return (nc->nc_qpair->nq_association->na_ops->receive_controller_data(nc,
data_offset, buf, len));
}
int
nvmf_send_controller_data(const struct nvmf_capsule *nc, const void *buf,
size_t len)
{
return (nc->nc_qpair->nq_association->na_ops->send_controller_data(nc,
buf, len));
}
int
nvmf_kernel_handoff_params(struct nvmf_qpair *qp,
struct nvmf_handoff_qpair_params *qparams)
{
memset(qparams, 0, sizeof(*qparams));
qparams->admin = qp->nq_admin;
qparams->sq_flow_control = qp->nq_flow_control;
qparams->qsize = qp->nq_qsize;
qparams->sqhd = qp->nq_sqhd;
qparams->sqtail = qp->nq_sqtail;
return (qp->nq_association->na_ops->kernel_handoff_params(qp, qparams));
}
/*
 * Return a human-readable name for an NVMe-oF transport type.  For
 * unknown values a "0xNN" string is formatted into thread-local
 * storage, so the returned pointer is only valid until the next call
 * from the same thread.
 */
const char *
nvmf_transport_type(uint8_t trtype)
{
	static _Thread_local char buf[8];

	switch (trtype) {
	case NVMF_TRTYPE_RDMA:
		return ("RDMA");
	case NVMF_TRTYPE_FC:
		return ("Fibre Channel");
	case NVMF_TRTYPE_TCP:
		return ("TCP");
	case NVMF_TRTYPE_INTRA_HOST:
		return ("Intra-host");
	default:
		/*
		 * Bug fix: the format string previously embedded a
		 * trailing '\n', which leaked a newline into callers'
		 * messages.  A type name must not contain one.
		 */
		snprintf(buf, sizeof(buf), "0x%02x", trtype);
		return (buf);
	}
}