mirror of
https://github.com/freebsd/freebsd-src
synced 2024-10-14 20:37:06 +00:00
cxgbe: Support TCP_USE_DDP on offloaded TOE connections
When this socket option is enabled, relatively large contiguous buffers are allocated and used to receive data from the remote connection. When data is received a wrapper M_EXT mbuf is queued to the socket's receive buffer. This reduces the length of the linked list of received mbufs and allows consumers to consume receive data in larger chunks. To minimize reprogramming the page pods in the adapter, receive buffers for a given connection are recycled. When a buffer has been fully consumed by the receiver and freed, the buffer is placed on a per-connection free buffers list. The size of the receive buffers defaults to 256k and can be set via the hw.cxgbe.toe.ddp_rcvbuf_len sysctl. The hw.cxgbe.toe.ddp_rcvbuf_cache sysctl (defaults to 4) determines the maximum number of free buffers cached per connection. Note that this limit does not apply to "in-flight" receive buffers that are associated with mbufs in the socket's receive buffer. Co-authored-by: Navdeep Parhar <np@FreeBSD.org> Sponsored by: Chelsio Communications Differential Revision: https://reviews.freebsd.org/D44001
This commit is contained in:
parent
3d0a736796
commit
eba13bbc37
|
@ -690,6 +690,10 @@ struct sge_ofld_rxq {
|
|||
uint64_t rx_aio_ddp_octets;
|
||||
u_long rx_toe_tls_records;
|
||||
u_long rx_toe_tls_octets;
|
||||
u_long rx_toe_ddp_octets;
|
||||
counter_u64_t ddp_buffer_alloc;
|
||||
counter_u64_t ddp_buffer_reuse;
|
||||
counter_u64_t ddp_buffer_free;
|
||||
} __aligned(CACHE_LINE_SIZE);
|
||||
|
||||
static inline struct sge_ofld_rxq *
|
||||
|
@ -1344,6 +1348,8 @@ extern int t4_tmr_idx;
|
|||
extern int t4_pktc_idx;
|
||||
extern unsigned int t4_qsize_rxq;
|
||||
extern unsigned int t4_qsize_txq;
|
||||
extern int t4_ddp_rcvbuf_len;
|
||||
extern unsigned int t4_ddp_rcvbuf_cache;
|
||||
extern device_method_t cxgbe_methods[];
|
||||
|
||||
int t4_os_find_pci_capability(struct adapter *, int);
|
||||
|
|
|
@ -412,6 +412,15 @@ SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 14, CTLFLAG_RDTUN,
|
|||
&t4_toe_rexmt_backoff[14], 0, "");
|
||||
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 15, CTLFLAG_RDTUN,
|
||||
&t4_toe_rexmt_backoff[15], 0, "");
|
||||
|
||||
/*
 * Length of each DDP receive buffer.  Defaults to 256KB and is exported as
 * the read/write tunable hw.cxgbe.toe.ddp_rcvbuf_len.
 *
 * NOTE(review): this is a signed int registered with SYSCTL_INT and no range
 * validation is visible here -- confirm that consumers cope with zero or
 * negative values.
 */
int t4_ddp_rcvbuf_len = 256 * 1024;
|
||||
SYSCTL_INT(_hw_cxgbe_toe, OID_AUTO, ddp_rcvbuf_len, CTLFLAG_RWTUN,
|
||||
&t4_ddp_rcvbuf_len, 0, "length of each DDP RX buffer");
|
||||
|
||||
/*
 * Maximum number of free DDP receive buffers cached per connection.
 * Defaults to 4 and is exported as the read/write tunable
 * hw.cxgbe.toe.ddp_rcvbuf_cache.  Per the commit description, the limit
 * does not apply to "in-flight" buffers associated with mbufs in the
 * socket's receive buffer.
 */
unsigned int t4_ddp_rcvbuf_cache = 4;
|
||||
SYSCTL_UINT(_hw_cxgbe_toe, OID_AUTO, ddp_rcvbuf_cache, CTLFLAG_RWTUN,
|
||||
&t4_ddp_rcvbuf_cache, 0,
|
||||
"maximum number of free DDP RX buffers to cache per connection");
|
||||
#endif
|
||||
|
||||
#ifdef DEV_NETMAP
|
||||
|
@ -12046,6 +12055,10 @@ clear_stats(struct adapter *sc, u_int port_id)
|
|||
ofld_rxq->rx_aio_ddp_octets = 0;
|
||||
ofld_rxq->rx_toe_tls_records = 0;
|
||||
ofld_rxq->rx_toe_tls_octets = 0;
|
||||
ofld_rxq->rx_toe_ddp_octets = 0;
|
||||
counter_u64_zero(ofld_rxq->ddp_buffer_alloc);
|
||||
counter_u64_zero(ofld_rxq->ddp_buffer_reuse);
|
||||
counter_u64_zero(ofld_rxq->ddp_buffer_free);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -4098,6 +4098,9 @@ alloc_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq, int idx,
|
|||
ofld_rxq->rx_iscsi_ddp_setup_ok = counter_u64_alloc(M_WAITOK);
|
||||
ofld_rxq->rx_iscsi_ddp_setup_error =
|
||||
counter_u64_alloc(M_WAITOK);
|
||||
ofld_rxq->ddp_buffer_alloc = counter_u64_alloc(M_WAITOK);
|
||||
ofld_rxq->ddp_buffer_reuse = counter_u64_alloc(M_WAITOK);
|
||||
ofld_rxq->ddp_buffer_free = counter_u64_alloc(M_WAITOK);
|
||||
add_ofld_rxq_sysctls(&vi->ctx, oid, ofld_rxq);
|
||||
}
|
||||
|
||||
|
@ -4132,6 +4135,9 @@ free_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq)
|
|||
MPASS(!(ofld_rxq->iq.flags & IQ_SW_ALLOCATED));
|
||||
counter_u64_free(ofld_rxq->rx_iscsi_ddp_setup_ok);
|
||||
counter_u64_free(ofld_rxq->rx_iscsi_ddp_setup_error);
|
||||
counter_u64_free(ofld_rxq->ddp_buffer_alloc);
|
||||
counter_u64_free(ofld_rxq->ddp_buffer_reuse);
|
||||
counter_u64_free(ofld_rxq->ddp_buffer_free);
|
||||
bzero(ofld_rxq, sizeof(*ofld_rxq));
|
||||
}
|
||||
}
|
||||
|
@ -4158,6 +4164,18 @@ add_ofld_rxq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid,
|
|||
SYSCTL_ADD_ULONG(ctx, children, OID_AUTO,
|
||||
"rx_toe_tls_octets", CTLFLAG_RD, &ofld_rxq->rx_toe_tls_octets,
|
||||
"# of payload octets in received TOE TLS records");
|
||||
SYSCTL_ADD_ULONG(ctx, children, OID_AUTO,
|
||||
"rx_toe_ddp_octets", CTLFLAG_RD, &ofld_rxq->rx_toe_ddp_octets,
|
||||
"# of payload octets received via TCP DDP");
|
||||
SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO,
|
||||
"ddp_buffer_alloc", CTLFLAG_RD, &ofld_rxq->ddp_buffer_alloc,
|
||||
"# of DDP RCV buffers allocated");
|
||||
SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO,
|
||||
"ddp_buffer_reuse", CTLFLAG_RD, &ofld_rxq->ddp_buffer_reuse,
|
||||
"# of DDP RCV buffers reused");
|
||||
SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO,
|
||||
"ddp_buffer_free", CTLFLAG_RD, &ofld_rxq->ddp_buffer_free,
|
||||
"# of DDP RCV buffers freed");
|
||||
|
||||
oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "iscsi",
|
||||
CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TOE iSCSI statistics");
|
||||
|
|
|
@ -1352,8 +1352,6 @@ do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
|
|||
if (toep->flags & TPF_ABORT_SHUTDOWN)
|
||||
goto done;
|
||||
|
||||
so = inp->inp_socket;
|
||||
socantrcvmore(so);
|
||||
if (ulp_mode(toep) == ULP_MODE_TCPDDP) {
|
||||
DDP_LOCK(toep);
|
||||
if (__predict_false(toep->ddp.flags &
|
||||
|
@ -1361,6 +1359,8 @@ do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
|
|||
handle_ddp_close(toep, tp, cpl->rcv_nxt);
|
||||
DDP_UNLOCK(toep);
|
||||
}
|
||||
so = inp->inp_socket;
|
||||
socantrcvmore(so);
|
||||
|
||||
if (ulp_mode(toep) == ULP_MODE_RDMA ||
|
||||
(ulp_mode(toep) == ULP_MODE_ISCSI && chip_id(sc) >= CHELSIO_T6)) {
|
||||
|
@ -1782,7 +1782,8 @@ do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
|
|||
sbappendstream_locked(sb, m, 0);
|
||||
t4_rcvd_locked(&toep->td->tod, tp);
|
||||
|
||||
if (ulp_mode(toep) == ULP_MODE_TCPDDP && toep->ddp.waiting_count > 0 &&
|
||||
if (ulp_mode(toep) == ULP_MODE_TCPDDP &&
|
||||
(toep->ddp.flags & DDP_AIO) != 0 && toep->ddp.waiting_count > 0 &&
|
||||
sbavail(sb) != 0) {
|
||||
CTR2(KTR_CXGBE, "%s: tid %u queueing AIO task", __func__,
|
||||
tid);
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1950,6 +1950,35 @@ t4_tom_deactivate(struct adapter *sc)
|
|||
return (rc);
|
||||
}
|
||||
|
||||
/*
 * Socket option handler installed as pr_ctloutput in toe_protosw and
 * toe6_protosw (see t4_tom_mod_load).
 *
 * Only IPPROTO_TCP / TCP_USE_DDP is handled here; every other option is
 * passed through to tcp_ctloutput().
 *
 * For TCP_USE_DDP:
 *  - anything other than SOPT_SET fails with EOPNOTSUPP,
 *  - a request with a non-NULL sopt_td fails with EPERM,
 *  - a copyin failure returns the sooptcopyin() error,
 *  - a non-zero value returns the result of t4_enable_ddp_rcv(),
 *  - a zero value fails with EOPNOTSUPP (there is no disable path here).
 */
static int
|
||||
t4_ctloutput_tom(struct socket *so, struct sockopt *sopt)
|
||||
{
|
||||
struct tcpcb *tp = sototcpcb(so);
|
||||
struct toepcb *toep = tp->t_toe;
|
||||
int error, optval;
|
||||
|
||||
if (sopt->sopt_level == IPPROTO_TCP && sopt->sopt_name == TCP_USE_DDP) {
|
||||
if (sopt->sopt_dir != SOPT_SET)
|
||||
return (EOPNOTSUPP);
|
||||
|
||||
if (sopt->sopt_td != NULL) {
|
||||
/* Only settable by the kernel. */
|
||||
return (EPERM);
|
||||
}
|
||||
|
||||
/* The option value must be exactly one int. */
error = sooptcopyin(sopt, &optval, sizeof(optval),
|
||||
sizeof(optval));
|
||||
if (error != 0)
|
||||
return (error);
|
||||
|
||||
if (optval != 0)
|
||||
return (t4_enable_ddp_rcv(so, toep));
|
||||
else
|
||||
return (EOPNOTSUPP);
|
||||
}
|
||||
/* Not TCP_USE_DDP: defer to the regular TCP option handler. */
return (tcp_ctloutput(so, sopt));
|
||||
}
|
||||
|
||||
static int
|
||||
t4_aio_queue_tom(struct socket *so, struct kaiocb *job)
|
||||
{
|
||||
|
@ -1989,9 +2018,11 @@ t4_tom_mod_load(void)
|
|||
t4_tls_mod_load();
|
||||
|
||||
bcopy(&tcp_protosw, &toe_protosw, sizeof(toe_protosw));
|
||||
toe_protosw.pr_ctloutput = t4_ctloutput_tom;
|
||||
toe_protosw.pr_aio_queue = t4_aio_queue_tom;
|
||||
|
||||
bcopy(&tcp6_protosw, &toe6_protosw, sizeof(toe6_protosw));
|
||||
toe6_protosw.pr_ctloutput = t4_ctloutput_tom;
|
||||
toe6_protosw.pr_aio_queue = t4_aio_queue_tom;
|
||||
|
||||
return (t4_register_uld(&tom_uld_info));
|
||||
|
|
|
@ -85,6 +85,8 @@ enum {
|
|||
DDP_BUF1_ACTIVE = (1 << 4), /* buffer 1 in use (not invalidated) */
|
||||
DDP_TASK_ACTIVE = (1 << 5), /* requeue task is queued / running */
|
||||
DDP_DEAD = (1 << 6), /* toepcb is shutting down */
|
||||
DDP_AIO = (1 << 7), /* DDP used for AIO, not so_rcv */
|
||||
DDP_RCVBUF = (1 << 8), /* DDP used for so_rcv, not AIO */
|
||||
};
|
||||
|
||||
struct bio;
|
||||
|
@ -156,25 +158,51 @@ TAILQ_HEAD(pagesetq, pageset);
|
|||
|
||||
#define PS_PPODS_WRITTEN 0x0001 /* Page pods written to the card. */
|
||||
|
||||
struct ddp_buffer {
|
||||
struct pageset *ps;
|
||||
|
||||
struct kaiocb *job;
|
||||
int cancel_pending;
|
||||
/*
 * A receive buffer used in DDP_RCVBUF mode.  struct ddp_buffer points at one
 * via its drb member, and struct ddp_pcb keeps a TAILQ of them in
 * cached_buffers.
 */
struct ddp_rcv_buffer {
|
||||
/* List linkage (ddp_pcb's cached_buffers is a TAILQ of this type). */
TAILQ_ENTRY(ddp_rcv_buffer) link;
|
||||
/* Presumably the contiguous receive buffer memory -- TODO confirm. */
void *buf;
|
||||
/* Presumably the page pod reservation backing buf -- TODO confirm. */
struct ppod_reservation prsv;
|
||||
/* Presumably the size of buf in bytes -- TODO confirm. */
size_t len;
|
||||
/* Presumably a reference count held by in-flight mbufs -- TODO confirm. */
u_int refs;
|
||||
};
|
||||
|
||||
struct ddp_buffer {
|
||||
union {
|
||||
/* DDP_AIO fields */
|
||||
struct {
|
||||
struct pageset *ps;
|
||||
struct kaiocb *job;
|
||||
int cancel_pending;
|
||||
};
|
||||
|
||||
/* DDP_RCVBUF fields */
|
||||
struct {
|
||||
struct ddp_rcv_buffer *drb;
|
||||
uint32_t placed;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
/*
|
||||
* (a) - DDP_AIO only
|
||||
* (r) - DDP_RCVBUF only
|
||||
*/
|
||||
struct ddp_pcb {
|
||||
struct mtx lock;
|
||||
u_int flags;
|
||||
int active_id; /* the currently active DDP buffer */
|
||||
struct ddp_buffer db[2];
|
||||
TAILQ_HEAD(, pageset) cached_pagesets;
|
||||
TAILQ_HEAD(, kaiocb) aiojobq;
|
||||
u_int waiting_count;
|
||||
union {
|
||||
TAILQ_HEAD(, pageset) cached_pagesets; /* (a) */
|
||||
TAILQ_HEAD(, ddp_rcv_buffer) cached_buffers; /* (r) */
|
||||
};
|
||||
TAILQ_HEAD(, kaiocb) aiojobq; /* (a) */
|
||||
u_int waiting_count; /* (a) */
|
||||
u_int active_count;
|
||||
u_int cached_count;
|
||||
int active_id; /* the currently active DDP buffer */
|
||||
struct task requeue_task;
|
||||
struct kaiocb *queueing;
|
||||
struct mtx lock;
|
||||
struct kaiocb *queueing; /* (a) */
|
||||
struct mtx cache_lock; /* (r) */
|
||||
};
|
||||
|
||||
struct toepcb {
|
||||
|
@ -230,6 +258,8 @@ ulp_mode(struct toepcb *toep)
|
|||
#define DDP_LOCK(toep) mtx_lock(&(toep)->ddp.lock)
|
||||
#define DDP_UNLOCK(toep) mtx_unlock(&(toep)->ddp.lock)
|
||||
#define DDP_ASSERT_LOCKED(toep) mtx_assert(&(toep)->ddp.lock, MA_OWNED)
|
||||
#define DDP_CACHE_LOCK(toep) mtx_lock(&(toep)->ddp.cache_lock)
|
||||
#define DDP_CACHE_UNLOCK(toep) mtx_unlock(&(toep)->ddp.cache_lock)
|
||||
|
||||
/*
|
||||
* Compressed state for embryonic connections for a listener.
|
||||
|
@ -502,6 +532,7 @@ int t4_write_page_pods_for_sgl(struct adapter *, struct toepcb *,
|
|||
struct ppod_reservation *, struct ctl_sg_entry *, int, int, struct mbufq *);
|
||||
void t4_free_page_pods(struct ppod_reservation *);
|
||||
int t4_aio_queue_ddp(struct socket *, struct kaiocb *);
|
||||
int t4_enable_ddp_rcv(struct socket *, struct toepcb *);
|
||||
void t4_ddp_mod_load(void);
|
||||
void t4_ddp_mod_unload(void);
|
||||
void ddp_assert_empty(struct toepcb *);
|
||||
|
|
Loading…
Reference in a new issue