cxgbe: Enable TOE TLS RX when an RX key is provided via setsockopt().

Rather than requiring a socket to be created as a TLS socket from the
get-go, switch a TOE socket from "plain" TOE to TLS mode when a
receive key is added to the socket.

The firmware is only able to switch a "plain" TOE connection to TLS
mode if the head of the pending socket data is the start of a TLS
record, so the connection is migrated to TLS mode as a multi-step
process.

When TOE TLS RX is enabled, the associated connection's receive side
is frozen via a flag in the TCB.  The state of the socket buffer is
then examined to determine if the pending data in the socket buffer
ends on a TLS record boundary.  If so, the connection is migrated to
TLS mode and unfrozen.  Otherwise, the connection is unfrozen
temporarily until more data arrives.  Once more data arrives, the
receive queue is frozen again and rechecked.  This continues until the
connection is paused at a record boundary.  Any records received
before TLS mode is enabled are decrypted as software records.
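
In code, the migration reduces to roughly the following sketch. It is
an illustration only, folding together the new tls_alloc_ktls(),
do_tls_tcb_rpl(), tls_check_rx_sockbuf(), and
tls_received_starting_data() from the diff below into one hypothetical
helper; locking, the rx_resid bookkeeping, and error handling are
omitted.

/*
 * Condensed, non-verbatim sketch of the receive-side migration.
 */
static void
tls_rx_migration_sketch(struct adapter *sc, struct toepcb *toep,
    struct sockbuf *sb)
{
	uint64_t seqno;
	size_t resid;

	/* 1. setsockopt() installed an RX key: freeze receive. */
	toep->flags |= TPF_TLS_STARTING | TPF_TLS_RX_QUIESCED;
	t4_set_rx_quiesce(toep);	/* replied to as CPL_SET_TCB_RPL */

	/* 2. On the quiesce reply: paused on a TLS record boundary? */
	if (!ktls_pending_rx_info(sb, &seqno, &resid) || resid != 0) {
		/*
		 * No: unfreeze.  When do_rx_data() sees more data
		 * arrive with TPF_TLS_STARTING still set, it freezes
		 * the queue again and step 2 is retried.
		 */
		toep->flags &= ~TPF_TLS_RX_QUIESCED;
		t4_clear_rx_quiesce(toep);
		return;
	}

	/* 3. Yes: rewrite the TCB to ULP_MODE_TLS and unfreeze. */
	tls_update_tcb(sc, toep, seqno);
}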

Note that this removes the 'tls_rx_ports' sysctl.  TOE TLS offload for
receive is now enabled automatically on existing TOE connections when
using a KTLS-aware SSL library just as it was previously enabled
automatically for TLS transmit.  This also enables TLS offload for TOE
connections which enable TLS after passing initial data in the clear
(e.g. STARTTLS with SMTP).
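
For context, this is the userspace side that triggers the new path: a
KTLS-aware library hands the negotiated receive key to the kernel with
TCP_RXTLS_ENABLE.  The sketch below uses FreeBSD's generic KTLS
interface (struct tls_enable from <sys/ktls.h>, see ktls(4)); the
enable_ktls_rx() helper and its key/salt arguments are hypothetical
placeholders, not part of this change.

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/ktls.h>		/* struct tls_enable, TLS_*_VER_* */
#include <netinet/in.h>
#include <netinet/tcp.h>	/* TCP_RXTLS_ENABLE */
#include <crypto/cryptodev.h>	/* CRYPTO_AES_NIST_GCM_16 */
#include <string.h>

/* Hypothetical helper: install a TLS 1.2 AES-GCM receive key. */
static int
enable_ktls_rx(int s, const uint8_t *key, int keylen,
    const uint8_t *salt, int saltlen)
{
	struct tls_enable en;

	memset(&en, 0, sizeof(en));
	en.cipher_algorithm = CRYPTO_AES_NIST_GCM_16;
	en.cipher_key = key;		/* 16 or 32 bytes */
	en.cipher_key_len = keylen;
	en.iv = salt;			/* 4-byte implicit IV (salt) */
	en.iv_len = saltlen;
	en.tls_vmajor = TLS_MAJOR_VER_ONE;
	en.tls_vminor = TLS_MINOR_VER_TWO;

	/* On a cxgbe TOE connection this now starts the migration. */
	return (setsockopt(s, IPPROTO_TCP, TCP_RXTLS_ENABLE, &en,
	    sizeof(en)));
}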

Sponsored by:	Chelsio Communications
Differential Revision:	https://reviews.freebsd.org/D37351
Author:	John Baldwin
Date:	2022-11-15 12:08:51 -08:00
Commit:	2ff447ee3b (parent 21186bdb2d)

7 changed files with 342 additions and 413 deletions


@@ -225,9 +225,6 @@ struct tom_tunables {
int ddp;
int rx_coalesce;
int tls;
int tls_rx_timeout;
int *tls_rx_ports;
int num_tls_rx_ports;
int tx_align;
int tx_zcopy;
int cop_managed_offloading;


@@ -414,11 +414,6 @@ SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 14, CTLFLAG_RDTUN,
&t4_toe_rexmt_backoff[14], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 15, CTLFLAG_RDTUN,
&t4_toe_rexmt_backoff[15], 0, "");
static int t4_toe_tls_rx_timeout = 5;
SYSCTL_INT(_hw_cxgbe_toe, OID_AUTO, tls_rx_timeout, CTLFLAG_RDTUN,
&t4_toe_tls_rx_timeout, 0,
"Timeout in seconds to downgrade TLS sockets to plain TOE");
#endif
#ifdef DEV_NETMAP
@@ -833,8 +828,6 @@ static int sysctl_cpus(SYSCTL_HANDLER_ARGS);
static int sysctl_reset(SYSCTL_HANDLER_ARGS);
#ifdef TCP_OFFLOAD
static int sysctl_tls(SYSCTL_HANDLER_ARGS);
static int sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS);
static int sysctl_tls_rx_timeout(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_timer(SYSCTL_HANDLER_ARGS);
@@ -1867,7 +1860,6 @@ t4_detach_common(device_t dev)
free(sc->tids.hpftid_tab, M_CXGBE);
free_hftid_hash(&sc->tids);
free(sc->tids.tid_tab, M_CXGBE);
free(sc->tt.tls_rx_ports, M_CXGBE);
t4_destroy_dma_tag(sc);
callout_drain(&sc->ktls_tick);
@@ -5743,10 +5735,9 @@ set_params__post_init(struct adapter *sc)
if (sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS &&
sc->toecaps & FW_CAPS_CONFIG_TOE) {
/*
* Limit TOE connections to 2 reassembly "islands". This is
* required for TOE TLS connections to downgrade to plain TOE
* connections if an unsupported TLS version or ciphersuite is
* used.
* Limit TOE connections to 2 reassembly "islands".
* This is required to permit migrating TOE
* connections to ULP_MODE_TLS.
*/
t4_tp_wr_bits_indirect(sc, A_TP_FRAG_CONFIG,
V_PASSMODE(M_PASSMODE), V_PASSMODE(2));
@@ -7683,17 +7674,6 @@ t4_sysctls(struct adapter *sc)
CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, sysctl_tls, "I",
"Inline TLS allowed");
SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tls_rx_ports",
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
sysctl_tls_rx_ports, "I",
"TCP ports that use inline TLS+TOE RX");
sc->tt.tls_rx_timeout = t4_toe_tls_rx_timeout;
SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tls_rx_timeout",
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
sysctl_tls_rx_timeout, "I",
"Timeout in seconds to downgrade TLS sockets to plain TOE");
sc->tt.tx_align = -1;
SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_align",
CTLFLAG_RW, &sc->tt.tx_align, 0, "chop and align payload");
@@ -11287,97 +11267,6 @@ sysctl_tls(SYSCTL_HANDLER_ARGS)
}
static int
sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS)
{
struct adapter *sc = arg1;
int *old_ports, *new_ports;
int i, new_count, rc;
if (req->newptr == NULL && req->oldptr == NULL)
return (SYSCTL_OUT(req, NULL, imax(sc->tt.num_tls_rx_ports, 1) *
sizeof(sc->tt.tls_rx_ports[0])));
rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4tlsrx");
if (rc)
return (rc);
if (hw_off_limits(sc)) {
rc = ENXIO;
goto done;
}
if (sc->tt.num_tls_rx_ports == 0) {
i = -1;
rc = SYSCTL_OUT(req, &i, sizeof(i));
} else
rc = SYSCTL_OUT(req, sc->tt.tls_rx_ports,
sc->tt.num_tls_rx_ports * sizeof(sc->tt.tls_rx_ports[0]));
if (rc == 0 && req->newptr != NULL) {
new_count = req->newlen / sizeof(new_ports[0]);
new_ports = malloc(new_count * sizeof(new_ports[0]), M_CXGBE,
M_WAITOK);
rc = SYSCTL_IN(req, new_ports, new_count *
sizeof(new_ports[0]));
if (rc)
goto err;
/* Allow setting to a single '-1' to clear the list. */
if (new_count == 1 && new_ports[0] == -1) {
ADAPTER_LOCK(sc);
old_ports = sc->tt.tls_rx_ports;
sc->tt.tls_rx_ports = NULL;
sc->tt.num_tls_rx_ports = 0;
ADAPTER_UNLOCK(sc);
free(old_ports, M_CXGBE);
} else {
for (i = 0; i < new_count; i++) {
if (new_ports[i] < 1 ||
new_ports[i] > IPPORT_MAX) {
rc = EINVAL;
goto err;
}
}
ADAPTER_LOCK(sc);
old_ports = sc->tt.tls_rx_ports;
sc->tt.tls_rx_ports = new_ports;
sc->tt.num_tls_rx_ports = new_count;
ADAPTER_UNLOCK(sc);
free(old_ports, M_CXGBE);
new_ports = NULL;
}
err:
free(new_ports, M_CXGBE);
}
done:
end_synchronized_op(sc, 0);
return (rc);
}
static int
sysctl_tls_rx_timeout(SYSCTL_HANDLER_ARGS)
{
struct adapter *sc = arg1;
int v, rc;
v = sc->tt.tls_rx_timeout;
rc = sysctl_handle_int(oidp, &v, 0, req);
if (rc != 0 || req->newptr == NULL)
return (rc);
if (v < 0)
return (EINVAL);
if (v != 0 && !(sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS))
return (ENOTSUP);
sc->tt.tls_rx_timeout = v;
return (0);
}
static void
unit_conv(char *buf, size_t len, u_int val, u_int factor)
{
@@ -12869,9 +12758,6 @@ tweak_tunables(void)
if (t4_pktc_idx_ofld < -1 || t4_pktc_idx_ofld >= SGE_NCOUNTERS)
t4_pktc_idx_ofld = PKTC_IDX_OFLD;
if (t4_toe_tls_rx_timeout < 0)
t4_toe_tls_rx_timeout = 0;
#else
if (t4_rdmacaps_allowed == -1)
t4_rdmacaps_allowed = 0;


@@ -98,10 +98,6 @@ send_flowc_wr(struct toepcb *toep, struct tcpcb *tp)
nparams = 8;
else
nparams = 6;
if (ulp_mode(toep) == ULP_MODE_TLS)
nparams++;
if (toep->tls.fcplenmax != 0)
nparams++;
if (toep->params.tc_idx != -1) {
MPASS(toep->params.tc_idx >= 0 &&
toep->params.tc_idx < sc->params.nsched_cls);
@@ -148,10 +144,6 @@ send_flowc_wr(struct toepcb *toep, struct tcpcb *tp)
__func__, toep->tid, toep->params.emss, toep->params.sndbuf,
tp ? tp->snd_nxt : 0, tp ? tp->rcv_nxt : 0);
if (ulp_mode(toep) == ULP_MODE_TLS)
FLOWC_PARAM(ULP_MODE, ulp_mode(toep));
if (toep->tls.fcplenmax != 0)
FLOWC_PARAM(TXDATAPLEN_MAX, toep->tls.fcplenmax);
if (toep->params.tc_idx != -1)
FLOWC_PARAM(SCHEDCLASS, toep->params.tc_idx);
#undef FLOWC_PARAM
@@ -395,9 +387,6 @@ make_established(struct toepcb *toep, uint32_t iss, uint32_t irs, uint16_t opt)
send_flowc_wr(toep, tp);
soisconnected(so);
if (ulp_mode(toep) == ULP_MODE_TLS)
tls_establish(toep);
}
int
@@ -421,23 +410,6 @@ send_rx_credits(struct adapter *sc, struct toepcb *toep, int credits)
return (credits);
}
void
send_rx_modulate(struct adapter *sc, struct toepcb *toep)
{
struct wrqe *wr;
struct cpl_rx_data_ack *req;
wr = alloc_wrqe(sizeof(*req), toep->ctrlq);
if (wr == NULL)
return;
req = wrtod(wr);
INIT_TP_WR_MIT_CPL(req, CPL_RX_DATA_ACK, toep->tid);
req->credit_dack = htobe32(F_RX_MODULATE_RX);
t4_wrq_tx(sc, wr);
}
void
t4_rcvd_locked(struct toedev *tod, struct tcpcb *tp)
{
@@ -459,8 +431,7 @@ t4_rcvd_locked(struct toedev *tod, struct tcpcb *tp)
rx_credits = send_rx_credits(sc, toep, rx_credits);
tp->rcv_wnd += rx_credits;
tp->rcv_adv += rx_credits;
} else if (toep->flags & TPF_FORCE_CREDITS)
send_rx_modulate(sc, toep);
}
}
void
@@ -1823,6 +1794,8 @@ do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
tid);
ddp_queue_toep(toep);
}
if (toep->flags & TPF_TLS_STARTING)
tls_received_starting_data(sc, toep, sb, len);
sorwakeup_locked(so);
SOCKBUF_UNLOCK_ASSERT(sb);
if (ulp_mode(toep) == ULP_MODE_TCPDDP)


@@ -86,14 +86,14 @@ tls_tx_key(struct toepcb *toep)
return (tls_ofld->tx_key_addr >= 0);
}
/* Set TLS Key-Id in TCB */
/* Set TF_RX_QUIESCE to pause receive. */
static void
t4_set_tls_keyid(struct toepcb *toep, unsigned int key_id)
t4_set_rx_quiesce(struct toepcb *toep)
{
struct adapter *sc = td_adapter(toep->td);
t4_set_tls_tcb_field(toep, W_TCB_RX_TLS_KEY_TAG,
V_TCB_RX_TLS_KEY_TAG(M_TCB_RX_TLS_BUF_TAG),
V_TCB_RX_TLS_KEY_TAG(key_id));
t4_set_tcb_field(sc, &toep->ofld_txq->wrq, toep, W_TCB_T_FLAGS,
V_TF_RX_QUIESCE(1), V_TF_RX_QUIESCE(1), 1, CPL_COOKIE_TOM);
}
/* Clear TF_RX_QUIESCE to re-enable receive. */
@@ -104,27 +104,6 @@ t4_clear_rx_quiesce(struct toepcb *toep)
t4_set_tls_tcb_field(toep, W_TCB_T_FLAGS, V_TF_RX_QUIESCE(1), 0);
}
static void
tls_clr_ofld_mode(struct toepcb *toep)
{
tls_stop_handshake_timer(toep);
KASSERT(toep->tls.rx_key_addr == -1,
("%s: tid %d has RX key", __func__, toep->tid));
/* Switch to plain TOE mode. */
t4_set_tls_tcb_field(toep, W_TCB_ULP_RAW,
V_TCB_ULP_RAW(V_TF_TLS_ENABLE(1)),
V_TCB_ULP_RAW(V_TF_TLS_ENABLE(0)));
t4_set_tls_tcb_field(toep, W_TCB_ULP_TYPE,
V_TCB_ULP_TYPE(M_TCB_ULP_TYPE), V_TCB_ULP_TYPE(ULP_MODE_NONE));
t4_clear_rx_quiesce(toep);
toep->flags &= ~(TPF_FORCE_CREDITS | TPF_TLS_ESTABLISHED);
toep->params.ulp_mode = ULP_MODE_NONE;
}
/* TLS/DTLS content type for CPL SFO */
static inline unsigned char
tls_content_type(unsigned char content_type)
@@ -226,88 +205,29 @@ tls_program_key_id(struct toepcb *toep, struct ktls_session *tls,
return (0);
}
/*
* In some cases a client connection can hang without sending the
* ServerHelloDone message from the NIC to the host. Send a dummy
* RX_DATA_ACK with RX_MODULATE to unstick the connection.
*/
static void
tls_send_handshake_ack(void *arg)
{
struct toepcb *toep = arg;
struct tls_ofld_info *tls_ofld = &toep->tls;
struct adapter *sc = td_adapter(toep->td);
/* Bail without rescheduling if the connection has closed. */
if ((toep->flags & (TPF_FIN_SENT | TPF_ABORT_SHUTDOWN)) != 0)
return;
/*
* If this connection has timed out without receiving more
* data, downgrade to plain TOE mode and don't re-arm the
* timer.
*/
if (sc->tt.tls_rx_timeout != 0) {
struct inpcb *inp;
struct tcpcb *tp;
inp = toep->inp;
tp = intotcpcb(inp);
if ((ticks - tp->t_rcvtime) >= sc->tt.tls_rx_timeout) {
CTR2(KTR_CXGBE, "%s: tid %d clr_ofld_mode", __func__,
toep->tid);
tls_clr_ofld_mode(toep);
return;
}
}
/*
* XXX: Does not have the t4_get_tcb() checks to refine the
* workaround.
*/
callout_schedule(&tls_ofld->handshake_timer, TLS_SRV_HELLO_RD_TM * hz);
CTR2(KTR_CXGBE, "%s: tid %d sending RX_DATA_ACK", __func__, toep->tid);
send_rx_modulate(sc, toep);
}
static void
tls_start_handshake_timer(struct toepcb *toep)
{
struct tls_ofld_info *tls_ofld = &toep->tls;
INP_WLOCK_ASSERT(toep->inp);
callout_reset(&tls_ofld->handshake_timer, TLS_SRV_HELLO_BKOFF_TM * hz,
tls_send_handshake_ack, toep);
}
void
tls_stop_handshake_timer(struct toepcb *toep)
{
struct tls_ofld_info *tls_ofld = &toep->tls;
INP_WLOCK_ASSERT(toep->inp);
callout_stop(&tls_ofld->handshake_timer);
}
int
tls_alloc_ktls(struct toepcb *toep, struct ktls_session *tls, int direction)
{
struct adapter *sc = td_adapter(toep->td);
int error, explicit_iv_size, key_offset, mac_first;
int error, explicit_iv_size, mac_first;
if (!can_tls_offload(td_adapter(toep->td)))
if (!can_tls_offload(sc))
return (EINVAL);
switch (ulp_mode(toep)) {
case ULP_MODE_TLS:
break;
case ULP_MODE_NONE:
case ULP_MODE_TCPDDP:
if (direction != KTLS_TX)
if (direction == KTLS_RX) {
if (ulp_mode(toep) != ULP_MODE_NONE)
return (EINVAL);
break;
default:
return (EINVAL);
if ((toep->flags & TPF_TLS_STARTING) != 0)
return (EINVAL);
} else {
switch (ulp_mode(toep)) {
case ULP_MODE_NONE:
case ULP_MODE_TLS:
case ULP_MODE_TCPDDP:
break;
default:
return (EINVAL);
}
}
switch (tls->params.cipher_algorithm) {
@@ -319,8 +239,7 @@ tls_alloc_ktls(struct toepcb *toep, struct ktls_session *tls, int direction)
case 256 / 8:
break;
default:
error = EINVAL;
goto clr_ofld;
return (EINVAL);
}
switch (tls->params.auth_algorithm) {
case CRYPTO_SHA1_HMAC:
@@ -328,16 +247,14 @@ tls_alloc_ktls(struct toepcb *toep, struct ktls_session *tls, int direction)
case CRYPTO_SHA2_384_HMAC:
break;
default:
error = EPROTONOSUPPORT;
goto clr_ofld;
return (EPROTONOSUPPORT);
}
explicit_iv_size = AES_BLOCK_LEN;
mac_first = 1;
break;
case CRYPTO_AES_NIST_GCM_16:
if (tls->params.iv_len != SALT_SIZE) {
error = EINVAL;
goto clr_ofld;
return (EINVAL);
}
switch (tls->params.cipher_key_len) {
case 128 / 8:
@@ -345,23 +262,20 @@ tls_alloc_ktls(struct toepcb *toep, struct ktls_session *tls, int direction)
case 256 / 8:
break;
default:
error = EINVAL;
goto clr_ofld;
return (EINVAL);
}
explicit_iv_size = 8;
mac_first = 0;
break;
default:
error = EPROTONOSUPPORT;
goto clr_ofld;
return (EPROTONOSUPPORT);
}
/* Only TLS 1.1 and TLS 1.2 are currently supported. */
if (tls->params.tls_vmajor != TLS_MAJOR_VER_ONE ||
tls->params.tls_vminor < TLS_MINOR_VER_ONE ||
tls->params.tls_vminor > TLS_MINOR_VER_TWO) {
error = EPROTONOSUPPORT;
goto clr_ofld;
return (EPROTONOSUPPORT);
}
/* Bail if we already have a key. */
@@ -374,11 +288,8 @@ tls_alloc_ktls(struct toepcb *toep, struct ktls_session *tls, int direction)
}
error = tls_program_key_id(toep, tls, direction);
if (error) {
if (direction == KTLS_RX)
goto clr_ofld;
if (error)
return (error);
}
if (direction == KTLS_TX) {
toep->tls.scmd0.seqno_numivs =
@@ -406,42 +317,16 @@ tls_alloc_ktls(struct toepcb *toep, struct ktls_session *tls, int direction)
tls->params.max_frame_len;
toep->tls.tx_key_info_size = t4_tls_key_info_size(tls);
} else {
/* Stop timer on handshake completion */
tls_stop_handshake_timer(toep);
toep->flags &= ~TPF_FORCE_CREDITS;
toep->flags |= TPF_TLS_RECEIVE;
toep->flags |= TPF_TLS_STARTING | TPF_TLS_RX_QUIESCED;
toep->tls.rx_version = tls->params.tls_vmajor << 8 |
tls->params.tls_vminor;
/*
* RX key tags are an index into the key portion of MA
* memory stored as an offset from the base address in
* units of 64 bytes.
*/
key_offset = toep->tls.rx_key_addr - sc->vres.key.start;
t4_set_tls_keyid(toep, key_offset / 64);
t4_set_tls_tcb_field(toep, W_TCB_ULP_RAW,
V_TCB_ULP_RAW(M_TCB_ULP_RAW),
V_TCB_ULP_RAW((V_TF_TLS_KEY_SIZE(3) |
V_TF_TLS_CONTROL(1) |
V_TF_TLS_ACTIVE(1) |
V_TF_TLS_ENABLE(1))));
t4_set_tls_tcb_field(toep, W_TCB_TLS_SEQ,
V_TCB_TLS_SEQ(M_TCB_TLS_SEQ),
V_TCB_TLS_SEQ(0));
t4_clear_rx_quiesce(toep);
CTR2(KTR_CXGBE, "%s: tid %d setting RX_QUIESCE", __func__,
toep->tid);
t4_set_rx_quiesce(toep);
}
return (0);
clr_ofld:
if (ulp_mode(toep) == ULP_MODE_TLS) {
CTR2(KTR_CXGBE, "%s: tid %d clr_ofld_mode", __func__,
toep->tid);
tls_clr_ofld_mode(toep);
}
return (error);
}
void
@@ -453,42 +338,10 @@ tls_init_toep(struct toepcb *toep)
tls_ofld->tx_key_addr = -1;
}
void
tls_establish(struct toepcb *toep)
{
/*
* Enable PDU extraction.
*
* XXX: Supposedly this should be done by the firmware when
* the ULP_MODE FLOWC parameter is set in send_flowc_wr(), but
* in practice this seems to be required.
*/
CTR2(KTR_CXGBE, "%s: tid %d setting TLS_ENABLE", __func__, toep->tid);
t4_set_tls_tcb_field(toep, W_TCB_ULP_RAW, V_TCB_ULP_RAW(M_TCB_ULP_RAW),
V_TCB_ULP_RAW(V_TF_TLS_ENABLE(1)));
toep->flags |= TPF_FORCE_CREDITS | TPF_TLS_ESTABLISHED;
callout_init_rw(&toep->tls.handshake_timer, &toep->inp->inp_lock, 0);
tls_start_handshake_timer(toep);
}
void
tls_detach(struct toepcb *toep)
{
if (toep->flags & TPF_TLS_ESTABLISHED) {
tls_stop_handshake_timer(toep);
toep->flags &= ~TPF_TLS_ESTABLISHED;
}
}
void
tls_uninit_toep(struct toepcb *toep)
{
MPASS((toep->flags & TPF_TLS_ESTABLISHED) == 0);
clear_tls_keyid(toep);
}
@@ -943,7 +796,7 @@ do_rx_tls_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
struct mbuf *tls_data;
struct tls_get_record *tgr;
struct mbuf *control;
int pdu_length, rx_credits;
int pdu_length, rx_credits, trailer_len;
#if defined(KTR) || defined(INVARIANTS)
int len;
#endif
@@ -1005,6 +858,9 @@ do_rx_tls_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
/* Report decryption errors as EBADMSG. */
if ((tls_hdr_pkt->res_to_mac_error & M_TLSRX_HDR_PKT_ERROR) != 0) {
CTR4(KTR_CXGBE, "%s: tid %u TLS error %#x ddp_vld %#x",
__func__, toep->tid, tls_hdr_pkt->res_to_mac_error,
be32toh(cpl->ddp_valid));
m_freem(m);
m_freem(tls_data);
@@ -1018,49 +874,16 @@ do_rx_tls_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
return (0);
}
/* Allocate the control message mbuf. */
control = sbcreatecontrol(NULL, sizeof(*tgr), TLS_GET_RECORD,
IPPROTO_TCP, M_NOWAIT);
if (control == NULL) {
m_freem(m);
m_freem(tls_data);
CURVNET_SET(toep->vnet);
so->so_error = ENOBUFS;
sorwakeup(so);
INP_WUNLOCK(inp);
CURVNET_RESTORE();
return (0);
}
tgr = (struct tls_get_record *)
CMSG_DATA(mtod(control, struct cmsghdr *));
memset(tgr, 0, sizeof(*tgr));
tgr->tls_type = tls_hdr_pkt->type;
tgr->tls_vmajor = be16toh(tls_hdr_pkt->version) >> 8;
tgr->tls_vminor = be16toh(tls_hdr_pkt->version) & 0xff;
m_freem(m);
if (tls_data != NULL) {
m_last(tls_data)->m_flags |= M_EOR;
tgr->tls_length = htobe16(tls_data->m_pkthdr.len);
} else
tgr->tls_length = 0;
m = tls_data;
/* Handle data received after the socket is closed. */
sb = &so->so_rcv;
SOCKBUF_LOCK(sb);
if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
struct epoch_tracker et;
CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)",
__func__, tid, pdu_length);
m_freem(m);
m_freem(control);
m_freem(tls_data);
SOCKBUF_UNLOCK(sb);
INP_WUNLOCK(inp);
@@ -1068,7 +891,7 @@ do_rx_tls_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
NET_EPOCH_ENTER(et);
INP_WLOCK(inp);
tp = tcp_drop(tp, ECONNRESET);
if (tp)
if (tp != NULL)
INP_WUNLOCK(inp);
NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
@@ -1077,10 +900,63 @@ do_rx_tls_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
}
/*
* Not all of the bytes on the wire are included in the socket buffer
* (e.g. the MAC of the TLS record). However, those bytes are included
* in the TCP sequence space.
* If there is any data in the 'sb_mtls' chain of the socket
* or we aren't able to allocate the control mbuf, append the
* record as a CSUM_TLS_DECRYPTED packet to 'sb_mtls' rather
* than as a decrypted record to 'sb_m'.
*/
if (sb->sb_mtls != NULL)
control = NULL;
else
control = sbcreatecontrol(NULL, sizeof(*tgr), TLS_GET_RECORD,
IPPROTO_TCP, M_NOWAIT);
if (control != NULL) {
tgr = (struct tls_get_record *)
CMSG_DATA(mtod(control, struct cmsghdr *));
memset(tgr, 0, sizeof(*tgr));
tgr->tls_type = tls_hdr_pkt->type;
tgr->tls_vmajor = be16toh(tls_hdr_pkt->version) >> 8;
tgr->tls_vminor = be16toh(tls_hdr_pkt->version) & 0xff;
if (tls_data != NULL) {
m_last(tls_data)->m_flags |= M_EOR;
tgr->tls_length = htobe16(tls_data->m_pkthdr.len);
} else
tgr->tls_length = 0;
m_freem(m);
m = tls_data;
} else {
M_ASSERTPKTHDR(m);
/* It's ok that any explicit IV is missing. */
m->m_len = sb->sb_tls_info->params.tls_hlen;
m->m_pkthdr.csum_flags |= CSUM_TLS_DECRYPTED;
m->m_pkthdr.len = m->m_len;
if (tls_data != NULL) {
m->m_pkthdr.len += tls_data->m_pkthdr.len;
m_demote_pkthdr(tls_data);
m->m_next = tls_data;
}
/*
* Grow the chain by the trailer, but without
* contents. The trailer will be thrown away by
* ktls_decrypt. Note that ktls_decrypt assumes the
* trailer is tls_tlen bytes long, so append that many
* bytes not the actual trailer size computed from
* pdu_length.
*/
trailer_len = sb->sb_tls_info->params.tls_tlen;
if (tls_data != NULL) {
m_last(tls_data)->m_len += trailer_len;
tls_data = NULL;
} else
m->m_len += trailer_len;
m->m_pkthdr.len += trailer_len;
tls_hdr_pkt->length = htobe16(m->m_pkthdr.len -
sizeof(struct tls_record_layer));
}
/* receive buffer autosize */
MPASS(toep->vnet == so->so_vnet);
@ -1097,7 +973,10 @@ do_rx_tls_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
sb->sb_flags &= ~SB_AUTOSIZE;
}
sbappendcontrol_locked(sb, m, control, 0);
if (control != NULL)
sbappendcontrol_locked(sb, m, control, 0);
else
sbappendstream_locked(sb, m, 0);
rx_credits = sbspace(sb) > tp->rcv_wnd ? sbspace(sb) - tp->rcv_wnd : 0;
#ifdef VERBOSE_TRACES
CTR4(KTR_CXGBE, "%s: tid %u rx_credits %u rcv_wnd %u",
@@ -1223,12 +1102,242 @@ do_rx_data_tls(const struct cpl_rx_data *cpl, struct toepcb *toep,
m_freem(m);
}
/* SET_TCB_FIELD sent as a ULP command looks like this */
#define LEN__SET_TCB_FIELD_ULP (sizeof(struct ulp_txpkt) + \
sizeof(struct ulptx_idata) + sizeof(struct cpl_set_tcb_field_core))
static inline void *
mk_set_tcb_field_ulp(struct ulp_txpkt *ulpmc, struct toepcb *toep,
uint64_t word, uint64_t mask, uint64_t val)
{
struct ulptx_idata *ulpsc;
struct cpl_set_tcb_field_core *req;
ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0));
ulpmc->len = htobe32(howmany(LEN__SET_TCB_FIELD_ULP, 16));
ulpsc = (struct ulptx_idata *)(ulpmc + 1);
ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM));
ulpsc->len = htobe32(sizeof(*req));
req = (struct cpl_set_tcb_field_core *)(ulpsc + 1);
OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_SET_TCB_FIELD, toep->tid));
req->reply_ctrl = htobe16(V_NO_REPLY(1) |
V_QUEUENO(toep->ofld_rxq->iq.abs_id));
req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(0));
req->mask = htobe64(mask);
req->val = htobe64(val);
ulpsc = (struct ulptx_idata *)(req + 1);
if (LEN__SET_TCB_FIELD_ULP % 16) {
ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP));
ulpsc->len = htobe32(0);
return (ulpsc + 1);
}
return (ulpsc);
}
/*
* Send a work request setting multiple TCB fields to enable
* ULP_MODE_TLS.
*/
static void
tls_update_tcb(struct adapter *sc, struct toepcb *toep, uint64_t seqno)
{
struct wrqe *wr;
struct work_request_hdr *wrh;
struct ulp_txpkt *ulpmc;
int fields, key_offset, len;
KASSERT(ulp_mode(toep) == ULP_MODE_NONE,
("%s: tid %d already ULP_MODE_TLS", __func__, toep->tid));
fields = 0;
/* 2 writes for the overlay region */
fields += 2;
/* W_TCB_TLS_SEQ */
fields++;
/* W_TCB_ULP_RAW */
fields++;
/* W_TCB_ULP_TYPE */
fields++;
/* W_TCB_T_FLAGS */
fields++;
len = sizeof(*wrh) + fields * roundup2(LEN__SET_TCB_FIELD_ULP, 16);
KASSERT(len <= SGE_MAX_WR_LEN,
("%s: WR with %d TCB field updates too large", __func__, fields));
wr = alloc_wrqe(len, toep->ctrlq);
if (wr == NULL) {
/* XXX */
panic("%s: out of memory", __func__);
}
wrh = wrtod(wr);
INIT_ULPTX_WRH(wrh, len, 1, 0); /* atomic */
ulpmc = (struct ulp_txpkt *)(wrh + 1);
/*
* Clear the TLS overlay region: 1023:832.
*
* Words 26/27 are always set to zero. Words 28/29
* contain seqno and are set when enabling TLS
* decryption. Word 30 is zero and Word 31 contains
* the keyid.
*/
ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, 26,
0xffffffffffffffff, 0);
/*
* RX key tags are an index into the key portion of MA
* memory stored as an offset from the base address in
* units of 64 bytes.
*/
key_offset = toep->tls.rx_key_addr - sc->vres.key.start;
ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, 30,
0xffffffffffffffff,
(uint64_t)V_TCB_RX_TLS_KEY_TAG(key_offset / 64) << 32);
CTR3(KTR_CXGBE, "%s: tid %d enable TLS seqno %lu", __func__,
toep->tid, seqno);
ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, W_TCB_TLS_SEQ,
V_TCB_TLS_SEQ(M_TCB_TLS_SEQ), V_TCB_TLS_SEQ(seqno));
ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, W_TCB_ULP_RAW,
V_TCB_ULP_RAW(M_TCB_ULP_RAW),
V_TCB_ULP_RAW((V_TF_TLS_KEY_SIZE(3) | V_TF_TLS_CONTROL(1) |
V_TF_TLS_ACTIVE(1) | V_TF_TLS_ENABLE(1))));
toep->flags &= ~TPF_TLS_STARTING;
toep->flags |= TPF_TLS_RECEIVE;
/* Set the ULP mode to ULP_MODE_TLS. */
toep->params.ulp_mode = ULP_MODE_TLS;
ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, W_TCB_ULP_TYPE,
V_TCB_ULP_TYPE(M_TCB_ULP_TYPE),
V_TCB_ULP_TYPE(ULP_MODE_TLS));
/* Clear TF_RX_QUIESCE. */
ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, W_TCB_T_FLAGS,
V_TF_RX_QUIESCE(1), 0);
t4_wrq_tx(sc, wr);
}
/*
* Examine the pending data in the socket buffer and either enable TLS
* RX or request more encrypted data.
*/
static void
tls_check_rx_sockbuf(struct adapter *sc, struct toepcb *toep,
struct sockbuf *sb)
{
uint64_t seqno;
size_t resid;
bool have_header;
SOCKBUF_LOCK_ASSERT(sb);
MPASS(toep->tls.rx_resid == 0);
have_header = ktls_pending_rx_info(sb, &seqno, &resid);
CTR5(KTR_CXGBE, "%s: tid %d have_header %d seqno %lu resid %zu",
__func__, toep->tid, have_header, seqno, resid);
/*
* If we have a partial header or we need fewer bytes than the
* size of a TLS record, re-enable receive and pause again once
* we get more data to try again.
*/
if (!have_header || resid != 0) {
CTR(KTR_CXGBE, "%s: tid %d waiting for more data", __func__,
toep->tid);
toep->flags &= ~TPF_TLS_RX_QUIESCED;
t4_clear_rx_quiesce(toep);
return;
}
tls_update_tcb(sc, toep, seqno);
}
void
tls_received_starting_data(struct adapter *sc, struct toepcb *toep,
struct sockbuf *sb, int len)
{
MPASS(toep->flags & TPF_TLS_STARTING);
/*
* A previous call to tls_check_rx_sockbuf needed more data.
* Now that more data has arrived, quiesce receive again and
* check the state once the quiesce has completed.
*/
if ((toep->flags & TPF_TLS_RX_QUIESCED) == 0) {
CTR(KTR_CXGBE, "%s: tid %d quiescing", __func__, toep->tid);
toep->flags |= TPF_TLS_RX_QUIESCED;
t4_set_rx_quiesce(toep);
return;
}
KASSERT(len <= toep->tls.rx_resid,
("%s: received excess bytes %d (waiting for %zu)", __func__, len,
toep->tls.rx_resid));
toep->tls.rx_resid -= len;
if (toep->tls.rx_resid != 0)
return;
tls_check_rx_sockbuf(sc, toep, sb);
}
static int
do_tls_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
struct adapter *sc = iq->adapter;
const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1);
unsigned int tid = GET_TID(cpl);
struct toepcb *toep;
struct inpcb *inp;
struct socket *so;
struct sockbuf *sb;
if (cpl->status != CPL_ERR_NONE)
panic("XXX: tcp_rpl failed: %d", cpl->status);
toep = lookup_tid(sc, tid);
inp = toep->inp;
switch (cpl->cookie) {
case V_WORD(W_TCB_T_FLAGS) | V_COOKIE(CPL_COOKIE_TOM):
INP_WLOCK(inp);
if ((toep->flags & TPF_TLS_STARTING) == 0)
panic("%s: connection is not starting TLS RX\n",
__func__);
so = inp->inp_socket;
sb = &so->so_rcv;
SOCKBUF_LOCK(sb);
tls_check_rx_sockbuf(sc, toep, sb);
SOCKBUF_UNLOCK(sb);
INP_WUNLOCK(inp);
break;
default:
panic("XXX: unknown tcb_rpl offset %#x, cookie %#x",
G_WORD(cpl->cookie), G_COOKIE(cpl->cookie));
}
return (0);
}
void
t4_tls_mod_load(void)
{
t4_register_cpl_handler(CPL_TLS_DATA, do_tls_data);
t4_register_cpl_handler(CPL_RX_TLS_CMP, do_rx_tls_cmp);
t4_register_shared_cpl_handler(CPL_SET_TCB_RPL, do_tls_tcb_rpl,
CPL_COOKIE_TOM);
}
void
@@ -1237,6 +1346,7 @@ t4_tls_mod_unload(void)
t4_register_cpl_handler(CPL_TLS_DATA, NULL);
t4_register_cpl_handler(CPL_RX_TLS_CMP, NULL);
t4_register_shared_cpl_handler(CPL_SET_TCB_RPL, NULL, CPL_COOKIE_TOM);
}
#endif /* TCP_OFFLOAD */
#endif /* KERN_TLS */


@@ -35,11 +35,6 @@
#ifdef _KERNEL
/* Timeouts for handshake timer in seconds. */
#define TLS_SRV_HELLO_DONE 9
#define TLS_SRV_HELLO_RD_TM 5
#define TLS_SRV_HELLO_BKOFF_TM 15
#define CONTENT_TYPE_CCS 20
#define CONTENT_TYPE_ALERT 21
#define CONTENT_TYPE_HANDSHAKE 22
@@ -84,7 +79,7 @@ struct tls_ofld_info {
struct tls_scmd scmd0;
u_int iv_len;
unsigned int tx_key_info_size;
struct callout handshake_timer;
size_t rx_resid;
};
struct tls_hdr {


@@ -389,9 +389,6 @@ t4_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp)
}
#endif
if (ulp_mode(toep) == ULP_MODE_TLS)
tls_detach(toep);
tp->tod = NULL;
tp->t_toe = NULL;
tp->t_flags &= ~TF_TOE;
@@ -1019,8 +1016,6 @@ final_cpl_received(struct toepcb *toep)
if (ulp_mode(toep) == ULP_MODE_TCPDDP)
release_ddp_resources(toep);
else if (ulp_mode(toep) == ULP_MODE_TLS)
tls_detach(toep);
toep->inp = NULL;
need_wakeup = (toep->flags & TPF_WAITING_FOR_FINAL) != 0;
toep->flags &= ~(TPF_CPL_PENDING | TPF_WAITING_FOR_FINAL);
@@ -1259,26 +1254,6 @@ select_ntuple(struct vi_info *vi, struct l2t_entry *e)
return (htobe64(V_FILTER_TUPLE(ntuple)));
}
static int
is_tls_sock(struct socket *so, struct adapter *sc)
{
struct inpcb *inp = sotoinpcb(so);
int i, rc;
/* XXX: Eventually add a SO_WANT_TLS socket option perhaps? */
rc = 0;
ADAPTER_LOCK(sc);
for (i = 0; i < sc->tt.num_tls_rx_ports; i++) {
if (inp->inp_lport == htons(sc->tt.tls_rx_ports[i]) ||
inp->inp_fport == htons(sc->tt.tls_rx_ports[i])) {
rc = 1;
break;
}
}
ADAPTER_UNLOCK(sc);
return (rc);
}
/*
* Initialize various connection parameters.
*/
@@ -1350,10 +1325,7 @@ init_conn_params(struct vi_info *vi, struct offload_settings *s,
cp->tx_align = 0;
/* ULP mode. */
if (can_tls_offload(sc) &&
(s->tls > 0 || (s->tls < 0 && is_tls_sock(so, sc))))
cp->ulp_mode = ULP_MODE_TLS;
else if (s->ddp > 0 ||
if (s->ddp > 0 ||
(s->ddp < 0 && sc->tt.ddp && (so_options_get(so) & SO_NO_DDP) == 0))
cp->ulp_mode = ULP_MODE_TCPDDP;
else
@@ -1362,8 +1334,6 @@ init_conn_params(struct vi_info *vi, struct offload_settings *s,
/* Rx coalescing. */
if (s->rx_coalesce >= 0)
cp->rx_coalesce = s->rx_coalesce > 0 ? 1 : 0;
else if (cp->ulp_mode == ULP_MODE_TLS)
cp->rx_coalesce = 0;
else if (tt->rx_coalesce >= 0)
cp->rx_coalesce = tt->rx_coalesce > 0 ? 1 : 0;
else


@@ -71,11 +71,11 @@ enum {
TPF_CPL_PENDING = (1 << 7), /* haven't received the last CPL */
TPF_SYNQE = (1 << 8), /* synq_entry, not really a toepcb */
TPF_SYNQE_EXPANDED = (1 << 9), /* toepcb ready, tid context updated */
TPF_FORCE_CREDITS = (1 << 10), /* always send credits */
TPF_TLS_STARTING = (1 << 10), /* starting TLS receive */
TPF_KTLS = (1 << 11), /* send TLS records from KTLS */
TPF_INITIALIZED = (1 << 12), /* init_toepcb has been called */
TPF_TLS_RECEIVE = (1 << 13), /* should receive TLS records */
TPF_TLS_ESTABLISHED = (1 << 14), /* TLS handshake timer initialized */
TPF_TLS_RX_QUIESCED = (1 << 14), /* RX quiesced for TLS RX startup */
TPF_WAITING_FOR_FINAL = (1 << 15), /* waiting for wakeup on final CPL */
};
@@ -471,7 +471,6 @@ void send_abort_rpl(struct adapter *, struct sge_ofld_txq *, int, int);
void send_flowc_wr(struct toepcb *, struct tcpcb *);
void send_reset(struct adapter *, struct toepcb *, uint32_t);
int send_rx_credits(struct adapter *, struct toepcb *, int);
void send_rx_modulate(struct adapter *, struct toepcb *);
void make_established(struct toepcb *, uint32_t, uint32_t, uint16_t);
int t4_close_conn(struct adapter *, struct toepcb *);
void t4_rcvd(struct toedev *, struct tcpcb *);
@@ -524,12 +523,11 @@ const struct offload_settings *lookup_offload_policy(struct adapter *, int,
bool can_tls_offload(struct adapter *);
void do_rx_data_tls(const struct cpl_rx_data *, struct toepcb *, struct mbuf *);
void t4_push_ktls(struct adapter *, struct toepcb *, int);
void tls_received_starting_data(struct adapter *, struct toepcb *,
struct sockbuf *, int);
void t4_tls_mod_load(void);
void t4_tls_mod_unload(void);
void tls_detach(struct toepcb *);
void tls_establish(struct toepcb *);
void tls_init_toep(struct toepcb *);
void tls_stop_handshake_timer(struct toepcb *);
int tls_tx_key(struct toepcb *);
void tls_uninit_toep(struct toepcb *);
int tls_alloc_ktls(struct toepcb *, struct ktls_session *, int);