tcp: simplify stack switching protocol

Before this patch, a stack (tfb) accepted a tcpcb (tp) if
tp->t_state was TCPS_CLOSED, or if tfb->tfb_tcp_handoff_ok was not
NULL and tfb->tfb_tcp_handoff_ok(tp) returned 0.
After this patch, the only check is that tfb->tfb_tcp_handoff_ok(tp)
returns 0; tfb->tfb_tcp_handoff_ok must therefore always be provided.
For existing TCP stacks (FreeBSD, RACK and BBR) there is no
functional change. However, the logic is simpler.

Reviewed by:		lstewart, peter_lei_ieee_.org, rrs
MFC after:		1 week
Sponsored by:		Netflix, Inc.
Differential Revision:	https://reviews.freebsd.org/D45253
This commit is contained in:
Michael Tuexen 2024-06-06 08:29:05 +02:00
parent 9c5d7e4a0c
commit 86c9325d34
4 changed files with 30 additions and 65 deletions

View file

@ -23,7 +23,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE. .\" SUCH DAMAGE.
.\" .\"
.Dd March 10, 2017 .Dd June 6, 2024
.Dt TCP_FUNCTIONS 9 .Dt TCP_FUNCTIONS 9
.Os .Os
.Sh NAME .Sh NAME
@ -176,9 +176,10 @@ struct tcp_function_block {
uint32_t, u_int); uint32_t, u_int);
int (*tfb_tcp_timer_active)(struct tcpcb *, uint32_t); int (*tfb_tcp_timer_active)(struct tcpcb *, uint32_t);
void (*tfb_tcp_timer_stop)(struct tcpcb *, uint32_t); void (*tfb_tcp_timer_stop)(struct tcpcb *, uint32_t);
/* Optional functions */ /* Optional function */
void (*tfb_tcp_rexmit_tmr)(struct tcpcb *); void (*tfb_tcp_rexmit_tmr)(struct tcpcb *);
void (*tfb_tcp_handoff_ok)(struct tcpcb *); /* Mandatory function */
int (*tfb_tcp_handoff_ok)(struct tcpcb *);
/* System use */ /* System use */
volatile uint32_t tfb_refcnt; volatile uint32_t tfb_refcnt;
uint32_t tfb_flags; uint32_t tfb_flags;
@ -261,37 +262,21 @@ However, care must be taken to ensure the retransmit timer leaves the
TCP control block in a valid state for the remainder of the retransmit TCP control block in a valid state for the remainder of the retransmit
timer logic. timer logic.
.Pp .Pp
A user may select a new TCP stack before calling A user may select a new TCP stack at any time.
.Xr connect 2 Therefore, the function pointer
or
.Xr listen 2 .
Optionally, a TCP stack may also allow a user to begin using the TCP stack for
a connection that is in a later state by setting a non-NULL function pointer in
the
.Va tfb_tcp_handoff_ok .Va tfb_tcp_handoff_ok
field. field must be non-NULL.
If this field is non-NULL and a user attempts to select that TCP stack after If a user attempts to select that TCP stack, the kernel will call the function
calling pointed to by the
.Xr connect 2
or
.Xr listen 2
for that socket, the kernel will call the function pointed to by the
.Va tfb_tcp_handoff_ok .Va tfb_tcp_handoff_ok
field. field.
The function should return 0 if the user is allowed to switch the socket to use The function should return 0 if the user is allowed to switch the socket to use
the TCP stack. the TCP stack. In this case, the kernel will call the function pointed to by
Otherwise, the function should return an error code, which will be returned to .Va tfb_tcp_fb_init
the user. if this function pointer is non-NULL and finally perform the stack switch.
If the If the user is not allowed to switch the socket, the function should undo any
.Va tfb_tcp_handoff_ok changes it made to the connection state configuration and return an error code,
field is which will be returned to the user.
.Dv NULL
and a user attempts to select the TCP stack after calling
.Xr connect 2
or
.Xr listen 2
for that socket, the operation will fail and the kernel will return
.Er EINVAL .
.Pp .Pp
The The
.Va tfb_refcnt .Va tfb_refcnt

View file

@ -516,8 +516,7 @@ tcp_switch_back_to_default(struct tcpcb *tp)
tfb = NULL; tfb = NULL;
} }
/* Does the stack accept this connection? */ /* Does the stack accept this connection? */
if (tfb != NULL && tfb->tfb_tcp_handoff_ok != NULL && if (tfb != NULL && (*tfb->tfb_tcp_handoff_ok)(tp)) {
(*tfb->tfb_tcp_handoff_ok)(tp)) {
refcount_release(&tfb->tfb_refcnt); refcount_release(&tfb->tfb_refcnt);
tfb = NULL; tfb = NULL;
} }
@ -551,11 +550,9 @@ tcp_switch_back_to_default(struct tcpcb *tp)
/* there always should be a default */ /* there always should be a default */
panic("Can't refer to tcp_def_funcblk"); panic("Can't refer to tcp_def_funcblk");
} }
if (tfb->tfb_tcp_handoff_ok != NULL) { if ((*tfb->tfb_tcp_handoff_ok)(tp)) {
if ((*tfb->tfb_tcp_handoff_ok) (tp)) { /* The default stack cannot say no */
/* The default stack cannot say no */ panic("Default stack rejects a new session?");
panic("Default stack rejects a new session?");
}
} }
if (tfb->tfb_tcp_fb_init != NULL && if (tfb->tfb_tcp_fb_init != NULL &&
(*tfb->tfb_tcp_fb_init)(tp, &ptr)) { (*tfb->tfb_tcp_fb_init)(tp, &ptr)) {
@ -1186,6 +1183,7 @@ register_tcp_functions_as_names(struct tcp_function_block *blk, int wait,
if ((blk->tfb_tcp_output == NULL) || if ((blk->tfb_tcp_output == NULL) ||
(blk->tfb_tcp_do_segment == NULL) || (blk->tfb_tcp_do_segment == NULL) ||
(blk->tfb_tcp_ctloutput == NULL) || (blk->tfb_tcp_ctloutput == NULL) ||
(blk->tfb_tcp_handoff_ok == NULL) ||
(strlen(blk->tfb_tcp_block_name) == 0)) { (strlen(blk->tfb_tcp_block_name) == 0)) {
/* /*
* These functions are required and you * These functions are required and you

View file

@ -1731,32 +1731,17 @@ tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt)
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
return (0); return (0);
} }
if (tp->t_state != TCPS_CLOSED) {
/*
* The user has advanced the state
* past the initial point, we may not
* be able to switch.
*/
if (blk->tfb_tcp_handoff_ok != NULL) {
/*
* Does the stack provide a
* query mechanism, if so it may
* still be possible?
*/
error = (*blk->tfb_tcp_handoff_ok)(tp);
} else
error = EINVAL;
if (error) {
refcount_release(&blk->tfb_refcnt);
INP_WUNLOCK(inp);
return(error);
}
}
if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) { if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) {
refcount_release(&blk->tfb_refcnt); refcount_release(&blk->tfb_refcnt);
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
return (ENOENT); return (ENOENT);
} }
error = (*blk->tfb_tcp_handoff_ok)(tp);
if (error) {
refcount_release(&blk->tfb_refcnt);
INP_WUNLOCK(inp);
return (error);
}
/* /*
* Ensure the new stack takes ownership with a * Ensure the new stack takes ownership with a
* clean slate on peak rate threshold. * clean slate on peak rate threshold.

View file

@ -542,13 +542,10 @@ typedef enum {
#define TCP_FUNC_OUTPUT_CANDROP 0x02 /* tfb_tcp_output may ask tcp_drop */ #define TCP_FUNC_OUTPUT_CANDROP 0x02 /* tfb_tcp_output may ask tcp_drop */
/** /**
* Adding a tfb_tcp_handoff_ok function allows the socket * tfb_tcp_handoff_ok is a mandatory function allowing
* option to change stacks to query you even if the * to query a stack, if it can take over a tcpcb.
* connection is in a later stage. You return 0 to * You return 0 to say you can take over and run your stack,
* say you can take over and run your stack, you return * you return non-zero (an error number) to say no you can't.
* non-zero (an error number) to say no you can't.
* If the function is undefined you can only change
* in the early states (before connect or listen).
* *
* tfb_tcp_fb_init is used to allow the new stack to * tfb_tcp_fb_init is used to allow the new stack to
* setup its control block. Among the things it must * setup its control block. Among the things it must