tcp: simplify stack switching protocol

Before this patch, a stack (tfb) accepts a tcpcb (tp), if the
tp->t_state is TCPS_CLOSED or tfb->tfb_tcp_handoff_ok is not NULL
and tfb->tfb_tcp_handoff_ok(tp) returns 0.
After this patch, the only check is tfb->tfb_tcp_handoff_ok(tp)
returns 0. tfb->tfb_tcp_handoff_ok must always be provided.
For existing TCP stacks (FreeBSD, RACK and BBR) there is no
functional change. However, the logic is simpler.

Reviewed by:		lstewart, peter_lei_ieee_.org, rrs
MFC after:		1 week
Sponsored by:		Netflix, Inc.
Differential Revision:	https://reviews.freebsd.org/D45253
This commit is contained in:
Michael Tuexen 2024-06-06 08:29:05 +02:00
parent 9c5d7e4a0c
commit 86c9325d34
4 changed files with 30 additions and 65 deletions

View File

@ -23,7 +23,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.Dd March 10, 2017
.Dd June 6, 2024
.Dt TCP_FUNCTIONS 9
.Os
.Sh NAME
@ -176,9 +176,10 @@ struct tcp_function_block {
uint32_t, u_int);
int (*tfb_tcp_timer_active)(struct tcpcb *, uint32_t);
void (*tfb_tcp_timer_stop)(struct tcpcb *, uint32_t);
/* Optional functions */
/* Optional function */
void (*tfb_tcp_rexmit_tmr)(struct tcpcb *);
void (*tfb_tcp_handoff_ok)(struct tcpcb *);
/* Mandatory function */
int (*tfb_tcp_handoff_ok)(struct tcpcb *);
/* System use */
volatile uint32_t tfb_refcnt;
uint32_t tfb_flags;
@ -261,37 +262,21 @@ However, care must be taken to ensure the retransmit timer leaves the
TCP control block in a valid state for the remainder of the retransmit
timer logic.
.Pp
A user may select a new TCP stack before calling
.Xr connect 2
or
.Xr listen 2 .
Optionally, a TCP stack may also allow a user to begin using the TCP stack for
a connection that is in a later state by setting a non-NULL function pointer in
the
A user may select a new TCP stack before calling at any time.
Therefore, the function pointer
.Va tfb_tcp_handoff_ok
field.
If this field is non-NULL and a user attempts to select that TCP stack after
calling
.Xr connect 2
or
.Xr listen 2
for that socket, the kernel will call the function pointed to by the
field must be non-NULL.
If a user attempts to select that TCP stack, the kernel will call the function
pointed to by the
.Va tfb_tcp_handoff_ok
field.
The function should return 0 if the user is allowed to switch the socket to use
the TCP stack.
Otherwise, the function should return an error code, which will be returned to
the user.
If the
.Va tfb_tcp_handoff_ok
field is
.Dv NULL
and a user attempts to select the TCP stack after calling
.Xr connect 2
or
.Xr listen 2
for that socket, the operation will fail and the kernel will return
.Er EINVAL .
the TCP stack. In this case, the kernel will call the function pointed to by
.Va tfb_tcp_fb_init
if this function pointer is non-NULL and finally perform the stack switch.
If the user is not allowed to switch the socket, the function should undo any
changes it made to the connection state configuration and return an error code,
which will be returned to the user.
.Pp
The
.Va tfb_refcnt

View File

@ -516,8 +516,7 @@ tcp_switch_back_to_default(struct tcpcb *tp)
tfb = NULL;
}
/* Does the stack accept this connection? */
if (tfb != NULL && tfb->tfb_tcp_handoff_ok != NULL &&
(*tfb->tfb_tcp_handoff_ok)(tp)) {
if (tfb != NULL && (*tfb->tfb_tcp_handoff_ok)(tp)) {
refcount_release(&tfb->tfb_refcnt);
tfb = NULL;
}
@ -551,11 +550,9 @@ tcp_switch_back_to_default(struct tcpcb *tp)
/* there always should be a default */
panic("Can't refer to tcp_def_funcblk");
}
if (tfb->tfb_tcp_handoff_ok != NULL) {
if ((*tfb->tfb_tcp_handoff_ok) (tp)) {
/* The default stack cannot say no */
panic("Default stack rejects a new session?");
}
if ((*tfb->tfb_tcp_handoff_ok)(tp)) {
/* The default stack cannot say no */
panic("Default stack rejects a new session?");
}
if (tfb->tfb_tcp_fb_init != NULL &&
(*tfb->tfb_tcp_fb_init)(tp, &ptr)) {
@ -1186,6 +1183,7 @@ register_tcp_functions_as_names(struct tcp_function_block *blk, int wait,
if ((blk->tfb_tcp_output == NULL) ||
(blk->tfb_tcp_do_segment == NULL) ||
(blk->tfb_tcp_ctloutput == NULL) ||
(blk->tfb_tcp_handoff_ok == NULL) ||
(strlen(blk->tfb_tcp_block_name) == 0)) {
/*
* These functions are required and you

View File

@ -1731,32 +1731,17 @@ tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt)
INP_WUNLOCK(inp);
return (0);
}
if (tp->t_state != TCPS_CLOSED) {
/*
* The user has advanced the state
* past the initial point, we may not
* be able to switch.
*/
if (blk->tfb_tcp_handoff_ok != NULL) {
/*
* Does the stack provide a
* query mechanism, if so it may
* still be possible?
*/
error = (*blk->tfb_tcp_handoff_ok)(tp);
} else
error = EINVAL;
if (error) {
refcount_release(&blk->tfb_refcnt);
INP_WUNLOCK(inp);
return(error);
}
}
if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) {
refcount_release(&blk->tfb_refcnt);
INP_WUNLOCK(inp);
return (ENOENT);
}
error = (*blk->tfb_tcp_handoff_ok)(tp);
if (error) {
refcount_release(&blk->tfb_refcnt);
INP_WUNLOCK(inp);
return (error);
}
/*
* Ensure the new stack takes ownership with a
* clean slate on peak rate threshold.

View File

@ -542,13 +542,10 @@ typedef enum {
#define TCP_FUNC_OUTPUT_CANDROP 0x02 /* tfb_tcp_output may ask tcp_drop */
/**
* Adding a tfb_tcp_handoff_ok function allows the socket
* option to change stacks to query you even if the
* connection is in a later stage. You return 0 to
* say you can take over and run your stack, you return
* non-zero (an error number) to say no you can't.
* If the function is undefined you can only change
* in the early states (before connect or listen).
* tfb_tcp_handoff_ok is a mandatory function allowing
* to query a stack, if it can take over a tcpcb.
* You return 0 to say you can take over and run your stack,
* you return non-zero (an error number) to say no you can't.
*
* tfb_tcp_fb_init is used to allow the new stack to
* setup its control block. Among the things it must