Merge branch 'mptcp-fixes-for-v6-6'

Mat Martineau says:

====================
mptcp: Fixes for v6.6

Patch 1 corrects the logic for MP_JOIN tests where 0 RSTs are expected.

Patch 2 ensures MPTCP packets are not incorrectly coalesced in the TCP
backlog queue.

Patch 3 avoids a zero-window probe and associated WARN_ON_ONCE() in an
expected MPTCP reinjection scenario.

Patches 4 & 5 allow an initial MPTCP subflow to be closed cleanly
instead of always sending RST. Associated selftest is updated.
====================

Link: https://lore.kernel.org/r/20231018-send-net-20231018-v1-0-17ecb002e41d@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2023-10-19 09:10:02 -07:00
commit 1c1f14f92b
3 changed files with 43 additions and 15 deletions

View file

@ -1869,6 +1869,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
#ifdef CONFIG_TLS_DEVICE
tail->decrypted != skb->decrypted ||
#endif
!mptcp_skb_can_collapse(tail, skb) ||
thtail->doff != th->doff ||
memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th)))
goto no_coalesce;

View file

@ -1298,7 +1298,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
if (copy == 0) {
u64 snd_una = READ_ONCE(msk->snd_una);
if (snd_una != msk->snd_nxt) {
if (snd_una != msk->snd_nxt || tcp_write_queue_tail(ssk)) {
tcp_remove_empty_skb(ssk);
return 0;
}
@ -1306,11 +1306,6 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
zero_window_probe = true;
data_seq = snd_una - 1;
copy = 1;
/* all mptcp-level data is acked, no skbs should be present into the
* ssk write queue
*/
WARN_ON_ONCE(reuse_skb);
}
copy = min_t(size_t, copy, info->limit - info->sent);
@ -1339,7 +1334,6 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
if (reuse_skb) {
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
mpext->data_len += copy;
WARN_ON_ONCE(zero_window_probe);
goto out;
}
@ -2354,6 +2348,26 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
#define MPTCP_CF_PUSH BIT(1)
#define MPTCP_CF_FASTCLOSE BIT(2)
/* Close a subflow socket, resetting it only when that is warranted.
 *
 * When @flags carries MPTCP_CF_FASTCLOSE, or @ssk is already in the
 * TCP_CLOSE or TCP_LISTEN state, run a TCP-level disconnect and then
 * reset the subflow context. In every other case only shut down the
 * send side of @ssk through tcp_shutdown().
 */
static void __mptcp_subflow_disconnect(struct sock *ssk,
				       struct mptcp_subflow_context *subflow,
				       unsigned int flags)
{
	const int state_mask = 1 << ssk->sk_state;

	/* Neither fastclose requested nor socket closed/listening:
	 * shut down the send side only and leave the subflow context alone.
	 */
	if (!(state_mask & (TCPF_CLOSE | TCPF_LISTEN)) &&
	    !(flags & MPTCP_CF_FASTCLOSE)) {
		tcp_shutdown(ssk, SEND_SHUTDOWN);
		return;
	}

	/* The MPTCP code never waits on the subflow sockets, so the
	 * TCP-level disconnect should never fail; warn once if it does.
	 */
	WARN_ON_ONCE(tcp_disconnect(ssk, 0));
	mptcp_subflow_ctx_reset(subflow);
}
/* subflow sockets can be either outgoing (connect) or incoming
* (accept).
*
@ -2391,7 +2405,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
if ((flags & MPTCP_CF_FASTCLOSE) && !__mptcp_check_fallback(msk)) {
/* be sure to force the tcp_disconnect() path,
/* be sure to force the tcp_close path
* to generate the egress reset
*/
ssk->sk_lingertime = 0;
@ -2401,11 +2415,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk);
if (!dispose_it) {
/* The MPTCP code never wait on the subflow sockets, TCP-level
* disconnect should never fail
*/
WARN_ON_ONCE(tcp_disconnect(ssk, 0));
mptcp_subflow_ctx_reset(subflow);
__mptcp_subflow_disconnect(ssk, subflow, flags);
release_sock(ssk);
goto out;

View file

@ -1432,7 +1432,9 @@ chk_rst_nr()
count=$(get_counter ${ns_tx} "MPTcpExtMPRstTx")
if [ -z "$count" ]; then
print_skip
elif [ $count -lt $rst_tx ]; then
# accept more rst than expected except if we don't expect any
elif { [ $rst_tx -ne 0 ] && [ $count -lt $rst_tx ]; } ||
{ [ $rst_tx -eq 0 ] && [ $count -ne 0 ]; }; then
fail_test "got $count MP_RST[s] TX expected $rst_tx"
else
print_ok
@ -1442,7 +1444,9 @@ chk_rst_nr()
count=$(get_counter ${ns_rx} "MPTcpExtMPRstRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" -lt "$rst_rx" ]; then
# accept more rst than expected except if we don't expect any
elif { [ $rst_rx -ne 0 ] && [ $count -lt $rst_rx ]; } ||
{ [ $rst_rx -eq 0 ] && [ $count -ne 0 ]; }; then
fail_test "got $count MP_RST[s] RX expected $rst_rx"
else
print_ok
@ -2305,6 +2309,7 @@ remove_tests()
chk_join_nr 1 1 1
chk_rm_tx_nr 1
chk_rm_nr 1 1
chk_rst_nr 0 0
fi
# multiple subflows, remove
@ -2317,6 +2322,7 @@ remove_tests()
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
chk_rm_nr 2 2
chk_rst_nr 0 0
fi
# single address, remove
@ -2329,6 +2335,7 @@ remove_tests()
chk_join_nr 1 1 1
chk_add_nr 1 1
chk_rm_nr 1 1 invert
chk_rst_nr 0 0
fi
# subflow and signal, remove
@ -2342,6 +2349,7 @@ remove_tests()
chk_join_nr 2 2 2
chk_add_nr 1 1
chk_rm_nr 1 1
chk_rst_nr 0 0
fi
# subflows and signal, remove
@ -2356,6 +2364,7 @@ remove_tests()
chk_join_nr 3 3 3
chk_add_nr 1 1
chk_rm_nr 2 2
chk_rst_nr 0 0
fi
# addresses remove
@ -2370,6 +2379,7 @@ remove_tests()
chk_join_nr 3 3 3
chk_add_nr 3 3
chk_rm_nr 3 3 invert
chk_rst_nr 0 0
fi
# invalid addresses remove
@ -2384,6 +2394,7 @@ remove_tests()
chk_join_nr 1 1 1
chk_add_nr 3 3
chk_rm_nr 3 1 invert
chk_rst_nr 0 0
fi
# subflows and signal, flush
@ -2398,6 +2409,7 @@ remove_tests()
chk_join_nr 3 3 3
chk_add_nr 1 1
chk_rm_nr 1 3 invert simult
chk_rst_nr 0 0
fi
# subflows flush
@ -2417,6 +2429,7 @@ remove_tests()
else
chk_rm_nr 3 3
fi
chk_rst_nr 0 0
fi
# addresses flush
@ -2431,6 +2444,7 @@ remove_tests()
chk_join_nr 3 3 3
chk_add_nr 3 3
chk_rm_nr 3 3 invert simult
chk_rst_nr 0 0
fi
# invalid addresses flush
@ -2445,6 +2459,7 @@ remove_tests()
chk_join_nr 1 1 1
chk_add_nr 3 3
chk_rm_nr 3 1 invert
chk_rst_nr 0 0
fi
# remove id 0 subflow
@ -2456,6 +2471,7 @@ remove_tests()
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_rm_nr 1 1
chk_rst_nr 0 0
fi
# remove id 0 address
@ -2468,6 +2484,7 @@ remove_tests()
chk_join_nr 1 1 1
chk_add_nr 1 1
chk_rm_nr 1 1 invert
chk_rst_nr 0 0 invert
fi
}