From b9cfb50f710222184916481d210e11aaabb18c5a Mon Sep 17 00:00:00 2001 From: Tom Finet Date: Tue, 27 Feb 2024 23:24:32 +0000 Subject: [PATCH] Kernel/Net: Add TCPSocket timer for TimeWait moving to Closed RFC 9293 states that, once in the TimeWait state, the TCPSocket should wait for the MSL (2 minutes) so that delayed segments expire and their sequence numbers do not clash with those of a new connection using the same IP address and port number. The wait also ensures the remote TCP peer has received the ACK of its FIN segment. --- Kernel/Net/TCPSocket.cpp | 37 +++++++++++++++++++++++++++++-------- Kernel/Net/TCPSocket.h | 9 ++++++++- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/Kernel/Net/TCPSocket.cpp b/Kernel/Net/TCPSocket.cpp index ca20ecf82f..4a44d94063 100644 --- a/Kernel/Net/TCPSocket.cpp +++ b/Kernel/Net/TCPSocket.cpp @@ -74,21 +74,40 @@ void TCPSocket::set_state(State new_state) // are packets on the way which we wouldn't want a new socket to get hit // with, so there's no point in keeping the receive buffer around. 
drop_receive_buffer(); - } - if (new_state == State::Closed) { - closing_sockets().with_exclusive([&](auto& table) { - table.remove(tuple()); + auto deadline = TimeManagement::the().current_time(CLOCK_MONOTONIC_COARSE) + maximum_segment_lifetime; + auto timer_was_added = TimerQueue::the().add_timer_without_id(*m_timer, CLOCK_MONOTONIC_COARSE, deadline, [&]() { + dbgln_if(TCP_SOCKET_DEBUG, "TCPSocket({}) TimeWait timer elpased", this); + if (m_state == State::TimeWait) { + m_state = State::Closed; + do_state_closed(); + } }); - if (m_originator) - release_to_originator(); + if (!timer_was_added) [[unlikely]] { + dbgln_if(TCP_SOCKET_DEBUG, "TCPSocket({}) TimeWait timer deadline is in the past", this); + m_state = State::Closed; + new_state = State::Closed; + } } + if (new_state == State::Closed) + do_state_closed(); + if (previous_role != m_role || was_disconnected != protocol_is_disconnected()) evaluate_block_conditions(); } +void TCPSocket::do_state_closed() +{ + if (m_originator) + release_to_originator(); + + closing_sockets().with_exclusive([&](auto& table) { + table.remove(tuple()); + }); +} + static Singleton>>> s_socket_closing; MutexProtected>>& TCPSocket::closing_sockets() @@ -165,10 +184,11 @@ void TCPSocket::release_for_accept(NonnullRefPtr socket) [[maybe_unused]] auto rc = queue_connection_from(move(socket)); } -TCPSocket::TCPSocket(int protocol, NonnullOwnPtr receive_buffer, NonnullOwnPtr scratch_buffer) +TCPSocket::TCPSocket(int protocol, NonnullOwnPtr receive_buffer, NonnullOwnPtr scratch_buffer, NonnullRefPtr timer) : IPv4Socket(SOCK_STREAM, protocol, move(receive_buffer), move(scratch_buffer)) , m_last_ack_sent_time(TimeManagement::the().monotonic_time()) , m_last_retransmit_time(TimeManagement::the().monotonic_time()) + , m_timer(timer) { } @@ -183,7 +203,8 @@ ErrorOr> TCPSocket::try_create(int protocol, NonnullOwn { // Note: Scratch buffer is only used for SOCK_STREAM sockets. 
auto scratch_buffer = TRY(KBuffer::try_create_with_size("TCPSocket: Scratch buffer"sv, 65536)); - return adopt_nonnull_ref_or_enomem(new (nothrow) TCPSocket(protocol, move(receive_buffer), move(scratch_buffer))); + auto timer = TRY(adopt_nonnull_ref_or_enomem(new (nothrow) Timer)); + return adopt_nonnull_ref_or_enomem(new (nothrow) TCPSocket(protocol, move(receive_buffer), move(scratch_buffer), timer)); } ErrorOr TCPSocket::protocol_size(ReadonlyBytes raw_ipv4_packet) diff --git a/Kernel/Net/TCPSocket.h b/Kernel/Net/TCPSocket.h index f2e82494a0..ef8ddc87d9 100644 --- a/Kernel/Net/TCPSocket.h +++ b/Kernel/Net/TCPSocket.h @@ -15,6 +15,7 @@ #include #include #include +#include namespace Kernel { @@ -179,7 +180,7 @@ protected: void set_direction(Direction direction) { m_direction = direction; } private: - explicit TCPSocket(int protocol, NonnullOwnPtr receive_buffer, NonnullOwnPtr scratch_buffer); + explicit TCPSocket(int protocol, NonnullOwnPtr receive_buffer, NonnullOwnPtr scratch_buffer, NonnullRefPtr timer); virtual StringView class_name() const override { return "TCPSocket"sv; } virtual void shut_down_for_writing() override; @@ -192,6 +193,8 @@ private: virtual ErrorOr protocol_bind() override; virtual ErrorOr protocol_listen() override; + void do_state_closed(); + void enqueue_for_retransmit(); void dequeue_for_retransmit(); @@ -236,6 +239,8 @@ private: u32 m_last_ack_number_sent { 0 }; MonotonicTime m_last_ack_sent_time; + static constexpr Duration maximum_segment_lifetime = Duration::from_seconds(120); + // FIXME: Make this configurable (sysctl) static constexpr u32 maximum_retransmits = 5; MonotonicTime m_last_retransmit_time; @@ -253,6 +258,8 @@ private: Optional m_registered_socket_tuple; + NonnullRefPtr m_timer; + public: using RetransmitList = IntrusiveList<&TCPSocket::m_retransmit_list_node>; static MutexProtected& sockets_for_retransmit();