Kernel: Try to retransmit lost TCP packets

Previously we didn't retransmit lost TCP packets which would cause
connections to hang if packets were lost. Also we now time out
TCP connections after a number of retransmission attempts.
This commit is contained in:
Gunnar Beutner 2021-05-13 10:49:10 +02:00 committed by Andreas Kling
parent 9daec809b7
commit 08aa3a91e3
3 changed files with 101 additions and 16 deletions

View file

@ -30,6 +30,7 @@ static void handle_udp(const IPv4Packet&, const Time& packet_timestamp);
static void handle_tcp(const IPv4Packet&, const Time& packet_timestamp);
static void send_delayed_tcp_ack(RefPtr<TCPSocket> socket);
static void flush_delayed_tcp_acks();
static void retransmit_tcp_packets();
static Thread* network_task = nullptr;
static HashTable<RefPtr<TCPSocket>>* delayed_ack_sockets;
@ -90,6 +91,7 @@ void NetworkTask_main(void*)
for (;;) {
flush_delayed_tcp_acks();
retransmit_tcp_packets();
size_t packet_size = dequeue_packet(buffer, buffer_size, packet_timestamp);
if (!packet_size) {
auto timeout_time = Time::from_milliseconds(500);
@ -606,4 +608,21 @@ void handle_tcp(const IPv4Packet& ipv4_packet, const Time& packet_timestamp)
}
}
void retransmit_tcp_packets()
{
// We must keep the sockets alive until after we've unlocked the hash table
// in case retransmit_packets() realizes that it wants to close the socket.
NonnullRefPtrVector<TCPSocket, 16> sockets;
{
Locker locker(TCPSocket::sockets_for_retransmit().lock(), LockMode::Shared);
for (auto& socket : TCPSocket::sockets_for_retransmit().resource())
sockets.append(*socket);
}
for (auto& socket : sockets) {
Locker socket_locker(socket.lock());
socket.retransmit_packets();
}
}
}

View file

@ -132,6 +132,7 @@ void TCPSocket::release_for_accept(RefPtr<TCPSocket> socket)
TCPSocket::TCPSocket(int protocol)
: IPv4Socket(SOCK_STREAM, protocol)
{
m_last_retransmit_time = kgettimeofday();
}
TCPSocket::~TCPSocket()
@ -139,6 +140,8 @@ TCPSocket::~TCPSocket()
Locker locker(sockets_by_tuple().lock());
sockets_by_tuple().resource().remove(tuple());
dequeue_for_retransmit();
dbgln_if(TCP_SOCKET_DEBUG, "~TCPSocket in state {}", to_string(state()));
}
@ -221,13 +224,6 @@ KResult TCPSocket::send_tcp_packet(u16 flags, const UserOrKernelBuffer* payload,
tcp_packet.set_checksum(compute_tcp_checksum(local_address(), peer_address(), tcp_packet, payload_size));
if (tcp_packet.has_syn() || payload_size > 0) {
Locker locker(m_not_acked_lock);
m_not_acked.append({ m_sequence_number, move(buffer) });
send_outgoing_packets(routing_decision);
return KSuccess;
}
auto packet_buffer = UserOrKernelBuffer::for_kernel_buffer(buffer.data());
auto result = routing_decision.adapter->send_ipv4(
local_address(), routing_decision.next_hop, peer_address(), IPv4Protocol::TCP,
@ -237,19 +233,23 @@ KResult TCPSocket::send_tcp_packet(u16 flags, const UserOrKernelBuffer* payload,
m_packets_out++;
m_bytes_out += buffer_size;
if (tcp_packet.has_syn() || payload_size > 0) {
Locker locker(m_not_acked_lock);
m_not_acked.append({ m_sequence_number, move(buffer) });
enqueue_for_retransmit();
}
return KSuccess;
}
void TCPSocket::send_outgoing_packets(RoutingDecision& routing_decision)
void TCPSocket::do_retransmit_packets()
{
auto now = kgettimeofday();
auto routing_decision = route_to(peer_address(), local_address(), bound_interface());
if (routing_decision.is_zero())
return;
Locker locker(m_not_acked_lock, Lock::Mode::Shared);
for (auto& packet : m_not_acked) {
auto diff = now - packet.tx_time;
if (diff <= Time::from_nanoseconds(500'000'000))
continue;
packet.tx_time = now;
packet.tx_counter++;
if constexpr (TCP_SOCKET_DEBUG) {
@ -314,6 +314,11 @@ void TCPSocket::receive_tcp_packet(const TCPPacket& packet, u16 size)
}
}
if (m_not_acked.is_empty()) {
m_retransmit_attempts = 0;
dequeue_for_retransmit();
}
dbgln_if(TCP_SOCKET_DEBUG, "TCPSocket: receive_tcp_packet acknowledged {} packets", removed);
}
@ -436,7 +441,10 @@ KResult TCPSocket::protocol_connect(FileDescription& description, ShouldBlock sh
VERIFY(setup_state() == SetupState::Completed);
if (has_error()) { // TODO: check unblock_flags
m_role = Role::None;
return ECONNREFUSED;
if (error() == TCPSocket::Error::RetransmitTimeout)
return ETIMEDOUT;
else
return ECONNREFUSED;
}
return KSuccess;
}
@ -514,4 +522,51 @@ KResult TCPSocket::close()
return result;
}
static AK::Singleton<Lockable<HashTable<TCPSocket*>>> s_sockets_for_retransmit;
Lockable<HashTable<TCPSocket*>>& TCPSocket::sockets_for_retransmit()
{
return *s_sockets_for_retransmit;
}
void TCPSocket::enqueue_for_retransmit()
{
Locker locker(sockets_for_retransmit().lock());
sockets_for_retransmit().resource().set(this);
}
void TCPSocket::dequeue_for_retransmit()
{
Locker locker(sockets_for_retransmit().lock());
sockets_for_retransmit().resource().remove(this);
}
void TCPSocket::retransmit_packets()
{
auto now = kgettimeofday();
// RFC6298 says we should have at least one second between retransmits. According to
// RFC1122 we must do exponential backoff - even for SYN packets.
i64 retransmit_interval = 1;
for (decltype(m_retransmit_attempts) i = 0; i < m_retransmit_attempts; i++)
retransmit_interval *= 2;
if (m_last_retransmit_time > now - Time::from_seconds(retransmit_interval))
return;
dbgln_if(TCP_SOCKET_DEBUG, "TCPSocket({}) handling retransmit", this);
m_last_retransmit_time = now;
++m_retransmit_attempts;
if (m_retransmit_attempts > maximum_retransmits) {
set_state(TCPSocket::State::Closed);
set_error(TCPSocket::Error::RetransmitTimeout);
set_setup_state(Socket::SetupState::Completed);
return;
}
do_retransmit_packets();
}
}

View file

@ -93,6 +93,7 @@ public:
FINDuringConnect,
RSTDuringConnect,
UnexpectedFlagsDuringConnect,
RetransmitTimeout,
};
static const char* to_string(Error error)
@ -136,7 +137,6 @@ public:
KResult send_ack(bool allow_duplicate = false);
KResult send_tcp_packet(u16 flags, const UserOrKernelBuffer* = nullptr, size_t = 0);
void send_outgoing_packets(RoutingDecision&);
void receive_tcp_packet(const TCPPacket&, u16 size);
bool should_delay_next_ack() const;
@ -152,6 +152,9 @@ public:
void release_to_originator();
void release_for_accept(RefPtr<TCPSocket>);
static Lockable<HashTable<TCPSocket*>>& sockets_for_retransmit();
void retransmit_packets();
virtual KResult close() override;
protected:
@ -173,6 +176,10 @@ private:
virtual KResult protocol_bind() override;
virtual KResult protocol_listen() override;
void do_retransmit_packets();
void enqueue_for_retransmit();
void dequeue_for_retransmit();
WeakPtr<TCPSocket> m_originator;
HashMap<IPv4SocketTuple, NonnullRefPtr<TCPSocket>> m_pending_release_for_accept;
Direction m_direction { Direction::Unspecified };
@ -190,7 +197,6 @@ private:
u32 ack_number { 0 };
ByteBuffer buffer;
int tx_counter { 0 };
Time tx_time {};
};
Lock m_not_acked_lock { "TCPSocket unacked packets" };
@ -200,6 +206,11 @@ private:
u32 m_last_ack_number_sent { 0 };
Time m_last_ack_sent_time;
// FIXME: Make this configurable (sysctl)
static constexpr u32 maximum_retransmits = 5;
Time m_last_retransmit_time;
u32 m_retransmit_attempts { 0 };
};
}