resolved: Fix DoT timeout on multiple answer records

When sending multiple DNS questions to a DNS-over-TLS server (e.g. a question
for A and AAAA records, as is typical) on the same session, the server may
answer to each question in a separate TLS record, but it may also aggregate
multiple answers in a single TLS record.
(Some servers do this very often (e.g. Cloudflare 1.0.0.1), some do it sometimes
(e.g. Google 8.8.8.8) and some seem to never do it (e.g. Quad9 9.9.9.10)).

Both cases should be handled equivalently, as the byte stream is the same, but
when multiple answers came in a single TLS record, usually the first answer was
processed, but the second answer was entirely ignored, which caused a 10s delay
until the resolution timed out and the missing question was retried.
This can be reproduced by configuring one of the offending server and running
`resolvectl query google.com --cache=no` a few times.

To be notified of incoming data, systemd-resolved listens to `EPOLLIN` events
on the underlying socket. However, when DNS-over-TLS is used, the TLS library
(OpenSSL or GnuTLS) may read and buffer the entire TLS record when reading the
first answer, so usually no further `EPOLLIN` events will be generated, and the
second answer will never be processed.

To avoid this, if there's buffered TLS data, generate a "fake" EPOLLIN event.
This is hacky, but it makes this case transparent to the rest of the IO code.
This commit is contained in:
Joan Bruguera 2022-01-15 17:33:25 +01:00 committed by Yu Watanabe
parent a42a93830f
commit 2aaf6bb6e9
4 changed files with 61 additions and 2 deletions

View file

@ -6,6 +6,7 @@
#include "alloc-util.h"
#include "fd-util.h"
#include "io-util.h"
#include "macro.h"
#include "missing_network.h"
#include "resolved-dns-stream.h"
#include "resolved-manager.h"
@ -280,13 +281,15 @@ static int on_stream_timeout(sd_event_source *es, usec_t usec, void *userdata) {
return dns_stream_complete(s, ETIMEDOUT);
}
static int on_stream_io(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
_cleanup_(dns_stream_unrefp) DnsStream *s = dns_stream_ref(userdata); /* Protect stream while we process it */
static int on_stream_io_impl(DnsStream *s, uint32_t revents) {
bool progressed = false;
int r;
assert(s);
/* This returns 1 when possible remaining stream exists, 0 on completed
stream or recoverable error, and negative errno on failure. */
#if ENABLE_DNS_OVER_TLS
if (s->encrypted) {
r = dnstls_stream_on_io(s, revents);
@ -441,6 +444,44 @@ static int on_stream_io(sd_event_source *es, int fd, uint32_t revents, void *use
log_warning_errno(errno, "Couldn't restart TCP connection timeout, ignoring: %m");
}
return 1;
}
static int on_stream_io(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
_cleanup_(dns_stream_unrefp) DnsStream *s = dns_stream_ref(userdata); /* Protect stream while we process it */
int r;
assert(s);
r = on_stream_io_impl(s, revents);
if (r <= 0)
return r;
#if ENABLE_DNS_OVER_TLS
if (!s->encrypted)
return 0;
/* When using DNS-over-TLS, the underlying TLS library may read the entire TLS record
and buffer it internally. If this happens, we will not receive further EPOLLIN events,
and unless there's some unrelated activity on the socket, we will hang until time out.
To avoid this, if there's buffered TLS data, generate a "fake" EPOLLIN event.
This is hacky, but it makes this case transparent to the rest of the IO code. */
while (dnstls_stream_has_buffered_data(s)) {
uint32_t events;
/* Make sure the stream still wants to process more data... */
r = sd_event_source_get_io_events(s->io_event_source, &events);
if (r < 0)
return r;
if (!FLAGS_SET(events, EPOLLIN))
break;
r = on_stream_io_impl(s, EPOLLIN);
if (r <= 0)
return r;
}
#endif
return 0;
}

View file

@ -211,6 +211,14 @@ ssize_t dnstls_stream_read(DnsStream *stream, void *buf, size_t count) {
return ss;
}
bool dnstls_stream_has_buffered_data(DnsStream *stream) {
assert(stream);
assert(stream->encrypted);
assert(stream->dnstls_data.session);
return gnutls_record_check_pending(stream->dnstls_data.session) > 0;
}
void dnstls_server_free(DnsServer *server) {
assert(server);

View file

@ -367,6 +367,14 @@ ssize_t dnstls_stream_read(DnsStream *stream, void *buf, size_t count) {
return ss;
}
bool dnstls_stream_has_buffered_data(DnsStream *stream) {
assert(stream);
assert(stream->encrypted);
assert(stream->dnstls_data.ssl);
return SSL_has_pending(stream->dnstls_data.ssl) > 0;
}
void dnstls_server_free(DnsServer *server) {
assert(server);

View file

@ -3,6 +3,7 @@
#if ENABLE_DNS_OVER_TLS
#include <stdbool.h>
#include <stdint.h>
typedef struct DnsServer DnsServer;
@ -28,6 +29,7 @@ int dnstls_stream_on_io(DnsStream *stream, uint32_t revents);
int dnstls_stream_shutdown(DnsStream *stream, int error);
ssize_t dnstls_stream_write(DnsStream *stream, const char *buf, size_t count);
ssize_t dnstls_stream_read(DnsStream *stream, void *buf, size_t count);
bool dnstls_stream_has_buffered_data(DnsStream *stream);
void dnstls_server_free(DnsServer *server);