resolved: added serve stale feature implementation of RFC 8767

Add a serve stale feature that keeps DNS resource records beyond their TTL, so that they can be returned as stale records in case the upstream server is not reachable or returns a negative response.
SD_RESOLVED_NO_STALE flag has been added to disable serving stale records via dbus.
added serve stale test cases to TEST-75-RESOLVED
Fixes: #21815
This commit is contained in:
Kiran Vemula 2023-06-08 18:42:11 +05:30 committed by Luca Boccassi
parent f03b74cb53
commit 5ed91481ab
19 changed files with 189 additions and 22 deletions

View file

@ -457,6 +457,7 @@ node /org/freedesktop/resolve1 {
#define SD_RESOLVED_NO_ZONE (UINT64_C(1) << 13)
#define SD_RESOLVED_NO_TRUST_ANCHOR (UINT64_C(1) << 14)
#define SD_RESOLVED_NO_NETWORK (UINT64_C(1) << 15)
#define SD_RESOLVED_NO_STALE (UINT64_C(1) << 24)
/* Output: Security */
#define SD_RESOLVED_AUTHENTICATED (UINT64_C(1) << 9)
@ -506,13 +507,14 @@ node /org/freedesktop/resolve1 {
<para>NO_VALIDATE can be set to disable validation via DNSSEC even if it would normally be
used.</para>
<para>The next four flags allow disabling certain sources during resolution. NO_SYNTHESIZE disables
<para>The next six flags allow disabling certain sources during resolution. NO_SYNTHESIZE disables
synthetic records, e.g. the local host name, see section SYNTHETIC RECORDS in
<citerefentry><refentrytitle>systemd-resolved.service</refentrytitle><manvolnum>8</manvolnum></citerefentry>
for more information. NO_CACHE disables the use of the cache of previously resolved records. NO_ZONE
disables answers using locally registered public LLMNR/mDNS resource records. NO_TRUST_ANCHOR
disables answers using locally configured trust anchors. NO_NETWORK requires all answers to be
provided without using the network, i.e. either from local sources or the cache.</para>
provided without using the network, i.e. either from local sources or the cache. NO_STALE
can be set to disable answering requests with stale records.</para>
<para>The AUTHENTICATED bit is defined only in the output flags of the four functions. If set, the
returned data has been fully authenticated. Specifically, this bit is set for all DNSSEC-protected

View file

@ -400,6 +400,14 @@
query response are shown. Otherwise, this output is suppressed.</para></listitem>
</varlistentry>
<varlistentry>
<term><option>--stale-data=</option><replaceable>BOOL</replaceable></term>
<listitem><para>Takes a boolean parameter; used in conjunction with <command>query</command>. If true
(the default), lookups are answered with stale data (expired resource records) if
possible. If false, the stale data is not considered for the lookup request.</para></listitem>
</varlistentry>
<xi:include href="standard-options.xml" xpointer="json" />
<varlistentry>

View file

@ -335,6 +335,21 @@ DNSStubListenerExtra=udp:[2001:db8:0:f102::13]:9953</programlisting>
url="https://www.iab.org/documents/correspondence-reports-documents/2013-2/iab-statement-dotless-domains-considered-harmful/">IAB
Statement</ulink>, and may create a privacy and security risk.</para></listitem>
</varlistentry>
<varlistentry>
<term>StaleRetentionSec=<replaceable>SECONDS</replaceable></term>
<listitem><para>Takes a duration value, which determines the length of time DNS resource records can be retained
in the cache beyond their Time To Live (TTL). This allows these records to be returned as stale records.
By default, this value is set to zero, meaning that DNS resource records are not stored in the cache after their TTL expires.</para>
<para>This is useful when a DNS server fails or becomes unreachable.
In such cases, systemd-resolved continues to use the stale records to answer DNS queries, particularly when no valid response
can be obtained from the upstream DNS servers. However, this doesn't apply to NXDOMAIN responses, as those are still perfectly valid responses.
This feature enhances resilience against DNS infrastructure failures and outages.</para>
<para>systemd-resolved always attempts to reach the upstream DNS servers first, before providing the client application with any stale data.
If this feature is enabled, the cache will not be flushed when changing servers.</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>

View file

@ -37,7 +37,7 @@ _resolvectl() {
[STANDALONE]='-h --help --version -4 -6 --legend=no --cname=no
--validate=no --synthesize=no --cache=no --zone=no
--trust-anchor=no --network=no --service-address=no
--service-txt=no --search=no --no-pager'
--service-txt=no --search=no --stale-data=no --no-pager'
[ARG]='-t --type -c --class -i --interface -p --protocol --raw'
)
local -A VERBS=(

View file

@ -3063,6 +3063,7 @@ static int native_help(void) {
" --validate=BOOL Allow DNSSEC validation (default: yes)\n"
" --synthesize=BOOL Allow synthetic response (default: yes)\n"
" --cache=BOOL Allow response from cache (default: yes)\n"
" --stale-data=BOOL Allow response from cache with stale data (default: yes)\n"
" --zone=BOOL Allow response from locally registered mDNS/LLMNR\n"
" records (default: yes)\n"
" --trust-anchor=BOOL Allow response from local trust anchor (default:\n"
@ -3422,6 +3423,7 @@ static int native_parse_argv(int argc, char *argv[]) {
ARG_SEARCH,
ARG_NO_PAGER,
ARG_JSON,
ARG_STALE_DATA
};
static const struct option options[] = {
@ -3445,6 +3447,7 @@ static int native_parse_argv(int argc, char *argv[]) {
{ "search", required_argument, NULL, ARG_SEARCH },
{ "no-pager", no_argument, NULL, ARG_NO_PAGER },
{ "json", required_argument, NULL, ARG_JSON },
{ "stale-data", required_argument, NULL, ARG_STALE_DATA },
{}
};
@ -3582,6 +3585,13 @@ static int native_parse_argv(int argc, char *argv[]) {
SET_FLAG(arg_flags, SD_RESOLVED_NO_CACHE, r == 0);
break;
case ARG_STALE_DATA:
r = parse_boolean_argument("--stale-data=", optarg, NULL);
if (r < 0)
return r;
SET_FLAG(arg_flags, SD_RESOLVED_NO_STALE, r == 0);
break;
case ARG_ZONE:
r = parse_boolean_argument("--zone=", optarg, NULL);
if (r < 0)

View file

@ -360,6 +360,7 @@ static int validate_and_mangle_flags(
SD_RESOLVED_NO_ZONE|
SD_RESOLVED_NO_TRUST_ANCHOR|
SD_RESOLVED_NO_NETWORK|
SD_RESOLVED_NO_STALE|
ok))
return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid flags parameter");

View file

@ -70,6 +70,9 @@
/* Output: Result was (at least partially) answered from network */
#define SD_RESOLVED_FROM_NETWORK (UINT64_C(1) << 23)
/* Input: Don't answer request with stale data */
#define SD_RESOLVED_NO_STALE (UINT64_C(1) << 24)
#define SD_RESOLVED_LLMNR (SD_RESOLVED_LLMNR_IPV4|SD_RESOLVED_LLMNR_IPV6)
#define SD_RESOLVED_MDNS (SD_RESOLVED_MDNS_IPV4|SD_RESOLVED_MDNS_IPV6)
#define SD_RESOLVED_PROTOCOLS_ALL (SD_RESOLVED_MDNS|SD_RESOLVED_LLMNR|SD_RESOLVED_DNS)

View file

@ -15,9 +15,12 @@
* leave DNS caches unbounded, but that's crazy. */
#define CACHE_MAX 4096
/* We never keep any item longer than 2h in our cache */
/* We never keep any item longer than 2h in our cache unless StaleRetentionSec is greater than zero. */
#define CACHE_TTL_MAX_USEC (2 * USEC_PER_HOUR)
/* The max TTL for stale data is set to 30 seconds. See RFC 8767, Section 6. */
#define CACHE_STALE_TTL_MAX_USEC (30 * USEC_PER_SEC)
/* How long to cache strange rcodes, i.e. rcodes != SUCCESS and != NXDOMAIN (specifically: that's only SERVFAIL for
* now) */
#define CACHE_TTL_STRANGE_RCODE_USEC (10 * USEC_PER_SEC)
@ -42,7 +45,8 @@ struct DnsCacheItem {
DnsAnswer *answer; /* The full validated answer, if this is an RRset acquired via a "primary" lookup */
DnsPacket *full_packet; /* The full packet this information was acquired with */
usec_t until;
usec_t until; /* If StaleRetentionSec is greater than zero, until is set to a duration of StaleRetentionSec from the time of TTL expiry. If StaleRetentionSec is zero, both until and until_valid will be set to ttl. */
usec_t until_valid; /* The point in time at which the TTL expires; beyond it the item is stale. */
uint64_t query_flags; /* SD_RESOLVED_AUTHENTICATED and/or SD_RESOLVED_CONFIDENTIAL */
DnssecResult dnssec_result;
@ -311,7 +315,7 @@ static DnsCacheItem* dns_cache_get(DnsCache *c, DnsResourceRecord *rr) {
return NULL;
}
static usec_t calculate_until(
static usec_t calculate_until_valid(
DnsResourceRecord *rr,
uint32_t min_ttl,
uint32_t nsec_ttl,
@ -350,6 +354,13 @@ static usec_t calculate_until(
return timestamp + u;
}
/* Derives the 'until' timestamp of a cache item from the time its TTL expires.
 *
 * until_valid:          point in time at which the TTL of the item expires
 * stale_retention_usec: how long the item may be retained beyond that point
 *
 * Returns until_valid extended by stale_retention_usec when the latter is greater
 * than zero, and until_valid unchanged otherwise. */
static usec_t calculate_until(
                usec_t until_valid,
                usec_t stale_retention_usec) {

        if (stale_retention_usec > 0)
                return usec_add(until_valid, stale_retention_usec);

        return until_valid;
}
static void dns_cache_item_update_positive(
DnsCache *c,
DnsCacheItem *i,
@ -363,7 +374,8 @@ static void dns_cache_item_update_positive(
usec_t timestamp,
int ifindex,
int owner_family,
const union in_addr_union *owner_address) {
const union in_addr_union *owner_address,
usec_t stale_retention_usec) {
assert(c);
assert(i);
@ -386,7 +398,8 @@ static void dns_cache_item_update_positive(
DNS_PACKET_REPLACE(i->full_packet, dns_packet_ref(full_packet));
i->until = calculate_until(rr, min_ttl, UINT32_MAX, timestamp, false);
i->until_valid = calculate_until_valid(rr, min_ttl, UINT32_MAX, timestamp, false);
i->until = calculate_until(i->until_valid, stale_retention_usec);
i->query_flags = query_flags & CACHEABLE_QUERY_FLAGS;
i->shared_owner = shared_owner;
i->dnssec_result = dnssec_result;
@ -411,7 +424,8 @@ static int dns_cache_put_positive(
usec_t timestamp,
int ifindex,
int owner_family,
const union in_addr_union *owner_address) {
const union in_addr_union *owner_address,
usec_t stale_retention_usec) {
char key_str[DNS_RESOURCE_KEY_STRING_MAX];
DnsCacheItem *existing;
@ -458,7 +472,8 @@ static int dns_cache_put_positive(
timestamp,
ifindex,
owner_family,
owner_address);
owner_address,
stale_retention_usec);
return 0;
}
@ -477,13 +492,19 @@ static int dns_cache_put_positive(
if (!i)
return -ENOMEM;
/* If StaleRetentionSec is greater than zero, the 'until' property is set to a duration
* of StaleRetentionSec from the time of TTL expiry.
* If StaleRetentionSec is zero, both the 'until' and 'until_valid' are set to the TTL duration,
* leading to the eviction of the record once the TTL expires.*/
usec_t until_valid = calculate_until_valid(rr, min_ttl, UINT32_MAX, timestamp, false);
*i = (DnsCacheItem) {
.type = DNS_CACHE_POSITIVE,
.key = dns_resource_key_ref(rr->key),
.rr = dns_resource_record_ref(rr),
.answer = dns_answer_ref(answer),
.full_packet = dns_packet_ref(full_packet),
.until = calculate_until(rr, min_ttl, UINT32_MAX, timestamp, false),
.until = calculate_until(until_valid, stale_retention_usec),
.until_valid = until_valid,
.query_flags = query_flags & CACHEABLE_QUERY_FLAGS,
.shared_owner = shared_owner,
.dnssec_result = dnssec_result,
@ -583,7 +604,7 @@ static int dns_cache_put_negative(
* of some other RR. Let's better take the lowest option here than a needlessly high one */
i->until =
i->type == DNS_CACHE_RCODE ? timestamp + CACHE_TTL_STRANGE_RCODE_USEC :
calculate_until(soa, dns_answer_min_ttl(answer), nsec_ttl, timestamp, true);
calculate_until_valid(soa, dns_answer_min_ttl(answer), nsec_ttl, timestamp, true);
if (i->type == DNS_CACHE_NXDOMAIN) {
/* NXDOMAIN entries should apply equally to all types, so we use ANY as
@ -679,7 +700,8 @@ int dns_cache_put(
DnssecResult dnssec_result,
uint32_t nsec_ttl,
int owner_family,
const union in_addr_union *owner_address) {
const union in_addr_union *owner_address,
usec_t stale_retention_usec) {
DnsResourceRecord *soa = NULL;
bool weird_rcode = false;
@ -775,7 +797,8 @@ int dns_cache_put(
timestamp,
item->ifindex,
owner_family,
owner_address);
owner_address,
stale_retention_usec);
if (r < 0)
goto fail;
}
@ -831,7 +854,8 @@ int dns_cache_put(
nsec_ttl,
timestamp,
soa,
owner_family, owner_address);
owner_family,
owner_address);
if (r < 0)
goto fail;
@ -1024,6 +1048,14 @@ int dns_cache_lookup(
goto miss;
}
/* Skip the next part if ttl is expired and requested with no stale flag. */
if (FLAGS_SET(query_flags, SD_RESOLVED_NO_STALE) && j->until_valid < current) {
log_debug("Requested with no stale and TTL expired for %s",
dns_resource_key_to_string(key, key_str, sizeof key_str));
goto miss;
}
if (j->type == DNS_CACHE_NXDOMAIN)
nxdomain = true;
else if (j->type == DNS_CACHE_RCODE)
@ -1056,6 +1088,10 @@ int dns_cache_lookup(
dnssec_result = _DNSSEC_RESULT_INVALID;
}
/* If the question is being resolved using stale data, the clamp TTL will be set to CACHE_STALE_TTL_MAX_USEC. */
usec_t until = FLAGS_SET(query_flags, SD_RESOLVED_NO_STALE) ? j->until_valid
: usec_add(current, CACHE_STALE_TTL_MAX_USEC);
/* Append the answer RRs to our answer. Ideally we have the answer object, which we
* preferably use. But if the cached entry was generated as "side-effect" of a reply,
* i.e. from validated auxiliary records rather than from the main reply, then we use the
@ -1076,7 +1112,7 @@ int dns_cache_lookup(
item->flags,
item->rrsig,
query_flags,
j->until,
until,
current);
if (r < 0)
return r;
@ -1091,7 +1127,7 @@ int dns_cache_lookup(
FLAGS_SET(j->query_flags, SD_RESOLVED_AUTHENTICATED) ? DNS_ANSWER_AUTHENTICATED : 0,
NULL,
query_flags,
j->until,
until,
current);
if (r < 0)
return r;

View file

@ -35,7 +35,8 @@ int dns_cache_put(
DnssecResult dnssec_result,
uint32_t nsec_ttl,
int owner_family,
const union in_addr_union *owner_address);
const union in_addr_union *owner_address,
usec_t stale_retention_usec);
int dns_cache_lookup(
DnsCache *c,

View file

@ -823,7 +823,8 @@ static void dns_transaction_cache_answer(DnsTransaction *t) {
t->answer_dnssec_result,
t->answer_nsec_ttl,
t->received->family,
&t->received->sender);
&t->received->sender,
t->scope->manager->stale_retention_usec);
}
static bool dns_transaction_dnssec_is_live(DnsTransaction *t) {
@ -1697,10 +1698,18 @@ static int dns_transaction_prepare(DnsTransaction *t, usec_t ts) {
/* Let's then prune all outdated entries */
dns_cache_prune(&t->scope->cache);
/* For the initial attempt or when no stale data is requested, disable serve stale
* and answer the question from the cache (honors ttl property).
* On the second attempt, if StaleRetentionSec is greater than zero,
* try to answer the question using stale data (honors until property) */
uint64_t query_flags = t->query_flags;
if (t->n_attempts == 1 || t->scope->manager->stale_retention_usec == 0)
query_flags |= SD_RESOLVED_NO_STALE;
r = dns_cache_lookup(
&t->scope->cache,
dns_transaction_key(t),
t->query_flags,
query_flags,
&t->answer_rcode,
&t->answer,
&t->received,
@ -1716,6 +1725,13 @@ static int dns_transaction_prepare(DnsTransaction *t, usec_t ts) {
* packet. */
dns_transaction_reset_answer(t);
else {
if (t->n_attempts > 1 && !FLAGS_SET(query_flags, SD_RESOLVED_NO_STALE)) {
char key_str[DNS_RESOURCE_KEY_STRING_MAX];
log_debug("Serve Stale response rcode=%s for %s",
FORMAT_DNS_RCODE(t->answer_rcode),
dns_resource_key_to_string(dns_transaction_key(t), key_str, sizeof key_str));
}
t->answer_source = DNS_TRANSACTION_CACHE;
if (t->answer_rcode == DNS_RCODE_SUCCESS)
dns_transaction_complete(t, DNS_TRANSACTION_SUCCESS);

View file

@ -32,3 +32,4 @@ Resolve.ReadEtcHosts, config_parse_bool, 0,
Resolve.ResolveUnicastSingleLabel, config_parse_bool, 0, offsetof(Manager, resolve_unicast_single_label)
Resolve.DNSStubListenerExtra, config_parse_dns_stub_listener_extra, 0, offsetof(Manager, dns_extra_stub_listeners)
Resolve.CacheFromLocalhost, config_parse_bool, 0, offsetof(Manager, cache_from_localhost)
Resolve.StaleRetentionSec, config_parse_sec, 0, offsetof(Manager, stale_retention_usec)

View file

@ -726,7 +726,8 @@ DnsServer* link_set_dns_server(Link *l, DnsServer *s) {
dns_server_unref(l->current_dns_server);
l->current_dns_server = dns_server_ref(s);
if (l->unicast_scope)
/* Skip flushing the cache if the serve stale feature is enabled. */
if (l->unicast_scope && l->manager->stale_retention_usec == 0)
dns_cache_flush(&l->unicast_scope->cache);
return s;

View file

@ -43,6 +43,7 @@ struct Manager {
DnsCacheMode enable_cache;
bool cache_from_localhost;
DnsStubListenerMode dns_stub_listener_mode;
usec_t stale_retention_usec;
#if ENABLE_DNS_OVER_TLS
DnsTlsManagerData dnstls_data;

View file

@ -445,7 +445,8 @@ static int on_mdns_packet(sd_event_source *s, int fd, uint32_t revents, void *us
_DNSSEC_RESULT_INVALID,
UINT32_MAX,
p->family,
&p->sender);
&p->sender,
scope->manager->stale_retention_usec);
} else if (dns_packet_validate_query(p) > 0) {
log_debug("Got mDNS query packet for id %u", DNS_PACKET_ID(p));

View file

@ -142,6 +142,7 @@ static bool validate_and_mangle_flags(
SD_RESOLVED_NO_ZONE|
SD_RESOLVED_NO_TRUST_ANCHOR|
SD_RESOLVED_NO_NETWORK|
SD_RESOLVED_NO_STALE|
ok))
return false;

View file

@ -32,3 +32,4 @@
#DNSStubListenerExtra=
#ReadEtcHosts=yes
#ResolveUnicastSingleLabel=no
#StaleRetentionSec=0

View file

@ -36,6 +36,11 @@ test_append_files() {
# Install DNS-related utilities (usually found in the bind-utils package)
image_install delv dig host nslookup
if command -v nft >/dev/null; then
# Install nftables
image_install nft
fi
}
do_test "$@"

View file

@ -20,3 +20,4 @@ ns1 AAAA fd00:dead:beef:cafe::1
A 10.0.0.101
AAAA fd00:dead:beef:cafe::101
mail A 10.0.0.111
stale1 1 A 10.0.0.112

View file

@ -515,5 +515,68 @@ grep -qF "fd00:dead:beef:cafe::123" "$RUN_OUT"
systemctl stop resmontest.service
# Test serve stale feature if nftables is installed
if command -v nft >/dev/null; then
### Test without serve stale feature ###
NFT_FILTER_NAME=dns_port_filter
# Installs an nftables table named $NFT_FILTER_NAME with an output-hook chain
# that drops UDP and TCP traffic to port 53 of 10.0.0.1 and
# fd00:dead:beef:cafe::1.
drop_dns_outbound_traffic() {
    local match proto

    nft add table inet $NFT_FILTER_NAME
    nft add chain inet $NFT_FILTER_NAME output '{' type filter hook output priority 0 ';' '}'

    # One rule per (address, transport protocol) pair; IPv4 first, then IPv6.
    # $match is left unquoted on purpose so it splits into separate nft arguments.
    for match in "ip daddr 10.0.0.1" "ip6 daddr fd00:dead:beef:cafe::1"; do
        for proto in udp tcp; do
            nft add rule inet $NFT_FILTER_NAME output $match $proto dport 53 drop
        done
    done
}
# Resolve the name once while the upstream server is still reachable.
run dig stale1.unsigned.test -t A
grep -qE "NOERROR" "$RUN_OUT"
# stale1 has a 1 second TTL in the test zone, so this lets the record expire.
sleep 2
drop_dns_outbound_traffic
# The lookup is expected to fail now, so don't let errexit abort the script.
set +e
run dig stale1.unsigned.test -t A
set -eux
# Without serve stale the expired record must not be used to answer the query.
grep -qE "no servers could be reached" "$RUN_OUT"
# Remove the drop rules again.
nft flush ruleset
### Test TIMEOUT with serve stale feature ###
# Enable serve stale via a drop-in that sets StaleRetentionSec=1d.
mkdir -p /run/systemd/resolved.conf.d
{
    echo "[Resolve]"
    echo "StaleRetentionSec=1d"
} >/run/systemd/resolved.conf.d/test.conf
ln -svf /run/systemd/resolve/stub-resolv.conf /etc/resolv.conf
systemctl restart systemd-resolved.service
systemctl service-log-level systemd-resolved.service debug
# Resolve the name once while the upstream server is still reachable.
run dig stale1.unsigned.test -t A
grep -qE "NOERROR" "$RUN_OUT"
# stale1 has a 1 second TTL in the test zone, so this lets the record expire.
sleep 2
drop_dns_outbound_traffic
# With DNS traffic to the upstream server dropped, the expired record must still be served.
run dig stale1.unsigned.test -t A
grep -qE "NOERROR" "$RUN_OUT"
# Match the address as a fixed string; with -E each dot would match any character.
grep -qF "10.0.0.112" "$RUN_OUT"
# Remove the drop rules again.
nft flush ruleset
### Test NXDOMAIN with serve stale feature ###
# An NXDOMAIN reply from the upstream server must replace the cached record
# instead of being masked by stale data.
run dig stale1.unsigned.test -t A
grep -qE "NOERROR" "$RUN_OUT"
# Delete stale1 record from zone
knotc zone-begin unsigned.test
knotc zone-unset unsigned.test stale1 A
knotc zone-commit unsigned.test
knotc reload
# stale1 had a 1 second TTL in the test zone, so this lets the cached record expire.
sleep 2
run dig stale1.unsigned.test -t A
grep -qE "NXDOMAIN" "$RUN_OUT"
nft flush ruleset
else
echo "nftables is not installed. Skipped serve stale feature test."
fi
touch /testok
rm /failed