resolve: dedup entries in /etc/hosts

This improves the performance of parsing the file and reduces memory pressure.

Running 'fuzz-etc-hosts timeout-strv' with valgrind,

Before:
total heap usage: 321,020 allocs, 321,020 frees, 15,820,387,193 bytes allocated
real    0m23.531s
user    0m21.458s
sys     0m1.961s

After:
total heap usage: 112,408 allocs, 112,408 frees, 7,297,480 bytes allocated
real    0m8.664s
user    0m8.545s
sys     0m0.065s

Hopefully fixes oss-fuzz#47708 (https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=47708).
This commit is contained in:
Yu Watanabe 2022-12-07 23:39:56 +09:00
parent 59dad407a2
commit 16a6bc5a7a
4 changed files with 64 additions and 58 deletions

View file

@ -22,7 +22,7 @@ static EtcHostsItemByAddress *etc_hosts_item_by_address_free(EtcHostsItemByAddre
if (!item)
return NULL;
strv_free(item->names);
set_free(item->names);
return mfree(item);
}
@ -41,7 +41,7 @@ static EtcHostsItemByName *etc_hosts_item_by_name_free(EtcHostsItemByName *item)
return NULL;
free(item->name);
free(item->addresses);
set_free(item->addresses);
return mfree(item);
}
@ -153,20 +153,21 @@ static int parse_line(EtcHosts *hosts, unsigned nr, const char *line) {
continue;
}
r = strv_extend_with_size(&item->names, &item->n_names, name);
if (r < 0)
return log_oom();
bn = hashmap_get(hosts->by_name, name);
if (!bn) {
_cleanup_(etc_hosts_item_by_name_freep) EtcHostsItemByName *new_item = NULL;
_cleanup_free_ char *name_copy = NULL;
name_copy = strdup(name);
if (!name_copy)
return log_oom();
new_item = new(EtcHostsItemByName, 1);
if (!new_item)
return log_oom();
*new_item = (EtcHostsItemByName) {
.name = TAKE_PTR(name),
.name = TAKE_PTR(name_copy),
};
r = hashmap_ensure_put(&hosts->by_name, &by_name_hash_ops, new_item->name, new_item);
@ -176,10 +177,21 @@ static int parse_line(EtcHosts *hosts, unsigned nr, const char *line) {
bn = TAKE_PTR(new_item);
}
if (!GREEDY_REALLOC(bn->addresses, bn->n_addresses + 1))
return log_oom();
if (!set_contains(bn->addresses, &address)) {
_cleanup_free_ struct in_addr_data *address_copy = NULL;
bn->addresses[bn->n_addresses++] = &item->address;
address_copy = newdup(struct in_addr_data, &address, 1);
if (!address_copy)
return log_oom();
r = set_ensure_consume(&bn->addresses, &in_addr_data_hash_ops_free, TAKE_PTR(address_copy));
if (r < 0)
return log_oom();
}
r = set_ensure_consume(&item->names, &dns_name_hash_ops_free, TAKE_PTR(name));
if (r < 0)
return log_oom();
}
if (!found)
@ -217,6 +229,7 @@ static void strip_localhost(EtcHosts *hosts) {
for (size_t j = 0; j < ELEMENTSOF(local_in_addrs); j++) {
bool all_localhost, all_local_address;
EtcHostsItemByAddress *item;
const char *name;
item = hashmap_get(hosts->by_address, local_in_addrs + j);
if (!item)
@ -224,8 +237,8 @@ static void strip_localhost(EtcHosts *hosts) {
/* Check whether all hostnames the loopback address points to are localhost ones */
all_localhost = true;
STRV_FOREACH(i, item->names)
if (!is_localhost(*i)) {
SET_FOREACH(name, item->names)
if (!is_localhost(name)) {
all_localhost = false;
break;
}
@ -236,17 +249,18 @@ static void strip_localhost(EtcHosts *hosts) {
/* Now check if the names listed for this address actually all point back just to this
* address (or the other loopback address). If not, let's stay away from this too. */
all_local_address = true;
STRV_FOREACH(i, item->names) {
SET_FOREACH(name, item->names) {
EtcHostsItemByName *n;
struct in_addr_data *a;
n = hashmap_get(hosts->by_name, *i);
n = hashmap_get(hosts->by_name, name);
if (!n) /* No reverse entry? Then almost certainly the entry already got deleted from
* the previous iteration of this loop, i.e. via the other protocol */
break;
/* Now check if the addresses of this item are all localhost addresses */
for (size_t m = 0; m < n->n_addresses; m++)
if (!in_addr_is_localhost(n->addresses[m]->family, &n->addresses[m]->address)) {
SET_FOREACH(a, n->addresses)
if (!in_addr_is_localhost(a->family, &a->address)) {
all_local_address = false;
break;
}
@ -258,8 +272,8 @@ static void strip_localhost(EtcHosts *hosts) {
if (!all_local_address)
continue;
STRV_FOREACH(i, item->names)
etc_hosts_item_by_name_free(hashmap_remove(hosts->by_name, *i));
SET_FOREACH(name, item->names)
etc_hosts_item_by_name_free(hashmap_remove(hosts->by_name, name));
assert_se(hashmap_remove(hosts->by_address, local_in_addrs + j) == item);
etc_hosts_item_by_address_free(item);
@ -397,18 +411,20 @@ static int etc_hosts_lookup_by_address(
}
if (found_ptr) {
r = dns_answer_reserve(answer, item->n_names);
const char *n;
r = dns_answer_reserve(answer, set_size(item->names));
if (r < 0)
return r;
STRV_FOREACH(n, item->names) {
SET_FOREACH(n, item->names) {
_cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
rr = dns_resource_record_new(found_ptr);
if (!rr)
return -ENOMEM;
rr->ptr.name = strdup(*n);
rr->ptr.name = strdup(n);
if (!rr->ptr.name)
return -ENOMEM;
@ -428,6 +444,7 @@ static int etc_hosts_lookup_by_name(
DnsAnswer **answer) {
bool found_a = false, found_aaaa = false;
const struct in_addr_data *a;
EtcHostsItemByName *item;
DnsResourceKey *t;
int r;
@ -439,7 +456,7 @@ static int etc_hosts_lookup_by_name(
item = hashmap_get(hosts->by_name, name);
if (item) {
r = dns_answer_reserve(answer, item->n_addresses);
r = dns_answer_reserve(answer, set_size(item->addresses));
if (r < 0)
return r;
} else {
@ -469,14 +486,14 @@ static int etc_hosts_lookup_by_name(
break;
}
for (unsigned i = 0; item && i < item->n_addresses; i++) {
SET_FOREACH(a, item->addresses) {
_cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
if ((!found_a && item->addresses[i]->family == AF_INET) ||
(!found_aaaa && item->addresses[i]->family == AF_INET6))
if ((!found_a && a->family == AF_INET) ||
(!found_aaaa && a->family == AF_INET6))
continue;
r = dns_resource_record_new_address(&rr, item->addresses[i]->family, &item->addresses[i]->address, item->name);
r = dns_resource_record_new_address(&rr, a->family, &a->address, item->name);
if (r < 0)
return r;

View file

@ -7,16 +7,12 @@
typedef struct EtcHostsItemByAddress {
struct in_addr_data address;
char **names;
size_t n_names;
Set *names;
} EtcHostsItemByAddress;
typedef struct EtcHostsItemByName {
char *name;
struct in_addr_data **addresses;
size_t n_addresses;
Set *addresses;
} EtcHostsItemByName;
int etc_hosts_parse(EtcHosts *hosts, FILE *f);

View file

@ -27,13 +27,11 @@ TEST(parse_etc_hosts_system) {
assert_se(etc_hosts_parse(&hosts, f) == 0);
}
#define address_equal_4(_addr, _address) \
((_addr)->family == AF_INET && \
!memcmp(&(_addr)->address.in, &(struct in_addr) { .s_addr = (_address) }, 4))
#define has_4(_set, _address_str) \
set_contains(_set, &(struct in_addr_data) { .family = AF_INET, .address.in = { .s_addr = inet_addr(_address_str) } })
#define address_equal_6(_addr, ...) \
((_addr)->family == AF_INET6 && \
!memcmp(&(_addr)->address.in6, &(struct in6_addr) { .s6_addr = __VA_ARGS__}, 16) )
#define has_6(_set, ...) \
set_contains(_set, &(struct in_addr_data) { .family = AF_INET6, .address.in6 = { .s6_addr = __VA_ARGS__ } })
TEST(parse_etc_hosts) {
_cleanup_(unlink_tempfilep) char
@ -72,33 +70,29 @@ TEST(parse_etc_hosts) {
EtcHostsItemByName *bn;
assert_se(bn = hashmap_get(hosts.by_name, "some.where"));
assert_se(bn->n_addresses == 3);
assert_se(MALLOC_ELEMENTSOF(bn->addresses) >= 3);
assert_se(address_equal_4(bn->addresses[0], inet_addr("1.2.3.4")));
assert_se(address_equal_4(bn->addresses[1], inet_addr("1.2.3.5")));
assert_se(address_equal_6(bn->addresses[2], {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5}));
assert_se(set_size(bn->addresses) == 3);
assert_se(has_4(bn->addresses, "1.2.3.4"));
assert_se(has_4(bn->addresses, "1.2.3.5"));
assert_se(has_6(bn->addresses, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5}));
assert_se(bn = hashmap_get(hosts.by_name, "dash"));
assert_se(bn->n_addresses == 1);
assert_se(MALLOC_ELEMENTSOF(bn->addresses) >= 1);
assert_se(address_equal_4(bn->addresses[0], inet_addr("1.2.3.6")));
assert_se(set_size(bn->addresses) == 1);
assert_se(has_4(bn->addresses, "1.2.3.6"));
assert_se(bn = hashmap_get(hosts.by_name, "dash-dash.where-dash"));
assert_se(bn->n_addresses == 1);
assert_se(MALLOC_ELEMENTSOF(bn->addresses) >= 1);
assert_se(address_equal_4(bn->addresses[0], inet_addr("1.2.3.6")));
assert_se(set_size(bn->addresses) == 1);
assert_se(has_4(bn->addresses, "1.2.3.6"));
/* See https://tools.ietf.org/html/rfc1035#section-2.3.1 */
FOREACH_STRING(s, "bad-dash-", "-bad-dash", "-bad-dash.bad-")
assert_se(!hashmap_get(hosts.by_name, s));
assert_se(bn = hashmap_get(hosts.by_name, "before.comment"));
assert_se(bn->n_addresses == 4);
assert_se(MALLOC_ELEMENTSOF(bn->addresses) >= 4);
assert_se(address_equal_4(bn->addresses[0], inet_addr("1.2.3.9")));
assert_se(address_equal_4(bn->addresses[1], inet_addr("1.2.3.10")));
assert_se(address_equal_4(bn->addresses[2], inet_addr("1.2.3.11")));
assert_se(address_equal_4(bn->addresses[3], inet_addr("1.2.3.12")));
assert_se(set_size(bn->addresses) == 4);
assert_se(has_4(bn->addresses, "1.2.3.9"));
assert_se(has_4(bn->addresses, "1.2.3.10"));
assert_se(has_4(bn->addresses, "1.2.3.11"));
assert_se(has_4(bn->addresses, "1.2.3.12"));
assert_se(!hashmap_get(hosts.by_name, "within.comment"));
assert_se(!hashmap_get(hosts.by_name, "within.comment2"));
@ -113,9 +107,8 @@ TEST(parse_etc_hosts) {
assert_se(!set_contains(hosts.no_address, "multi.colon"));
assert_se(bn = hashmap_get(hosts.by_name, "some.other"));
assert_se(bn->n_addresses == 1);
assert_se(MALLOC_ELEMENTSOF(bn->addresses) >= 1);
assert_se(address_equal_6(bn->addresses[0], {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5}));
assert_se(set_size(bn->addresses) == 1);
assert_se(has_6(bn->addresses, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5}));
assert_se( set_contains(hosts.no_address, "some.where"));
assert_se( set_contains(hosts.no_address, "some.other"));

Binary file not shown.