From 621b10fe2c3203c537996e84c7c89b0ff994ad93 Mon Sep 17 00:00:00 2001 From: q66 Date: Thu, 6 Jun 2024 13:45:48 +0200 Subject: [PATCH 1/3] strbuf: use GREEDY_REALLOC to grow the buffer This allows us to reserve a bunch of capacity ahead of time, improving the performance of hwdb significantly thanks to not having to reallocate so many times. Before: ``` $ sudo time valgrind --leak-check=full ./systemd-hwdb update ==113297== Memcheck, a memory error detector ==113297== Copyright (C) 2002-2024, and GNU GPL'd, by Julian Seward et al. ==113297== Using Valgrind-3.23.0 and LibVEX; rerun with -h for copyright info ==113297== Command: ./systemd-hwdb update ==113297== ==113297== ==113297== HEAP SUMMARY: ==113297== in use at exit: 0 bytes in 0 blocks ==113297== total heap usage: 1,412,640 allocs, 1,412,640 frees, 117,920,009,195 bytes allocated ==113297== ==113297== All heap blocks were freed -- no leaks are possible ==113297== ==113297== For lists of detected and suppressed errors, rerun with: -s ==113297== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0) 132.44user 21.15system 2:35.61elapsed 98%CPU (0avgtext+0avgdata 228560maxresident)k 0inputs+25296outputs (0major+6886930minor)pagefaults 0swaps ``` After: ``` $ sudo time valgrind --leak-check=full ./systemd-hwdb update ==112572== Memcheck, a memory error detector ==112572== Copyright (C) 2002-2024, and GNU GPL'd, by Julian Seward et al. ==112572== Using Valgrind-3.23.0 and LibVEX; rerun with -h for copyright info ==112572== Command: ./systemd-hwdb update ==112572== ==112572== ==112572== HEAP SUMMARY: ==112572== in use at exit: 0 bytes in 0 blocks ==112572== total heap usage: 1,320,113 allocs, 1,320,113 frees, 70,614,501 bytes allocated ==112572== ==112572== All heap blocks were freed -- no leaks are possible ==112572== ==112572== For lists of detected and suppressed errors, rerun with: -s ==112572== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0) 21.94user 0.19system 0:22.23elapsed 99%CPU (0avgtext+0avgdata 229876maxresident)k 0inputs+25264outputs (0major+57275minor)pagefaults 0swaps ``` Co-authored-by: Yu Watanabe --- src/basic/strbuf.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/basic/strbuf.c b/src/basic/strbuf.c index 0617acc8d2..6d43955bb1 100644 --- a/src/basic/strbuf.c +++ b/src/basic/strbuf.c @@ -107,7 +107,6 @@ static void bubbleinsert(struct strbuf_node *node, /* add string, return the index/offset into the buffer */ ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) { uint8_t c; - char *buf_new; struct strbuf_child_entry *child; struct strbuf_node *node; ssize_t off; @@ -147,10 +146,8 @@ ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) { } /* add new string */ - buf_new = realloc(str->buf, str->len + len+1); - if (!buf_new) + if (!GREEDY_REALLOC(str->buf, str->len + len + 1)) return -ENOMEM; - str->buf = buf_new; off = str->len; memcpy(str->buf + off, s, len); str->len += len; From 34d4e6f32affb0d0b23c26c2df6837424f6b020b Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Wed, 12 Jun 2024 03:24:30 +0900 Subject: [PATCH 2/3] strbuf: several cleanups for strbuf_add_string() - add missing assertions, - use GREEDY_REALLOC() at one more place, - etc. Before: ``` $ sudo time valgrind --leak-check=full ./systemd-hwdb update ==112572== Memcheck, a memory error detector ==112572== Copyright (C) 2002-2024, and GNU GPL'd, by Julian Seward et al. ==112572== Using Valgrind-3.23.0 and LibVEX; rerun with -h for copyright info ==112572== Command: ./systemd-hwdb update ==112572== ==112572== ==112572== HEAP SUMMARY: ==112572== in use at exit: 0 bytes in 0 blocks ==112572== total heap usage: 1,320,113 allocs, 1,320,113 frees, 70,614,501 bytes allocated ==112572== ==112572== All heap blocks were freed -- no leaks are possible ==112572== ==112572== For lists of detected and suppressed errors, rerun with: -s ==112572== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0) 21.94user 0.19system 0:22.23elapsed 99%CPU (0avgtext+0avgdata 229876maxresident)k 0inputs+25264outputs (0major+57275minor)pagefaults 0swaps ``` After: ``` $ sudo time valgrind --leak-check=full ./systemd-hwdb update [sudo] password for watanabe: ==114732== Memcheck, a memory error detector ==114732== Copyright (C) 2002-2024, and GNU GPL'd, by Julian Seward et al. ==114732== Using Valgrind-3.23.0 and LibVEX; rerun with -h for copyright info ==114732== Command: ./systemd-hwdb update ==114732== ==114732== ==114732== HEAP SUMMARY: ==114732== in use at exit: 0 bytes in 0 blocks ==114732== total heap usage: 1,276,406 allocs, 1,276,406 frees, 68,500,491 bytes allocated ==114732== ==114732== All heap blocks were freed -- no leaks are possible ==114732== ==114732== For lists of detected and suppressed errors, rerun with: -s ==114732== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0) 21.91user 0.24system 0:22.26elapsed 99%CPU (0avgtext+0avgdata 233584maxresident)k 0inputs+25168outputs (0major+58237minor)pagefaults 0swaps ``` --- src/basic/strbuf.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/basic/strbuf.c b/src/basic/strbuf.c index 6d43955bb1..c81ba8a487 100644 --- a/src/basic/strbuf.c +++ b/src/basic/strbuf.c @@ -107,10 +107,11 @@ static void bubbleinsert(struct strbuf_node *node, /* add string, return the index/offset into the buffer */ ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) { uint8_t c; - struct strbuf_child_entry *child; - struct strbuf_node *node; ssize_t off; + assert(str); + assert(s || len == 0); + if (!str->root) return -EINVAL; @@ -123,10 +124,8 @@ ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) { } str->in_len += len; - node = str->root; + struct strbuf_node *node = str->root; for (size_t depth = 0; depth <= len; depth++) { - struct strbuf_child_entry search; - /* match against current node */ off = node->value_off + node->value_len - len; if (depth == len || (node->value_len >= len && memcmp(str->buf + off, s, len) == 0)) { @@ -138,7 +137,7 @@ ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) { c = s[len - 1 - depth]; /* lookup child node */ - search.c = c; + struct strbuf_child_entry *child, search = { .c = c }; child = typesafe_bsearch(&search, node->children, node->children_count, strbuf_children_cmp); if (!child) break; @@ -165,13 +164,11 @@ ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) { }; /* extend array, add new entry, sort for bisection */ - child = reallocarray(node->children, node->children_count + 1, sizeof(struct strbuf_child_entry)); - if (!child) + if (!GREEDY_REALLOC(node->children, node->children_count + 1)) return -ENOMEM; str->nodes_count++; - node->children = child; bubbleinsert(node, c, TAKE_PTR(node_child)); return off; From 3bc70f104e01946bcc44a3f41066e22af01e95f0 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Wed, 12 Jun 2024 03:25:57 +0900 Subject: [PATCH 3/3] strbuf: make length for strbuf_add_string() optional --- src/basic/strbuf.c | 5 ++++- src/basic/strbuf.h | 5 ++++- src/libsystemd/sd-journal/catalog.c | 2 +- src/shared/hwdb-util.c | 10 +++++----- src/test/test-strbuf.c | 20 ++++++++------------ 5 files changed, 22 insertions(+), 20 deletions(-) diff --git a/src/basic/strbuf.c b/src/basic/strbuf.c index c81ba8a487..226893842b 100644 --- a/src/basic/strbuf.c +++ b/src/basic/strbuf.c @@ -105,13 +105,16 @@ static void bubbleinsert(struct strbuf_node *node, } /* add string, return the index/offset into the buffer */ -ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) { +ssize_t strbuf_add_string_full(struct strbuf *str, const char *s, size_t len) { uint8_t c; ssize_t off; assert(str); assert(s || len == 0); + if (len == SIZE_MAX) + len = strlen(s); + if (!str->root) return -EINVAL; diff --git a/src/basic/strbuf.h b/src/basic/strbuf.h index 6187c08683..405b34717e 100644 --- a/src/basic/strbuf.h +++ b/src/basic/strbuf.h @@ -33,7 +33,10 @@ struct strbuf_child_entry { }; struct strbuf* strbuf_new(void); -ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len); +ssize_t strbuf_add_string_full(struct strbuf *str, const char *s, size_t len); +static inline ssize_t strbuf_add_string(struct strbuf *str, const char *s) { + return strbuf_add_string_full(str, s, SIZE_MAX); +} void strbuf_complete(struct strbuf *str); struct strbuf* strbuf_free(struct strbuf *str); DEFINE_TRIVIAL_CLEANUP_FUNC(struct strbuf*, strbuf_free); diff --git a/src/libsystemd/sd-journal/catalog.c b/src/libsystemd/sd-journal/catalog.c index a0b673f65f..7dcc35d8d5 100644 --- a/src/libsystemd/sd-journal/catalog.c +++ b/src/libsystemd/sd-journal/catalog.c @@ -461,7 +461,7 @@ int catalog_update(const char* database, const char* root, const char* const* di SD_ID128_FORMAT_VAL(i->id), isempty(i->language) ? "C" : i->language); - offset = strbuf_add_string(sb, payload, strlen(payload)); + offset = strbuf_add_string(sb, payload); if (offset < 0) return log_oom(); diff --git a/src/shared/hwdb-util.c b/src/shared/hwdb-util.c index d96902c7f2..c99f510236 100644 --- a/src/shared/hwdb-util.c +++ b/src/shared/hwdb-util.c @@ -138,15 +138,15 @@ static int trie_node_add_value(struct trie *trie, struct trie_node *node, ssize_t k, v, fn = 0; struct trie_value_entry *val; - k = strbuf_add_string(trie->strings, key, strlen(key)); + k = strbuf_add_string(trie->strings, key); if (k < 0) return k; - v = strbuf_add_string(trie->strings, value, strlen(value)); + v = strbuf_add_string(trie->strings, value); if (v < 0) return v; if (!compat) { - fn = strbuf_add_string(trie->strings, filename, strlen(filename)); + fn = strbuf_add_string(trie->strings, filename); if (fn < 0) return fn; } @@ -224,7 +224,7 @@ static int trie_insert(struct trie *trie, struct trie_node *node, const char *se if (!s) return -ENOMEM; - off = strbuf_add_string(trie->strings, s, p); + off = strbuf_add_string_full(trie->strings, s, p); if (off < 0) return off; @@ -254,7 +254,7 @@ static int trie_insert(struct trie *trie, struct trie_node *node, const char *se if (!new_child) return -ENOMEM; - off = strbuf_add_string(trie->strings, search + i+1, strlen(search + i+1)); + off = strbuf_add_string(trie->strings, search + i+1); if (off < 0) return off; diff --git a/src/test/test-strbuf.c b/src/test/test-strbuf.c index c0a4d1167e..49ff205c53 100644 --- a/src/test/test-strbuf.c +++ b/src/test/test-strbuf.c @@ -8,10 +8,6 @@ #include "strv.h" #include "tests.h" -static ssize_t add_string(struct strbuf *sb, const char *s) { - return strbuf_add_string(sb, s, strlen(s)); -} - TEST(strbuf) { _cleanup_(strbuf_freep) struct strbuf *sb = NULL; _cleanup_strv_free_ char **l = NULL; @@ -19,14 +15,14 @@ TEST(strbuf) { sb = strbuf_new(); - a = add_string(sb, "waldo"); - b = add_string(sb, "foo"); - c = add_string(sb, "bar"); - d = add_string(sb, "waldo"); /* duplicate */ - e = add_string(sb, "aldo"); /* duplicate */ - f = add_string(sb, "do"); /* duplicate */ - g = add_string(sb, "waldorf"); /* not a duplicate: matches from tail */ - h = add_string(sb, ""); + a = strbuf_add_string(sb, "waldo"); + b = strbuf_add_string(sb, "foo"); + c = strbuf_add_string(sb, "bar"); + d = strbuf_add_string(sb, "waldo"); /* duplicate */ + e = strbuf_add_string(sb, "aldo"); /* duplicate */ + f = strbuf_add_string(sb, "do"); /* duplicate */ + g = strbuf_add_string(sb, "waldorf"); /* not a duplicate: matches from tail */ + h = strbuf_add_string(sb, ""); /* check the content of the buffer directly */ l = strv_parse_nulstr(sb->buf, sb->len);