From 2181fe6e464c4a2615db0df1189d1e6fb0575631 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 4 Jun 2024 06:13:05 -0400 Subject: [PATCH 01/13] sparse-checkout: free string list in write_cone_to_file() We use a string list to hold sorted and de-duped patterns, but don't free it before leaving the function, causing a leak. This drops the number of leaks found in t7002 from 27 to 25. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 0f52e25249..8747191484 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -311,6 +311,8 @@ static void write_cone_to_file(FILE *fp, struct pattern_list *pl) fprintf(fp, "%s/\n", pattern); free(pattern); } + + string_list_clear(&sl, 0); } static int write_patterns_and_update(struct pattern_list *pl) From 4d7f95ed1f5c4a8842f8a9f9614ee0c66c85bfbd Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 4 Jun 2024 06:13:10 -0400 Subject: [PATCH 02/13] sparse-checkout: pass string literals directly to add_pattern() The add_pattern() function takes a pattern string, but neither makes a copy of it nor takes ownership of the memory. So it is the caller's responsibility to make sure the string hangs around as long as the pattern_list which references it. There are a few cases in sparse-checkout where we use string literal patterns by stuffing them into a strbuf, detaching the buffer, and then passing the result into add_pattern(). This creates a leak when the pattern_list is eventually cleared, since we don't retain a copy of the detached buffer to free. But we can observe that the whole strbuf dance is unnecessary. The point was presumably[1] to satisfy the lifetime requirement of the string. But string literals have static duration; we can count on them lasting for the whole program. So we can fix the leak by just passing them directly. And as a bonus, that simplifies the code. The leaks can be seen in t7002, which drops from 25 leaks to 22 with this patch. It also makes t3602 and t1090 leak-free. In the long run, we will also want to clean up this (undocumented!) memory lifetime requirement of add_pattern(). But that can come in a later patch; passing the string literals directly will be the right thing either way. [1] The code in question comes from 416adc8711 (sparse-checkout: update working directory in-process for 'init', 2019-11-21) and 99dfa6f970 (sparse-checkout: use in-process update for disable subcommand, 2019-11-21), but I didn't see anything in their commit messages or on the list explaining the strbufs. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 11 +++-------- t/t1090-sparse-checkout-scope.sh | 1 + t/t3602-rm-sparse-checkout.sh | 1 + 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 8747191484..7c17ed238c 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -442,7 +442,6 @@ static int sparse_checkout_init(int argc, const char **argv, const char *prefix) char *sparse_filename; int res; struct object_id oid; - struct strbuf pattern = STRBUF_INIT; static struct option builtin_sparse_checkout_init_options[] = { OPT_BOOL(0, "cone", &init_opts.cone_mode, @@ -493,10 +492,8 @@ static int sparse_checkout_init(int argc, const char **argv, const char *prefix) return 0; } - strbuf_addstr(&pattern, "/*"); - add_pattern(strbuf_detach(&pattern, NULL), empty_base, 0, &pl, 0); - strbuf_addstr(&pattern, "!/*/"); - add_pattern(strbuf_detach(&pattern, NULL), empty_base, 0, &pl, 0); + add_pattern("/*", empty_base, 0, &pl, 0); + add_pattern("!/*/", empty_base, 0, &pl, 0); pl.use_cone_patterns = init_opts.cone_mode; return write_patterns_and_update(&pl); @@ -893,7 +890,6 @@ static int sparse_checkout_disable(int argc, const char **argv, OPT_END(), }; struct pattern_list pl; - struct strbuf match_all = STRBUF_INIT; /* * We do not exit early if !core_apply_sparse_checkout; due to the @@ -919,8 +915,7 @@ static int sparse_checkout_disable(int argc, const char **argv, pl.use_cone_patterns = 0; core_apply_sparse_checkout = 1; - strbuf_addstr(&match_all, "/*"); - add_pattern(strbuf_detach(&match_all, NULL), empty_base, 0, &pl, 0); + add_pattern("/*", empty_base, 0, &pl, 0); prepare_repo_settings(the_repository); the_repository->settings.sparse_index = 0; diff --git a/t/t1090-sparse-checkout-scope.sh b/t/t1090-sparse-checkout-scope.sh index 3a14218b24..da0e7714d5 100755 --- a/t/t1090-sparse-checkout-scope.sh +++ b/t/t1090-sparse-checkout-scope.sh @@ -6,6 +6,7 @@ GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME TEST_CREATE_REPO_NO_TEMPLATE=1 +TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh test_expect_success 'setup' ' diff --git a/t/t3602-rm-sparse-checkout.sh b/t/t3602-rm-sparse-checkout.sh index 08580fd3dc..fcdefba48c 100755 --- a/t/t3602-rm-sparse-checkout.sh +++ b/t/t3602-rm-sparse-checkout.sh @@ -2,6 +2,7 @@ test_description='git rm in sparse checked out working trees' +TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh test_expect_success 'setup' " From 4318d3ab6589af78cef344afbab100d639219468 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 4 Jun 2024 06:13:14 -0400 Subject: [PATCH 03/13] dir.c: free strings in sparse cone pattern hashmaps The pattern_list structs used for cone-mode sparse lookups use a few extra hashmaps. These store pattern_entry structs, each of which has its own heap-allocated pattern string. When we clean up the hashmaps, we free the individual pattern_entry structs, but forget to clean up the embedded strings, causing memory leaks. We can fix this by iterating over the hashmaps to free the extra strings. This reduces the numbers of leaks in t7002 from 22 to 9. One alternative here would be to make the string a FLEX_ARRAY member of the pattern_entry. Then there's no extra free() required, and as a bonus it would be a little more efficient. However, some of the refactoring gets awkward, as we are often assigning strings allocated by helper functions. So let's just fix the leak for now, and we can explore bigger refactoring separately. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- dir.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/dir.c b/dir.c index 2d83f3311a..9cc4a68fbc 100644 --- a/dir.c +++ b/dir.c @@ -726,6 +726,17 @@ static char *dup_and_filter_pattern(const char *pattern) return result; } +static void clear_pattern_entry_hashmap(struct hashmap *map) +{ + struct hashmap_iter iter; + struct pattern_entry *entry; + + hashmap_for_each_entry(map, &iter, entry, ent) { + free(entry->pattern); + } + hashmap_clear_and_free(map, struct pattern_entry, ent); +} + static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern *given) { struct pattern_entry *translated; @@ -855,8 +866,8 @@ static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern clear_hashmaps: warning(_("disabling cone pattern matching")); - hashmap_clear_and_free(&pl->parent_hashmap, struct pattern_entry, ent); - hashmap_clear_and_free(&pl->recursive_hashmap, struct pattern_entry, ent); + clear_pattern_entry_hashmap(&pl->recursive_hashmap); + clear_pattern_entry_hashmap(&pl->parent_hashmap); pl->use_cone_patterns = 0; } @@ -956,8 +967,8 @@ void clear_pattern_list(struct pattern_list *pl) free(pl->patterns[i]); free(pl->patterns); free(pl->filebuf); - hashmap_clear_and_free(&pl->recursive_hashmap, struct pattern_entry, ent); - hashmap_clear_and_free(&pl->parent_hashmap, struct pattern_entry, ent); + clear_pattern_entry_hashmap(&pl->recursive_hashmap); + clear_pattern_entry_hashmap(&pl->parent_hashmap); memset(pl, 0, sizeof(*pl)); } From db83b64cda1813368e0c48b808c76422d4512b1b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 4 Jun 2024 06:13:17 -0400 Subject: [PATCH 04/13] sparse-checkout: clear patterns when init() sees existing sparse file In sparse_checkout_init(), we first try to load patterns from an existing file. If we found any, we return immediately, but end up leaking the patterns we parsed. Fixing this reduces the number of leaks in t7002 from 9 down to 5. Note that there are two other exits from the function, but they don't need the same treatment: - if we can't resolve HEAD, we write out a hard-coded sparse file and return. But we know the pattern list is empty there, since we didn't find any in the on-disk file and we haven't yet added any of our own. - otherwise, we do populate the list and then tail-call into write_patterns_and_update(). But that function frees the pattern_list itself, so we don't need to. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 1 + 1 file changed, 1 insertion(+) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 7c17ed238c..923e6ecc0a 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -472,6 +472,7 @@ static int sparse_checkout_init(int argc, const char **argv, const char *prefix) /* If we already have a sparse-checkout file, use it. */ if (res >= 0) { free(sparse_filename); + clear_pattern_list(&pl); return update_working_directory(NULL); } From 4c844c2f499a573e5801bd3cb83b8d42d3f1566a Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 4 Jun 2024 06:13:20 -0400 Subject: [PATCH 05/13] dir.c: free removed sparse-pattern hashmap entries In add_pattern_to_hashsets(), we remove entries from the recursive_hashmap when adding similar ones to the parent_hashmap. I won't pretend to understand all of what's going on here, but there's an obvious leak: whatever we removed from recursive_hashmap is not referenced anywhere else, and is never free()d. We can easily fix this by asking the hashmap to return a pointer to the old entry. This makes t7002 now completely leak-free. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- dir.c | 8 +++++++- t/t7002-mv-sparse-checkout.sh | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/dir.c b/dir.c index 9cc4a68fbc..d812d521b0 100644 --- a/dir.c +++ b/dir.c @@ -810,6 +810,8 @@ static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern if (given->patternlen > 2 && !strcmp(given->pattern + given->patternlen - 2, "/*")) { + struct pattern_entry *old; + if (!(given->flags & PATTERN_FLAG_NEGATIVE)) { /* Not a cone pattern. */ warning(_("unrecognized pattern: '%s'"), given->pattern); @@ -835,7 +837,11 @@ static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern } hashmap_add(&pl->parent_hashmap, &translated->ent); - hashmap_remove(&pl->recursive_hashmap, &translated->ent, &data); + old = hashmap_remove_entry(&pl->recursive_hashmap, translated, ent, &data); + if (old) { + free(old->pattern); + free(old); + } free(data); return; } diff --git a/t/t7002-mv-sparse-checkout.sh b/t/t7002-mv-sparse-checkout.sh index 26582ae4e5..57969ce805 100755 --- a/t/t7002-mv-sparse-checkout.sh +++ b/t/t7002-mv-sparse-checkout.sh @@ -2,6 +2,7 @@ test_description='git mv in sparse working trees' +TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh setup_sparse_checkout () { From eed1fbe73b46e5c7f87d724f97fba6d8a4391fc9 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 4 Jun 2024 06:13:22 -0400 Subject: [PATCH 06/13] dir.c: always copy input to add_pattern() The add_pattern() function has a subtle and undocumented gotcha: the pattern string you pass in must remain valid as long as the pattern_list is in use (and nor do we take ownership of it). This is easy to get wrong, causing either subtle bugs (because you free or reuse the string buffer) or leaks (because you copy the string, but don't track ownership separately). All of this "pattern" code was originally the "exclude" mechanism. So this _usually_ works OK because you add entries in one of two ways: 1. From the command-line (e.g., "--exclude"), in which case we're pointing to an argv entry which remains valid for the lifetime of the program. 2. From a file (e.g., ".gitignore"), in which case we read the whole file into a buffer, attach it to the pattern_list's "filebuf" entry, then parse the buffer in-place (adding NULs). The strings point into the filebuf, which is cleaned up when the whole pattern_list goes away. But other code, like sparse-checkout, reads individual lines from stdin and passes them one by one to add_pattern(), leaking each. We could fix this by refactoring it to take in the whole buffer at once, like (2) above, and stuff it in "filebuf". But given how subtle the interface is, let's just fix it to always copy the string. That seems at first like we'd be wasting extra memory, but we can mitigate that: a. The path_pattern struct already uses a FLEXPTR, since we sometimes make a copy (when we see "foo/", we strip off the trailing slash, requiring a modifiable copy of the string). Since we'll now always embed the string inside the struct, we can switch to the regular FLEX_ARRAY pattern, saving us 8 bytes of pointer. So patterns with a trailing slash and ones under 8 bytes actually get smaller. b. Now that we don't need the original string to hang around, we can get rid of the "filebuf" mechanism entirely, and just free the file contents after parsing. Since files are the sources we'd expect to have the largest pattern sets, we should mostly break even on stuffing the same data into the individual structs. This patch just adjusts the add_pattern() interface; it doesn't fix any leaky callers yet. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- dir.c | 15 +++++---------- dir.h | 6 ++---- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/dir.c b/dir.c index d812d521b0..8308d167c8 100644 --- a/dir.c +++ b/dir.c @@ -925,12 +925,7 @@ void add_pattern(const char *string, const char *base, int nowildcardlen; parse_path_pattern(&string, &patternlen, &flags, &nowildcardlen); - if (flags & PATTERN_FLAG_MUSTBEDIR) { - FLEXPTR_ALLOC_MEM(pattern, pattern, string, patternlen); - } else { - pattern = xmalloc(sizeof(*pattern)); - pattern->pattern = string; - } + FLEX_ALLOC_MEM(pattern, pattern, string, patternlen); pattern->patternlen = patternlen; pattern->nowildcardlen = nowildcardlen; pattern->base = base; @@ -972,7 +967,6 @@ void clear_pattern_list(struct pattern_list *pl) for (i = 0; i < pl->nr; i++) free(pl->patterns[i]); free(pl->patterns); - free(pl->filebuf); clear_pattern_entry_hashmap(&pl->recursive_hashmap); clear_pattern_entry_hashmap(&pl->parent_hashmap); @@ -1166,6 +1160,7 @@ static int add_patterns(const char *fname, const char *base, int baselen, } add_patterns_from_buffer(buf, size, base, baselen, pl); + free(buf); return 0; } @@ -1173,16 +1168,15 @@ static int add_patterns_from_buffer(char *buf, size_t size, const char *base, int baselen, struct pattern_list *pl) { + char *orig = buf; int i, lineno = 1; char *entry; hashmap_init(&pl->recursive_hashmap, pl_hashmap_cmp, NULL, 0); hashmap_init(&pl->parent_hashmap, pl_hashmap_cmp, NULL, 0); - pl->filebuf = buf; - if (skip_utf8_bom(&buf, size)) - size -= buf - pl->filebuf; + size -= buf - orig; entry = buf; @@ -1222,6 +1216,7 @@ int add_patterns_from_blob_to_list( return r; add_patterns_from_buffer(buf, size, base, baselen, pl); + free(buf); return 0; } diff --git a/dir.h b/dir.h index b9e8e96128..1398a53fb4 100644 --- a/dir.h +++ b/dir.h @@ -62,7 +62,6 @@ struct path_pattern { */ struct pattern_list *pl; - const char *pattern; int patternlen; int nowildcardlen; const char *base; @@ -74,6 +73,8 @@ struct path_pattern { * and from -1 decrementing for patterns from CLI args. */ int srcpos; + + char pattern[FLEX_ARRAY]; }; /* used for hashmaps for cone patterns */ @@ -94,9 +95,6 @@ struct pattern_list { int nr; int alloc; - /* remember pointer to exclude file contents so we can free() */ - char *filebuf; - /* origin of list, e.g. path to filename, or descriptive string */ const char *src; From c3324649ed5af972c9e91740b46a6e2a063050d4 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 4 Jun 2024 06:13:25 -0400 Subject: [PATCH 07/13] sparse-checkout: reuse --stdin buffer when reading patterns When we read patterns from --stdin, we loop on strbuf_getline(), and detach each line we read to pass into add_pattern(). This used to be necessary because add_pattern() required that the pattern strings remain valid while the pattern_list was in use. But it also created a leak, since we didn't record the detached buffers anywhere else. Now that add_pattern() has been modified to make its own copy of the strings, we can stop detaching and fix the leak. This fixes 4 leaks detected in t1091. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 923e6ecc0a..75c07d5bb4 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -585,11 +585,10 @@ static void add_patterns_from_input(struct pattern_list *pl, if (file) { struct strbuf line = STRBUF_INIT; - while (!strbuf_getline(&line, file)) { - size_t len; - char *buf = strbuf_detach(&line, &len); - add_pattern(buf, empty_base, 0, pl, 0); - } + while (!strbuf_getline(&line, file)) + add_pattern(line.buf, empty_base, 0, pl, 0); + + strbuf_release(&line); } else { for (i = 0; i < argc; i++) add_pattern(argv[i], empty_base, 0, pl, 0); From d765fa03311631982017bdb3c755aff7ed3757d0 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 4 Jun 2024 06:13:27 -0400 Subject: [PATCH 08/13] sparse-checkout: always free "line" strbuf after reading input In add_patterns_from_input(), we may read lines from a file with a loop like this: while (!strbuf_getline(&line, file)) { ... strbuf_to_cone_pattern(&line, pl); } /* we don't strbuf_release(&line) here! */ This generally is OK because strbuf_to_cone_pattern() consumes the buffer via strbuf_detach(). But we can leak in a few cases: 1. We don't always consume the buffer! If the line ends up empty after trimming, we leave strbuf_to_cone_pattern() without detaching. In most cases this is OK, because a subsequent getline() call will use the same buffer. But if you had an empty line at the end of file, for example, it would leak. 2. Even if strbuf_to_cone_pattern() always consumed the buffer, there's a subtle issue with strbuf_getline(). As we saw in 94e2aa555e (strbuf: fix leak when `appendwholeline()` fails with EOF, 2024-05-27), it's possible for it to return EOF with an allocated buffer (e.g., if the underlying getdelim() call saw an error). So we should always strbuf_release() after finishing a read loop like this. Note that even the code to read patterns from argv has the same problem. Because that also uses strbuf_to_cone_pattern(), we stuff each argv entry into a strbuf. It uses the same "line" strbuf as the getline code, but we should position the strbuf_release() to cover both code paths. This fixes at least 9 leaks found in t1091. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 1 + 1 file changed, 1 insertion(+) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 75c07d5bb4..8f8f5c359f 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -581,6 +581,7 @@ static void add_patterns_from_input(struct pattern_list *pl, strbuf_to_cone_pattern(&line, pl); } } + strbuf_release(&line); } else { if (file) { struct strbuf line = STRBUF_INIT; From a14d49ca846df43f706691b6219140673a65500b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 4 Jun 2024 06:13:30 -0400 Subject: [PATCH 09/13] sparse-checkout: refactor temporary sparse_checkout_patterns In update_working_directory(), we take in a pattern_list, attach it to the repository index by assigning it to index->sparse_checkout_patterns, and then call unpack_trees. Afterwards, we remove it by setting index->sparse_checkout_patterns back to NULL. But there are two possible leaks here: 1. If the index already had a populated sparse_checkout_patterns, we've obliterated it. We can fix this by saving and restoring it, rather than always setting it back to NULL. 2. We may call the function with a NULL pattern_list, expecting it to use the on-disk sparse file. In that case, the index routines will lazy-load the sparse patterns automatically. But now at the end of the function when we restore the patterns, we'll leak those lazy-loaded ones! We can fix this by freeing the pattern list before overwriting its pointer whenever it does not match what was passed in (in practice this should only happen when the passed-in list is NULL, but this is erring on the defensive side). Together these remove 48 indirect leaks found in t1091. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 8f8f5c359f..b84d2e1c80 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -205,11 +205,13 @@ static int update_working_directory(struct pattern_list *pl) struct unpack_trees_options o; struct lock_file lock_file = LOCK_INIT; struct repository *r = the_repository; + struct pattern_list *old_pl; /* If no branch has been checked out, there are no updates to make. */ if (is_index_unborn(r->index)) return UPDATE_SPARSITY_SUCCESS; + old_pl = r->index->sparse_checkout_patterns; r->index->sparse_checkout_patterns = pl; memset(&o, 0, sizeof(o)); @@ -241,7 +243,12 @@ static int update_working_directory(struct pattern_list *pl) clean_tracked_sparse_directories(r); - r->index->sparse_checkout_patterns = NULL; + if (r->index->sparse_checkout_patterns != pl) { + clear_pattern_list(r->index->sparse_checkout_patterns); + FREE_AND_NULL(r->index->sparse_checkout_patterns); + } + r->index->sparse_checkout_patterns = old_pl; + return result; } From 008f59d2d6b3be1017d8d321985b89d85659df04 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 4 Jun 2024 06:13:32 -0400 Subject: [PATCH 10/13] sparse-checkout: free sparse_filename after use We allocate a heap buffer via get_sparse_checkout_filename(). Most calls remember to free it, but sparse_checkout_init() forgets to, causing a leak. Ironically, it remembers to do so in the error return paths, but not in the path that makes it all the way to the function end! Fixing this clears up 6 leaks from t1091. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index b84d2e1c80..79342480eb 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -500,6 +500,8 @@ static int sparse_checkout_init(int argc, const char **argv, const char *prefix) return 0; } + free(sparse_filename); + add_pattern("/*", empty_base, 0, &pl, 0); add_pattern("!/*/", empty_base, 0, &pl, 0); pl.use_cone_patterns = init_opts.cone_mode; From 521e04e6e8d234e89a5bbeac0b432724b98ac508 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 4 Jun 2024 06:13:35 -0400 Subject: [PATCH 11/13] sparse-checkout: free pattern list in sparse_checkout_list() In sparse_checkout_list(), we create a pattern_list that needs to eventually be cleared. We remember to do so in the regular code path, but the cone-mode path does an early return, and forgets to clean up. We could fix the leak by adding a new call to clear_pattern_list(). But we can simplify even further by just skipping the early return, pushing the other code path (which consists now of only one line!) into an else block. That also matches the same cone/non-cone if/else used in some other functions. This fixes 15 leaks found in t1091. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 79342480eb..fb43bb7577 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -95,11 +95,10 @@ static int sparse_checkout_list(int argc, const char **argv, const char *prefix) quote_c_style(sl.items[i].string, NULL, stdout, 0); printf("\n"); } - - return 0; + } else { + write_patterns_to_file(stdout, &pl); } - write_patterns_to_file(stdout, &pl); clear_pattern_list(&pl); return 0; From a544b7da2c91f07c6d22d9c0b72911d32f836c54 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 4 Jun 2024 06:13:37 -0400 Subject: [PATCH 12/13] sparse-checkout: free string list after displaying In sparse_checkout_list(), we put the hashmap entries into a string_list so we can sort them. But after printing, we forget to free the list. This patch drops 5 leaks from t1091. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index fb43bb7577..e648e035ab 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -95,6 +95,8 @@ static int sparse_checkout_list(int argc, const char **argv, const char *prefix) quote_c_style(sl.items[i].string, NULL, stdout, 0); printf("\n"); } + + string_list_clear(&sl, 0); } else { write_patterns_to_file(stdout, &pl); } From 6d107751b299e2d0439909d01439b245dd820c5d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 4 Jun 2024 06:13:40 -0400 Subject: [PATCH 13/13] sparse-checkout: free duplicate hashmap entries In insert_recursive_pattern(), we create a new pattern_entry to insert into the parent_hashmap. If we find that the same entry already exists in the hashmap, we skip adding the new one. But we forget to free the new one, creating a leak. We can fix it by cleaning up the discarded entry. It would probably be possible to avoid creating it in the first place, but it's non-trivial. We'd have to define a "keydata" struct that lets us compare the existing entries to the broken-out fields. It's probably not worth the complexity, so we'll punt on that for now. There is one subtlety here: our insertion is happening in a loop, with each iteration looking at the pattern we just inserted (hence the "recursive" in the name). So if we skip insertion, what do we look at? The obvious answer is that we should remember the existing duplicate we found and use that. But I _think_ in that case, we probably already have all of the recursive bits already (from when the original entry was added). And so just breaking out of the loop would be correct. But I'm not 100% sure on that; after all, the original leaky code could have done the same break, but it didn't. So I went with the "obvious answer" above, which has no chance of changing the behavior aside from fixing the leak. With this patch, t1091 can now be marked leak-free. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 9 ++++++++- t/t1091-sparse-checkout-builtin.sh | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index e648e035ab..3f2bfce8fa 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -523,6 +523,7 @@ static void insert_recursive_pattern(struct pattern_list *pl, struct strbuf *pat char *slash = strrchr(e->pattern, '/'); char *oldpattern = e->pattern; size_t newlen; + struct pattern_entry *dup; if (!slash || slash == e->pattern) break; @@ -533,8 +534,14 @@ static void insert_recursive_pattern(struct pattern_list *pl, struct strbuf *pat e->pattern = xstrndup(oldpattern, newlen); hashmap_entry_init(&e->ent, fspathhash(e->pattern)); - if (!hashmap_get_entry(&pl->parent_hashmap, e, ent, NULL)) + dup = hashmap_get_entry(&pl->parent_hashmap, e, ent, NULL); + if (!dup) { hashmap_add(&pl->parent_hashmap, &e->ent); + } else { + free(e->pattern); + free(e); + e = dup; + } } } diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index ab3a105fff..8c5cd651b4 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -8,6 +8,7 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME GIT_TEST_SPLIT_INDEX=false export GIT_TEST_SPLIT_INDEX +TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh list_files() {