From ee798742bd35d88770e4ef05a7944b5783790e60 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sat, 25 Jan 2020 00:37:23 -0500 Subject: [PATCH 1/5] merge-recursive: silence -Wxor-used-as-pow warning The merge-recursive code uses stage number constants like this: add = &ci->ren1->dst_entry->stages[2 ^ 1]; ... add = &ci->ren2->dst_entry->stages[3 ^ 1]; The xor has the effect of flipping the "1" bit, so that "2 ^ 1" becomes "3" and "3 ^ 1" becomes "2", which correspond to the "ours" and "theirs" stages respectively. Unfortunately, clang-10 and up issue a warning for this code: merge-recursive.c:1759:40: error: result of '2 ^ 1' is 3; did you mean '1 << 1' (2)? [-Werror,-Wxor-used-as-pow] add = &ci->ren1->dst_entry->stages[2 ^ 1]; ~~^~~ 1 << 1 merge-recursive.c:1759:40: note: replace expression with '0x2 ^ 1' to silence this warning We could silence it by using 0x2, as the compiler mentions. Or by just using the constants "2" and "3" directly. But after digging into it, I do think this bit-flip is telling us something. If we just wrote: add = &ci->ren2->dst_entry->stages[2]; for the second one, you might think that "ren2" and "2" correspond. But they don't. The logic is: ren2 is theirs, which is stage 3, but we are interested in the opposite side's stage, so flip it to 2. So let's keep the bit-flipping, but let's also put it behind a named function, which will make its purpose a bit clearer. This also has the side effect of suppressing the warning (and an optimizing compiler should be able to easily turn it into a constant as before). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- merge-recursive.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/merge-recursive.c b/merge-recursive.c index 10dca5644b..e6aedd3cab 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -1712,6 +1712,15 @@ static char *find_path_for_conflict(struct merge_options *opt, return new_path; } +/* + * Toggle the stage number between "ours" and "theirs" (2 and 3) by flipping + * the 1-bit. + */ +static inline int flip_stage(int stage) +{ + return stage ^ 1; +} + static int handle_rename_rename_1to2(struct merge_options *opt, struct rename_conflict_info *ci) { @@ -1756,14 +1765,14 @@ static int handle_rename_rename_1to2(struct merge_options *opt, * such cases, we should keep the added file around, * resolving the conflict at that path in its favor. */ - add = &ci->ren1->dst_entry->stages[2 ^ 1]; + add = &ci->ren1->dst_entry->stages[flip_stage(2)]; if (is_valid(add)) { if (update_file(opt, 0, add, a->path)) return -1; } else remove_file_from_index(opt->repo->index, a->path); - add = &ci->ren2->dst_entry->stages[3 ^ 1]; + add = &ci->ren2->dst_entry->stages[flip_stage(3)]; if (is_valid(add)) { if (update_file(opt, 0, add, b->path)) return -1; @@ -1776,7 +1785,7 @@ static int handle_rename_rename_1to2(struct merge_options *opt, * rename/add collision. If not, we can write the file out * to the specified location. */ - add = &ci->ren1->dst_entry->stages[2 ^ 1]; + add = &ci->ren1->dst_entry->stages[flip_stage(2)]; if (is_valid(add)) { add->path = mfi.blob.path = a->path; if (handle_file_collision(opt, a->path, @@ -1797,7 +1806,7 @@ static int handle_rename_rename_1to2(struct merge_options *opt, return -1; } - add = &ci->ren2->dst_entry->stages[3 ^ 1]; + add = &ci->ren2->dst_entry->stages[flip_stage(3)]; if (is_valid(add)) { add->path = mfi.blob.path = b->path; if (handle_file_collision(opt, b->path, @@ -1846,7 +1855,7 @@ static int handle_rename_rename_2to1(struct merge_options *opt, path_side_1_desc = xstrfmt("version of %s from %s", path, a->path); path_side_2_desc = xstrfmt("version of %s from %s", path, b->path); ostage1 = ci->ren1->branch == opt->branch1 ? 3 : 2; - ostage2 = ostage1 ^ 1; + ostage2 = flip_stage(ostage1); ci->ren1->src_entry->stages[ostage1].path = a->path; ci->ren2->src_entry->stages[ostage2].path = b->path; if (merge_mode_and_contents(opt, a, c1, From 4c616c2ba1c176573bd1ae7c4ec3271d1f175448 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 25 Jan 2020 18:57:45 -0500 Subject: [PATCH 2/5] merge-recursive: use subtraction to flip stage The flip_stage() helper uses a bit-flipping xor to switch between "2" and "3". While clever, this relies on a property of those two numbers that is mostly coincidence. Let's write it as a subtraction; that's more clear and would extend to other numbers if somebody copies the logic. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- merge-recursive.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/merge-recursive.c b/merge-recursive.c index e6aedd3cab..aee1769a7a 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -1713,12 +1713,11 @@ static char *find_path_for_conflict(struct merge_options *opt, } /* - * Toggle the stage number between "ours" and "theirs" (2 and 3) by flipping - * the 1-bit. + * Toggle the stage number between "ours" and "theirs" (2 and 3). */ static inline int flip_stage(int stage) { - return stage ^ 1; + return (2 + 3) - stage; } static int handle_rename_rename_1to2(struct merge_options *opt, From d20bc01a51f7899deb7a04bdd7c2495789185297 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 29 Jan 2020 00:46:47 -0500 Subject: [PATCH 3/5] avoid computing zero offsets from NULL pointer The Undefined Behavior Sanitizer in clang-11 seems to have learned a new trick: it complains about computing offsets from a NULL pointer, even if that offset is 0. This causes numerous test failures. For example, from t1090: unpack-trees.c:1355:41: runtime error: applying zero offset to null pointer ... not ok 6 - in partial clone, sparse checkout only fetches needed blobs The code in question looks like this: struct cache_entry **cache_end = cache + nr; ... while (cache != cache_end) and we sometimes pass in a NULL and 0 for "cache" and "nr". This is conceptually fine, as "cache_end" would be equal to "cache" in this case, and we wouldn't enter the loop at all. But computing even a zero offset violates the C standard. And given the fact that UBSan is noticing this behavior, this might be a potential problem spot if the compiler starts making unexpected assumptions based on undefined behavior. So let's just avoid it, which is pretty easy. In some cases we can just switch to iterating with a numeric index (as we do in sequencer.c here). In other cases (like the cache_end one) the use of an end pointer is more natural; we can keep that by just explicitly checking for the NULL/0 case when assigning the end pointer. Note that there are two ways you can write this latter case, checking for the pointer: cache_end = cache ? cache + nr : cache; or the size: cache_end = nr ? cache + nr : cache; For the case of a NULL/0 ptr/len combo, they are equivalent. But writing it the second way (as this patch does) has the property that if somebody were to incorrectly pass a NULL pointer with a non-zero length, we'd continue to notice and segfault, rather than silently pretending the length was zero. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sequencer.c | 6 +++--- unpack-trees.c | 2 +- xdiff-interface.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sequencer.c b/sequencer.c index b9dbf1adb0..4d31ec3296 100644 --- a/sequencer.c +++ b/sequencer.c @@ -588,7 +588,7 @@ static int do_recursive_merge(struct repository *r, struct merge_options o; struct tree *next_tree, *base_tree, *head_tree; int clean; - char **xopt; + int i; struct lock_file index_lock = LOCK_INIT; if (repo_hold_locked_index(r, &index_lock, LOCK_REPORT_ON_ERROR) < 0) @@ -608,8 +608,8 @@ static int do_recursive_merge(struct repository *r, next_tree = next ? get_commit_tree(next) : empty_tree(r); base_tree = base ? get_commit_tree(base) : empty_tree(r); - for (xopt = opts->xopts; xopt != opts->xopts + opts->xopts_nr; xopt++) - parse_merge_opt(&o, *xopt); + for (i = 0; i < opts->xopts_nr; i++) + parse_merge_opt(&o, opts->xopts[i]); clean = merge_trees(&o, head_tree, diff --git a/unpack-trees.c b/unpack-trees.c index 2399b6818b..b4a9d97f25 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1348,7 +1348,7 @@ static int clear_ce_flags_1(struct index_state *istate, enum pattern_match_result default_match, int progress_nr) { - struct cache_entry **cache_end = cache + nr; + struct cache_entry **cache_end = nr ? cache + nr : cache; /* * Process all entries that have the given prefix and meet diff --git a/xdiff-interface.c b/xdiff-interface.c index 8509f9ea22..99661d43c6 100644 --- a/xdiff-interface.c +++ b/xdiff-interface.c @@ -84,8 +84,8 @@ static void trim_common_tail(mmfile_t *a, mmfile_t *b) { const int blk = 1024; long trimmed = 0, recovered = 0; - char *ap = a->ptr + a->size; - char *bp = b->ptr + b->size; + char *ap = a->size ? a->ptr + a->size : a->ptr; + char *bp = b->size ? b->ptr + b->size : b->ptr; long smaller = (a->size < b->size) ? a->size : b->size; while (blk + trimmed <= smaller && !memcmp(ap - blk, bp - blk, blk)) { From 3cd309c16f3b9370a90d2f4eb69002473020412a Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sat, 25 Jan 2020 00:39:29 -0500 Subject: [PATCH 4/5] xdiff: avoid computing non-zero offset from NULL pointer As with the previous commit, clang-11's UBSan complains about computing offsets from a NULL pointer, causing some tests to fail. In this case, though, we're actually computing a non-zero offset, which is even more dubious. From t7810: xdiff-interface.c:268:14: runtime error: applying non-zero offset 1 to null pointer ... not ok 131 - grep -p with userdiff The problem is our parsing of the funcname config. We count the number of lines in the string, allocate an array, and then loop over our allocated entries, parsing each line and moving our cursor to one past the trailing newline for the next iteration. But the final line will not generally have a trailing newline (since it's a config value), and hence we go to one past NULL. In practice this is OK, since our loop should terminate before we look at the value. But even computing such an invalid pointer technically violates the standard. We can fix it by leaving the pointer at NULL if we're at the end, rather than one-past. And while we're thinking about it, we can also document the variant by asserting that our initial line-count matches the second-pass of parsing. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- xdiff-interface.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/xdiff-interface.c b/xdiff-interface.c index 99661d43c6..4d20069302 100644 --- a/xdiff-interface.c +++ b/xdiff-interface.c @@ -250,9 +250,13 @@ void xdiff_set_find_func(xdemitconf_t *xecfg, const char *value, int cflags) ALLOC_ARRAY(regs->array, regs->nr); for (i = 0; i < regs->nr; i++) { struct ff_reg *reg = regs->array + i; - const char *ep = strchr(value, '\n'), *expression; + const char *ep, *expression; char *buffer = NULL; + if (!value) + BUG("mismatch between line count and parsing"); + ep = strchr(value, '\n'); + reg->negate = (*value == '!'); if (reg->negate && i == regs->nr - 1) die("Last expression must not be negated: %s", value); @@ -265,7 +269,7 @@ void xdiff_set_find_func(xdemitconf_t *xecfg, const char *value, int cflags) if (regcomp(®->re, expression, cflags)) die("Invalid regexp to look for hunk header: %s", expression); free(buffer); - value = ep + 1; + value = ep ? ep + 1 : NULL; } } From cf82bff73f11d3197aa6b667002ea86da1dfa835 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sat, 25 Jan 2020 00:44:29 -0500 Subject: [PATCH 5/5] obstack: avoid computing offsets from NULL pointer As with the previous two commits, UBSan with clang-11 complains about computing offsets from a NULL pointer. The failures in t4013 (and elsewhere) look like this: kwset.c:102:23: runtime error: applying non-zero offset 107820859019600 to null pointer ... not ok 79 - git log -SF master # magic is (not used) That line is not enlightening: ... = obstack_alloc(&kwset->obstack, sizeof (struct trie)); because obstack is implemented almost entirely in macros, and the actual problem is five macros deep (I temporarily converted them to inline functions to get better compiler errors, which was tedious but worked reasonably well). The actual problem is in these pointer-alignment macros: /* If B is the base of an object addressed by P, return the result of aligning P to the next multiple of A + 1. B and P must be of type char *. A + 1 must be a power of 2. */ #define __BPTR_ALIGN(B, P, A) ((B) + (((P) - (B) + (A)) & ~(A))) /* Similar to _BPTR_ALIGN (B, P, A), except optimize the common case where pointers can be converted to integers, aligned as integers, and converted back again. If PTR_INT_TYPE is narrower than a pointer (e.g., the AS/400), play it safe and compute the alignment relative to B. Otherwise, use the faster strategy of computing the alignment relative to 0. */ #define __PTR_ALIGN(B, P, A) \ __BPTR_ALIGN (sizeof (PTR_INT_TYPE) < sizeof (void *) ? (B) : (char *) 0, \ P, A) If we have a sufficiently-large integer pointer type, then we do the computation using a NULL pointer constant. That turns __BPTR_ALIGN() into something like: NULL + (P - NULL + A) & ~A and UBSan is complaining about adding the full value of P to that initial NULL. We can fix this by doing our math as an integer type, and then casting the result back to a pointer. The problem case only happens when we know that the integer type is large enough, so there should be no issue with truncation. Another option would be just simplify out all the 0's from __BPTR_ALIGN() for the NULL-pointer case. That probably wouldn't work for a platform where the NULL pointer isn't all-zeroes, but Git already wouldn't work on such a platform (due to our use of memset to set pointers in structs to NULL). But I tried here to keep as close to the original as possible. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- compat/obstack.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/compat/obstack.h b/compat/obstack.h index 01e7c81840..f90a46d9b9 100644 --- a/compat/obstack.h +++ b/compat/obstack.h @@ -135,8 +135,10 @@ extern "C" { alignment relative to 0. */ #define __PTR_ALIGN(B, P, A) \ - __BPTR_ALIGN (sizeof (PTR_INT_TYPE) < sizeof (void *) ? (B) : (char *) 0, \ - P, A) + (sizeof (PTR_INT_TYPE) < sizeof(void *) ? \ + __BPTR_ALIGN((B), (P), (A)) : \ + (void *)__BPTR_ALIGN((PTR_INT_TYPE)(void *)0, (PTR_INT_TYPE)(P), (A)) \ + ) #include