From c3e23dc117beae7e89e81aed28b0e082a14d672e Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Tue, 2 Jun 2015 17:57:25 +0200 Subject: [PATCH 1/4] t6301: new tests of for-each-ref error handling Add tests that for-each-ref correctly reports broken loose reference files and references that point at missing objects. In fact, two of these tests fail, because (1) NULL_SHA1 is not recognized as an invalid reference value, and (2) for-each-ref doesn't respect REF_ISBROKEN. Fixes to come. Note that when for-each-ref is run with a --format option that doesn't require the object to be looked up, then we should still notice if a loose reference file is corrupt or contains NULL_SHA1, but we don't notice if it points at a missing object because we don't do an object lookup. This is OK. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- t/t6301-for-each-ref-errors.sh | 56 ++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100755 t/t6301-for-each-ref-errors.sh diff --git a/t/t6301-for-each-ref-errors.sh b/t/t6301-for-each-ref-errors.sh new file mode 100755 index 0000000000..cf25244c26 --- /dev/null +++ b/t/t6301-for-each-ref-errors.sh @@ -0,0 +1,56 @@ +#!/bin/sh + +test_description='for-each-ref errors for broken refs' + +. ./test-lib.sh + +ZEROS=$_z40 +MISSING=abababababababababababababababababababab + +test_expect_success setup ' + git commit --allow-empty -m "Initial" && + git tag testtag && + git for-each-ref >full-list && + git for-each-ref --format="%(objectname) %(refname)" >brief-list +' + +test_expect_failure 'Broken refs are reported correctly' ' + r=refs/heads/bogus && + : >.git/$r && + test_when_finished "rm -f .git/$r" && + echo "warning: ignoring broken ref $r" >broken-err && + git for-each-ref >out 2>err && + test_cmp full-list out && + test_cmp broken-err err +' + +test_expect_failure 'NULL_SHA1 refs are reported correctly' ' + r=refs/heads/zeros && + echo $ZEROS >.git/$r && + test_when_finished "rm -f .git/$r" && + echo "warning: ignoring broken ref $r" >zeros-err && + git for-each-ref >out 2>err && + test_cmp full-list out && + test_cmp zeros-err err && + git for-each-ref --format="%(objectname) %(refname)" >brief-out 2>brief-err && + test_cmp brief-list brief-out && + test_cmp zeros-err brief-err +' + +test_expect_success 'Missing objects are reported correctly' ' + r=refs/heads/missing && + echo $MISSING >.git/$r && + test_when_finished "rm -f .git/$r" && + echo "fatal: missing object $MISSING for $r" >missing-err && + test_must_fail git for-each-ref 2>err && + test_cmp missing-err err && + ( + cat brief-list && + echo "$MISSING $r" + ) | sort -k 2 >missing-brief-expected && + git for-each-ref --format="%(objectname) %(refname)" >brief-out 2>brief-err && + test_cmp missing-brief-expected brief-out && + test_must_be_empty brief-err +' + +test_done From 8afc493d115e35092be8de3a441317f175928ab5 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Tue, 2 Jun 2015 17:57:26 +0200 Subject: [PATCH 2/4] for-each-ref: report broken references correctly If there is a loose reference file with invalid contents, "git for-each-ref" incorrectly reports the problem as being a missing object with name NULL_SHA1: $ echo '12345678' >.git/refs/heads/nonsense $ git for-each-ref fatal: missing object 0000000000000000000000000000000000000000 for refs/heads/nonsense With an explicit "--format" string, it can even report that the reference validly points at NULL_SHA1: $ git for-each-ref --format='%(objectname) %(refname)' 0000000000000000000000000000000000000000 refs/heads/nonsense $ echo $? 0 This has been broken since b7dd2d2 for-each-ref: Do not lookup objects when they will not be used (2009-05-27) , which changed for-each-ref from using for_each_ref() to using git_for_each_rawref() in order to avoid looking up the referred-to objects unnecessarily. (When "git for-each-ref" is given a "--format" string that doesn't include information about the pointed-to object, it does not look up the object at all, which makes it considerably faster. Iterating with DO_FOR_EACH_INCLUDE_BROKEN is essential to this optimization because otherwise for_each_ref() would itself need to check whether the object exists as part of its brokenness test.) But for_each_rawref() includes broken references in the iteration, and "git for-each-ref" doesn't itself reject references with REF_ISBROKEN. The result is that broken references are processed *as if* they had the value NULL_SHA1, which is the value stored in entries for broken references. Change "git for-each-ref" to emit warnings for references that are REF_ISBROKEN but to otherwise skip them. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- builtin/for-each-ref.c | 5 +++++ t/t6301-for-each-ref-errors.sh | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/builtin/for-each-ref.c b/builtin/for-each-ref.c index 008513c2f1..4a15f568f1 100644 --- a/builtin/for-each-ref.c +++ b/builtin/for-each-ref.c @@ -851,6 +851,11 @@ static int grab_single_ref(const char *refname, const unsigned char *sha1, int f return 0; } + if (flag & REF_ISBROKEN) { + warning("ignoring broken ref %s", refname); + return 0; + } + if (*cb->grab_pattern) { const char **pattern; int namelen = strlen(refname); diff --git a/t/t6301-for-each-ref-errors.sh b/t/t6301-for-each-ref-errors.sh index cf25244c26..72d2397e12 100755 --- a/t/t6301-for-each-ref-errors.sh +++ b/t/t6301-for-each-ref-errors.sh @@ -14,7 +14,7 @@ test_expect_success setup ' git for-each-ref --format="%(objectname) %(refname)" >brief-list ' -test_expect_failure 'Broken refs are reported correctly' ' +test_expect_success 'Broken refs are reported correctly' ' r=refs/heads/bogus && : >.git/$r && test_when_finished "rm -f .git/$r" && From f5517074f8f5cecc773b2ff927be4a059f2c9db0 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 3 Jun 2015 15:51:58 +0200 Subject: [PATCH 3/4] read_loose_refs(): simplify function logic Make it clearer that there are two possible ways to read the reference, but that we handle read errors uniformly regardless of which way it was read. This refactoring also makes the following change easier to implement. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/refs.c b/refs.c index 3a26ad4e65..3c311d44c1 100644 --- a/refs.c +++ b/refs.c @@ -1281,19 +1281,24 @@ static void read_loose_refs(const char *dirname, struct ref_dir *dir) create_dir_entry(refs, refname.buf, refname.len, 1)); } else { + int read_ok; + if (*refs->name) { hashclr(sha1); flag = 0; - if (resolve_gitlink_ref(refs->name, refname.buf, sha1) < 0) { - hashclr(sha1); - flag |= REF_ISBROKEN; - } - } else if (read_ref_full(refname.buf, - RESOLVE_REF_READING, - sha1, &flag)) { + read_ok = !resolve_gitlink_ref(refs->name, + refname.buf, sha1); + } else { + read_ok = !read_ref_full(refname.buf, + RESOLVE_REF_READING, + sha1, &flag); + } + + if (!read_ok) { hashclr(sha1); flag |= REF_ISBROKEN; } + if (check_refname_format(refname.buf, REFNAME_ALLOW_ONELEVEL)) { hashclr(sha1); From 501cf47cddfbf8040b6f9b8ac06d13094a70f729 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 3 Jun 2015 15:51:59 +0200 Subject: [PATCH 4/4] read_loose_refs(): treat NULL_SHA1 loose references as broken NULL_SHA1 is used to indicate an "invalid object name" throughout our code (and the code of other git implementations), so it is vastly more likely that an on-disk reference was set to this value due to a software bug than that NULL_SHA1 is the legitimate SHA-1 of an actual object. Therefore, if a loose reference has the value NULL_SHA1, consider it to be broken. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 10 ++++++++++ t/t6301-for-each-ref-errors.sh | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/refs.c b/refs.c index 3c311d44c1..07f8847e6d 100644 --- a/refs.c +++ b/refs.c @@ -1297,6 +1297,16 @@ static void read_loose_refs(const char *dirname, struct ref_dir *dir) if (!read_ok) { hashclr(sha1); flag |= REF_ISBROKEN; + } else if (is_null_sha1(sha1)) { + /* + * It is so astronomically unlikely + * that NULL_SHA1 is the SHA-1 of an + * actual object that we consider its + * appearance in a loose reference + * file to be repo corruption + * (probably due to a software bug). + */ + flag |= REF_ISBROKEN; } if (check_refname_format(refname.buf, diff --git a/t/t6301-for-each-ref-errors.sh b/t/t6301-for-each-ref-errors.sh index 72d2397e12..cdb67a03b7 100755 --- a/t/t6301-for-each-ref-errors.sh +++ b/t/t6301-for-each-ref-errors.sh @@ -24,7 +24,7 @@ test_expect_success 'Broken refs are reported correctly' ' test_cmp broken-err err ' -test_expect_failure 'NULL_SHA1 refs are reported correctly' ' +test_expect_success 'NULL_SHA1 refs are reported correctly' ' r=refs/heads/zeros && echo $ZEROS >.git/$r && test_when_finished "rm -f .git/$r" &&