From 5c07647d987d7f74f11ffadd39343c2f1540176c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 4 Apr 2019 20:37:42 -0700 Subject: [PATCH 1/7] t: move 'hex2oct' into test-lib-functions.sh The helper 'hex2oct' is used to convert base-16 encoded data into a base-8 binary form, and is useful for preparing data for commands that accept input in a binary format, such as 'git hash-object', via 'printf'. This helper is defined identically in three separate places throughout 't'. Move the definition to test-lib-function.sh, so that it can be used in new test suites, and its definition is not redundant. This will likewise make our job easier in the subsequent commit, which also uses 'hex2oct'. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/t1007-hash-object.sh | 4 ---- t/t1450-fsck.sh | 4 ---- t/t5601-clone.sh | 4 ---- t/test-lib-functions.sh | 6 ++++++ 4 files changed, 6 insertions(+), 12 deletions(-) diff --git a/t/t1007-hash-object.sh b/t/t1007-hash-object.sh index a37753047e0..7099d33508a 100755 --- a/t/t1007-hash-object.sh +++ b/t/t1007-hash-object.sh @@ -199,10 +199,6 @@ test_expect_success 'too-short tree' ' test_i18ngrep "too-short tree object" err ' -hex2oct() { - perl -ne 'printf "\\%03o", hex for /../g' -} - test_expect_success 'malformed mode in tree' ' hex_sha1=$(echo foo | git hash-object --stdin -w) && bin_sha1=$(echo $hex_sha1 | hex2oct) && diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index c61f9721413..0b4d874de67 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -256,10 +256,6 @@ test_expect_success 'unparseable tree object' ' test_i18ngrep ! "fatal: empty filename in tree entry" out ' -hex2oct() { - perl -ne 'printf "\\%03o", hex for /../g' -} - test_expect_success 'tree entry with type mismatch' ' test_when_finished "remove_object \$blob" && test_when_finished "remove_object \$tree" && diff --git a/t/t5601-clone.sh b/t/t5601-clone.sh index d6948cbdab0..3f499430106 100755 --- a/t/t5601-clone.sh +++ b/t/t5601-clone.sh @@ -611,10 +611,6 @@ test_expect_success 'GIT_TRACE_PACKFILE produces a usable pack' ' git -C replay.git index-pack -v --stdin Date: Tue, 9 Apr 2019 19:13:14 -0700 Subject: [PATCH 2/7] t: introduce tests for unexpected object types Call an object's type "unexpected" when the actual type of an object does not match Git's contextual expectation. For example, a tree entry whose mode differs from the object's actual type, or a commit's parent which is not another commit, and so on. This can manifest itself in various unfortunate ways, including Git SIGSEGV-ing under specific conditions. Consider the following example: Git traverses a blob (say, via `git rev-list`), and then tries to read out a tree-entry which lists that object as something other than a blob. In this case, `lookup_blob()` will return NULL, and the subsequent dereference will result in a SIGSEGV. Introduce tests that present objects of "unexpected" type in the above fashion to 'git rev-list'. Mark as failures the combinations that are already broken (i.e., they exhibit the segfault described above). In the cases that are not broken (i.e., they have NULL-ness checks or similar), mark these as expecting success. We might hit an unexpected type in two different ways (imagine we have a tree entry that claims to be a tree but actually points to a blob): - when we call lookup_tree(), we might find that we've already seen the object referenced as a blob, in which case we'd get NULL. We can exercise this with "git rev-list --objects $blob $tree", which guarantees that the blob will have been parsed before we look in the tree. These tests are marked as "seen" in the test script. - we call lookup_tree() successfully, but when we try to read the object, we find out it's something else. We construct our tests such that $blob is not otherwise mentioned in $tree. These tests are marked as "lone" in the script. We should check that we behave sensibly in both cases (especially because it is easy for a malicious actor to provoke one case or the other). Co-authored-by: Jeff King Signed-off-by: Taylor Blau Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t6102-rev-list-unexpected-objects.sh | 123 +++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100755 t/t6102-rev-list-unexpected-objects.sh diff --git a/t/t6102-rev-list-unexpected-objects.sh b/t/t6102-rev-list-unexpected-objects.sh new file mode 100755 index 00000000000..15072ecce33 --- /dev/null +++ b/t/t6102-rev-list-unexpected-objects.sh @@ -0,0 +1,123 @@ +#!/bin/sh + +test_description='git rev-list should handle unexpected object types' + +. ./test-lib.sh + +test_expect_success 'setup well-formed objects' ' + blob="$(printf "foo" | git hash-object -w --stdin)" && + tree="$(printf "100644 blob $blob\tfoo" | git mktree)" && + commit="$(git commit-tree $tree -m "first commit")" && + git cat-file commit $commit >good-commit +' + +test_expect_success 'setup unexpected non-blob entry' ' + printf "100644 foo\0$(echo $tree | hex2oct)" >broken-tree && + broken_tree="$(git hash-object -w --literally -t tree broken-tree)" +' + +test_expect_failure 'traverse unexpected non-blob entry (lone)' ' + test_must_fail git rev-list --objects $broken_tree +' + +test_expect_failure 'traverse unexpected non-blob entry (seen)' ' + test_must_fail git rev-list --objects $tree $broken_tree +' + +test_expect_success 'setup unexpected non-tree entry' ' + printf "40000 foo\0$(echo $blob | hex2oct)" >broken-tree && + broken_tree="$(git hash-object -w --literally -t tree broken-tree)" +' + +test_expect_failure 'traverse unexpected non-tree entry (lone)' ' + test_must_fail git rev-list --objects $broken_tree +' + +test_expect_failure 'traverse unexpected non-tree entry (seen)' ' + test_must_fail git rev-list --objects $blob $broken_tree +' + +test_expect_success 'setup unexpected non-commit parent' ' + sed "/^author/ { h; s/.*/parent $blob/; G; }" broken-commit && + broken_commit="$(git hash-object -w --literally -t commit \ + broken-commit)" +' + +test_expect_success 'traverse unexpected non-commit parent (lone)' ' + test_must_fail git rev-list --objects $broken_commit >output 2>&1 && + test_i18ngrep "not a commit" output +' + +test_expect_success 'traverse unexpected non-commit parent (seen)' ' + test_must_fail git rev-list --objects $commit $broken_commit \ + >output 2>&1 && + test_i18ngrep "not a commit" output +' + +test_expect_success 'setup unexpected non-tree root' ' + sed -e "s/$tree/$blob/" broken-commit && + broken_commit="$(git hash-object -w --literally -t commit \ + broken-commit)" +' + +test_expect_failure 'traverse unexpected non-tree root (lone)' ' + test_must_fail git rev-list --objects $broken_commit +' + +test_expect_failure 'traverse unexpected non-tree root (seen)' ' + test_must_fail git rev-list --objects $blob $broken_commit +' + +test_expect_success 'setup unexpected non-commit tag' ' + git tag -a -m "tagged commit" tag $commit && + git cat-file tag tag >good-tag && + test_when_finished "git tag -d tag" && + sed -e "s/$commit/$blob/" broken-tag && + tag=$(git hash-object -w --literally -t tag broken-tag) +' + +test_expect_success 'traverse unexpected non-commit tag (lone)' ' + test_must_fail git rev-list --objects $tag +' + +test_expect_success 'traverse unexpected non-commit tag (seen)' ' + test_must_fail git rev-list --objects $blob $tag >output 2>&1 && + test_i18ngrep "not a commit" output +' + +test_expect_success 'setup unexpected non-tree tag' ' + git tag -a -m "tagged tree" tag $tree && + git cat-file tag tag >good-tag && + test_when_finished "git tag -d tag" && + sed -e "s/$tree/$blob/" broken-tag && + tag=$(git hash-object -w --literally -t tag broken-tag) +' + +test_expect_success 'traverse unexpected non-tree tag (lone)' ' + test_must_fail git rev-list --objects $tag +' + +test_expect_success 'traverse unexpected non-tree tag (seen)' ' + test_must_fail git rev-list --objects $blob $tag >output 2>&1 && + test_i18ngrep "not a tree" output +' + +test_expect_success 'setup unexpected non-blob tag' ' + git tag -a -m "tagged blob" tag $blob && + git cat-file tag tag >good-tag && + test_when_finished "git tag -d tag" && + sed -e "s/$blob/$commit/" broken-tag && + tag=$(git hash-object -w --literally -t tag broken-tag) +' + +test_expect_failure 'traverse unexpected non-blob tag (lone)' ' + test_must_fail git rev-list --objects $tag +' + +test_expect_success 'traverse unexpected non-blob tag (seen)' ' + test_must_fail git rev-list --objects $commit $tag >output 2>&1 && + test_i18ngrep "not a blob" output +' + +test_done From 23c204455bf2198806e8c7b0cd86b20a50a379d0 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 9 Apr 2019 19:13:17 -0700 Subject: [PATCH 3/7] list-objects.c: handle unexpected non-blob entries Fix one of the cases described in the previous commit where a tree-entry that is promised to a blob is in fact a non-blob. When 'lookup_blob()' returns NULL, it is because Git has cached the requested object as a non-blob. In this case, prevent a SIGSEGV by 'die()'-ing immediately before attempting to dereference the result. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- list-objects.c | 5 +++++ t/t6102-rev-list-unexpected-objects.sh | 5 +++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/list-objects.c b/list-objects.c index dc77361e11d..ea04bbdee6a 100644 --- a/list-objects.c +++ b/list-objects.c @@ -133,6 +133,11 @@ static void process_tree_contents(struct traversal_context *ctx, base, entry.path); else { struct blob *b = lookup_blob(ctx->revs->repo, &entry.oid); + if (!b) { + die(_("entry '%s' in tree %s has blob mode, " + "but is not a blob"), + entry.path, oid_to_hex(&tree->object.oid)); + } b->object.flags |= NOT_USER_GIVEN; process_blob(ctx, b, base, entry.path); } diff --git a/t/t6102-rev-list-unexpected-objects.sh b/t/t6102-rev-list-unexpected-objects.sh index 15072ecce33..1377c603783 100755 --- a/t/t6102-rev-list-unexpected-objects.sh +++ b/t/t6102-rev-list-unexpected-objects.sh @@ -20,8 +20,9 @@ test_expect_failure 'traverse unexpected non-blob entry (lone)' ' test_must_fail git rev-list --objects $broken_tree ' -test_expect_failure 'traverse unexpected non-blob entry (seen)' ' - test_must_fail git rev-list --objects $tree $broken_tree +test_expect_success 'traverse unexpected non-blob entry (seen)' ' + test_must_fail git rev-list --objects $tree $broken_tree >output 2>&1 && + test_i18ngrep "is not a blob" output ' test_expect_success 'setup unexpected non-tree entry' ' From b49e74eac480d167c3af8f1286fe520c3d7ce9e1 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 9 Apr 2019 19:13:19 -0700 Subject: [PATCH 4/7] list-objects.c: handle unexpected non-tree entries Apply similar treatment as the previous commit for non-tree entries, too. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- list-objects.c | 5 +++++ t/t6102-rev-list-unexpected-objects.sh | 5 +++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/list-objects.c b/list-objects.c index ea04bbdee6a..bb7e61ef4bb 100644 --- a/list-objects.c +++ b/list-objects.c @@ -125,6 +125,11 @@ static void process_tree_contents(struct traversal_context *ctx, if (S_ISDIR(entry.mode)) { struct tree *t = lookup_tree(ctx->revs->repo, &entry.oid); + if (!t) { + die(_("entry '%s' in tree %s has tree mode, " + "but is not a tree"), + entry.path, oid_to_hex(&tree->object.oid)); + } t->object.flags |= NOT_USER_GIVEN; process_tree(ctx, t, base, entry.path); } diff --git a/t/t6102-rev-list-unexpected-objects.sh b/t/t6102-rev-list-unexpected-objects.sh index 1377c603783..e3ec1955329 100755 --- a/t/t6102-rev-list-unexpected-objects.sh +++ b/t/t6102-rev-list-unexpected-objects.sh @@ -34,8 +34,9 @@ test_expect_failure 'traverse unexpected non-tree entry (lone)' ' test_must_fail git rev-list --objects $broken_tree ' -test_expect_failure 'traverse unexpected non-tree entry (seen)' ' - test_must_fail git rev-list --objects $blob $broken_tree +test_expect_success 'traverse unexpected non-tree entry (seen)' ' + test_must_fail git rev-list --objects $blob $broken_tree >output 2>&1 && + test_i18ngrep "is not a tree" output ' test_expect_success 'setup unexpected non-commit parent' ' From 834876630b21f832f648bc46a753291e4512ca8f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 9 Apr 2019 19:13:20 -0700 Subject: [PATCH 5/7] get_commit_tree(): return NULL for broken tree Return NULL from 'get_commit_tree()' when a commit's root tree is corrupt, doesn't exist, or points to an object which is not a tree. In [1], this situation became a BUG(), but it can certainly occur in cases which are not a bug in Git, for e.g., if a caller manually crafts a commit whose tree is corrupt in any of the above ways. Note that the expect_failure test in t6102 triggers this BUG(), but we can't flip it to expect_success yet. Solving this problem actually reveals a second bug. [1]: 7b8a21dba1 (commit-graph: lazy-load trees for commits, 2018-04-06) Co-authored-by: Taylor Blau Signed-off-by: Taylor Blau Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- commit.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/commit.c b/commit.c index a5333c7ac6c..e2cde566a97 100644 --- a/commit.c +++ b/commit.c @@ -345,10 +345,10 @@ struct tree *get_commit_tree(const struct commit *commit) if (commit->maybe_tree || !commit->object.parsed) return commit->maybe_tree; - if (commit->graph_pos == COMMIT_NOT_FROM_GRAPH) - BUG("commit has NULL tree, but was not loaded from commit-graph"); + if (commit->graph_pos != COMMIT_NOT_FROM_GRAPH) + return get_commit_tree_in_graph(the_repository, commit); - return get_commit_tree_in_graph(the_repository, commit); + return NULL; } struct object_id *get_commit_tree_oid(const struct commit *commit) From ee4dfee2274d2fd743066fa9fa4d37441ee522f8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 9 Apr 2019 19:13:23 -0700 Subject: [PATCH 6/7] rev-list: let traversal die when --missing is not in use Commit 7c0fe330d5 (rev-list: handle missing tree objects properly, 2018-10-05) taught the traversal machinery used by git-rev-list to ignore missing trees, so that rev-list could handle them itself. However, it does so only by checking via oid_object_info_extended() that the object exists at all. This can miss several classes of errors that were previously detected by rev-list: - type mismatches (e.g., we expected a tree but got a blob) - failure to read the object data (e.g., due to bitrot on disk) This is especially important because we use "rev-list --objects" as our connectivity check to admit new objects to the repository, and it will now miss these cases (though the bitrot one is less important here, because we'd typically have just hashed and stored the object). There are a few options to fix this: 1. we could check these properties in rev-list when we do the existence check. This is probably too expensive in practice (perhaps even for a type check, but definitely for checking the whole content again, which implies loading each object into memory twice). 2. teach the traversal machinery to differentiate between a missing object, and one that could not be loaded as expected. This probably wouldn't be too hard to detect type mismatches, but detecting bitrot versus a truly missing object would require deep changes to the object-loading code. 3. have the traversal machinery communicate the failure to the caller, so that it can decide how to proceed without re-evaluting the object itself. Of those, I think (3) is probably the best path forward. However, this patch does none of them. In the name of expediently fixing the regression to a normal "rev-list --objects" that we use for connectivity checks, this simply restores the pre-7c0fe330d5 behavior of having the traversal die as soon as it fails to load a tree (when --missing is set to MA_ERROR, which is the default). Note that we can't get rid of the object-existence check in finish_object(), because this also handles blobs (which are not otherwise checked at all by the traversal code). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/rev-list.c | 4 +++- t/t6102-rev-list-unexpected-objects.sh | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 5b5b6dbb1c9..7de083b5f60 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -379,7 +379,6 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) repo_init_revisions(the_repository, &revs, prefix); revs.abbrev = DEFAULT_ABBREV; revs.commit_format = CMIT_FMT_UNSPECIFIED; - revs.do_not_die_on_missing_tree = 1; /* * Scan the argument list before invoking setup_revisions(), so that we @@ -409,6 +408,9 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) } } + if (arg_missing_action) + revs.do_not_die_on_missing_tree = 1; + argc = setup_revisions(argc, argv, &revs, &s_r_opt); memset(&info, 0, sizeof(info)); diff --git a/t/t6102-rev-list-unexpected-objects.sh b/t/t6102-rev-list-unexpected-objects.sh index e3ec1955329..28ee1bcb07c 100755 --- a/t/t6102-rev-list-unexpected-objects.sh +++ b/t/t6102-rev-list-unexpected-objects.sh @@ -30,7 +30,7 @@ test_expect_success 'setup unexpected non-tree entry' ' broken_tree="$(git hash-object -w --literally -t tree broken-tree)" ' -test_expect_failure 'traverse unexpected non-tree entry (lone)' ' +test_expect_success 'traverse unexpected non-tree entry (lone)' ' test_must_fail git rev-list --objects $broken_tree ' @@ -63,7 +63,7 @@ test_expect_success 'setup unexpected non-tree root' ' broken-commit)" ' -test_expect_failure 'traverse unexpected non-tree root (lone)' ' +test_expect_success 'traverse unexpected non-tree root (lone)' ' test_must_fail git rev-list --objects $broken_commit ' From 97dd512af7ce4afb4f638ef73b4770921c8ca3aa Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 9 Apr 2019 19:13:25 -0700 Subject: [PATCH 7/7] rev-list: detect broken root trees When the traversal machinery sees a commit without a root tree, it assumes that the tree was part of a BOUNDARY commit, and quietly ignores the tree. But it could also be caused by a commit whose root tree is broken or missing. Instead, let's die() when we see a NULL root tree. We can differentiate it from the BOUNDARY case by seeing if the commit was actually parsed. This covers that case, plus future-proofs us against any others where we might try to show an unparsed commit. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- list-objects.c | 3 +++ t/t6102-rev-list-unexpected-objects.sh | 6 ++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/list-objects.c b/list-objects.c index bb7e61ef4bb..b5651ddd5bf 100644 --- a/list-objects.c +++ b/list-objects.c @@ -374,6 +374,9 @@ static void do_traverse(struct traversal_context *ctx) struct tree *tree = get_commit_tree(commit); tree->object.flags |= NOT_USER_GIVEN; add_pending_tree(ctx->revs, tree); + } else if (commit->object.parsed) { + die(_("unable to load root tree for commit %s"), + oid_to_hex(&commit->object.oid)); } ctx->show_commit(commit, ctx->show_data); diff --git a/t/t6102-rev-list-unexpected-objects.sh b/t/t6102-rev-list-unexpected-objects.sh index 28ee1bcb07c..28611c978e6 100755 --- a/t/t6102-rev-list-unexpected-objects.sh +++ b/t/t6102-rev-list-unexpected-objects.sh @@ -67,8 +67,10 @@ test_expect_success 'traverse unexpected non-tree root (lone)' ' test_must_fail git rev-list --objects $broken_commit ' -test_expect_failure 'traverse unexpected non-tree root (seen)' ' - test_must_fail git rev-list --objects $blob $broken_commit +test_expect_success 'traverse unexpected non-tree root (seen)' ' + test_must_fail git rev-list --objects $blob $broken_commit \ + >output 2>&1 && + test_i18ngrep "not a tree" output ' test_expect_success 'setup unexpected non-commit tag' '