diff --git a/Documentation/git.txt b/Documentation/git.txt index 11228956cd..3bac24cf8a 100644 --- a/Documentation/git.txt +++ b/Documentation/git.txt @@ -911,6 +911,16 @@ for full details. should not normally need to set this to `0`, but it may be useful when trying to salvage data from a corrupted repository. +`GIT_COMMIT_GRAPH_PARANOIA`:: + When loading a commit object from the commit-graph, Git performs an + existence check on the object in the object database. This is done to + avoid issues with stale commit-graphs that contain references to + already-deleted commits, but comes with a performance penalty. ++ +The default is "true", which enables the aforementioned behavior. +Setting this to "false" disables the existence check. This can lead to +a performance improvement at the cost of consistency. + `GIT_ALLOW_PROTOCOL`:: If set to a colon-separated list of protocols, behave as if `protocol.allow` is set to `never`, and each of the listed diff --git a/commit-graph.c b/commit-graph.c index c2b782af3b..ee66098e07 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1024,14 +1024,18 @@ int repo_find_commit_pos_in_graph(struct repository *r, struct commit *c, struct commit *lookup_commit_in_graph(struct repository *repo, const struct object_id *id) { + static int commit_graph_paranoia = -1; struct commit *commit; uint32_t pos; + if (commit_graph_paranoia == -1) + commit_graph_paranoia = git_env_bool(GIT_COMMIT_GRAPH_PARANOIA, 1); + if (!prepare_commit_graph(repo)) return NULL; if (!search_commit_pos_in_graph(id, repo->objects->commit_graph, &pos)) return NULL; - if (!has_object(repo, id, 0)) + if (commit_graph_paranoia && !has_object(repo, id, 0)) return NULL; commit = lookup_commit(repo, id); diff --git a/commit-graph.h b/commit-graph.h index c6870274c5..e519cb81cb 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -8,6 +8,12 @@ #define GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE "GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE" #define GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS "GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS" +/* + * This environment variable controls whether commits looked up via the + * commit graph will be double checked to exist in the object database. + */ +#define GIT_COMMIT_GRAPH_PARANOIA "GIT_COMMIT_GRAPH_PARANOIA" + /* * This method is only used to enhance coverage of the commit-graph * feature in the test suite with the GIT_TEST_COMMIT_GRAPH and diff --git a/commit.c b/commit.c index b3223478bc..8405d7c3fc 100644 --- a/commit.c +++ b/commit.c @@ -28,6 +28,7 @@ #include "shallow.h" #include "tree.h" #include "hook.h" +#include "parse.h" static struct commit_extra_header *read_commit_extra_header_lines(const char *buf, size_t len, const char **); @@ -572,8 +573,21 @@ int repo_parse_commit_internal(struct repository *r, return -1; if (item->object.parsed) return 0; - if (use_commit_graph && parse_commit_in_graph(r, item)) + if (use_commit_graph && parse_commit_in_graph(r, item)) { + static int commit_graph_paranoia = -1; + + if (commit_graph_paranoia == -1) + commit_graph_paranoia = git_env_bool(GIT_COMMIT_GRAPH_PARANOIA, 1); + + if (commit_graph_paranoia && !has_object(r, &item->object.oid, 0)) { + unparse_commit(r, &item->object.oid); + return quiet_on_missing ? -1 : + error(_("commit %s exists in commit-graph but not in the object database"), + oid_to_hex(&item->object.oid)); + } + return 0; + } if (oid_object_info_extended(r, &item->object.oid, &oi, flags) < 0) return quiet_on_missing ? -1 : diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 6505ff595a..134239d40f 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -895,4 +895,52 @@ test_expect_success 'reader notices too-small generations chunk' ' test_cmp expect.err err ' +test_expect_success 'stale commit cannot be parsed when given directly' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit A && + test_commit B && + git commit-graph write --reachable && + + oid=$(git rev-parse B) && + rm .git/objects/"$(test_oid_to_path "$oid")" && + + # Verify that it is possible to read the commit from the + # commit graph when not being paranoid, ... + GIT_COMMIT_GRAPH_PARANOIA=false git rev-list B && + # ... but parsing the commit when double checking that + # it actually exists in the object database should fail. + test_must_fail git rev-list -1 B + ) +' + +test_expect_success 'stale commit cannot be parsed when traversing graph' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + + test_commit A && + test_commit B && + test_commit C && + git commit-graph write --reachable && + + # Corrupt the repository by deleting the intermediate commit + # object. Commands should notice that this object is absent and + # thus that the repository is corrupt even if the commit graph + # exists. + oid=$(git rev-parse B) && + rm .git/objects/"$(test_oid_to_path "$oid")" && + + # Again, we should be able to parse the commit when not + # being paranoid about commit graph staleness... + GIT_COMMIT_GRAPH_PARANOIA=false git rev-parse HEAD~2 && + # ... but fail when we are paranoid. + test_must_fail git rev-parse HEAD~2 2>error && + grep "error: commit $oid exists in commit-graph but not in the object database" error + ) +' + test_done