From f6761faaa17a98c025b34fffeeec71f0ec223b2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 21 Feb 2019 23:37:46 +0100 Subject: [PATCH 1/8] commit-graph tests: split up corrupt_graph_and_verify() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split up the corrupt_graph_and_verify() function added in d9b9f8a6fd ("commit-graph: verify catches corrupt signature", 2018-06-27) into its logical components of setting up the test itself, doing the corruption in a particular way with "dd", and then finally testing that stderr is what we expect. This allows for re-using everything except the now slimmer corrupt_graph_and_verify() to corrupt the graph in a way that doesn't involve inserting a given byte sequence at a given position, e.g. truncating it entirely to a custom value. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- t/t5318-commit-graph.sh | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index d4bd1522fe..725b7ce51f 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -366,6 +366,19 @@ GRAPH_OCTOPUS_DATA_OFFSET=$(($GRAPH_COMMIT_DATA_OFFSET + \ GRAPH_BYTE_OCTOPUS=$(($GRAPH_OCTOPUS_DATA_OFFSET + 4)) GRAPH_BYTE_FOOTER=$(($GRAPH_OCTOPUS_DATA_OFFSET + 4 * $NUM_OCTOPUS_EDGES)) +corrupt_graph_setup() { + cd "$TRASH_DIRECTORY/full" && + test_when_finished mv commit-graph-backup $objdir/info/commit-graph && + cp $objdir/info/commit-graph commit-graph-backup +} + +corrupt_graph_verify() { + grepstr=$1 + test_must_fail git commit-graph verify 2>test_err && + grep -v "^+" test_err >err && + test_i18ngrep "$grepstr" err +} + # usage: corrupt_graph_and_verify [] # Manipulates the commit-graph file at the position # by inserting the data, optionally zeroing the file @@ -376,17 +389,14 @@ corrupt_graph_and_verify() { pos=$1 data="${2:-\0}" grepstr=$3 - cd "$TRASH_DIRECTORY/full" && + corrupt_graph_setup && orig_size=$(wc -c < $objdir/info/commit-graph) && zero_pos=${4:-${orig_size}} && - test_when_finished mv commit-graph-backup $objdir/info/commit-graph && - cp $objdir/info/commit-graph commit-graph-backup && printf "$data" | dd of="$objdir/info/commit-graph" bs=1 seek="$pos" conv=notrunc && dd of="$objdir/info/commit-graph" bs=1 seek="$zero_pos" count=0 && generate_zero_bytes $(($orig_size - $zero_pos)) >>"$objdir/info/commit-graph" && - test_must_fail git commit-graph verify 2>test_err && - grep -v "^+" test_err >err && - test_i18ngrep "$grepstr" err + corrupt_graph_verify "$grepstr" + } test_expect_success 'detect bad signature' ' From 945944ca70cba5a1e0b54be94d53030b4a3ad7db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 21 Feb 2019 23:37:47 +0100 Subject: [PATCH 2/8] commit-graph tests: test a graph that's too small MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the recently split-up components of the corrupt_graph_and_verify() function to assert that we error on graphs that are too small. The error was added in 2a2e32bdc5 ("commit-graph: implement git commit-graph read", 2018-04-10), but there was no test for it. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- t/t5318-commit-graph.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 725b7ce51f..733be2ed30 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -399,6 +399,12 @@ corrupt_graph_and_verify() { } +test_expect_success 'detect too small' ' + corrupt_graph_setup && + echo "a small graph" >$objdir/info/commit-graph && + corrupt_graph_verify "too small" +' + test_expect_success 'detect bad signature' ' corrupt_graph_and_verify 0 "\0" \ "graph signature" From 2ac138d568abc84e9447825a342365e0136180f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Mon, 25 Mar 2019 13:08:29 +0100 Subject: [PATCH 3/8] commit-graph: fix segfault on e.g. "git status" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When core.commitGraph=true is set, various common commands now consult the commit graph. Because the commit-graph code is very trusting of its input data, it's possibly to construct a graph that'll cause an immediate segfault on e.g. "status" (and e.g. "log", "blame", ...). In some other cases where git immediately exits with a cryptic error about the graph being broken. The root cause of this is that while the "commit-graph verify" sub-command exhaustively verifies the graph, other users of the graph simply trust the graph, and will e.g. deference data found at certain offsets as pointers, causing segfaults. This change does the bare minimum to ensure that we don't segfault in the common fill_commit_in_graph() codepath called by e.g. setup_revisions(), to do this instrument the "commit-graph verify" tests to always check if "status" would subsequently segfault. This fixes the following tests which would previously segfault: not ok 50 - detect low chunk count not ok 51 - detect missing OID fanout chunk not ok 52 - detect missing OID lookup chunk not ok 53 - detect missing commit data chunk Those happened because with the commit-graph enabled setup_revisions() would eventually call fill_commit_in_graph(), where e.g. g->chunk_commit_data is used early as an offset (and will be 0x0). With this change we get far enough to detect that the graph is broken, and show an error instead. E.g.: $ git status; echo $? error: commit-graph is missing the Commit Data chunk 1 That also sucks, we should *warn* and not hard-fail "status" just because the commit-graph is corrupt, but fixing is left to a follow-up change. A side-effect of changing the reporting from graph_report() to error() is that we now have an "error: " prefix for these even for "commit-graph verify". Pseudo-diff before/after: $ git commit-graph verify -commit-graph is missing the Commit Data chunk +error: commit-graph is missing the Commit Data chunk Changing that is OK. Various errors it emits now early on are prefixed with "error: ", moving these over and changing the output doesn't break anything. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- commit-graph.c | 43 ++++++++++++++++++++++++++++++++--------- t/t5318-commit-graph.sh | 3 ++- 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 47e9be0a3a..f8201d888b 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -112,6 +112,36 @@ struct commit_graph *load_commit_graph_one(const char *graph_file) return ret; } +static int verify_commit_graph_lite(struct commit_graph *g) +{ + /* + * Basic validation shared between parse_commit_graph() + * which'll be called every time the graph is used, and the + * much more expensive verify_commit_graph() used by + * "commit-graph verify". + * + * There should only be very basic checks here to ensure that + * we don't e.g. segfault in fill_commit_in_graph(), but + * because this is a very hot codepath nothing that e.g. loops + * over g->num_commits, or runs a checksum on the commit-graph + * itself. + */ + if (!g->chunk_oid_fanout) { + error("commit-graph is missing the OID Fanout chunk"); + return 1; + } + if (!g->chunk_oid_lookup) { + error("commit-graph is missing the OID Lookup chunk"); + return 1; + } + if (!g->chunk_commit_data) { + error("commit-graph is missing the Commit Data chunk"); + return 1; + } + + return 0; +} + struct commit_graph *parse_commit_graph(void *graph_map, int fd, size_t graph_size) { @@ -233,6 +263,9 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, last_chunk_offset = chunk_offset; } + if (verify_commit_graph_lite(graph)) + return NULL; + return graph; } @@ -1089,15 +1122,7 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) return 1; } - verify_commit_graph_error = 0; - - if (!g->chunk_oid_fanout) - graph_report("commit-graph is missing the OID Fanout chunk"); - if (!g->chunk_oid_lookup) - graph_report("commit-graph is missing the OID Lookup chunk"); - if (!g->chunk_commit_data) - graph_report("commit-graph is missing the Commit Data chunk"); - + verify_commit_graph_error = verify_commit_graph_lite(g); if (verify_commit_graph_error) return verify_commit_graph_error; diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 733be2ed30..b3f3e515fc 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -376,7 +376,8 @@ corrupt_graph_verify() { grepstr=$1 test_must_fail git commit-graph verify 2>test_err && grep -v "^+" test_err >err && - test_i18ngrep "$grepstr" err + test_i18ngrep "$grepstr" err && + test_might_fail git status --short } # usage: corrupt_graph_and_verify [] From 61df89c8e5559c2b524968f492d445421913bfdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Mon, 25 Mar 2019 13:08:30 +0100 Subject: [PATCH 4/8] commit-graph: don't early exit(1) on e.g. "git status" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the commit-graph loading code work as a library that returns an error code instead of calling exit(1) when the commit-graph is corrupt. This means that e.g. "status" will now report commit-graph corruption as an "error: [...]" at the top of its output, but then proceed to work normally. This required splitting up the load_commit_graph_one() function so that the code that deals with open()-ing and stat()-ing the graph can now be called independently as open_commit_graph(). This is needed because "commit-graph verify" where the graph doesn't exist isn't an error. See the third paragraph in 283e68c72f ("commit-graph: add 'verify' subcommand", 2018-06-27). There's a bug in that logic where we conflate the intended ENOENT with other errno values (e.g. EACCES), but this change doesn't address that. That'll be addressed in a follow-up change. I'm then splitting most of the logic out of load_commit_graph_one() into load_commit_graph_one_fd_st(), which allows for providing an existing file descriptor and stat information to the loading code. This isn't strictly needed, but it would be redundant and confusing to open() and stat() the file twice for some of the codepaths, this allows for calling open_commit_graph() followed by load_commit_graph_one_fd_st(). The "graph_file" still needs to be passed to that function for the the "graph file %s is too small" error message. This leaves load_commit_graph_one() unused by everything except the internal prepare_commit_graph_one() function, so let's mark it as "static". If someone needs it in the future we can remove the "static" attribute. I could also rewrite its sole remaining user ("prepare_commit_graph_one()") to use load_commit_graph_one_fd_st() instead, but let's leave it at this. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Ramsay Jones Signed-off-by: Junio C Hamano --- builtin/commit-graph.c | 21 +++++++++++++++++---- commit-graph.c | 42 +++++++++++++++++++++++++++++------------ commit-graph.h | 4 +++- t/t5318-commit-graph.sh | 2 +- 4 files changed, 51 insertions(+), 18 deletions(-) diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 4ae502754c..32bcc63427 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -42,6 +42,9 @@ static int graph_verify(int argc, const char **argv) { struct commit_graph *graph = NULL; char *graph_name; + int open_ok; + int fd; + struct stat st; static struct option builtin_commit_graph_verify_options[] = { OPT_STRING(0, "object-dir", &opts.obj_dir, @@ -58,11 +61,14 @@ static int graph_verify(int argc, const char **argv) opts.obj_dir = get_object_directory(); graph_name = get_commit_graph_filename(opts.obj_dir); - graph = load_commit_graph_one(graph_name); + open_ok = open_commit_graph(graph_name, &fd, &st); + if (!open_ok) + return 0; + graph = load_commit_graph_one_fd_st(graph_name, fd, &st); FREE_AND_NULL(graph_name); if (!graph) - return 0; + return 1; UNLEAK(graph); return verify_commit_graph(the_repository, graph); @@ -72,6 +78,9 @@ static int graph_read(int argc, const char **argv) { struct commit_graph *graph = NULL; char *graph_name; + int open_ok; + int fd; + struct stat st; static struct option builtin_commit_graph_read_options[] = { OPT_STRING(0, "object-dir", &opts.obj_dir, @@ -88,10 +97,14 @@ static int graph_read(int argc, const char **argv) opts.obj_dir = get_object_directory(); graph_name = get_commit_graph_filename(opts.obj_dir); - graph = load_commit_graph_one(graph_name); + open_ok = open_commit_graph(graph_name, &fd, &st); + if (!open_ok) + die_errno(_("Could not open commit-graph '%s'"), graph_name); + + graph = load_commit_graph_one_fd_st(graph_name, fd, &st); if (!graph) - die("graph file %s does not exist", graph_name); + return 1; FREE_AND_NULL(graph_name); diff --git a/commit-graph.c b/commit-graph.c index f8201d888b..3acc032c1b 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -80,25 +80,31 @@ static int commit_graph_compatible(struct repository *r) return 1; } -struct commit_graph *load_commit_graph_one(const char *graph_file) +int open_commit_graph(const char *graph_file, int *fd, struct stat *st) +{ + *fd = git_open(graph_file); + if (*fd < 0) + return 0; + if (fstat(*fd, st)) { + close(*fd); + return 0; + } + return 1; +} + +struct commit_graph *load_commit_graph_one_fd_st(const char *graph_file, + int fd, struct stat *st) { void *graph_map; size_t graph_size; - struct stat st; struct commit_graph *ret; - int fd = git_open(graph_file); - if (fd < 0) - return NULL; - if (fstat(fd, &st)) { - close(fd); - return NULL; - } - graph_size = xsize_t(st.st_size); + graph_size = xsize_t(st->st_size); if (graph_size < GRAPH_MIN_SIZE) { close(fd); - die(_("graph file %s is too small"), graph_file); + error(_("graph file %s is too small"), graph_file); + return NULL; } graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0); ret = parse_commit_graph(graph_map, fd, graph_size); @@ -106,7 +112,6 @@ struct commit_graph *load_commit_graph_one(const char *graph_file) if (!ret) { munmap(graph_map, graph_size); close(fd); - exit(1); } return ret; @@ -269,6 +274,19 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, return graph; } +static struct commit_graph *load_commit_graph_one(const char *graph_file) +{ + + struct stat st; + int fd; + int open_ok = open_commit_graph(graph_file, &fd, &st); + + if (!open_ok) + return NULL; + + return load_commit_graph_one_fd_st(graph_file, fd, &st); +} + static void prepare_commit_graph_one(struct repository *r, const char *obj_dir) { char *graph_name; diff --git a/commit-graph.h b/commit-graph.h index 096d8bac34..77cc739bc0 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -11,6 +11,7 @@ struct commit; char *get_commit_graph_filename(const char *obj_dir); +int open_commit_graph(const char *graph_file, int *fd, struct stat *st); /* * Given a commit struct, try to fill the commit struct info, including: @@ -52,7 +53,8 @@ struct commit_graph { const unsigned char *chunk_extra_edges; }; -struct commit_graph *load_commit_graph_one(const char *graph_file); +struct commit_graph *load_commit_graph_one_fd_st(const char *graph_file, + int fd, struct stat *st); struct commit_graph *parse_commit_graph(void *graph_map, int fd, size_t graph_size); diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index b3f3e515fc..0d012f55e5 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -377,7 +377,7 @@ corrupt_graph_verify() { test_must_fail git commit-graph verify 2>test_err && grep -v "^+" test_err >err && test_i18ngrep "$grepstr" err && - test_might_fail git status --short + git status --short } # usage: corrupt_graph_and_verify [] From 67a530fab3bbd1d4c3390d6f18e35ab10442c015 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Mon, 25 Mar 2019 13:08:31 +0100 Subject: [PATCH 5/8] commit-graph: don't pass filename to load_commit_graph_one_fd_st() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An earlier change implemented load_commit_graph_one_fd_st() in a way that was bug-compatible with earlier code in terms of the "graph file %s is too small" error message printing out the path to the commit-graph (".git/objects/info/commit-graph"). But change that, because: * A function that takes an already-open file descriptor also needing the filename isn't very intuitive. * The vast majority of errors we might emit when loading the graph come from parse_commit_graph(), which doesn't report the filename. Let's not do that either in this case for consistency. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- builtin/commit-graph.c | 4 ++-- commit-graph.c | 7 +++---- commit-graph.h | 3 +-- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 32bcc63427..8196fdbe9c 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -64,7 +64,7 @@ static int graph_verify(int argc, const char **argv) open_ok = open_commit_graph(graph_name, &fd, &st); if (!open_ok) return 0; - graph = load_commit_graph_one_fd_st(graph_name, fd, &st); + graph = load_commit_graph_one_fd_st(fd, &st); FREE_AND_NULL(graph_name); if (!graph) @@ -102,7 +102,7 @@ static int graph_read(int argc, const char **argv) if (!open_ok) die_errno(_("Could not open commit-graph '%s'"), graph_name); - graph = load_commit_graph_one_fd_st(graph_name, fd, &st); + graph = load_commit_graph_one_fd_st(fd, &st); if (!graph) return 1; diff --git a/commit-graph.c b/commit-graph.c index 3acc032c1b..a26d266663 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -92,8 +92,7 @@ int open_commit_graph(const char *graph_file, int *fd, struct stat *st) return 1; } -struct commit_graph *load_commit_graph_one_fd_st(const char *graph_file, - int fd, struct stat *st) +struct commit_graph *load_commit_graph_one_fd_st(int fd, struct stat *st) { void *graph_map; size_t graph_size; @@ -103,7 +102,7 @@ struct commit_graph *load_commit_graph_one_fd_st(const char *graph_file, if (graph_size < GRAPH_MIN_SIZE) { close(fd); - error(_("graph file %s is too small"), graph_file); + error(_("commit-graph file is too small")); return NULL; } graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0); @@ -284,7 +283,7 @@ static struct commit_graph *load_commit_graph_one(const char *graph_file) if (!open_ok) return NULL; - return load_commit_graph_one_fd_st(graph_file, fd, &st); + return load_commit_graph_one_fd_st(fd, &st); } static void prepare_commit_graph_one(struct repository *r, const char *obj_dir) diff --git a/commit-graph.h b/commit-graph.h index 77cc739bc0..ada7aea9ed 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -53,8 +53,7 @@ struct commit_graph { const unsigned char *chunk_extra_edges; }; -struct commit_graph *load_commit_graph_one_fd_st(const char *graph_file, - int fd, struct stat *st); +struct commit_graph *load_commit_graph_one_fd_st(int fd, struct stat *st); struct commit_graph *parse_commit_graph(void *graph_map, int fd, size_t graph_size); From 7b8ce9c673324d55e2b9d8331a796c74559b04c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Mon, 25 Mar 2019 13:08:32 +0100 Subject: [PATCH 6/8] commit-graph verify: detect inability to read the graph MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change "commit-graph verify" to error on open() failures other than ENOENT. As noted in the third paragraph of 283e68c72f ("commit-graph: add 'verify' subcommand", 2018-06-27) and the test it added it's intentional that "commit-graph verify" doesn't error out when the file doesn't exist. But let's not be overly promiscuous in what we accept. If we can't read the file for other reasons, e.g. permission errors, bad file descriptor etc. we'd like to report an error to the user. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- builtin/commit-graph.c | 4 +++- t/t5318-commit-graph.sh | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 8196fdbe9c..537fdfd0f0 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -62,8 +62,10 @@ static int graph_verify(int argc, const char **argv) graph_name = get_commit_graph_filename(opts.obj_dir); open_ok = open_commit_graph(graph_name, &fd, &st); - if (!open_ok) + if (!open_ok && errno == ENOENT) return 0; + if (!open_ok) + die_errno(_("Could not open commit-graph '%s'"), graph_name); graph = load_commit_graph_one_fd_st(fd, &st); FREE_AND_NULL(graph_name); diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 0d012f55e5..4601732b99 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -400,6 +400,12 @@ corrupt_graph_and_verify() { } +test_expect_success POSIXPERM,SANITY 'detect permission problem' ' + corrupt_graph_setup && + chmod 000 $objdir/info/commit-graph && + corrupt_graph_verify "Could not open" +' + test_expect_success 'detect too small' ' corrupt_graph_setup && echo "a small graph" >$objdir/info/commit-graph && From 43d356180556180b4ef6ac232a14498a5bb2b446 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Mon, 25 Mar 2019 13:08:33 +0100 Subject: [PATCH 7/8] commit-graph write: don't die if the existing graph is corrupt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the commit-graph is written we end up calling parse_commit(). This will in turn invoke code that'll consult the existing commit-graph about the commit, if the graph is corrupted we die. We thus get into a state where a failing "commit-graph verify" can't be followed-up with a "commit-graph write" if core.commitGraph=true is set, the graph either needs to be manually removed to proceed, or core.commitGraph needs to be set to "false". Change the "commit-graph write" codepath to use a new parse_commit_no_graph() helper instead of parse_commit() to avoid this. The latter will call repo_parse_commit_internal() with use_commit_graph=1 as seen in 177722b344 ("commit: integrate commit graph with commit parsing", 2018-04-10). Not using the old graph at all slows down the writing of the new graph by some small amount, but is a sensible way to prevent an error in the existing commit-graph from spreading. Just fixing the current issue would be likely to result in code that's inadvertently broken in the future. New code might use the commit-graph at a distance. To detect such cases introduce a "GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD" setting used when we do our corruption tests, and test that a "write/verify" combo works after every one of our current test cases where we now detect commit-graph corruption. Some of the code changes here might be strictly unnecessary, e.g. I was unable to find cases where the parse_commit() called from write_graph_chunk_data() didn't exit early due to "item->object.parsed" being true in repo_parse_commit_internal() (before the use_commit_graph=1 has any effect). But let's also convert those cases for good measure, we do not have exhaustive tests for all possible types of commit-graph corruption. This might need to be re-visited if we learn to write the commit-graph incrementally, but probably not. Hopefully we'll just start by finding out what commits we have in total, then read the old graph(s) to see what they cover, and finally write a new graph file with everything that's missing. In that case the new graph writing code just needs to continue to use e.g. a parse_commit() that doesn't consult the existing commit-graphs. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- commit-graph.c | 10 +++++++--- commit-graph.h | 1 + commit.h | 6 ++++++ t/t5318-commit-graph.sh | 11 +++++++++-- 4 files changed, 23 insertions(+), 5 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index a26d266663..34ecaaf857 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -311,6 +311,10 @@ static int prepare_commit_graph(struct repository *r) struct object_directory *odb; int config_value; + if (git_env_bool(GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD, 0)) + die("dying as requested by the '%s' variable on commit-graph load!", + GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD); + if (r->objects->commit_graph_attempted) return !!r->objects->commit_graph; r->objects->commit_graph_attempted = 1; @@ -575,7 +579,7 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, uint32_t packedDate[2]; display_progress(progress, ++*progress_cnt); - parse_commit(*list); + parse_commit_no_graph(*list); hashwrite(f, get_commit_tree_oid(*list)->hash, hash_len); parent = (*list)->parents; @@ -772,7 +776,7 @@ static void close_reachable(struct packed_oid_list *oids, int report_progress) display_progress(progress, i + 1); commit = lookup_commit(the_repository, &oids->list[i]); - if (commit && !parse_commit(commit)) + if (commit && !parse_commit_no_graph(commit)) add_missing_parents(oids, commit); } stop_progress(&progress); @@ -1021,7 +1025,7 @@ void write_commit_graph(const char *obj_dir, continue; commits.list[commits.nr] = lookup_commit(the_repository, &oids.list[i]); - parse_commit(commits.list[commits.nr]); + parse_commit_no_graph(commits.list[commits.nr]); for (parent = commits.list[commits.nr]->parents; parent; parent = parent->next) diff --git a/commit-graph.h b/commit-graph.h index ada7aea9ed..7dfb8c896f 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -7,6 +7,7 @@ #include "cache.h" #define GIT_TEST_COMMIT_GRAPH "GIT_TEST_COMMIT_GRAPH" +#define GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD "GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD" struct commit; diff --git a/commit.h b/commit.h index 42728c2906..5d33477e78 100644 --- a/commit.h +++ b/commit.h @@ -89,6 +89,12 @@ static inline int repo_parse_commit(struct repository *r, struct commit *item) { return repo_parse_commit_gently(r, item, 0); } + +static inline int parse_commit_no_graph(struct commit *commit) +{ + return repo_parse_commit_internal(the_repository, commit, 0, 0); +} + #ifndef NO_THE_REPOSITORY_COMPATIBILITY_MACROS #define parse_commit_internal(item, quiet, use) repo_parse_commit_internal(the_repository, item, quiet, use) #define parse_commit_gently(item, quiet) repo_parse_commit_gently(the_repository, item, quiet) diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 4601732b99..e80c1cac02 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -377,7 +377,13 @@ corrupt_graph_verify() { test_must_fail git commit-graph verify 2>test_err && grep -v "^+" test_err >err && test_i18ngrep "$grepstr" err && - git status --short + if test "$2" != "no-copy" + then + cp $objdir/info/commit-graph commit-graph-pre-write-test + fi && + git status --short && + GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD=true git commit-graph write && + git commit-graph verify } # usage: corrupt_graph_and_verify [] @@ -403,7 +409,7 @@ corrupt_graph_and_verify() { test_expect_success POSIXPERM,SANITY 'detect permission problem' ' corrupt_graph_setup && chmod 000 $objdir/info/commit-graph && - corrupt_graph_verify "Could not open" + corrupt_graph_verify "Could not open" "no-copy" ' test_expect_success 'detect too small' ' @@ -522,6 +528,7 @@ test_expect_success 'git fsck (checks commit-graph)' ' git fsck && corrupt_graph_and_verify $GRAPH_BYTE_FOOTER "\00" \ "incorrect checksum" && + cp commit-graph-pre-write-test $objdir/info/commit-graph && test_must_fail git fsck ' From 93b4405ffe4ad9308740e7c1c71383bfc369baaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Mon, 25 Mar 2019 13:08:34 +0100 Subject: [PATCH 8/8] commit-graph: improve & i18n error messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the error emitted when a commit-graph file is corrupt so that we actually mention the commit-graph, e.g. change errors like: error: improper chunk offset 0000000000385e0c To: error: commit-graph improper chunk offset 0000000000385e0c As discussed in the commits leading up to this one the commit-graph machinery is now used by common commands like "status". If the graph was corrupt we'd often emit some error that gave no indication what was wrong. Now some of them are still cryptic, but they'll at least mention "commit-graph" to give the user a hint as to where to look. While I'm at it mark some of the strings that hadn't been marked for translation. It's clear from the commit history and the code that this was merely forgotten at the time, and wasn't intentional.p5 Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- commit-graph.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 34ecaaf857..66865acbd7 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -167,21 +167,21 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, graph_signature = get_be32(data); if (graph_signature != GRAPH_SIGNATURE) { - error(_("graph signature %X does not match signature %X"), + error(_("commit-graph signature %X does not match signature %X"), graph_signature, GRAPH_SIGNATURE); return NULL; } graph_version = *(unsigned char*)(data + 4); if (graph_version != GRAPH_VERSION) { - error(_("graph version %X does not match version %X"), + error(_("commit-graph version %X does not match version %X"), graph_version, GRAPH_VERSION); return NULL; } hash_version = *(unsigned char*)(data + 5); if (hash_version != oid_version()) { - error(_("hash version %X does not match version %X"), + error(_("commit-graph hash version %X does not match version %X"), hash_version, oid_version()); return NULL; } @@ -204,7 +204,7 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, if (data + graph_size - chunk_lookup < GRAPH_CHUNKLOOKUP_WIDTH) { - error(_("chunk lookup table entry missing; graph file may be incomplete")); + error(_("commit-graph chunk lookup table entry missing; file may be incomplete")); free(graph); return NULL; } @@ -215,7 +215,7 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH; if (chunk_offset > graph_size - the_hash_algo->rawsz) { - error(_("improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32), + error(_("commit-graph improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32), (uint32_t)chunk_offset); free(graph); return NULL; @@ -252,7 +252,7 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, } if (chunk_repeated) { - error(_("chunk id %08x appears multiple times"), chunk_id); + error(_("commit-graph chunk id %08x appears multiple times"), chunk_id); free(graph); return NULL; } @@ -1162,7 +1162,7 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i); if (i && oidcmp(&prev_oid, &cur_oid) >= 0) - graph_report("commit-graph has incorrect OID order: %s then %s", + graph_report(_("commit-graph has incorrect OID order: %s then %s"), oid_to_hex(&prev_oid), oid_to_hex(&cur_oid)); @@ -1172,14 +1172,14 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) uint32_t fanout_value = get_be32(g->chunk_oid_fanout + cur_fanout_pos); if (i != fanout_value) - graph_report("commit-graph has incorrect fanout value: fanout[%d] = %u != %u", + graph_report(_("commit-graph has incorrect fanout value: fanout[%d] = %u != %u"), cur_fanout_pos, fanout_value, i); cur_fanout_pos++; } graph_commit = lookup_commit(r, &cur_oid); if (!parse_commit_in_graph_one(r, g, graph_commit)) - graph_report("failed to parse %s from commit-graph", + graph_report(_("failed to parse commit %s from commit-graph"), oid_to_hex(&cur_oid)); } @@ -1187,7 +1187,7 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) uint32_t fanout_value = get_be32(g->chunk_oid_fanout + cur_fanout_pos); if (g->num_commits != fanout_value) - graph_report("commit-graph has incorrect fanout value: fanout[%d] = %u != %u", + graph_report(_("commit-graph has incorrect fanout value: fanout[%d] = %u != %u"), cur_fanout_pos, fanout_value, i); cur_fanout_pos++; @@ -1209,14 +1209,14 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) graph_commit = lookup_commit(r, &cur_oid); odb_commit = (struct commit *)create_object(r, cur_oid.hash, alloc_commit_node(r)); if (parse_commit_internal(odb_commit, 0, 0)) { - graph_report("failed to parse %s from object database", + graph_report(_("failed to parse commit %s from object database for commit-graph"), oid_to_hex(&cur_oid)); continue; } if (!oideq(&get_commit_tree_in_graph_one(r, g, graph_commit)->object.oid, get_commit_tree_oid(odb_commit))) - graph_report("root tree OID for commit %s in commit-graph is %s != %s", + graph_report(_("root tree OID for commit %s in commit-graph is %s != %s"), oid_to_hex(&cur_oid), oid_to_hex(get_commit_tree_oid(graph_commit)), oid_to_hex(get_commit_tree_oid(odb_commit))); @@ -1226,13 +1226,13 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) while (graph_parents) { if (odb_parents == NULL) { - graph_report("commit-graph parent list for commit %s is too long", + graph_report(_("commit-graph parent list for commit %s is too long"), oid_to_hex(&cur_oid)); break; } if (!oideq(&graph_parents->item->object.oid, &odb_parents->item->object.oid)) - graph_report("commit-graph parent for %s is %s != %s", + graph_report(_("commit-graph parent for %s is %s != %s"), oid_to_hex(&cur_oid), oid_to_hex(&graph_parents->item->object.oid), oid_to_hex(&odb_parents->item->object.oid)); @@ -1245,16 +1245,16 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) } if (odb_parents != NULL) - graph_report("commit-graph parent list for commit %s terminates early", + graph_report(_("commit-graph parent list for commit %s terminates early"), oid_to_hex(&cur_oid)); if (!graph_commit->generation) { if (generation_zero == GENERATION_NUMBER_EXISTS) - graph_report("commit-graph has generation number zero for commit %s, but non-zero elsewhere", + graph_report(_("commit-graph has generation number zero for commit %s, but non-zero elsewhere"), oid_to_hex(&cur_oid)); generation_zero = GENERATION_ZERO_EXISTS; } else if (generation_zero == GENERATION_ZERO_EXISTS) - graph_report("commit-graph has non-zero generation number for commit %s, but zero elsewhere", + graph_report(_("commit-graph has non-zero generation number for commit %s, but zero elsewhere"), oid_to_hex(&cur_oid)); if (generation_zero == GENERATION_ZERO_EXISTS) @@ -1269,13 +1269,13 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) max_generation--; if (graph_commit->generation != max_generation + 1) - graph_report("commit-graph generation for commit %s is %u != %u", + graph_report(_("commit-graph generation for commit %s is %u != %u"), oid_to_hex(&cur_oid), graph_commit->generation, max_generation + 1); if (graph_commit->date != odb_commit->date) - graph_report("commit date for commit %s in commit-graph is %"PRItime" != %"PRItime, + graph_report(_("commit date for commit %s in commit-graph is %"PRItime" != %"PRItime), oid_to_hex(&cur_oid), graph_commit->date, odb_commit->date);