Merge branch 'ds/commit-graph-gen-v2-fixes'

Fixes to the way generation number v2 in the commit-graph files are
(not) handled.

* ds/commit-graph-gen-v2-fixes:
  commit-graph: declare bankruptcy on GDAT chunks
  commit-graph: fix generation number v2 overflow values
  commit-graph: start parsing generation v2 (again)
  commit-graph: fix ordering bug in generation numbers
  t5318: extract helpers to lib-commit-graph.sh
  test-read-graph: include extra post-parse info
This commit is contained in:
Junio C Hamano 2022-03-16 17:53:09 -07:00
commit a54cc523ad
8 changed files with 176 additions and 58 deletions

View file

@ -93,7 +93,7 @@ CHUNK DATA:
2 bits of the lowest byte, storing the 33rd and 34th bit of the
commit time.
Generation Data (ID: {'G', 'D', 'A', 'T' }) (N * 4 bytes) [Optional]
Generation Data (ID: {'G', 'D', 'A', '2' }) (N * 4 bytes) [Optional]
* This list of 4-byte values store corrected commit date offsets for the
commits, arranged in the same order as commit data chunk.
* If the corrected commit date offset cannot be stored within 31 bits,
@ -104,7 +104,7 @@ CHUNK DATA:
by compatible versions of Git and in case of split commit-graph chains,
the topmost layer also has Generation Data chunk.
Generation Data Overflow (ID: {'G', 'D', 'O', 'V' }) [Optional]
Generation Data Overflow (ID: {'G', 'D', 'O', '2' }) [Optional]
* This list of 8-byte values stores the corrected commit date offsets
for commits with corrected commit date offsets that cannot be
stored within 31 bits.
@ -156,3 +156,11 @@ CHUNK DATA:
TRAILER:
H-byte HASH-checksum of all of the above.
== Historical Notes:
The Generation Data (GDA2) and Generation Data Overflow (GDO2) chunks have
the number '2' in their chunk IDs because a previous version of Git wrote
possibly erroneous data in these chunks with the IDs "GDAT" and "GDOV". By
changing the IDs, newer versions of Git will silently ignore those older
chunks and write the new information without trusting the incorrect data.

View file

@ -39,8 +39,8 @@ void git_test_write_commit_graph_or_die(void)
#define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
#define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
#define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */
#define GRAPH_CHUNKID_GENERATION_DATA 0x47444154 /* "GDAT" */
#define GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW 0x47444f56 /* "GDOV" */
#define GRAPH_CHUNKID_GENERATION_DATA 0x47444132 /* "GDA2" */
#define GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW 0x47444f32 /* "GDO2" */
#define GRAPH_CHUNKID_EXTRAEDGES 0x45444745 /* "EDGE" */
#define GRAPH_CHUNKID_BLOOMINDEXES 0x42494458 /* "BIDX" */
#define GRAPH_CHUNKID_BLOOMDATA 0x42444154 /* "BDAT" */
@ -407,6 +407,9 @@ struct commit_graph *parse_commit_graph(struct repository *r,
&graph->chunk_generation_data);
pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW,
&graph->chunk_generation_data_overflow);
if (graph->chunk_generation_data)
graph->read_generation_data = 1;
}
if (r->settings.commit_graph_read_changed_paths) {
@ -803,7 +806,7 @@ static void fill_commit_graph_info(struct commit *item, struct commit_graph *g,
die(_("commit-graph requires overflow generation data but has none"));
offset_pos = offset ^ CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW;
graph_data->generation = get_be64(g->chunk_generation_data_overflow + 8 * offset_pos);
graph_data->generation = item->date + get_be64(g->chunk_generation_data_overflow + 8 * offset_pos);
} else
graph_data->generation = item->date + offset;
} else
@ -1556,12 +1559,16 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx)
if (current->date && current->date > max_corrected_commit_date)
max_corrected_commit_date = current->date - 1;
commit_graph_data_at(current)->generation = max_corrected_commit_date + 1;
if (commit_graph_data_at(current)->generation - current->date > GENERATION_NUMBER_V2_OFFSET_MAX)
ctx->num_generation_data_overflows++;
}
}
}
for (i = 0; i < ctx->commits.nr; i++) {
struct commit *c = ctx->commits.list[i];
timestamp_t offset = commit_graph_data_at(c)->generation - c->date;
if (offset > GENERATION_NUMBER_V2_OFFSET_MAX)
ctx->num_generation_data_overflows++;
}
stop_progress(&ctx->progress);
}

View file

@ -3,6 +3,7 @@
#include "commit-graph.h"
#include "repository.h"
#include "object-store.h"
#include "bloom.h"
int cmd__read_graph(int argc, const char **argv)
{
@ -45,6 +46,18 @@ int cmd__read_graph(int argc, const char **argv)
printf(" bloom_data");
printf("\n");
printf("options:");
if (graph->bloom_filter_settings)
printf(" bloom(%"PRIu32",%"PRIu32",%"PRIu32")",
graph->bloom_filter_settings->hash_version,
graph->bloom_filter_settings->bits_per_entry,
graph->bloom_filter_settings->num_hashes);
if (graph->read_generation_data)
printf(" read_generation_data");
if (graph->topo_levels)
printf(" topo_levels");
printf("\n");
UNLEAK(graph);
return 0;

58
t/lib-commit-graph.sh Executable file
View file

@ -0,0 +1,58 @@
#!/bin/sh
# Helper functions for testing commit-graphs.
# Initialize OID cache with oid_version
test_oid_cache <<-EOF
oid_version sha1:1
oid_version sha256:2
EOF
graph_git_two_modes() {
git -c core.commitGraph=true $1 >output &&
git -c core.commitGraph=false $1 >expect &&
test_cmp expect output
}
graph_git_behavior() {
MSG=$1
DIR=$2
BRANCH=$3
COMPARE=$4
test_expect_success "check normal git operations: $MSG" '
cd "$TRASH_DIRECTORY/$DIR" &&
graph_git_two_modes "log --oneline $BRANCH" &&
graph_git_two_modes "log --topo-order $BRANCH" &&
graph_git_two_modes "log --graph $COMPARE..$BRANCH" &&
graph_git_two_modes "branch -vv" &&
graph_git_two_modes "merge-base -a $BRANCH $COMPARE"
'
}
graph_read_expect() {
OPTIONAL=""
NUM_CHUNKS=3
if test -n "$2"
then
OPTIONAL=" $2"
NUM_CHUNKS=$((3 + $(echo "$2" | wc -w)))
fi
GENERATION_VERSION=2
if test -n "$3"
then
GENERATION_VERSION=$3
fi
OPTIONS=
if test $GENERATION_VERSION -gt 1
then
OPTIONS=" read_generation_data"
fi
cat >expect <<- EOF
header: 43475048 1 $(test_oid oid_version) $NUM_CHUNKS 0
num_commits: $1
chunks: oid_fanout oid_lookup commit_metadata$OPTIONAL
options:$OPTIONS
EOF
test-tool read-graph >output &&
test_cmp expect output
}

View file

@ -48,6 +48,7 @@ graph_read_expect () {
header: 43475048 1 $(test_oid oid_version) $NUM_CHUNKS 0
num_commits: $1
chunks: oid_fanout oid_lookup commit_metadata generation_data bloom_indexes bloom_data
options: bloom(1,10,7) read_generation_data
EOF
test-tool read-graph >actual &&
test_cmp expect actual

View file

@ -29,12 +29,7 @@ test_expect_success 'setup full repo' '
cd "$TRASH_DIRECTORY/full" &&
git init &&
git config core.commitGraph true &&
objdir=".git/objects" &&
test_oid_cache <<-EOF
oid_version sha1:1
oid_version sha256:2
EOF
objdir=".git/objects"
'
test_expect_success POSIXPERM 'tweak umask for modebit tests' '
@ -69,46 +64,10 @@ test_expect_success 'create commits and repack' '
git repack
'
graph_git_two_modes() {
git -c core.commitGraph=true $1 >output &&
git -c core.commitGraph=false $1 >expect &&
test_cmp expect output
}
graph_git_behavior() {
MSG=$1
DIR=$2
BRANCH=$3
COMPARE=$4
test_expect_success "check normal git operations: $MSG" '
cd "$TRASH_DIRECTORY/$DIR" &&
graph_git_two_modes "log --oneline $BRANCH" &&
graph_git_two_modes "log --topo-order $BRANCH" &&
graph_git_two_modes "log --graph $COMPARE..$BRANCH" &&
graph_git_two_modes "branch -vv" &&
graph_git_two_modes "merge-base -a $BRANCH $COMPARE"
'
}
. "$TEST_DIRECTORY"/lib-commit-graph.sh
graph_git_behavior 'no graph' full commits/3 commits/1
graph_read_expect() {
OPTIONAL=""
NUM_CHUNKS=3
if test ! -z "$2"
then
OPTIONAL=" $2"
NUM_CHUNKS=$((3 + $(echo "$2" | wc -w)))
fi
cat >expect <<- EOF
header: 43475048 1 $(test_oid oid_version) $NUM_CHUNKS 0
num_commits: $1
chunks: oid_fanout oid_lookup commit_metadata$OPTIONAL
EOF
test-tool read-graph >output &&
test_cmp expect output
}
test_expect_success 'exit with correct error on bad input to --stdin-commits' '
cd "$TRASH_DIRECTORY/full" &&
# invalid, non-hex OID
@ -466,10 +425,10 @@ test_expect_success 'warn on improper hash version' '
)
'
test_expect_success 'lower layers have overflow chunk' '
test_expect_success TIME_IS_64BIT,TIME_T_IS_64BIT 'lower layers have overflow chunk' '
cd "$TRASH_DIRECTORY/full" &&
UNIX_EPOCH_ZERO="@0 +0000" &&
FUTURE_DATE="@2147483646 +0000" &&
FUTURE_DATE="@4147483646 +0000" &&
rm -f .git/objects/info/commit-graph &&
test_commit --date "$FUTURE_DATE" future-1 &&
test_commit --date "$UNIX_EPOCH_ZERO" old-1 &&
@ -497,7 +456,7 @@ test_expect_success 'git commit-graph verify' '
cd "$TRASH_DIRECTORY/full" &&
git rev-parse commits/8 | git -c commitGraph.generationVersion=1 commit-graph write --stdin-commits &&
git commit-graph verify >output &&
graph_read_expect 9 extra_edges
graph_read_expect 9 extra_edges 1
'
NUM_COMMITS=9
@ -825,10 +784,6 @@ test_expect_success 'set up and verify repo with generation data overflow chunk'
objdir=".git/objects" &&
UNIX_EPOCH_ZERO="@0 +0000" &&
FUTURE_DATE="@2147483646 +0000" &&
test_oid_cache <<-EOF &&
oid_version sha1:1
oid_version sha256:2
EOF
cd "$TRASH_DIRECTORY" &&
mkdir repo &&
cd repo &&

View file

@ -30,10 +30,16 @@ graph_read_expect() {
then
NUM_BASE=$2
fi
OPTIONS=
if test -z "$3"
then
OPTIONS=" read_generation_data"
fi
cat >expect <<- EOF
header: 43475048 1 $(test_oid oid_version) 4 $NUM_BASE
num_commits: $1
chunks: oid_fanout oid_lookup commit_metadata generation_data
options:$OPTIONS
EOF
test-tool read-graph >output &&
test_cmp expect output
@ -508,6 +514,7 @@ test_expect_success 'setup repo for mixed generation commit-graph-chain' '
header: 43475048 1 $(test_oid oid_version) 4 1
num_commits: $NUM_SECOND_LAYER_COMMITS
chunks: oid_fanout oid_lookup commit_metadata
options:
EOF
test_cmp expect output &&
git commit-graph verify &&
@ -540,6 +547,7 @@ test_expect_success 'do not write generation data chunk if not present on existi
header: 43475048 1 $(test_oid oid_version) 4 2
num_commits: $NUM_THIRD_LAYER_COMMITS
chunks: oid_fanout oid_lookup commit_metadata
options:
EOF
test_cmp expect output &&
git commit-graph verify
@ -581,6 +589,7 @@ test_expect_success 'do not write generation data chunk if the topmost remaining
header: 43475048 1 $(test_oid oid_version) 4 2
num_commits: $(($NUM_THIRD_LAYER_COMMITS + $NUM_FOURTH_LAYER_COMMITS))
chunks: oid_fanout oid_lookup commit_metadata
options:
EOF
test_cmp expect output &&
git commit-graph verify
@ -620,6 +629,7 @@ test_expect_success 'write generation data chunk if topmost remaining layer has
header: 43475048 1 $(test_oid oid_version) 5 1
num_commits: $(($NUM_SECOND_LAYER_COMMITS + $NUM_THIRD_LAYER_COMMITS + $NUM_FOURTH_LAYER_COMMITS + $NUM_FIFTH_LAYER_COMMITS))
chunks: oid_fanout oid_lookup commit_metadata generation_data
options: read_generation_data
EOF
test_cmp expect output
)

View file

@ -0,0 +1,66 @@
#!/bin/sh
test_description='commit graph with 64-bit timestamps'
. ./test-lib.sh
if ! test_have_prereq TIME_IS_64BIT || ! test_have_prereq TIME_T_IS_64BIT
then
skip_all='skipping 64-bit timestamp tests'
test_done
fi
. "$TEST_DIRECTORY"/lib-commit-graph.sh
UNIX_EPOCH_ZERO="@0 +0000"
FUTURE_DATE="@4147483646 +0000"
GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0
test_expect_success 'lower layers have overflow chunk' '
rm -f .git/objects/info/commit-graph &&
test_commit --date "$FUTURE_DATE" future-1 &&
test_commit --date "$UNIX_EPOCH_ZERO" old-1 &&
git commit-graph write --reachable &&
test_commit --date "$FUTURE_DATE" future-2 &&
test_commit --date "$UNIX_EPOCH_ZERO" old-2 &&
git commit-graph write --reachable --split=no-merge &&
test_commit extra &&
git commit-graph write --reachable --split=no-merge &&
git commit-graph write --reachable &&
graph_read_expect 5 "generation_data generation_data_overflow" &&
mv .git/objects/info/commit-graph commit-graph-upgraded &&
git commit-graph write --reachable &&
graph_read_expect 5 "generation_data generation_data_overflow" &&
test_cmp .git/objects/info/commit-graph commit-graph-upgraded
'
graph_git_behavior 'overflow' '' HEAD~2 HEAD
test_expect_success 'set up and verify repo with generation data overflow chunk' '
mkdir repo &&
cd repo &&
git init &&
test_commit --date "$UNIX_EPOCH_ZERO" 1 &&
test_commit 2 &&
test_commit --date "$UNIX_EPOCH_ZERO" 3 &&
git commit-graph write --reachable &&
graph_read_expect 3 generation_data &&
test_commit --date "$FUTURE_DATE" 4 &&
test_commit 5 &&
test_commit --date "$UNIX_EPOCH_ZERO" 6 &&
git branch left &&
git reset --hard 3 &&
test_commit 7 &&
test_commit --date "$FUTURE_DATE" 8 &&
test_commit 9 &&
git branch right &&
git reset --hard 3 &&
test_merge M left right &&
git commit-graph write --reachable &&
graph_read_expect 10 "generation_data generation_data_overflow" &&
git commit-graph verify
'
graph_git_behavior 'overflow 2' repo left right
test_done