git/t/t6000-rev-list-misc.sh
Patrick Steinhardt f45022dc2f connected: do not sort input revisions
In order to compute whether objects reachable from a set of tips are all
connected, we do a revision walk with these tips as positive references
and `--not --all`. `--not --all` will cause the revision walk to load
all preexisting references as uninteresting, which can be very expensive
in repositories with many references.

Benchmarking the git-rev-list(1) command highlights that by far the most
expensive single phase is initial sorting of the input revisions: after
all references have been loaded, we first sort commits by author date.
In a real-world repository with about 2.2 million references, it makes
up about 40% of the total runtime of git-rev-list(1).

Ultimately, the connectivity check shouldn't really bother about the
order of input revisions at all. We only care whether we can actually
walk all objects until we hit the cut-off point. So sorting the input is
a complete waste of time.

Introduce a new "--unsorted-input" flag to git-rev-list(1) which will
cause it to not sort the commits and adjust the connectivity check to
always pass the flag. This results in the following speedups, executed
in a clone of gitlab-org/gitlab [1]:

    Benchmark #1: git rev-list  --objects --quiet --not --all --not $(cat newrev)
      Time (mean ± σ):      7.639 s ±  0.065 s    [User: 7.304 s, System: 0.335 s]
      Range (min … max):    7.543 s …  7.742 s    10 runs

    Benchmark #2: git rev-list --unsorted-input --objects --quiet --not --all --not $newrev
      Time (mean ± σ):      4.995 s ±  0.044 s    [User: 4.657 s, System: 0.337 s]
      Range (min … max):    4.909 s …  5.048 s    10 runs

    Summary
      'git rev-list --unsorted-input --objects --quiet --not --all --not $(cat newrev)' ran
        1.53 ± 0.02 times faster than 'git rev-list  --objects --quiet --not --all --not $newrev'

[1]: https://gitlab.com/gitlab-org/gitlab.git. Note that not all refs
     are visible to clients.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-08-09 09:51:12 -07:00

203 lines
5.7 KiB
Bash
Executable file

#!/bin/sh
test_description='miscellaneous rev-list tests'
GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
. ./test-lib.sh
test_expect_success setup '
echo content1 >wanted_file &&
echo content2 >unwanted_file &&
git add wanted_file unwanted_file &&
test_tick &&
git commit -m one
'
test_expect_success 'rev-list --objects heeds pathspecs' '
git rev-list --objects HEAD -- wanted_file >output &&
grep wanted_file output &&
! grep unwanted_file output
'
test_expect_success 'rev-list --objects with pathspecs and deeper paths' '
mkdir foo &&
>foo/file &&
git add foo/file &&
test_tick &&
git commit -m two &&
git rev-list --objects HEAD -- foo >output &&
grep foo/file output &&
git rev-list --objects HEAD -- foo/file >output &&
grep foo/file output &&
! grep unwanted_file output
'
test_expect_success 'rev-list --objects with pathspecs and copied files' '
git checkout --orphan junio-testcase &&
git rm -rf . &&
mkdir two &&
echo frotz >one &&
cp one two/three &&
git add one two/three &&
test_tick &&
git commit -m that &&
ONE=$(git rev-parse HEAD:one) &&
git rev-list --objects HEAD two >output &&
grep "$ONE two/three" output &&
! grep one output
'
test_expect_success 'rev-list --objects --no-object-names has no space/names' '
git rev-list --objects --no-object-names HEAD >output &&
! grep wanted_file output &&
! grep unwanted_file output &&
! grep " " output
'
test_expect_success 'rev-list --objects --no-object-names works with cat-file' '
git rev-list --objects --no-object-names --all >list-output &&
git cat-file --batch-check <list-output >cat-output &&
! grep missing cat-output
'
test_expect_success '--no-object-names and --object-names are last-one-wins' '
git rev-list --objects --no-object-names --object-names --all >output &&
grep wanted_file output &&
git rev-list --objects --object-names --no-object-names --all >output &&
! grep wanted_file output
'
test_expect_success 'rev-list A..B and rev-list ^A B are the same' '
test_tick &&
git commit --allow-empty -m another &&
git tag -a -m "annotated" v1.0 &&
git rev-list --objects ^v1.0^ v1.0 >expect &&
git rev-list --objects v1.0^..v1.0 >actual &&
test_cmp expect actual
'
test_expect_success 'propagate uninteresting flag down correctly' '
git rev-list --objects ^HEAD^{tree} HEAD^{tree} >actual &&
test_must_be_empty actual
'
test_expect_success 'symleft flag bit is propagated down from tag' '
git log --format="%m %s" --left-right v1.0...main >actual &&
cat >expect <<-\EOF &&
< another
< that
> two
> one
EOF
test_cmp expect actual
'
test_expect_success 'rev-list can show index objects' '
# Of the blobs and trees in the index, note:
#
# - we do not show two/three, because it is the
# same blob as "one", and we show objects only once
#
# - we do show the tree "two", because it has a valid cache tree
# from the last commit
#
# - we do not show the root tree; since we updated the index, it
# does not have a valid cache tree
#
echo only-in-index >only-in-index &&
test_when_finished "git reset --hard" &&
rev1=$(git rev-parse HEAD:one) &&
rev2=$(git rev-parse HEAD:two) &&
revi=$(git hash-object only-in-index) &&
cat >expect <<-EOF &&
$rev1 one
$revi only-in-index
$rev2 two
EOF
git add only-in-index &&
git rev-list --objects --indexed-objects >actual &&
test_cmp expect actual
'
test_expect_success 'rev-list can negate index objects' '
git rev-parse HEAD >expect &&
git rev-list -1 --objects HEAD --not --indexed-objects >actual &&
test_cmp expect actual
'
test_expect_success '--bisect and --first-parent can be combined' '
git rev-list --bisect --first-parent HEAD
'
test_expect_success '--header shows a NUL after each commit' '
# We know that there is no Q in the true payload; names and
# addresses of the authors and the committers do not have
# any, and object names or header names do not, either.
git rev-list --header --max-count=2 HEAD |
nul_to_q |
grep "^Q" >actual &&
cat >expect <<-EOF &&
Q$(git rev-parse HEAD~1)
Q
EOF
test_cmp expect actual
'
test_expect_success 'rev-list --end-of-options' '
git update-ref refs/heads/--output=yikes HEAD &&
git rev-list --end-of-options --output=yikes >actual &&
test_path_is_missing yikes &&
git rev-list HEAD >expect &&
test_cmp expect actual
'
test_expect_success 'rev-list --count' '
count=$(git rev-list --count HEAD) &&
git rev-list HEAD >actual &&
test_line_count = $count actual
'
test_expect_success 'rev-list --count --objects' '
count=$(git rev-list --count --objects HEAD) &&
git rev-list --objects HEAD >actual &&
test_line_count = $count actual
'
test_expect_success 'rev-list --unsorted-input results in different sorting' '
git rev-list --unsorted-input HEAD HEAD~ >first &&
git rev-list --unsorted-input HEAD~ HEAD >second &&
! test_cmp first second &&
sort first >first.sorted &&
sort second >second.sorted &&
test_cmp first.sorted second.sorted
'
test_expect_success 'rev-list --unsorted-input incompatible with --no-walk' '
cat >expect <<-EOF &&
fatal: --no-walk is incompatible with --unsorted-input
EOF
test_must_fail git rev-list --unsorted-input --no-walk HEAD 2>error &&
test_cmp expect error &&
test_must_fail git rev-list --unsorted-input --no-walk=sorted HEAD 2>error &&
test_cmp expect error &&
test_must_fail git rev-list --unsorted-input --no-walk=unsorted HEAD 2>error &&
test_cmp expect error &&
cat >expect <<-EOF &&
fatal: --unsorted-input is incompatible with --no-walk
EOF
test_must_fail git rev-list --no-walk --unsorted-input HEAD 2>error &&
test_cmp expect error &&
test_must_fail git rev-list --no-walk=sorted --unsorted-input HEAD 2>error &&
test_cmp expect error &&
test_must_fail git rev-list --no-walk=unsorted --unsorted-input HEAD 2>error &&
test_cmp expect error
'
test_done