git/t/t4067-diff-partial-clone.sh
Jonathan Tan 95acf11a3d diff: restrict when prefetching occurs
Commit 7fbbcb21b1 ("diff: batch fetching of missing blobs", 2019-04-08)
optimized "diff" by prefetching blobs in a partial clone, but there are
some cases wherein blobs do not need to be prefetched. In these cases,
any command that uses the diff machinery will unnecessarily fetch blobs.

diffcore_std() may read blobs when it calls the following functions:
 (1) diffcore_skip_stat_unmatch() (controlled by the config variable
     diff.autorefreshindex)
 (2) diffcore_break() and diffcore_merge_broken() (for break-rewrite
     detection)
 (3) diffcore_rename() (for rename detection)
 (4) diffcore_pickaxe() (for detecting addition/deletion of specified
     string)

Instead of always prefetching blobs, teach diffcore_skip_stat_unmatch(),
diffcore_break(), and diffcore_rename() to prefetch blobs upon the first
read of a missing object. This covers (1), (2), and (3): to cover the
rest, teach diffcore_std() to prefetch if the output type is one that
includes blob data (and hence blob data will be required later anyway),
or if it knows that (4) will be run.

Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-07 16:09:29 -07:00

182 lines
5.9 KiB
Bash
Executable file

#!/bin/sh
#
# Exercises commands built on the diff machinery ("git show", "git diff")
# inside a blob-filtered partial clone, checking which missing blobs are
# requested from the server and how many fetch negotiations that takes.
test_description='behavior of diff when reading objects in a partial clone'
# Load the test harness. The test_* helpers used below are not defined in
# this file, so they are presumably provided by test-lib.sh.
. ./test-lib.sh
# "git show" on a commit in a blob-filtered partial clone must request all
# of the blobs it needs in a single fetch instead of one fetch per blob.
test_expect_success 'git show batches blobs' '
	test_when_finished "rm -rf server client trace" &&

	# Server: a single commit with two files, configured so that it
	# accepts filtered clones and requests for arbitrary object IDs.
	test_create_repo server &&
	test_config -C server uploadpack.allowfilter 1 &&
	test_config -C server uploadpack.allowanysha1inwant 1 &&
	printf "a\n" >server/a &&
	printf "b\n" >server/b &&
	git -C server add a b &&
	git -C server commit -m x &&

	# Client: bare clone made with a blob filter, so blobs are missing.
	git clone --bare --filter=blob:limit=0 \
		"file://$(pwd)/server" client &&

	# "done" marks the end of a negotiation, so exactly one "done"
	# line in the packet trace means exactly one negotiation happened.
	GIT_TRACE_PACKET="$(pwd)/trace" git -C client show HEAD &&
	grep "git> done" trace >done_lines &&
	test_line_count = 1 done_lines
'
# A commit-to-commit "git diff" in a blob-filtered partial clone must
# request every blob it needs in one fetch.
test_expect_success 'diff batches blobs' '
	test_when_finished "rm -rf server client trace" &&

	# Server history: the first commit adds a and b, the second one
	# adds c and d, so diffing HEAD^ against HEAD involves two blobs.
	test_create_repo server &&
	test_config -C server uploadpack.allowfilter 1 &&
	test_config -C server uploadpack.allowanysha1inwant 1 &&
	printf "a\n" >server/a &&
	printf "b\n" >server/b &&
	git -C server add a b &&
	git -C server commit -m x &&
	printf "c\n" >server/c &&
	printf "d\n" >server/d &&
	git -C server add c d &&
	git -C server commit -m x &&

	# Client: bare clone made with a blob filter, so blobs are missing.
	git clone --bare --filter=blob:limit=0 \
		"file://$(pwd)/server" client &&

	# "done" marks the end of a negotiation, so exactly one "done"
	# line in the packet trace means exactly one negotiation happened.
	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
	grep "git> done" trace >done_lines &&
	test_line_count = 1 done_lines
'
# A path whose blob is identical on both sides of the diff (here: b) must
# not be fetched; only the blobs of the changed path are requested.
test_expect_success 'diff skips same-OID blobs' '
	test_when_finished "rm -rf server client trace" &&

	# Server history: both commits contain b unchanged, while the
	# content of a goes from "a" to "another-a".
	test_create_repo server &&
	test_config -C server uploadpack.allowfilter 1 &&
	test_config -C server uploadpack.allowanysha1inwant 1 &&
	printf "a\n" >server/a &&
	printf "b\n" >server/b &&
	git -C server add a b &&
	git -C server commit -m x &&
	printf "another-a\n" >server/a &&
	git -C server add a &&
	git -C server commit -m x &&

	# Client: bare clone made with a blob filter, so blobs are missing.
	git clone --bare --filter=blob:limit=0 \
		"file://$(pwd)/server" client &&

	# Compute the object IDs of the three blob contents involved so
	# that they can be looked up in the packet trace.
	printf "a\n" | git hash-object --stdin >hash-old-a &&
	printf "another-a\n" | git hash-object --stdin >hash-new-a &&
	printf "b\n" | git hash-object --stdin >hash-b &&

	# Ensure that only a and another-a are fetched: both must show up
	# in a "want" line, and b must not.
	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
	grep "want $(cat hash-old-a)" trace &&
	grep "want $(cat hash-new-a)" trace &&
	! grep "want $(cat hash-b)" trace
'
# When diff collects the missing objects to fetch, submodule entries
# (gitlinks) must be left out; the diff below touches both a regular file
# and a submodule and has to succeed while still fetching the file blobs.
test_expect_success 'when fetching missing objects, diff skips GITLINKs' '
	test_when_finished "rm -rf sub server client trace" &&

	# "git submodule add" clones the submodule over file://. Git
	# releases in which protocol.file.allow defaults to "user" refuse
	# such a clone unless it is allowed explicitly; on older releases
	# this setting merely restates the default.
	test_config_global protocol.file.allow always &&

	# Repository that will be used as the submodule.
	test_create_repo sub &&
	test_commit -C sub first &&

	# Server history: the first commit adds file a and the submodule,
	# the second one changes a and advances the submodule, so that
	# HEAD^..HEAD contains both a blob change and a gitlink change.
	test_create_repo server &&
	echo a >server/a &&
	git -C server add a &&
	git -C server submodule add "file://$(pwd)/sub" &&
	git -C server commit -m x &&

	test_commit -C server/sub second &&
	echo another-a >server/a &&
	git -C server add a sub &&
	git -C server commit -m x &&

	test_config -C server uploadpack.allowfilter 1 &&
	test_config -C server uploadpack.allowanysha1inwant 1 &&
	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&

	echo a | git hash-object --stdin >hash-old-a &&
	echo another-a | git hash-object --stdin >hash-new-a &&

	# Ensure that a and another-a are fetched, and check (by successful
	# execution of the diff) that no invalid OIDs are sent.
	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
	grep "want $(cat hash-old-a)" trace &&
	grep "want $(cat hash-new-a)" trace
'
# With -M, diff has to read blob contents to detect an inexact rename;
# all of the blobs involved must be requested in a single fetch.
test_expect_success 'diff with rename detection batches blobs' '
	test_when_finished "rm -rf server client trace" &&

	# Server history: the second commit removes b and adds c, whose
	# content differs from the old b only in its last line.
	test_create_repo server &&
	test_config -C server uploadpack.allowfilter 1 &&
	test_config -C server uploadpack.allowanysha1inwant 1 &&
	echo a >server/a &&
	printf "b\nb\nb\nb\nb\n" >server/b &&
	git -C server add a b &&
	git -C server commit -m x &&
	printf "b\nb\nb\nb\nbX\n" >server/c &&
	rm server/b &&
	git -C server add c &&
	git -C server commit -a -m x &&

	# Client: bare clone made with a blob filter, so blobs are missing.
	git clone --bare --filter=blob:limit=0 \
		"file://$(pwd)/server" client &&

	GIT_TRACE_PACKET="$(pwd)/trace" \
		git -C client diff -M HEAD^ HEAD >out &&

	# The rename must actually have been detected ...
	grep "similarity index" out &&

	# ... and there must have been exactly 1 negotiation, which is
	# checked by counting "done" lines ("done" marks the end of
	# negotiation).
	grep "git> done" trace >done_lines &&
	test_line_count = 1 done_lines
'
# When a file is moved without any content change, "diff --raw -M" is
# expected to complete without fetching any blob from the server.
test_expect_success 'diff does not fetch anything if inexact rename detection is not needed' '
	test_when_finished "rm -rf server client trace" &&

	# Server history: the second commit moves b to c unchanged, so
	# both paths refer to the same content.
	test_create_repo server &&
	echo a >server/a &&
	printf "b\nb\nb\nb\nb\n" >server/b &&
	git -C server add a b &&
	git -C server commit -m x &&
	mv server/b server/c &&
	git -C server add c &&
	git -C server commit -a -m x &&

	test_config -C server uploadpack.allowfilter 1 &&
	test_config -C server uploadpack.allowanysha1inwant 1 &&
	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&

	# Ensure no fetches. The check relies on the packet trace file not
	# being created when no packets are exchanged. Use
	# test_path_is_missing rather than a negated test_path_exists so
	# that a failure reports the unexpected file and a success does
	# not print a misleading "does not exist" message.
	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff --raw -M HEAD^ HEAD &&
	test_path_is_missing trace
'
# Without --break-rewrites, "diff --raw -M" on a modified file is expected
# to fetch nothing; with --break-rewrites the blobs are needed, and they
# must then be requested in a single fetch.
test_expect_success 'diff --break-rewrites fetches only if necessary, and batches blobs if it does' '
	test_when_finished "rm -rf server client trace" &&

	# Server history: the second commit replaces every line of b.
	test_create_repo server &&
	echo a >server/a &&
	printf "b\nb\nb\nb\nb\n" >server/b &&
	git -C server add a b &&
	git -C server commit -m x &&
	printf "c\nc\nc\nc\nc\n" >server/b &&
	git -C server commit -a -m x &&

	test_config -C server uploadpack.allowfilter 1 &&
	test_config -C server uploadpack.allowanysha1inwant 1 &&
	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&

	# Ensure no fetches. The check relies on the packet trace file not
	# being created when no packets are exchanged. Use
	# test_path_is_missing rather than a negated test_path_exists so
	# that a failure reports the unexpected file and a success does
	# not print a misleading "does not exist" message.
	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff --raw -M HEAD^ HEAD &&
	test_path_is_missing trace &&

	# But with --break-rewrites, ensure that there is exactly 1 negotiation
	# by checking that there is only 1 "done" line sent. ("done" marks the
	# end of negotiation.)
	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff --break-rewrites --raw -M HEAD^ HEAD &&
	grep "git> done" trace >done_lines &&
	test_line_count = 1 done_lines
'
# End of the test script. test_done is not defined in this file; it is
# presumably provided by the sourced test-lib.sh.
test_done