git/t/t1050-large.sh
Nguyễn Thái Ngọc Duy ec9d224903 fsck: use streaming interface for large blobs in pack
For blobs, we want to make sure the on-disk data is not corrupted
(i.e. can be inflated and produce the expected SHA-1). Blob content is
opaque, there's nothing else inside to check for.

For really large blobs, we may want to avoid unpacking the entire blob
in memory just to check whether it produces the same SHA-1. On 32-bit
systems, we may not have enough virtual address space for such a memory
allocation. And even on 64-bit systems, where that is not a problem,
allocating a lot more memory could push other parts of the system out to
the swap file, generating lots of I/O and slowing everything down.

For this particular operation, not unpacking the blob and letting
check_sha1_signature(), which supports the streaming interface, do the
job is sufficient. check_sha1_signature() is not shown in the diff,
unfortunately, but it will be called when "data_valid && !data" is
false.

We will call the callback function "fn" with NULL as "data". The only
callback of this function is fsck_obj_buffer(), which does not touch
"data" at all if it's a blob.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-13 09:15:29 -07:00

185 lines
4.1 KiB
Bash
Executable file

#!/bin/sh
# Copyright (c) 2011, Google Inc.

# One-line summary of what this test script covers.
test_description='adding and checking out large blobs'

# Load the shared test library from the current directory; presumably this
# is what supplies test_expect_success, test_cmp, test_create_repo,
# test_must_be_empty and test_done used below -- none are defined here.
. ./test-lib.sh
# Prepare the fixtures shared by the rest of the script: a global
# big-file threshold, three identical 2,000,000-byte files, one
# 2,500,000-byte file, and an exported GIT_ALLOC_LIMIT.
test_expect_success setup '
	# "git clone" offers no way to hand core.bigfilethreshold to the
	# repositories it creates, so configure it at the global level.
	git config --global core.bigfilethreshold 200k &&

	# Each payload is a run of blanks ending in a single letter,
	# with no trailing newline.
	printf "%2500000s" Y >huge &&
	printf "%2000000s" X >large1 &&
	for copy in large2 large3
	do
		cp large1 "$copy" || return 1
	done &&

	# Every payload above is bigger than this cap; presumably that
	# makes any attempt to hold a whole blob in memory fail.
	GIT_ALLOC_LIMIT=1500k &&
	export GIT_ALLOC_LIMIT
'
# Adding files above core.bigfilethreshold must put them straight into a
# single packfile with no loose objects, and adding another copy of
# already-packed content must not create a second packfile.
test_expect_success 'add a large file or two' '
	git add large1 huge large2 &&

	# make sure we got a single packfile and no loose objects
	bad= count=0 idx= &&
	for p in .git/objects/pack/pack-*.pack
	do
		count=$(( $count + 1 ))
		if test -f "$p" && idx=${p%.pack}.idx && test -f "$idx"
		then
			continue
		fi
		bad=t
	done &&
	test -z "$bad" &&
	test $count = 1 &&

	# large1 and large2 are identical, so only two objects are
	# expected.  Write the listing to a file rather than piping it
	# into "wc", so a failing "git show-index" fails the test
	# instead of having its exit status hidden by the pipeline.
	git show-index <"$idx" >index-entries &&
	# $cnt stays unquoted on purpose: some "wc" implementations pad
	# the number with leading blanks.
	cnt=$(wc -l <index-entries) &&
	test $cnt = 2 &&

	# Any file matching the two-plus-thirty-eight hex digit layout
	# would be a loose object.  An unmatched glob stays literal and
	# is skipped by the "test -f" guard.
	for l in .git/objects/??/??????????????????????????????????????
	do
		test -f "$l" || continue
		bad=t
	done &&
	test -z "$bad" &&

	# attempt to add another copy of the same
	git add large3 &&
	bad= count=0 &&
	for p in .git/objects/pack/pack-*.pack
	do
		count=$(( $count + 1 ))
		if test -f "$p" && idx=${p%.pack}.idx && test -f "$idx"
		then
			continue
		fi
		bad=t
	done &&
	test -z "$bad" &&
	test $count = 1
'
# A large blob that exists only in the pack must check out intact.
test_expect_success 'checkout a large file' '
	# Register the blob staged as "large1" under a second path, then
	# let checkout write that path into the working tree.
	blob=$(git rev-parse :large1) &&
	git update-index --add --cacheinfo 100644 "$blob" another &&
	git checkout another &&
	test_cmp large1 another
'
# With pack.packsizelimit set, adding three large files must split them
# across two packfiles, and the two packs together must contain exactly
# the three blobs that were added.
test_expect_success 'packsize limit' '
	test_create_repo mid &&
	(
		cd mid &&
		git config core.bigfilethreshold 64k &&
		git config pack.packsizelimit 256k &&

		# mid1 and mid2 will fit within 256k limit but
		# appending mid3 will bust the limit and will
		# result in a separate packfile.
		test-genrandom "a" $(( 66 * 1024 )) >mid1 &&
		test-genrandom "b" $(( 80 * 1024 )) >mid2 &&
		test-genrandom "c" $(( 128 * 1024 )) >mid3 &&
		git add mid1 mid2 mid3 &&

		# This assignment must stay on the &&-chain; without the
		# trailing "&&" a failure of any command above would be
		# discarded and the checks below would run regardless.
		count=0 &&
		for pi in .git/objects/pack/pack-*.idx
		do
			test -f "$pi" && count=$(( $count + 1 ))
		done &&
		test $count = 2 &&

		# Expected object names, computed independently of the
		# packs.  Chained so that the first failure stops the list.
		(
			git hash-object --stdin <mid1 &&
			git hash-object --stdin <mid2 &&
			git hash-object --stdin <mid3
		) |
		sort >expect &&

		# Object names actually recorded in the pack indexes; the
		# sed expression keeps only the hex name from each line.
		for pi in .git/objects/pack/pack-*.idx
		do
			git show-index <"$pi"
		done |
		sed -e "s/^[0-9]* \([0-9a-f]*\) .*/\1/" |
		sort >actual &&

		test_cmp expect actual
	)
'
# Create two commits that differ in a large file, then make sure a raw
# diff against the first one completes.
test_expect_success 'diff --raw' '
	git commit --quiet --message=initial &&

	# Append to the large file so the second commit differs.
	echo modified >>large1 &&
	git add large1 &&
	git commit --quiet --message=modified &&

	git diff --raw HEAD^
'
# The diffstat between the two commits only has to complete.
test_expect_success 'diff --stat' '
	git diff --stat HEAD^ HEAD
'

# A full diff must report the large file as binary.
test_expect_success 'diff' '
	git diff HEAD^ HEAD >actual &&
	grep "Binary files.*differ" <actual
'

# Same check, comparing the index against the first commit.
test_expect_success 'diff --cached' '
	git diff --cached HEAD^ >actual &&
	grep "Binary files.*differ" <actual
'
# Hashing a large working-tree file only has to complete.
test_expect_success 'hash-object' '
	git hash-object large1
'

# Dump the staged large blob; the content is discarded, only the exit
# status matters.
test_expect_success 'cat-file a large file' '
	git cat-file blob :large1 >/dev/null
'

# Same, but reaching the blob through an annotated tag that points at it.
test_expect_success 'cat-file a large file from a tag' '
	git tag --message=largefile largefiletag :large1 &&
	git cat-file blob largefiletag >/dev/null
'

# "git show" on the staged blob only has to complete as well.
test_expect_success 'git-show a large file' '
	git show :large1 >/dev/null
'
# Clone over a file:// URL, then verify the pack(s) in the clone.
test_expect_success 'index-pack' '
	git clone "file://$(pwd)/.git" foo &&

	# GIT_DIR names a path this script never creates; presumably the
	# point is that index-pack must verify the pack without help from
	# a repository.
	GIT_DIR=non-existent git index-pack --strict --verify foo/.git/objects/pack/*.pack
'

# Repacking everything into one pack only has to complete.
test_expect_success 'repack' '
	git repack -a -d
'
# Move the "huge" blob into a scratch repository via unpack-objects,
# pack it again from there, and check that the blob read back from the
# resulting pack is byte-identical to the original file.
test_expect_success 'pack-objects with large loose object' '
	huge_id=$(git hash-object huge) &&

	# Stream a pack holding just that blob into the "loose" repository.
	# GIT_ALLOC_LIMIT=0 presumably lifts the allocation cap for
	# unpack-objects only -- TODO confirm.
	test_create_repo loose &&
	echo "$huge_id" |
	git pack-objects --stdout |
	GIT_ALLOC_LIMIT=0 GIT_DIR=loose/.git git unpack-objects &&

	# Pack it again from "loose"; the pack-* files are written to the
	# current directory.
	echo "$huge_id" |
	GIT_DIR=loose/.git git pack-objects pack &&

	# Drop the new pack into an otherwise empty repository and read
	# the blob back from there.
	test_create_repo packed &&
	mv pack-* packed/.git/objects/pack &&
	GIT_DIR=packed/.git git cat-file blob "$huge_id" >actual &&
	test_cmp huge actual
'
# The three tests below only require "git archive" to complete on a tree
# that contains large blobs; the archive itself is discarded.

# tar output.
test_expect_success 'tar archiving' '
	git archive --format=tar HEAD >/dev/null
'

# zip output at level -0 (store only).
test_expect_success 'zip archiving, store only' '
	git archive --format=zip -0 HEAD >/dev/null
'

# zip output with no level option given (deflate).
test_expect_success 'zip archiving, deflate' '
	git archive --format=zip HEAD >/dev/null
'
# fsck must succeed on the repository with large blobs and must not
# write anything to stderr.
test_expect_success 'fsck large blobs' '
	git fsck 2>err &&
	test_must_be_empty err
'

# Hand control back to the test library to finish the run.
test_done