git/t/t0021-conversion.sh

256 lines
7 KiB
Bash
Raw Normal View History

#!/bin/sh
test_description='blob conversion via gitattributes'
. ./test-lib.sh
cat <<EOF >rot13.sh
#!$SHELL_PATH
tr \
'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' \
'nopqrstuvwxyzabcdefghijklmNOPQRSTUVWXYZABCDEFGHIJKLM'
EOF
chmod +x rot13.sh
test_expect_success setup '
git config filter.rot13.smudge ./rot13.sh &&
git config filter.rot13.clean ./rot13.sh &&
{
echo "*.t filter=rot13"
echo "*.i ident"
} >.gitattributes &&
{
echo a b c d e f g h i j k l m
echo n o p q r s t u v w x y z
echo '\''$Id$'\''
} >test &&
cat test >test.t &&
cat test >test.o &&
cat test >test.i &&
git add test test.t test.i &&
rm -f test test.t test.i &&
git checkout -- test test.t test.i
'
script='s/^\$Id: \([0-9a-f]*\) \$/\1/p'
test_expect_success check '
cmp test.o test &&
cmp test.o test.t &&
# ident should be stripped in the repository
git diff --raw --exit-code :test :test.i &&
id=$(git rev-parse --verify :test) &&
embedded=$(sed -ne "$script" test.i) &&
test "z$id" = "z$embedded" &&
git cat-file blob :test.t > test.r &&
./rot13.sh < test.o > test.t &&
cmp test.r test.t
'
# If an expanded ident ever gets into the repository, we want to make sure that
# it is collapsed before being expanded again on checkout
test_expect_success expanded_in_repo '
{
echo "File with expanded keywords"
echo "\$Id\$"
echo "\$Id:\$"
echo "\$Id: 0000000000000000000000000000000000000000 \$"
echo "\$Id: NoSpaceAtEnd\$"
echo "\$Id:NoSpaceAtFront \$"
echo "\$Id:NoSpaceAtEitherEnd\$"
echo "\$Id: NoTerminatingSymbol"
echo "\$Id: Foreign Commit With Spaces \$"
} >expanded-keywords.0 &&
{
cat expanded-keywords.0 &&
printf "\$Id: NoTerminatingSymbolAtEOF"
} >expanded-keywords &&
cat expanded-keywords >expanded-keywords-crlf &&
git add expanded-keywords expanded-keywords-crlf &&
git commit -m "File with keywords expanded" &&
id=$(git rev-parse --verify :expanded-keywords) &&
{
echo "File with expanded keywords"
echo "\$Id: $id \$"
echo "\$Id: $id \$"
echo "\$Id: $id \$"
echo "\$Id: $id \$"
echo "\$Id: $id \$"
echo "\$Id: $id \$"
echo "\$Id: NoTerminatingSymbol"
echo "\$Id: Foreign Commit With Spaces \$"
} >expected-output.0 &&
{
cat expected-output.0 &&
printf "\$Id: NoTerminatingSymbolAtEOF"
} >expected-output &&
{
append_cr <expected-output.0 &&
printf "\$Id: NoTerminatingSymbolAtEOF"
} >expected-output-crlf &&
{
echo "expanded-keywords ident"
echo "expanded-keywords-crlf ident text eol=crlf"
} >>.gitattributes &&
rm -f expanded-keywords expanded-keywords-crlf &&
git checkout -- expanded-keywords &&
test_cmp expanded-keywords expected-output &&
git checkout -- expanded-keywords-crlf &&
test_cmp expanded-keywords-crlf expected-output-crlf
'
# The use of %f in a filter definition is expanded to the path to
# the filename being smudged or cleaned. It must be shell escaped.
# First, set up some interesting file names and pet them in
# .gitattributes.
test_expect_success 'filter shell-escaped filenames' '
cat >argc.sh <<-EOF &&
#!$SHELL_PATH
cat >/dev/null
echo argc: \$# "\$@"
EOF
normal=name-no-magic &&
special="name with '\''sq'\'' and \$x" &&
echo some test text >"$normal" &&
echo some test text >"$special" &&
git add "$normal" "$special" &&
git commit -q -m "add files" &&
echo "name* filter=argc" >.gitattributes &&
# delete the files and check them out again, using a smudge filter
# that will count the args and echo the command-line back to us
git config filter.argc.smudge "sh ./argc.sh %f" &&
rm "$normal" "$special" &&
git checkout -- "$normal" "$special" &&
# make sure argc.sh counted the right number of args
echo "argc: 1 $normal" >expect &&
test_cmp expect "$normal" &&
echo "argc: 1 $special" >expect &&
test_cmp expect "$special" &&
# do the same thing, but with more args in the filter expression
git config filter.argc.smudge "sh ./argc.sh %f --my-extra-arg" &&
rm "$normal" "$special" &&
git checkout -- "$normal" "$special" &&
# make sure argc.sh counted the right number of args
echo "argc: 2 $normal --my-extra-arg" >expect &&
test_cmp expect "$normal" &&
echo "argc: 2 $special --my-extra-arg" >expect &&
test_cmp expect "$special" &&
:
'
convert: stream from fd to required clean filter to reduce used address space The data is streamed to the filter process anyway. Better avoid mapping the file if possible. This is especially useful if a clean filter reduces the size, for example if it computes a sha1 for binary data, like git media. The file size that the previous implementation could handle was limited by the available address space; large files for example could not be handled with (32-bit) msysgit. The new implementation can filter files of any size as long as the filter output is small enough. The new code path is only taken if the filter is required. The filter consumes data directly from the fd. If it fails, the original data is not immediately available. The condition can easily be handled as a fatal error, which is expected for a required filter anyway. If the filter was not required, the condition would need to be handled in a different way, like seeking to 0 and reading the data. But this would require more restructuring of the code and is probably not worth it. The obvious approach of falling back to reading all data would not help achieving the main purpose of this patch, which is to handle large files with limited address space. If reading all data is an option, we can simply take the old code path right away and mmap the entire file. The environment variable GIT_MMAP_LIMIT, which has been introduced in a previous commit is used to test that the expected code path is taken. A related test that exercises required filters is modified to verify that the data actually has been modified on its way from the file system to the object store. Signed-off-by: Steffen Prohaska <prohaska@zib.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-08-26 15:23:25 +00:00
test_expect_success 'required filter should filter data' '
git config filter.required.smudge ./rot13.sh &&
git config filter.required.clean ./rot13.sh &&
git config filter.required.required true &&
echo "*.r filter=required" >.gitattributes &&
convert: stream from fd to required clean filter to reduce used address space The data is streamed to the filter process anyway. Better avoid mapping the file if possible. This is especially useful if a clean filter reduces the size, for example if it computes a sha1 for binary data, like git media. The file size that the previous implementation could handle was limited by the available address space; large files for example could not be handled with (32-bit) msysgit. The new implementation can filter files of any size as long as the filter output is small enough. The new code path is only taken if the filter is required. The filter consumes data directly from the fd. If it fails, the original data is not immediately available. The condition can easily be handled as a fatal error, which is expected for a required filter anyway. If the filter was not required, the condition would need to be handled in a different way, like seeking to 0 and reading the data. But this would require more restructuring of the code and is probably not worth it. The obvious approach of falling back to reading all data would not help achieving the main purpose of this patch, which is to handle large files with limited address space. If reading all data is an option, we can simply take the old code path right away and mmap the entire file. The environment variable GIT_MMAP_LIMIT, which has been introduced in a previous commit is used to test that the expected code path is taken. A related test that exercises required filters is modified to verify that the data actually has been modified on its way from the file system to the object store. Signed-off-by: Steffen Prohaska <prohaska@zib.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-08-26 15:23:25 +00:00
cat test.o >test.r &&
git add test.r &&
convert: stream from fd to required clean filter to reduce used address space The data is streamed to the filter process anyway. Better avoid mapping the file if possible. This is especially useful if a clean filter reduces the size, for example if it computes a sha1 for binary data, like git media. The file size that the previous implementation could handle was limited by the available address space; large files for example could not be handled with (32-bit) msysgit. The new implementation can filter files of any size as long as the filter output is small enough. The new code path is only taken if the filter is required. The filter consumes data directly from the fd. If it fails, the original data is not immediately available. The condition can easily be handled as a fatal error, which is expected for a required filter anyway. If the filter was not required, the condition would need to be handled in a different way, like seeking to 0 and reading the data. But this would require more restructuring of the code and is probably not worth it. The obvious approach of falling back to reading all data would not help achieving the main purpose of this patch, which is to handle large files with limited address space. If reading all data is an option, we can simply take the old code path right away and mmap the entire file. The environment variable GIT_MMAP_LIMIT, which has been introduced in a previous commit is used to test that the expected code path is taken. A related test that exercises required filters is modified to verify that the data actually has been modified on its way from the file system to the object store. Signed-off-by: Steffen Prohaska <prohaska@zib.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-08-26 15:23:25 +00:00
rm -f test.r &&
convert: stream from fd to required clean filter to reduce used address space The data is streamed to the filter process anyway. Better avoid mapping the file if possible. This is especially useful if a clean filter reduces the size, for example if it computes a sha1 for binary data, like git media. The file size that the previous implementation could handle was limited by the available address space; large files for example could not be handled with (32-bit) msysgit. The new implementation can filter files of any size as long as the filter output is small enough. The new code path is only taken if the filter is required. The filter consumes data directly from the fd. If it fails, the original data is not immediately available. The condition can easily be handled as a fatal error, which is expected for a required filter anyway. If the filter was not required, the condition would need to be handled in a different way, like seeking to 0 and reading the data. But this would require more restructuring of the code and is probably not worth it. The obvious approach of falling back to reading all data would not help achieving the main purpose of this patch, which is to handle large files with limited address space. If reading all data is an option, we can simply take the old code path right away and mmap the entire file. The environment variable GIT_MMAP_LIMIT, which has been introduced in a previous commit is used to test that the expected code path is taken. A related test that exercises required filters is modified to verify that the data actually has been modified on its way from the file system to the object store. Signed-off-by: Steffen Prohaska <prohaska@zib.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-08-26 15:23:25 +00:00
git checkout -- test.r &&
cmp test.o test.r &&
./rot13.sh <test.o >expected &&
git cat-file blob :test.r >actual &&
cmp expected actual
'
test_expect_success 'required filter smudge failure' '
git config filter.failsmudge.smudge false &&
git config filter.failsmudge.clean cat &&
git config filter.failsmudge.required true &&
echo "*.fs filter=failsmudge" >.gitattributes &&
echo test >test.fs &&
git add test.fs &&
rm -f test.fs &&
test_must_fail git checkout -- test.fs
'
test_expect_success 'required filter clean failure' '
git config filter.failclean.smudge cat &&
git config filter.failclean.clean false &&
git config filter.failclean.required true &&
echo "*.fc filter=failclean" >.gitattributes &&
echo test >test.fc &&
test_must_fail git add test.fc
'
convert: stream from fd to required clean filter to reduce used address space The data is streamed to the filter process anyway. Better avoid mapping the file if possible. This is especially useful if a clean filter reduces the size, for example if it computes a sha1 for binary data, like git media. The file size that the previous implementation could handle was limited by the available address space; large files for example could not be handled with (32-bit) msysgit. The new implementation can filter files of any size as long as the filter output is small enough. The new code path is only taken if the filter is required. The filter consumes data directly from the fd. If it fails, the original data is not immediately available. The condition can easily be handled as a fatal error, which is expected for a required filter anyway. If the filter was not required, the condition would need to be handled in a different way, like seeking to 0 and reading the data. But this would require more restructuring of the code and is probably not worth it. The obvious approach of falling back to reading all data would not help achieving the main purpose of this patch, which is to handle large files with limited address space. If reading all data is an option, we can simply take the old code path right away and mmap the entire file. The environment variable GIT_MMAP_LIMIT, which has been introduced in a previous commit is used to test that the expected code path is taken. A related test that exercises required filters is modified to verify that the data actually has been modified on its way from the file system to the object store. Signed-off-by: Steffen Prohaska <prohaska@zib.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-08-26 15:23:25 +00:00
test_expect_success 'filtering large input to small output should use little memory' '
git config filter.devnull.clean "cat >/dev/null" &&
git config filter.devnull.required true &&
for i in $(test_seq 1 30); do printf "%1048576d" 1; done >30MB &&
echo "30MB filter=devnull" >.gitattributes &&
GIT_MMAP_LIMIT=1m GIT_ALLOC_LIMIT=1m git add 30MB
'
test_expect_success 'filter that does not read is fine' '
test-genrandom foo $((128 * 1024 + 1)) >big &&
echo "big filter=epipe" >.gitattributes &&
git config filter.epipe.clean "echo xyzzy" &&
git add big &&
git cat-file blob :big >actual &&
echo xyzzy >expect &&
test_cmp expect actual
'
xread, xwrite: limit size of IO to 8MB Checking out 2GB or more through an external filter (see test) fails on Mac OS X 10.8.4 (12E55) for a 64-bit executable with: error: read from external filter cat failed error: cannot feed the input to external filter cat error: cat died of signal 13 error: external filter cat failed 141 error: external filter cat failed The reason is that read() immediately returns with EINVAL when asked to read more than 2GB. According to POSIX [1], if the value of nbyte passed to read() is greater than SSIZE_MAX, the result is implementation-defined. The write function has the same restriction [2]. Since OS X still supports running 32-bit executables, the 32-bit limit (SSIZE_MAX = INT_MAX = 2GB - 1) seems to be also imposed on 64-bit executables under certain conditions. For write, the problem has been addressed earlier [6c642a]. Address the problem for read() and write() differently, by limiting size of IO chunks unconditionally on all platforms in xread() and xwrite(). Large chunks only cause problems, like causing latencies when killing the process, even if OS X was not buggy. Doing IO in reasonably sized smaller chunks should have no negative impact on performance. The compat wrapper clipped_write() introduced earlier [6c642a] is not needed anymore. It will be reverted in a separate commit. The new test catches read and write problems. Note that 'git add' exits with 0 even if it prints filtering errors to stderr. The test, therefore, checks stderr. 'git add' should probably be changed (sometime in another commit) to exit with nonzero if filtering fails. The test could then be changed to use test_must_fail. Thanks to the following people for suggestions and testing: Johannes Sixt <j6t@kdbg.org> John Keeping <john@keeping.me.uk> Jonathan Nieder <jrnieder@gmail.com> Kyle J. McKay <mackyle@gmail.com> Linus Torvalds <torvalds@linux-foundation.org> Torsten Bögershausen <tboegi@web.de> [1] http://pubs.opengroup.org/onlinepubs/009695399/functions/read.html [2] http://pubs.opengroup.org/onlinepubs/009695399/functions/write.html [6c642a] commit 6c642a878688adf46b226903858b53e2d31ac5c3 compate/clipped-write.c: large write(2) fails on Mac OS X/XNU Signed-off-by: Steffen Prohaska <prohaska@zib.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-20 06:43:54 +00:00
test_expect_success EXPENSIVE 'filter large file' '
git config filter.largefile.smudge cat &&
git config filter.largefile.clean cat &&
for i in $(test_seq 1 2048); do printf "%1048576d" 1; done >2GB &&
echo "2GB filter=largefile" >.gitattributes &&
git add 2GB 2>err &&
! test -s err &&
rm -f 2GB &&
git checkout -- 2GB 2>err &&
! test -s err
'
test_expect_success "filter: clean empty file" '
git config filter.in-repo-header.clean "echo cleaned && cat" &&
git config filter.in-repo-header.smudge "sed 1d" &&
echo "empty-in-worktree filter=in-repo-header" >>.gitattributes &&
>empty-in-worktree &&
echo cleaned >expected &&
git add empty-in-worktree &&
git show :empty-in-worktree >actual &&
test_cmp expected actual
'
test_expect_success "filter: smudge empty file" '
git config filter.empty-in-repo.clean "cat >/dev/null" &&
git config filter.empty-in-repo.smudge "echo smudged && cat" &&
echo "empty-in-repo filter=empty-in-repo" >>.gitattributes &&
echo dead data walking >empty-in-repo &&
git add empty-in-repo &&
echo smudged >expected &&
git checkout-index --prefix=filtered- empty-in-repo &&
test_cmp expected filtered-empty-in-repo
'
test_done