git/t/t7008-grep-binary.sh

180 lines
4 KiB
Bash
Raw Normal View History

#!/bin/sh
test_description='git grep in binary files'
. ./test-lib.sh
# Create and commit the fixture "a": the text "binaryQfile" with the Q
# run through q_to_nul (the later test titles refer to that byte as <NUL>).
test_expect_success 'setup' '
	echo "binaryQfile" | q_to_nul >a &&
	git add a &&
	git commit -m.
'
# A plain search that hits "a" must only report that the binary file
# matches, rather than printing the matching line.
test_expect_success 'git grep ina a' '
	echo "Binary file a matches" >expect &&
	git grep ina a >actual &&
	test_cmp expect actual
'

# With -a and -h the output must be byte-for-byte the content of "a".
test_expect_success 'git grep -ah ina a' '
	git grep -ah ina a >actual &&
	test_cmp a actual
'

# With -I the command must exit non-zero and print nothing.
test_expect_success 'git grep -I ina a' '
	>expect &&
	test_must_fail git grep -I ina a >actual &&
	test_cmp expect actual
'
# -c: expect a single counted match for "a".
test_expect_success 'git grep -c ina a' '
	echo "a:1" >expect &&
	git grep -c ina a >actual &&
	test_cmp expect actual
'

# -l: expect just the file name.
test_expect_success 'git grep -l ina a' '
	echo "a" >expect &&
	git grep -l ina a >actual &&
	test_cmp expect actual
'

# -L with a pattern that does not occur in "a": the file must be listed.
test_expect_success 'git grep -L bar a' '
	echo "a" >expect &&
	git grep -L bar a >actual &&
	test_cmp expect actual
'

# -q: must succeed and print nothing.
test_expect_success 'git grep -q ina a' '
	>expect &&
	git grep -q ina a >actual &&
	test_cmp expect actual
'
# The patterns below only occur in the part of "a" after the Q that
# setup passed through q_to_nul.

# Fixed-string search.
test_expect_success 'git grep -F ile a' '
	git grep -F ile a
'

# Fixed-string search, ignoring case.
test_expect_success 'git grep -Fi iLE a' '
	git grep -Fi iLE a
'

# This test actually passes on platforms where regexec() supports the
# flag REG_STARTEND.
test_expect_success 'git grep ile a' '
	git grep ile a
'

# Recorded as a known breakage (test_expect_failure).
test_expect_failure 'git grep .fi a' '
	git grep .fi a
'
# Patterns are read from file "f" with -f; each one is written with a Q
# that is passed through q_to_nul (the titles spell that byte <NUL>).

# -F with y<NUL>f must match "a".
test_expect_success 'git grep -F y<NUL>f a' '
	printf "yQf" | q_to_nul >f &&
	git grep -f f -F a
'

# -F with y<NUL>x must not match "a".
test_expect_success 'git grep -F y<NUL>x a' '
	printf "yQx" | q_to_nul >f &&
	test_must_fail git grep -f f -F a
'

# -Fi with Y<NUL>f must match "a".
test_expect_success 'git grep -Fi Y<NUL>f a' '
	printf "YQf" | q_to_nul >f &&
	git grep -f f -Fi a
'
# Use kwset in grep
#
# Benchmarks for the hot cache case:
#
# before:
# $ perf stat --repeat=5 git grep qwerty > /dev/null
#
# Performance counter stats for 'git grep qwerty' (5 runs):
#
#        3,478,085  cache-misses        #     2.322 M/sec  ( +- 2.690% )
#       11,356,177  cache-references    #     7.582 M/sec  ( +- 2.598% )
#        3,872,184  branch-misses       #     0.363 %      ( +- 0.258% )
#    1,067,367,848  branches            #   712.673 M/sec  ( +- 2.622% )
#    3,828,370,782  instructions        #     0.947 IPC    ( +- 0.033% )
#    4,043,832,831  cycles              #  2700.037 M/sec  ( +- 0.167% )
#            8,518  page-faults         #     0.006 M/sec  ( +- 3.648% )
#              847  CPU-migrations      #     0.001 M/sec  ( +- 3.262% )
#            6,546  context-switches    #     0.004 M/sec  ( +- 2.292% )
#      1497.695495  task-clock-msecs    #     3.303 CPUs   ( +- 2.550% )
#
#      0.453394396  seconds time elapsed  ( +- 0.912% )
#
# after:
# $ perf stat --repeat=5 git grep qwerty > /dev/null
#
# Performance counter stats for 'git grep qwerty' (5 runs):
#
#        2,989,918  cache-misses        #     3.166 M/sec  ( +- 5.013% )
#       10,986,041  cache-references    #    11.633 M/sec  ( +- 4.899% )  (scaled from 95.06%)
#        3,511,993  branch-misses       #     1.422 %      ( +- 0.785% )
#      246,893,561  branches            #   261.433 M/sec  ( +- 3.967% )
#    1,392,727,757  instructions        #     0.564 IPC    ( +- 0.040% )
#    2,468,142,397  cycles              #  2613.494 M/sec  ( +- 0.110% )
#            7,747  page-faults         #     0.008 M/sec  ( +- 3.995% )
#              897  CPU-migrations      #     0.001 M/sec  ( +- 2.383% )
#            6,535  context-switches    #     0.007 M/sec  ( +- 1.993% )
#       944.384228  task-clock-msecs    #     3.177 CPUs   ( +- 0.268% )
#
#      0.297257643  seconds time elapsed  ( +- 0.450% )
#
# So we gain about 35% by using the kwset code.
#
# As a side effect of using kwset two grep tests are fixed by this
# patch. The first is fixed because kwset can deal with case-insensitive
# search containing NULs, something strcasestr cannot do. The second one
# is fixed because we consider patterns containing NULs as fixed strings
# (regcomp cannot accept patterns with NULs).
#
# Signed-off-by: Fredrik Kuivinen <frekui@gmail.com>
# Signed-off-by: Junio C Hamano <gitster@pobox.com>
#
# 2011-08-20 22:42:18 +00:00
# -Fi with Y<NUL>x (pattern read from "f") must not match "a".
test_expect_success 'git grep -Fi Y<NUL>x a' '
	printf "YQx" | q_to_nul >f &&
	test_must_fail git grep -f f -Fi a
'

# Without -F, the pattern y<NUL>f (read from "f") must still match "a".
test_expect_success 'git grep y<NUL>f a' '
	printf "yQf" | q_to_nul >f &&
	git grep -f f a
'
# Use kwset in grep
#
# Benchmarks for the hot cache case:
#
# before:
# $ perf stat --repeat=5 git grep qwerty > /dev/null
#
# Performance counter stats for 'git grep qwerty' (5 runs):
#
#        3,478,085  cache-misses        #     2.322 M/sec  ( +- 2.690% )
#       11,356,177  cache-references    #     7.582 M/sec  ( +- 2.598% )
#        3,872,184  branch-misses       #     0.363 %      ( +- 0.258% )
#    1,067,367,848  branches            #   712.673 M/sec  ( +- 2.622% )
#    3,828,370,782  instructions        #     0.947 IPC    ( +- 0.033% )
#    4,043,832,831  cycles              #  2700.037 M/sec  ( +- 0.167% )
#            8,518  page-faults         #     0.006 M/sec  ( +- 3.648% )
#              847  CPU-migrations      #     0.001 M/sec  ( +- 3.262% )
#            6,546  context-switches    #     0.004 M/sec  ( +- 2.292% )
#      1497.695495  task-clock-msecs    #     3.303 CPUs   ( +- 2.550% )
#
#      0.453394396  seconds time elapsed  ( +- 0.912% )
#
# after:
# $ perf stat --repeat=5 git grep qwerty > /dev/null
#
# Performance counter stats for 'git grep qwerty' (5 runs):
#
#        2,989,918  cache-misses        #     3.166 M/sec  ( +- 5.013% )
#       10,986,041  cache-references    #    11.633 M/sec  ( +- 4.899% )  (scaled from 95.06%)
#        3,511,993  branch-misses       #     1.422 %      ( +- 0.785% )
#      246,893,561  branches            #   261.433 M/sec  ( +- 3.967% )
#    1,392,727,757  instructions        #     0.564 IPC    ( +- 0.040% )
#    2,468,142,397  cycles              #  2613.494 M/sec  ( +- 0.110% )
#            7,747  page-faults         #     0.008 M/sec  ( +- 3.995% )
#              897  CPU-migrations      #     0.001 M/sec  ( +- 2.383% )
#            6,535  context-switches    #     0.007 M/sec  ( +- 1.993% )
#       944.384228  task-clock-msecs    #     3.177 CPUs   ( +- 0.268% )
#
#      0.297257643  seconds time elapsed  ( +- 0.450% )
#
# So we gain about 35% by using the kwset code.
#
# As a side effect of using kwset two grep tests are fixed by this
# patch. The first is fixed because kwset can deal with case-insensitive
# search containing NULs, something strcasestr cannot do. The second one
# is fixed because we consider patterns containing NULs as fixed strings
# (regcomp cannot accept patterns with NULs).
#
# Signed-off-by: Fredrik Kuivinen <frekui@gmail.com>
# Signed-off-by: Junio C Hamano <gitster@pobox.com>
#
# 2011-08-20 22:42:18 +00:00
# Without -F, the pattern y<NUL>x (read from "f") must not match "a".
test_expect_success 'git grep y<NUL>x a' '
	printf "yQx" | q_to_nul >f &&
	test_must_fail git grep -f f a
'
# Stage a plain text file "t" and check it is reported line by line; then
# mark it "-diff" in .gitattributes and check it is reported as a binary
# match instead. The final "expect" and .gitattributes are left in place.
test_expect_success 'grep respects binary diff attribute' '
	echo "text" >t &&
	git add t &&

	echo "t:text" >expect &&
	git grep text t >actual &&
	test_cmp expect actual &&

	echo "t -diff" >.gitattributes &&
	echo "Binary file t matches" >expect &&
	git grep text t >actual &&
	test_cmp expect actual
'
# Same check against the index (--cached). This test does not write
# "expect" itself; it compares against whatever file is already present.
test_expect_success 'grep --cached respects binary diff attribute' '
	git grep --cached text t >actual &&
	test_cmp expect actual
'
# Stage .gitattributes and delete it from the work tree, then check that
# "git grep --cached" still produces the output stored in "expect" (this
# test does not write "expect" itself).
#
# Each cleanup is registered immediately after the state change it
# undoes, so the index and work tree are restored even if the grep or the
# comparison fails. test_when_finished prepends to the cleanup list, so
# "git checkout .gitattributes" runs first (restoring the file from the
# index) and "git rm --cached .gitattributes" runs second (unstaging it).
test_expect_success 'grep --cached respects binary diff attribute (2)' '
	git add .gitattributes &&
	test_when_finished "git rm --cached .gitattributes" &&
	rm .gitattributes &&
	test_when_finished "git checkout .gitattributes" &&
	git grep --cached text t >actual &&
	test_cmp expect actual
'
# Commit what is currently staged and grep inside that revision; the hit
# in "t" must be reported as a binary match for HEAD:t.
#
# The "git reset HEAD^" cleanup is registered right after the commit
# succeeds, so the extra commit is undone even when the grep or the
# comparison fails.
test_expect_success 'grep revision respects binary diff attribute' '
	git commit -m new &&
	test_when_finished "git reset HEAD^" &&
	echo "Binary file HEAD:t matches" >expect &&
	git grep text HEAD -- t >actual &&
	test_cmp expect actual
'
# Stage "b" (its Q passed through q_to_nul) and check it is reported as a
# binary match; then give it the "diff" attribute and check the matching
# line itself is printed.
#
# The second grep writes to actual.raw first and nul_to_q is applied in a
# separate step: piping "git grep" straight into nul_to_q would discard
# git's exit status, so a failing or crashing grep could go unnoticed.
test_expect_success 'grep respects not-binary diff attribute' '
	echo binQary | q_to_nul >b &&
	git add b &&
	echo "Binary file b matches" >expect &&
	git grep bin b >actual &&
	test_cmp expect actual &&
	echo "b diff" >.gitattributes &&
	echo "b:binQary" >expect &&
	git grep bin b >actual.raw &&
	nul_to_q <actual.raw >actual &&
	test_cmp expect actual
'
# Write the helper script "nul_to_q_textconv" into the current directory
# and make it executable. The script runs "$PERL_PATH" to print the file
# named by its first argument with every NUL byte replaced by "Q". The
# here-doc delimiter is quoted, so $PERL_PATH and $1 are stored literally
# and only expanded when the helper itself runs.
cat >nul_to_q_textconv <<\EOF
#!/bin/sh
"$PERL_PATH" -pe 'y/\000/Q/' < "$1"
EOF
chmod +x nul_to_q_textconv
# Give "a" the diff driver "foo" and set that driver's textconv command
# to the nul_to_q_textconv helper in the current directory. The directory
# part of the configured value is wrapped in literal double quotes.
test_expect_success 'setup textconv filters' '
	echo "a diff=foo" >.gitattributes &&
	git config diff.foo.textconv "\"$(pwd)\"/nul_to_q_textconv"
'
# "Qfile" can only match the textconv output of "a" (the helper turns the
# NUL into Q), so without --textconv the search must fail.
test_expect_success 'grep does not honor textconv' '
	test_must_fail git grep Qfile
'

# With --textconv the converted line must be printed.
test_expect_success 'grep --textconv honors textconv' '
	echo "a:binaryQfile" >expect &&
	git grep --textconv Qfile >actual &&
	test_cmp expect actual
'

# An explicit --no-textconv must fail just like the default.
test_expect_success 'grep --no-textconv does not honor textconv' '
	test_must_fail git grep --no-textconv Qfile
'

# --textconv must also apply when the blob is named directly as HEAD:a.
test_expect_success 'grep --textconv blob honors textconv' '
	echo "HEAD:a:binaryQfile" >expect &&
	git grep --textconv Qfile HEAD:a >actual &&
	test_cmp expect actual
'
test_done