git/t/t3011-common-prefixes-and-directory-traversal.sh

211 lines
5.5 KiB
Bash
Raw Permalink Normal View History

#!/bin/sh
test_description='directory traversal handling, especially with common prefixes'
TEST_PASSES_SANITIZE_LEAK=true
. ./test-lib.sh
test_expect_success 'setup' '
test_commit hello &&
>empty &&
mkdir untracked_dir &&
>untracked_dir/empty &&
git init untracked_repo &&
>untracked_repo/empty &&
cat <<-EOF >.gitignore &&
ignored
an_ignored_dir/
EOF
mkdir an_ignored_dir &&
mkdir an_untracked_dir &&
>an_ignored_dir/ignored &&
>an_ignored_dir/untracked &&
>an_untracked_dir/ignored &&
>an_untracked_dir/untracked
'
test_expect_success 'git ls-files -o shows the right entries' '
cat <<-EOF >expect &&
.gitignore
actual
an_ignored_dir/ignored
an_ignored_dir/untracked
an_untracked_dir/ignored
an_untracked_dir/untracked
empty
expect
untracked_dir/empty
untracked_repo/
EOF
git ls-files -o >actual &&
test_cmp expect actual
'
test_expect_success 'git ls-files -o --exclude-standard shows the right entries' '
cat <<-EOF >expect &&
.gitignore
actual
an_untracked_dir/untracked
empty
expect
untracked_dir/empty
untracked_repo/
EOF
git ls-files -o --exclude-standard >actual &&
test_cmp expect actual
'
test_expect_success 'git ls-files -o untracked_dir recurses' '
echo untracked_dir/empty >expect &&
git ls-files -o untracked_dir >actual &&
test_cmp expect actual
'
test_expect_success 'git ls-files -o untracked_dir/ recurses' '
echo untracked_dir/empty >expect &&
git ls-files -o untracked_dir/ >actual &&
test_cmp expect actual
'
test_expect_success 'git ls-files -o --directory untracked_dir does not recurse' '
echo untracked_dir/ >expect &&
git ls-files -o --directory untracked_dir >actual &&
test_cmp expect actual
'
dir: fix checks on common prefix directory Many years ago, the directory traversing logic had an optimization that would always recurse into any directory that was a common prefix of all the pathspecs without walking the leading directories to get down to the desired directory. Thus, git ls-files -o .git/ # case A would notice that .git/ was a common prefix of all pathspecs (since it is the only pathspec listed), and then traverse into it and start showing unknown files under that directory. Unfortunately, .git/ is not a directory we should be traversing into, which made this optimization problematic. This also affected cases like git ls-files -o --exclude-standard t/ # case B where t/ was in the .gitignore file and thus isn't interesting and shouldn't be recursed into. It also affected cases like git ls-files -o --directory untracked_dir/ # case C where untracked_dir/ is indeed untracked and thus interesting, but the --directory flag means we only want to show the directory itself, not recurse into it and start listing untracked files below it. The case B class of bugs were noted and fixed in commits 16e2cfa90993 ("read_directory(): further split treat_path()", 2010-01-08) and 48ffef966c76 ("ls-files: fix overeager pathspec optimization", 2010-01-08), with the idea being that we first wanted to check whether the common prefix was interesting. The former patch noted that treat_path() couldn't be used when checking the common prefix because treat_path() requires a dir_entry() and we haven't read any directories at the point we are checking the common prefix. So, that patch split treat_one_path() out of treat_path(). The latter patch then created a new treat_leading_path() which duplicated by hand the bits of treat_path() that couldn't be broken out and then called treat_one_path() for the remainder. There were three problems with this approach: * The duplicated logic in treat_leading_path() accidentally missed the check for special paths (such as is_dot_or_dotdot and matching ".git"), causing case A types of bugs to continue to be an issue. * The treat_leading_path() logic assumed we should traverse into anything where path_treatment was not path_none, i.e. it perpetuated class C types of bugs. * It meant we had split logic that needed to kept in sync, running the risk that people introduced new inconsistencies (such as in commit be8a84c52669, which we reverted earlier in this series, or in commit df5bcdf83ae which we'll fix in a subsequent commit) Fix most these problems by making treat_leading_path() not only loop over each leading path component, but calling treat_path() directly on each. To do so, we have to create a synthetic dir_entry, but that only takes a few lines. Then, pay attention to the path_treatment result we get from treat_path() and don't treat path_excluded, path_untracked, and path_recurse all the same as path_recurse. This leaves one remaining problem, the new inconsistency from commit df5bcdf83ae. That will be addressed in a subsequent commit. Signed-off-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-12-19 21:28:24 +00:00
test_expect_success 'git ls-files -o --directory untracked_dir/ does not recurse' '
echo untracked_dir/ >expect &&
git ls-files -o --directory untracked_dir/ >actual &&
test_cmp expect actual
'
test_expect_success 'git ls-files -o untracked_repo does not recurse' '
echo untracked_repo/ >expect &&
git ls-files -o untracked_repo >actual &&
test_cmp expect actual
'
dir: fix checks on common prefix directory Many years ago, the directory traversing logic had an optimization that would always recurse into any directory that was a common prefix of all the pathspecs without walking the leading directories to get down to the desired directory. Thus, git ls-files -o .git/ # case A would notice that .git/ was a common prefix of all pathspecs (since it is the only pathspec listed), and then traverse into it and start showing unknown files under that directory. Unfortunately, .git/ is not a directory we should be traversing into, which made this optimization problematic. This also affected cases like git ls-files -o --exclude-standard t/ # case B where t/ was in the .gitignore file and thus isn't interesting and shouldn't be recursed into. It also affected cases like git ls-files -o --directory untracked_dir/ # case C where untracked_dir/ is indeed untracked and thus interesting, but the --directory flag means we only want to show the directory itself, not recurse into it and start listing untracked files below it. The case B class of bugs were noted and fixed in commits 16e2cfa90993 ("read_directory(): further split treat_path()", 2010-01-08) and 48ffef966c76 ("ls-files: fix overeager pathspec optimization", 2010-01-08), with the idea being that we first wanted to check whether the common prefix was interesting. The former patch noted that treat_path() couldn't be used when checking the common prefix because treat_path() requires a dir_entry() and we haven't read any directories at the point we are checking the common prefix. So, that patch split treat_one_path() out of treat_path(). The latter patch then created a new treat_leading_path() which duplicated by hand the bits of treat_path() that couldn't be broken out and then called treat_one_path() for the remainder. There were three problems with this approach: * The duplicated logic in treat_leading_path() accidentally missed the check for special paths (such as is_dot_or_dotdot and matching ".git"), causing case A types of bugs to continue to be an issue. * The treat_leading_path() logic assumed we should traverse into anything where path_treatment was not path_none, i.e. it perpetuated class C types of bugs. * It meant we had split logic that needed to kept in sync, running the risk that people introduced new inconsistencies (such as in commit be8a84c52669, which we reverted earlier in this series, or in commit df5bcdf83ae which we'll fix in a subsequent commit) Fix most these problems by making treat_leading_path() not only loop over each leading path component, but calling treat_path() directly on each. To do so, we have to create a synthetic dir_entry, but that only takes a few lines. Then, pay attention to the path_treatment result we get from treat_path() and don't treat path_excluded, path_untracked, and path_recurse all the same as path_recurse. This leaves one remaining problem, the new inconsistency from commit df5bcdf83ae. That will be addressed in a subsequent commit. Signed-off-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-12-19 21:28:24 +00:00
test_expect_success 'git ls-files -o untracked_repo/ does not recurse' '
echo untracked_repo/ >expect &&
git ls-files -o untracked_repo/ >actual &&
test_cmp expect actual
'
test_expect_success 'git ls-files -o untracked_dir untracked_repo recurses into untracked_dir only' '
cat <<-EOF >expect &&
untracked_dir/empty
untracked_repo/
EOF
git ls-files -o untracked_dir untracked_repo >actual &&
test_cmp expect actual
'
test_expect_success 'git ls-files -o untracked_dir/ untracked_repo/ recurses into untracked_dir only' '
cat <<-EOF >expect &&
untracked_dir/empty
untracked_repo/
EOF
git ls-files -o untracked_dir/ untracked_repo/ >actual &&
test_cmp expect actual
'
test_expect_success 'git ls-files -o --directory untracked_dir untracked_repo does not recurse' '
cat <<-EOF >expect &&
untracked_dir/
untracked_repo/
EOF
git ls-files -o --directory untracked_dir untracked_repo >actual &&
test_cmp expect actual
'
test_expect_success 'git ls-files -o --directory untracked_dir/ untracked_repo/ does not recurse' '
cat <<-EOF >expect &&
untracked_dir/
untracked_repo/
EOF
git ls-files -o --directory untracked_dir/ untracked_repo/ >actual &&
test_cmp expect actual
'
test_expect_success 'git ls-files -o .git shows nothing' '
git ls-files -o .git >actual &&
test_must_be_empty actual
'
dir: fix checks on common prefix directory Many years ago, the directory traversing logic had an optimization that would always recurse into any directory that was a common prefix of all the pathspecs without walking the leading directories to get down to the desired directory. Thus, git ls-files -o .git/ # case A would notice that .git/ was a common prefix of all pathspecs (since it is the only pathspec listed), and then traverse into it and start showing unknown files under that directory. Unfortunately, .git/ is not a directory we should be traversing into, which made this optimization problematic. This also affected cases like git ls-files -o --exclude-standard t/ # case B where t/ was in the .gitignore file and thus isn't interesting and shouldn't be recursed into. It also affected cases like git ls-files -o --directory untracked_dir/ # case C where untracked_dir/ is indeed untracked and thus interesting, but the --directory flag means we only want to show the directory itself, not recurse into it and start listing untracked files below it. The case B class of bugs were noted and fixed in commits 16e2cfa90993 ("read_directory(): further split treat_path()", 2010-01-08) and 48ffef966c76 ("ls-files: fix overeager pathspec optimization", 2010-01-08), with the idea being that we first wanted to check whether the common prefix was interesting. The former patch noted that treat_path() couldn't be used when checking the common prefix because treat_path() requires a dir_entry() and we haven't read any directories at the point we are checking the common prefix. So, that patch split treat_one_path() out of treat_path(). The latter patch then created a new treat_leading_path() which duplicated by hand the bits of treat_path() that couldn't be broken out and then called treat_one_path() for the remainder. There were three problems with this approach: * The duplicated logic in treat_leading_path() accidentally missed the check for special paths (such as is_dot_or_dotdot and matching ".git"), causing case A types of bugs to continue to be an issue. * The treat_leading_path() logic assumed we should traverse into anything where path_treatment was not path_none, i.e. it perpetuated class C types of bugs. * It meant we had split logic that needed to kept in sync, running the risk that people introduced new inconsistencies (such as in commit be8a84c52669, which we reverted earlier in this series, or in commit df5bcdf83ae which we'll fix in a subsequent commit) Fix most these problems by making treat_leading_path() not only loop over each leading path component, but calling treat_path() directly on each. To do so, we have to create a synthetic dir_entry, but that only takes a few lines. Then, pay attention to the path_treatment result we get from treat_path() and don't treat path_excluded, path_untracked, and path_recurse all the same as path_recurse. This leaves one remaining problem, the new inconsistency from commit df5bcdf83ae. That will be addressed in a subsequent commit. Signed-off-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-12-19 21:28:24 +00:00
test_expect_success 'git ls-files -o .git/ shows nothing' '
git ls-files -o .git/ >actual &&
test_must_be_empty actual
'
test_expect_success FUNNYNAMES 'git ls-files -o untracked_* recurses appropriately' '
mkdir "untracked_*" &&
>"untracked_*/empty" &&
cat <<-EOF >expect &&
untracked_*/empty
untracked_dir/empty
untracked_repo/
EOF
git ls-files -o "untracked_*" >actual &&
test_cmp expect actual
'
# It turns out fill_directory returns the right paths, but ls-files' post-call
# filtering in show_dir_entry() via calling dir_path_match() which ends up
# in git_fnmatch() has logic for PATHSPEC_ONESTAR that assumes the pathspec
# must match the full path; it doesn't check it for matching a leading
# directory.
test_expect_failure FUNNYNAMES 'git ls-files -o untracked_*/ recurses appropriately' '
cat <<-EOF >expect &&
untracked_*/empty
untracked_dir/empty
untracked_repo/
EOF
git ls-files -o "untracked_*/" >actual &&
test_cmp expect actual
'
test_expect_success FUNNYNAMES 'git ls-files -o --directory untracked_* does not recurse' '
cat <<-EOF >expect &&
untracked_*/
untracked_dir/
untracked_repo/
EOF
git ls-files -o --directory "untracked_*" >actual &&
test_cmp expect actual
'
test_expect_success FUNNYNAMES 'git ls-files -o --directory untracked_*/ does not recurse' '
cat <<-EOF >expect &&
untracked_*/
untracked_dir/
untracked_repo/
EOF
git ls-files -o --directory "untracked_*/" >actual &&
test_cmp expect actual
'
test_expect_success 'git ls-files -o consistent between one or two dirs' '
git ls-files -o --exclude-standard an_ignored_dir/ an_untracked_dir/ >tmp &&
! grep ^an_ignored_dir/ tmp >expect &&
git ls-files -o --exclude-standard an_ignored_dir/ >actual &&
test_cmp expect actual
'
# ls-files doesn't have a way to request showing both untracked and ignored
# files at the same time, so use `git status --ignored`
2019-12-19 21:28:25 +00:00
test_expect_success 'git status --ignored shows same files under dir with or without pathspec' '
cat <<-EOF >expect &&
?? an_untracked_dir/
!! an_untracked_dir/ignored
EOF
git status --porcelain --ignored >output &&
grep an_untracked_dir output >expect &&
git status --porcelain --ignored an_untracked_dir/ >actual &&
test_cmp expect actual
'
test_done