git/t/t7300-clean.sh

804 lines
18 KiB
Bash
Raw Normal View History

#!/bin/sh
#
# Copyright (c) 2007 Michael Spang
#
test_description='git clean basic tests'
. ./test-lib.sh
git config clean.requireForce no
test_expect_success 'setup' '
mkdir -p src &&
touch src/part1.c Makefile &&
echo build >.gitignore &&
echo \*.o >>.gitignore &&
git add . &&
git commit -m setup &&
touch src/part2.c README &&
git add .
'
test_expect_success 'git clean with skip-worktree .gitignore' '
git update-index --skip-worktree .gitignore &&
rm .gitignore &&
mkdir -p build docs &&
touch a.out src/part3.c docs/manual.txt obj.o build/lib.so &&
git clean &&
test -f Makefile &&
test -f README &&
test -f src/part1.c &&
test -f src/part2.c &&
test ! -f a.out &&
test ! -f src/part3.c &&
test -f docs/manual.txt &&
test -f obj.o &&
test -f build/lib.so &&
git update-index --no-skip-worktree .gitignore &&
git checkout .gitignore
'
test_expect_success 'git clean' '
mkdir -p build docs &&
touch a.out src/part3.c docs/manual.txt obj.o build/lib.so &&
git clean &&
test -f Makefile &&
test -f README &&
test -f src/part1.c &&
test -f src/part2.c &&
test ! -f a.out &&
test ! -f src/part3.c &&
test -f docs/manual.txt &&
test -f obj.o &&
test -f build/lib.so
'
test_expect_success 'git clean src/' '
mkdir -p build docs &&
touch a.out src/part3.c docs/manual.txt obj.o build/lib.so &&
git clean src/ &&
test -f Makefile &&
test -f README &&
test -f src/part1.c &&
test -f src/part2.c &&
test -f a.out &&
test ! -f src/part3.c &&
test -f docs/manual.txt &&
test -f obj.o &&
test -f build/lib.so
'
test_expect_success 'git clean src/ src/' '
mkdir -p build docs &&
touch a.out src/part3.c docs/manual.txt obj.o build/lib.so &&
git clean src/ src/ &&
test -f Makefile &&
test -f README &&
test -f src/part1.c &&
test -f src/part2.c &&
test -f a.out &&
test ! -f src/part3.c &&
test -f docs/manual.txt &&
test -f obj.o &&
test -f build/lib.so
'
test_expect_success 'git clean with prefix' '
mkdir -p build docs src/test &&
touch a.out src/part3.c docs/manual.txt obj.o build/lib.so src/test/1.c &&
(cd src/ && git clean) &&
test -f Makefile &&
test -f README &&
test -f src/part1.c &&
test -f src/part2.c &&
test -f a.out &&
test ! -f src/part3.c &&
test -f src/test/1.c &&
test -f docs/manual.txt &&
test -f obj.o &&
test -f build/lib.so
'
test_expect_success 'git clean with relative prefix' '
mkdir -p build docs &&
touch a.out src/part3.c docs/manual.txt obj.o build/lib.so &&
would_clean=$(
cd docs &&
git clean -n ../src |
clean: disambiguate the definition of -d The -d flag pre-dated git-clean's ability to have paths specified. As such, the default for git-clean was to only remove untracked files in the current directory, and -d existed to allow it to recurse into subdirectories. The interaction of paths and the -d option appears to not have been carefully considered, as evidenced by numerous bugs and a dearth of tests covering such pairings in the testsuite. The definition turns out to be important, so let's look at some of the various ways one could interpret the -d option: A) Without -d, only look in subdirectories which contain tracked files under them; with -d, also look in subdirectories which are untracked for files to clean. B) Without specified paths from the user for us to delete, we need to have some kind of default, so...without -d, only look in subdirectories which contain tracked files under them; with -d, also look in subdirectories which are untracked for files to clean. The important distinction here is that choice B says that the presence or absence of '-d' is irrelevant if paths are specified. The logic behind option B is that if a user explicitly asked us to clean a specified pathspec, then we should clean anything that matches that pathspec. Some examples may clarify. Should git clean -f untracked_dir/file remove untracked_dir/file or not? It seems crazy not to, but a strict reading of option A says it shouldn't be removed. How about git clean -f untracked_dir/file1 tracked_dir/file2 or git clean -f untracked_dir_1/file1 untracked_dir_2/file2 ? Should it remove either or both of these files? Should it require multiple runs to remove both the files listed? (If this sounds like a crazy question to even ask, see the commit message of "t7300: Add some testcases showing failure to clean specified pathspecs" added earlier in this patch series.) What if -ffd were used instead of -f -- should that allow these to be removed? Should it take multiple invocations with -ffd? What if a glob (such as '*tracked*') were used instead of spelling out the directory names? What if the filenames involved globs, such as git clean -f '*.o' or git clean -f '*/*.o' ? The current documentation actually suggests a definition that is slightly different than choice A, and the implementation prior to this series provided something radically different than either choices A or B. (The implementation, though, was clearly just buggy). There may be other choices as well. However, for almost any given choice of definition for -d that I can think of, some of the examples above will appear buggy to the user. The only case that doesn't have negative surprises is choice B: treat a user-specified path as a request to clean all untracked files which match that path specification, including recursing into any untracked directories. Change the documentation and basic implementation to use this definition. There were two regression tests that indirectly depended on the current implementation, but neither was about subdirectory handling. These two tests were introduced in commit 5b7570cfb41c ("git-clean: add tests for relative path", 2008-03-07) which was solely created to add coverage for the changes in commit fb328947c8e ("git-clean: correct printing relative path", 2008-03-07). Both tests specified a directory that happened to have an untracked subdirectory, but both were only checking that the resulting printout of a file that was removed was shown with a relative path. Update these tests appropriately. Signed-off-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-09-17 16:35:01 +00:00
grep part3 |
sed -n -e "s|^Would remove ||p"
) &&
test "$would_clean" = ../src/part3.c
'
test_expect_success 'git clean with absolute path' '
mkdir -p build docs &&
touch a.out src/part3.c docs/manual.txt obj.o build/lib.so &&
would_clean=$(
cd docs &&
git clean -n "$(pwd)/../src" |
clean: disambiguate the definition of -d The -d flag pre-dated git-clean's ability to have paths specified. As such, the default for git-clean was to only remove untracked files in the current directory, and -d existed to allow it to recurse into subdirectories. The interaction of paths and the -d option appears to not have been carefully considered, as evidenced by numerous bugs and a dearth of tests covering such pairings in the testsuite. The definition turns out to be important, so let's look at some of the various ways one could interpret the -d option: A) Without -d, only look in subdirectories which contain tracked files under them; with -d, also look in subdirectories which are untracked for files to clean. B) Without specified paths from the user for us to delete, we need to have some kind of default, so...without -d, only look in subdirectories which contain tracked files under them; with -d, also look in subdirectories which are untracked for files to clean. The important distinction here is that choice B says that the presence or absence of '-d' is irrelevant if paths are specified. The logic behind option B is that if a user explicitly asked us to clean a specified pathspec, then we should clean anything that matches that pathspec. Some examples may clarify. Should git clean -f untracked_dir/file remove untracked_dir/file or not? It seems crazy not to, but a strict reading of option A says it shouldn't be removed. How about git clean -f untracked_dir/file1 tracked_dir/file2 or git clean -f untracked_dir_1/file1 untracked_dir_2/file2 ? Should it remove either or both of these files? Should it require multiple runs to remove both the files listed? (If this sounds like a crazy question to even ask, see the commit message of "t7300: Add some testcases showing failure to clean specified pathspecs" added earlier in this patch series.) What if -ffd were used instead of -f -- should that allow these to be removed? Should it take multiple invocations with -ffd? What if a glob (such as '*tracked*') were used instead of spelling out the directory names? What if the filenames involved globs, such as git clean -f '*.o' or git clean -f '*/*.o' ? The current documentation actually suggests a definition that is slightly different than choice A, and the implementation prior to this series provided something radically different than either choices A or B. (The implementation, though, was clearly just buggy). There may be other choices as well. However, for almost any given choice of definition for -d that I can think of, some of the examples above will appear buggy to the user. The only case that doesn't have negative surprises is choice B: treat a user-specified path as a request to clean all untracked files which match that path specification, including recursing into any untracked directories. Change the documentation and basic implementation to use this definition. There were two regression tests that indirectly depended on the current implementation, but neither was about subdirectory handling. These two tests were introduced in commit 5b7570cfb41c ("git-clean: add tests for relative path", 2008-03-07) which was solely created to add coverage for the changes in commit fb328947c8e ("git-clean: correct printing relative path", 2008-03-07). Both tests specified a directory that happened to have an untracked subdirectory, but both were only checking that the resulting printout of a file that was removed was shown with a relative path. Update these tests appropriately. Signed-off-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-09-17 16:35:01 +00:00
grep part3 |
sed -n -e "s|^Would remove ||p"
) &&
test "$would_clean" = ../src/part3.c
'
test_expect_success 'git clean with out of work tree relative path' '
mkdir -p build docs &&
touch a.out src/part3.c docs/manual.txt obj.o build/lib.so &&
(
cd docs &&
test_must_fail git clean -n ../..
)
'
test_expect_success 'git clean with out of work tree absolute path' '
mkdir -p build docs &&
touch a.out src/part3.c docs/manual.txt obj.o build/lib.so &&
dd=$(cd .. && pwd) &&
(
cd docs &&
test_must_fail git clean -n $dd
)
'
test_expect_success 'git clean -d with prefix and path' '
mkdir -p build docs src/feature &&
touch a.out src/part3.c src/feature/file.c docs/manual.txt obj.o build/lib.so &&
(cd src/ && git clean -d feature/) &&
test -f Makefile &&
test -f README &&
test -f src/part1.c &&
test -f src/part2.c &&
test -f a.out &&
test -f src/part3.c &&
test ! -f src/feature/file.c &&
test -f docs/manual.txt &&
test -f obj.o &&
test -f build/lib.so
'
test_expect_success SYMLINKS 'git clean symbolic link' '
mkdir -p build docs &&
touch a.out src/part3.c docs/manual.txt obj.o build/lib.so &&
ln -s docs/manual.txt src/part4.c &&
git clean &&
test -f Makefile &&
test -f README &&
test -f src/part1.c &&
test -f src/part2.c &&
test ! -f a.out &&
test ! -f src/part3.c &&
test ! -f src/part4.c &&
test -f docs/manual.txt &&
test -f obj.o &&
test -f build/lib.so
'
test_expect_success 'git clean with wildcard' '
touch a.clean b.clean other.c &&
git clean "*.clean" &&
test -f Makefile &&
test -f README &&
test -f src/part1.c &&
test -f src/part2.c &&
test ! -f a.clean &&
test ! -f b.clean &&
test -f other.c
'
test_expect_success 'git clean -n' '
mkdir -p build docs &&
touch a.out src/part3.c docs/manual.txt obj.o build/lib.so &&
git clean -n &&
test -f Makefile &&
test -f README &&
test -f src/part1.c &&
test -f src/part2.c &&
test -f a.out &&
test -f src/part3.c &&
test -f docs/manual.txt &&
test -f obj.o &&
test -f build/lib.so
'
test_expect_success 'git clean -d' '
mkdir -p build docs &&
touch a.out src/part3.c docs/manual.txt obj.o build/lib.so &&
git clean -d &&
test -f Makefile &&
test -f README &&
test -f src/part1.c &&
test -f src/part2.c &&
test ! -f a.out &&
test ! -f src/part3.c &&
test ! -d docs &&
test -f obj.o &&
test -f build/lib.so
'
test_expect_success 'git clean -d src/ examples/' '
mkdir -p build docs examples &&
touch a.out src/part3.c docs/manual.txt obj.o build/lib.so examples/1.c &&
git clean -d src/ examples/ &&
test -f Makefile &&
test -f README &&
test -f src/part1.c &&
test -f src/part2.c &&
test -f a.out &&
test ! -f src/part3.c &&
test ! -f examples/1.c &&
test -f docs/manual.txt &&
test -f obj.o &&
test -f build/lib.so
'
test_expect_success 'git clean -x' '
mkdir -p build docs &&
touch a.out src/part3.c docs/manual.txt obj.o build/lib.so &&
git clean -x &&
test -f Makefile &&
test -f README &&
test -f src/part1.c &&
test -f src/part2.c &&
test ! -f a.out &&
test ! -f src/part3.c &&
test -f docs/manual.txt &&
test ! -f obj.o &&
test -f build/lib.so
'
test_expect_success 'git clean -d -x' '
mkdir -p build docs &&
touch a.out src/part3.c docs/manual.txt obj.o build/lib.so &&
git clean -d -x &&
test -f Makefile &&
test -f README &&
test -f src/part1.c &&
test -f src/part2.c &&
test ! -f a.out &&
test ! -f src/part3.c &&
test ! -d docs &&
test ! -f obj.o &&
test ! -d build
'
test_expect_success 'git clean -d -x with ignored tracked directory' '
mkdir -p build docs &&
touch a.out src/part3.c docs/manual.txt obj.o build/lib.so &&
git clean -d -x -e src &&
test -f Makefile &&
test -f README &&
test -f src/part1.c &&
test -f src/part2.c &&
test ! -f a.out &&
test -f src/part3.c &&
test ! -d docs &&
test ! -f obj.o &&
test ! -d build
'
test_expect_success 'git clean -X' '
mkdir -p build docs &&
touch a.out src/part3.c docs/manual.txt obj.o build/lib.so &&
git clean -X &&
test -f Makefile &&
test -f README &&
test -f src/part1.c &&
test -f src/part2.c &&
test -f a.out &&
test -f src/part3.c &&
test -f docs/manual.txt &&
test ! -f obj.o &&
test -f build/lib.so
'
test_expect_success 'git clean -d -X' '
mkdir -p build docs &&
touch a.out src/part3.c docs/manual.txt obj.o build/lib.so &&
git clean -d -X &&
test -f Makefile &&
test -f README &&
test -f src/part1.c &&
test -f src/part2.c &&
test -f a.out &&
test -f src/part3.c &&
test -f docs/manual.txt &&
test ! -f obj.o &&
test ! -d build
'
test_expect_success 'git clean -d -X with ignored tracked directory' '
mkdir -p build docs &&
touch a.out src/part3.c docs/manual.txt obj.o build/lib.so &&
git clean -d -X -e src &&
test -f Makefile &&
test -f README &&
test -f src/part1.c &&
test -f src/part2.c &&
test -f a.out &&
test ! -f src/part3.c &&
test -f docs/manual.txt &&
test ! -f obj.o &&
test ! -d build
'
test_expect_success 'clean.requireForce defaults to true' '
git config --unset clean.requireForce &&
test_must_fail git clean
'
test_expect_success 'clean.requireForce' '
git config clean.requireForce true &&
test_must_fail git clean
'
test_expect_success 'clean.requireForce and -n' '
mkdir -p build docs &&
touch a.out src/part3.c docs/manual.txt obj.o build/lib.so &&
git clean -n &&
test -f Makefile &&
test -f README &&
test -f src/part1.c &&
test -f src/part2.c &&
test -f a.out &&
test -f src/part3.c &&
test -f docs/manual.txt &&
test -f obj.o &&
test -f build/lib.so
'
test_expect_success 'clean.requireForce and -f' '
git clean -f &&
test -f README &&
test -f src/part1.c &&
test -f src/part2.c &&
test ! -f a.out &&
test ! -f src/part3.c &&
test -f docs/manual.txt &&
test -f obj.o &&
test -f build/lib.so
'
clean: further clean-up of implementation around "--force" We clarified how "clean.requireForce" interacts with the "--dry-run" option in the previous commit, both in the implementation and in the documentation. Even when "git clean" (without other options) is required to be used with "--force" (i.e. either clean.requireForce is unset, or explicitly set to true) to protect end-users from casual invocation of the command by mistake, "--dry-run" does not require "--force" to be used, because it is already its own protection mechanism by being a no-op to the working tree files. The previous commit, however, missed another clean-up opportunity around the same area. Just like in the "--dry-run" mode, the command in the "--interactive" mode does not require "--force", either. This is because by going interactive and giving the end user one more chance to confirm, the mode itself is serving as its own protection mechanism. Let's take things one step further, and unify the code that defines interaction between "--force" and these two other options. Just like we added explanation for the reason why "--dry-run" does not honor "clean.requireForce", give an explanation for the reason why "--interactive" makes "clean.requireForce" to be ignored. Finally, add some tests to show the interaction between "--force" and "--interactive". We already have tests that show interaction between "--force" and "--dry-run", but didn't test "--interactive". Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-03 22:06:00 +00:00
test_expect_success 'clean.requireForce and --interactive' '
git clean --interactive </dev/null >output 2>error &&
test_grep ! "requireForce is true and" error &&
test_grep "\*\*\* Commands \*\*\*" output
'
test_expect_success 'core.excludesfile' '
echo excludes >excludes &&
echo included >included &&
git config core.excludesfile excludes &&
output=$(git clean -n excludes included 2>&1) &&
expr "$output" : ".*included" >/dev/null &&
! expr "$output" : ".*excludes" >/dev/null
'
test_expect_success SANITY 'removal failure' '
mkdir foo &&
touch foo/bar &&
test_when_finished "chmod 755 foo" &&
(exec <foo/bar &&
chmod 0 foo &&
test_must_fail git clean -f -d)
'
test_expect_success 'nested git work tree' '
rm -fr foo bar baz &&
mkdir -p foo bar baz/boo &&
(
cd foo &&
git init &&
test_commit nested hello.world
) &&
(
cd bar &&
>goodbye.people
) &&
(
cd baz/boo &&
git init &&
test_commit deeply.nested deeper.world
) &&
git clean -f -d &&
test -f foo/.git/index &&
test -f foo/hello.world &&
test -f baz/boo/.git/index &&
test -f baz/boo/deeper.world &&
! test -d bar
'
clean: improve performance when removing lots of directories "git clean" uses resolve_gitlink_ref() to check for the presence of nested git repositories, but it has the drawback of creating a ref_cache entry for every directory that should potentially be cleaned. The linear search through the ref_cache list causes a massive performance hit for large number of directories. Modify clean.c:remove_dirs to use setup.c:is_git_directory and setup.c:read_gitfile_gently instead. Both these functions will open files and parse contents when they find something that looks like a git repository. This is ok from a performance standpoint since finding repository candidates should be comparatively rare. Using is_git_directory and read_gitfile_gently should give a more standardized check for what is and what isn't a git repository but also gives three behavioral changes. The first change is that we will now detect and avoid cleaning empty nested git repositories (only init run). This is desirable. Second, we will no longer die when cleaning a file named ".git" with garbage content (it will be cleaned instead). This is also desirable. The last change is that we will detect and avoid cleaning empty bare repositories that have been placed in a directory named ".git". This is not desirable but should have no real user impact since we already fail to clean non-empty bare repositories in the same scenario. This is thus deemed acceptable. On top of this we add some extra precautions. If read_gitfile_gently fails to open the git file, read the git file or verify the path in the git file we assume that the path with the git file is a valid repository and avoid cleaning. Update t7300 to reflect these changes in behavior. The time to clean an untracked directory containing 100000 sub directories went from 61s to 1.7s after this change. Helped-by: Jeff King <peff@peff.net> Signed-off-by: Erik Elfström <erik.elfstrom@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-06-15 19:39:55 +00:00
test_expect_success 'should clean things that almost look like git but are not' '
rm -fr almost_git almost_bare_git almost_submodule &&
mkdir -p almost_git/.git/objects &&
mkdir -p almost_git/.git/refs &&
cat >almost_git/.git/HEAD <<-\EOF &&
garbage
EOF
cp -r almost_git/.git/ almost_bare_git &&
mkdir almost_submodule/ &&
cat >almost_submodule/.git <<-\EOF &&
garbage
EOF
test_when_finished "rm -rf almost_*" &&
git clean -f -d &&
test_path_is_missing almost_git &&
test_path_is_missing almost_bare_git &&
test_path_is_missing almost_submodule
'
test_expect_success 'should not clean submodules' '
rm -fr repo to_clean sub1 sub2 &&
mkdir repo to_clean &&
(
cd repo &&
git init &&
test_commit msg hello.world
) &&
test_config_global protocol.file.allow always &&
git submodule add ./repo/.git sub1 &&
git commit -m "sub1" &&
git branch before_sub2 &&
git submodule add ./repo/.git sub2 &&
git commit -m "sub2" &&
git checkout before_sub2 &&
>to_clean/should_clean.this &&
git clean -f -d &&
test_path_is_file repo/.git/index &&
test_path_is_file repo/hello.world &&
test_path_is_file sub1/.git &&
test_path_is_file sub1/hello.world &&
test_path_is_file sub2/.git &&
test_path_is_file sub2/hello.world &&
test_path_is_missing to_clean
'
test_expect_success POSIXPERM,SANITY 'should avoid cleaning possible submodules' '
rm -fr to_clean possible_sub1 &&
mkdir to_clean possible_sub1 &&
test_when_finished "rm -rf possible_sub*" &&
echo "gitdir: foo" >possible_sub1/.git &&
>possible_sub1/hello.world &&
chmod 0 possible_sub1/.git &&
>to_clean/should_clean.this &&
git clean -f -d &&
test_path_is_file possible_sub1/.git &&
test_path_is_file possible_sub1/hello.world &&
test_path_is_missing to_clean
'
clean: improve performance when removing lots of directories "git clean" uses resolve_gitlink_ref() to check for the presence of nested git repositories, but it has the drawback of creating a ref_cache entry for every directory that should potentially be cleaned. The linear search through the ref_cache list causes a massive performance hit for large number of directories. Modify clean.c:remove_dirs to use setup.c:is_git_directory and setup.c:read_gitfile_gently instead. Both these functions will open files and parse contents when they find something that looks like a git repository. This is ok from a performance standpoint since finding repository candidates should be comparatively rare. Using is_git_directory and read_gitfile_gently should give a more standardized check for what is and what isn't a git repository but also gives three behavioral changes. The first change is that we will now detect and avoid cleaning empty nested git repositories (only init run). This is desirable. Second, we will no longer die when cleaning a file named ".git" with garbage content (it will be cleaned instead). This is also desirable. The last change is that we will detect and avoid cleaning empty bare repositories that have been placed in a directory named ".git". This is not desirable but should have no real user impact since we already fail to clean non-empty bare repositories in the same scenario. This is thus deemed acceptable. On top of this we add some extra precautions. If read_gitfile_gently fails to open the git file, read the git file or verify the path in the git file we assume that the path with the git file is a valid repository and avoid cleaning. Update t7300 to reflect these changes in behavior. The time to clean an untracked directory containing 100000 sub directories went from 61s to 1.7s after this change. Helped-by: Jeff King <peff@peff.net> Signed-off-by: Erik Elfström <erik.elfstrom@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-06-15 19:39:55 +00:00
test_expect_success 'nested (empty) git should be kept' '
rm -fr empty_repo to_clean &&
git init empty_repo &&
mkdir to_clean &&
>to_clean/should_clean.this &&
# Note that we put the expect file in the .git directory so that it
# does not get cleaned.
find empty_repo | sort >.git/expect &&
git clean -f -d &&
find empty_repo | sort >actual &&
test_cmp .git/expect actual &&
test_path_is_missing to_clean
'
test_expect_success 'nested bare repositories should be cleaned' '
rm -fr bare1 bare2 subdir &&
git init --bare bare1 &&
git clone --local --bare . bare2 &&
mkdir subdir &&
cp -r bare2 subdir/bare3 &&
git clean -f -d &&
test_path_is_missing bare1 &&
test_path_is_missing bare2 &&
test_path_is_missing subdir
'
clean: improve performance when removing lots of directories "git clean" uses resolve_gitlink_ref() to check for the presence of nested git repositories, but it has the drawback of creating a ref_cache entry for every directory that should potentially be cleaned. The linear search through the ref_cache list causes a massive performance hit for large number of directories. Modify clean.c:remove_dirs to use setup.c:is_git_directory and setup.c:read_gitfile_gently instead. Both these functions will open files and parse contents when they find something that looks like a git repository. This is ok from a performance standpoint since finding repository candidates should be comparatively rare. Using is_git_directory and read_gitfile_gently should give a more standardized check for what is and what isn't a git repository but also gives three behavioral changes. The first change is that we will now detect and avoid cleaning empty nested git repositories (only init run). This is desirable. Second, we will no longer die when cleaning a file named ".git" with garbage content (it will be cleaned instead). This is also desirable. The last change is that we will detect and avoid cleaning empty bare repositories that have been placed in a directory named ".git". This is not desirable but should have no real user impact since we already fail to clean non-empty bare repositories in the same scenario. This is thus deemed acceptable. On top of this we add some extra precautions. If read_gitfile_gently fails to open the git file, read the git file or verify the path in the git file we assume that the path with the git file is a valid repository and avoid cleaning. Update t7300 to reflect these changes in behavior. The time to clean an untracked directory containing 100000 sub directories went from 61s to 1.7s after this change. Helped-by: Jeff King <peff@peff.net> Signed-off-by: Erik Elfström <erik.elfstrom@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-06-15 19:39:55 +00:00
test_expect_failure 'nested (empty) bare repositories should be cleaned even when in .git' '
rm -fr strange_bare &&
mkdir strange_bare &&
git init --bare strange_bare/.git &&
git clean -f -d &&
test_path_is_missing strange_bare
'
test_expect_failure 'nested (non-empty) bare repositories should be cleaned even when in .git' '
rm -fr strange_bare &&
mkdir strange_bare &&
git clone --local --bare . strange_bare/.git &&
git clean -f -d &&
test_path_is_missing strange_bare
'
clean: avoid removing untracked files in a nested git repository Users expect files in a nested git repository to be left alone unless sufficiently forced (with two -f's). Unfortunately, in certain circumstances, git would delete both tracked (and possibly dirty) files and untracked files within a nested repository. To explain how this happens, let's contrast a couple cases. First, take the following example setup (which assumes we are already within a git repo): git init nested cd nested >tracked git add tracked git commit -m init >untracked cd .. In this setup, everything works as expected; running 'git clean -fd' will result in fill_directory() returning the following paths: nested/ nested/tracked nested/untracked and then correct_untracked_entries() would notice this can be compressed to nested/ and then since "nested/" is a directory, we would call remove_dirs("nested/", ...), which would check is_nonbare_repository_dir() and then decide to skip it. However, if someone also creates an ignored file: >nested/ignored then running 'git clean -fd' would result in fill_directory() returning the same paths: nested/ nested/tracked nested/untracked but correct_untracked_entries() will notice that we had ignored entries under nested/ and thus simplify this list to nested/tracked nested/untracked Since these are not directories, we do not call remove_dirs() which was the only place that had the is_nonbare_repository_dir() safety check -- resulting in us deleting both the untracked file and the tracked (and possibly dirty) file. One possible fix for this issue would be walking the parent directories of each path and checking if they represent nonbare repositories, but that would be wasteful. Even if we added caching of some sort, it's still a waste because we should have been able to check that "nested/" represented a nonbare repository before even descending into it in the first place. Add a DIR_SKIP_NESTED_GIT flag to dir_struct.flags and use it to prevent fill_directory() and friends from descending into nested git repos. With this change, we also modify two regression tests added in commit 91479b9c72f1 ("t7300: add tests to document behavior of clean and nested git", 2015-06-15). That commit, nor its series, nor the six previous iterations of that series on the mailing list discussed why those tests coded the expectation they did. In fact, it appears their purpose was simply to test _existing_ behavior to make sure that the performance changes didn't change the behavior. However, these two tests directly contradicted the manpage's claims that two -f's were required to delete files/directories under a nested git repository. While one could argue that the user gave an explicit path which matched files/directories that were within a nested repository, there's a slippery slope that becomes very difficult for users to understand once you go down that route (e.g. what if they specified "git clean -f -d '*.c'"?) It would also be hard to explain what the exact behavior was; avoid such problems by making it really simple. Also, clean up some grammar errors describing this functionality in the git-clean manpage. Finally, there are still a couple bugs with -ffd not cleaning out enough (e.g. missing the nested .git) and with -ffdX possibly cleaning out the wrong files (paying attention to outer .gitignore instead of inner). This patch does not address these cases at all (and does not change the behavior relative to those flags), it only fixes the handling when given a single -f. See https://public-inbox.org/git/20190905212043.GC32087@szeder.dev/ for more discussion of the -ffd[X?] bugs. Signed-off-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-09-17 16:35:02 +00:00
test_expect_success 'giving path in nested git work tree will NOT remove it' '
rm -fr repo &&
mkdir repo &&
(
cd repo &&
git init &&
mkdir -p bar/baz &&
test_commit msg bar/baz/hello.world
) &&
find repo | sort >expect &&
git clean -f -d repo/bar/baz &&
find repo | sort >actual &&
test_cmp expect actual
'
test_expect_success 'giving path to nested .git will not remove it' '
rm -fr repo &&
mkdir repo untracked &&
(
cd repo &&
git init &&
test_commit msg hello.world
) &&
find repo | sort >expect &&
git clean -f -d repo/.git &&
find repo | sort >actual &&
test_cmp expect actual &&
test_path_is_dir untracked/
'
clean: avoid removing untracked files in a nested git repository Users expect files in a nested git repository to be left alone unless sufficiently forced (with two -f's). Unfortunately, in certain circumstances, git would delete both tracked (and possibly dirty) files and untracked files within a nested repository. To explain how this happens, let's contrast a couple cases. First, take the following example setup (which assumes we are already within a git repo): git init nested cd nested >tracked git add tracked git commit -m init >untracked cd .. In this setup, everything works as expected; running 'git clean -fd' will result in fill_directory() returning the following paths: nested/ nested/tracked nested/untracked and then correct_untracked_entries() would notice this can be compressed to nested/ and then since "nested/" is a directory, we would call remove_dirs("nested/", ...), which would check is_nonbare_repository_dir() and then decide to skip it. However, if someone also creates an ignored file: >nested/ignored then running 'git clean -fd' would result in fill_directory() returning the same paths: nested/ nested/tracked nested/untracked but correct_untracked_entries() will notice that we had ignored entries under nested/ and thus simplify this list to nested/tracked nested/untracked Since these are not directories, we do not call remove_dirs() which was the only place that had the is_nonbare_repository_dir() safety check -- resulting in us deleting both the untracked file and the tracked (and possibly dirty) file. One possible fix for this issue would be walking the parent directories of each path and checking if they represent nonbare repositories, but that would be wasteful. Even if we added caching of some sort, it's still a waste because we should have been able to check that "nested/" represented a nonbare repository before even descending into it in the first place. Add a DIR_SKIP_NESTED_GIT flag to dir_struct.flags and use it to prevent fill_directory() and friends from descending into nested git repos. With this change, we also modify two regression tests added in commit 91479b9c72f1 ("t7300: add tests to document behavior of clean and nested git", 2015-06-15). That commit, nor its series, nor the six previous iterations of that series on the mailing list discussed why those tests coded the expectation they did. In fact, it appears their purpose was simply to test _existing_ behavior to make sure that the performance changes didn't change the behavior. However, these two tests directly contradicted the manpage's claims that two -f's were required to delete files/directories under a nested git repository. While one could argue that the user gave an explicit path which matched files/directories that were within a nested repository, there's a slippery slope that becomes very difficult for users to understand once you go down that route (e.g. what if they specified "git clean -f -d '*.c'"?) It would also be hard to explain what the exact behavior was; avoid such problems by making it really simple. Also, clean up some grammar errors describing this functionality in the git-clean manpage. Finally, there are still a couple bugs with -ffd not cleaning out enough (e.g. missing the nested .git) and with -ffdX possibly cleaning out the wrong files (paying attention to outer .gitignore instead of inner). This patch does not address these cases at all (and does not change the behavior relative to those flags), it only fixes the handling when given a single -f. See https://public-inbox.org/git/20190905212043.GC32087@szeder.dev/ for more discussion of the -ffd[X?] bugs. Signed-off-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-09-17 16:35:02 +00:00
test_expect_success 'giving path to nested .git/ will NOT remove contents' '
rm -fr repo untracked &&
mkdir repo untracked &&
(
cd repo &&
git init &&
test_commit msg hello.world
) &&
find repo | sort >expect &&
git clean -f -d repo/.git/ &&
find repo | sort >actual &&
test_cmp expect actual &&
test_path_is_dir untracked/
'
test_expect_success 'force removal of nested git work tree' '
rm -fr foo bar baz &&
mkdir -p foo bar baz/boo &&
(
cd foo &&
git init &&
test_commit nested hello.world
) &&
(
cd bar &&
>goodbye.people
) &&
(
cd baz/boo &&
git init &&
test_commit deeply.nested deeper.world
) &&
git clean -f -f -d &&
! test -d foo &&
! test -d bar &&
! test -d baz
'
test_expect_success 'git clean -e' '
rm -fr repo &&
mkdir repo &&
(
cd repo &&
git init &&
touch known 1 2 3 &&
git add known &&
git clean -f -e 1 -e 2 &&
test -e 1 &&
test -e 2 &&
! (test -e 3) &&
test -e known
)
'
test_expect_success SANITY 'git clean -d with an unreadable empty directory' '
mkdir foo &&
chmod a= foo &&
git clean -dfx foo &&
! test -d foo
'
test_expect_success 'git clean -d respects pathspecs (dir is prefix of pathspec)' '
mkdir -p foo &&
mkdir -p foobar &&
git clean -df foobar &&
test_path_is_dir foo &&
test_path_is_missing foobar
'
test_expect_success 'git clean -d respects pathspecs (pathspec is prefix of dir)' '
mkdir -p foo &&
mkdir -p foobar &&
git clean -df foo &&
test_path_is_missing foo &&
test_path_is_dir foobar
'
test_expect_success 'git clean -d skips untracked dirs containing ignored files' '
echo /foo/bar >.gitignore &&
echo ignoreme >>.gitignore &&
rm -rf foo &&
mkdir -p foo/a/aa/aaa foo/b/bb/bbb &&
touch foo/bar foo/baz foo/a/aa/ignoreme foo/b/ignoreme foo/b/bb/1 foo/b/bb/2 &&
git clean -df &&
test_path_is_dir foo &&
test_path_is_file foo/bar &&
test_path_is_missing foo/baz &&
test_path_is_file foo/a/aa/ignoreme &&
test_path_is_missing foo/a/aa/aaa &&
test_path_is_file foo/b/ignoreme &&
test_path_is_missing foo/b/bb
'
clean: avoid removing untracked files in a nested git repository Users expect files in a nested git repository to be left alone unless sufficiently forced (with two -f's). Unfortunately, in certain circumstances, git would delete both tracked (and possibly dirty) files and untracked files within a nested repository. To explain how this happens, let's contrast a couple cases. First, take the following example setup (which assumes we are already within a git repo): git init nested cd nested >tracked git add tracked git commit -m init >untracked cd .. In this setup, everything works as expected; running 'git clean -fd' will result in fill_directory() returning the following paths: nested/ nested/tracked nested/untracked and then correct_untracked_entries() would notice this can be compressed to nested/ and then since "nested/" is a directory, we would call remove_dirs("nested/", ...), which would check is_nonbare_repository_dir() and then decide to skip it. However, if someone also creates an ignored file: >nested/ignored then running 'git clean -fd' would result in fill_directory() returning the same paths: nested/ nested/tracked nested/untracked but correct_untracked_entries() will notice that we had ignored entries under nested/ and thus simplify this list to nested/tracked nested/untracked Since these are not directories, we do not call remove_dirs() which was the only place that had the is_nonbare_repository_dir() safety check -- resulting in us deleting both the untracked file and the tracked (and possibly dirty) file. One possible fix for this issue would be walking the parent directories of each path and checking if they represent nonbare repositories, but that would be wasteful. Even if we added caching of some sort, it's still a waste because we should have been able to check that "nested/" represented a nonbare repository before even descending into it in the first place. Add a DIR_SKIP_NESTED_GIT flag to dir_struct.flags and use it to prevent fill_directory() and friends from descending into nested git repos. With this change, we also modify two regression tests added in commit 91479b9c72f1 ("t7300: add tests to document behavior of clean and nested git", 2015-06-15). That commit, nor its series, nor the six previous iterations of that series on the mailing list discussed why those tests coded the expectation they did. In fact, it appears their purpose was simply to test _existing_ behavior to make sure that the performance changes didn't change the behavior. However, these two tests directly contradicted the manpage's claims that two -f's were required to delete files/directories under a nested git repository. While one could argue that the user gave an explicit path which matched files/directories that were within a nested repository, there's a slippery slope that becomes very difficult for users to understand once you go down that route (e.g. what if they specified "git clean -f -d '*.c'"?) It would also be hard to explain what the exact behavior was; avoid such problems by making it really simple. Also, clean up some grammar errors describing this functionality in the git-clean manpage. Finally, there are still a couple bugs with -ffd not cleaning out enough (e.g. missing the nested .git) and with -ffdX possibly cleaning out the wrong files (paying attention to outer .gitignore instead of inner). This patch does not address these cases at all (and does not change the behavior relative to those flags), it only fixes the handling when given a single -f. See https://public-inbox.org/git/20190905212043.GC32087@szeder.dev/ for more discussion of the -ffd[X?] bugs. Signed-off-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-09-17 16:35:02 +00:00
test_expect_success 'git clean -d skips nested repo containing ignored files' '
test_when_finished "rm -rf nested-repo-with-ignored-file" &&
git init nested-repo-with-ignored-file &&
(
cd nested-repo-with-ignored-file &&
>file &&
git add file &&
git commit -m Initial &&
# This file is ignored by a .gitignore rule in the outer repo
# added in the previous test.
>ignoreme
) &&
git clean -fd &&
test_path_is_file nested-repo-with-ignored-file/.git/index &&
test_path_is_file nested-repo-with-ignored-file/ignoreme &&
test_path_is_file nested-repo-with-ignored-file/file
'
dir: if our pathspec might match files under a dir, recurse into it For git clean, if a directory is entirely untracked and the user did not specify -d (corresponding to DIR_SHOW_IGNORED_TOO), then we usually do not want to remove that directory and thus do not recurse into it. However, if the user manually specified specific (or even globbed) paths somewhere under that directory to remove, then we need to recurse into the directory to make sure we remove the relevant paths under that directory as the user requested. Note that this does not mean that the recursed-into directory will be added to dir->entries for later removal; as of a few commits earlier in this series, there is another more strict match check that is run after returning from a recursed-into directory before deciding to add it to the list of entries. Therefore, this will only result in files underneath the given directory which match one of the pathspecs being added to the entries list. Two notes of potential interest to future readers: * If we wanted to only recurse into a directory when it is specifically matched rather than matched-via-glob (e.g. '*.c'), then we could do so via making the final non-zero return in match_pathspec_item be MATCHED_RECURSIVELY instead of MATCHED_RECURSIVELY_LEADING_PATHSPEC. (Note that the relative order of MATCHED_RECURSIVELY_LEADING_PATHSPEC and MATCHED_RECURSIVELY are important for such a change.) I was leaving open that possibility while writing an RFC asking for the behavior we want, but even though we don't want it, that knowledge might help you understand the code flow better. * There is a growing amount of logic in read_directory_recursive() for deciding whether to recurse into a subdirectory. However, there is a comment immediately preceding this logic that says to recurse if instructed by treat_path(). It may be better for the logic in read_directory_recursive() to ultimately be moved to treat_path() (or another function it calls, such as treat_directory()), but I have left that for someone else to tackle in the future. Signed-off-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-09-17 16:34:58 +00:00
test_expect_success 'git clean handles being told what to clean' '
t7300: add testcases showing failure to clean specified pathspecs Someone brought me a testcase where multiple git-clean invocations were required to clean out unwanted files: mkdir d{1,2} touch d{1,2}/ut touch d1/t && git add d1/t With this setup, the user would need to run git clean -ffd */ut twice to delete both ut files. A little testing showed some interesting variants: * If only one of those two ut files existed (either one), then only one clean command would be necessary. * If both directories had tracked files, then only one git clean would be necessary to clean both files. * If both directories had no tracked files then the clean command above would never clean either of the untracked files despite the pathspec explicitly calling both of them out. A bisect showed that the failure to clean out the files started with commit cf424f5fd89b ("clean: respect pathspecs with "-d", 2014-03-10). However, that pointed to a separate issue: while the "-d" flag was used by the original user who showed me this problem, that flag should have been irrelevant to this problem. Testing again without the "-d" flag showed that the same buggy behavior exists without using that flag, and has in fact existed since before cf424f5fd89b. Although these problems at first are perceived to be different (e.g. never clearing out the requested files vs. taking multiple invocations to get everything cleared out), they are actually just different manifestations of the same problem. The case with multiple directories that have no tracked files is the more general case; solving it will solve all the others. So, I concentrate on it. Add testcases showing that multiple untracked files within entirely untracked directories cannot be cleaned when specifying these files to git clean via pathspecs. Signed-off-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-09-17 16:34:53 +00:00
mkdir -p d1 d2 &&
touch d1/ut d2/ut &&
git clean -f */ut &&
test_path_is_missing d1/ut &&
test_path_is_missing d2/ut
'
test_expect_success 'git clean handles being told what to clean, with -d' '
t7300: add testcases showing failure to clean specified pathspecs Someone brought me a testcase where multiple git-clean invocations were required to clean out unwanted files: mkdir d{1,2} touch d{1,2}/ut touch d1/t && git add d1/t With this setup, the user would need to run git clean -ffd */ut twice to delete both ut files. A little testing showed some interesting variants: * If only one of those two ut files existed (either one), then only one clean command would be necessary. * If both directories had tracked files, then only one git clean would be necessary to clean both files. * If both directories had no tracked files then the clean command above would never clean either of the untracked files despite the pathspec explicitly calling both of them out. A bisect showed that the failure to clean out the files started with commit cf424f5fd89b ("clean: respect pathspecs with "-d", 2014-03-10). However, that pointed to a separate issue: while the "-d" flag was used by the original user who showed me this problem, that flag should have been irrelevant to this problem. Testing again without the "-d" flag showed that the same buggy behavior exists without using that flag, and has in fact existed since before cf424f5fd89b. Although these problems at first are perceived to be different (e.g. never clearing out the requested files vs. taking multiple invocations to get everything cleared out), they are actually just different manifestations of the same problem. The case with multiple directories that have no tracked files is the more general case; solving it will solve all the others. So, I concentrate on it. Add testcases showing that multiple untracked files within entirely untracked directories cannot be cleaned when specifying these files to git clean via pathspecs. Signed-off-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-09-17 16:34:53 +00:00
mkdir -p d1 d2 &&
touch d1/ut d2/ut &&
git clean -ffd */ut &&
test_path_is_missing d1/ut &&
test_path_is_missing d2/ut
'
dir: if our pathspec might match files under a dir, recurse into it For git clean, if a directory is entirely untracked and the user did not specify -d (corresponding to DIR_SHOW_IGNORED_TOO), then we usually do not want to remove that directory and thus do not recurse into it. However, if the user manually specified specific (or even globbed) paths somewhere under that directory to remove, then we need to recurse into the directory to make sure we remove the relevant paths under that directory as the user requested. Note that this does not mean that the recursed-into directory will be added to dir->entries for later removal; as of a few commits earlier in this series, there is another more strict match check that is run after returning from a recursed-into directory before deciding to add it to the list of entries. Therefore, this will only result in files underneath the given directory which match one of the pathspecs being added to the entries list. Two notes of potential interest to future readers: * If we wanted to only recurse into a directory when it is specifically matched rather than matched-via-glob (e.g. '*.c'), then we could do so via making the final non-zero return in match_pathspec_item be MATCHED_RECURSIVELY instead of MATCHED_RECURSIVELY_LEADING_PATHSPEC. (Note that the relative order of MATCHED_RECURSIVELY_LEADING_PATHSPEC and MATCHED_RECURSIVELY are important for such a change.) I was leaving open that possibility while writing an RFC asking for the behavior we want, but even though we don't want it, that knowledge might help you understand the code flow better. * There is a growing amount of logic in read_directory_recursive() for deciding whether to recurse into a subdirectory. However, there is a comment immediately preceding this logic that says to recurse if instructed by treat_path(). It may be better for the logic in read_directory_recursive() to ultimately be moved to treat_path() (or another function it calls, such as treat_directory()), but I have left that for someone else to tackle in the future. Signed-off-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-09-17 16:34:58 +00:00
test_expect_success 'git clean works if a glob is passed without -d' '
t7300: add testcases showing failure to clean specified pathspecs Someone brought me a testcase where multiple git-clean invocations were required to clean out unwanted files: mkdir d{1,2} touch d{1,2}/ut touch d1/t && git add d1/t With this setup, the user would need to run git clean -ffd */ut twice to delete both ut files. A little testing showed some interesting variants: * If only one of those two ut files existed (either one), then only one clean command would be necessary. * If both directories had tracked files, then only one git clean would be necessary to clean both files. * If both directories had no tracked files then the clean command above would never clean either of the untracked files despite the pathspec explicitly calling both of them out. A bisect showed that the failure to clean out the files started with commit cf424f5fd89b ("clean: respect pathspecs with "-d", 2014-03-10). However, that pointed to a separate issue: while the "-d" flag was used by the original user who showed me this problem, that flag should have been irrelevant to this problem. Testing again without the "-d" flag showed that the same buggy behavior exists without using that flag, and has in fact existed since before cf424f5fd89b. Although these problems at first are perceived to be different (e.g. never clearing out the requested files vs. taking multiple invocations to get everything cleared out), they are actually just different manifestations of the same problem. The case with multiple directories that have no tracked files is the more general case; solving it will solve all the others. So, I concentrate on it. Add testcases showing that multiple untracked files within entirely untracked directories cannot be cleaned when specifying these files to git clean via pathspecs. Signed-off-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-09-17 16:34:53 +00:00
mkdir -p d1 d2 &&
touch d1/ut d2/ut &&
git clean -f "*ut" &&
test_path_is_missing d1/ut &&
test_path_is_missing d2/ut
'
test_expect_success 'git clean works if a glob is passed with -d' '
t7300: add testcases showing failure to clean specified pathspecs Someone brought me a testcase where multiple git-clean invocations were required to clean out unwanted files: mkdir d{1,2} touch d{1,2}/ut touch d1/t && git add d1/t With this setup, the user would need to run git clean -ffd */ut twice to delete both ut files. A little testing showed some interesting variants: * If only one of those two ut files existed (either one), then only one clean command would be necessary. * If both directories had tracked files, then only one git clean would be necessary to clean both files. * If both directories had no tracked files then the clean command above would never clean either of the untracked files despite the pathspec explicitly calling both of them out. A bisect showed that the failure to clean out the files started with commit cf424f5fd89b ("clean: respect pathspecs with "-d", 2014-03-10). However, that pointed to a separate issue: while the "-d" flag was used by the original user who showed me this problem, that flag should have been irrelevant to this problem. Testing again without the "-d" flag showed that the same buggy behavior exists without using that flag, and has in fact existed since before cf424f5fd89b. Although these problems at first are perceived to be different (e.g. never clearing out the requested files vs. taking multiple invocations to get everything cleared out), they are actually just different manifestations of the same problem. The case with multiple directories that have no tracked files is the more general case; solving it will solve all the others. So, I concentrate on it. Add testcases showing that multiple untracked files within entirely untracked directories cannot be cleaned when specifying these files to git clean via pathspecs. Signed-off-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-09-17 16:34:53 +00:00
mkdir -p d1 d2 &&
touch d1/ut d2/ut &&
git clean -ffd "*ut" &&
test_path_is_missing d1/ut &&
test_path_is_missing d2/ut
'
test_expect_success MINGW 'handle clean & core.longpaths = false nicely' '
test_config core.longpaths false &&
a50=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa &&
mkdir -p $a50$a50/$a50$a50/$a50$a50 &&
: >"$a50$a50/test.txt" 2>"$a50$a50/$a50$a50/$a50$a50/test.txt" &&
# create a temporary outside the working tree to hide from "git clean"
test_must_fail git clean -xdf 2>.git/err &&
# grepping for a strerror string is unportable but it is OK here with
# MINGW prereq
test_grep "too long" .git/err
'
dir: treat_leading_path() and read_directory_recursive(), round 2 I was going to title this "dir: more synchronizing of treat_leading_path() and read_directory_recursive()", a nod to commit 777b42034764 ("dir: synchronize treat_leading_path() and read_directory_recursive()", 2019-12-19), but the title was too long. Anyway, first the backstory... fill_directory() has always had a slightly error-prone interface: it returns a subset of paths which *might* match the specified pathspec; it was intended to prune away some paths which didn't match the specified pathspec and keep at least all the ones that did match it. Given this interface, callers were responsible to post-process the results and check whether each actually matched the pathspec. builtin/clean.c did this. It would first prune out duplicates (e.g. if "dir" was returned as well as all files under "dir/", then it would simplify this to just "dir"), and after pruning duplicates it would compare the remaining paths to the specified pathspec(s). This post-processing itself could run into problems, though, as noted in commit 404ebceda01c ("dir: also check directories for matching pathspecs", 2019-09-17): For the case of git-clean and a set of pathspecs of "dir/file" and "more", this caused a problem because we'd end up with dir entries for both of "dir" "dir/file" Then correct_untracked_entries() would try to helpfully prune duplicates for us by removing "dir/file" since it's under "dir", leaving us with "dir" Since the original pathspec only had "dir/file", the only entry left doesn't match and leaves nothing to be removed. (Note that if only one pathspec was specified, e.g. only "dir/file", then the common_prefix_len optimizations in fill_directory would cause us to bypass this problem, making it appear in simple tests that we could correctly remove manually specified pathspecs.) That commit fixed the issue -- when multiple pathspecs were specified -- by making sure fill_directory() wouldn't return both "dir" and "dir/file" outside the common_prefix_len optimization path. This is where it starts to get fun. In commit b9670c1f5e6b ("dir: fix checks on common prefix directory", 2019-12-19), we noticed that the common_prefix_len wasn't doing appropriate checks and letting all kinds of stuff through, resulting in recursing into .git/ directories and other craziness. So it started locking down and doing checks on pathnames within that code path. That continued with commit 777b42034764 ("dir: synchronize treat_leading_path() and read_directory_recursive()", 2019-12-19), which noted the following: Our optimization to avoid calling into read_directory_recursive() when all pathspecs have a common leading directory mean that we need to match the logic that read_directory_recursive() would use if we had just called it from the root. Since it does more than call treat_path() we need to copy that same logic. ...and then it more forcefully addressed the issue with this wonderfully ironic statement: Needing to duplicate logic like this means it is guaranteed someone will eventually need to make further changes and forget to update both locations. It is tempting to just nuke the leading_directory special casing to avoid such bugs and simplify the code, but unpack_trees' verify_clean_subdirectory() also calls read_directory() and does so with a non-empty leading path, so I'm hesitant to try to restructure further. Add obnoxious warnings to treat_leading_path() and read_directory_recursive() to try to warn people of such problems. You would think that with such a strongly worded description, that its author would have actually ensured that the logic in treat_leading_path() and read_directory_recursive() did actually match and that *everything* that was needed had at least been copied over at the time that this paragraph was written. But you'd be wrong, I messed it up by missing part of the logic. Copy the missing bits to fix the new final test in t7300. Signed-off-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-01-16 20:21:54 +00:00
test_expect_success 'clean untracked paths by pathspec' '
git init untracked &&
mkdir untracked/dir &&
echo >untracked/dir/file.txt &&
git -C untracked clean -f dir/file.txt &&
ls untracked/dir >actual &&
test_must_be_empty actual
'
dir: avoid unnecessary traversal into ignored directory The show_other_directories case in treat_directory() tried to handle both excludes and untracked files with the same logic, and mishandled both the excludes and the untracked files in the process, in different ways. Split that logic apart, and then focus on the logic for the excludes; a subsequent commit will address the logic for untracked files. For show_other_directories, an excluded directory means that every path underneath that directory will also be excluded. Given that the calling code requested to just show directories when everything under a directory had the same state (that's what the "DIR_SHOW_OTHER_DIRECTORIES" flag means), we generally do not need to traverse into such directories and can just immediately mark them as ignored (i.e. as path_excluded). The only reason we cannot just immediately return path_excluded is the DIR_HIDE_EMPTY_DIRECTORIES flag and the possibility that the ignored directory is an empty directory. The code previously treated DIR_SHOW_IGNORED_TOO in most cases as an exception as well, which was wrong. It can sometimes reduce the number of cases where we need to recurse (namely if DIR_SHOW_IGNORED_TOO_MODE_MATCHING is also set), but should not be able to increase the number of cases where we need to recurse. Fix the logic accordingly. Some sidenotes about possible confusion with dir.c: * "ignored" often refers to an untracked ignore", i.e. a file which is not tracked which matches one of the ignore/exclusion rules. But you can also have a "tracked ignore", a tracked file that happens to match one of the ignore/exclusion rules and which dir.c has to worry about since "git ls-files -c -i" is supposed to list them. * The dir code often uses "ignored" and "excluded" interchangeably, which you need to keep in mind while reading the code. * "exclude" is used multiple ways in the code: * As noted above, "exclude" is often a synonym for "ignored". * The logic for parsing .gitignore files was re-used in .git/info/sparse-checkout, except there it is used to mark paths that the user wants to *keep*. This was mostly addressed by commit 65edd96aec ("treewide: rename 'exclude' methods to 'pattern'", 2019-09-03), but every once in a while you'll find a comment about "exclude" referring to these patterns that might in fact be in use by the sparse-checkout machinery for inclusion rules. * The word "EXCLUDE" is also used for pathspec negation, as in (pathspec->items[3].magic & PATHSPEC_EXCLUDE) Thus if a user had a .gitignore file containing *~ *.log !settings.log And then ran git add -- 'settings.*' ':^settings.log' Then :^settings.log is a pathspec negation making settings.log not be requested to be added even though all other settings.* files are being added. Also, !settings.log in the gitignore file is a negative exclude pattern meaning that settings.log is normally a file we want to track even though all other *.log files are ignored. Sometimes it feels like dir.c needs its own glossary with its many definitions, including the multiply-defined terms. Reported-by: Jason Gore <Jason.Gore@microsoft.com> Signed-off-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-05-12 17:28:19 +00:00
test_expect_success 'avoid traversing into ignored directories' '
test_when_finished rm -f output error trace.* &&
test_create_repo avoid-traversing-deep-hierarchy &&
(
cd avoid-traversing-deep-hierarchy &&
mkdir -p untracked/subdir/with/a &&
>untracked/subdir/with/a/random-file.txt &&
GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace.output" \
git clean -ffdxn -e untracked
) &&
# Make sure we only visited into the top-level directory, and did
# not traverse into the "untracked" subdirectory since it was excluded
grep data.*read_directo.*directories-visited trace.output |
cut -d "|" -f 9 >trace.relevant &&
cat >trace.expect <<-EOF &&
..directories-visited:1
EOF
test_cmp trace.expect trace.relevant
'
test_expect_success 'traverse into directories that may have ignored entries' '
test_when_finished rm -f output &&
test_create_repo need-to-traverse-into-hierarchy &&
(
cd need-to-traverse-into-hierarchy &&
mkdir -p modules/foobar/src/generated &&
> modules/foobar/src/generated/code.c &&
> modules/foobar/Makefile &&
echo "/modules/**/src/generated/" >.gitignore &&
git clean -fX modules/foobar >../output &&
grep Removing ../output &&
test_path_is_missing modules/foobar/src/generated/code.c &&
test_path_is_file modules/foobar/Makefile
)
'
test_done