From ce0330cad88cdcbb4b151a6dc0064d98757a928e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Wed, 24 Jan 2018 16:30:19 +0700 Subject: [PATCH 1/5] status: add a failing test showing a core.untrackedCache bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The untracked cache gets confused when a directory is swapped out for a file. It is easiest to reproduce this by swapping out a directory with a symlink to another directory, and as the tests show the symlink case is the only case we've found where "git status" will subsequently report incorrect information, even though it's possible to otherwise get the untracked cache into a state where its internal data structures don't reflect reality. In the symlink case, whatever files are inside the target of the symlink will be incorrectly shown as untracked. This issue does not happen if the symlink links to another file, only if it links to another directory. A stand-alone testcase for copying into a terminal: ( rm -rf /tmp/testrepo && git init /tmp/testrepo && cd /tmp/testrepo && mkdir x y && touch x/a y/b && git add x/a y/b && git commit -msnap && git rm -rf y && ln -s x y && git add y && git commit -msnap2 && git checkout HEAD~ && git status && git checkout master && sleep 1 && git status && git status ) This will incorrectly show y/a as an untracked file. Both the "git status" call right before "git checkout master" and the "sleep 1" after the "checkout master" are needed to reproduce this, presumably due to the untracked cache tracking on the basis of cached whole seconds from stat(2). When git gets into this state, a workaround to fix it is to issue a one-off: git -c core.untrackedCache=false status For the non-symlink case, the bug is that the output of test-dump-untracked-cache should not include: /one/ 0000000000000000000000000000000000000000 recurse valid It being in the output implies that cached traversal of root includes the directory "one" which does not exist on disk anymore. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- t/t7063-status-untracked-cache.sh | 87 +++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/t/t7063-status-untracked-cache.sh b/t/t7063-status-untracked-cache.sh index e5fb892f95..dba7f50bbb 100755 --- a/t/t7063-status-untracked-cache.sh +++ b/t/t7063-status-untracked-cache.sh @@ -22,6 +22,12 @@ avoid_racy() { sleep 1 } +status_is_clean() { + >../status.expect && + git status --porcelain >../status.actual && + test_cmp ../status.expect ../status.actual +} + test_lazy_prereq UNTRACKED_CACHE ' { git update-index --test-untracked-cache; ret=$?; } && test $ret -ne 1 @@ -683,4 +689,85 @@ test_expect_success 'untracked cache survives a commit' ' test_cmp ../before ../after ' +test_expect_success 'teardown worktree' ' + cd .. +' + +test_expect_success SYMLINKS 'setup worktree for symlink test' ' + git init worktree-symlink && + cd worktree-symlink && + git config core.untrackedCache true && + mkdir one two && + touch one/file two/file && + git add one/file two/file && + git commit -m"first commit" && + git rm -rf one && + ln -s two one && + git add one && + git commit -m"second commit" +' + +test_expect_failure SYMLINKS '"status" after symlink replacement should be clean with UC=true' ' + git checkout HEAD~ && + status_is_clean && + status_is_clean && + git checkout master && + avoid_racy && + status_is_clean && + status_is_clean +' + +test_expect_success SYMLINKS '"status" after symlink replacement should be clean with UC=false' ' + git config core.untrackedCache false && + git checkout HEAD~ && + status_is_clean && + status_is_clean && + git checkout master && + avoid_racy && + status_is_clean && + status_is_clean +' + +test_expect_success 'setup worktree for non-symlink test' ' + git init worktree-non-symlink && + cd worktree-non-symlink && + git config core.untrackedCache true && + mkdir one two && + touch one/file two/file && + git add one/file two/file && + git commit -m"first commit" && + git rm -rf one && + cp two/file one && + git add one && + git commit -m"second commit" +' + +test_expect_failure '"status" after file replacement should be clean with UC=true' ' + git checkout HEAD~ && + status_is_clean && + status_is_clean && + git checkout master && + avoid_racy && + status_is_clean && + test-dump-untracked-cache >../actual && + grep -F "recurse valid" ../actual >../actual.grep && + cat >../expect.grep < Date: Wed, 24 Jan 2018 16:30:20 +0700 Subject: [PATCH 2/5] dir.c: avoid stat() in valid_cached_dir() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stat() may follow a symlink and return stat data of the link's target instead of the link itself. We are concerned about the link itself. It's kind of hard to demonstrate the bug. I think when path->buf is a symlink, we most likely find that its target's stat data does not match our cached one, which means we ignore the cache and fall back to slow path. This is performance issue, not correctness (though we could still catch it by verifying test-dump-untracked-cache. The less unlikely case is, link target stat data matches the cached version and we incorrectly go fast path, ignoring real data on disk. A test for this may involve manipulating stat data, which may be not portable. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dir.c b/dir.c index 3c54366a17..ee1605f004 100644 --- a/dir.c +++ b/dir.c @@ -1739,7 +1739,7 @@ static int valid_cached_dir(struct dir_struct *dir, */ refresh_fsmonitor(istate); if (!(dir->untracked->use_fsmonitor && untracked->valid)) { - if (stat(path->len ? path->buf : ".", &st)) { + if (lstat(path->len ? path->buf : ".", &st)) { invalidate_directory(dir->untracked, untracked); memset(&untracked->stat_data, 0, sizeof(untracked->stat_data)); return 0; From b640313110ab5117b3c45b3445da4ce28967a386 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Wed, 24 Jan 2018 16:30:21 +0700 Subject: [PATCH 3/5] dir.c: fix missing dir invalidation in untracked code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's start with how create a new directory cache after the last one becomes invalid (e.g. because its dir mtime has changed...). In open_cached_dir(): 1. We start out with valid_cached_dir() returning false, which should call invalidate_directory() to put a directory state back to initial state, no untracked entries (untracked_nr zero), no sub directory traversal (dirs[].recurse zero). 2. Since the cache cannot be used, we go the slow path opendir() and go through items one by one via readdir(). All the directories on disk will be added back to the cache (if not already exist in dirs[]) and its flag "recurse" gets changed to one to note that it's part of the cached dir travesal next time. 3. By the time we reach close_cached_dir() we should have a good subdir list in dirs[]. Those with "recurse" flag set are the ones present in the on-disk directory. The directory is now marked "valid". Next time read_directory() is called, since the directory is marked valid, it will skip readdir(), go fast path and traverse through dirs[] array instead. Steps one and two need some tight cooperation. If a subdir is removed, readdir() will not find it and of course we cannot examine/invalidate it. To make sure removed directories on disk are gone from the cache, step one must make sure recurse flag of all subdirs are zero. But that's not true. If "valid" flag is already false, there is a chance we go straight to the end of valid_cached_dir() without calling invalidate_directory(). Or we fail to meet the "if (untracked-valid)" condition and skip over the invalidate_directory(). After step 3, we mark the cache valid. Any stale subdir with incorrect recurse flag becomes a real subdir next time we traverse the directory using dirs[] array. We could avoid this by making sure invalidate_directory() is always called (therefore dirs[].recurse cleared) at the beginning of open_cached_dir(). Which is what this patch does. As to how we get into this situation, the key in the test is this command git checkout master where "one/file" is replaced with "one" in the index. This index update triggers untracked_cache_invalidate_path(), which clears valid flag of the root directory while keeping "recurse" flag on the subdir "one" on. On the next git-status, we go through steps 1-3 above and save an incorrect cache on disk. The second git-status blindly follows the bad cache data and shows the problem. This is arguably because of a bad design where "recurse" flag plays double roles: whether a directory should be saved on disk, and whether it is part of a directory traversal. We need to keep recurse flag set at "checkout master" because of the first role: we need to keep subdir caches (dir "two" for example has not been touched at all, no reason to throw its cache away). As long as we make sure to ignore/reset "recurse" flag at the beginning of a directory traversal, we're good. But maybe eventually we should separate these two roles. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- dir.c | 22 ++++++++++++++-------- t/t7063-status-untracked-cache.sh | 4 ++-- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/dir.c b/dir.c index ee1605f004..44b4dd2ec8 100644 --- a/dir.c +++ b/dir.c @@ -733,7 +733,16 @@ static void invalidate_directory(struct untracked_cache *uc, struct untracked_cache_dir *dir) { int i; - uc->dir_invalidated++; + + /* + * Invalidation increment here is just roughly correct. If + * untracked_nr or any of dirs[].recurse is non-zero, we + * should increment dir_invalidated too. But that's more + * expensive to do. + */ + if (dir->valid) + uc->dir_invalidated++; + dir->valid = 0; dir->untracked_nr = 0; for (i = 0; i < dir->dirs_nr; i++) @@ -1740,23 +1749,18 @@ static int valid_cached_dir(struct dir_struct *dir, refresh_fsmonitor(istate); if (!(dir->untracked->use_fsmonitor && untracked->valid)) { if (lstat(path->len ? path->buf : ".", &st)) { - invalidate_directory(dir->untracked, untracked); memset(&untracked->stat_data, 0, sizeof(untracked->stat_data)); return 0; } if (!untracked->valid || match_stat_data_racy(istate, &untracked->stat_data, &st)) { - if (untracked->valid) - invalidate_directory(dir->untracked, untracked); fill_stat_data(&untracked->stat_data, &st); return 0; } } - if (untracked->check_only != !!check_only) { - invalidate_directory(dir->untracked, untracked); + if (untracked->check_only != !!check_only) return 0; - } /* * prep_exclude will be called eventually on this directory, @@ -1788,8 +1792,10 @@ static int open_cached_dir(struct cached_dir *cdir, if (valid_cached_dir(dir, untracked, istate, path, check_only)) return 0; cdir->fdir = opendir(path->len ? path->buf : "."); - if (dir->untracked) + if (dir->untracked) { + invalidate_directory(dir->untracked, untracked); dir->untracked->dir_opened++; + } if (!cdir->fdir) return -1; return 0; diff --git a/t/t7063-status-untracked-cache.sh b/t/t7063-status-untracked-cache.sh index dba7f50bbb..46b947824f 100755 --- a/t/t7063-status-untracked-cache.sh +++ b/t/t7063-status-untracked-cache.sh @@ -707,7 +707,7 @@ test_expect_success SYMLINKS 'setup worktree for symlink test' ' git commit -m"second commit" ' -test_expect_failure SYMLINKS '"status" after symlink replacement should be clean with UC=true' ' +test_expect_success SYMLINKS '"status" after symlink replacement should be clean with UC=true' ' git checkout HEAD~ && status_is_clean && status_is_clean && @@ -742,7 +742,7 @@ test_expect_success 'setup worktree for non-symlink test' ' git commit -m"second commit" ' -test_expect_failure '"status" after file replacement should be clean with UC=true' ' +test_expect_success '"status" after file replacement should be clean with UC=true' ' git checkout HEAD~ && status_is_clean && status_is_clean && From b6731550749c7326f0dcf26211e7c2e55de00e28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Wed, 24 Jan 2018 16:30:23 +0700 Subject: [PATCH 4/5] dir.c: stop ignoring opendir() error in open_cached_dir() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A follow-up to the recently fixed bugs in the untracked invalidation. If opendir() fails it should show a warning, perhaps this should die, but if this ever happens the error is probably recoverable for the user, and dying would just make things worse. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- dir.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dir.c b/dir.c index 44b4dd2ec8..55736d3e2a 100644 --- a/dir.c +++ b/dir.c @@ -1787,11 +1787,16 @@ static int open_cached_dir(struct cached_dir *cdir, struct strbuf *path, int check_only) { + const char *c_path; + memset(cdir, 0, sizeof(*cdir)); cdir->untracked = untracked; if (valid_cached_dir(dir, untracked, istate, path, check_only)) return 0; - cdir->fdir = opendir(path->len ? path->buf : "."); + c_path = path->len ? path->buf : "."; + cdir->fdir = opendir(c_path); + if (!cdir->fdir) + warning_errno(_("could not open directory '%s'"), c_path); if (dir->untracked) { invalidate_directory(dir->untracked, untracked); dir->untracked->dir_opened++; From 0cacebf099dd6467845419f212acdcfe5f8d923f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Wed, 7 Feb 2018 16:21:40 +0700 Subject: [PATCH 5/5] dir.c: ignore paths containing .git when invalidating untracked cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit read_directory() code ignores all paths named ".git" even if it's not a valid git repository. See treat_path() for details. Since ".git" is basically invisible to read_directory(), when we are asked to invalidate a path that contains ".git", we can safely ignore it because the slow path would not consider it anyway. This helps when fsmonitor is used and we have a real ".git" repo at worktree top. Occasionally .git/index will be updated and if the fsmonitor hook does not filter it, untracked cache is asked to invalidate the path ".git/index". Without this patch, we invalidate the root directory unncessarily, which: - makes read_directory() fall back to slow path for root directory (slower) - makes the index dirty (because UNTR extension is updated). Depending on the index size, writing it down could also be slow. A note about the new "safe_path" knob. Since this new check could be relatively expensive, avoid it when we know it's not needed. If the path comes from the index, it can't contain ".git". If it does contain, we may be screwed up at many more levels, not just this one. Noticed-by: Ævar Arnfjörð Bjarmason Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- dir.c | 10 ++++++---- dir.h | 2 +- fsmonitor.c | 2 +- fsmonitor.h | 2 +- t/t7519-status-fsmonitor.sh | 39 +++++++++++++++++++++++++++++++++++++ unpack-trees.c | 2 +- 6 files changed, 49 insertions(+), 8 deletions(-) diff --git a/dir.c b/dir.c index 55736d3e2a..7afc6756ec 100644 --- a/dir.c +++ b/dir.c @@ -1712,7 +1712,7 @@ static enum path_treatment treat_path(struct dir_struct *dir, if (!de) return treat_path_fast(dir, untracked, cdir, istate, path, baselen, pathspec); - if (is_dot_or_dotdot(de->d_name) || !strcmp(de->d_name, ".git")) + if (is_dot_or_dotdot(de->d_name) || !fspathcmp(de->d_name, ".git")) return path_none; strbuf_setlen(path, baselen); strbuf_addstr(path, de->d_name); @@ -2909,10 +2909,12 @@ static int invalidate_one_component(struct untracked_cache *uc, } void untracked_cache_invalidate_path(struct index_state *istate, - const char *path) + const char *path, int safe_path) { if (!istate->untracked || !istate->untracked->root) return; + if (!safe_path && !verify_path(path)) + return; invalidate_one_component(istate->untracked, istate->untracked->root, path, strlen(path)); } @@ -2920,13 +2922,13 @@ void untracked_cache_invalidate_path(struct index_state *istate, void untracked_cache_remove_from_index(struct index_state *istate, const char *path) { - untracked_cache_invalidate_path(istate, path); + untracked_cache_invalidate_path(istate, path, 1); } void untracked_cache_add_to_index(struct index_state *istate, const char *path) { - untracked_cache_invalidate_path(istate, path); + untracked_cache_invalidate_path(istate, path, 1); } /* Update gitfile and core.worktree setting to connect work tree and git dir */ diff --git a/dir.h b/dir.h index 233a2eb36b..efea4b8803 100644 --- a/dir.h +++ b/dir.h @@ -347,7 +347,7 @@ static inline int dir_path_match(const struct dir_entry *ent, int cmp_dir_entry(const void *p1, const void *p2); int check_dir_entry_contains(const struct dir_entry *out, const struct dir_entry *in); -void untracked_cache_invalidate_path(struct index_state *, const char *); +void untracked_cache_invalidate_path(struct index_state *, const char *, int safe_path); void untracked_cache_remove_from_index(struct index_state *, const char *); void untracked_cache_add_to_index(struct index_state *, const char *); diff --git a/fsmonitor.c b/fsmonitor.c index 0af7c4edba..6d7bcd5d0e 100644 --- a/fsmonitor.c +++ b/fsmonitor.c @@ -130,7 +130,7 @@ static void fsmonitor_refresh_callback(struct index_state *istate, const char *n * as it could be a new untracked file. */ trace_printf_key(&trace_fsmonitor, "fsmonitor_refresh_callback '%s'", name); - untracked_cache_invalidate_path(istate, name); + untracked_cache_invalidate_path(istate, name, 0); } void refresh_fsmonitor(struct index_state *istate) diff --git a/fsmonitor.h b/fsmonitor.h index cd3cc0ccf2..65f3743636 100644 --- a/fsmonitor.h +++ b/fsmonitor.h @@ -65,7 +65,7 @@ static inline void mark_fsmonitor_invalid(struct index_state *istate, struct cac { if (core_fsmonitor) { ce->ce_flags &= ~CE_FSMONITOR_VALID; - untracked_cache_invalidate_path(istate, ce->name); + untracked_cache_invalidate_path(istate, ce->name, 1); trace_printf_key(&trace_fsmonitor, "mark_fsmonitor_invalid '%s'", ce->name); } } diff --git a/t/t7519-status-fsmonitor.sh b/t/t7519-status-fsmonitor.sh index eb2d13bbcf..756beb0d8e 100755 --- a/t/t7519-status-fsmonitor.sh +++ b/t/t7519-status-fsmonitor.sh @@ -314,4 +314,43 @@ test_expect_success 'splitting the index results in the same state' ' test_cmp expect actual ' +test_expect_success UNTRACKED_CACHE 'ignore .git changes when invalidating UNTR' ' + test_create_repo dot-git && + ( + cd dot-git && + mkdir -p .git/hooks && + : >tracked && + : >modified && + mkdir dir1 && + : >dir1/tracked && + : >dir1/modified && + mkdir dir2 && + : >dir2/tracked && + : >dir2/modified && + write_integration_script && + git config core.fsmonitor .git/hooks/fsmonitor-test && + git update-index --untracked-cache && + git update-index --fsmonitor && + GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace-before" \ + git status && + test-dump-untracked-cache >../before + ) && + cat >>dot-git/.git/hooks/fsmonitor-test <<-\EOF && + printf ".git\0" + printf ".git/index\0" + printf "dir1/.git\0" + printf "dir1/.git/index\0" + EOF + ( + cd dot-git && + GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace-after" \ + git status && + test-dump-untracked-cache >../after + ) && + grep "directory invalidation" trace-before >>before && + grep "directory invalidation" trace-after >>after && + # UNTR extension unchanged, dir invalidation count unchanged + test_cmp before after +' + test_done diff --git a/unpack-trees.c b/unpack-trees.c index bf8b602901..3a2158bcf0 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1506,7 +1506,7 @@ static void invalidate_ce_path(const struct cache_entry *ce, if (!ce) return; cache_tree_invalidate_path(o->src_index, ce->name); - untracked_cache_invalidate_path(o->src_index, ce->name); + untracked_cache_invalidate_path(o->src_index, ce->name, 1); } /*