From d6b8fc303b389b026f2bf9918f6f83041488989b Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 31 Jan 2008 01:17:48 -0800 Subject: [PATCH 1/2] gitignore(5): Allow "foo/" in ignore list to match directory "foo" A pattern "foo/" in the exclude list did not match directory "foo", but a pattern "foo" did. This attempts to extend the exclude mechanism so that it would while not matching a regular file or a symbolic link "foo". In order to differentiate a directory and non directory, this passes down the type of path being checked to excluded() function. A downside is that the recursive directory walk may need to run lstat(2) more often on systems whose "struct dirent" do not give the type of the entry; earlier it did not have to do so for an excluded path, but we now need to figure out if a path is a directory before deciding to exclude it. This is especially bad because an idea similar to the earlier CE_UPTODATE optimization to reduce number of lstat(2) calls would by definition not apply to the codepaths involved, as (1) directories will not be registered in the index, and (2) excluded paths will not be in the index anyway. Signed-off-by: Junio C Hamano --- Documentation/gitignore.txt | 7 +++++ builtin-ls-files.c | 6 +++-- cache.h | 12 +++++++++ dir.c | 42 ++++++++++++++++++++++-------- dir.h | 3 ++- t/t3001-ls-files-others-exclude.sh | 41 +++++++++++++++++++++++++++++ unpack-trees.c | 2 +- 7 files changed, 98 insertions(+), 15 deletions(-) diff --git a/Documentation/gitignore.txt b/Documentation/gitignore.txt index 08373f52bb7..e847b3ba63f 100644 --- a/Documentation/gitignore.txt +++ b/Documentation/gitignore.txt @@ -57,6 +57,13 @@ Patterns have the following format: included again. If a negated pattern matches, this will override lower precedence patterns sources. + - If the pattern ends with a slash, it is removed for the + purpose of the following description, but it would only find + a match with a directory. In other words, `foo/` will match a + directory `foo` and paths underneath it, but will not match a + regular file or a symbolic link `foo` (this is consistent + with the way how pathspec works in general in git). + - If the pattern does not contain a slash '/', git treats it as a shell glob pattern and checks for a match against the pathname without leading directories. diff --git a/builtin-ls-files.c b/builtin-ls-files.c index 0f0ab2da167..dbba371b8e3 100644 --- a/builtin-ls-files.c +++ b/builtin-ls-files.c @@ -238,7 +238,8 @@ static void show_files(struct dir_struct *dir, const char *prefix) if (show_cached | show_stage) { for (i = 0; i < active_nr; i++) { struct cache_entry *ce = active_cache[i]; - if (excluded(dir, ce->name) != dir->show_ignored) + if (excluded(dir, ce->name, ce_to_dtype(ce)) != + dir->show_ignored) continue; if (show_unmerged && !ce_stage(ce)) continue; @@ -252,7 +253,8 @@ static void show_files(struct dir_struct *dir, const char *prefix) struct cache_entry *ce = active_cache[i]; struct stat st; int err; - if (excluded(dir, ce->name) != dir->show_ignored) + if (excluded(dir, ce->name, ce_to_dtype(ce)) != + dir->show_ignored) continue; err = lstat(ce->name, &st); if (show_deleted && err) diff --git a/cache.h b/cache.h index 549f4bbac72..55298301108 100644 --- a/cache.h +++ b/cache.h @@ -141,6 +141,18 @@ static inline unsigned int ce_mode_from_stat(struct cache_entry *ce, unsigned in } return create_ce_mode(mode); } +static inline int ce_to_dtype(const struct cache_entry *ce) +{ + unsigned ce_mode = ntohl(ce->ce_mode); + if (S_ISREG(ce_mode)) + return DT_REG; + else if (S_ISDIR(ce_mode) || S_ISGITLINK(ce_mode)) + return DT_DIR; + else if (S_ISLNK(ce_mode)) + return DT_LNK; + else + return DT_UNKNOWN; +} #define canon_mode(mode) \ (S_ISREG(mode) ? (S_IFREG | ce_permissions(mode)) : \ S_ISLNK(mode) ? S_IFLNK : S_ISDIR(mode) ? S_IFDIR : S_IFGITLINK) diff --git a/dir.c b/dir.c index 3e345c2fc50..a4f8c258cc4 100644 --- a/dir.c +++ b/dir.c @@ -126,18 +126,34 @@ static int no_wildcard(const char *string) void add_exclude(const char *string, const char *base, int baselen, struct exclude_list *which) { - struct exclude *x = xmalloc(sizeof (*x)); + struct exclude *x; + size_t len; + int to_exclude = 1; + int flags = 0; - x->to_exclude = 1; if (*string == '!') { - x->to_exclude = 0; + to_exclude = 0; string++; } - x->pattern = string; + len = strlen(string); + if (len && string[len - 1] == '/') { + char *s; + x = xmalloc(sizeof(*x) + len); + s = (char*)(x+1); + memcpy(s, string, len - 1); + s[len - 1] = '\0'; + string = s; + x->pattern = s; + flags = EXC_FLAG_MUSTBEDIR; + } else { + x = xmalloc(sizeof(*x)); + x->pattern = string; + } + x->to_exclude = to_exclude; x->patternlen = strlen(string); x->base = base; x->baselen = baselen; - x->flags = 0; + x->flags = flags; if (!strchr(string, '/')) x->flags |= EXC_FLAG_NODIR; if (no_wildcard(string)) @@ -261,7 +277,7 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen) * Return 1 for exclude, 0 for include and -1 for undecided. */ static int excluded_1(const char *pathname, - int pathlen, const char *basename, + int pathlen, const char *basename, int dtype, struct exclude_list *el) { int i; @@ -272,6 +288,10 @@ static int excluded_1(const char *pathname, const char *exclude = x->pattern; int to_exclude = x->to_exclude; + if ((x->flags & EXC_FLAG_MUSTBEDIR) && + (dtype != DT_DIR)) + continue; + if (x->flags & EXC_FLAG_NODIR) { /* match basename */ if (x->flags & EXC_FLAG_NOWILDCARD) { @@ -314,7 +334,7 @@ static int excluded_1(const char *pathname, return -1; /* undecided */ } -int excluded(struct dir_struct *dir, const char *pathname) +int excluded(struct dir_struct *dir, const char *pathname, int dtype) { int pathlen = strlen(pathname); int st; @@ -323,7 +343,8 @@ int excluded(struct dir_struct *dir, const char *pathname) prep_exclude(dir, pathname, basename-pathname); for (st = EXC_CMDL; st <= EXC_FILE; st++) { - switch (excluded_1(pathname, pathlen, basename, &dir->exclude_list[st])) { + switch (excluded_1(pathname, pathlen, basename, + dtype, &dir->exclude_list[st])) { case 0: return 0; case 1: @@ -560,7 +581,8 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co if (simplify_away(fullname, baselen + len, simplify)) continue; - exclude = excluded(dir, fullname); + dtype = get_dtype(de, fullname); + exclude = excluded(dir, fullname, dtype); if (exclude && dir->collect_ignored && in_pathspec(fullname, baselen + len, simplify)) dir_add_ignored(dir, fullname, baselen + len); @@ -572,8 +594,6 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co if (exclude && !dir->show_ignored) continue; - dtype = get_dtype(de, fullname); - /* * Do we want to see just the ignored files? * We still need to recurse into directories, diff --git a/dir.h b/dir.h index d8814dccb2d..10d72b5222b 100644 --- a/dir.h +++ b/dir.h @@ -9,6 +9,7 @@ struct dir_entry { #define EXC_FLAG_NODIR 1 #define EXC_FLAG_NOWILDCARD 2 #define EXC_FLAG_ENDSWITH 4 +#define EXC_FLAG_MUSTBEDIR 8 struct exclude_list { int nr; @@ -67,7 +68,7 @@ extern int match_pathspec(const char **pathspec, const char *name, int namelen, extern int read_directory(struct dir_struct *, const char *path, const char *base, int baselen, const char **pathspec); -extern int excluded(struct dir_struct *, const char *); +extern int excluded(struct dir_struct *, const char *, int); extern void add_excludes_from_file(struct dir_struct *, const char *fname); extern void add_exclude(const char *string, const char *base, int baselen, struct exclude_list *which); diff --git a/t/t3001-ls-files-others-exclude.sh b/t/t3001-ls-files-others-exclude.sh index e25b2556833..b4297bacf2b 100755 --- a/t/t3001-ls-files-others-exclude.sh +++ b/t/t3001-ls-files-others-exclude.sh @@ -99,4 +99,45 @@ EOF test_expect_success 'git-status honours core.excludesfile' \ 'diff -u expect output' +test_expect_success 'trailing slash in exclude allows directory match(1)' ' + + git ls-files --others --exclude=one/ >output && + if grep "^one/" output + then + echo Ooops + false + else + : happy + fi + +' + +test_expect_success 'trailing slash in exclude allows directory match (2)' ' + + git ls-files --others --exclude=one/two/ >output && + if grep "^one/two/" output + then + echo Ooops + false + else + : happy + fi + +' + +test_expect_success 'trailing slash in exclude forces directory match (1)' ' + + >two + git ls-files --others --exclude=two/ >output && + grep "^two" output + +' + +test_expect_success 'trailing slash in exclude forces directory match (2)' ' + + git ls-files --others --exclude=one/a.1/ >output && + grep "^one/a.1" output + +' + test_done diff --git a/unpack-trees.c b/unpack-trees.c index aa2513ed798..11af2636c27 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -523,7 +523,7 @@ static void verify_absent(struct cache_entry *ce, const char *action, if (!lstat(ce->name, &st)) { int cnt; - if (o->dir && excluded(o->dir, ce->name)) + if (o->dir && excluded(o->dir, ce->name, ce_to_dtype(ce))) /* * ce->name is explicitly excluded, so it is Ok to * overwrite it. From 6831a88ac03759a8133f10ffd52ad235a081a8a3 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 31 Jan 2008 20:23:25 -0800 Subject: [PATCH 2/2] gitignore: lazily find dtype When we process "foo/" entries in gitignore files on a system that does not have d_type member in "struct dirent", the earlier implementation ran lstat(2) separately when matching with entries that came from the command line, in-tree .gitignore files, and $GIT_DIR/info/excludes file. This optimizes it by delaying the lstat(2) call until it becomes absolutely necessary. The initial idea for this change was by Jeff King, but I optimized it further to pass pointers to around. Signed-off-by: Junio C Hamano --- builtin-ls-files.c | 8 ++++---- dir.c | 25 ++++++++++++++++--------- dir.h | 2 +- unpack-trees.c | 3 ++- 4 files changed, 23 insertions(+), 15 deletions(-) diff --git a/builtin-ls-files.c b/builtin-ls-files.c index dbba371b8e3..54cb2518dbb 100644 --- a/builtin-ls-files.c +++ b/builtin-ls-files.c @@ -238,8 +238,8 @@ static void show_files(struct dir_struct *dir, const char *prefix) if (show_cached | show_stage) { for (i = 0; i < active_nr; i++) { struct cache_entry *ce = active_cache[i]; - if (excluded(dir, ce->name, ce_to_dtype(ce)) != - dir->show_ignored) + int dtype = ce_to_dtype(ce); + if (excluded(dir, ce->name, &dtype) != dir->show_ignored) continue; if (show_unmerged && !ce_stage(ce)) continue; @@ -253,8 +253,8 @@ static void show_files(struct dir_struct *dir, const char *prefix) struct cache_entry *ce = active_cache[i]; struct stat st; int err; - if (excluded(dir, ce->name, ce_to_dtype(ce)) != - dir->show_ignored) + int dtype = ce_to_dtype(ce); + if (excluded(dir, ce->name, &dtype) != dir->show_ignored) continue; err = lstat(ce->name, &st); if (show_deleted && err) diff --git a/dir.c b/dir.c index a4f8c258cc4..292639b1562 100644 --- a/dir.c +++ b/dir.c @@ -17,6 +17,7 @@ struct path_simplify { static int read_directory_recursive(struct dir_struct *dir, const char *path, const char *base, int baselen, int check_only, const struct path_simplify *simplify); +static int get_dtype(struct dirent *de, const char *path); int common_prefix(const char **pathspec) { @@ -277,7 +278,7 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen) * Return 1 for exclude, 0 for include and -1 for undecided. */ static int excluded_1(const char *pathname, - int pathlen, const char *basename, int dtype, + int pathlen, const char *basename, int *dtype, struct exclude_list *el) { int i; @@ -288,9 +289,12 @@ static int excluded_1(const char *pathname, const char *exclude = x->pattern; int to_exclude = x->to_exclude; - if ((x->flags & EXC_FLAG_MUSTBEDIR) && - (dtype != DT_DIR)) - continue; + if (x->flags & EXC_FLAG_MUSTBEDIR) { + if (*dtype == DT_UNKNOWN) + *dtype = get_dtype(NULL, pathname); + if (*dtype != DT_DIR) + continue; + } if (x->flags & EXC_FLAG_NODIR) { /* match basename */ @@ -334,7 +338,7 @@ static int excluded_1(const char *pathname, return -1; /* undecided */ } -int excluded(struct dir_struct *dir, const char *pathname, int dtype) +int excluded(struct dir_struct *dir, const char *pathname, int *dtype_p) { int pathlen = strlen(pathname); int st; @@ -344,7 +348,7 @@ int excluded(struct dir_struct *dir, const char *pathname, int dtype) prep_exclude(dir, pathname, basename-pathname); for (st = EXC_CMDL; st <= EXC_FILE; st++) { switch (excluded_1(pathname, pathlen, basename, - dtype, &dir->exclude_list[st])) { + dtype_p, &dir->exclude_list[st])) { case 0: return 0; case 1: @@ -529,7 +533,7 @@ static int in_pathspec(const char *path, int len, const struct path_simplify *si static int get_dtype(struct dirent *de, const char *path) { - int dtype = DTYPE(de); + int dtype = de ? DTYPE(de) : DT_UNKNOWN; struct stat st; if (dtype != DT_UNKNOWN) @@ -581,8 +585,8 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co if (simplify_away(fullname, baselen + len, simplify)) continue; - dtype = get_dtype(de, fullname); - exclude = excluded(dir, fullname, dtype); + dtype = DTYPE(de); + exclude = excluded(dir, fullname, &dtype); if (exclude && dir->collect_ignored && in_pathspec(fullname, baselen + len, simplify)) dir_add_ignored(dir, fullname, baselen + len); @@ -594,6 +598,9 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co if (exclude && !dir->show_ignored) continue; + if (dtype == DT_UNKNOWN) + dtype = get_dtype(de, fullname); + /* * Do we want to see just the ignored files? * We still need to recurse into directories, diff --git a/dir.h b/dir.h index 10d72b5222b..2df15defb67 100644 --- a/dir.h +++ b/dir.h @@ -68,7 +68,7 @@ extern int match_pathspec(const char **pathspec, const char *name, int namelen, extern int read_directory(struct dir_struct *, const char *path, const char *base, int baselen, const char **pathspec); -extern int excluded(struct dir_struct *, const char *, int); +extern int excluded(struct dir_struct *, const char *, int *); extern void add_excludes_from_file(struct dir_struct *, const char *fname); extern void add_exclude(const char *string, const char *base, int baselen, struct exclude_list *which); diff --git a/unpack-trees.c b/unpack-trees.c index 11af2636c27..29848e926c9 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -522,8 +522,9 @@ static void verify_absent(struct cache_entry *ce, const char *action, if (!lstat(ce->name, &st)) { int cnt; + int dtype = ce_to_dtype(ce); - if (o->dir && excluded(o->dir, ce->name, ce_to_dtype(ce))) + if (o->dir && excluded(o->dir, ce->name, &dtype)) /* * ce->name is explicitly excluded, so it is Ok to * overwrite it.