From f1a7082f2a05b6b187fad9c661e2c872f8b477f5 Mon Sep 17 00:00:00 2001 From: Adam Spiers Date: Thu, 27 Dec 2012 02:32:20 +0000 Subject: [PATCH 01/10] api-directory-listing.txt: update to match code 7c4c97c0ac turned the flags in struct dir_struct into a single bitfield variable, but forgot to update this document. Signed-off-by: Adam Spiers Signed-off-by: Junio C Hamano --- Documentation/technical/api-directory-listing.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/technical/api-directory-listing.txt b/Documentation/technical/api-directory-listing.txt index add6f435b5..0356d25974 100644 --- a/Documentation/technical/api-directory-listing.txt +++ b/Documentation/technical/api-directory-listing.txt @@ -17,24 +17,24 @@ options are: The name of the file to be read in each directory for excluded files (typically `.gitignore`). -`collect_ignored`:: +`flags`:: - Include paths that are to be excluded in the result. + A bit-field of options: -`show_ignored`:: +`DIR_SHOW_IGNORED`::: The traversal is for finding just ignored files, not unignored files. -`show_other_directories`:: +`DIR_SHOW_OTHER_DIRECTORIES`::: Include a directory that is not tracked. -`hide_empty_directories`:: +`DIR_HIDE_EMPTY_DIRECTORIES`::: Do not include a directory that is not tracked and is empty. -`no_gitlinks`:: +`DIR_NO_GITLINKS`::: If set, recurse into a directory that looks like a git directory. Otherwise it is shown as a directory. From 95a68344afcaf229765921c70458ee76add342dc Mon Sep 17 00:00:00 2001 From: Adam Spiers Date: Thu, 27 Dec 2012 02:32:21 +0000 Subject: [PATCH 02/10] Improve documentation and comments regarding directory traversal API traversal API has a few potentially confusing properties. These comments clarify a few key aspects and will hopefully make it easier to understand for other newcomers in the future. Signed-off-by: Adam Spiers Signed-off-by: Junio C Hamano --- .../technical/api-directory-listing.txt | 9 ++++--- dir.c | 8 +++++- dir.h | 26 +++++++++++++++++-- 3 files changed, 37 insertions(+), 6 deletions(-) diff --git a/Documentation/technical/api-directory-listing.txt b/Documentation/technical/api-directory-listing.txt index 0356d25974..944fc39fac 100644 --- a/Documentation/technical/api-directory-listing.txt +++ b/Documentation/technical/api-directory-listing.txt @@ -9,8 +9,11 @@ Data structure -------------- `struct dir_struct` structure is used to pass directory traversal -options to the library and to record the paths discovered. The notable -options are: +options to the library and to record the paths discovered. A single +`struct dir_struct` is used regardless of whether or not the traversal +recursively descends into subdirectories. + +The notable options are: `exclude_per_dir`:: @@ -39,7 +42,7 @@ options are: If set, recurse into a directory that looks like a git directory. Otherwise it is shown as a directory. -The result of the enumeration is left in these fields:: +The result of the enumeration is left in these fields: `entries[]`:: diff --git a/dir.c b/dir.c index ee8e7115a8..89e27a6825 100644 --- a/dir.c +++ b/dir.c @@ -2,6 +2,8 @@ * This handles recursive filename detection with exclude * files, index knowledge etc.. * + * See Documentation/technical/api-directory-listing.txt + * * Copyright (C) Linus Torvalds, 2005-2006 * Junio Hamano, 2005-2006 */ @@ -476,6 +478,10 @@ void add_excludes_from_file(struct dir_struct *dir, const char *fname) die("cannot use %s as an exclude file", fname); } +/* + * Loads the per-directory exclude list for the substring of base + * which has a char length of baselen. + */ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen) { struct exclude_list *el; @@ -486,7 +492,7 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen) (baselen + strlen(dir->exclude_per_dir) >= PATH_MAX)) return; /* too long a path -- ignore */ - /* Pop the ones that are not the prefix of the path being checked. */ + /* Pop the directories that are not the prefix of the path being checked. */ el = &dir->exclude_list[EXC_DIRS]; while ((stk = dir->exclude_stack) != NULL) { if (stk->baselen <= baselen && diff --git a/dir.h b/dir.h index f5c89e3b80..e0869bca2f 100644 --- a/dir.h +++ b/dir.h @@ -1,6 +1,8 @@ #ifndef DIR_H #define DIR_H +/* See Documentation/technical/api-directory-listing.txt */ + #include "strbuf.h" struct dir_entry { @@ -13,6 +15,12 @@ struct dir_entry { #define EXC_FLAG_MUSTBEDIR 8 #define EXC_FLAG_NEGATIVE 16 +/* + * Each .gitignore file will be parsed into patterns which are then + * appended to the relevant exclude_list (either EXC_DIRS or + * EXC_FILE). exclude_lists are also used to represent the list of + * --exclude values passed via CLI args (EXC_CMDL). + */ struct exclude_list { int nr; int alloc; @@ -26,9 +34,15 @@ struct exclude_list { } **excludes; }; +/* + * The contents of the per-directory exclude files are lazily read on + * demand and then cached in memory, one per exclude_stack struct, in + * order to avoid opening and parsing each one every time that + * directory is traversed. + */ struct exclude_stack { - struct exclude_stack *prev; - char *filebuf; + struct exclude_stack *prev; /* the struct exclude_stack for the parent directory */ + char *filebuf; /* remember pointer to per-directory exclude file contents so we can free() */ int baselen; int exclude_ix; }; @@ -59,6 +73,14 @@ struct dir_struct { #define EXC_DIRS 1 #define EXC_FILE 2 + /* + * Temporary variables which are used during loading of the + * per-directory exclude lists. + * + * exclude_stack points to the top of the exclude_stack, and + * basebuf contains the full path to the current + * (sub)directory in the traversal. + */ struct exclude_stack *exclude_stack; char basebuf[PATH_MAX]; }; From 840fc334e98e89e28039f85c3eff0caab5f20eb3 Mon Sep 17 00:00:00 2001 From: Adam Spiers Date: Thu, 27 Dec 2012 02:32:22 +0000 Subject: [PATCH 03/10] dir.c: rename cryptic 'which' variable to more consistent name 'el' is only *slightly* less cryptic, but is already used as the variable name for a struct exclude_list pointer in numerous other places, so this reduces the number of cryptic variable names in use by one :-) Signed-off-by: Adam Spiers Signed-off-by: Junio C Hamano --- dir.c | 10 +++++----- dir.h | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/dir.c b/dir.c index 89e27a6825..f31aa59fa3 100644 --- a/dir.c +++ b/dir.c @@ -349,7 +349,7 @@ void parse_exclude_pattern(const char **pattern, } void add_exclude(const char *string, const char *base, - int baselen, struct exclude_list *which) + int baselen, struct exclude_list *el) { struct exclude *x; int patternlen; @@ -373,8 +373,8 @@ void add_exclude(const char *string, const char *base, x->base = base; x->baselen = baselen; x->flags = flags; - ALLOC_GROW(which->excludes, which->nr + 1, which->alloc); - which->excludes[which->nr++] = x; + ALLOC_GROW(el->excludes, el->nr + 1, el->alloc); + el->excludes[el->nr++] = x; } static void *read_skip_worktree_file_from_index(const char *path, size_t *size) @@ -416,7 +416,7 @@ int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen, char **buf_p, - struct exclude_list *which, + struct exclude_list *el, int check_index) { struct stat st; @@ -463,7 +463,7 @@ int add_excludes_from_file_to_list(const char *fname, if (buf[i] == '\n') { if (entry != buf + i && entry[0] != '#') { buf[i - (i && buf[i-1] == '\r')] = 0; - add_exclude(entry, base, baselen, which); + add_exclude(entry, base, baselen, el); } entry = buf + i + 1; } diff --git a/dir.h b/dir.h index e0869bca2f..680c1eb6ea 100644 --- a/dir.h +++ b/dir.h @@ -127,11 +127,11 @@ extern int path_excluded(struct path_exclude_check *, const char *, int namelen, extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen, - char **buf_p, struct exclude_list *which, int check_index); + char **buf_p, struct exclude_list *el, int check_index); extern void add_excludes_from_file(struct dir_struct *, const char *fname); extern void parse_exclude_pattern(const char **string, int *patternlen, int *flags, int *nowildcardlen); extern void add_exclude(const char *string, const char *base, - int baselen, struct exclude_list *which); + int baselen, struct exclude_list *el); extern void free_excludes(struct exclude_list *el); extern int file_exists(const char *); From 9013089c4a841bf17bcc8e2510a86ceecac29ffd Mon Sep 17 00:00:00 2001 From: Adam Spiers Date: Thu, 27 Dec 2012 02:32:23 +0000 Subject: [PATCH 04/10] dir.c: rename path_excluded() to is_path_excluded() Start adopting clearer names for exclude functions. This 'is_*' naming pattern for functions returning booleans was agreed here: http://thread.gmane.org/gmane.comp.version-control.git/204661/focus=204924 Signed-off-by: Adam Spiers Signed-off-by: Junio C Hamano --- builtin/add.c | 2 +- builtin/ls-files.c | 2 +- dir.c | 4 ++-- dir.h | 2 +- unpack-trees.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/builtin/add.c b/builtin/add.c index 89dce56a24..c689f3725e 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -453,7 +453,7 @@ int cmd_add(int argc, const char **argv, const char *prefix) && !file_exists(pathspec[i])) { if (ignore_missing) { int dtype = DT_UNKNOWN; - if (path_excluded(&check, pathspec[i], -1, &dtype)) + if (is_path_excluded(&check, pathspec[i], -1, &dtype)) dir_add_ignored(&dir, pathspec[i], strlen(pathspec[i])); } else die(_("pathspec '%s' did not match any files"), diff --git a/builtin/ls-files.c b/builtin/ls-files.c index 31b3f2d900..ef7f99a9ed 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -203,7 +203,7 @@ static void show_ru_info(void) static int ce_excluded(struct path_exclude_check *check, struct cache_entry *ce) { int dtype = ce_to_dtype(ce); - return path_excluded(check, ce->name, ce_namelen(ce), &dtype); + return is_path_excluded(check, ce->name, ce_namelen(ce), &dtype); } static void show_files(struct dir_struct *dir) diff --git a/dir.c b/dir.c index f31aa59fa3..f1c0abd182 100644 --- a/dir.c +++ b/dir.c @@ -685,8 +685,8 @@ void path_exclude_check_clear(struct path_exclude_check *check) * A path to a directory known to be excluded is left in check->path to * optimize for repeated checks for files in the same excluded directory. */ -int path_excluded(struct path_exclude_check *check, - const char *name, int namelen, int *dtype) +int is_path_excluded(struct path_exclude_check *check, + const char *name, int namelen, int *dtype) { int i; struct strbuf *path = &check->path; diff --git a/dir.h b/dir.h index 680c1eb6ea..c59bad8851 100644 --- a/dir.h +++ b/dir.h @@ -123,7 +123,7 @@ struct path_exclude_check { }; extern void path_exclude_check_init(struct path_exclude_check *, struct dir_struct *); extern void path_exclude_check_clear(struct path_exclude_check *); -extern int path_excluded(struct path_exclude_check *, const char *, int namelen, int *dtype); +extern int is_path_excluded(struct path_exclude_check *, const char *, int namelen, int *dtype); extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen, diff --git a/unpack-trees.c b/unpack-trees.c index 33a581924e..3ac6370f99 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1372,7 +1372,7 @@ static int check_ok_to_remove(const char *name, int len, int dtype, return 0; if (o->dir && - path_excluded(o->path_exclude_check, name, -1, &dtype)) + is_path_excluded(o->path_exclude_check, name, -1, &dtype)) /* * ce->name is explicitly excluded, so it is Ok to * overwrite it. From 0795805053ad89a24954684fca2e69fea2bf50b9 Mon Sep 17 00:00:00 2001 From: Adam Spiers Date: Thu, 27 Dec 2012 02:32:24 +0000 Subject: [PATCH 05/10] dir.c: rename excluded_from_list() to is_excluded_from_list() Continue adopting clearer names for exclude functions. This 'is_*' naming pattern for functions returning booleans was discussed here: http://thread.gmane.org/gmane.comp.version-control.git/204661/focus=204924 Also adjust their callers as necessary. Signed-off-by: Adam Spiers Signed-off-by: Junio C Hamano --- dir.c | 11 ++++++----- dir.h | 4 ++-- unpack-trees.c | 8 +++++--- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/dir.c b/dir.c index f1c0abd182..08004915b6 100644 --- a/dir.c +++ b/dir.c @@ -605,9 +605,9 @@ int match_pathname(const char *pathname, int pathlen, /* Scan the list and let the last match determine the fate. * Return 1 for exclude, 0 for include and -1 for undecided. */ -int excluded_from_list(const char *pathname, - int pathlen, const char *basename, int *dtype, - struct exclude_list *el) +int is_excluded_from_list(const char *pathname, + int pathlen, const char *basename, int *dtype, + struct exclude_list *el) { int i; @@ -654,8 +654,9 @@ static int excluded(struct dir_struct *dir, const char *pathname, int *dtype_p) prep_exclude(dir, pathname, basename-pathname); for (st = EXC_CMDL; st <= EXC_FILE; st++) { - switch (excluded_from_list(pathname, pathlen, basename, - dtype_p, &dir->exclude_list[st])) { + switch (is_excluded_from_list(pathname, pathlen, + basename, dtype_p, + &dir->exclude_list[st])) { case 0: return 0; case 1: diff --git a/dir.h b/dir.h index c59bad8851..554f87a5fd 100644 --- a/dir.h +++ b/dir.h @@ -98,8 +98,8 @@ extern int within_depth(const char *name, int namelen, int depth, int max_depth) extern int fill_directory(struct dir_struct *dir, const char **pathspec); extern int read_directory(struct dir_struct *, const char *path, int len, const char **pathspec); -extern int excluded_from_list(const char *pathname, int pathlen, const char *basename, - int *dtype, struct exclude_list *el); +extern int is_excluded_from_list(const char *pathname, int pathlen, const char *basename, + int *dtype, struct exclude_list *el); struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len); /* diff --git a/unpack-trees.c b/unpack-trees.c index 3ac6370f99..c0da094285 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -836,7 +836,8 @@ static int clear_ce_flags_dir(struct cache_entry **cache, int nr, { struct cache_entry **cache_end; int dtype = DT_DIR; - int ret = excluded_from_list(prefix, prefix_len, basename, &dtype, el); + int ret = is_excluded_from_list(prefix, prefix_len, + basename, &dtype, el); prefix[prefix_len++] = '/'; @@ -855,7 +856,7 @@ static int clear_ce_flags_dir(struct cache_entry **cache, int nr, * with ret (iow, we know in advance the incl/excl * decision for the entire directory), clear flag here without * calling clear_ce_flags_1(). That function will call - * the expensive excluded_from_list() on every entry. + * the expensive is_excluded_from_list() on every entry. */ return clear_ce_flags_1(cache, cache_end - cache, prefix, prefix_len, @@ -938,7 +939,8 @@ static int clear_ce_flags_1(struct cache_entry **cache, int nr, /* Non-directory */ dtype = ce_to_dtype(ce); - ret = excluded_from_list(ce->name, ce_namelen(ce), name, &dtype, el); + ret = is_excluded_from_list(ce->name, ce_namelen(ce), + name, &dtype, el); if (ret < 0) ret = defval; if (ret > 0) From 6d24e7a807922a5fbf1aa4d42f66e4f0a0aaa141 Mon Sep 17 00:00:00 2001 From: Adam Spiers Date: Thu, 27 Dec 2012 02:32:25 +0000 Subject: [PATCH 06/10] dir.c: rename excluded() to is_excluded() Continue adopting clearer names for exclude functions. This is_* naming pattern for functions returning booleans was discussed here: http://thread.gmane.org/gmane.comp.version-control.git/204661/focus=204924 Signed-off-by: Adam Spiers Signed-off-by: Junio C Hamano --- attr.c | 2 +- dir.c | 10 +++++----- dir.h | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/attr.c b/attr.c index 2fc6353628..5362563088 100644 --- a/attr.c +++ b/attr.c @@ -284,7 +284,7 @@ static struct match_attr *parse_attr_line(const char *line, const char *src, * (reading the file from top to bottom), .gitattribute of the root * directory (again, reading the file from top to bottom) down to the * current directory, and then scan the list backwards to find the first match. - * This is exactly the same as what excluded() does in dir.c to deal with + * This is exactly the same as what is_excluded() does in dir.c to deal with * .gitignore */ diff --git a/dir.c b/dir.c index 08004915b6..8c99dc40dd 100644 --- a/dir.c +++ b/dir.c @@ -645,7 +645,7 @@ int is_excluded_from_list(const char *pathname, return -1; /* undecided */ } -static int excluded(struct dir_struct *dir, const char *pathname, int *dtype_p) +static int is_excluded(struct dir_struct *dir, const char *pathname, int *dtype_p) { int pathlen = strlen(pathname); int st; @@ -695,7 +695,7 @@ int is_path_excluded(struct path_exclude_check *check, /* * we allow the caller to pass namelen as an optimization; it * must match the length of the name, as we eventually call - * excluded() on the whole name string. + * is_excluded() on the whole name string. */ if (namelen < 0) namelen = strlen(name); @@ -712,7 +712,7 @@ int is_path_excluded(struct path_exclude_check *check, if (ch == '/') { int dt = DT_DIR; - if (excluded(check->dir, path->buf, &dt)) + if (is_excluded(check->dir, path->buf, &dt)) return 1; } strbuf_addch(path, ch); @@ -721,7 +721,7 @@ int is_path_excluded(struct path_exclude_check *check, /* An entry in the index; cannot be a directory with subentries */ strbuf_setlen(path, 0); - return excluded(check->dir, name, dtype); + return is_excluded(check->dir, name, dtype); } static struct dir_entry *dir_entry_new(const char *pathname, int len) @@ -1021,7 +1021,7 @@ static enum path_treatment treat_one_path(struct dir_struct *dir, const struct path_simplify *simplify, int dtype, struct dirent *de) { - int exclude = excluded(dir, path->buf, &dtype); + int exclude = is_excluded(dir, path->buf, &dtype); if (exclude && (dir->flags & DIR_COLLECT_IGNORED) && exclude_matches_pathspec(path->buf, path->len, simplify)) dir_add_ignored(dir, path->buf, path->len); diff --git a/dir.h b/dir.h index 554f87a5fd..d68a9971c2 100644 --- a/dir.h +++ b/dir.h @@ -113,8 +113,8 @@ extern int match_pathname(const char *, int, const char *, int, int, int); /* - * The excluded() API is meant for callers that check each level of leading - * directory hierarchies with excluded() to avoid recursing into excluded + * The is_excluded() API is meant for callers that check each level of leading + * directory hierarchies with is_excluded() to avoid recursing into excluded * directories. Callers that do not do so should use this API instead. */ struct path_exclude_check { From 578cd7c3eae11e552bbc34b29f35e273a455988e Mon Sep 17 00:00:00 2001 From: Adam Spiers Date: Thu, 27 Dec 2012 02:32:26 +0000 Subject: [PATCH 07/10] dir.c: refactor is_excluded_from_list() The excluded function uses a new helper function called last_exclude_matching_from_list() to perform the inner loop over all of the exclude patterns. The helper just tells us whether the path is included, excluded, or undecided. However, it may be useful to know _which_ pattern was triggered. So let's pass out the entire exclude match, which contains the status information we were already passing out. Further patches can make use of this. This is a modified forward port of a patch from 2009 by Jeff King: http://article.gmane.org/gmane.comp.version-control.git/108815 Signed-off-by: Adam Spiers Signed-off-by: Junio C Hamano --- dir.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/dir.c b/dir.c index 8c99dc40dd..d1a04136e5 100644 --- a/dir.c +++ b/dir.c @@ -602,22 +602,26 @@ int match_pathname(const char *pathname, int pathlen, return fnmatch_icase(pattern, name, FNM_PATHNAME) == 0; } -/* Scan the list and let the last match determine the fate. - * Return 1 for exclude, 0 for include and -1 for undecided. +/* + * Scan the given exclude list in reverse to see whether pathname + * should be ignored. The first match (i.e. the last on the list), if + * any, determines the fate. Returns the exclude_list element which + * matched, or NULL for undecided. */ -int is_excluded_from_list(const char *pathname, - int pathlen, const char *basename, int *dtype, - struct exclude_list *el) +static struct exclude *last_exclude_matching_from_list(const char *pathname, + int pathlen, + const char *basename, + int *dtype, + struct exclude_list *el) { int i; if (!el->nr) - return -1; /* undefined */ + return NULL; /* undefined */ for (i = el->nr - 1; 0 <= i; i--) { struct exclude *x = el->excludes[i]; const char *exclude = x->pattern; - int to_exclude = x->flags & EXC_FLAG_NEGATIVE ? 0 : 1; int prefix = x->nowildcardlen; if (x->flags & EXC_FLAG_MUSTBEDIR) { @@ -632,7 +636,7 @@ int is_excluded_from_list(const char *pathname, pathlen - (basename - pathname), exclude, prefix, x->patternlen, x->flags)) - return to_exclude; + return x; continue; } @@ -640,8 +644,23 @@ int is_excluded_from_list(const char *pathname, if (match_pathname(pathname, pathlen, x->base, x->baselen ? x->baselen - 1 : 0, exclude, prefix, x->patternlen, x->flags)) - return to_exclude; + return x; } + return NULL; /* undecided */ +} + +/* + * Scan the list and let the last match determine the fate. + * Return 1 for exclude, 0 for include and -1 for undecided. + */ +int is_excluded_from_list(const char *pathname, + int pathlen, const char *basename, int *dtype, + struct exclude_list *el) +{ + struct exclude *exclude; + exclude = last_exclude_matching_from_list(pathname, pathlen, basename, dtype, el); + if (exclude) + return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1; return -1; /* undecided */ } From f4cd69a67469eff90bee8e31529ca4e03c1afdf2 Mon Sep 17 00:00:00 2001 From: Adam Spiers Date: Thu, 27 Dec 2012 02:32:27 +0000 Subject: [PATCH 08/10] dir.c: refactor is_excluded() In a similar way to the previous commit, this extracts a new helper function last_exclude_matching() which returns the last exclude_list element which matched, or NULL if no match was found. is_excluded() becomes a wrapper around this, and just returns 0 or 1 depending on whether any matching exclude_list element was found. This allows callers to find out _why_ a given path was excluded, rather than just whether it was or not, paving the way for a new git sub-command which allows users to test their exclude lists from the command line. Signed-off-by: Adam Spiers Signed-off-by: Junio C Hamano --- dir.c | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/dir.c b/dir.c index d1a04136e5..b9d4234526 100644 --- a/dir.c +++ b/dir.c @@ -664,24 +664,44 @@ int is_excluded_from_list(const char *pathname, return -1; /* undecided */ } -static int is_excluded(struct dir_struct *dir, const char *pathname, int *dtype_p) +/* + * Loads the exclude lists for the directory containing pathname, then + * scans all exclude lists to determine whether pathname is excluded. + * Returns the exclude_list element which matched, or NULL for + * undecided. + */ +static struct exclude *last_exclude_matching(struct dir_struct *dir, + const char *pathname, + int *dtype_p) { int pathlen = strlen(pathname); int st; + struct exclude *exclude; const char *basename = strrchr(pathname, '/'); basename = (basename) ? basename+1 : pathname; prep_exclude(dir, pathname, basename-pathname); for (st = EXC_CMDL; st <= EXC_FILE; st++) { - switch (is_excluded_from_list(pathname, pathlen, - basename, dtype_p, - &dir->exclude_list[st])) { - case 0: - return 0; - case 1: - return 1; - } + exclude = last_exclude_matching_from_list( + pathname, pathlen, basename, dtype_p, + &dir->exclude_list[st]); + if (exclude) + return exclude; } + return NULL; +} + +/* + * Loads the exclude lists for the directory containing pathname, then + * scans all exclude lists to determine whether pathname is excluded. + * Returns 1 if true, otherwise 0. + */ +static int is_excluded(struct dir_struct *dir, const char *pathname, int *dtype_p) +{ + struct exclude *exclude = + last_exclude_matching(dir, pathname, dtype_p); + if (exclude) + return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1; return 0; } From a35341a86ecf354d46cf326ed9e0ffbceb54309d Mon Sep 17 00:00:00 2001 From: Adam Spiers Date: Thu, 27 Dec 2012 02:32:28 +0000 Subject: [PATCH 09/10] dir.c: refactor is_path_excluded() In a similar way to the previous commit, this extracts a new helper function last_exclude_matching_path() which return the last exclude_list element which matched, or NULL if no match was found. is_path_excluded() becomes a wrapper around this, and just returns 0 or 1 depending on whether any matching exclude_list element was found. This allows callers to find out _why_ a given path was excluded, rather than just whether it was or not, paving the way for a new git sub-command which allows users to test their exclude lists from the command line. Signed-off-by: Adam Spiers Signed-off-by: Junio C Hamano --- dir.c | 47 ++++++++++++++++++++++++++++++++++++++--------- dir.h | 3 +++ 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/dir.c b/dir.c index b9d4234526..16e10b0d14 100644 --- a/dir.c +++ b/dir.c @@ -709,6 +709,7 @@ void path_exclude_check_init(struct path_exclude_check *check, struct dir_struct *dir) { check->dir = dir; + check->exclude = NULL; strbuf_init(&check->path, 256); } @@ -718,18 +719,21 @@ void path_exclude_check_clear(struct path_exclude_check *check) } /* - * Is this name excluded? This is for a caller like show_files() that - * do not honor directory hierarchy and iterate through paths that are - * possibly in an ignored directory. + * For each subdirectory in name, starting with the top-most, checks + * to see if that subdirectory is excluded, and if so, returns the + * corresponding exclude structure. Otherwise, checks whether name + * itself (which is presumably a file) is excluded. * * A path to a directory known to be excluded is left in check->path to * optimize for repeated checks for files in the same excluded directory. */ -int is_path_excluded(struct path_exclude_check *check, - const char *name, int namelen, int *dtype) +struct exclude *last_exclude_matching_path(struct path_exclude_check *check, + const char *name, int namelen, + int *dtype) { int i; struct strbuf *path = &check->path; + struct exclude *exclude; /* * we allow the caller to pass namelen as an optimization; it @@ -739,11 +743,17 @@ int is_path_excluded(struct path_exclude_check *check, if (namelen < 0) namelen = strlen(name); + /* + * If path is non-empty, and name is equal to path or a + * subdirectory of path, name should be excluded, because + * it's inside a directory which is already known to be + * excluded and was previously left in check->path. + */ if (path->len && path->len <= namelen && !memcmp(name, path->buf, path->len) && (!name[path->len] || name[path->len] == '/')) - return 1; + return check->exclude; strbuf_setlen(path, 0); for (i = 0; name[i]; i++) { @@ -751,8 +761,12 @@ int is_path_excluded(struct path_exclude_check *check, if (ch == '/') { int dt = DT_DIR; - if (is_excluded(check->dir, path->buf, &dt)) - return 1; + exclude = last_exclude_matching(check->dir, + path->buf, &dt); + if (exclude) { + check->exclude = exclude; + return exclude; + } } strbuf_addch(path, ch); } @@ -760,7 +774,22 @@ int is_path_excluded(struct path_exclude_check *check, /* An entry in the index; cannot be a directory with subentries */ strbuf_setlen(path, 0); - return is_excluded(check->dir, name, dtype); + return last_exclude_matching(check->dir, name, dtype); +} + +/* + * Is this name excluded? This is for a caller like show_files() that + * do not honor directory hierarchy and iterate through paths that are + * possibly in an ignored directory. + */ +int is_path_excluded(struct path_exclude_check *check, + const char *name, int namelen, int *dtype) +{ + struct exclude *exclude = + last_exclude_matching_path(check, name, namelen, dtype); + if (exclude) + return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1; + return 0; } static struct dir_entry *dir_entry_new(const char *pathname, int len) diff --git a/dir.h b/dir.h index d68a9971c2..dcb1ad3d3a 100644 --- a/dir.h +++ b/dir.h @@ -119,10 +119,13 @@ extern int match_pathname(const char *, int, */ struct path_exclude_check { struct dir_struct *dir; + struct exclude *exclude; struct strbuf path; }; extern void path_exclude_check_init(struct path_exclude_check *, struct dir_struct *); extern void path_exclude_check_clear(struct path_exclude_check *); +extern struct exclude *last_exclude_matching_path(struct path_exclude_check *, const char *, + int namelen, int *dtype); extern int is_path_excluded(struct path_exclude_check *, const char *, int namelen, int *dtype); From f61988125130ac091bfb69bda5d62b0ad8f054c4 Mon Sep 17 00:00:00 2001 From: Adam Spiers Date: Thu, 27 Dec 2012 02:32:29 +0000 Subject: [PATCH 10/10] dir.c: rename free_excludes() to clear_exclude_list() It is clearer to use a 'clear_' prefix for functions which empty and deallocate the contents of a data structure without freeing the structure itself, and a 'free_' prefix for functions which also free the structure itself. http://article.gmane.org/gmane.comp.version-control.git/206128 Signed-off-by: Adam Spiers Signed-off-by: Junio C Hamano --- dir.c | 6 +++++- dir.h | 2 +- unpack-trees.c | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/dir.c b/dir.c index 16e10b0d14..41f141c8d6 100644 --- a/dir.c +++ b/dir.c @@ -400,7 +400,11 @@ static void *read_skip_worktree_file_from_index(const char *path, size_t *size) return data; } -void free_excludes(struct exclude_list *el) +/* + * Frees memory within el which was allocated for exclude patterns and + * the file buffer. Does not free el itself. + */ +void clear_exclude_list(struct exclude_list *el) { int i; diff --git a/dir.h b/dir.h index dcb1ad3d3a..5664ba8c64 100644 --- a/dir.h +++ b/dir.h @@ -135,7 +135,7 @@ extern void add_excludes_from_file(struct dir_struct *, const char *fname); extern void parse_exclude_pattern(const char **string, int *patternlen, int *flags, int *nowildcardlen); extern void add_exclude(const char *string, const char *base, int baselen, struct exclude_list *el); -extern void free_excludes(struct exclude_list *el); +extern void clear_exclude_list(struct exclude_list *el); extern int file_exists(const char *); extern int is_inside_dir(const char *dir); diff --git a/unpack-trees.c b/unpack-trees.c index c0da094285..ad621d95e9 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1153,7 +1153,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options *o->dst_index = o->result; done: - free_excludes(&el); + clear_exclude_list(&el); if (o->path_exclude_check) { path_exclude_check_clear(o->path_exclude_check); free(o->path_exclude_check);