diff --git a/Documentation/technical/api-directory-listing.txt b/Documentation/technical/api-directory-listing.txt index add6f435b5..944fc39fac 100644 --- a/Documentation/technical/api-directory-listing.txt +++ b/Documentation/technical/api-directory-listing.txt @@ -9,37 +9,40 @@ Data structure -------------- `struct dir_struct` structure is used to pass directory traversal -options to the library and to record the paths discovered. The notable -options are: +options to the library and to record the paths discovered. A single +`struct dir_struct` is used regardless of whether or not the traversal +recursively descends into subdirectories. + +The notable options are: `exclude_per_dir`:: The name of the file to be read in each directory for excluded files (typically `.gitignore`). -`collect_ignored`:: +`flags`:: - Include paths that are to be excluded in the result. + A bit-field of options: -`show_ignored`:: +`DIR_SHOW_IGNORED`::: The traversal is for finding just ignored files, not unignored files. -`show_other_directories`:: +`DIR_SHOW_OTHER_DIRECTORIES`::: Include a directory that is not tracked. -`hide_empty_directories`:: +`DIR_HIDE_EMPTY_DIRECTORIES`::: Do not include a directory that is not tracked and is empty. -`no_gitlinks`:: +`DIR_NO_GITLINKS`::: If set, recurse into a directory that looks like a git directory. Otherwise it is shown as a directory. -The result of the enumeration is left in these fields:: +The result of the enumeration is left in these fields: `entries[]`:: diff --git a/attr.c b/attr.c index 466c93fa50..d6d71901b2 100644 --- a/attr.c +++ b/attr.c @@ -284,7 +284,7 @@ static struct match_attr *parse_attr_line(const char *line, const char *src, * (reading the file from top to bottom), .gitattribute of the root * directory (again, reading the file from top to bottom) down to the * current directory, and then scan the list backwards to find the first match. - * This is exactly the same as what excluded() does in dir.c to deal with + * This is exactly the same as what is_excluded() does in dir.c to deal with * .gitignore */ diff --git a/builtin/add.c b/builtin/add.c index e664100c71..075312afcd 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -454,7 +454,7 @@ int cmd_add(int argc, const char **argv, const char *prefix) && !file_exists(pathspec[i])) { if (ignore_missing) { int dtype = DT_UNKNOWN; - if (path_excluded(&check, pathspec[i], -1, &dtype)) + if (is_path_excluded(&check, pathspec[i], -1, &dtype)) dir_add_ignored(&dir, pathspec[i], strlen(pathspec[i])); } else die(_("pathspec '%s' did not match any files"), diff --git a/builtin/ls-files.c b/builtin/ls-files.c index 4a9ee690c7..373c573449 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -203,7 +203,7 @@ static void show_ru_info(void) static int ce_excluded(struct path_exclude_check *check, struct cache_entry *ce) { int dtype = ce_to_dtype(ce); - return path_excluded(check, ce->name, ce_namelen(ce), &dtype); + return is_path_excluded(check, ce->name, ce_namelen(ce), &dtype); } static void show_files(struct dir_struct *dir) diff --git a/dir.c b/dir.c index 3780755047..e883a91483 100644 --- a/dir.c +++ b/dir.c @@ -2,6 +2,8 @@ * This handles recursive filename detection with exclude * files, index knowledge etc.. * + * See Documentation/technical/api-directory-listing.txt + * * Copyright (C) Linus Torvalds, 2005-2006 * Junio Hamano, 2005-2006 */ @@ -377,7 +379,7 @@ void parse_exclude_pattern(const char **pattern, } void add_exclude(const char *string, const char *base, - int baselen, struct exclude_list *which) + int baselen, struct exclude_list *el) { struct exclude *x; int patternlen; @@ -401,8 +403,8 @@ void add_exclude(const char *string, const char *base, x->base = base; x->baselen = baselen; x->flags = flags; - ALLOC_GROW(which->excludes, which->nr + 1, which->alloc); - which->excludes[which->nr++] = x; + ALLOC_GROW(el->excludes, el->nr + 1, el->alloc); + el->excludes[el->nr++] = x; } static void *read_skip_worktree_file_from_index(const char *path, size_t *size) @@ -428,7 +430,11 @@ static void *read_skip_worktree_file_from_index(const char *path, size_t *size) return data; } -void free_excludes(struct exclude_list *el) +/* + * Frees memory within el which was allocated for exclude patterns and + * the file buffer. Does not free el itself. + */ +void clear_exclude_list(struct exclude_list *el) { int i; @@ -444,7 +450,7 @@ int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen, char **buf_p, - struct exclude_list *which, + struct exclude_list *el, int check_index) { struct stat st; @@ -493,7 +499,7 @@ int add_excludes_from_file_to_list(const char *fname, if (buf[i] == '\n') { if (entry != buf + i && entry[0] != '#') { buf[i - (i && buf[i-1] == '\r')] = 0; - add_exclude(entry, base, baselen, which); + add_exclude(entry, base, baselen, el); } entry = buf + i + 1; } @@ -508,6 +514,10 @@ void add_excludes_from_file(struct dir_struct *dir, const char *fname) die("cannot use %s as an exclude file", fname); } +/* + * Loads the per-directory exclude list for the substring of base + * which has a char length of baselen. + */ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen) { struct exclude_list *el; @@ -518,7 +528,7 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen) (baselen + strlen(dir->exclude_per_dir) >= PATH_MAX)) return; /* too long a path -- ignore */ - /* Pop the ones that are not the prefix of the path being checked. */ + /* Pop the directories that are not the prefix of the path being checked. */ el = &dir->exclude_list[EXC_DIRS]; while ((stk = dir->exclude_stack) != NULL) { if (stk->baselen <= baselen && @@ -629,22 +639,26 @@ int match_pathname(const char *pathname, int pathlen, ignore_case ? FNM_CASEFOLD : 0) == 0; } -/* Scan the list and let the last match determine the fate. - * Return 1 for exclude, 0 for include and -1 for undecided. +/* + * Scan the given exclude list in reverse to see whether pathname + * should be ignored. The first match (i.e. the last on the list), if + * any, determines the fate. Returns the exclude_list element which + * matched, or NULL for undecided. */ -int excluded_from_list(const char *pathname, - int pathlen, const char *basename, int *dtype, - struct exclude_list *el) +static struct exclude *last_exclude_matching_from_list(const char *pathname, + int pathlen, + const char *basename, + int *dtype, + struct exclude_list *el) { int i; if (!el->nr) - return -1; /* undefined */ + return NULL; /* undefined */ for (i = el->nr - 1; 0 <= i; i--) { struct exclude *x = el->excludes[i]; const char *exclude = x->pattern; - int to_exclude = x->flags & EXC_FLAG_NEGATIVE ? 0 : 1; int prefix = x->nowildcardlen; if (x->flags & EXC_FLAG_MUSTBEDIR) { @@ -659,7 +673,7 @@ int excluded_from_list(const char *pathname, pathlen - (basename - pathname), exclude, prefix, x->patternlen, x->flags)) - return to_exclude; + return x; continue; } @@ -667,28 +681,64 @@ int excluded_from_list(const char *pathname, if (match_pathname(pathname, pathlen, x->base, x->baselen ? x->baselen - 1 : 0, exclude, prefix, x->patternlen, x->flags)) - return to_exclude; + return x; } + return NULL; /* undecided */ +} + +/* + * Scan the list and let the last match determine the fate. + * Return 1 for exclude, 0 for include and -1 for undecided. + */ +int is_excluded_from_list(const char *pathname, + int pathlen, const char *basename, int *dtype, + struct exclude_list *el) +{ + struct exclude *exclude; + exclude = last_exclude_matching_from_list(pathname, pathlen, basename, dtype, el); + if (exclude) + return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1; return -1; /* undecided */ } -static int excluded(struct dir_struct *dir, const char *pathname, int *dtype_p) +/* + * Loads the exclude lists for the directory containing pathname, then + * scans all exclude lists to determine whether pathname is excluded. + * Returns the exclude_list element which matched, or NULL for + * undecided. + */ +static struct exclude *last_exclude_matching(struct dir_struct *dir, + const char *pathname, + int *dtype_p) { int pathlen = strlen(pathname); int st; + struct exclude *exclude; const char *basename = strrchr(pathname, '/'); basename = (basename) ? basename+1 : pathname; prep_exclude(dir, pathname, basename-pathname); for (st = EXC_CMDL; st <= EXC_FILE; st++) { - switch (excluded_from_list(pathname, pathlen, basename, - dtype_p, &dir->exclude_list[st])) { - case 0: - return 0; - case 1: - return 1; - } + exclude = last_exclude_matching_from_list( + pathname, pathlen, basename, dtype_p, + &dir->exclude_list[st]); + if (exclude) + return exclude; } + return NULL; +} + +/* + * Loads the exclude lists for the directory containing pathname, then + * scans all exclude lists to determine whether pathname is excluded. + * Returns 1 if true, otherwise 0. + */ +static int is_excluded(struct dir_struct *dir, const char *pathname, int *dtype_p) +{ + struct exclude *exclude = + last_exclude_matching(dir, pathname, dtype_p); + if (exclude) + return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1; return 0; } @@ -696,6 +746,7 @@ void path_exclude_check_init(struct path_exclude_check *check, struct dir_struct *dir) { check->dir = dir; + check->exclude = NULL; strbuf_init(&check->path, 256); } @@ -705,32 +756,41 @@ void path_exclude_check_clear(struct path_exclude_check *check) } /* - * Is this name excluded? This is for a caller like show_files() that - * do not honor directory hierarchy and iterate through paths that are - * possibly in an ignored directory. + * For each subdirectory in name, starting with the top-most, checks + * to see if that subdirectory is excluded, and if so, returns the + * corresponding exclude structure. Otherwise, checks whether name + * itself (which is presumably a file) is excluded. * * A path to a directory known to be excluded is left in check->path to * optimize for repeated checks for files in the same excluded directory. */ -int path_excluded(struct path_exclude_check *check, - const char *name, int namelen, int *dtype) +struct exclude *last_exclude_matching_path(struct path_exclude_check *check, + const char *name, int namelen, + int *dtype) { int i; struct strbuf *path = &check->path; + struct exclude *exclude; /* * we allow the caller to pass namelen as an optimization; it * must match the length of the name, as we eventually call - * excluded() on the whole name string. + * is_excluded() on the whole name string. */ if (namelen < 0) namelen = strlen(name); + /* + * If path is non-empty, and name is equal to path or a + * subdirectory of path, name should be excluded, because + * it's inside a directory which is already known to be + * excluded and was previously left in check->path. + */ if (path->len && path->len <= namelen && !memcmp(name, path->buf, path->len) && (!name[path->len] || name[path->len] == '/')) - return 1; + return check->exclude; strbuf_setlen(path, 0); for (i = 0; name[i]; i++) { @@ -738,8 +798,12 @@ int path_excluded(struct path_exclude_check *check, if (ch == '/') { int dt = DT_DIR; - if (excluded(check->dir, path->buf, &dt)) - return 1; + exclude = last_exclude_matching(check->dir, + path->buf, &dt); + if (exclude) { + check->exclude = exclude; + return exclude; + } } strbuf_addch(path, ch); } @@ -747,7 +811,22 @@ int path_excluded(struct path_exclude_check *check, /* An entry in the index; cannot be a directory with subentries */ strbuf_setlen(path, 0); - return excluded(check->dir, name, dtype); + return last_exclude_matching(check->dir, name, dtype); +} + +/* + * Is this name excluded? This is for a caller like show_files() that + * do not honor directory hierarchy and iterate through paths that are + * possibly in an ignored directory. + */ +int is_path_excluded(struct path_exclude_check *check, + const char *name, int namelen, int *dtype) +{ + struct exclude *exclude = + last_exclude_matching_path(check, name, namelen, dtype); + if (exclude) + return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1; + return 0; } static struct dir_entry *dir_entry_new(const char *pathname, int len) @@ -1047,7 +1126,7 @@ static enum path_treatment treat_one_path(struct dir_struct *dir, const struct path_simplify *simplify, int dtype, struct dirent *de) { - int exclude = excluded(dir, path->buf, &dtype); + int exclude = is_excluded(dir, path->buf, &dtype); if (exclude && (dir->flags & DIR_COLLECT_IGNORED) && exclude_matches_pathspec(path->buf, path->len, simplify)) dir_add_ignored(dir, path->buf, path->len); diff --git a/dir.h b/dir.h index ab5af42b2e..ae1bc467ae 100644 --- a/dir.h +++ b/dir.h @@ -1,6 +1,8 @@ #ifndef DIR_H #define DIR_H +/* See Documentation/technical/api-directory-listing.txt */ + #include "strbuf.h" struct dir_entry { @@ -13,6 +15,12 @@ struct dir_entry { #define EXC_FLAG_MUSTBEDIR 8 #define EXC_FLAG_NEGATIVE 16 +/* + * Each .gitignore file will be parsed into patterns which are then + * appended to the relevant exclude_list (either EXC_DIRS or + * EXC_FILE). exclude_lists are also used to represent the list of + * --exclude values passed via CLI args (EXC_CMDL). + */ struct exclude_list { int nr; int alloc; @@ -26,9 +34,15 @@ struct exclude_list { } **excludes; }; +/* + * The contents of the per-directory exclude files are lazily read on + * demand and then cached in memory, one per exclude_stack struct, in + * order to avoid opening and parsing each one every time that + * directory is traversed. + */ struct exclude_stack { - struct exclude_stack *prev; - char *filebuf; + struct exclude_stack *prev; /* the struct exclude_stack for the parent directory */ + char *filebuf; /* remember pointer to per-directory exclude file contents so we can free() */ int baselen; int exclude_ix; }; @@ -59,6 +73,14 @@ struct dir_struct { #define EXC_DIRS 1 #define EXC_FILE 2 + /* + * Temporary variables which are used during loading of the + * per-directory exclude lists. + * + * exclude_stack points to the top of the exclude_stack, and + * basebuf contains the full path to the current + * (sub)directory in the traversal. + */ struct exclude_stack *exclude_stack; char basebuf[PATH_MAX]; }; @@ -76,8 +98,8 @@ extern int within_depth(const char *name, int namelen, int depth, int max_depth) extern int fill_directory(struct dir_struct *dir, const char **pathspec); extern int read_directory(struct dir_struct *, const char *path, int len, const char **pathspec); -extern int excluded_from_list(const char *pathname, int pathlen, const char *basename, - int *dtype, struct exclude_list *el); +extern int is_excluded_from_list(const char *pathname, int pathlen, const char *basename, + int *dtype, struct exclude_list *el); struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len); /* @@ -91,26 +113,29 @@ extern int match_pathname(const char *, int, const char *, int, int, int); /* - * The excluded() API is meant for callers that check each level of leading - * directory hierarchies with excluded() to avoid recursing into excluded + * The is_excluded() API is meant for callers that check each level of leading + * directory hierarchies with is_excluded() to avoid recursing into excluded * directories. Callers that do not do so should use this API instead. */ struct path_exclude_check { struct dir_struct *dir; + struct exclude *exclude; struct strbuf path; }; extern void path_exclude_check_init(struct path_exclude_check *, struct dir_struct *); extern void path_exclude_check_clear(struct path_exclude_check *); -extern int path_excluded(struct path_exclude_check *, const char *, int namelen, int *dtype); +extern struct exclude *last_exclude_matching_path(struct path_exclude_check *, const char *, + int namelen, int *dtype); +extern int is_path_excluded(struct path_exclude_check *, const char *, int namelen, int *dtype); extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen, - char **buf_p, struct exclude_list *which, int check_index); + char **buf_p, struct exclude_list *el, int check_index); extern void add_excludes_from_file(struct dir_struct *, const char *fname); extern void parse_exclude_pattern(const char **string, int *patternlen, int *flags, int *nowildcardlen); extern void add_exclude(const char *string, const char *base, - int baselen, struct exclude_list *which); -extern void free_excludes(struct exclude_list *el); + int baselen, struct exclude_list *el); +extern void clear_exclude_list(struct exclude_list *el); extern int file_exists(const char *); extern int is_inside_dir(const char *dir); diff --git a/unpack-trees.c b/unpack-trees.c index 61acc5e564..0e1a196ace 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -837,7 +837,8 @@ static int clear_ce_flags_dir(struct cache_entry **cache, int nr, { struct cache_entry **cache_end; int dtype = DT_DIR; - int ret = excluded_from_list(prefix, prefix_len, basename, &dtype, el); + int ret = is_excluded_from_list(prefix, prefix_len, + basename, &dtype, el); prefix[prefix_len++] = '/'; @@ -856,7 +857,7 @@ static int clear_ce_flags_dir(struct cache_entry **cache, int nr, * with ret (iow, we know in advance the incl/excl * decision for the entire directory), clear flag here without * calling clear_ce_flags_1(). That function will call - * the expensive excluded_from_list() on every entry. + * the expensive is_excluded_from_list() on every entry. */ return clear_ce_flags_1(cache, cache_end - cache, prefix, prefix_len, @@ -939,7 +940,8 @@ static int clear_ce_flags_1(struct cache_entry **cache, int nr, /* Non-directory */ dtype = ce_to_dtype(ce); - ret = excluded_from_list(ce->name, ce_namelen(ce), name, &dtype, el); + ret = is_excluded_from_list(ce->name, ce_namelen(ce), + name, &dtype, el); if (ret < 0) ret = defval; if (ret > 0) @@ -1152,7 +1154,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options *o->dst_index = o->result; done: - free_excludes(&el); + clear_exclude_list(&el); if (o->path_exclude_check) { path_exclude_check_clear(o->path_exclude_check); free(o->path_exclude_check); @@ -1373,7 +1375,7 @@ static int check_ok_to_remove(const char *name, int len, int dtype, return 0; if (o->dir && - path_excluded(o->path_exclude_check, name, -1, &dtype)) + is_path_excluded(o->path_exclude_check, name, -1, &dtype)) /* * ce->name is explicitly excluded, so it is Ok to * overwrite it.