Merge branch 'as/dir-c-cleanup'

Refactor and generally clean up the directory traversal API
implementation.

* as/dir-c-cleanup:
  dir.c: rename free_excludes() to clear_exclude_list()
  dir.c: refactor is_path_excluded()
  dir.c: refactor is_excluded()
  dir.c: refactor is_excluded_from_list()
  dir.c: rename excluded() to is_excluded()
  dir.c: rename excluded_from_list() to is_excluded_from_list()
  dir.c: rename path_excluded() to is_path_excluded()
  dir.c: rename cryptic 'which' variable to more consistent name
  Improve documentation and comments regarding directory traversal API
  api-directory-listing.txt: update to match code
This commit is contained in:
Junio C Hamano 2013-01-10 13:47:25 -08:00
commit d912b0e44f
7 changed files with 171 additions and 62 deletions

View file

@ -9,37 +9,40 @@ Data structure
-------------- --------------
`struct dir_struct` structure is used to pass directory traversal `struct dir_struct` structure is used to pass directory traversal
options to the library and to record the paths discovered. The notable options to the library and to record the paths discovered. A single
options are: `struct dir_struct` is used regardless of whether or not the traversal
recursively descends into subdirectories.
The notable options are:
`exclude_per_dir`:: `exclude_per_dir`::
The name of the file to be read in each directory for excluded The name of the file to be read in each directory for excluded
files (typically `.gitignore`). files (typically `.gitignore`).
`collect_ignored`:: `flags`::
Include paths that are to be excluded in the result. A bit-field of options:
`show_ignored`:: `DIR_SHOW_IGNORED`:::
The traversal is for finding just ignored files, not unignored The traversal is for finding just ignored files, not unignored
files. files.
`show_other_directories`:: `DIR_SHOW_OTHER_DIRECTORIES`:::
Include a directory that is not tracked. Include a directory that is not tracked.
`hide_empty_directories`:: `DIR_HIDE_EMPTY_DIRECTORIES`:::
Do not include a directory that is not tracked and is empty. Do not include a directory that is not tracked and is empty.
`no_gitlinks`:: `DIR_NO_GITLINKS`:::
If set, recurse into a directory that looks like a git If set, recurse into a directory that looks like a git
directory. Otherwise it is shown as a directory. directory. Otherwise it is shown as a directory.
The result of the enumeration is left in these fields:: The result of the enumeration is left in these fields:
`entries[]`:: `entries[]`::

2
attr.c
View file

@ -284,7 +284,7 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
* (reading the file from top to bottom), .gitattribute of the root * (reading the file from top to bottom), .gitattribute of the root
* directory (again, reading the file from top to bottom) down to the * directory (again, reading the file from top to bottom) down to the
* current directory, and then scan the list backwards to find the first match. * current directory, and then scan the list backwards to find the first match.
* This is exactly the same as what excluded() does in dir.c to deal with * This is exactly the same as what is_excluded() does in dir.c to deal with
* .gitignore * .gitignore
*/ */

View file

@ -454,7 +454,7 @@ int cmd_add(int argc, const char **argv, const char *prefix)
&& !file_exists(pathspec[i])) { && !file_exists(pathspec[i])) {
if (ignore_missing) { if (ignore_missing) {
int dtype = DT_UNKNOWN; int dtype = DT_UNKNOWN;
if (path_excluded(&check, pathspec[i], -1, &dtype)) if (is_path_excluded(&check, pathspec[i], -1, &dtype))
dir_add_ignored(&dir, pathspec[i], strlen(pathspec[i])); dir_add_ignored(&dir, pathspec[i], strlen(pathspec[i]));
} else } else
die(_("pathspec '%s' did not match any files"), die(_("pathspec '%s' did not match any files"),

View file

@ -203,7 +203,7 @@ static void show_ru_info(void)
static int ce_excluded(struct path_exclude_check *check, struct cache_entry *ce) static int ce_excluded(struct path_exclude_check *check, struct cache_entry *ce)
{ {
int dtype = ce_to_dtype(ce); int dtype = ce_to_dtype(ce);
return path_excluded(check, ce->name, ce_namelen(ce), &dtype); return is_path_excluded(check, ce->name, ce_namelen(ce), &dtype);
} }
static void show_files(struct dir_struct *dir) static void show_files(struct dir_struct *dir)

149
dir.c
View file

@ -2,6 +2,8 @@
* This handles recursive filename detection with exclude * This handles recursive filename detection with exclude
* files, index knowledge etc.. * files, index knowledge etc..
* *
* See Documentation/technical/api-directory-listing.txt
*
* Copyright (C) Linus Torvalds, 2005-2006 * Copyright (C) Linus Torvalds, 2005-2006
* Junio Hamano, 2005-2006 * Junio Hamano, 2005-2006
*/ */
@ -377,7 +379,7 @@ void parse_exclude_pattern(const char **pattern,
} }
void add_exclude(const char *string, const char *base, void add_exclude(const char *string, const char *base,
int baselen, struct exclude_list *which) int baselen, struct exclude_list *el)
{ {
struct exclude *x; struct exclude *x;
int patternlen; int patternlen;
@ -401,8 +403,8 @@ void add_exclude(const char *string, const char *base,
x->base = base; x->base = base;
x->baselen = baselen; x->baselen = baselen;
x->flags = flags; x->flags = flags;
ALLOC_GROW(which->excludes, which->nr + 1, which->alloc); ALLOC_GROW(el->excludes, el->nr + 1, el->alloc);
which->excludes[which->nr++] = x; el->excludes[el->nr++] = x;
} }
static void *read_skip_worktree_file_from_index(const char *path, size_t *size) static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
@ -428,7 +430,11 @@ static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
return data; return data;
} }
void free_excludes(struct exclude_list *el) /*
* Frees memory within el which was allocated for exclude patterns and
* the file buffer. Does not free el itself.
*/
void clear_exclude_list(struct exclude_list *el)
{ {
int i; int i;
@ -444,7 +450,7 @@ int add_excludes_from_file_to_list(const char *fname,
const char *base, const char *base,
int baselen, int baselen,
char **buf_p, char **buf_p,
struct exclude_list *which, struct exclude_list *el,
int check_index) int check_index)
{ {
struct stat st; struct stat st;
@ -493,7 +499,7 @@ int add_excludes_from_file_to_list(const char *fname,
if (buf[i] == '\n') { if (buf[i] == '\n') {
if (entry != buf + i && entry[0] != '#') { if (entry != buf + i && entry[0] != '#') {
buf[i - (i && buf[i-1] == '\r')] = 0; buf[i - (i && buf[i-1] == '\r')] = 0;
add_exclude(entry, base, baselen, which); add_exclude(entry, base, baselen, el);
} }
entry = buf + i + 1; entry = buf + i + 1;
} }
@ -508,6 +514,10 @@ void add_excludes_from_file(struct dir_struct *dir, const char *fname)
die("cannot use %s as an exclude file", fname); die("cannot use %s as an exclude file", fname);
} }
/*
* Loads the per-directory exclude list for the leading substring of
* base which has a char length of baselen.
*/
static void prep_exclude(struct dir_struct *dir, const char *base, int baselen) static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
{ {
struct exclude_list *el; struct exclude_list *el;
@ -518,7 +528,7 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
(baselen + strlen(dir->exclude_per_dir) >= PATH_MAX)) (baselen + strlen(dir->exclude_per_dir) >= PATH_MAX))
return; /* too long a path -- ignore */ return; /* too long a path -- ignore */
/* Pop the ones that are not the prefix of the path being checked. */ /* Pop the directories that are not the prefix of the path being checked. */
el = &dir->exclude_list[EXC_DIRS]; el = &dir->exclude_list[EXC_DIRS];
while ((stk = dir->exclude_stack) != NULL) { while ((stk = dir->exclude_stack) != NULL) {
if (stk->baselen <= baselen && if (stk->baselen <= baselen &&
@ -629,22 +639,26 @@ int match_pathname(const char *pathname, int pathlen,
ignore_case ? FNM_CASEFOLD : 0) == 0; ignore_case ? FNM_CASEFOLD : 0) == 0;
} }
/* Scan the list and let the last match determine the fate. /*
* Return 1 for exclude, 0 for include and -1 for undecided. * Scan the given exclude list in reverse to see whether pathname
* should be ignored. The first match (i.e. the last on the list), if
* any, determines the fate. Returns the exclude_list element which
* matched, or NULL for undecided.
*/ */
int excluded_from_list(const char *pathname, static struct exclude *last_exclude_matching_from_list(const char *pathname,
int pathlen, const char *basename, int *dtype, int pathlen,
struct exclude_list *el) const char *basename,
int *dtype,
struct exclude_list *el)
{ {
int i; int i;
if (!el->nr) if (!el->nr)
return -1; /* undefined */ return NULL; /* undefined */
for (i = el->nr - 1; 0 <= i; i--) { for (i = el->nr - 1; 0 <= i; i--) {
struct exclude *x = el->excludes[i]; struct exclude *x = el->excludes[i];
const char *exclude = x->pattern; const char *exclude = x->pattern;
int to_exclude = x->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
int prefix = x->nowildcardlen; int prefix = x->nowildcardlen;
if (x->flags & EXC_FLAG_MUSTBEDIR) { if (x->flags & EXC_FLAG_MUSTBEDIR) {
@ -659,7 +673,7 @@ int excluded_from_list(const char *pathname,
pathlen - (basename - pathname), pathlen - (basename - pathname),
exclude, prefix, x->patternlen, exclude, prefix, x->patternlen,
x->flags)) x->flags))
return to_exclude; return x;
continue; continue;
} }
@ -667,28 +681,64 @@ int excluded_from_list(const char *pathname,
if (match_pathname(pathname, pathlen, if (match_pathname(pathname, pathlen,
x->base, x->baselen ? x->baselen - 1 : 0, x->base, x->baselen ? x->baselen - 1 : 0,
exclude, prefix, x->patternlen, x->flags)) exclude, prefix, x->patternlen, x->flags))
return to_exclude; return x;
} }
return NULL; /* undecided */
}
/*
* Scan the list and let the last match determine the fate.
* Return 1 for exclude, 0 for include and -1 for undecided.
*/
int is_excluded_from_list(const char *pathname,
int pathlen, const char *basename, int *dtype,
struct exclude_list *el)
{
struct exclude *exclude;
exclude = last_exclude_matching_from_list(pathname, pathlen, basename, dtype, el);
if (exclude)
return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
return -1; /* undecided */ return -1; /* undecided */
} }
static int excluded(struct dir_struct *dir, const char *pathname, int *dtype_p) /*
* Loads the exclude lists for the directory containing pathname, then
* scans all exclude lists to determine whether pathname is excluded.
* Returns the exclude_list element which matched, or NULL for
* undecided.
*/
static struct exclude *last_exclude_matching(struct dir_struct *dir,
const char *pathname,
int *dtype_p)
{ {
int pathlen = strlen(pathname); int pathlen = strlen(pathname);
int st; int st;
struct exclude *exclude;
const char *basename = strrchr(pathname, '/'); const char *basename = strrchr(pathname, '/');
basename = (basename) ? basename+1 : pathname; basename = (basename) ? basename+1 : pathname;
prep_exclude(dir, pathname, basename-pathname); prep_exclude(dir, pathname, basename-pathname);
for (st = EXC_CMDL; st <= EXC_FILE; st++) { for (st = EXC_CMDL; st <= EXC_FILE; st++) {
switch (excluded_from_list(pathname, pathlen, basename, exclude = last_exclude_matching_from_list(
dtype_p, &dir->exclude_list[st])) { pathname, pathlen, basename, dtype_p,
case 0: &dir->exclude_list[st]);
return 0; if (exclude)
case 1: return exclude;
return 1;
}
} }
return NULL;
}
/*
* Loads the exclude lists for the directory containing pathname, then
* scans all exclude lists to determine whether pathname is excluded.
* Returns 1 if true, otherwise 0.
*/
static int is_excluded(struct dir_struct *dir, const char *pathname, int *dtype_p)
{
struct exclude *exclude =
last_exclude_matching(dir, pathname, dtype_p);
if (exclude)
return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
return 0; return 0;
} }
@ -696,6 +746,7 @@ void path_exclude_check_init(struct path_exclude_check *check,
struct dir_struct *dir) struct dir_struct *dir)
{ {
check->dir = dir; check->dir = dir;
check->exclude = NULL;
strbuf_init(&check->path, 256); strbuf_init(&check->path, 256);
} }
@ -705,32 +756,41 @@ void path_exclude_check_clear(struct path_exclude_check *check)
} }
/* /*
* Is this name excluded? This is for a caller like show_files() that * For each subdirectory in name, starting with the top-most, checks
* do not honor directory hierarchy and iterate through paths that are * to see if that subdirectory is excluded, and if so, returns the
* possibly in an ignored directory. * corresponding exclude structure. Otherwise, checks whether name
* itself (which is presumably a file) is excluded.
* *
* A path to a directory known to be excluded is left in check->path to * A path to a directory known to be excluded is left in check->path to
* optimize for repeated checks for files in the same excluded directory. * optimize for repeated checks for files in the same excluded directory.
*/ */
int path_excluded(struct path_exclude_check *check, struct exclude *last_exclude_matching_path(struct path_exclude_check *check,
const char *name, int namelen, int *dtype) const char *name, int namelen,
int *dtype)
{ {
int i; int i;
struct strbuf *path = &check->path; struct strbuf *path = &check->path;
struct exclude *exclude;
/* /*
* we allow the caller to pass namelen as an optimization; it * we allow the caller to pass namelen as an optimization; it
* must match the length of the name, as we eventually call * must match the length of the name, as we eventually call
* excluded() on the whole name string. * is_excluded() on the whole name string.
*/ */
if (namelen < 0) if (namelen < 0)
namelen = strlen(name); namelen = strlen(name);
/*
* If path is non-empty, and name is equal to path or a
* subdirectory of path, name should be excluded, because
* it's inside a directory which is already known to be
* excluded and was previously left in check->path.
*/
if (path->len && if (path->len &&
path->len <= namelen && path->len <= namelen &&
!memcmp(name, path->buf, path->len) && !memcmp(name, path->buf, path->len) &&
(!name[path->len] || name[path->len] == '/')) (!name[path->len] || name[path->len] == '/'))
return 1; return check->exclude;
strbuf_setlen(path, 0); strbuf_setlen(path, 0);
for (i = 0; name[i]; i++) { for (i = 0; name[i]; i++) {
@ -738,8 +798,12 @@ int path_excluded(struct path_exclude_check *check,
if (ch == '/') { if (ch == '/') {
int dt = DT_DIR; int dt = DT_DIR;
if (excluded(check->dir, path->buf, &dt)) exclude = last_exclude_matching(check->dir,
return 1; path->buf, &dt);
if (exclude) {
check->exclude = exclude;
return exclude;
}
} }
strbuf_addch(path, ch); strbuf_addch(path, ch);
} }
@ -747,7 +811,22 @@ int path_excluded(struct path_exclude_check *check,
/* An entry in the index; cannot be a directory with subentries */ /* An entry in the index; cannot be a directory with subentries */
strbuf_setlen(path, 0); strbuf_setlen(path, 0);
return excluded(check->dir, name, dtype); return last_exclude_matching(check->dir, name, dtype);
}
/*
* Is this name excluded? This is for callers like show_files() that
* do not honor directory hierarchy and iterate through paths that are
* possibly in an ignored directory.
*/
int is_path_excluded(struct path_exclude_check *check,
const char *name, int namelen, int *dtype)
{
struct exclude *exclude =
last_exclude_matching_path(check, name, namelen, dtype);
if (exclude)
return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
return 0;
} }
static struct dir_entry *dir_entry_new(const char *pathname, int len) static struct dir_entry *dir_entry_new(const char *pathname, int len)
@ -1047,7 +1126,7 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
const struct path_simplify *simplify, const struct path_simplify *simplify,
int dtype, struct dirent *de) int dtype, struct dirent *de)
{ {
int exclude = excluded(dir, path->buf, &dtype); int exclude = is_excluded(dir, path->buf, &dtype);
if (exclude && (dir->flags & DIR_COLLECT_IGNORED) if (exclude && (dir->flags & DIR_COLLECT_IGNORED)
&& exclude_matches_pathspec(path->buf, path->len, simplify)) && exclude_matches_pathspec(path->buf, path->len, simplify))
dir_add_ignored(dir, path->buf, path->len); dir_add_ignored(dir, path->buf, path->len);

45
dir.h
View file

@ -1,6 +1,8 @@
#ifndef DIR_H #ifndef DIR_H
#define DIR_H #define DIR_H
/* See Documentation/technical/api-directory-listing.txt */
#include "strbuf.h" #include "strbuf.h"
struct dir_entry { struct dir_entry {
@ -13,6 +15,12 @@ struct dir_entry {
#define EXC_FLAG_MUSTBEDIR 8 #define EXC_FLAG_MUSTBEDIR 8
#define EXC_FLAG_NEGATIVE 16 #define EXC_FLAG_NEGATIVE 16
/*
* Each .gitignore file will be parsed into patterns which are then
* appended to the relevant exclude_list (either EXC_DIRS or
* EXC_FILE). exclude_lists are also used to represent the list of
* --exclude values passed via CLI args (EXC_CMDL).
*/
struct exclude_list { struct exclude_list {
int nr; int nr;
int alloc; int alloc;
@ -26,9 +34,15 @@ struct exclude_list {
} **excludes; } **excludes;
}; };
/*
* The contents of the per-directory exclude files are lazily read on
* demand and then cached in memory, one per exclude_stack struct, in
* order to avoid opening and parsing each one every time that
* directory is traversed.
*/
struct exclude_stack { struct exclude_stack {
struct exclude_stack *prev; struct exclude_stack *prev; /* the struct exclude_stack for the parent directory */
char *filebuf; char *filebuf; /* remember pointer to per-directory exclude file contents so we can free() */
int baselen; int baselen;
int exclude_ix; int exclude_ix;
}; };
@ -59,6 +73,14 @@ struct dir_struct {
#define EXC_DIRS 1 #define EXC_DIRS 1
#define EXC_FILE 2 #define EXC_FILE 2
/*
* Temporary variables which are used during loading of the
* per-directory exclude lists.
*
* exclude_stack points to the top of the stack of per-directory
* exclude_stack entries, and basebuf contains the full path to the
* current (sub)directory in the traversal.
*/
struct exclude_stack *exclude_stack; struct exclude_stack *exclude_stack;
char basebuf[PATH_MAX]; char basebuf[PATH_MAX];
}; };
@ -76,8 +98,8 @@ extern int within_depth(const char *name, int namelen, int depth, int max_depth)
extern int fill_directory(struct dir_struct *dir, const char **pathspec); extern int fill_directory(struct dir_struct *dir, const char **pathspec);
extern int read_directory(struct dir_struct *, const char *path, int len, const char **pathspec); extern int read_directory(struct dir_struct *, const char *path, int len, const char **pathspec);
extern int excluded_from_list(const char *pathname, int pathlen, const char *basename, extern int is_excluded_from_list(const char *pathname, int pathlen, const char *basename,
int *dtype, struct exclude_list *el); int *dtype, struct exclude_list *el);
struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len); struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len);
/* /*
@ -91,26 +113,29 @@ extern int match_pathname(const char *, int,
const char *, int, int, int); const char *, int, int, int);
/* /*
* The excluded() API is meant for callers that check each level of leading * The is_excluded() API is meant for callers that check each level of leading
* directory hierarchies with excluded() to avoid recursing into excluded * directory hierarchies with is_excluded() to avoid recursing into excluded
* directories. Callers that do not do so should use this API instead. * directories. Callers that do not do so should use this API instead.
*/ */
struct path_exclude_check { struct path_exclude_check {
struct dir_struct *dir; struct dir_struct *dir;
struct exclude *exclude;
struct strbuf path; struct strbuf path;
}; };
extern void path_exclude_check_init(struct path_exclude_check *, struct dir_struct *); extern void path_exclude_check_init(struct path_exclude_check *, struct dir_struct *);
extern void path_exclude_check_clear(struct path_exclude_check *); extern void path_exclude_check_clear(struct path_exclude_check *);
extern int path_excluded(struct path_exclude_check *, const char *, int namelen, int *dtype); extern struct exclude *last_exclude_matching_path(struct path_exclude_check *, const char *,
int namelen, int *dtype);
extern int is_path_excluded(struct path_exclude_check *, const char *, int namelen, int *dtype);
extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen, extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen,
char **buf_p, struct exclude_list *which, int check_index); char **buf_p, struct exclude_list *el, int check_index);
extern void add_excludes_from_file(struct dir_struct *, const char *fname); extern void add_excludes_from_file(struct dir_struct *, const char *fname);
extern void parse_exclude_pattern(const char **string, int *patternlen, int *flags, int *nowildcardlen); extern void parse_exclude_pattern(const char **string, int *patternlen, int *flags, int *nowildcardlen);
extern void add_exclude(const char *string, const char *base, extern void add_exclude(const char *string, const char *base,
int baselen, struct exclude_list *which); int baselen, struct exclude_list *el);
extern void free_excludes(struct exclude_list *el); extern void clear_exclude_list(struct exclude_list *el);
extern int file_exists(const char *); extern int file_exists(const char *);
extern int is_inside_dir(const char *dir); extern int is_inside_dir(const char *dir);

View file

@ -837,7 +837,8 @@ static int clear_ce_flags_dir(struct cache_entry **cache, int nr,
{ {
struct cache_entry **cache_end; struct cache_entry **cache_end;
int dtype = DT_DIR; int dtype = DT_DIR;
int ret = excluded_from_list(prefix, prefix_len, basename, &dtype, el); int ret = is_excluded_from_list(prefix, prefix_len,
basename, &dtype, el);
prefix[prefix_len++] = '/'; prefix[prefix_len++] = '/';
@ -856,7 +857,7 @@ static int clear_ce_flags_dir(struct cache_entry **cache, int nr,
* with ret (iow, we know in advance the incl/excl * with ret (iow, we know in advance the incl/excl
* decision for the entire directory), clear flag here without * decision for the entire directory), clear flag here without
* calling clear_ce_flags_1(). That function will call * calling clear_ce_flags_1(). That function will call
* the expensive excluded_from_list() on every entry. * the expensive is_excluded_from_list() on every entry.
*/ */
return clear_ce_flags_1(cache, cache_end - cache, return clear_ce_flags_1(cache, cache_end - cache,
prefix, prefix_len, prefix, prefix_len,
@ -939,7 +940,8 @@ static int clear_ce_flags_1(struct cache_entry **cache, int nr,
/* Non-directory */ /* Non-directory */
dtype = ce_to_dtype(ce); dtype = ce_to_dtype(ce);
ret = excluded_from_list(ce->name, ce_namelen(ce), name, &dtype, el); ret = is_excluded_from_list(ce->name, ce_namelen(ce),
name, &dtype, el);
if (ret < 0) if (ret < 0)
ret = defval; ret = defval;
if (ret > 0) if (ret > 0)
@ -1152,7 +1154,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
*o->dst_index = o->result; *o->dst_index = o->result;
done: done:
free_excludes(&el); clear_exclude_list(&el);
if (o->path_exclude_check) { if (o->path_exclude_check) {
path_exclude_check_clear(o->path_exclude_check); path_exclude_check_clear(o->path_exclude_check);
free(o->path_exclude_check); free(o->path_exclude_check);
@ -1373,7 +1375,7 @@ static int check_ok_to_remove(const char *name, int len, int dtype,
return 0; return 0;
if (o->dir && if (o->dir &&
path_excluded(o->path_exclude_check, name, -1, &dtype)) is_path_excluded(o->path_exclude_check, name, -1, &dtype))
/* /*
* ce->name is explicitly excluded, so it is Ok to * ce->name is explicitly excluded, so it is Ok to
* overwrite it. * overwrite it.