Merge branch 'as/dir-c-cleanup'

Refactor and generally clean up the directory traversal API
implementation.

* as/dir-c-cleanup:
  dir.c: rename free_excludes() to clear_exclude_list()
  dir.c: refactor is_path_excluded()
  dir.c: refactor is_excluded()
  dir.c: refactor is_excluded_from_list()
  dir.c: rename excluded() to is_excluded()
  dir.c: rename excluded_from_list() to is_excluded_from_list()
  dir.c: rename path_excluded() to is_path_excluded()
  dir.c: rename cryptic 'which' variable to more consistent name
  Improve documentation and comments regarding directory traversal API
  api-directory-listing.txt: update to match code
This commit is contained in:
Junio C Hamano 2013-01-10 13:47:25 -08:00
commit d912b0e44f
7 changed files with 171 additions and 62 deletions

View file

@ -9,37 +9,40 @@ Data structure
--------------
`struct dir_struct` structure is used to pass directory traversal
options to the library and to record the paths discovered. The notable
options are:
options to the library and to record the paths discovered. A single
`struct dir_struct` is used regardless of whether or not the traversal
recursively descends into subdirectories.
The notable options are:
`exclude_per_dir`::
The name of the file to be read in each directory for excluded
files (typically `.gitignore`).
`collect_ignored`::
`flags`::
Include paths that are to be excluded in the result.
A bit-field of options:
`show_ignored`::
`DIR_SHOW_IGNORED`:::
The traversal is for finding just ignored files, not unignored
files.
`show_other_directories`::
`DIR_SHOW_OTHER_DIRECTORIES`:::
Include a directory that is not tracked.
`hide_empty_directories`::
`DIR_HIDE_EMPTY_DIRECTORIES`:::
Do not include a directory that is not tracked and is empty.
`no_gitlinks`::
`DIR_NO_GITLINKS`:::
If set, recurse into a directory that looks like a git
directory. Otherwise it is shown as a directory.
The result of the enumeration is left in these fields::
The result of the enumeration is left in these fields:
`entries[]`::

2
attr.c
View file

@ -284,7 +284,7 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
* (reading the file from top to bottom), .gitattribute of the root
* directory (again, reading the file from top to bottom) down to the
* current directory, and then scan the list backwards to find the first match.
* This is exactly the same as what excluded() does in dir.c to deal with
* This is exactly the same as what is_excluded() does in dir.c to deal with
* .gitignore
*/

View file

@ -454,7 +454,7 @@ int cmd_add(int argc, const char **argv, const char *prefix)
&& !file_exists(pathspec[i])) {
if (ignore_missing) {
int dtype = DT_UNKNOWN;
if (path_excluded(&check, pathspec[i], -1, &dtype))
if (is_path_excluded(&check, pathspec[i], -1, &dtype))
dir_add_ignored(&dir, pathspec[i], strlen(pathspec[i]));
} else
die(_("pathspec '%s' did not match any files"),

View file

@ -203,7 +203,7 @@ static void show_ru_info(void)
static int ce_excluded(struct path_exclude_check *check, struct cache_entry *ce)
{
int dtype = ce_to_dtype(ce);
return path_excluded(check, ce->name, ce_namelen(ce), &dtype);
return is_path_excluded(check, ce->name, ce_namelen(ce), &dtype);
}
static void show_files(struct dir_struct *dir)

149
dir.c
View file

@ -2,6 +2,8 @@
* This handles recursive filename detection with exclude
* files, index knowledge etc..
*
* See Documentation/technical/api-directory-listing.txt
*
* Copyright (C) Linus Torvalds, 2005-2006
* Junio Hamano, 2005-2006
*/
@ -377,7 +379,7 @@ void parse_exclude_pattern(const char **pattern,
}
void add_exclude(const char *string, const char *base,
int baselen, struct exclude_list *which)
int baselen, struct exclude_list *el)
{
struct exclude *x;
int patternlen;
@ -401,8 +403,8 @@ void add_exclude(const char *string, const char *base,
x->base = base;
x->baselen = baselen;
x->flags = flags;
ALLOC_GROW(which->excludes, which->nr + 1, which->alloc);
which->excludes[which->nr++] = x;
ALLOC_GROW(el->excludes, el->nr + 1, el->alloc);
el->excludes[el->nr++] = x;
}
static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
@ -428,7 +430,11 @@ static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
return data;
}
void free_excludes(struct exclude_list *el)
/*
* Frees memory within el which was allocated for exclude patterns and
* the file buffer. Does not free el itself.
*/
void clear_exclude_list(struct exclude_list *el)
{
int i;
@ -444,7 +450,7 @@ int add_excludes_from_file_to_list(const char *fname,
const char *base,
int baselen,
char **buf_p,
struct exclude_list *which,
struct exclude_list *el,
int check_index)
{
struct stat st;
@ -493,7 +499,7 @@ int add_excludes_from_file_to_list(const char *fname,
if (buf[i] == '\n') {
if (entry != buf + i && entry[0] != '#') {
buf[i - (i && buf[i-1] == '\r')] = 0;
add_exclude(entry, base, baselen, which);
add_exclude(entry, base, baselen, el);
}
entry = buf + i + 1;
}
@ -508,6 +514,10 @@ void add_excludes_from_file(struct dir_struct *dir, const char *fname)
die("cannot use %s as an exclude file", fname);
}
/*
* Loads the per-directory exclude list for the substring of base
* which has a char length of baselen.
*/
static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
{
struct exclude_list *el;
@ -518,7 +528,7 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
(baselen + strlen(dir->exclude_per_dir) >= PATH_MAX))
return; /* too long a path -- ignore */
/* Pop the ones that are not the prefix of the path being checked. */
/* Pop the directories that are not the prefix of the path being checked. */
el = &dir->exclude_list[EXC_DIRS];
while ((stk = dir->exclude_stack) != NULL) {
if (stk->baselen <= baselen &&
@ -629,22 +639,26 @@ int match_pathname(const char *pathname, int pathlen,
ignore_case ? FNM_CASEFOLD : 0) == 0;
}
/* Scan the list and let the last match determine the fate.
* Return 1 for exclude, 0 for include and -1 for undecided.
/*
* Scan the given exclude list in reverse to see whether pathname
* should be ignored. The first match (i.e. the last on the list), if
* any, determines the fate. Returns the exclude_list element which
* matched, or NULL for undecided.
*/
int excluded_from_list(const char *pathname,
int pathlen, const char *basename, int *dtype,
struct exclude_list *el)
static struct exclude *last_exclude_matching_from_list(const char *pathname,
int pathlen,
const char *basename,
int *dtype,
struct exclude_list *el)
{
int i;
if (!el->nr)
return -1; /* undefined */
return NULL; /* undefined */
for (i = el->nr - 1; 0 <= i; i--) {
struct exclude *x = el->excludes[i];
const char *exclude = x->pattern;
int to_exclude = x->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
int prefix = x->nowildcardlen;
if (x->flags & EXC_FLAG_MUSTBEDIR) {
@ -659,7 +673,7 @@ int excluded_from_list(const char *pathname,
pathlen - (basename - pathname),
exclude, prefix, x->patternlen,
x->flags))
return to_exclude;
return x;
continue;
}
@ -667,28 +681,64 @@ int excluded_from_list(const char *pathname,
if (match_pathname(pathname, pathlen,
x->base, x->baselen ? x->baselen - 1 : 0,
exclude, prefix, x->patternlen, x->flags))
return to_exclude;
return x;
}
return NULL; /* undecided */
}
/*
 * Decide the fate of pathname against a single exclude list, letting
 * the last matching pattern on the list win.
 *
 * Returns 1 if the matching pattern excludes the path, 0 if the
 * matching pattern carries EXC_FLAG_NEGATIVE (i.e. the path is
 * included), and -1 if nothing on the list matched (undecided).
 */
int is_excluded_from_list(const char *pathname,
			  int pathlen, const char *basename, int *dtype,
			  struct exclude_list *el)
{
	struct exclude *match =
		last_exclude_matching_from_list(pathname, pathlen, basename,
						dtype, el);

	if (!match)
		return -1; /* undecided */
	if (match->flags & EXC_FLAG_NEGATIVE)
		return 0;
	return 1;
}
static int excluded(struct dir_struct *dir, const char *pathname, int *dtype_p)
/*
* Loads the exclude lists for the directory containing pathname, then
* scans all exclude lists to determine whether pathname is excluded.
* Returns the exclude_list element which matched, or NULL for
* undecided.
*/
static struct exclude *last_exclude_matching(struct dir_struct *dir,
const char *pathname,
int *dtype_p)
{
int pathlen = strlen(pathname);
int st;
struct exclude *exclude;
const char *basename = strrchr(pathname, '/');
basename = (basename) ? basename+1 : pathname;
prep_exclude(dir, pathname, basename-pathname);
for (st = EXC_CMDL; st <= EXC_FILE; st++) {
switch (excluded_from_list(pathname, pathlen, basename,
dtype_p, &dir->exclude_list[st])) {
case 0:
return 0;
case 1:
return 1;
}
exclude = last_exclude_matching_from_list(
pathname, pathlen, basename, dtype_p,
&dir->exclude_list[st]);
if (exclude)
return exclude;
}
return NULL;
}
/*
 * Asks last_exclude_matching() which exclude entry, if any, decides
 * the fate of pathname (that call loads the exclude lists for the
 * directory containing pathname and scans all exclude lists).
 *
 * Returns 1 if an entry matched and it is not flagged
 * EXC_FLAG_NEGATIVE; returns 0 otherwise, including when nothing
 * matched.
 */
static int is_excluded(struct dir_struct *dir, const char *pathname, int *dtype_p)
{
	struct exclude *match = last_exclude_matching(dir, pathname, dtype_p);

	return match && !(match->flags & EXC_FLAG_NEGATIVE);
}
@ -696,6 +746,7 @@ void path_exclude_check_init(struct path_exclude_check *check,
struct dir_struct *dir)
{
check->dir = dir;
check->exclude = NULL;
strbuf_init(&check->path, 256);
}
@ -705,32 +756,41 @@ void path_exclude_check_clear(struct path_exclude_check *check)
}
/*
* Is this name excluded? This is for a caller like show_files() that
* do not honor directory hierarchy and iterate through paths that are
* possibly in an ignored directory.
* For each subdirectory in name, starting with the top-most, checks
* to see if that subdirectory is excluded, and if so, returns the
* corresponding exclude structure. Otherwise, checks whether name
* itself (which is presumably a file) is excluded.
*
* A path to a directory known to be excluded is left in check->path to
* optimize for repeated checks for files in the same excluded directory.
*/
int path_excluded(struct path_exclude_check *check,
const char *name, int namelen, int *dtype)
struct exclude *last_exclude_matching_path(struct path_exclude_check *check,
const char *name, int namelen,
int *dtype)
{
int i;
struct strbuf *path = &check->path;
struct exclude *exclude;
/*
* we allow the caller to pass namelen as an optimization; it
* must match the length of the name, as we eventually call
* excluded() on the whole name string.
* is_excluded() on the whole name string.
*/
if (namelen < 0)
namelen = strlen(name);
/*
* If path is non-empty, and name is equal to path or a
* subdirectory of path, name should be excluded, because
* it's inside a directory which is already known to be
* excluded and was previously left in check->path.
*/
if (path->len &&
path->len <= namelen &&
!memcmp(name, path->buf, path->len) &&
(!name[path->len] || name[path->len] == '/'))
return 1;
return check->exclude;
strbuf_setlen(path, 0);
for (i = 0; name[i]; i++) {
@ -738,8 +798,12 @@ int path_excluded(struct path_exclude_check *check,
if (ch == '/') {
int dt = DT_DIR;
if (excluded(check->dir, path->buf, &dt))
return 1;
exclude = last_exclude_matching(check->dir,
path->buf, &dt);
if (exclude) {
check->exclude = exclude;
return exclude;
}
}
strbuf_addch(path, ch);
}
@ -747,7 +811,22 @@ int path_excluded(struct path_exclude_check *check,
/* An entry in the index; cannot be a directory with subentries */
strbuf_setlen(path, 0);
return excluded(check->dir, name, dtype);
return last_exclude_matching(check->dir, name, dtype);
}
/*
 * Is this name excluded?  Intended for callers such as show_files()
 * that do not honor the directory hierarchy and iterate through paths
 * that may lie inside an ignored directory.
 *
 * Returns 1 if last_exclude_matching_path() finds an entry that is
 * not flagged EXC_FLAG_NEGATIVE; returns 0 otherwise, including when
 * nothing matched.
 */
int is_path_excluded(struct path_exclude_check *check,
		     const char *name, int namelen, int *dtype)
{
	struct exclude *match;

	match = last_exclude_matching_path(check, name, namelen, dtype);
	if (match && !(match->flags & EXC_FLAG_NEGATIVE))
		return 1;
	return 0;
}
static struct dir_entry *dir_entry_new(const char *pathname, int len)
@ -1047,7 +1126,7 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
const struct path_simplify *simplify,
int dtype, struct dirent *de)
{
int exclude = excluded(dir, path->buf, &dtype);
int exclude = is_excluded(dir, path->buf, &dtype);
if (exclude && (dir->flags & DIR_COLLECT_IGNORED)
&& exclude_matches_pathspec(path->buf, path->len, simplify))
dir_add_ignored(dir, path->buf, path->len);

45
dir.h
View file

@ -1,6 +1,8 @@
#ifndef DIR_H
#define DIR_H
/* See Documentation/technical/api-directory-listing.txt */
#include "strbuf.h"
struct dir_entry {
@ -13,6 +15,12 @@ struct dir_entry {
#define EXC_FLAG_MUSTBEDIR 8
#define EXC_FLAG_NEGATIVE 16
/*
* Each .gitignore file will be parsed into patterns which are then
* appended to the relevant exclude_list (either EXC_DIRS or
* EXC_FILE). exclude_lists are also used to represent the list of
* --exclude values passed via CLI args (EXC_CMDL).
*/
struct exclude_list {
int nr;
int alloc;
@ -26,9 +34,15 @@ struct exclude_list {
} **excludes;
};
/*
* The contents of the per-directory exclude files are lazily read on
* demand and then cached in memory, one per exclude_stack struct, in
* order to avoid opening and parsing each one every time that
* directory is traversed.
*/
struct exclude_stack {
struct exclude_stack *prev;
char *filebuf;
struct exclude_stack *prev; /* the struct exclude_stack for the parent directory */
char *filebuf; /* remember pointer to per-directory exclude file contents so we can free() */
int baselen;
int exclude_ix;
};
@ -59,6 +73,14 @@ struct dir_struct {
#define EXC_DIRS 1
#define EXC_FILE 2
/*
* Temporary variables which are used during loading of the
* per-directory exclude lists.
*
* exclude_stack points to the top of the exclude_stack, and
* basebuf contains the full path to the current
* (sub)directory in the traversal.
*/
struct exclude_stack *exclude_stack;
char basebuf[PATH_MAX];
};
@ -76,8 +98,8 @@ extern int within_depth(const char *name, int namelen, int depth, int max_depth)
extern int fill_directory(struct dir_struct *dir, const char **pathspec);
extern int read_directory(struct dir_struct *, const char *path, int len, const char **pathspec);
extern int excluded_from_list(const char *pathname, int pathlen, const char *basename,
int *dtype, struct exclude_list *el);
extern int is_excluded_from_list(const char *pathname, int pathlen, const char *basename,
int *dtype, struct exclude_list *el);
struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len);
/*
@ -91,26 +113,29 @@ extern int match_pathname(const char *, int,
const char *, int, int, int);
/*
* The excluded() API is meant for callers that check each level of leading
* directory hierarchies with excluded() to avoid recursing into excluded
* The is_excluded() API is meant for callers that check each level of leading
* directory hierarchies with is_excluded() to avoid recursing into excluded
* directories. Callers that do not do so should use this API instead.
*/
struct path_exclude_check {
/* traversal state passed to last_exclude_matching() on every lookup */
struct dir_struct *dir;
/*
 * the exclude entry that matched the directory left in 'path';
 * returned again for later names that fall under that directory
 */
struct exclude *exclude;
/*
 * path of a directory for which an exclude entry matched, kept
 * between calls to short-circuit repeated checks; reset to empty
 * when no leading directory matched
 */
struct strbuf path;
};
extern void path_exclude_check_init(struct path_exclude_check *, struct dir_struct *);
extern void path_exclude_check_clear(struct path_exclude_check *);
extern int path_excluded(struct path_exclude_check *, const char *, int namelen, int *dtype);
extern struct exclude *last_exclude_matching_path(struct path_exclude_check *, const char *,
int namelen, int *dtype);
extern int is_path_excluded(struct path_exclude_check *, const char *, int namelen, int *dtype);
extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen,
char **buf_p, struct exclude_list *which, int check_index);
char **buf_p, struct exclude_list *el, int check_index);
extern void add_excludes_from_file(struct dir_struct *, const char *fname);
extern void parse_exclude_pattern(const char **string, int *patternlen, int *flags, int *nowildcardlen);
extern void add_exclude(const char *string, const char *base,
int baselen, struct exclude_list *which);
extern void free_excludes(struct exclude_list *el);
int baselen, struct exclude_list *el);
extern void clear_exclude_list(struct exclude_list *el);
extern int file_exists(const char *);
extern int is_inside_dir(const char *dir);

View file

@ -837,7 +837,8 @@ static int clear_ce_flags_dir(struct cache_entry **cache, int nr,
{
struct cache_entry **cache_end;
int dtype = DT_DIR;
int ret = excluded_from_list(prefix, prefix_len, basename, &dtype, el);
int ret = is_excluded_from_list(prefix, prefix_len,
basename, &dtype, el);
prefix[prefix_len++] = '/';
@ -856,7 +857,7 @@ static int clear_ce_flags_dir(struct cache_entry **cache, int nr,
* with ret (iow, we know in advance the incl/excl
* decision for the entire directory), clear flag here without
* calling clear_ce_flags_1(). That function will call
* the expensive excluded_from_list() on every entry.
* the expensive is_excluded_from_list() on every entry.
*/
return clear_ce_flags_1(cache, cache_end - cache,
prefix, prefix_len,
@ -939,7 +940,8 @@ static int clear_ce_flags_1(struct cache_entry **cache, int nr,
/* Non-directory */
dtype = ce_to_dtype(ce);
ret = excluded_from_list(ce->name, ce_namelen(ce), name, &dtype, el);
ret = is_excluded_from_list(ce->name, ce_namelen(ce),
name, &dtype, el);
if (ret < 0)
ret = defval;
if (ret > 0)
@ -1152,7 +1154,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
*o->dst_index = o->result;
done:
free_excludes(&el);
clear_exclude_list(&el);
if (o->path_exclude_check) {
path_exclude_check_clear(o->path_exclude_check);
free(o->path_exclude_check);
@ -1373,7 +1375,7 @@ static int check_ok_to_remove(const char *name, int len, int dtype,
return 0;
if (o->dir &&
path_excluded(o->path_exclude_check, name, -1, &dtype))
is_path_excluded(o->path_exclude_check, name, -1, &dtype))
/*
* ce->name is explicitly excluded, so it is Ok to
* overwrite it.