git/dir.h
Karsten Blees 95c6f27164 dir.c: unify is_excluded and is_path_excluded APIs
The is_excluded and is_path_excluded APIs are very similar, except for a
few noteworthy differences:

is_excluded doesn't handle ignored directories, so results for paths within
ignored directories are incorrect. This is probably based on the premise
that recursive directory scans should stop at ignored directories, which
is no longer true (in certain cases, read_directory_recursive currently
calls is_excluded *and* is_path_excluded to get correct ignored state).

is_excluded caches parsed .gitignore files of the last directory in struct
dir_struct. If the directory changes, it finds a common parent directory
and is very careful to drop only as much state as necessary. On the other
hand, is_excluded will also read and parse .gitignore files in already
ignored directories, which are completely irrelevant.

is_path_excluded correctly handles ignored directories by checking if any
component in the path is excluded. As it uses is_excluded internally, this
unfortunately forces is_excluded to drop and re-read all .gitignore files,
as there is no common parent directory for the root dir.

is_path_excluded tracks state in a separate struct path_exclude_check,
which is essentially a wrapper of dir_struct with two more fields. However,
as is_path_excluded also modifies dir_struct, it is not possible to e.g.
use multiple path_exclude_check structures with the same dir_struct in
parallel. The additional structure just unnecessarily complicates the API.

Teach is_excluded / prep_exclude about ignored directories: whenever
entering a new directory, first check if the entire directory is excluded.
Remember the excluded state in dir_struct. Don't traverse into already
ignored directories (i.e. don't read irrelevant .gitignore files).

Directories could also be excluded by exclude patterns specified on the
command line or .git/info/exclude, so we cannot simply skip prep_exclude
entirely if there's no .gitignore file name (dir_struct.exclude_per_dir).
Move this check to just before actually reading the file.

is_path_excluded is now equivalent to is_excluded, so we can simply
redirect to it (the public API is cleaned up in the next patch).

The performance impact of the additional ignored check per directory is
hardly noticeable when reading directories recursively (e.g. 'git status').
However, performance of git commands using the is_path_excluded API (e.g.
'git ls-files --cached --ignored --exclude-standard') is greatly improved
as this no longer re-reads .gitignore files on each call.

Here's some performance data from the linux and WebKit repos (best of 10
runs on a Debian Linux on SSD, core.preloadIndex=true):

       | ls-files -ci   |    status      | status --ignored
       | linux | WebKit | linux | WebKit | linux | WebKit
-------+-------+--------+-------+--------+-------+---------
before | 0.506 |  6.539 | 0.212 |  1.555 | 0.323 |  2.541
after  | 0.080 |  1.191 | 0.218 |  1.583 | 0.321 |  2.579
gain   | 6.325 |  5.490 | 0.972 |  0.982 | 1.006 |  0.985

Signed-off-by: Karsten Blees <blees@dcon.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-15 12:34:00 -07:00

217 lines
6.8 KiB
C

#ifndef DIR_H
#define DIR_H
/* See Documentation/technical/api-directory-listing.txt */
#include "strbuf.h"
/*
* A single path name together with its length. struct dir_struct keeps
* arrays of pointers to these in its entries[] and ignored[] members.
*/
struct dir_entry {
/* NOTE(review): presumably the length of name, not counting a terminating NUL -- confirm in dir.c */
unsigned int len;
/*
* Trailing variable-length name storage. FLEX_ARRAY is defined outside
* this header, so an entry has to be allocated with extra room for the
* name rather than declared as a plain variable.
*/
char name[FLEX_ARRAY]; /* more */
};
#define EXC_FLAG_NODIR 1
#define EXC_FLAG_ENDSWITH 4
#define EXC_FLAG_MUSTBEDIR 8
#define EXC_FLAG_NEGATIVE 16
/*
* Each excludes file will be parsed into a fresh exclude_list which
* is appended to the relevant exclude_list_group (either EXC_DIRS or
* EXC_FILE). An exclude_list within the EXC_CMDL exclude_list_group
* can also be used to represent the list of --exclude values passed
* via CLI args.
*/
struct exclude_list {
/* NOTE(review): presumably the used / allocated element counts of excludes[] -- confirm in dir.c */
int nr;
int alloc;
/* remember pointer to exclude file contents so we can free() */
char *filebuf;
/* origin of list, e.g. path to filename, or descriptive string */
const char *src;
/*
* One pattern of this list. Although declared inside exclude_list,
* the tag "struct exclude" is visible at file scope in C and is used
* on its own elsewhere in this header (dir_struct.exclude and the
* return type of last_exclude_matching_path()).
*/
struct exclude {
/*
* This allows callers of last_exclude_matching() etc.
* to determine the origin of the matching pattern.
*/
struct exclude_list *el;
/* the pattern text and its length */
const char *pattern;
int patternlen;
/* NOTE(review): presumably the length of the leading part of pattern that contains no wildcard -- confirm */
int nowildcardlen;
/* NOTE(review): presumably the directory prefix the pattern applies to, and its length -- confirm */
const char *base;
int baselen;
/* NOTE(review): presumably a combination of EXC_FLAG_* bits -- confirm */
int flags;
/*
* Counting starts from 1 for line numbers in ignore files,
* and from -1 decrementing for patterns from CLI args.
*/
int srcpos;
} **excludes;
};
/*
* The contents of the per-directory exclude files are lazily read on
* demand and then cached in memory, one per exclude_stack struct, in
* order to avoid opening and parsing each one every time that
* directory is traversed.
*/
struct exclude_stack {
struct exclude_stack *prev; /* the struct exclude_stack for the parent directory */
/* NOTE(review): presumably the length of this directory's path within dir_struct.basebuf -- confirm in dir.c */
int baselen;
int exclude_ix; /* index of exclude_list within EXC_DIRS exclude_list_group */
};
/*
 * A group of exclude lists. struct dir_struct holds three of these,
 * indexed by EXC_CMDL, EXC_DIRS and EXC_FILE.
 * NOTE(review): nr / alloc are presumably the used and allocated element
 * counts of el[] -- confirm in dir.c.
 */
struct exclude_list_group {
	int nr;
	int alloc;
	struct exclude_list *el;
};
/*
* State of one directory listing: the collected entries plus all exclude
* (ignore) information used while collecting them.
*/
struct dir_struct {
/* NOTE(review): presumably used / allocated element counts of entries[] -- confirm */
int nr, alloc;
/* NOTE(review): presumably used / allocated element counts of ignored[] -- confirm */
int ignored_nr, ignored_alloc;
/*
* Behaviour switches; each enumerator is a distinct bit so several can
* be combined in "flags".
* NOTE(review): the effect of each bit is implemented in dir.c and is
* not visible in this header.
*/
enum {
DIR_SHOW_IGNORED = 1<<0,
DIR_SHOW_OTHER_DIRECTORIES = 1<<1,
DIR_HIDE_EMPTY_DIRECTORIES = 1<<2,
DIR_NO_GITLINKS = 1<<3,
DIR_COLLECT_IGNORED = 1<<4
} flags;
/* collected paths; see the element-count members above */
struct dir_entry **entries;
struct dir_entry **ignored;
/* Exclude info */
/* name of the per-directory ignore file (e.g. ".gitignore") */
const char *exclude_per_dir;
/*
* We maintain three groups of exclude pattern lists:
*
* EXC_CMDL lists patterns explicitly given on the command line.
* EXC_DIRS lists patterns obtained from per-directory ignore files.
* EXC_FILE lists patterns from fallback ignore files, e.g.
* - .git/info/exclude
* - core.excludesfile
*
* Each group contains multiple exclude lists, a single list
* per source.
*/
#define EXC_CMDL 0
#define EXC_DIRS 1
#define EXC_FILE 2
struct exclude_list_group exclude_list_group[3];
/*
* Temporary variables which are used during loading of the
* per-directory exclude lists.
*
* exclude_stack points to the top of the exclude_stack, and
* basebuf contains the full path to the current
* (sub)directory in the traversal. Exclude points to the
* matching exclude struct if the directory is excluded.
*/
struct exclude_stack *exclude_stack;
struct exclude *exclude;
/* fixed-size buffer: the path of the current directory must fit in PATH_MAX bytes */
char basebuf[PATH_MAX];
};
/*
* The ordering of these constants is significant, with
* higher-numbered match types signifying "closer" (i.e. more
* specific) matches which will override lower-numbered match types
* when populating the seen[] array.
*
* NOTE(review): presumably these are also the non-zero results of
* match_pathspec() / match_pathspec_depth(), whose "seen" parameter is
* the array mentioned above -- confirm in dir.c.
*/
#define MATCHED_RECURSIVELY 1
#define MATCHED_FNMATCH 2
#define MATCHED_EXACTLY 3
extern char *common_prefix(const char **pathspec);
extern int match_pathspec(const char **pathspec, const char *name, int namelen, int prefix, char *seen);
extern int match_pathspec_depth(const struct pathspec *pathspec,
const char *name, int namelen,
int prefix, char *seen);
extern int within_depth(const char *name, int namelen, int depth, int max_depth);
extern int fill_directory(struct dir_struct *dir, const char **pathspec);
extern int read_directory(struct dir_struct *, const char *path, int len, const char **pathspec);
extern int is_excluded_from_list(const char *pathname, int pathlen, const char *basename,
int *dtype, struct exclude_list *el);
struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len);
/*
* These implement the matching logic for dir.c:excluded_from_list and
* attr.c:path_matches().
*
* The parameters are unnamed in these prototypes; their order and meaning
* have to be taken from the function definitions.
* NOTE(review): consider naming the parameters here once confirmed
* against the definitions, since several adjacent ints are easy to mix up.
*/
extern int match_basename(const char *, int,
const char *, int, int, int);
extern int match_pathname(const char *, int,
const char *, int,
const char *, int, int, int);
/*
* Path-based exclude check API, kept as a thin compatibility layer.
*
* Historically is_excluded() was only correct for callers that checked
* each level of the leading directory hierarchy themselves, and callers
* that did not do so had to use this API instead. The exclude machinery
* now checks whether a directory itself is excluded when entering it, so
* is_path_excluded() is equivalent to is_excluded() and simply redirects
* to it. struct path_exclude_check is reduced to a wrapper around a
* dir_struct pointer.
* NOTE(review): the redirection itself lives in dir.c and is not visible
* in this header -- confirm before relying on it.
*/
struct path_exclude_check {
/* NOTE(review): presumably the dir_struct passed to path_exclude_check_init() -- confirm */
struct dir_struct *dir;
};
extern void path_exclude_check_init(struct path_exclude_check *, struct dir_struct *);
extern void path_exclude_check_clear(struct path_exclude_check *);
/* Returns a struct exclude pointer. NOTE(review): presumably the matching pattern, or NULL if none matches -- confirm */
extern struct exclude *last_exclude_matching_path(struct path_exclude_check *, const char *,
int namelen, int *dtype);
/* NOTE(review): presumably non-zero when the path is excluded -- confirm */
extern int is_path_excluded(struct path_exclude_check *, const char *, int namelen, int *dtype);
/*
* Building and tearing down exclude lists. Defined outside this header;
* notes marked NOTE(review) are inferred from the prototypes and need to
* be confirmed against dir.c.
*/
/*
* NOTE(review): group_type is presumably one of EXC_CMDL, EXC_DIRS or
* EXC_FILE, and src presumably becomes the new list's "src" -- confirm.
*/
extern struct exclude_list *add_exclude_list(struct dir_struct *dir,
int group_type, const char *src);
/*
* NOTE(review): presumably parses the file fname into el, with patterns
* relative to base (baselen bytes); the meaning of check_index and of
* the int result is not visible here -- confirm.
*/
extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen,
struct exclude_list *el, int check_index);
extern void add_excludes_from_file(struct dir_struct *, const char *fname);
/*
* All four arguments are pointers, so the function can update the
* caller's pattern position, length, flags and wildcard-free length.
* NOTE(review): exact in/out contract to be confirmed in dir.c.
*/
extern void parse_exclude_pattern(const char **string, int *patternlen, int *flags, int *nowildcardlen);
/* NOTE(review): presumably adds one pattern to el; srcpos follows the numbering documented on struct exclude -- confirm */
extern void add_exclude(const char *string, const char *base,
int baselen, struct exclude_list *el, int srcpos);
/* NOTE(review): presumably release what the given object owns; whether the object itself is freed is not visible here -- confirm */
extern void clear_exclude_list(struct exclude_list *el);
extern void clear_directory(struct dir_struct *dir);
/*
* Small path predicates; each returns an int.
* NOTE(review): the exact return conventions (e.g. 0/1 versus a length or
* a negative value) are not visible in this header -- confirm in dir.c
* before using the result as anything but a truth value.
*/
extern int file_exists(const char *);
extern int is_inside_dir(const char *dir);
extern int dir_inside_of(const char *subdir, const char *dir);
/*
 * Return 1 if name is exactly "." or "..", and 0 otherwise.
 * name must be a NUL-terminated string; at most its first three bytes
 * are examined.
 */
static inline int is_dot_or_dotdot(const char *name)
{
	if (name[0] != '.')
		return 0;
	if (name[1] == '\0')
		return 1;	/* "." */
	return name[1] == '.' && name[2] == '\0';	/* ".." */
}
extern int is_empty_dir(const char *dir);
extern void setup_standard_excludes(struct dir_struct *dir);
/*
* Flag bits, written as octal literals (01, 02, 04); each is a distinct
* bit so they can be OR-ed together.
* NOTE(review): presumably these are the values accepted by the "flag"
* argument of remove_dir_recursively(); the effect of each bit is
* implemented outside this header -- confirm in dir.c.
*/
#define REMOVE_DIR_EMPTY_ONLY 01
#define REMOVE_DIR_KEEP_NESTED_GIT 02
#define REMOVE_DIR_KEEP_TOPLEVEL 04
/* path is a non-const strbuf. NOTE(review): it may be modified during the call; its final contents are not documented here -- confirm */
extern int remove_dir_recursively(struct strbuf *path, int flag);
/* tries to remove the path with empty directories along it, ignores ENOENT */
extern int remove_path(const char *path);
/*
* "_icase" counterparts of strcmp(), strncmp() and fnmatch() with the
* same parameter lists.
* NOTE(review): presumably they compare case-insensitively only when the
* repository is configured to ignore case -- confirm in dir.c.
*/
extern int strcmp_icase(const char *a, const char *b);
extern int strncmp_icase(const char *a, const char *b, size_t count);
extern int fnmatch_icase(const char *pattern, const char *string, int flags);
/*
* git_fnmatch() and its GFNM_* flag bits.
*
* The prefix part of pattern must not contain wildcards.
* NOTE(review): "prefix" is presumably the length of that wildcard-free
* leading part of pattern -- confirm in dir.c.
*/
#define GFNM_PATHNAME 1 /* similar to FNM_PATHNAME */
#define GFNM_ONESTAR 2 /* there is only _one_ wildcard, a star */
extern int git_fnmatch(const char *pattern, const char *string,
int flags, int prefix);
#endif