Merge branch 'nd/pathspec-wildcard'

Optimize matching paths with common forms of pathspecs that contain
wildcard characters.

* nd/pathspec-wildcard:
  tree_entry_interesting: do basedir compare on wildcard patterns when possible
  pathspec: apply "*.c" optimization from exclude
  pathspec: do exact comparison on the leading non-wildcard part
  pathspec: save the non-wildcard length part
This commit is contained in:
Junio C Hamano 2013-01-05 23:40:15 -08:00
commit 990a4fea96
6 changed files with 120 additions and 11 deletions

View file

@ -337,7 +337,7 @@ void overlay_tree_on_cache(const char *tree_name, const char *prefix)
matchbuf[0] = prefix;
matchbuf[1] = NULL;
init_pathspec(&pathspec, matchbuf);
pathspec.items[0].use_wildcard = 0;
pathspec.items[0].nowildcard_len = pathspec.items[0].len;
} else
init_pathspec(&pathspec, NULL);
if (read_tree(tree, 1, &pathspec))

View file

@ -168,7 +168,7 @@ int cmd_ls_tree(int argc, const char **argv, const char *prefix)
init_pathspec(&pathspec, get_pathspec(prefix, argv + 1));
for (i = 0; i < pathspec.nr; i++)
pathspec.items[i].use_wildcard = 0;
pathspec.items[i].nowildcard_len = pathspec.items[i].len;
pathspec.has_wildcard = 0;
tree = parse_tree_indirect(sha1);
if (!tree)

View file

@ -473,6 +473,8 @@ extern int index_name_is_other(const struct index_state *, const char *, int);
extern int ie_match_stat(const struct index_state *, struct cache_entry *, struct stat *, unsigned int);
extern int ie_modified(const struct index_state *, struct cache_entry *, struct stat *, unsigned int);
#define PATHSPEC_ONESTAR 1 /* the pathspec pattern sastisfies GFNM_ONESTAR */
struct pathspec {
const char **raw; /* get_pathspec() result, not freed by free_pathspec() */
int nr;
@ -482,7 +484,8 @@ struct pathspec {
struct pathspec_item {
const char *match;
int len;
unsigned int use_wildcard:1;
int nowildcard_len;
int flags;
} *items;
};

36
dir.c
View file

@ -34,6 +34,28 @@ int fnmatch_icase(const char *pattern, const char *string, int flags)
return fnmatch(pattern, string, flags | (ignore_case ? FNM_CASEFOLD : 0));
}
inline int git_fnmatch(const char *pattern, const char *string,
int flags, int prefix)
{
int fnm_flags = 0;
if (flags & GFNM_PATHNAME)
fnm_flags |= FNM_PATHNAME;
if (prefix > 0) {
if (strncmp(pattern, string, prefix))
return FNM_NOMATCH;
pattern += prefix;
string += prefix;
}
if (flags & GFNM_ONESTAR) {
int pattern_len = strlen(++pattern);
int string_len = strlen(string);
return string_len < pattern_len ||
strcmp(pattern,
string + string_len - pattern_len);
}
return fnmatch(pattern, string, fnm_flags);
}
static size_t common_prefix_len(const char **pathspec)
{
const char *n, *first;
@ -230,7 +252,10 @@ static int match_pathspec_item(const struct pathspec_item *item, int prefix,
return MATCHED_RECURSIVELY;
}
if (item->use_wildcard && !fnmatch(match, name, 0))
if (item->nowildcard_len < item->len &&
!git_fnmatch(match, name,
item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0,
item->nowildcard_len - prefix))
return MATCHED_FNMATCH;
return 0;
@ -1429,9 +1454,14 @@ int init_pathspec(struct pathspec *pathspec, const char **paths)
item->match = path;
item->len = strlen(path);
item->use_wildcard = !no_wildcard(path);
if (item->use_wildcard)
item->nowildcard_len = simple_length(path);
item->flags = 0;
if (item->nowildcard_len < item->len) {
pathspec->has_wildcard = 1;
if (path[item->nowildcard_len] == '*' &&
no_wildcard(path + item->nowildcard_len + 1))
item->flags |= PATHSPEC_ONESTAR;
}
}
qsort(pathspec->items, pathspec->nr,

9
dir.h
View file

@ -139,4 +139,13 @@ extern int strcmp_icase(const char *a, const char *b);
extern int strncmp_icase(const char *a, const char *b, size_t count);
extern int fnmatch_icase(const char *pattern, const char *string, int flags);
/*
* The prefix part of pattern must not contains wildcards.
*/
#define GFNM_PATHNAME 1 /* similar to FNM_PATHNAME */
#define GFNM_ONESTAR 2 /* there is only _one_ wildcard, a star */
extern int git_fnmatch(const char *pattern, const char *string,
int flags, int prefix);
#endif

View file

@ -572,6 +572,54 @@ static int match_dir_prefix(const char *base,
return 0;
}
/*
* Perform matching on the leading non-wildcard part of
* pathspec. item->nowildcard_len must be greater than zero. Return
* non-zero if base is matched.
*/
static int match_wildcard_base(const struct pathspec_item *item,
const char *base, int baselen,
int *matched)
{
const char *match = item->match;
/* the wildcard part is not considered in this function */
int matchlen = item->nowildcard_len;
if (baselen) {
int dirlen;
/*
* Return early if base is longer than the
* non-wildcard part but it does not match.
*/
if (baselen >= matchlen) {
*matched = matchlen;
return !strncmp(base, match, matchlen);
}
dirlen = matchlen;
while (dirlen && match[dirlen - 1] != '/')
dirlen--;
/*
* Return early if base is shorter than the
* non-wildcard part but it does not match. Note that
* base ends with '/' so we are sure it really matches
* directory
*/
if (strncmp(base, match, baselen))
return 0;
*matched = baselen;
} else
*matched = 0;
/*
* we could have checked entry against the non-wildcard part
* that is not in base and does similar never_interesting
* optimization as in match_entry. For now just be happy with
* base comparison.
*/
return entry_interesting;
}
/*
* Is a tree entry interesting given the pathspec we have?
*
@ -602,7 +650,7 @@ enum interesting tree_entry_interesting(const struct name_entry *entry,
const struct pathspec_item *item = ps->items+i;
const char *match = item->match;
const char *base_str = base->buf + base_offset;
int matchlen = item->len;
int matchlen = item->len, matched = 0;
if (baselen >= matchlen) {
/* If it doesn't match, move along... */
@ -626,8 +674,10 @@ enum interesting tree_entry_interesting(const struct name_entry *entry,
&never_interesting))
return entry_interesting;
if (item->use_wildcard) {
if (!fnmatch(match + baselen, entry->path, 0))
if (item->nowildcard_len < item->len) {
if (!git_fnmatch(match + baselen, entry->path,
item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0,
item->nowildcard_len - baselen))
return entry_interesting;
/*
@ -642,17 +692,34 @@ enum interesting tree_entry_interesting(const struct name_entry *entry,
}
match_wildcards:
if (!item->use_wildcard)
if (item->nowildcard_len == item->len)
continue;
if (item->nowildcard_len &&
!match_wildcard_base(item, base_str, baselen, &matched))
return entry_not_interesting;
/*
* Concatenate base and entry->path into one and do
* fnmatch() on it.
*
* While we could avoid concatenation in certain cases
* [1], which saves a memcpy and potentially a
* realloc, it turns out not worth it. Measurement on
* linux-2.6 does not show any clear improvements,
* partly because of the nowildcard_len optimization
* in git_fnmatch(). Avoid micro-optimizations here.
*
* [1] if match_wildcard_base() says the base
* directory is already matched, we only need to match
* the rest, which is shorter so _in theory_ faster.
*/
strbuf_add(base, entry->path, pathlen);
if (!fnmatch(match, base->buf + base_offset, 0)) {
if (!git_fnmatch(match, base->buf + base_offset,
item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0,
item->nowildcard_len)) {
strbuf_setlen(base, base_offset + baselen);
return entry_interesting;
}