wildmatch: advance faster in <asterisk> + <literal> patterns

Normally when we match "*X" on "abcX", we call dowild("X", "abcX"),
dowild("X", "bcX"), dowild("X", "cX") and dowild("X", "X"). Only the
last call may have a chance of matching. By skipping the text before
"X", we can eliminate the first three useless calls.

compat, '*/*/*' on linux-2.6.git file list 2000 times, before:
wildmatch 7s 985049us
fnmatch   2s 735541us or 34.26% faster

and after:
wildmatch 4s 492549us
fnmatch   0s 888263us or 19.77% slower

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Nguyễn Thái Ngọc Duy 2013-01-01 09:44:10 +07:00 committed by Junio C Hamano
parent 46983441ae
commit 6f1a31f0aa
2 changed files with 31 additions and 0 deletions

View file

@@ -207,6 +207,11 @@ match 0 x foo '*/*/*'
match 0 x foo/bar '*/*/*'
match 1 x foo/bba/arr '*/*/*'
match 0 x foo/bb/aa/rr '*/*/*'
match 1 x foo/bb/aa/rr '**/**/**'
match 1 x abcXdefXghi '*X*i'
match 0 x ab/cXd/efXg/hi '*X*i'
match 1 x ab/cXd/efXg/hi '*/*X*/*/*i'
match 1 x ab/cXd/efXg/hi '**/*X*/**/*i'
pathmatch 1 foo foo
pathmatch 0 foo fo
@@ -226,5 +231,8 @@ pathmatch 0 foo '*/*/*'
pathmatch 0 foo/bar '*/*/*'
pathmatch 1 foo/bba/arr '*/*/*'
pathmatch 1 foo/bb/aa/rr '*/*/*'
pathmatch 1 abcXdefXghi '*X*i'
pathmatch 1 ab/cXd/efXg/hi '*/*X*/*/*i'
pathmatch 1 ab/cXd/efXg/hi '*Xg*i'
test_done

View file

@@ -133,6 +133,29 @@ static int dowild(const uchar *p, const uchar *text, unsigned int flags)
while (1) {
if (t_ch == '\0')
break;
/*
* Try to advance faster when an asterisk is
* followed by a literal. We know in this case
* that the string before the literal
* must belong to "*".
* If match_slash is false, do not look past
* the first slash as it cannot belong to '*'.
*/
if (!is_glob_special(*p)) {
p_ch = *p;
if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
p_ch = tolower(p_ch);
while ((t_ch = *text) != '\0' &&
(match_slash || t_ch != '/')) {
if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
t_ch = tolower(t_ch);
if (t_ch == p_ch)
break;
text++;
}
if (t_ch != p_ch)
return WM_NOMATCH;
}
if ((matched = dowild(p, text, flags)) != WM_NOMATCH) {
if (!match_slash || matched != WM_ABORT_TO_STARSTAR)
return matched;