regex: use regexec_buf()

The new regexec_buf() function operates on buffers with an explicitly
specified length, rather than NUL-terminated strings.

We need to use this function whenever the buffer we want to pass to
regexec(3) may have been mmap(2)ed (and is hence not NUL-terminated).

Note: the original motivation for this patch was to fix a bug where
`git diff -G <regex>` would crash. This patch converts more callers,
though, some of which allocated memory to construct NUL-terminated
strings or, worse, modified buffers to temporarily insert NULs while
calling regexec(3).  Converting them to use regexec_buf() makes the
code much cleaner.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Johannes Schindelin 2016-09-21 20:24:14 +02:00 committed by Junio C Hamano
parent 2f8952250a
commit b7d36ffca0
5 changed files with 17 additions and 33 deletions

3
diff.c
View file

@ -941,7 +941,8 @@ static int find_word_boundaries(mmfile_t *buffer, regex_t *word_regex,
{ {
if (word_regex && *begin < buffer->size) { if (word_regex && *begin < buffer->size) {
regmatch_t match[1]; regmatch_t match[1];
if (!regexec(word_regex, buffer->ptr + *begin, 1, match, 0)) { if (!regexec_buf(word_regex, buffer->ptr + *begin,
buffer->size - *begin, 1, match, 0)) {
char *p = memchr(buffer->ptr + *begin + match[0].rm_so, char *p = memchr(buffer->ptr + *begin + match[0].rm_so,
'\n', match[0].rm_eo - match[0].rm_so); '\n', match[0].rm_eo - match[0].rm_so);
*end = p ? p - buffer->ptr : match[0].rm_eo + *begin; *end = p ? p - buffer->ptr : match[0].rm_eo + *begin;

View file

@ -21,7 +21,6 @@ static void diffgrep_consume(void *priv, char *line, unsigned long len)
{ {
struct diffgrep_cb *data = priv; struct diffgrep_cb *data = priv;
regmatch_t regmatch; regmatch_t regmatch;
int hold;
if (line[0] != '+' && line[0] != '-') if (line[0] != '+' && line[0] != '-')
return; return;
@ -31,11 +30,8 @@ static void diffgrep_consume(void *priv, char *line, unsigned long len)
* caller early. * caller early.
*/ */
return; return;
/* Yuck -- line ought to be "const char *"! */ data->hit = !regexec_buf(data->regexp, line + 1, len - 1, 1,
hold = line[len]; &regmatch, 0);
line[len] = '\0';
data->hit = !regexec(data->regexp, line + 1, 1, &regmatch, 0);
line[len] = hold;
} }
static int diff_grep(mmfile_t *one, mmfile_t *two, static int diff_grep(mmfile_t *one, mmfile_t *two,
@ -48,9 +44,11 @@ static int diff_grep(mmfile_t *one, mmfile_t *two,
xdemitconf_t xecfg; xdemitconf_t xecfg;
if (!one) if (!one)
return !regexec(regexp, two->ptr, 1, &regmatch, 0); return !regexec_buf(regexp, two->ptr, two->size,
1, &regmatch, 0);
if (!two) if (!two)
return !regexec(regexp, one->ptr, 1, &regmatch, 0); return !regexec_buf(regexp, one->ptr, one->size,
1, &regmatch, 0);
/* /*
* We have both sides; need to run textual diff and see if * We have both sides; need to run textual diff and see if
@ -81,8 +79,8 @@ static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws)
regmatch_t regmatch; regmatch_t regmatch;
int flags = 0; int flags = 0;
assert(data[sz] == '\0'); while (*data &&
while (*data && !regexec(regexp, data, 1, &regmatch, flags)) { !regexec_buf(regexp, data, sz, 1, &regmatch, flags)) {
flags |= REG_NOTBOL; flags |= REG_NOTBOL;
data += regmatch.rm_eo; data += regmatch.rm_eo;
if (*data && regmatch.rm_so == regmatch.rm_eo) if (*data && regmatch.rm_so == regmatch.rm_eo)

14
grep.c
View file

@ -848,17 +848,6 @@ static int fixmatch(struct grep_pat *p, char *line, char *eol,
} }
} }
static int regmatch(const regex_t *preg, char *line, char *eol,
regmatch_t *match, int eflags)
{
#ifdef REG_STARTEND
match->rm_so = 0;
match->rm_eo = eol - line;
eflags |= REG_STARTEND;
#endif
return regexec(preg, line, 1, match, eflags);
}
static int patmatch(struct grep_pat *p, char *line, char *eol, static int patmatch(struct grep_pat *p, char *line, char *eol,
regmatch_t *match, int eflags) regmatch_t *match, int eflags)
{ {
@ -869,7 +858,8 @@ static int patmatch(struct grep_pat *p, char *line, char *eol,
else if (p->pcre_regexp) else if (p->pcre_regexp)
hit = !pcrematch(p, line, eol, match, eflags); hit = !pcrematch(p, line, eol, match, eflags);
else else
hit = !regmatch(&p->regexp, line, eol, match, eflags); hit = !regexec_buf(&p->regexp, line, eol - line, 1, match,
eflags);
return hit; return hit;
} }

View file

@ -14,7 +14,7 @@ test_expect_success setup '
test_tick && test_tick &&
git commit -m "A 4k file" git commit -m "A 4k file"
' '
test_expect_failure '-G matches' ' test_expect_success '-G matches' '
git diff --name-only -G "^0{4096}$" HEAD^ >out && git diff --name-only -G "^0{4096}$" HEAD^ >out &&
test 4096-zeroes.txt = "$(cat out)" test 4096-zeroes.txt = "$(cat out)"
' '

View file

@ -216,11 +216,10 @@ struct ff_regs {
static long ff_regexp(const char *line, long len, static long ff_regexp(const char *line, long len,
char *buffer, long buffer_size, void *priv) char *buffer, long buffer_size, void *priv)
{ {
char *line_buffer;
struct ff_regs *regs = priv; struct ff_regs *regs = priv;
regmatch_t pmatch[2]; regmatch_t pmatch[2];
int i; int i;
int result = -1; int result;
/* Exclude terminating newline (and cr) from matching */ /* Exclude terminating newline (and cr) from matching */
if (len > 0 && line[len-1] == '\n') { if (len > 0 && line[len-1] == '\n') {
@ -230,18 +229,16 @@ static long ff_regexp(const char *line, long len,
len--; len--;
} }
line_buffer = xstrndup(line, len); /* make NUL terminated */
for (i = 0; i < regs->nr; i++) { for (i = 0; i < regs->nr; i++) {
struct ff_reg *reg = regs->array + i; struct ff_reg *reg = regs->array + i;
if (!regexec(&reg->re, line_buffer, 2, pmatch, 0)) { if (!regexec_buf(&reg->re, line, len, 2, pmatch, 0)) {
if (reg->negate) if (reg->negate)
goto fail; return -1;
break; break;
} }
} }
if (regs->nr <= i) if (regs->nr <= i)
goto fail; return -1;
i = pmatch[1].rm_so >= 0 ? 1 : 0; i = pmatch[1].rm_so >= 0 ? 1 : 0;
line += pmatch[i].rm_so; line += pmatch[i].rm_so;
result = pmatch[i].rm_eo - pmatch[i].rm_so; result = pmatch[i].rm_eo - pmatch[i].rm_so;
@ -250,8 +247,6 @@ static long ff_regexp(const char *line, long len,
while (result > 0 && (isspace(line[result - 1]))) while (result > 0 && (isspace(line[result - 1])))
result--; result--;
memcpy(buffer, line, result); memcpy(buffer, line, result);
fail:
free(line_buffer);
return result; return result;
} }