git/diffcore-pickaxe.c
Jeff King 3efb988098 react to errors in xdi_diff
When we call into xdiff to perform a diff, we generally lose
the return code completely. Typically by ignoring the return
of our xdi_diff wrapper, but sometimes we even propagate
that return value up and then ignore it later.  This can
lead to us silently producing incorrect diffs (e.g., "git
log" might produce no output at all, not even a diff header,
for a content-level diff).

In practice this does not happen very often, because the
typical reason for xdiff to report failure is that its
malloc() failed (it uses straight malloc, and not our
xmalloc wrapper).  But it could also happen when xdiff
triggers one of our callbacks, which returns an error (e.g.,
outf() in builtin/rerere.c tries to report a write failure
in this way). And the next patch also plans to add more
failure modes.

Let's notice an error return from xdiff and react
appropriately. In most of the diff.c code, we can simply
die(), which matches the surrounding code (e.g., that is
what we do if we fail to load a file for diffing in the
first place). This is not that elegant, but we are probably
better off dying to let the user know there was a problem,
rather than simply generating bogus output.

We could also just die() directly in xdi_diff, but the
callers typically have a bit more context, and can provide a
better message (and if we do later decide to pass errors up,
we're one step closer to doing so).

There is one interesting case, which is in diff_grep(). Here
if we cannot generate the diff, there is nothing to match,
and we silently return "no hits". This is actually what the
existing code does already, but we make it a little more
explicit.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-28 14:57:10 -07:00

239 lines
5.7 KiB
C

/*
* Copyright (C) 2005 Junio C Hamano
* Copyright (C) 2010 Google Inc.
*/
#include "cache.h"
#include "diff.h"
#include "diffcore.h"
#include "xdiff-interface.h"
#include "kwset.h"
/*
 * Signature shared by the per-filepair match strategies in this file
 * (diff_grep and has_changes).  pickaxe_match() passes NULL for a side
 * of the pair that is not a valid file.  A non-zero return means the
 * pair matched; pickaxe() keeps such pairs in the queue.
 */
typedef int (*pickaxe_fn)(mmfile_t *one, mmfile_t *two,
struct diff_options *o,
regex_t *regexp, kwset_t kws);
/*
 * State handed to diffgrep_consume() through its "priv" pointer while
 * diff_grep() walks the lines of a textual diff.
 */
struct diffgrep_cb {
regex_t *regexp; /* pattern each added/removed line is tested against */
int hit; /* set non-zero by diffgrep_consume() once a line matches */
};
/*
 * Per-line callback used by diff_grep().
 *
 * "priv" is a struct diffgrep_cb.  Only lines whose first byte is '+'
 * or '-' are examined; the text after that marker is matched against
 * the callback's regexp and the outcome is recorded in ->hit.  Once
 * ->hit is set, further lines are skipped.
 */
static void diffgrep_consume(void *priv, char *line, unsigned long len)
{
	struct diffgrep_cb *cb = priv;
	regmatch_t match;
	char saved;
	int is_change = (line[0] == '+' || line[0] == '-');

	if (!is_change)
		return;

	/*
	 * NEEDSWORK: we should have a way to terminate the
	 * caller early.
	 */
	if (cb->hit)
		return;

	/*
	 * Yuck -- line ought to be "const char *"!  regexec() wants a
	 * NUL-terminated string, so temporarily plant a terminator at
	 * line[len] and put the original byte back afterwards.
	 */
	saved = line[len];
	line[len] = '\0';
	cb->hit = (regexec(cb->regexp, line + 1, 1, &match, 0) == 0);
	line[len] = saved;
}
/*
 * pickaxe_fn strategy: does the regexp match the change between "one"
 * and "two"?
 *
 * When only one side is present, the regexp is run over that side's
 * whole buffer.  When both sides are present, a textual diff is
 * generated and diffgrep_consume() checks the added/deleted lines.
 * If the diff cannot be generated there is nothing to match against,
 * so 0 ("no hit") is returned.  "kws" is not used here.
 *
 * Returns non-zero on a hit, 0 otherwise.
 */
static int diff_grep(mmfile_t *one, mmfile_t *two,
		     struct diff_options *o,
		     regex_t *regexp, kwset_t kws)
{
	regmatch_t match;
	struct diffgrep_cb cb;
	xpparam_t xpp;
	xdemitconf_t xecfg;

	/* Only one side exists: grep the surviving side as a whole. */
	if (!one || !two) {
		mmfile_t *only = one ? one : two;

		return regexec(regexp, only->ptr, 1, &match, 0) == 0;
	}

	/*
	 * Both sides exist; run a textual diff and look for the
	 * pattern on the added/deleted lines.
	 */
	memset(&xpp, 0, sizeof(xpp));
	memset(&xecfg, 0, sizeof(xecfg));
	xecfg.ctxlen = o->context;
	xecfg.interhunkctxlen = o->interhunkcontext;

	cb.regexp = regexp;
	cb.hit = 0;

	/* A failed diff yields no lines to inspect: report "no hit". */
	return xdi_diff_outf(one, two, diffgrep_consume, &cb, &xpp, &xecfg)
		? 0 : cb.hit;
}
/*
 * Count occurrences of the needle in the buffer described by "mf".
 *
 * With a non-NULL "regexp" the buffer is scanned with regexec(), which
 * requires it to be NUL-terminated at mf->ptr[mf->size] (asserted).
 * Otherwise the keyword set "kws" is used for exact string matching
 * over mf->size bytes.
 */
static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws)
{
	const char *cursor = mf->ptr;
	unsigned long remaining = mf->size;
	unsigned int hits = 0;

	if (!regexp) {
		/* Classic exact string match */
		while (remaining) {
			struct kwsmatch km;
			size_t at = kwsexec(kws, cursor, remaining, &km);
			size_t consumed;

			if (at == (size_t)-1)
				break;
			/* Resume right after the end of this match. */
			consumed = at + km.size[0];
			remaining -= consumed;
			cursor += consumed;
			hits++;
		}
		return hits;
	}

	{
		regmatch_t m;
		int eflags = 0;

		assert(cursor[remaining] == '\0');
		while (*cursor) {
			if (regexec(regexp, cursor, 1, &m, eflags) != 0)
				break;
			/* Later searches no longer start at a line start. */
			eflags |= REG_NOTBOL;
			cursor += m.rm_eo;
			/* Step past an empty match so the scan advances. */
			if (*cursor && m.rm_so == m.rm_eo)
				cursor++;
			hits++;
		}
	}
	return hits;
}
/*
 * pickaxe_fn strategy: report whether the number of needle occurrences
 * differs between the two sides.  A NULL side counts as zero
 * occurrences.  "o" is not used here.
 *
 * Returns non-zero when the counts differ.
 */
static int has_changes(mmfile_t *one, mmfile_t *two,
		       struct diff_options *o,
		       regex_t *regexp, kwset_t kws)
{
	unsigned int before = 0;
	unsigned int after = 0;

	if (one)
		before = contains(one, regexp, kws);
	if (two)
		after = contains(two, regexp, kws);

	return before != after;
}
/*
 * Load both sides of filepair "p" (through textconv when allowed) and
 * ask the strategy "fn" whether the pair matches.
 *
 * Returns 0 without loading anything when the needle is empty, when
 * neither side is a valid file, or when the pair is unmodified and both
 * sides use the same textconv driver.  Otherwise returns whatever "fn"
 * returns.  The filespec data of both sides is released before
 * returning from the loaded path.
 */
static int pickaxe_match(struct diff_filepair *p, struct diff_options *o,
			 regex_t *regexp, kwset_t kws, pickaxe_fn fn)
{
	struct userdiff_driver *drv_one = NULL;
	struct userdiff_driver *drv_two = NULL;
	mmfile_t old_side, new_side;
	mmfile_t *old_arg, *new_arg;
	int matched;

	/* Nothing to look for, or an unmerged entry: never a match. */
	if (!o->pickaxe[0] ||
	    (!DIFF_FILE_VALID(p->one) && !DIFF_FILE_VALID(p->two)))
		return 0;

	if (DIFF_OPT_TST(o, ALLOW_TEXTCONV)) {
		drv_one = get_textconv(p->one);
		drv_two = get_textconv(p->two);
	}

	/*
	 * An unmodified pair has the same count on both sides, so the
	 * blobs need not be loaded at all -- unless textconv is in play
	 * _and_ the two sides use different filters (e.g., an exact
	 * rename with different textconv attributes per side), which
	 * might generate different content.
	 */
	if (drv_one == drv_two && diff_unmodified_pair(p))
		return 0;

	old_side.size = fill_textconv(drv_one, p->one, &old_side.ptr);
	new_side.size = fill_textconv(drv_two, p->two, &new_side.ptr);

	/* A side that is not a valid file is passed to fn as NULL. */
	old_arg = DIFF_FILE_VALID(p->one) ? &old_side : NULL;
	new_arg = DIFF_FILE_VALID(p->two) ? &new_side : NULL;
	matched = fn(old_arg, new_arg, o, regexp, kws);

	/* The buffers are freed here only when a textconv driver was used. */
	if (drv_one)
		free(old_side.ptr);
	if (drv_two)
		free(new_side.ptr);
	diff_free_filespec_data(p->one);
	diff_free_filespec_data(p->two);

	return matched;
}
/*
 * Filter the queue "q" in place using strategy "fn".
 *
 * Without DIFF_PICKAXE_ALL, only the filepairs that match are kept and
 * the rest are freed.  With DIFF_PICKAXE_ALL, the queue is left
 * untouched as soon as any filepair matches; if none does, every
 * filepair is freed and the queue becomes empty.
 */
static void pickaxe(struct diff_queue_struct *q, struct diff_options *o,
		    regex_t *regexp, kwset_t kws, pickaxe_fn fn)
{
	struct diff_queue_struct kept;
	int idx;

	DIFF_QUEUE_CLEAR(&kept);

	if (!(o->pickaxe_opts & DIFF_PICKAXE_ALL)) {
		/* Keep only the filepairs that contain the needle. */
		for (idx = 0; idx < q->nr; idx++) {
			struct diff_filepair *pair = q->queue[idx];

			if (!pickaxe_match(pair, o, regexp, kws, fn)) {
				diff_free_filepair(pair);
				continue;
			}
			diff_q(&kept, pair);
		}
	} else {
		/* Show the whole changeset if the needle exists anywhere. */
		for (idx = 0; idx < q->nr; idx++) {
			if (pickaxe_match(q->queue[idx], o, regexp, kws, fn))
				return; /* do not munge the queue */
		}

		/*
		 * No match at all: the queue is about to be replaced by
		 * the still-empty "kept", so release its current entries
		 * first.
		 */
		for (idx = 0; idx < q->nr; idx++)
			diff_free_filepair(q->queue[idx]);
	}

	free(q->queue);
	*q = kept;
}
/*
 * Entry point: filter diff_queued_diff by the needle in o->pickaxe.
 *
 * When DIFF_PICKAXE_REGEX or DIFF_PICKAXE_KIND_G is set in
 * o->pickaxe_opts, the needle is compiled as an extended regex (with
 * REG_NEWLINE, plus REG_ICASE if PICKAXE_IGNORE_CASE is set); an
 * invalid pattern is fatal.  Otherwise a keyword set is built for
 * exact string matching.  DIFF_PICKAXE_KIND_G selects diff_grep as the
 * match strategy; anything else uses has_changes.
 */
void diffcore_pickaxe(struct diff_options *o)
{
	const char *needle = o->pickaxe;
	int opts = o->pickaxe_opts;
	regex_t regex, *regexp = NULL;
	kwset_t kws = NULL;

	if (opts & (DIFF_PICKAXE_REGEX | DIFF_PICKAXE_KIND_G)) {
		int err;
		int cflags = REG_EXTENDED | REG_NEWLINE;

		if (DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE))
			cflags |= REG_ICASE;
		err = regcomp(&regex, needle, cflags);
		if (err) {
			/* The POSIX.2 people are surely sick */
			char errbuf[1024];

			/*
			 * Do not regfree() here: after a failed regcomp()
			 * the contents of the regex_t are undefined, so
			 * there is nothing that can safely be released.
			 */
			regerror(err, &regex, errbuf, sizeof(errbuf));
			die("invalid regex: %s", errbuf);
		}
		regexp = &regex;
	} else {
		kws = kwsalloc(DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE)
			       ? tolower_trans_tbl : NULL);
		kwsincr(kws, needle, strlen(needle));
		kwsprep(kws);
	}

	/* Might want to warn when both S and G are on; I don't care... */
	pickaxe(&diff_queued_diff, o, regexp, kws,
		(opts & DIFF_PICKAXE_KIND_G) ? diff_grep : has_changes);

	/* Exactly one of the two matchers was set up above; release it. */
	if (regexp)
		regfree(regexp);
	else
		kwsfree(kws);
}