Merge branch 'pw/diff-color-moved-ws-fix'

"git diff --color-moved-ws" updates.

* pw/diff-color-moved-ws-fix:
  diff --color-moved-ws: handle blank lines
  diff --color-moved-ws: modify allow-indentation-change
  diff --color-moved-ws: optimize allow-indentation-change
  diff --color-moved=zebra: be stricter with color alternation
  diff --color-moved-ws: fix false positives
  diff --color-moved-ws: demonstrate false positives
  diff: allow --no-color-moved-ws
  Use "whitespace" consistently
  diff: document --no-color-moved
This commit is contained in:
Junio C Hamano 2019-01-29 12:47:53 -08:00
commit 15b07cba0b
4 changed files with 255 additions and 86 deletions

View file

@ -293,8 +293,12 @@ dimmed-zebra::
`dimmed_zebra` is a deprecated synonym.
--
--no-color-moved::
Turn off move detection. This can be used to override configuration
settings. It is the same as `--color-moved=no`.
--color-moved-ws=<modes>::
This configures how white spaces are ignored when performing the
This configures how whitespace is ignored when performing the
move detection for `--color-moved`.
ifdef::git-diff[]
It can be set by the `diff.colorMovedWS` configuration setting.
@ -302,6 +306,8 @@ endif::git-diff[]
These modes can be given as a comma separated list:
+
--
no::
Do not ignore whitespace when performing move detection.
ignore-space-at-eol::
Ignore changes in whitespace at EOL.
ignore-space-change::
@ -312,12 +318,17 @@ ignore-all-space::
Ignore whitespace when comparing lines. This ignores differences
even if one line has whitespace where the other line has none.
allow-indentation-change::
Initially ignore any white spaces in the move detection, then
Initially ignore any whitespace in the move detection, then
group the moved code blocks only into a block if the change in
whitespace is the same per line. This is incompatible with the
other modes.
--
--no-color-moved-ws::
Do not ignore whitespace when performing move detection. This can be
used to override configuration settings. It is the same as
`--color-moved-ws=no`.
--word-diff[=<mode>]::
Show a word diff, using the <mode> to delimit changed words.
By default, words are delimited by whitespace; see

View file

@ -23,8 +23,8 @@ In the second form, a list of objects (separated by linefeeds) is provided on
stdin, and the SHA-1, type, and size of each object is printed on stdout. The
output format can be overridden using the optional `<format>` argument. If
either `--textconv` or `--filters` was specified, the input is expected to
list the object names followed by the path name, separated by a single white
space, so that the appropriate drivers can be determined.
list the object names followed by the path name, separated by a single
whitespace character, so that the appropriate drivers can be determined.
OPTIONS
-------
@ -79,7 +79,7 @@ OPTIONS
Print object information and contents for each object provided
on stdin. May not be combined with any other options or arguments
except `--textconv` or `--filters`, in which case the input lines
also need to specify the path, separated by white space. See the
also need to specify the path, separated by whitespace. See the
section `BATCH OUTPUT` below for details.
--batch-check::
@ -87,7 +87,7 @@ OPTIONS
Print object information for each object provided on stdin. May
not be combined with any other options or arguments except
`--textconv` or `--filters`, in which case the input lines also
need to specify the path, separated by white space. See the
need to specify the path, separated by whitespace. See the
section `BATCH OUTPUT` below for details.
--batch-all-objects::

219
diff.c
View file

@ -304,7 +304,9 @@ static unsigned parse_color_moved_ws(const char *arg)
strbuf_addstr(&sb, i->string);
strbuf_trim(&sb);
if (!strcmp(sb.buf, "ignore-space-change"))
if (!strcmp(sb.buf, "no"))
ret = 0;
else if (!strcmp(sb.buf, "ignore-space-change"))
ret |= XDF_IGNORE_WHITESPACE_CHANGE;
else if (!strcmp(sb.buf, "ignore-space-at-eol"))
ret |= XDF_IGNORE_WHITESPACE_AT_EOL;
@ -322,7 +324,7 @@ static unsigned parse_color_moved_ws(const char *arg)
if ((ret & COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) &&
(ret & XDF_WHITESPACE_FLAGS)) {
error(_("color-moved-ws: allow-indentation-change cannot be combined with other white space modes"));
error(_("color-moved-ws: allow-indentation-change cannot be combined with other whitespace modes"));
ret |= COLOR_MOVED_WS_ERROR;
}
@ -754,6 +756,8 @@ struct emitted_diff_symbol {
const char *line;
int len;
int flags;
int indent_off; /* Offset to first non-whitespace character */
int indent_width; /* The visual width of the indentation */
enum diff_symbol s;
};
#define EMITTED_DIFF_SYMBOL_INIT {NULL}
@ -784,44 +788,85 @@ struct moved_entry {
struct moved_entry *next_line;
};
/**
* The struct ws_delta holds white space differences between moved lines, i.e.
* between '+' and '-' lines that have been detected to be a move.
* The string contains the difference in leading white spaces, before the
* rest of the line is compared using the white space config for move
* coloring. The current_longer indicates if the first string in the
* comparison is longer than the second.
*/
struct ws_delta {
char *string;
unsigned int current_longer : 1;
};
#define WS_DELTA_INIT { NULL, 0 }
struct moved_block {
struct moved_entry *match;
struct ws_delta wsd;
int wsd; /* The whitespace delta of this block */
};
static void moved_block_clear(struct moved_block *b)
{
FREE_AND_NULL(b->wsd.string);
b->match = NULL;
memset(b, 0, sizeof(*b));
}
#define INDENT_BLANKLINE INT_MIN
/*
 * Record the indentation of one emitted diff line in 'es'.
 *
 * On return es->indent_off is the offset of the first character after the
 * leading indentation and es->indent_width is the visual width of that
 * indentation, with tabs expanded using the tab width stored in the low
 * bits of es->flags (WS_TAB_WIDTH_MASK).  A line containing nothing but
 * whitespace is reported as indent_width == INDENT_BLANKLINE and
 * indent_off == es->len.
 *
 * NOTE(review): the first two scans are not bounded by es->len; they stop
 * at the first byte that is not an indentation character, so es->line is
 * presumably NUL-terminated -- confirm against the code that fills it in.
 */
static void fill_es_indent_data(struct emitted_diff_symbol *es)
{
	const char *text = es->line;
	const int len = es->len;
	const int tab_width = es->flags & WS_TAB_WIDTH_MASK;
	unsigned int pos = 0, scan;
	int width = 0;

	/* step over leading \f and \v, and any \r that is not the last byte */
	for (;;) {
		const char ch = text[pos];

		if (ch != '\f' && ch != '\v' &&
		    !(ch == '\r' && pos < len - 1))
			break;
		pos++;
	}

	/*
	 * Measure the visual width of the run of spaces and tabs.  A space
	 * is one column wide; a tab advances to the next tab stop (which is
	 * a whole tab_width when we are already sitting on a tab stop).
	 */
	for (;; pos++) {
		if (text[pos] == ' ')
			width++;
		else if (text[pos] == '\t')
			width += tab_width - (width % tab_width);
		else
			break;
	}

	/* look for any non-whitespace character in the rest of the line */
	scan = pos;
	while (scan < len && isspace(text[scan]))
		scan++;

	if (scan != len) {
		/* real content follows the indentation */
		es->indent_off = pos;
		es->indent_width = width;
		return;
	}

	/* nothing but whitespace: flag the line as blank */
	es->indent_width = INDENT_BLANKLINE;
	es->indent_off = len;
}
static int compute_ws_delta(const struct emitted_diff_symbol *a,
const struct emitted_diff_symbol *b,
struct ws_delta *out)
const struct emitted_diff_symbol *b,
int *out)
{
const struct emitted_diff_symbol *longer = a->len > b->len ? a : b;
const struct emitted_diff_symbol *shorter = a->len > b->len ? b : a;
int d = longer->len - shorter->len;
int a_len = a->len,
b_len = b->len,
a_off = a->indent_off,
a_width = a->indent_width,
b_off = b->indent_off,
b_width = b->indent_width;
int delta;
if (strncmp(longer->line + d, shorter->line, shorter->len))
if (a_width == INDENT_BLANKLINE && b_width == INDENT_BLANKLINE) {
*out = INDENT_BLANKLINE;
return 1;
}
if (a->s == DIFF_SYMBOL_PLUS)
delta = a_width - b_width;
else
delta = b_width - a_width;
if (a_len - a_off != b_len - b_off ||
memcmp(a->line + a_off, b->line + b_off, a_len - a_off))
return 0;
out->string = xmemdupz(longer->line, d);
out->current_longer = (a == longer);
*out = delta;
return 1;
}
@ -833,51 +878,53 @@ static int cmp_in_block_with_wsd(const struct diff_options *o,
int n)
{
struct emitted_diff_symbol *l = &o->emitted_symbols->buf[n];
int al = cur->es->len, cl = l->len;
int al = cur->es->len, bl = match->es->len, cl = l->len;
const char *a = cur->es->line,
*b = match->es->line,
*c = l->line;
int wslen;
int a_off = cur->es->indent_off,
a_width = cur->es->indent_width,
c_off = l->indent_off,
c_width = l->indent_width;
int delta;
/*
* We need to check if 'cur' is equal to 'match'.
* As those are from the same (+/-) side, we do not need to adjust for
* indent changes. However these were found using fuzzy matching
* so we do have to check if they are equal.
* We need to check if 'cur' is equal to 'match'. As those
* are from the same (+/-) side, we do not need to adjust for
* indent changes. However these were found using fuzzy
* matching so we do have to check if they are equal. Here we
* just check the lengths. We delay calling memcmp() to check
* the contents until later as if the length comparison for a
* and c fails we can avoid the call altogether.
*/
if (strcmp(a, b))
if (al != bl)
return 1;
if (!pmb->wsd.string)
/*
* The white space delta is not active? This can happen
* when we exit early in this function.
*/
return 1;
/* If 'l' and 'cur' are both blank then they match. */
if (a_width == INDENT_BLANKLINE && c_width == INDENT_BLANKLINE)
return 0;
/*
* The indent changes of the block are known and stored in
* pmb->wsd; however we need to check if the indent changes of the
* current line are still the same as before.
*
* To do so we need to compare 'l' to 'cur', adjusting
* one of them for the white spaces, depending on which was longer.
* The indent changes of the block are known and stored in pmb->wsd;
* however we need to check if the indent changes of the current line
* match those of the current block and that the text of 'l' and 'cur'
* after the indentation match.
*/
if (cur->es->s == DIFF_SYMBOL_PLUS)
delta = a_width - c_width;
else
delta = c_width - a_width;
wslen = strlen(pmb->wsd.string);
if (pmb->wsd.current_longer) {
c += wslen;
cl -= wslen;
} else {
a += wslen;
al -= wslen;
}
/*
* If the previous lines of this block were all blank then set its
* whitespace delta.
*/
if (pmb->wsd == INDENT_BLANKLINE)
pmb->wsd = delta;
if (al != cl || memcmp(a, c, al))
return 1;
return 0;
return !(delta == pmb->wsd && al - a_off == cl - c_off &&
!memcmp(a, b, al) && !
memcmp(a + a_off, c + c_off, al - a_off));
}
static int moved_entry_cmp(const void *hashmap_cmp_fn_data,
@ -943,6 +990,9 @@ static void add_lines_to_move_detection(struct diff_options *o,
continue;
}
if (o->color_moved_ws_handling &
COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
fill_es_indent_data(&o->emitted_symbols->buf[n]);
key = prepare_entry(o, n);
if (prev_line && prev_line->es->s == o->emitted_symbols->buf[n].s)
prev_line->next_line = key;
@ -1021,8 +1071,7 @@ static int shrink_potential_moved_blocks(struct moved_block *pmb,
if (lp < pmb_nr && rp > -1 && lp < rp) {
pmb[lp] = pmb[rp];
pmb[rp].match = NULL;
pmb[rp].wsd.string = NULL;
memset(&pmb[rp], 0, sizeof(pmb[rp]));
rp--;
lp++;
}
@ -1042,14 +1091,17 @@ static int shrink_potential_moved_blocks(struct moved_block *pmb,
* The last block consists of the (n - block_length)'th line up to but not
* including the nth line.
*
* Returns 0 if the last block is empty or is unset by this function, non-zero
* otherwise.
*
* NEEDSWORK: This uses the same heuristic as blame_entry_score() in blame.c.
* Think of a way to unify them.
*/
static void adjust_last_block(struct diff_options *o, int n, int block_length)
static int adjust_last_block(struct diff_options *o, int n, int block_length)
{
int i, alnum_count = 0;
if (o->color_moved == COLOR_MOVED_PLAIN)
return;
return block_length;
for (i = 1; i < block_length + 1; i++) {
const char *c = o->emitted_symbols->buf[n - i].line;
for (; *c; c++) {
@ -1057,11 +1109,12 @@ static void adjust_last_block(struct diff_options *o, int n, int block_length)
continue;
alnum_count++;
if (alnum_count >= COLOR_MOVED_MIN_ALNUM_COUNT)
return;
return 1;
}
}
for (i = 1; i < block_length + 1; i++)
o->emitted_symbols->buf[n - i].flags &= ~DIFF_SYMBOL_MOVED_LINE;
return 0;
}
/* Find blocks of moved code, delegate actual coloring decision to helper */
@ -1071,7 +1124,7 @@ static void mark_color_as_moved(struct diff_options *o,
{
struct moved_block *pmb = NULL; /* potentially moved blocks */
int pmb_nr = 0, pmb_alloc = 0;
int n, flipped_block = 1, block_length = 0;
int n, flipped_block = 0, block_length = 0;
for (n = 0; n < o->emitted_symbols->nr; n++) {
@ -1079,6 +1132,7 @@ static void mark_color_as_moved(struct diff_options *o,
struct moved_entry *key;
struct moved_entry *match = NULL;
struct emitted_diff_symbol *l = &o->emitted_symbols->buf[n];
enum diff_symbol last_symbol = 0;
switch (l->s) {
case DIFF_SYMBOL_PLUS:
@ -1094,7 +1148,7 @@ static void mark_color_as_moved(struct diff_options *o,
free(key);
break;
default:
flipped_block = 1;
flipped_block = 0;
}
if (!match) {
@ -1105,13 +1159,16 @@ static void mark_color_as_moved(struct diff_options *o,
moved_block_clear(&pmb[i]);
pmb_nr = 0;
block_length = 0;
flipped_block = 0;
last_symbol = l->s;
continue;
}
l->flags |= DIFF_SYMBOL_MOVED_LINE;
if (o->color_moved == COLOR_MOVED_PLAIN)
if (o->color_moved == COLOR_MOVED_PLAIN) {
last_symbol = l->s;
l->flags |= DIFF_SYMBOL_MOVED_LINE;
continue;
}
if (o->color_moved_ws_handling &
COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
@ -1134,21 +1191,27 @@ static void mark_color_as_moved(struct diff_options *o,
&pmb[pmb_nr].wsd))
pmb[pmb_nr++].match = match;
} else {
pmb[pmb_nr].wsd.string = NULL;
pmb[pmb_nr].wsd = 0;
pmb[pmb_nr++].match = match;
}
}
flipped_block = (flipped_block + 1) % 2;
if (adjust_last_block(o, n, block_length) &&
pmb_nr && last_symbol != l->s)
flipped_block = (flipped_block + 1) % 2;
else
flipped_block = 0;
adjust_last_block(o, n, block_length);
block_length = 0;
}
block_length++;
if (flipped_block && o->color_moved != COLOR_MOVED_BLOCKS)
l->flags |= DIFF_SYMBOL_MOVED_LINE_ALT;
if (pmb_nr) {
block_length++;
l->flags |= DIFF_SYMBOL_MOVED_LINE;
if (flipped_block && o->color_moved != COLOR_MOVED_BLOCKS)
l->flags |= DIFF_SYMBOL_MOVED_LINE_ALT;
}
last_symbol = l->s;
}
adjust_last_block(o, n, block_length);
@ -1492,7 +1555,7 @@ static void emit_diff_symbol_from_struct(struct diff_options *o,
static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s,
const char *line, int len, unsigned flags)
{
struct emitted_diff_symbol e = {line, len, flags, s};
struct emitted_diff_symbol e = {line, len, flags, 0, 0, s};
if (o->emitted_symbols)
append_emitted_diff_symbol(o, &e);
@ -5042,6 +5105,8 @@ int diff_opt_parse(struct diff_options *options,
if (cm < 0)
return error("bad --color-moved argument: %s", arg);
options->color_moved = cm;
} else if (!strcmp(arg, "--no-color-moved-ws")) {
options->color_moved_ws_handling = 0;
} else if (skip_prefix(arg, "--color-moved-ws=", &arg)) {
unsigned cm = parse_color_moved_ws(arg);
if (cm & COLOR_MOVED_WS_ERROR)

View file

@ -1802,8 +1802,8 @@ test_expect_success 'only move detection ignores white spaces' '
<BOLD;MAGENTA>-a long line to exceed per-line minimum<RESET>
<BOLD;MAGENTA>-another long line to exceed per-line minimum<RESET>
<RED>-original file<RESET>
<BOLD;YELLOW>+<RESET>Q<BOLD;YELLOW>a long line to exceed per-line minimum<RESET>
<BOLD;YELLOW>+<RESET>Q<BOLD;YELLOW>another long line to exceed per-line minimum<RESET>
<BOLD;CYAN>+<RESET>Q<BOLD;CYAN>a long line to exceed per-line minimum<RESET>
<BOLD;CYAN>+<RESET>Q<BOLD;CYAN>another long line to exceed per-line minimum<RESET>
<GREEN>+<RESET><GREEN>new file<RESET>
EOF
test_cmp expected actual
@ -1827,6 +1827,7 @@ test_expect_success 'compare whitespace delta across moved blocks' '
QQQthat has similar lines
QQQto previous blocks, but with different indent
QQQYetQAnotherQoutlierQ
QLine with internal w h i t e s p a c e change
EOF
git add text.txt &&
@ -1847,6 +1848,7 @@ test_expect_success 'compare whitespace delta across moved blocks' '
QQthat has similar lines
QQto previous blocks, but with different indent
QQYetQAnotherQoutlier
QLine with internal whitespace change
EOF
git diff --color --color-moved --color-moved-ws=allow-indentation-change >actual.raw &&
@ -1856,7 +1858,7 @@ test_expect_success 'compare whitespace delta across moved blocks' '
<BOLD>diff --git a/text.txt b/text.txt<RESET>
<BOLD>--- a/text.txt<RESET>
<BOLD>+++ b/text.txt<RESET>
<CYAN>@@ -1,14 +1,14 @@<RESET>
<CYAN>@@ -1,15 +1,15 @@<RESET>
<BOLD;MAGENTA>-QIndented<RESET>
<BOLD;MAGENTA>-QText across<RESET>
<BOLD;MAGENTA>-Qsome lines<RESET>
@ -1871,6 +1873,7 @@ test_expect_success 'compare whitespace delta across moved blocks' '
<BOLD;MAGENTA>-QQQthat has similar lines<RESET>
<BOLD;MAGENTA>-QQQto previous blocks, but with different indent<RESET>
<RED>-QQQYetQAnotherQoutlierQ<RESET>
<RED>-QLine with internal w h i t e s p a c e change<RESET>
<BOLD;CYAN>+<RESET>QQ<BOLD;CYAN>Indented<RESET>
<BOLD;CYAN>+<RESET>QQ<BOLD;CYAN>Text across<RESET>
<BOLD;CYAN>+<RESET>QQ<BOLD;CYAN>some lines<RESET>
@ -1885,6 +1888,7 @@ test_expect_success 'compare whitespace delta across moved blocks' '
<BOLD;CYAN>+<RESET>QQ<BOLD;CYAN>that has similar lines<RESET>
<BOLD;CYAN>+<RESET>QQ<BOLD;CYAN>to previous blocks, but with different indent<RESET>
<GREEN>+<RESET>QQ<GREEN>YetQAnotherQoutlier<RESET>
<GREEN>+<RESET>Q<GREEN>Line with internal whitespace change<RESET>
EOF
test_cmp expected actual
@ -1915,4 +1919,93 @@ test_expect_success 'compare whitespace delta incompatible with other space opti
test_i18ngrep allow-indentation-change err
'
EMPTY=''
test_expect_success 'compare mixed whitespace delta across moved blocks' '
git reset --hard &&
tr Q_ "\t " <<-EOF >text.txt &&
${EMPTY}
____too short without
${EMPTY}
___being grouped across blank line
${EMPTY}
context
lines
to
anchor
____Indented text to
_Q____be further indented by four spaces across
____Qseveral lines
QQ____These two lines have had their
____indentation reduced by four spaces
Qdifferent indentation change
____too short
EOF
git add text.txt &&
git commit -m "add text.txt" &&
tr Q_ "\t " <<-EOF >text.txt &&
context
lines
to
anchor
QIndented text to
QQbe further indented by four spaces across
Q____several lines
${EMPTY}
QQtoo short without
${EMPTY}
Q_______being grouped across blank line
${EMPTY}
Q_QThese two lines have had their
indentation reduced by four spaces
QQdifferent indentation change
__Qtoo short
EOF
git -c color.diff.whitespace="normal red" \
-c core.whitespace=space-before-tab \
diff --color --color-moved --ws-error-highlight=all \
--color-moved-ws=allow-indentation-change >actual.raw &&
grep -v "index" actual.raw | test_decode_color >actual &&
cat <<-\EOF >expected &&
<BOLD>diff --git a/text.txt b/text.txt<RESET>
<BOLD>--- a/text.txt<RESET>
<BOLD>+++ b/text.txt<RESET>
<CYAN>@@ -1,16 +1,16 @@<RESET>
<BOLD;MAGENTA>-<RESET>
<BOLD;MAGENTA>-<RESET><BOLD;MAGENTA> too short without<RESET>
<BOLD;MAGENTA>-<RESET>
<BOLD;MAGENTA>-<RESET><BOLD;MAGENTA> being grouped across blank line<RESET>
<BOLD;MAGENTA>-<RESET>
<RESET>context<RESET>
<RESET>lines<RESET>
<RESET>to<RESET>
<RESET>anchor<RESET>
<BOLD;MAGENTA>-<RESET><BOLD;MAGENTA> Indented text to<RESET>
<BOLD;MAGENTA>-<RESET><BRED> <RESET> <BOLD;MAGENTA> be further indented by four spaces across<RESET>
<BOLD;MAGENTA>-<RESET><BRED> <RESET> <BOLD;MAGENTA>several lines<RESET>
<BOLD;BLUE>-<RESET> <BOLD;BLUE> These two lines have had their<RESET>
<BOLD;BLUE>-<RESET><BOLD;BLUE> indentation reduced by four spaces<RESET>
<BOLD;MAGENTA>-<RESET> <BOLD;MAGENTA>different indentation change<RESET>
<RED>-<RESET><RED> too short<RESET>
<BOLD;CYAN>+<RESET> <BOLD;CYAN>Indented text to<RESET>
<BOLD;CYAN>+<RESET> <BOLD;CYAN>be further indented by four spaces across<RESET>
<BOLD;CYAN>+<RESET> <BOLD;CYAN> several lines<RESET>
<BOLD;YELLOW>+<RESET>
<BOLD;YELLOW>+<RESET> <BOLD;YELLOW>too short without<RESET>
<BOLD;YELLOW>+<RESET>
<BOLD;YELLOW>+<RESET> <BOLD;YELLOW> being grouped across blank line<RESET>
<BOLD;YELLOW>+<RESET>
<BOLD;CYAN>+<RESET> <BRED> <RESET> <BOLD;CYAN>These two lines have had their<RESET>
<BOLD;CYAN>+<RESET><BOLD;CYAN>indentation reduced by four spaces<RESET>
<BOLD;YELLOW>+<RESET> <BOLD;YELLOW>different indentation change<RESET>
<GREEN>+<RESET><BRED> <RESET> <GREEN>too short<RESET>
EOF
test_cmp expected actual
'
test_done