From 204f01a2f734fddab95b09123b04b1305620e7b6 Mon Sep 17 00:00:00 2001 From: Johan Herland Date: Mon, 11 Apr 2011 00:48:50 +0200 Subject: [PATCH 1/4] --dirstat: Describe non-obvious differences relative to --stat or regular diff Also add a testcase documenting the current behavior. Improved-by: Junio C Hamano Signed-off-by: Johan Herland Acked-by: Linus Torvalds Signed-off-by: Junio C Hamano --- Documentation/diff-options.txt | 4 +++ t/t4013-diff-various.sh | 27 ++++++++++++++----- t/t4013/diff.diff_--dirstat_initial_rearrange | 2 ++ ...-stdout_--cover-letter_-n_initial..master^ | 2 +- t/t4013/diff.log_--decorate=full_--all | 6 +++++ t/t4013/diff.log_--decorate_--all | 6 +++++ 6 files changed, 40 insertions(+), 7 deletions(-) create mode 100644 t/t4013/diff.diff_--dirstat_initial_rearrange diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index c93124be79..23772d615d 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -72,6 +72,10 @@ endif::git-format-patch[] a cut-off percent (3% by default) are not shown. The cut-off percent can be set with `--dirstat=`. Changes in a child directory are not counted for the parent directory, unless `--cumulative` is used. ++ +Note that the `--dirstat` option computes the changes while ignoring +pure code movements within a file. In other words, rearranging lines +in a file is not counted as a change. --dirstat-by-file[=]:: Same as `--dirstat`, but counts changed files instead of lines. diff --git a/t/t4013-diff-various.sh b/t/t4013-diff-various.sh index 5daa0f2a0c..3b1b392a56 100755 --- a/t/t4013-diff-various.sh +++ b/t/t4013-diff-various.sh @@ -80,18 +80,31 @@ test_expect_success setup ' git config log.showroot false && git commit --amend && + + GIT_AUTHOR_DATE="2006-06-26 00:06:00 +0000" && + GIT_COMMITTER_DATE="2006-06-26 00:06:00 +0000" && + export GIT_AUTHOR_DATE GIT_COMMITTER_DATE && + git checkout -b rearrange initial && + for i in B A; do echo $i; done >dir/sub && + git add dir/sub && + git commit -m "Rearranged lines in dir/sub" && + git checkout master && + git show-branch ' : <<\EOF ! [initial] Initial * [master] Merge branch 'side' - ! [side] Side ---- - - [master] Merge branch 'side' - *+ [side] Side - * [master^] Second -+*+ [initial] Initial + ! [rearrange] Rearranged lines in dir/sub + ! [side] Side +---- + + [rearrange] Rearranged lines in dir/sub + - [master] Merge branch 'side' + * + [side] Side + * [master^] Third + * [master~2] Second ++*++ [initial] Initial EOF V=`git version | sed -e 's/^git version //' -e 's/\./\\./g'` @@ -287,6 +300,8 @@ diff --no-index --name-status -- dir2 dir diff --no-index dir dir3 diff master master^ side diff --dirstat master~1 master~2 +# --dirstat doesn't notice changes that simply rearrange existing lines +diff --dirstat initial rearrange EOF test_expect_success 'log -S requires an argument' ' diff --git a/t/t4013/diff.diff_--dirstat_initial_rearrange b/t/t4013/diff.diff_--dirstat_initial_rearrange new file mode 100644 index 0000000000..fb2e17dd2e --- /dev/null +++ b/t/t4013/diff.diff_--dirstat_initial_rearrange @@ -0,0 +1,2 @@ +$ git diff --dirstat initial rearrange +$ diff --git a/t/t4013/diff.format-patch_--stdout_--cover-letter_-n_initial..master^ b/t/t4013/diff.format-patch_--stdout_--cover-letter_-n_initial..master^ index 1f0f9ad44b..3b4e113012 100644 --- a/t/t4013/diff.format-patch_--stdout_--cover-letter_-n_initial..master^ +++ b/t/t4013/diff.format-patch_--stdout_--cover-letter_-n_initial..master^ @@ -1,7 +1,7 @@ $ git format-patch --stdout --cover-letter -n initial..master^ From 9a6d4949b6b76956d9d5e26f2791ec2ceff5fdc0 Mon Sep 17 00:00:00 2001 From: C O Mitter -Date: Mon, 26 Jun 2006 00:05:00 +0000 +Date: Mon, 26 Jun 2006 00:06:00 +0000 Subject: [DIFFERENT_PREFIX 0/2] *** SUBJECT HERE *** *** BLURB HERE *** diff --git a/t/t4013/diff.log_--decorate=full_--all b/t/t4013/diff.log_--decorate=full_--all index d155e0bab2..44d45257da 100644 --- a/t/t4013/diff.log_--decorate=full_--all +++ b/t/t4013/diff.log_--decorate=full_--all @@ -1,4 +1,10 @@ $ git log --decorate=full --all +commit cd4e72fd96faed3f0ba949dc42967430374e2290 (refs/heads/rearrange) +Author: A U Thor +Date: Mon Jun 26 00:06:00 2006 +0000 + + Rearranged lines in dir/sub + commit 59d314ad6f356dd08601a4cd5e530381da3e3c64 (HEAD, refs/heads/master) Merge: 9a6d494 c7a2ab9 Author: A U Thor diff --git a/t/t4013/diff.log_--decorate_--all b/t/t4013/diff.log_--decorate_--all index fd7c3e6439..27d3eabc26 100644 --- a/t/t4013/diff.log_--decorate_--all +++ b/t/t4013/diff.log_--decorate_--all @@ -1,4 +1,10 @@ $ git log --decorate --all +commit cd4e72fd96faed3f0ba949dc42967430374e2290 (rearrange) +Author: A U Thor +Date: Mon Jun 26 00:06:00 2006 +0000 + + Rearranged lines in dir/sub + commit 59d314ad6f356dd08601a4cd5e530381da3e3c64 (HEAD, master) Merge: 9a6d494 c7a2ab9 Author: A U Thor From 0133dab75d8b15c559aa9df66134d72dce0e0476 Mon Sep 17 00:00:00 2001 From: Johan Herland Date: Mon, 11 Apr 2011 00:48:51 +0200 Subject: [PATCH 2/4] --dirstat-by-file: Make it faster and more correct Currently, when using --dirstat-by-file, it first does the full --dirstat analysis (using diffcore_count_changes()), and then resets 'damage' to 1, if any damage was found by diffcore_count_changes(). But --dirstat-by-file is not interested in the file damage per se. It only cares if the file changed at all. In that sense it only cares if the blob object for a file has changed. We therefore only need to compare the object names of each file pair in the diff queue and we can skip the entire --dirstat analysis and simply set 'damage' to 1 for each entry where the object name has changed. This makes --dirstat-by-file faster, and also bypasses --dirstat's practice of ignoring rearranged lines within a file. The patch also contains an added testcase verifying that --dirstat-by-file now detects changes that only rearrange lines within a file. Signed-off-by: Johan Herland Acked-by: Linus Torvalds Signed-off-by: Junio C Hamano --- diff.c | 25 +++++++++++++++---- t/t4013-diff-various.sh | 2 ++ ...f.diff_--dirstat-by-file_initial_rearrange | 3 +++ 3 files changed, 25 insertions(+), 5 deletions(-) create mode 100644 t/t4013/diff.diff_--dirstat-by-file_initial_rearrange diff --git a/diff.c b/diff.c index 3fd9e0c703..4f5270b8db 100644 --- a/diff.c +++ b/diff.c @@ -1539,9 +1539,27 @@ static void show_dirstat(struct diff_options *options) struct diff_filepair *p = q->queue[i]; const char *name; unsigned long copied, added, damage; + int content_changed; name = p->one->path ? p->one->path : p->two->path; + if (p->one->sha1_valid && p->two->sha1_valid) + content_changed = hashcmp(p->one->sha1, p->two->sha1); + else + content_changed = 1; + + if (DIFF_OPT_TST(options, DIRSTAT_BY_FILE)) { + /* + * In --dirstat-by-file mode, we don't really need to + * look at the actual file contents at all. + * The fact that the SHA1 changed is enough for us to + * add this file to the list of results + * (with each file contributing equal damage). + */ + damage = content_changed ? 1 : 0; + goto found_damage; + } + if (DIFF_FILE_VALID(p->one) && DIFF_FILE_VALID(p->two)) { diff_populate_filespec(p->one, 0); diff_populate_filespec(p->two, 0); @@ -1564,14 +1582,11 @@ static void show_dirstat(struct diff_options *options) /* * Original minus copied is the removed material, * added is the new material. They are both damages - * made to the preimage. In --dirstat-by-file mode, count - * damaged files, not damaged lines. This is done by - * counting only a single damaged line per file. + * made to the preimage. */ damage = (p->one->size - copied) + added; - if (DIFF_OPT_TST(options, DIRSTAT_BY_FILE) && damage > 0) - damage = 1; +found_damage: ALLOC_GROW(dir.files, dir.nr + 1, dir.alloc); dir.files[dir.nr].name = name; dir.files[dir.nr].changed = damage; diff --git a/t/t4013-diff-various.sh b/t/t4013-diff-various.sh index 3b1b392a56..6428a905ab 100755 --- a/t/t4013-diff-various.sh +++ b/t/t4013-diff-various.sh @@ -302,6 +302,8 @@ diff master master^ side diff --dirstat master~1 master~2 # --dirstat doesn't notice changes that simply rearrange existing lines diff --dirstat initial rearrange +# ...but --dirstat-by-file does notice changes that only rearrange lines +diff --dirstat-by-file initial rearrange EOF test_expect_success 'log -S requires an argument' ' diff --git a/t/t4013/diff.diff_--dirstat-by-file_initial_rearrange b/t/t4013/diff.diff_--dirstat-by-file_initial_rearrange new file mode 100644 index 0000000000..e48e33f678 --- /dev/null +++ b/t/t4013/diff.diff_--dirstat-by-file_initial_rearrange @@ -0,0 +1,3 @@ +$ git diff --dirstat-by-file initial rearrange + 100.0% dir/ +$ From 2ff3a80334115797b8446909655e536f43900bc5 Mon Sep 17 00:00:00 2001 From: Johan Herland Date: Mon, 11 Apr 2011 00:48:52 +0200 Subject: [PATCH 3/4] Teach --dirstat not to completely ignore rearranged lines within a file Currently, the --dirstat analysis ignores when lines within a file are rearranged, because the "damage" calculated by show_dirstat() is 0. However, if the object name has changed, we already know that there is some damage, and it is unintuitive to claim there is _no_ damage. Teach show_dirstat() to assign a minimum amount of damage (== 1) to entries for which the analysis otherwise yields zero damage, to still represent that these files are changed, instead of saying that there is no change. Also, skip --dirstat analysis when the object names are the same (e.g. for a pure file rename). Signed-off-by: Johan Herland Acked-by: Linus Torvalds Signed-off-by: Junio C Hamano --- Documentation/diff-options.txt | 4 ++-- diff.c | 19 ++++++++++++++++++- t/t4013-diff-various.sh | 2 -- t/t4013/diff.diff_--dirstat_initial_rearrange | 1 + 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index 23772d615d..7e4bd425e1 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -74,8 +74,8 @@ endif::git-format-patch[] counted for the parent directory, unless `--cumulative` is used. + Note that the `--dirstat` option computes the changes while ignoring -pure code movements within a file. In other words, rearranging lines -in a file is not counted as a change. +the amount of pure code movements within a file. In other words, +rearranging lines in a file is not counted as much as other changes. --dirstat-by-file[=]:: Same as `--dirstat`, but counts changed files instead of lines. diff --git a/diff.c b/diff.c index 4f5270b8db..1f44cb4237 100644 --- a/diff.c +++ b/diff.c @@ -1548,6 +1548,16 @@ static void show_dirstat(struct diff_options *options) else content_changed = 1; + if (!content_changed) { + /* + * The SHA1 has not changed, so pre-/post-content is + * identical. We can therefore skip looking at the + * file contents altogether. + */ + damage = 0; + goto found_damage; + } + if (DIFF_OPT_TST(options, DIRSTAT_BY_FILE)) { /* * In --dirstat-by-file mode, we don't really need to @@ -1556,7 +1566,7 @@ static void show_dirstat(struct diff_options *options) * add this file to the list of results * (with each file contributing equal damage). */ - damage = content_changed ? 1 : 0; + damage = 1; goto found_damage; } @@ -1583,8 +1593,15 @@ static void show_dirstat(struct diff_options *options) * Original minus copied is the removed material, * added is the new material. They are both damages * made to the preimage. + * If the resulting damage is zero, we know that + * diffcore_count_changes() considers the two entries to + * be identical, but since content_changed is true, we + * know that there must have been _some_ kind of change, + * so we force all entries to have damage > 0. */ damage = (p->one->size - copied) + added; + if (!damage) + damage = 1; found_damage: ALLOC_GROW(dir.files, dir.nr + 1, dir.alloc); diff --git a/t/t4013-diff-various.sh b/t/t4013-diff-various.sh index 6428a905ab..93a6f20871 100755 --- a/t/t4013-diff-various.sh +++ b/t/t4013-diff-various.sh @@ -300,9 +300,7 @@ diff --no-index --name-status -- dir2 dir diff --no-index dir dir3 diff master master^ side diff --dirstat master~1 master~2 -# --dirstat doesn't notice changes that simply rearrange existing lines diff --dirstat initial rearrange -# ...but --dirstat-by-file does notice changes that only rearrange lines diff --dirstat-by-file initial rearrange EOF diff --git a/t/t4013/diff.diff_--dirstat_initial_rearrange b/t/t4013/diff.diff_--dirstat_initial_rearrange index fb2e17dd2e..5fb02c13bc 100644 --- a/t/t4013/diff.diff_--dirstat_initial_rearrange +++ b/t/t4013/diff.diff_--dirstat_initial_rearrange @@ -1,2 +1,3 @@ $ git diff --dirstat initial rearrange + 100.0% dir/ $ From 2ca86714703f81f9dd5dfb31f8d97a8a0089634d Mon Sep 17 00:00:00 2001 From: Johan Herland Date: Tue, 12 Apr 2011 11:24:34 +0200 Subject: [PATCH 4/4] --dirstat: In case of renames, use target filename instead of source filename This changes --dirstat analysis to count "damage" toward the target filename, rather than the source filename. For renames within a directory, this won't matter to the final output, but when moving files between diretories, the output now lists the target directory rather than the source directory. Signed-off-by: Johan Herland Acked-by: Linus Torvalds Signed-off-by: Junio C Hamano --- diff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/diff.c b/diff.c index 1f44cb4237..abd9cd5f33 100644 --- a/diff.c +++ b/diff.c @@ -1541,7 +1541,7 @@ static void show_dirstat(struct diff_options *options) unsigned long copied, added, damage; int content_changed; - name = p->one->path ? p->one->path : p->two->path; + name = p->two->path ? p->two->path : p->one->path; if (p->one->sha1_valid && p->two->sha1_valid) content_changed = hashcmp(p->one->sha1, p->two->sha1);