git/tree-diff.c
Linus Torvalds 750f7b668f Finally implement "git log --follow"
Ok, I've really held off doing this too damn long, because I'm lazy, and I
was always hoping that somebody else would do it.

But no, people keep asking for it, but nobody actually did anything, so I
decided I might as well bite the bullet, and instead of telling people
they could add a "--follow" flag to "git log" to do what they want to do,
I decided that it looks like I just have to do it for them..

The code wasn't actually that complicated, in that the diffstat for this
patch literally says "70 insertions(+), 1 deletions(-)", but I will have
to admit that in order to get to this fairly simple patch, you did have to
know and understand the internal git diff generation machinery pretty
well, and had to really be able to follow how commit generation interacts
with generating patches and generating the log.

So I suspect that while I was right that it wasn't that hard, I might have
been expecting too much of random people - this patch does seem to be
firmly in the core "Linus or Junio" territory.

To make a long story short: I'm sorry for it taking so long until I just
did it.

I'm not going to guarantee that this works for everybody, but you really
can just look at the patch, and after the appropriate appreciative noises
("Ooh, aah") over how clever I am, you can then just notice that the code
itself isn't really that complicated.

All the real new code is in the new "try_to_follow_renames()" function. It
really isn't rocket science: we notice that the pathname we were looking
at went away, so we start a full tree diff and try to see if we can
instead make that pathname be a rename or a copy from some other previous
pathname. And if we can, we just continue, except we show *that*
particular diff, and ever after we use the _previous_ pathname.

One thing to look out for: the "rename detection" is considered to be a
singular event in the _linear_ "git log" output! That's what people want
to do, but I just wanted to point out that this patch is *not* carrying
around a "commit,pathname" kind of pair and it's *not* going to be able to
notice the file coming from multiple *different* files in earlier history.

IOW, if you use "git log --follow", then you get the stupid CVS/SVN kind
of "files have single identities" kind of semantics, and git log will just
pick the identity based on the normal move/copy heuristics _as_if_ the
history could be linearized.

Put another way: I think the model is broken, but given the broken model,
I think this patch does just about as well as you can do. If you have
merges with the same "file" having different filenames over the two
branches, git will just end up picking _one_ of the pathnames at the point
where the newer one goes away. It never looks at multiple pathnames in
parallel.

And if you understood all that, you probably didn't need it explained, and
if you didn't understand the above blathering, it doesn't really mtter to
you. What matters to you is that you can now do

	git log -p --follow builtin-rev-list.c

and it will find the point where the old "rev-list.c" got renamed to
"builtin-rev-list.c" and show it as such.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-06-22 23:37:11 -07:00

424 lines
10 KiB
C

/*
* Helper functions for tree diff generation
*/
#include "cache.h"
#include "diff.h"
#include "diffcore.h"
#include "tree.h"
static char *malloc_base(const char *base, int baselen, const char *path, int pathlen)
{
char *newbase = xmalloc(baselen + pathlen + 2);
memcpy(newbase, base, baselen);
memcpy(newbase + baselen, path, pathlen);
memcpy(newbase + baselen + pathlen, "/", 2);
return newbase;
}
static void show_entry(struct diff_options *opt, const char *prefix, struct tree_desc *desc,
const char *base, int baselen);
static int compare_tree_entry(struct tree_desc *t1, struct tree_desc *t2, const char *base, int baselen, struct diff_options *opt)
{
unsigned mode1, mode2;
const char *path1, *path2;
const unsigned char *sha1, *sha2;
int cmp, pathlen1, pathlen2;
sha1 = tree_entry_extract(t1, &path1, &mode1);
sha2 = tree_entry_extract(t2, &path2, &mode2);
pathlen1 = tree_entry_len(path1, sha1);
pathlen2 = tree_entry_len(path2, sha2);
cmp = base_name_compare(path1, pathlen1, mode1, path2, pathlen2, mode2);
if (cmp < 0) {
show_entry(opt, "-", t1, base, baselen);
return -1;
}
if (cmp > 0) {
show_entry(opt, "+", t2, base, baselen);
return 1;
}
if (!opt->find_copies_harder && !hashcmp(sha1, sha2) && mode1 == mode2)
return 0;
/*
* If the filemode has changed to/from a directory from/to a regular
* file, we need to consider it a remove and an add.
*/
if (S_ISDIR(mode1) != S_ISDIR(mode2)) {
show_entry(opt, "-", t1, base, baselen);
show_entry(opt, "+", t2, base, baselen);
return 0;
}
if (opt->recursive && S_ISDIR(mode1)) {
int retval;
char *newbase = malloc_base(base, baselen, path1, pathlen1);
if (opt->tree_in_recursive)
opt->change(opt, mode1, mode2,
sha1, sha2, base, path1);
retval = diff_tree_sha1(sha1, sha2, newbase, opt);
free(newbase);
return retval;
}
opt->change(opt, mode1, mode2, sha1, sha2, base, path1);
return 0;
}
/*
* Is a tree entry interesting given the pathspec we have?
*
* Return:
* - 2 for "yes, and all subsequent entries will be"
* - 1 for yes
* - zero for no
* - negative for "no, and no subsequent entries will be either"
*/
static int tree_entry_interesting(struct tree_desc *desc, const char *base, int baselen, struct diff_options *opt)
{
const char *path;
const unsigned char *sha1;
unsigned mode;
int i;
int pathlen;
int never_interesting = -1;
if (!opt->nr_paths)
return 1;
sha1 = tree_entry_extract(desc, &path, &mode);
pathlen = tree_entry_len(path, sha1);
for (i = 0; i < opt->nr_paths; i++) {
const char *match = opt->paths[i];
int matchlen = opt->pathlens[i];
int m = -1; /* signals that we haven't called strncmp() */
if (baselen >= matchlen) {
/* If it doesn't match, move along... */
if (strncmp(base, match, matchlen))
continue;
/*
* The base is a subdirectory of a path which
* was specified, so all of them are interesting.
*/
return 2;
}
/* Does the base match? */
if (strncmp(base, match, baselen))
continue;
match += baselen;
matchlen -= baselen;
if (never_interesting) {
/*
* We have not seen any match that sorts later
* than the current path.
*/
/*
* Does match sort strictly earlier than path
* with their common parts?
*/
m = strncmp(match, path,
(matchlen < pathlen) ? matchlen : pathlen);
if (m < 0)
continue;
/*
* If we come here even once, that means there is at
* least one pathspec that would sort equal to or
* later than the path we are currently looking at.
* In other words, if we have never reached this point
* after iterating all pathspecs, it means all
* pathspecs are either outside of base, or inside the
* base but sorts strictly earlier than the current
* one. In either case, they will never match the
* subsequent entries. In such a case, we initialized
* the variable to -1 and that is what will be
* returned, allowing the caller to terminate early.
*/
never_interesting = 0;
}
if (pathlen > matchlen)
continue;
if (matchlen > pathlen) {
if (match[pathlen] != '/')
continue;
if (!S_ISDIR(mode))
continue;
}
if (m == -1)
/*
* we cheated and did not do strncmp(), so we do
* that here.
*/
m = strncmp(match, path, pathlen);
/*
* If common part matched earlier then it is a hit,
* because we rejected the case where path is not a
* leading directory and is shorter than match.
*/
if (!m)
return 1;
}
return never_interesting; /* No matches */
}
/* A whole sub-tree went away or appeared */
static void show_tree(struct diff_options *opt, const char *prefix, struct tree_desc *desc, const char *base, int baselen)
{
int all_interesting = 0;
while (desc->size) {
int show;
if (all_interesting)
show = 1;
else {
show = tree_entry_interesting(desc, base, baselen,
opt);
if (show == 2)
all_interesting = 1;
}
if (show < 0)
break;
if (show)
show_entry(opt, prefix, desc, base, baselen);
update_tree_entry(desc);
}
}
/* A file entry went away or appeared */
static void show_entry(struct diff_options *opt, const char *prefix, struct tree_desc *desc,
const char *base, int baselen)
{
unsigned mode;
const char *path;
const unsigned char *sha1 = tree_entry_extract(desc, &path, &mode);
if (opt->recursive && S_ISDIR(mode)) {
enum object_type type;
int pathlen = tree_entry_len(path, sha1);
char *newbase = malloc_base(base, baselen, path, pathlen);
struct tree_desc inner;
void *tree;
unsigned long size;
tree = read_sha1_file(sha1, &type, &size);
if (!tree || type != OBJ_TREE)
die("corrupt tree sha %s", sha1_to_hex(sha1));
init_tree_desc(&inner, tree, size);
show_tree(opt, prefix, &inner, newbase, baselen + 1 + pathlen);
free(tree);
free(newbase);
} else {
opt->add_remove(opt, prefix[0], mode, sha1, base, path);
}
}
static void skip_uninteresting(struct tree_desc *t, const char *base, int baselen, struct diff_options *opt)
{
int all_interesting = 0;
while (t->size) {
int show;
if (all_interesting)
show = 1;
else {
show = tree_entry_interesting(t, base, baselen, opt);
if (show == 2)
all_interesting = 1;
}
if (!show) {
update_tree_entry(t);
continue;
}
/* Skip it all? */
if (show < 0)
t->size = 0;
return;
}
}
int diff_tree(struct tree_desc *t1, struct tree_desc *t2, const char *base, struct diff_options *opt)
{
int baselen = strlen(base);
for (;;) {
if (opt->quiet && opt->has_changes)
break;
if (opt->nr_paths) {
skip_uninteresting(t1, base, baselen, opt);
skip_uninteresting(t2, base, baselen, opt);
}
if (!t1->size) {
if (!t2->size)
break;
show_entry(opt, "+", t2, base, baselen);
update_tree_entry(t2);
continue;
}
if (!t2->size) {
show_entry(opt, "-", t1, base, baselen);
update_tree_entry(t1);
continue;
}
switch (compare_tree_entry(t1, t2, base, baselen, opt)) {
case -1:
update_tree_entry(t1);
continue;
case 0:
update_tree_entry(t1);
/* Fallthrough */
case 1:
update_tree_entry(t2);
continue;
}
die("git-diff-tree: internal error");
}
return 0;
}
/*
* Does it look like the resulting diff might be due to a rename?
* - single entry
* - not a valid previous file
*/
static inline int diff_might_be_rename(void)
{
return diff_queued_diff.nr == 1 &&
!DIFF_FILE_VALID(diff_queued_diff.queue[0]->one);
}
static void try_to_follow_renames(struct tree_desc *t1, struct tree_desc *t2, const char *base, struct diff_options *opt)
{
struct diff_options diff_opts;
const char *paths[2];
int i;
diff_setup(&diff_opts);
diff_opts.recursive = 1;
diff_opts.detect_rename = DIFF_DETECT_RENAME;
diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
diff_opts.single_follow = opt->paths[0];
paths[0] = NULL;
diff_tree_setup_paths(paths, &diff_opts);
if (diff_setup_done(&diff_opts) < 0)
die("unable to set up diff options to follow renames");
diff_tree(t1, t2, base, &diff_opts);
diffcore_std(&diff_opts);
/* NOTE! Ignore the first diff! That was the old one! */
for (i = 1; i < diff_queued_diff.nr; i++) {
struct diff_filepair *p = diff_queued_diff.queue[i];
/*
* Found a source? Not only do we use that for the new
* diff_queued_diff, we also use that as the path in
* the future!
*/
if ((p->status == 'R' || p->status == 'C') && !strcmp(p->two->path, opt->paths[0])) {
diff_queued_diff.queue[0] = p;
opt->paths[0] = xstrdup(p->one->path);
diff_tree_setup_paths(opt->paths, opt);
break;
}
}
/*
* Then, ignore any but the first entry! It might be the old one,
* or it might be the rename/copy we found
*/
diff_queued_diff.nr = 1;
}
int diff_tree_sha1(const unsigned char *old, const unsigned char *new, const char *base, struct diff_options *opt)
{
void *tree1, *tree2;
struct tree_desc t1, t2;
unsigned long size1, size2;
int retval;
tree1 = read_object_with_reference(old, tree_type, &size1, NULL);
if (!tree1)
die("unable to read source tree (%s)", sha1_to_hex(old));
tree2 = read_object_with_reference(new, tree_type, &size2, NULL);
if (!tree2)
die("unable to read destination tree (%s)", sha1_to_hex(new));
init_tree_desc(&t1, tree1, size1);
init_tree_desc(&t2, tree2, size2);
retval = diff_tree(&t1, &t2, base, opt);
if (opt->follow_renames && diff_might_be_rename()) {
init_tree_desc(&t1, tree1, size1);
init_tree_desc(&t2, tree2, size2);
try_to_follow_renames(&t1, &t2, base, opt);
}
free(tree1);
free(tree2);
return retval;
}
int diff_root_tree_sha1(const unsigned char *new, const char *base, struct diff_options *opt)
{
int retval;
void *tree;
unsigned long size;
struct tree_desc empty, real;
tree = read_object_with_reference(new, tree_type, &size, NULL);
if (!tree)
die("unable to read root tree (%s)", sha1_to_hex(new));
init_tree_desc(&real, tree, size);
init_tree_desc(&empty, "", 0);
retval = diff_tree(&empty, &real, base, opt);
free(tree);
return retval;
}
static int count_paths(const char **paths)
{
int i = 0;
while (*paths++)
i++;
return i;
}
void diff_tree_release_paths(struct diff_options *opt)
{
free(opt->pathlens);
}
void diff_tree_setup_paths(const char **p, struct diff_options *opt)
{
opt->nr_paths = 0;
opt->pathlens = NULL;
opt->paths = NULL;
if (p) {
int i;
opt->paths = p;
opt->nr_paths = count_paths(p);
if (opt->nr_paths == 0) {
opt->pathlens = NULL;
return;
}
opt->pathlens = xmalloc(opt->nr_paths * sizeof(int));
for (i=0; i < opt->nr_paths; i++)
opt->pathlens[i] = strlen(p[i]);
}
}