1
0
mirror of https://github.com/git/git synced 2024-07-05 00:58:49 +00:00
git/diff.c

776 lines
19 KiB
C
Raw Normal View History

/*
* Copyright (C) 2005 Junio C Hamano
*/
#include <sys/types.h>
#include <sys/wait.h>
#include <signal.h>
#include <limits.h>
#include "cache.h"
#include "diff.h"
#include "delta.h"
Rename environment variables. H. Peter Anvin mentioned that using SHA1_whatever as an environment variable name is not nice and we should instead use names starting with "GIT_" prefix to avoid conflicts. Here is what this patch does: * Renames the following environment variables: New name Old Name GIT_AUTHOR_DATE AUTHOR_DATE GIT_AUTHOR_EMAIL AUTHOR_EMAIL GIT_AUTHOR_NAME AUTHOR_NAME GIT_COMMITTER_EMAIL COMMIT_AUTHOR_EMAIL GIT_COMMITTER_NAME COMMIT_AUTHOR_NAME GIT_ALTERNATE_OBJECT_DIRECTORIES SHA1_FILE_DIRECTORIES GIT_OBJECT_DIRECTORY SHA1_FILE_DIRECTORY * Introduces a compatibility macro, gitenv(), which does an getenv() and if it fails calls gitenv_bc(), which in turn picks up the value from old name while giving a warning about using an old name. * Changes all users of the environment variable to fetch environment variable with the new name using gitenv(). * Updates the documentation and scripts shipped with Linus GIT distribution. The transition plan is as follows: * We will keep the backward compatibility list used by gitenv() for now, so the current scripts and user environments continue to work as before. The users will get warnings when they have old name but not new name in their environment to the stderr. * The Porcelain layers should start using new names. However, just in case it ends up calling old Plumbing layer implementation, they should also export old names, taking values from the corresponding new names, during the transition period. * After a transition period, we would drop the compatibility support and drop gitenv(). Revert the callers to directly call getenv() but keep using the new names. The last part is probably optional and the transition duration needs to be set to a reasonable value. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-05-10 00:57:56 +00:00
static const char *diff_opts = "-pu";
static unsigned char null_sha1[20] = { 0, };
static const char *external_diff(void)
{
Rename environment variables. H. Peter Anvin mentioned that using SHA1_whatever as an environment variable name is not nice and we should instead use names starting with "GIT_" prefix to avoid conflicts. Here is what this patch does: * Renames the following environment variables: New name Old Name GIT_AUTHOR_DATE AUTHOR_DATE GIT_AUTHOR_EMAIL AUTHOR_EMAIL GIT_AUTHOR_NAME AUTHOR_NAME GIT_COMMITTER_EMAIL COMMIT_AUTHOR_EMAIL GIT_COMMITTER_NAME COMMIT_AUTHOR_NAME GIT_ALTERNATE_OBJECT_DIRECTORIES SHA1_FILE_DIRECTORIES GIT_OBJECT_DIRECTORY SHA1_FILE_DIRECTORY * Introduces a compatibility macro, gitenv(), which does an getenv() and if it fails calls gitenv_bc(), which in turn picks up the value from old name while giving a warning about using an old name. * Changes all users of the environment variable to fetch environment variable with the new name using gitenv(). * Updates the documentation and scripts shipped with Linus GIT distribution. The transition plan is as follows: * We will keep the backward compatibility list used by gitenv() for now, so the current scripts and user environments continue to work as before. The users will get warnings when they have old name but not new name in their environment to the stderr. * The Porcelain layers should start using new names. However, just in case it ends up calling old Plumbing layer implementation, they should also export old names, taking values from the corresponding new names, during the transition period. * After a transition period, we would drop the compatibility support and drop gitenv(). Revert the callers to directly call getenv() but keep using the new names. The last part is probably optional and the transition duration needs to be set to a reasonable value. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-05-10 00:57:56 +00:00
static const char *external_diff_cmd = NULL;
static int done_preparing = 0;
if (done_preparing)
return external_diff_cmd;
/*
* Default values above are meant to match the
* Linux kernel development style. Examples of
* alternative styles you can specify via environment
* variables are:
*
* GIT_DIFF_OPTS="-c";
*/
Rename environment variables. H. Peter Anvin mentioned that using SHA1_whatever as an environment variable name is not nice and we should instead use names starting with "GIT_" prefix to avoid conflicts. Here is what this patch does: * Renames the following environment variables: New name Old Name GIT_AUTHOR_DATE AUTHOR_DATE GIT_AUTHOR_EMAIL AUTHOR_EMAIL GIT_AUTHOR_NAME AUTHOR_NAME GIT_COMMITTER_EMAIL COMMIT_AUTHOR_EMAIL GIT_COMMITTER_NAME COMMIT_AUTHOR_NAME GIT_ALTERNATE_OBJECT_DIRECTORIES SHA1_FILE_DIRECTORIES GIT_OBJECT_DIRECTORY SHA1_FILE_DIRECTORY * Introduces a compatibility macro, gitenv(), which does an getenv() and if it fails calls gitenv_bc(), which in turn picks up the value from old name while giving a warning about using an old name. * Changes all users of the environment variable to fetch environment variable with the new name using gitenv(). * Updates the documentation and scripts shipped with Linus GIT distribution. The transition plan is as follows: * We will keep the backward compatibility list used by gitenv() for now, so the current scripts and user environments continue to work as before. The users will get warnings when they have old name but not new name in their environment to the stderr. * The Porcelain layers should start using new names. However, just in case it ends up calling old Plumbing layer implementation, they should also export old names, taking values from the corresponding new names, during the transition period. * After a transition period, we would drop the compatibility support and drop gitenv(). Revert the callers to directly call getenv() but keep using the new names. The last part is probably optional and the transition duration needs to be set to a reasonable value. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-05-10 00:57:56 +00:00
if (gitenv("GIT_EXTERNAL_DIFF"))
external_diff_cmd = gitenv("GIT_EXTERNAL_DIFF");
/* In case external diff fails... */
Rename environment variables. H. Peter Anvin mentioned that using SHA1_whatever as an environment variable name is not nice and we should instead use names starting with "GIT_" prefix to avoid conflicts. Here is what this patch does: * Renames the following environment variables: New name Old Name GIT_AUTHOR_DATE AUTHOR_DATE GIT_AUTHOR_EMAIL AUTHOR_EMAIL GIT_AUTHOR_NAME AUTHOR_NAME GIT_COMMITTER_EMAIL COMMIT_AUTHOR_EMAIL GIT_COMMITTER_NAME COMMIT_AUTHOR_NAME GIT_ALTERNATE_OBJECT_DIRECTORIES SHA1_FILE_DIRECTORIES GIT_OBJECT_DIRECTORY SHA1_FILE_DIRECTORY * Introduces a compatibility macro, gitenv(), which does an getenv() and if it fails calls gitenv_bc(), which in turn picks up the value from old name while giving a warning about using an old name. * Changes all users of the environment variable to fetch environment variable with the new name using gitenv(). * Updates the documentation and scripts shipped with Linus GIT distribution. The transition plan is as follows: * We will keep the backward compatibility list used by gitenv() for now, so the current scripts and user environments continue to work as before. The users will get warnings when they have old name but not new name in their environment to the stderr. * The Porcelain layers should start using new names. However, just in case it ends up calling old Plumbing layer implementation, they should also export old names, taking values from the corresponding new names, during the transition period. * After a transition period, we would drop the compatibility support and drop gitenv(). Revert the callers to directly call getenv() but keep using the new names. The last part is probably optional and the transition duration needs to be set to a reasonable value. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-05-10 00:57:56 +00:00
diff_opts = gitenv("GIT_DIFF_OPTS") ? : diff_opts;
done_preparing = 1;
return external_diff_cmd;
}
/* Help to copy the thing properly quoted for the shell safety.
* any single quote is replaced with '\'', and the caller is
* expected to enclose the result within a single quote pair.
*
* E.g.
* original sq_expand result
* name ==> name ==> 'name'
* a b ==> a b ==> 'a b'
* a'b ==> a'\''b ==> 'a'\''b'
*/
static char *sq_expand(const char *src)
{
static char *buf = NULL;
int cnt, c;
const char *cp;
char *bp;
/* count bytes needed to store the quoted string. */
for (cnt = 1, cp = src; *cp; cnt++, cp++)
if (*cp == '\'')
cnt += 3;
buf = xmalloc(cnt);
bp = buf;
while ((c = *src++)) {
if (c != '\'')
*bp++ = c;
else {
bp = strcpy(bp, "'\\''");
bp += 4;
}
}
*bp = 0;
return buf;
}
static struct diff_tempfile {
const char *name;
char hex[41];
char mode[10];
char tmp_path[50];
} diff_temp[2];
struct diff_spec {
unsigned char blob_sha1[20];
unsigned short mode; /* file mode */
unsigned sha1_valid : 1; /* if true, use blob_sha1 and trust mode;
* if false, use the name and read from
* the filesystem.
*/
unsigned file_valid : 1; /* if false the file does not exist */
};
static void builtin_diff(const char *name_a,
const char *name_b,
struct diff_tempfile *temp)
{
int i, next_at, cmd_size;
const char *diff_cmd = "diff -L'%s%s' -L'%s%s'";
const char *diff_arg = "'%s' '%s'||:"; /* "||:" is to return 0 */
const char *input_name_sq[2];
const char *path0[2];
const char *path1[2];
const char *name_sq[2];
char *cmd;
name_sq[0] = sq_expand(name_a);
name_sq[1] = sq_expand(name_b);
/* diff_cmd and diff_arg have 6 %s in total which makes
* the sum of these strings 12 bytes larger than required.
* we use 2 spaces around diff-opts, and we need to count
* terminating NUL, so we subtract 9 here.
*/
cmd_size = (strlen(diff_cmd) + strlen(diff_opts) +
strlen(diff_arg) - 9);
for (i = 0; i < 2; i++) {
input_name_sq[i] = sq_expand(temp[i].name);
if (!strcmp(temp[i].name, "/dev/null")) {
path0[i] = "/dev/null";
path1[i] = "";
} else {
path0[i] = i ? "b/" : "a/";
path1[i] = name_sq[i];
}
cmd_size += (strlen(path0[i]) + strlen(path1[i]) +
strlen(input_name_sq[i]));
}
cmd = xmalloc(cmd_size);
next_at = 0;
next_at += snprintf(cmd+next_at, cmd_size-next_at,
diff_cmd,
path0[0], path1[0], path0[1], path1[1]);
next_at += snprintf(cmd+next_at, cmd_size-next_at,
" %s ", diff_opts);
next_at += snprintf(cmd+next_at, cmd_size-next_at,
diff_arg, input_name_sq[0], input_name_sq[1]);
printf("diff --git a/%s b/%s\n", name_a, name_b);
if (!path1[0][0])
printf("new file mode %s\n", temp[1].mode);
else if (!path1[1][0])
printf("deleted file mode %s\n", temp[0].mode);
else {
if (strcmp(temp[0].mode, temp[1].mode)) {
printf("old mode %s\n", temp[0].mode);
printf("new mode %s\n", temp[1].mode);
}
if (strcmp(name_a, name_b)) {
printf("rename old %s\n", name_a);
printf("rename new %s\n", name_b);
}
if (strncmp(temp[0].mode, temp[1].mode, 3))
/* we do not run diff between different kind
* of objects.
*/
exit(0);
}
fflush(NULL);
execlp("/bin/sh","sh", "-c", cmd, NULL);
}
/*
* Given a name and sha1 pair, if the dircache tells us the file in
* the work tree has that object contents, return true, so that
* prepare_temp_file() does not have to inflate and extract.
*/
static int work_tree_matches(const char *name, const unsigned char *sha1)
{
struct cache_entry *ce;
struct stat st;
int pos, len;
/* We do not read the cache ourselves here, because the
* benchmark with my previous version that always reads cache
* shows that it makes things worse for diff-tree comparing
* two linux-2.6 kernel trees in an already checked out work
* tree. This is because most diff-tree comparisons deal with
* only a small number of files, while reading the cache is
* expensive for a large project, and its cost outweighs the
* savings we get by not inflating the object to a temporary
* file. Practically, this code only helps when we are used
* by diff-cache --cached, which does read the cache before
* calling us.
*/
if (!active_cache)
return 0;
len = strlen(name);
pos = cache_name_pos(name, len);
if (pos < 0)
return 0;
ce = active_cache[pos];
if ((lstat(name, &st) < 0) ||
!S_ISREG(st.st_mode) ||
ce_match_stat(ce, &st) ||
memcmp(sha1, ce->sha1, 20))
return 0;
return 1;
}
static void prep_temp_blob(struct diff_tempfile *temp,
void *blob,
unsigned long size,
unsigned char *sha1,
int mode)
{
int fd;
strcpy(temp->tmp_path, ".diff_XXXXXX");
fd = mkstemp(temp->tmp_path);
if (fd < 0)
die("unable to create temp-file");
if (write(fd, blob, size) != size)
die("unable to write temp-file");
close(fd);
temp->name = temp->tmp_path;
strcpy(temp->hex, sha1_to_hex(sha1));
temp->hex[40] = 0;
sprintf(temp->mode, "%06o", mode);
}
static void prepare_temp_file(const char *name,
struct diff_tempfile *temp,
struct diff_spec *one)
{
if (!one->file_valid) {
not_a_valid_file:
/* A '-' entry produces this for file-2, and
* a '+' entry produces this for file-1.
*/
temp->name = "/dev/null";
strcpy(temp->hex, ".");
strcpy(temp->mode, ".");
return;
}
if (!one->sha1_valid ||
work_tree_matches(name, one->blob_sha1)) {
struct stat st;
temp->name = name;
if (lstat(temp->name, &st) < 0) {
if (errno == ENOENT)
goto not_a_valid_file;
die("stat(%s): %s", temp->name, strerror(errno));
}
if (S_ISLNK(st.st_mode)) {
int ret;
char *buf, buf_[1024];
buf = ((sizeof(buf_) < st.st_size) ?
xmalloc(st.st_size) : buf_);
ret = readlink(name, buf, st.st_size);
if (ret < 0)
die("readlink(%s)", name);
prep_temp_blob(temp, buf, st.st_size,
(one->sha1_valid ?
one->blob_sha1 : null_sha1),
(one->sha1_valid ?
one->mode : S_IFLNK));
}
else {
if (!one->sha1_valid)
strcpy(temp->hex, sha1_to_hex(null_sha1));
else
strcpy(temp->hex, sha1_to_hex(one->blob_sha1));
sprintf(temp->mode, "%06o",
S_IFREG |ce_permissions(st.st_mode));
}
return;
}
else {
void *blob;
char type[20];
unsigned long size;
blob = read_sha1_file(one->blob_sha1, type, &size);
if (!blob || strcmp(type, "blob"))
die("unable to read blob object for %s (%s)",
name, sha1_to_hex(one->blob_sha1));
prep_temp_blob(temp, blob, size, one->blob_sha1, one->mode);
free(blob);
}
}
static void remove_tempfile(void)
{
int i;
for (i = 0; i < 2; i++)
if (diff_temp[i].name == diff_temp[i].tmp_path) {
unlink(diff_temp[i].name);
diff_temp[i].name = NULL;
}
}
static void remove_tempfile_on_signal(int signo)
{
remove_tempfile();
}
static int detect_rename;
static int reverse_diff;
static const char **pathspec;
static int speccnt;
static int diff_rename_minimum_score;
static int matches_pathspec(const char *name)
{
int i;
int namelen;
if (speccnt == 0)
return 1;
namelen = strlen(name);
for (i = 0; i < speccnt; i++) {
int speclen = strlen(pathspec[i]);
if (! strncmp(pathspec[i], name, speclen) &&
speclen <= namelen &&
(name[speclen] == 0 || name[speclen] == '/'))
return 1;
}
return 0;
}
/* An external diff command takes:
*
* diff-cmd name infile1 infile1-sha1 infile1-mode \
* infile2 infile2-sha1 infile2-mode [ rename-to ]
*
*/
static void run_external_diff(const char *name,
const char *other,
struct diff_spec *one,
struct diff_spec *two)
{
struct diff_tempfile *temp = diff_temp;
pid_t pid;
int status;
static int atexit_asked = 0;
if (reverse_diff) {
struct diff_spec *tmp_spec;
tmp_spec = one; one = two; two = tmp_spec;
if (other) {
const char *tmp;
tmp = name; name = other; other = tmp;
}
}
if (!matches_pathspec(name) && (!other || !matches_pathspec(other)))
return;
if (one && two) {
prepare_temp_file(name, &temp[0], one);
prepare_temp_file(other ? : name, &temp[1], two);
if (! atexit_asked &&
(temp[0].name == temp[0].tmp_path ||
temp[1].name == temp[1].tmp_path)) {
atexit_asked = 1;
atexit(remove_tempfile);
}
signal(SIGINT, remove_tempfile_on_signal);
}
fflush(NULL);
pid = fork();
if (pid < 0)
die("unable to fork");
if (!pid) {
const char *pgm = external_diff();
if (pgm) {
if (one && two) {
const char *exec_arg[9];
const char **arg = &exec_arg[0];
*arg++ = pgm;
*arg++ = name;
*arg++ = temp[0].name;
*arg++ = temp[0].hex;
*arg++ = temp[0].mode;
*arg++ = temp[1].name;
*arg++ = temp[1].hex;
*arg++ = temp[1].mode;
if (other)
*arg++ = other;
*arg = 0;
execvp(pgm, (char *const*) exec_arg);
}
else
execlp(pgm, pgm, name, NULL);
}
/*
* otherwise we use the built-in one.
*/
if (one && two)
builtin_diff(name, other ? : name, temp);
else
printf("* Unmerged path %s\n", name);
exit(0);
}
if (waitpid(pid, &status, 0) < 0 ||
!WIFEXITED(status) || WEXITSTATUS(status)) {
/* Earlier we did not check the exit status because
* diff exits non-zero if files are different, and
* we are not interested in knowing that. It was a
* mistake which made it harder to quit a diff-*
* session that uses the git-apply-patch-script as
* the GIT_EXTERNAL_DIFF. A custom GIT_EXTERNAL_DIFF
* should also exit non-zero only when it wants to
* abort the entire diff-* session.
*/
remove_tempfile();
fprintf(stderr, "external diff died, stopping at %s.\n", name);
exit(1);
}
remove_tempfile();
}
/*
* We do not detect circular renames. Just hold created and deleted
* entries and later attempt to match them up. If they do not match,
* then spit them out as deletes or creates as original.
*/
static struct diff_spec_hold {
struct diff_spec_hold *next;
struct diff_spec it;
unsigned long size;
int flags;
#define MATCHED 1
#define SHOULD_FREE 2
#define SHOULD_MUNMAP 4
void *data;
char path[1];
} *createdfile, *deletedfile;
static void hold_diff(const char *name,
struct diff_spec *one,
struct diff_spec *two)
{
struct diff_spec_hold **list, *elem;
if (one->file_valid && two->file_valid)
die("internal error");
if (!detect_rename) {
run_external_diff(name, NULL, one, two);
return;
}
elem = xmalloc(sizeof(*elem) + strlen(name));
strcpy(elem->path, name);
elem->size = 0;
elem->data = NULL;
elem->flags = 0;
if (one->file_valid) {
list = &deletedfile;
elem->it = *one;
}
else {
list = &createdfile;
elem->it = *two;
}
elem->next = *list;
*list = elem;
}
static int populate_data(struct diff_spec_hold *s)
{
char type[20];
if (s->data)
return 0;
if (s->it.sha1_valid) {
s->data = read_sha1_file(s->it.blob_sha1, type, &s->size);
s->flags |= SHOULD_FREE;
}
else {
struct stat st;
int fd;
fd = open(s->path, O_RDONLY);
if (fd < 0)
return -1;
if (fstat(fd, &st)) {
close(fd);
return -1;
}
s->size = st.st_size;
s->data = mmap(NULL, s->size, PROT_READ, MAP_PRIVATE, fd, 0);
close(fd);
if (!s->size)
s->data = "";
else
s->flags |= SHOULD_MUNMAP;
}
return 0;
}
static void free_data(struct diff_spec_hold *s)
{
if (s->flags & SHOULD_FREE)
free(s->data);
else if (s->flags & SHOULD_MUNMAP)
munmap(s->data, s->size);
s->flags &= ~(SHOULD_FREE|SHOULD_MUNMAP);
s->data = 0;
}
static void flush_remaining_diff(struct diff_spec_hold *elem,
int on_created_list)
{
static struct diff_spec null_file_spec;
null_file_spec.file_valid = 0;
for ( ; elem ; elem = elem->next) {
free_data(elem);
if (elem->flags & MATCHED)
continue;
if (on_created_list)
run_external_diff(elem->path, NULL,
&null_file_spec, &elem->it);
else
run_external_diff(elem->path, NULL,
&elem->it, &null_file_spec);
}
}
static int is_exact_match(struct diff_spec_hold *src,
struct diff_spec_hold *dst)
{
if (src->it.sha1_valid && dst->it.sha1_valid &&
!memcmp(src->it.blob_sha1, dst->it.blob_sha1, 20))
return 1;
if (populate_data(src) || populate_data(dst))
/* this is an error but will be caught downstream */
return 0;
if (src->size == dst->size &&
!memcmp(src->data, dst->data, src->size))
return 1;
return 0;
}
#define MINIMUM_SCORE 5000
int estimate_similarity(struct diff_spec_hold *src, struct diff_spec_hold *dst)
{
/* src points at a deleted file and dst points at a created
* file. They may be quite similar, in which case we want to
* say src is renamed to dst.
*
* Compare them and return how similar they are, representing
* the score as an integer between 0 and 10000. 10000 is
* reserved for the case where they match exactly.
*/
void *delta;
unsigned long delta_size;
delta_size = ((src->size < dst->size) ?
(dst->size - src->size) : (src->size - dst->size));
/* We would not consider rename followed by more than
* 20% edits; that is, delta_size must be smaller than
* (src->size + dst->size)/2 * 0.2, which means...
*/
if ((src->size + dst->size) < delta_size * 10)
return 0;
delta = diff_delta(src->data, src->size,
dst->data, dst->size,
&delta_size);
free(delta);
/* This "delta" is really xdiff with adler32 and all the
* overheads but it is a quick and dirty approximation.
*
* Now we will give some score to it. Let's say 20% edit gets
* 5000 points and 0% edit gets 9000 points. That is, every
* 1/20000 edit gets 1 point penalty. The amount of penalty is:
*
* (delta_size * 2 / (src->size + dst->size)) * 20000
*
*/
return 9000 - (40000 * delta_size / (src->size+dst->size));
}
struct diff_score {
struct diff_spec_hold *src;
struct diff_spec_hold *dst;
int score;
};
static int score_compare(const void *a_, const void *b_)
{
const struct diff_score *a = a_, *b = b_;
return b->score - a->score;
}
static void flush_rename_pair(struct diff_spec_hold *src,
struct diff_spec_hold *dst)
{
src->flags |= MATCHED;
dst->flags |= MATCHED;
free_data(src);
free_data(dst);
run_external_diff(src->path, dst->path,
&src->it, &dst->it);
}
static void free_held_diff(struct diff_spec_hold *list)
{
struct diff_spec_hold *h;
for (h = list; list; list = h) {
h = list->next;
free_data(list);
free(list);
}
}
void diff_flush(void)
{
int num_create, num_delete, c, d;
struct diff_spec_hold *elem, *src, *dst;
struct diff_score *mx;
/* We really want to cull the candidates list early
* with cheap tests in order to avoid doing deltas.
*
* With the current callers, we should not have already
* matched entries at this point, but it is nonetheless
* checked for sanity.
*/
for (dst = createdfile; dst; dst = dst->next) {
if (dst->flags & MATCHED)
continue;
for (src = deletedfile; src; src = src->next) {
if (src->flags & MATCHED)
continue;
if (! is_exact_match(src, dst))
continue;
flush_rename_pair(src, dst);
break;
}
}
/* Count surviving candidates */
for (num_create = 0, elem = createdfile; elem; elem = elem->next)
if (!(elem->flags & MATCHED))
num_create++;
for (num_delete = 0, elem = deletedfile; elem; elem = elem->next)
if (!(elem->flags & MATCHED))
num_delete++;
if (num_create == 0 || num_delete == 0)
goto exit_path;
mx = xmalloc(sizeof(*mx) * num_create * num_delete);
for (c = 0, dst = createdfile; dst; dst = dst->next) {
int base = c * num_delete;
if (dst->flags & MATCHED)
continue;
for (d = 0, src = deletedfile; src; src = src->next) {
struct diff_score *m = &mx[base+d];
if (src->flags & MATCHED)
continue;
m->src = src;
m->dst = dst;
m->score = estimate_similarity(src, dst);
d++;
}
c++;
}
qsort(mx, num_create*num_delete, sizeof(*mx), score_compare);
#if 0
for (c = 0; c < num_create * num_delete; c++) {
src = mx[c].src;
dst = mx[c].dst;
if ((src->flags & MATCHED) || (dst->flags & MATCHED))
continue;
fprintf(stderr,
"**score ** %d %s %s\n",
mx[c].score, src->path, dst->path);
}
#endif
for (c = 0; c < num_create * num_delete; c++) {
src = mx[c].src;
dst = mx[c].dst;
if ((src->flags & MATCHED) || (dst->flags & MATCHED))
continue;
if (mx[c].score < diff_rename_minimum_score)
break;
flush_rename_pair(src, dst);
}
free(mx);
exit_path:
flush_remaining_diff(createdfile, 1);
flush_remaining_diff(deletedfile, 0);
free_held_diff(createdfile);
free_held_diff(deletedfile);
createdfile = deletedfile = NULL;
}
void diff_setup(int detect_rename_, int minimum_score_, int reverse_diff_,
const char **pathspec_, int speccnt_)
{
free_held_diff(createdfile);
free_held_diff(deletedfile);
createdfile = deletedfile = NULL;
detect_rename = detect_rename_;
reverse_diff = reverse_diff_;
pathspec = pathspec_;
speccnt = speccnt_;
diff_rename_minimum_score = minimum_score_ ? : MINIMUM_SCORE;
}
void diff_addremove(int addremove, unsigned mode,
const unsigned char *sha1,
const char *base, const char *path)
{
char concatpath[PATH_MAX];
struct diff_spec spec[2], *one, *two;
memcpy(spec[0].blob_sha1, sha1, 20);
spec[0].mode = mode;
spec[0].sha1_valid = !!memcmp(sha1, null_sha1, 20);
spec[0].file_valid = 1;
spec[1].file_valid = 0;
if (addremove == '+') {
one = spec + 1; two = spec;
} else {
one = spec; two = one + 1;
}
if (path) {
strcpy(concatpath, base);
strcat(concatpath, path);
}
hold_diff(path ? concatpath : base, one, two);
}
void diff_change(unsigned old_mode, unsigned new_mode,
const unsigned char *old_sha1,
const unsigned char *new_sha1,
const char *base, const char *path) {
char concatpath[PATH_MAX];
struct diff_spec spec[2];
if (path) {
strcpy(concatpath, base);
strcat(concatpath, path);
}
memcpy(spec[0].blob_sha1, old_sha1, 20);
spec[0].mode = old_mode;
memcpy(spec[1].blob_sha1, new_sha1, 20);
spec[1].mode = new_mode;
spec[0].sha1_valid = !!memcmp(old_sha1, null_sha1, 20);
spec[1].sha1_valid = !!memcmp(new_sha1, null_sha1, 20);
spec[1].file_valid = spec[0].file_valid = 1;
/* We do not look at changed files as candidate for
* rename detection ever.
*/
run_external_diff(path ? concatpath : base, NULL, &spec[0], &spec[1]);
}
void diff_unmerge(const char *path)
{
run_external_diff(path, NULL, NULL, NULL);
}