Merge branch 'mt/patch-id-stable' (early part)

* 'mt/patch-id-stable' (early part):
  patch-id-test: test stable and unstable behaviour
  patch-id: make it stable against hunk reordering
  test doc: test_write_lines does not split its arguments
  test: add test_write_lines helper
This commit is contained in:
Junio C Hamano 2014-06-16 10:05:37 -07:00
commit 79e539404c
5 changed files with 208 additions and 31 deletions

View file

@ -8,14 +8,14 @@ git-patch-id - Compute unique ID for a patch
SYNOPSIS
--------
[verse]
'git patch-id' < <patch>
'git patch-id' [--stable | --unstable] < <patch>
DESCRIPTION
-----------
A "patch ID" is nothing but a SHA-1 of the diff associated with a patch, with
whitespace and line numbers ignored. As such, it's "reasonably stable", but at
the same time also reasonably unique, i.e., two patches that have the same "patch
ID" are almost guaranteed to be the same thing.
A "patch ID" is nothing but a sum of the SHA-1 hashes of the file diffs
associated with a patch, with whitespace and line numbers ignored. As such,
it's "reasonably stable", but at the same time also reasonably unique, i.e.,
two patches that have the same "patch ID" are almost guaranteed to be the same thing.
IOW, you can use this thing to look for likely duplicate commits.
@ -27,6 +27,33 @@ This can be used to make a mapping from patch ID to commit ID.
OPTIONS
-------
--stable::
Use a "stable" sum of hashes as the patch ID. With this option:
- Reordering file diffs that make up a patch does not affect the ID.
In particular, two patches produced by comparing the same two trees
with two different settings for "-O<orderfile>" result in the same
patch ID signature, thereby allowing the computed result to be used
as a key to index some meta-information about the change between
the two trees;
- Result is different from the value produced by git 1.9 and older
or produced when an "unstable" hash (see --unstable below) is
configured - even when used on a diff output taken without any use
of "-O<orderfile>", thereby making existing databases storing such
"unstable" or historical patch-ids unusable.
This is the default if patchid.stable is set to true.
--unstable::
Use an "unstable" hash as the patch ID. With this option,
the result produced is compatible with the patch-id value produced
by git 1.9 and older. Users with pre-existing databases storing
patch-ids produced by git 1.9 and older (who do not deal with reordered
patches) may want to use this option.
This is the default.
<patch>::
The diff to create the ID of.

View file

@ -1,17 +1,14 @@
#include "builtin.h"
static void flush_current_id(int patchlen, unsigned char *id, git_SHA_CTX *c)
static void flush_current_id(int patchlen, unsigned char *id, unsigned char *result)
{
unsigned char result[20];
char name[50];
if (!patchlen)
return;
git_SHA1_Final(result, c);
memcpy(name, sha1_to_hex(id), 41);
printf("%s %s\n", sha1_to_hex(result), name);
git_SHA1_Init(c);
}
static int remove_space(char *line)
@ -56,10 +53,31 @@ static int scan_hunk_header(const char *p, int *p_before, int *p_after)
return 1;
}
static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct strbuf *line_buf)
/*
 * Fold the hash accumulated so far in "ctx" into the running 20-byte
 * total kept in "result".
 *
 * The digest is obtained with git_SHA1_Final() and "ctx" is immediately
 * re-initialized with git_SHA1_Init() so the caller can keep feeding it.
 * The digest is then added to "result" byte by byte, starting at index 0
 * and propagating the carry towards index 19; a carry out of the last
 * byte is dropped.
 */
static void flush_one_hunk(unsigned char *result, git_SHA_CTX *ctx)
{
	unsigned char digest[20];
	unsigned short acc = 0;
	int pos = 0;

	git_SHA1_Final(digest, ctx);
	git_SHA1_Init(ctx);

	/* 20-byte sum, with carry */
	while (pos < 20) {
		/* acc holds the carry from the previous byte (0 or 1). */
		acc += result[pos] + digest[pos];
		/* Storing into an unsigned char keeps only the low 8 bits. */
		result[pos] = acc;
		acc >>= 8;
		pos++;
	}
}
static int get_one_patchid(unsigned char *next_sha1, unsigned char *result,
struct strbuf *line_buf, int stable)
{
int patchlen = 0, found_next = 0;
int before = -1, after = -1;
git_SHA_CTX ctx;
git_SHA1_Init(&ctx);
hashclr(result);
while (strbuf_getwholeline(line_buf, stdin, '\n') != EOF) {
char *line = line_buf->buf;
@ -107,6 +125,8 @@ static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct st
break;
/* Else we're parsing another header. */
if (stable)
flush_one_hunk(result, &ctx);
before = after = -1;
}
@ -119,39 +139,63 @@ static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct st
/* Compute the sha without whitespace */
len = remove_space(line);
patchlen += len;
git_SHA1_Update(ctx, line, len);
git_SHA1_Update(&ctx, line, len);
}
if (!found_next)
hashclr(next_sha1);
flush_one_hunk(result, &ctx);
return patchlen;
}
static void generate_id_list(void)
static void generate_id_list(int stable)
{
unsigned char sha1[20], n[20];
git_SHA_CTX ctx;
unsigned char sha1[20], n[20], result[20];
int patchlen;
struct strbuf line_buf = STRBUF_INIT;
git_SHA1_Init(&ctx);
hashclr(sha1);
while (!feof(stdin)) {
patchlen = get_one_patchid(n, &ctx, &line_buf);
flush_current_id(patchlen, sha1, &ctx);
patchlen = get_one_patchid(n, result, &line_buf, stable);
flush_current_id(patchlen, sha1, result);
hashcpy(sha1, n);
}
strbuf_release(&line_buf);
}
static const char patch_id_usage[] = "git patch-id < patch";
static const char patch_id_usage[] = "git patch-id [--stable | --unstable] < patch";
/*
 * Configuration callback for "git patch-id".
 *
 * "cb" must point at an int; when the variable is "patchid.stable" the
 * int is set from git_config_bool() and 0 is returned.  Every other
 * variable is handed to git_default_config() and its return value is
 * passed back unchanged.
 */
static int git_patch_id_config(const char *var, const char *value, void *cb)
{
	int *stable_flag = cb;

	/* Not ours: let the default handler deal with it. */
	if (strcmp(var, "patchid.stable") != 0)
		return git_default_config(var, value, cb);

	*stable_flag = git_config_bool(var, value);
	return 0;
}
int cmd_patch_id(int argc, const char **argv, const char *prefix)
{
if (argc != 1)
int stable = -1;
git_config(git_patch_id_config, &stable);
/* If nothing is set, default to unstable. */
if (stable < 0)
stable = 0;
if (argc == 2 && !strcmp(argv[1], "--stable"))
stable = 1;
else if (argc == 2 && !strcmp(argv[1], "--unstable"))
stable = 0;
else if (argc != 1)
usage(patch_id_usage);
generate_id_list();
generate_id_list(stable);
return 0;
}

View file

@ -596,6 +596,27 @@ library for your script to use.
...
'
- test_write_lines <lines>
Write <lines> on standard output, one line per argument.
Useful to prepare multi-line files in a compact form.
Example:
test_write_lines a b c d e f g >foo
Is a more compact equivalent of:
cat >foo <<-EOF
a
b
c
d
e
f
g
EOF
- test_pause
This command is useful for writing and debugging tests and must be

View file

@ -5,27 +5,44 @@ test_description='git patch-id'
. ./test-lib.sh
test_expect_success 'setup' '
test_commit initial foo a &&
test_commit first foo b &&
git checkout -b same HEAD^ &&
test_commit same-msg foo b &&
git checkout -b notsame HEAD^ &&
test_commit notsame-msg foo c
as="a a a a a a a a" && # eight a
test_write_lines $as >foo &&
test_write_lines $as >bar &&
git add foo bar &&
git commit -a -m initial &&
test_write_lines $as b >foo &&
test_write_lines $as b >bar &&
git commit -a -m first &&
git checkout -b same master &&
git commit --amend -m same-msg &&
git checkout -b notsame master &&
echo c >foo &&
echo c >bar &&
git commit --amend -a -m notsame-msg &&
test_write_lines bar foo >bar-then-foo &&
test_write_lines foo bar >foo-then-bar
'
test_expect_success 'patch-id output is well-formed' '
git log -p -1 | git patch-id > output &&
git log -p -1 | git patch-id >output &&
grep "^[a-f0-9]\{40\} $(git rev-parse HEAD)$" output
'
# Calculate the patch ID and make sure the output is not empty.
calc_patch_id () {
git patch-id |
sed "s# .*##" > patch-id_"$1"
name="$1"
shift
git patch-id "$@" |
sed "s/ .*//" >patch-id_"$name" &&
test_line_count -gt 0 patch-id_"$name"
}
# Print "git log -p -1" output for the revision/options given as arguments,
# passing "-O bar-then-foo" so the file diffs follow the order file that the
# setup test writes (bar first, then foo).  The trailing "--" ends the
# revision arguments.
get_top_diff () {
git log -p -1 "$@" -O bar-then-foo --
}
get_patch_id () {
git log -p -1 "$1" | git patch-id |
sed "s# .*##" > patch-id_"$1"
get_top_diff "$1" | calc_patch_id "$@"
}
test_expect_success 'patch-id detects equality' '
@ -56,6 +73,69 @@ test_expect_success 'whitespace is irrelevant in footer' '
test_cmp patch-id_master patch-id_same
'
# Compare the stored patch IDs patch-id_$2 and patch-id_$3.
# When $1 is exactly "relevant" the two files are expected to differ
# (the test_cmp result is negated); for any other value of $1 they are
# expected to be identical.
cmp_patch_id () {
	case "$1" in
	relevant)
		! test_cmp patch-id_"$2" patch-id_"$3"
		;;
	*)
		test_cmp patch-id_"$2" patch-id_"$3"
		;;
	esac
}
# Check whether the order of file diffs within a patch affects the patch ID.
#
# Usage: test_patch_id_file_order <relevant|irrelevant> <tag> [<options>...]
#   $1        - expectation handed to cmp_patch_id: "relevant" means the
#               two IDs must differ, anything else means they must match
#   $2        - tag used to build the names of the patch-id_* result files
#   remaining - forwarded through calc_patch_id (e.g. --stable / --unstable)
#
# The first ID is computed from the top commit of "master" with the diff in
# bar-then-foo order (see get_top_diff); the second from "git format-patch"
# on branch "same" with the diff in foo-then-bar order.
test_patch_id_file_order () {
relevant="$1"
shift
name="order-${1}-$relevant"
shift
get_top_diff "master" | calc_patch_id "$name" "$@" &&
git checkout same &&
git format-patch -1 --stdout -O foo-then-bar |
calc_patch_id "ordered-$name" "$@" &&
cmp_patch_id $relevant "$name" "ordered-$name"
}
# Combined test for options: add more checks here to have them run for
# every option combination exercised below.  For now this only forwards
# all of its arguments to test_patch_id_file_order.
test_patch_id () {
test_patch_id_file_order "$@"
}
# Small tests with detailed diagnostics for the basic options.
# Arguments to test_patch_id_file_order and test_patch_id are:
#   <relevant|irrelevant> <tag> [<options forwarded via calc_patch_id>...]
test_expect_success 'file order is irrelevant with --stable' '
test_patch_id_file_order irrelevant --stable --stable
'
test_expect_success 'file order is relevant with --unstable' '
test_patch_id_file_order relevant --unstable --unstable
'
# Now test various option combinations: no option at all, the
# patchid.stable configuration variable on its own, and the command-line
# options overriding the configuration.
test_expect_success 'default is unstable' '
test_patch_id relevant default
'
test_expect_success 'patchid.stable = true is stable' '
test_config patchid.stable true &&
test_patch_id irrelevant patchid.stable=true
'
test_expect_success 'patchid.stable = false is unstable' '
test_config patchid.stable false &&
test_patch_id relevant patchid.stable=false
'
test_expect_success '--unstable overrides patchid.stable = true' '
test_config patchid.stable true &&
test_patch_id relevant patchid.stable=true--unstable --unstable
'
test_expect_success '--stable overrides patchid.stable = false' '
test_config patchid.stable false &&
test_patch_id irrelevant patchid.stable=false--stable --stable
'
test_expect_success 'patch-id supports git-format-patch MIME output' '
get_patch_id master &&
git checkout same &&

View file

@ -717,6 +717,11 @@ test_ln_s_add () {
fi
}
# Write each parameter on its own line to standard output.
# The parameters are expanded as "$@", so an argument that contains
# whitespace is written as a single line and is not split further.
test_write_lines () {
printf "%s\n" "$@"
}
perl () {
command "$PERL_PATH" "$@"
}