mirror of
https://github.com/git/git
synced 2024-09-12 21:04:12 +00:00
65b5d9fae7
After you anonymize a repository, it can be hard to find which commits correspond between the original and the result, and thus hard to reproduce commands that triggered bugs in the original.

Let's make it possible to seed the anonymization map. This lets users either:

- mark names to be retained as-is, if they don't consider them secret (in which case their original commands would just work)

- map names to new values, which lets them adapt the reproduction recipe to the new names without revealing the originals

The implementation is fairly straightforward. We already store each anonymized token in a hashmap (so that the same token appearing twice is converted to the same result). We can just introduce a new "seed" hashmap which is consulted first.

This does make a few more promises to the user about how we'll anonymize things (e.g., token-splitting pathnames). But it's unlikely that we'd want to change those rules, even if the actual anonymization of a single token changes. And it makes things much easier for the user, who can unblind only a directory name without having to specify each path within it.

One alternative to this approach would be to anonymize as we see fit, and then dump the whole refname and pathname mappings to a file. This does work, but it's a bit awkward to use (you have to manually dig the items you care about out of the mapping).

Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
133 lines
3.6 KiB
Bash
Executable file
133 lines
3.6 KiB
Bash
Executable file
#!/bin/sh
#
# Checks that "git fast-export --anonymize" hides path names, ref names,
# identities and messages while keeping the shape of the history, and that
# --anonymize-map lets the user seed the mapping with chosen names.

test_description='basic tests for fast-export --anonymize'
. ./test-lib.sh

test_expect_success 'setup simple repo' '
	# three commits on the starting branch
	for name in base foo retain-me
	do
		test_commit $name || return 1
	done &&

	# a second branch, forked from the parent of the tip, whose
	# commits touch files inside a directory
	git checkout -b other HEAD^ &&
	mkdir subdir &&
	for name in subdir/bar subdir/xyzzy
	do
		test_commit $name || return 1
	done &&

	# two gitlink entries (mode 160000) sharing one made-up commit id
	fake_commit=$(echo "$ZERO_OID" | sed -e "s/0/a/") &&
	for link in link1 link2
	do
		git update-index --add --cacheinfo \
			"160000,$fake_commit,$link" || return 1
	done &&
	git commit -m "add gitlink" &&

	git tag -m "annotated tag" mytag
'

test_expect_success 'export anonymized stream' '
	# seed the anonymization map with one bare name and one
	# name:replacement pair; later tests inspect "stream"
	git fast-export --anonymize --all \
		--anonymize-map=retain-me --anonymize-map=xyzzy:custom-name >stream
'

# this also covers commit messages
test_expect_success 'stream omits path names' '
	# none of the original file or directory names may show up
	for name in base foo subdir bar xyzzy
	do
		! grep $name stream || return 1
	done
'

test_expect_success 'stream contains user-specified names' '
	# both names come from the --anonymize-map options of the export
	for name in retain-me custom-name
	do
		grep $name stream || return 1
	done
'

test_expect_success 'stream omits gitlink oids' '
	# look for a prefix of the fake oid rather than all of it, so the
	# check stays hash-agnostic; the prefix is still unique within
	# this test case
	oid_prefix=a000000000000000000 &&
	! grep "$oid_prefix" stream
'

test_expect_success 'stream allows master as refname' '
	# "master" is the one refname expected to appear unchanged
	grep master <stream
'

test_expect_success 'stream omits other refnames' '
	# the branch and tag names created during setup must be gone
	for ref in other mytag
	do
		! grep $ref stream || return 1
	done
'

test_expect_success 'stream omits identities' '
	# neither the names nor the emails of committer and author
	# may appear anywhere in the stream
	for ident in "$GIT_COMMITTER_NAME" "$GIT_COMMITTER_EMAIL" \
		     "$GIT_AUTHOR_NAME" "$GIT_AUTHOR_EMAIL"
	do
		! grep "$ident" stream || return 1
	done
'

test_expect_success 'stream omits tag message' '
	# the message given to "git tag -m" during setup
	tag_message="annotated tag" &&
	! grep "$tag_message" stream
'

# NOTE: this test changes directory into "new", the repository built
# from the anonymized stream, and does not change back. Every test
# after it must assume it runs there.
test_expect_success 'import stream to new repository' '
	git init new &&
	cd new &&
	# the stream was written one level up, in the original repository
	git fast-import <../stream
'

test_expect_success 'result has two branches' '
	git for-each-ref --format="%(refname)" refs/heads >branches &&
	test_line_count = 2 branches &&
	# whichever branch is not master is kept in $other_branch,
	# which later tests read
	other_branch=$(grep -v refs/heads/master <branches)
'

test_expect_success 'repo has original shape and timestamps' '
	# print only the left/right/boundary marker and the committer
	# timestamp of each commit, once in the original repository
	# (one level up) and once here, then compare
	git -C .. log --format="%m %ct" --left-right --boundary \
		master...other >expect &&
	git log --format="%m %ct" --left-right --boundary \
		master...$other_branch >actual &&
	test_cmp expect actual
'

test_expect_success 'root tree has original shape' '
	# entries need not come out in the same order in both
	# repositories, so compare only the sorted object types
	# (second space-separated column of ls-tree)
	git -C .. ls-tree HEAD >orig-root &&
	git ls-tree $other_branch >root &&
	cut -d" " -f2 orig-root | sort >expect &&
	cut -d" " -f2 root | sort >actual &&
	test_cmp expect actual
'

test_expect_success 'paths in subdir ended up in one tree' '
	# object types of the entries below subdir in the original
	git -C .. ls-tree other:subdir >orig-subdir &&
	cut -d" " -f2 <orig-subdir | sort >expect &&

	# "root" holds the ls-tree output of the anonymized root tree;
	# the name of its tree entry follows the tab
	tree=$(grep tree root | cut -f2) &&
	git ls-tree $other_branch:$tree >tree &&

	# entry order is not guaranteed to match the original, so sort
	# this side too before comparing it with the sorted expectation
	cut -d" " -f2 <tree | sort >actual &&
	test_cmp expect actual
'

test_expect_success 'identical gitlinks got identical oid' '
	# setup gave both gitlinks the same commit id, so after
	# de-duplication exactly one id must remain
	awk "/commit/ { print \$3 }" root |
	sort -u >commits &&
	test_line_count = 1 commits
'

test_expect_success 'tag points to branch tip' '
	git rev-parse $other_branch >expect &&

	# write the git output to a file first, so that a failing
	# for-each-ref is noticed instead of being hidden by a pipe
	git for-each-ref --format="%(*objectname)" >peeled &&

	# %(*objectname) is the object a tag points at and is blank for
	# refs that are not tags; "grep ." drops the blank lines
	grep . peeled >actual &&
	test_cmp expect actual
'

test_expect_success 'idents are shared' '
	git log --all --format="%an <%ae>" >authors &&
	git log --all --format="%cn <%ce>" >committers &&

	# within each list every commit must carry the same identity
	for list in authors committers
	do
		sort -u $list >unique &&
		test_line_count = 1 unique || return 1
	done &&

	# but the author list and the committer list must not be the same
	! test_cmp authors committers
'

test_done