Merge branch 'jk/duplicate-objects-in-packs'

A packfile that stores the same object more than once is broken and
will be rejected by "git index-pack" that is run when receiving data
over the wire.

* jk/duplicate-objects-in-packs:
  t5308: check that index-pack --strict detects duplicate objects
  test index-pack on packs with recoverable delta cycles
  add tests for indexing packs with delta cycles
  sha1-lookup: handle duplicate keys with GIT_USE_LOOKUP
  test-sha1: add a binary output mode
This commit is contained in:
Junio C Hamano 2013-09-18 11:43:47 -07:00
commit 6c34560053
5 changed files with 316 additions and 3 deletions

View file

@ -204,7 +204,54 @@ int sha1_entry_pos(const void *table,
* byte 0 thru (ofs-1) are the same between
* lo and hi; ofs is the first byte that is
* different.
*
* If ofs==20, then no bytes are different,
* meaning we have entries with duplicate
* keys. We know that we are in a solid run
* of this entry (because the entries are
* sorted, and our lo and hi are the same,
* there can be nothing but this single key
* in between). So we can stop the search.
* Either one of these entries is it (and
* we do not care which), or we do not have
* it.
*
* Furthermore, we know that one of our
* endpoints must be the edge of the run of
* duplicates. For example, given this
* sequence:
*
* idx 0 1 2 3 4 5
* key A C C C C D
*
* If we are searching for "B", we might
* hit the duplicate run at lo=1, hi=3
* (e.g., by first mi=3, then mi=0). But we
* can never have lo > 1, because B < C.
* That is, if our key is less than the
* run, we know that "lo" is the edge, but
* we can say nothing of "hi". Similarly,
* if our key is greater than the run, we
* know that "hi" is the edge, but we can
* say nothing of "lo".
*
* Therefore if we do not find it, we also
* know where it would go if it did exist:
* just on the far side of the edge that we
* know about.
*/
if (ofs == 20) {
mi = lo;
mi_key = base + elem_size * mi + key_offset;
cmp = memcmp(mi_key, key, 20);
if (!cmp)
return mi;
if (cmp < 0)
return -1 - hi;
else
return -1 - lo;
}
hiv = hi_key[ofs_0];
if (ofs_0 < 19)
hiv = (hiv << 8) | hi_key[ofs_0+1];

100
t/lib-pack.sh Normal file
View file

@ -0,0 +1,100 @@
#!/bin/sh
#
# Support routines for hand-crafting weird or malicious packs.
#
# You can make a complete pack like:
#
# pack_header 2 >foo.pack &&
# pack_obj e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 >>foo.pack &&
# pack_obj e68fe8129b546b101aee9510c5328e7f21ca1d18 >>foo.pack &&
# pack_trailer foo.pack
# Print the big-endian 4-byte octal representation of $1
uint32_octal () {
n=$1
printf '\%o' $(($n / 16777216)); n=$((n % 16777216))
printf '\%o' $(($n / 65536)); n=$((n % 65536))
printf '\%o' $(($n / 256)); n=$((n % 256))
printf '\%o' $(($n ));
}
# Print the big-endian 4-byte binary representation of $1
uint32_binary () {
printf "$(uint32_octal "$1")"
}
# Print a pack header, version 2, for a pack with $1 objects
pack_header () {
printf 'PACK' &&
printf '\0\0\0\2' &&
uint32_binary "$1"
}
# Print the pack data for object $1, as a delta against object $2 (or as a full
# object if $2 is missing or empty). The output is suitable for including
# directly in the packfile, and represents the entirety of the object entry.
# Doing this on the fly (especially picking your deltas) is quite tricky, so we
# have hardcoded some well-known objects. See the case statements below for the
# complete list.
pack_obj () {
case "$1" in
# empty blob
e69de29bb2d1d6434b8b29ae775ad8c2e48c5391)
case "$2" in
'')
printf '\060\170\234\003\0\0\0\0\1'
return
;;
esac
;;
# blob containing "\7\76"
e68fe8129b546b101aee9510c5328e7f21ca1d18)
case "$2" in
'')
printf '\062\170\234\143\267\3\0\0\116\0\106'
return
;;
01d7713666f4de822776c7622c10f1b07de280dc)
printf '\165\1\327\161\66\146\364\336\202\47\166' &&
printf '\307\142\54\20\361\260\175\342\200\334\170' &&
printf '\234\143\142\142\142\267\003\0\0\151\0\114'
return
;;
esac
;;
# blob containing "\7\0"
01d7713666f4de822776c7622c10f1b07de280dc)
case "$2" in
'')
printf '\062\170\234\143\147\0\0\0\20\0\10'
return
;;
e68fe8129b546b101aee9510c5328e7f21ca1d18)
printf '\165\346\217\350\22\233\124\153\20\32\356' &&
printf '\225\20\305\62\216\177\41\312\35\30\170\234' &&
printf '\143\142\142\142\147\0\0\0\53\0\16'
return
;;
esac
;;
esac
echo >&2 "BUG: don't know how to print $1${2:+ (from $2)}"
return 1
}
# Compute and append pack trailer to "$1"
pack_trailer () {
test-sha1 -b <"$1" >trailer.tmp &&
cat trailer.tmp >>"$1" &&
rm -f trailer.tmp
}
# Remove any existing packs to make sure that
# whatever we index next will be the pack that we
# actually use.
clear_packs () {
rm -f .git/objects/pack/*
}

View file

@ -0,0 +1,80 @@
#!/bin/sh
test_description='handling of duplicate objects in incoming packfiles'
. ./test-lib.sh
. "$TEST_DIRECTORY"/lib-pack.sh
# The sha1s we have in our pack. It's important that these have the same
# starting byte, so that they end up in the same fanout section of the index.
# That lets us make sure we are exercising the binary search with both sets.
LO_SHA1=e68fe8129b546b101aee9510c5328e7f21ca1d18
HI_SHA1=e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
# And here's a "missing sha1" which will produce failed lookups. It must also
# be in the same fanout section, and should be between the two (so that during
# our binary search, we are sure to end up looking at one or the other of the
# duplicate runs).
MISSING_SHA1='e69d000000000000000000000000000000000000'
# git will never intentionally create packfiles with
# duplicate objects, so we have to construct them by hand.
#
# $1 is the name of the packfile to create
#
# $2 is the number of times to duplicate each object
create_pack () {
pack_header "$((2 * $2))" >"$1" &&
for i in $(test_seq 1 "$2"); do
pack_obj $LO_SHA1 &&
pack_obj $HI_SHA1
done >>"$1" &&
pack_trailer "$1"
}
# double-check that create_pack actually works
test_expect_success 'pack with no duplicates' '
create_pack no-dups.pack 1 &&
git index-pack --stdin <no-dups.pack
'
test_expect_success 'index-pack will allow duplicate objects by default' '
clear_packs &&
create_pack dups.pack 100 &&
git index-pack --stdin <dups.pack
'
test_expect_success 'create batch-check test vectors' '
cat >input <<-EOF &&
$LO_SHA1
$HI_SHA1
$MISSING_SHA1
EOF
cat >expect <<-EOF
$LO_SHA1 blob 2
$HI_SHA1 blob 0
$MISSING_SHA1 missing
EOF
'
test_expect_success 'lookup in duplicated pack (binary search)' '
git cat-file --batch-check <input >actual &&
test_cmp expect actual
'
test_expect_success 'lookup in duplicated pack (GIT_USE_LOOKUP)' '
(
GIT_USE_LOOKUP=1 &&
export GIT_USE_LOOKUP &&
git cat-file --batch-check <input >actual
) &&
test_cmp expect actual
'
test_expect_success 'index-pack can reject packs with duplicates' '
clear_packs &&
create_pack dups.pack 2 &&
test_must_fail git index-pack --strict --stdin <dups.pack &&
test_expect_code 1 git cat-file -e $LO_SHA1
'
test_done

77
t/t5309-pack-delta-cycles.sh Executable file
View file

@ -0,0 +1,77 @@
#!/bin/sh
test_description='test index-pack handling of delta cycles in packfiles'
. ./test-lib.sh
. "$TEST_DIRECTORY"/lib-pack.sh
# Two similar-ish objects that we have computed deltas between.
A=01d7713666f4de822776c7622c10f1b07de280dc
B=e68fe8129b546b101aee9510c5328e7f21ca1d18
# double-check our hand-constucted packs
test_expect_success 'index-pack works with a single delta (A->B)' '
clear_packs &&
{
pack_header 2 &&
pack_obj $A $B &&
pack_obj $B
} >ab.pack &&
pack_trailer ab.pack &&
git index-pack --stdin <ab.pack &&
git cat-file -t $A &&
git cat-file -t $B
'
test_expect_success 'index-pack works with a single delta (B->A)' '
clear_packs &&
{
pack_header 2 &&
pack_obj $A &&
pack_obj $B $A
} >ba.pack &&
pack_trailer ba.pack &&
git index-pack --stdin <ba.pack &&
git cat-file -t $A &&
git cat-file -t $B
'
test_expect_success 'index-pack detects missing base objects' '
clear_packs &&
{
pack_header 1 &&
pack_obj $A $B
} >missing.pack &&
pack_trailer missing.pack &&
test_must_fail git index-pack --fix-thin --stdin <missing.pack
'
test_expect_success 'index-pack detects REF_DELTA cycles' '
clear_packs &&
{
pack_header 2 &&
pack_obj $A $B &&
pack_obj $B $A
} >cycle.pack &&
pack_trailer cycle.pack &&
test_must_fail git index-pack --fix-thin --stdin <cycle.pack
'
test_expect_failure 'failover to an object in another pack' '
clear_packs &&
git index-pack --stdin <ab.pack &&
git index-pack --stdin --fix-thin <cycle.pack
'
test_expect_failure 'failover to a duplicate object in the same pack' '
clear_packs &&
{
pack_header 3 &&
pack_obj $A $B &&
pack_obj $B $A &&
pack_obj $A
} >recoverable.pack &&
pack_trailer recoverable.pack &&
git index-pack --fix-thin --stdin <recoverable.pack
'
test_done

View file

@ -5,10 +5,15 @@ int main(int ac, char **av)
git_SHA_CTX ctx;
unsigned char sha1[20];
unsigned bufsz = 8192;
int binary = 0;
char *buffer;
if (ac == 2)
bufsz = strtoul(av[1], NULL, 10) * 1024 * 1024;
if (ac == 2) {
if (!strcmp(av[1], "-b"))
binary = 1;
else
bufsz = strtoul(av[1], NULL, 10) * 1024 * 1024;
}
if (!bufsz)
bufsz = 8192;
@ -42,6 +47,10 @@ int main(int ac, char **av)
git_SHA1_Update(&ctx, buffer, this_sz);
}
git_SHA1_Final(sha1, &ctx);
puts(sha1_to_hex(sha1));
if (binary)
fwrite(sha1, 1, 20, stdout);
else
puts(sha1_to_hex(sha1));
exit(0);
}