Mirror of https://github.com/git/git (synced 2024-10-30 04:01:21 +00:00)
Merge branch 'jk/duplicate-objects-in-packs'
A packfile that stores the same object more than once is broken and will be rejected by "git index-pack" that is run when receiving data over the wire.

* jk/duplicate-objects-in-packs:
  t5308: check that index-pack --strict detects duplicate objects
  test index-pack on packs with recoverable delta cycles
  add tests for indexing packs with delta cycles
  sha1-lookup: handle duplicate keys with GIT_USE_LOOKUP
  test-sha1: add a binary output mode
commit 6c34560053
5 changed files with 316 additions and 3 deletions
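For orientation, here is a minimal sketch of the behavior the merged tests exercise, assuming a hand-crafted pack named dups.pack built with the t/lib-pack.sh helpers added below:

	# plain indexing tolerates a pack that stores the same object twice
	git index-pack --stdin <dups.pack

	# with --strict (as in t5308 below) the same pack is rejected
	git index-pack --strict --stdin <dups.pack    # expected to fail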
sha1-lookup.c
@@ -204,7 +204,54 @@ int sha1_entry_pos(const void *table,
		/*
		 * byte 0 thru (ofs-1) are the same between
		 * lo and hi; ofs is the first byte that is
		 * different.
		 *
		 * If ofs==20, then no bytes are different,
		 * meaning we have entries with duplicate
		 * keys. We know that we are in a solid run
		 * of this entry (because the entries are
		 * sorted, and our lo and hi are the same,
		 * there can be nothing but this single key
		 * in between). So we can stop the search.
		 * Either one of these entries is it (and
		 * we do not care which), or we do not have
		 * it.
		 *
		 * Furthermore, we know that one of our
		 * endpoints must be the edge of the run of
		 * duplicates. For example, given this
		 * sequence:
		 *
		 *   idx 0 1 2 3 4 5
		 *   key A C C C C D
		 *
		 * If we are searching for "B", we might
		 * hit the duplicate run at lo=1, hi=3
		 * (e.g., by first mi=3, then mi=0). But we
		 * can never have lo > 1, because B < C.
		 * That is, if our key is less than the
		 * run, we know that "lo" is the edge, but
		 * we can say nothing of "hi". Similarly,
		 * if our key is greater than the run, we
		 * know that "hi" is the edge, but we can
		 * say nothing of "lo".
		 *
		 * Therefore if we do not find it, we also
		 * know where it would go if it did exist:
		 * just on the far side of the edge that we
		 * know about.
		 */
		if (ofs == 20) {
			mi = lo;
			mi_key = base + elem_size * mi + key_offset;
			cmp = memcmp(mi_key, key, 20);
			if (!cmp)
				return mi;
			if (cmp < 0)
				return -1 - hi;
			else
				return -1 - lo;
		}

		hiv = hi_key[ofs_0];
		if (ofs_0 < 19)
			hiv = (hiv << 8) | hi_key[ofs_0+1];
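Setting GIT_USE_LOOKUP routes sha1 lookups through this sha1_entry_pos() binary search, which is how the duplicate-key handling above gets exercised end to end. A minimal command-line sketch, mirroring the t5308 test further down (the input file of sha1s, one per line, is an assumption here):

	GIT_USE_LOOKUP=1 git cat-file --batch-check <input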
t/lib-pack.sh (new file, 100 lines)
@@ -0,0 +1,100 @@
#!/bin/sh
#
# Support routines for hand-crafting weird or malicious packs.
#
# You can make a complete pack like:
#
#   pack_header 2 >foo.pack &&
#   pack_obj e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 >>foo.pack &&
#   pack_obj e68fe8129b546b101aee9510c5328e7f21ca1d18 >>foo.pack &&
#   pack_trailer foo.pack

# Print the big-endian 4-byte octal representation of $1
uint32_octal () {
	n=$1
	printf '\\%o' $(($n / 16777216)); n=$((n % 16777216))
	printf '\\%o' $(($n / 65536));    n=$((n % 65536))
	printf '\\%o' $(($n / 256));      n=$((n % 256))
	printf '\\%o' $(($n));
}

# Print the big-endian 4-byte binary representation of $1
uint32_binary () {
	printf "$(uint32_octal "$1")"
}

# Print a pack header, version 2, for a pack with $1 objects
pack_header () {
	printf 'PACK' &&
	printf '\0\0\0\2' &&
	uint32_binary "$1"
}

# Print the pack data for object $1, as a delta against object $2 (or as a full
# object if $2 is missing or empty). The output is suitable for including
# directly in the packfile, and represents the entirety of the object entry.
# Doing this on the fly (especially picking your deltas) is quite tricky, so we
# have hardcoded some well-known objects. See the case statements below for the
# complete list.
pack_obj () {
	case "$1" in
	# empty blob
	e69de29bb2d1d6434b8b29ae775ad8c2e48c5391)
		case "$2" in
		'')
			printf '\060\170\234\003\0\0\0\0\1'
			return
			;;
		esac
		;;

	# blob containing "\7\76"
	e68fe8129b546b101aee9510c5328e7f21ca1d18)
		case "$2" in
		'')
			printf '\062\170\234\143\267\3\0\0\116\0\106'
			return
			;;
		01d7713666f4de822776c7622c10f1b07de280dc)
			printf '\165\1\327\161\66\146\364\336\202\47\166' &&
			printf '\307\142\54\20\361\260\175\342\200\334\170' &&
			printf '\234\143\142\142\142\267\003\0\0\151\0\114'
			return
			;;
		esac
		;;

	# blob containing "\7\0"
	01d7713666f4de822776c7622c10f1b07de280dc)
		case "$2" in
		'')
			printf '\062\170\234\143\147\0\0\0\20\0\10'
			return
			;;
		e68fe8129b546b101aee9510c5328e7f21ca1d18)
			printf '\165\346\217\350\22\233\124\153\20\32\356' &&
			printf '\225\20\305\62\216\177\41\312\35\30\170\234' &&
			printf '\143\142\142\142\147\0\0\0\53\0\16'
			return
			;;
		esac
		;;
	esac

	echo >&2 "BUG: don't know how to print $1${2:+ (from $2)}"
	return 1
}

# Compute and append pack trailer to "$1"
pack_trailer () {
	test-sha1 -b <"$1" >trailer.tmp &&
	cat trailer.tmp >>"$1" &&
	rm -f trailer.tmp
}

# Remove any existing packs to make sure that
# whatever we index next will be the pack that we
# actually use.
clear_packs () {
	rm -f .git/objects/pack/*
}
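A brief usage sketch of these helpers; calling them interactively like this is an assumption (the tests source the library via $TEST_DIRECTORY and rely on the test-sha1 helper being on the PATH for pack_trailer):

	. t/lib-pack.sh
	uint32_octal 2            # prints the printf escapes \0\0\0\2
	pack_header 1 | wc -c     # 12 bytes: "PACK", version 2, object count
	pack_obj e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 | wc -c    # 9-byte entry for the empty blob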
t/t5308-pack-detect-duplicates.sh (new executable file, 80 lines)
@@ -0,0 +1,80 @@
#!/bin/sh

test_description='handling of duplicate objects in incoming packfiles'
. ./test-lib.sh
. "$TEST_DIRECTORY"/lib-pack.sh

# The sha1s we have in our pack. It's important that these have the same
# starting byte, so that they end up in the same fanout section of the index.
# That lets us make sure we are exercising the binary search with both sets.
LO_SHA1=e68fe8129b546b101aee9510c5328e7f21ca1d18
HI_SHA1=e69de29bb2d1d6434b8b29ae775ad8c2e48c5391

# And here's a "missing sha1" which will produce failed lookups. It must also
# be in the same fanout section, and should be between the two (so that during
# our binary search, we are sure to end up looking at one or the other of the
# duplicate runs).
MISSING_SHA1='e69d000000000000000000000000000000000000'

# git will never intentionally create packfiles with
# duplicate objects, so we have to construct them by hand.
#
# $1 is the name of the packfile to create
#
# $2 is the number of times to duplicate each object
create_pack () {
	pack_header "$((2 * $2))" >"$1" &&
	for i in $(test_seq 1 "$2"); do
		pack_obj $LO_SHA1 &&
		pack_obj $HI_SHA1
	done >>"$1" &&
	pack_trailer "$1"
}

# double-check that create_pack actually works
test_expect_success 'pack with no duplicates' '
	create_pack no-dups.pack 1 &&
	git index-pack --stdin <no-dups.pack
'

test_expect_success 'index-pack will allow duplicate objects by default' '
	clear_packs &&
	create_pack dups.pack 100 &&
	git index-pack --stdin <dups.pack
'

test_expect_success 'create batch-check test vectors' '
	cat >input <<-EOF &&
	$LO_SHA1
	$HI_SHA1
	$MISSING_SHA1
	EOF
	cat >expect <<-EOF
	$LO_SHA1 blob 2
	$HI_SHA1 blob 0
	$MISSING_SHA1 missing
	EOF
'

test_expect_success 'lookup in duplicated pack (binary search)' '
	git cat-file --batch-check <input >actual &&
	test_cmp expect actual
'

test_expect_success 'lookup in duplicated pack (GIT_USE_LOOKUP)' '
	(
		GIT_USE_LOOKUP=1 &&
		export GIT_USE_LOOKUP &&
		git cat-file --batch-check <input >actual
	) &&
	test_cmp expect actual
'

test_expect_success 'index-pack can reject packs with duplicates' '
	clear_packs &&
	create_pack dups.pack 2 &&
	test_must_fail git index-pack --strict --stdin <dups.pack &&
	test_expect_code 1 git cat-file -e $LO_SHA1
'

test_done
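Like any other script in the suite, the new test can be run on its own from the t/ directory; -v is the standard test-lib.sh verbose flag:

	cd t && ./t5308-pack-detect-duplicates.sh -v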
t/t5309-pack-delta-cycles.sh (new executable file, 77 lines)
@@ -0,0 +1,77 @@
#!/bin/sh

test_description='test index-pack handling of delta cycles in packfiles'
. ./test-lib.sh
. "$TEST_DIRECTORY"/lib-pack.sh

# Two similar-ish objects that we have computed deltas between.
A=01d7713666f4de822776c7622c10f1b07de280dc
B=e68fe8129b546b101aee9510c5328e7f21ca1d18

# double-check our hand-constructed packs
test_expect_success 'index-pack works with a single delta (A->B)' '
	clear_packs &&
	{
		pack_header 2 &&
		pack_obj $A $B &&
		pack_obj $B
	} >ab.pack &&
	pack_trailer ab.pack &&
	git index-pack --stdin <ab.pack &&
	git cat-file -t $A &&
	git cat-file -t $B
'

test_expect_success 'index-pack works with a single delta (B->A)' '
	clear_packs &&
	{
		pack_header 2 &&
		pack_obj $A &&
		pack_obj $B $A
	} >ba.pack &&
	pack_trailer ba.pack &&
	git index-pack --stdin <ba.pack &&
	git cat-file -t $A &&
	git cat-file -t $B
'

test_expect_success 'index-pack detects missing base objects' '
	clear_packs &&
	{
		pack_header 1 &&
		pack_obj $A $B
	} >missing.pack &&
	pack_trailer missing.pack &&
	test_must_fail git index-pack --fix-thin --stdin <missing.pack
'

test_expect_success 'index-pack detects REF_DELTA cycles' '
	clear_packs &&
	{
		pack_header 2 &&
		pack_obj $A $B &&
		pack_obj $B $A
	} >cycle.pack &&
	pack_trailer cycle.pack &&
	test_must_fail git index-pack --fix-thin --stdin <cycle.pack
'

test_expect_failure 'failover to an object in another pack' '
	clear_packs &&
	git index-pack --stdin <ab.pack &&
	git index-pack --stdin --fix-thin <cycle.pack
'

test_expect_failure 'failover to a duplicate object in the same pack' '
	clear_packs &&
	{
		pack_header 3 &&
		pack_obj $A $B &&
		pack_obj $B $A &&
		pack_obj $A
	} >recoverable.pack &&
	pack_trailer recoverable.pack &&
	git index-pack --fix-thin --stdin <recoverable.pack
'

test_done
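The two test_expect_failure cases above document recovery behavior that index-pack does not yet implement (the "recoverable delta cycles" topic in this merge adds the tests only), so they are recorded as known breakages. As a side note outside the test harness, after ab.pack is indexed the delta relationship should be visible with verify-pack:

	git index-pack --stdin <ab.pack                    # as in the A->B test above
	git verify-pack -v .git/objects/pack/pack-*.idx    # should list one blob as a depth-1 delta of the other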
test-sha1.c
@@ -5,10 +5,15 @@ int main(int ac, char **av)
 	git_SHA_CTX ctx;
 	unsigned char sha1[20];
 	unsigned bufsz = 8192;
+	int binary = 0;
 	char *buffer;
 
-	if (ac == 2)
-		bufsz = strtoul(av[1], NULL, 10) * 1024 * 1024;
+	if (ac == 2) {
+		if (!strcmp(av[1], "-b"))
+			binary = 1;
+		else
+			bufsz = strtoul(av[1], NULL, 10) * 1024 * 1024;
+	}
 
 	if (!bufsz)
 		bufsz = 8192;

@@ -42,6 +47,10 @@ int main(int ac, char **av)
 		git_SHA1_Update(&ctx, buffer, this_sz);
 	}
 	git_SHA1_Final(sha1, &ctx);
-	puts(sha1_to_hex(sha1));
+
+	if (binary)
+		fwrite(sha1, 1, 20, stdout);
+	else
+		puts(sha1_to_hex(sha1));
 	exit(0);
 }
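A minimal usage sketch of the new binary mode, which is what pack_trailer in t/lib-pack.sh relies on (invoking the helper as ./test-sha1 from the build directory, and the 'hello' input, are assumptions for illustration):

	printf 'hello' | ./test-sha1              # 40-character hex digest, as before
	printf 'hello' | ./test-sha1 -b | wc -c   # 20 raw digest bytes, suitable for a pack trailer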