Merge branch 'np/pack-safer'

* np/pack-safer:
  t5303: fix printf format string for portability
  t5303: work around printf breakage in dash
  pack-objects: don't leak pack window reference when splitting packs
  extend test coverage for latest pack corruption resilience improvements
  pack-objects: allow "fixing" a corrupted pack without a full repack
  make find_pack_revindex() aware of the nasty world
  make check_object() resilient to pack corruptions
  make packed_object_info() resilient to pack corruptions
  make unpack_object_header() non fatal
  better validation on delta base object offsets
  close another possibility for propagating pack corruption
This commit is contained in:
Junio C Hamano 2008-11-12 22:26:35 -08:00
commit 7b51b77dbc
8 changed files with 224 additions and 49 deletions

View file

@ -286,6 +286,7 @@ static unsigned long write_object(struct sha1file *f,
*/
if (!to_reuse) {
no_reuse:
if (!usable_delta) {
buf = read_sha1_file(entry->idx.sha1, &type, &size);
if (!buf)
@ -367,46 +368,60 @@ static unsigned long write_object(struct sha1file *f,
struct revindex_entry *revidx;
off_t offset;
if (entry->delta) {
if (entry->delta)
type = (allow_ofs_delta && entry->delta->idx.offset) ?
OBJ_OFS_DELTA : OBJ_REF_DELTA;
reused_delta++;
}
hdrlen = encode_header(type, entry->size, header);
offset = entry->in_pack_offset;
revidx = find_pack_revindex(p, offset);
datalen = revidx[1].offset - offset;
if (!pack_to_stdout && p->index_version > 1 &&
check_pack_crc(p, &w_curs, offset, datalen, revidx->nr))
die("bad packed object CRC for %s", sha1_to_hex(entry->idx.sha1));
check_pack_crc(p, &w_curs, offset, datalen, revidx->nr)) {
error("bad packed object CRC for %s", sha1_to_hex(entry->idx.sha1));
unuse_pack(&w_curs);
goto no_reuse;
}
offset += entry->in_pack_header_size;
datalen -= entry->in_pack_header_size;
if (!pack_to_stdout && p->index_version == 1 &&
check_pack_inflate(p, &w_curs, offset, datalen, entry->size)) {
error("corrupt packed object for %s", sha1_to_hex(entry->idx.sha1));
unuse_pack(&w_curs);
goto no_reuse;
}
if (type == OBJ_OFS_DELTA) {
off_t ofs = entry->idx.offset - entry->delta->idx.offset;
unsigned pos = sizeof(dheader) - 1;
dheader[pos] = ofs & 127;
while (ofs >>= 7)
dheader[--pos] = 128 | (--ofs & 127);
if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit)
if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) {
unuse_pack(&w_curs);
return 0;
}
sha1write(f, header, hdrlen);
sha1write(f, dheader + pos, sizeof(dheader) - pos);
hdrlen += sizeof(dheader) - pos;
reused_delta++;
} else if (type == OBJ_REF_DELTA) {
if (limit && hdrlen + 20 + datalen + 20 >= limit)
if (limit && hdrlen + 20 + datalen + 20 >= limit) {
unuse_pack(&w_curs);
return 0;
}
sha1write(f, header, hdrlen);
sha1write(f, entry->delta->idx.sha1, 20);
hdrlen += 20;
reused_delta++;
} else {
if (limit && hdrlen + datalen + 20 >= limit)
if (limit && hdrlen + datalen + 20 >= limit) {
unuse_pack(&w_curs);
return 0;
}
sha1write(f, header, hdrlen);
}
if (!pack_to_stdout && p->index_version == 1 &&
check_pack_inflate(p, &w_curs, offset, datalen, entry->size))
die("corrupt packed object for %s", sha1_to_hex(entry->idx.sha1));
copy_pack_data(f, p, &w_curs, offset, datalen);
unuse_pack(&w_curs);
reused++;
@ -1016,9 +1031,11 @@ static void check_object(struct object_entry *entry)
* We want in_pack_type even if we do not reuse delta
* since non-delta representations could still be reused.
*/
used = unpack_object_header_gently(buf, avail,
used = unpack_object_header_buffer(buf, avail,
&entry->in_pack_type,
&entry->size);
if (used == 0)
goto give_up;
/*
* Determine if this is a delta and if so whether we can
@ -1030,6 +1047,8 @@ static void check_object(struct object_entry *entry)
/* Not a delta hence we've already got all we need. */
entry->type = entry->in_pack_type;
entry->in_pack_header_size = used;
if (entry->type < OBJ_COMMIT || entry->type > OBJ_BLOB)
goto give_up;
unuse_pack(&w_curs);
return;
case OBJ_REF_DELTA:
@ -1046,19 +1065,25 @@ static void check_object(struct object_entry *entry)
ofs = c & 127;
while (c & 128) {
ofs += 1;
if (!ofs || MSB(ofs, 7))
die("delta base offset overflow in pack for %s",
sha1_to_hex(entry->idx.sha1));
if (!ofs || MSB(ofs, 7)) {
error("delta base offset overflow in pack for %s",
sha1_to_hex(entry->idx.sha1));
goto give_up;
}
c = buf[used_0++];
ofs = (ofs << 7) + (c & 127);
}
if (ofs >= entry->in_pack_offset)
die("delta base offset out of bound for %s",
sha1_to_hex(entry->idx.sha1));
ofs = entry->in_pack_offset - ofs;
if (ofs <= 0 || ofs >= entry->in_pack_offset) {
error("delta base offset out of bound for %s",
sha1_to_hex(entry->idx.sha1));
goto give_up;
}
if (reuse_delta && !entry->preferred_base) {
struct revindex_entry *revidx;
revidx = find_pack_revindex(p, ofs);
if (!revidx)
goto give_up;
base_ref = nth_packed_object_sha1(p, revidx->nr);
}
entry->in_pack_header_size = used + used_0;
@ -1078,6 +1103,7 @@ static void check_object(struct object_entry *entry)
*/
entry->type = entry->in_pack_type;
entry->delta = base_entry;
entry->delta_size = entry->size;
entry->delta_sibling = base_entry->delta_child;
base_entry->delta_child = entry;
unuse_pack(&w_curs);
@ -1092,6 +1118,8 @@ static void check_object(struct object_entry *entry)
*/
entry->size = get_size_from_delta(p, &w_curs,
entry->in_pack_offset + entry->in_pack_header_size);
if (entry->size == 0)
goto give_up;
unuse_pack(&w_curs);
return;
}
@ -1101,6 +1129,7 @@ static void check_object(struct object_entry *entry)
* with sha1_object_info() to find about the object type
* at this point...
*/
give_up:
unuse_pack(&w_curs);
}
@ -1712,6 +1741,16 @@ static void prepare_pack(int window, int depth)
get_object_details();
/*
* If we're locally repacking then we need to be doubly careful
* from now on in order to make sure no stealth corruption gets
* propagated to the new pack. Clients receiving streamed packs
* should validate everything they get anyway so no need to incur
* the additional cost here in that case.
*/
if (!pack_to_stdout)
do_check_packed_object_crc = 1;
if (!nr_objects || !window || !depth)
return;

View file

@ -370,6 +370,8 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
base_offset = (base_offset << 7) + (c & 127);
}
base_offset = obj_list[nr].offset - base_offset;
if (base_offset <= 0 || base_offset >= obj_list[nr].offset)
die("offset value out of bound for delta base object");
delta_data = get_data(delta_size);
if (dry_run || !delta_data) {

View file

@ -574,6 +574,9 @@ extern int force_object_loose(const unsigned char *sha1, time_t mtime);
/* just like read_sha1_file(), but non fatal in presence of bad objects */
extern void *read_object(const unsigned char *sha1, enum object_type *type, unsigned long *size);
/* global flag to enable extra checks when accessing packed objects */
extern int do_check_packed_object_crc;
extern int check_sha1_signature(const unsigned char *sha1, void *buf, unsigned long size, const char *type);
extern int move_temp_to_file(const char *tmpfile, const char *filename);
@ -762,7 +765,7 @@ extern const unsigned char *nth_packed_object_sha1(struct packed_git *, uint32_t
extern off_t nth_packed_object_offset(const struct packed_git *, uint32_t);
extern off_t find_pack_entry_one(const unsigned char *, struct packed_git *);
extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsigned long *);
extern unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
extern const char *packed_object_info_detail(struct packed_git *, off_t, unsigned long *, unsigned long *, unsigned int *, unsigned char *);
extern int matches_pack_name(struct packed_git *p, const char *name);

View file

@ -338,7 +338,7 @@ static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_
base_offset = (base_offset << 7) + (c & 127);
}
delta_base->offset = obj->idx.offset - base_offset;
if (delta_base->offset >= obj->idx.offset)
if (delta_base->offset <= 0 || delta_base->offset >= obj->idx.offset)
bad_object(obj->idx.offset, "delta base offset is out of bound");
break;
case OBJ_COMMIT:

View file

@ -140,7 +140,8 @@ struct revindex_entry *find_pack_revindex(struct packed_git *p, off_t ofs)
else
lo = mi + 1;
} while (lo < hi);
die("internal error: pack revindex corrupt");
error("bad offset for revindex");
return NULL;
}
void discard_revindex(void)

View file

@ -1122,7 +1122,8 @@ static int legacy_loose_object(unsigned char *map)
return 0;
}
unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep)
unsigned long unpack_object_header_buffer(const unsigned char *buf,
unsigned long len, enum object_type *type, unsigned long *sizep)
{
unsigned shift;
unsigned char c;
@ -1134,10 +1135,10 @@ unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned lon
size = c & 15;
shift = 4;
while (c & 0x80) {
if (len <= used)
return 0;
if (sizeof(long) * 8 <= shift)
if (len <= used || sizeof(long) * 8 <= shift) {
error("bad object header");
return 0;
}
c = buf[used++];
size += (c & 0x7f) << shift;
shift += 7;
@ -1176,7 +1177,7 @@ static int unpack_sha1_header(z_stream *stream, unsigned char *map, unsigned lon
* really worth it and we don't write it any longer. But we
* can still read it.
*/
used = unpack_object_header_gently(map, mapsize, &type, &size);
used = unpack_object_header_buffer(map, mapsize, &type, &size);
if (!used || !valid_loose_object_type[type])
return -1;
map += used;
@ -1325,8 +1326,10 @@ unsigned long get_size_from_delta(struct packed_git *p,
} while ((st == Z_OK || st == Z_BUF_ERROR) &&
stream.total_out < sizeof(delta_head));
inflateEnd(&stream);
if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head))
die("delta data unpack-initial failed");
if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) {
error("delta data unpack-initial failed");
return 0;
}
/* Examine the initial part of the delta to figure out
* the result size.
@ -1367,7 +1370,7 @@ static off_t get_delta_base(struct packed_git *p,
base_offset = (base_offset << 7) + (c & 127);
}
base_offset = delta_obj_offset - base_offset;
if (base_offset >= delta_obj_offset)
if (base_offset <= 0 || base_offset >= delta_obj_offset)
return 0; /* out of bound */
*curpos += used;
} else if (type == OBJ_REF_DELTA) {
@ -1393,15 +1396,32 @@ static int packed_delta_info(struct packed_git *p,
off_t base_offset;
base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
if (!base_offset)
return OBJ_BAD;
type = packed_object_info(p, base_offset, NULL);
if (type <= OBJ_NONE) {
struct revindex_entry *revidx;
const unsigned char *base_sha1;
revidx = find_pack_revindex(p, base_offset);
if (!revidx)
return OBJ_BAD;
base_sha1 = nth_packed_object_sha1(p, revidx->nr);
mark_bad_packed_object(p, base_sha1);
type = sha1_object_info(base_sha1, NULL);
if (type <= OBJ_NONE)
return OBJ_BAD;
}
/* We choose to only get the type of the base object and
* ignore potentially corrupt pack file that expects the delta
* based on a base with a wrong size. This saves tons of
* inflate() calls.
*/
if (sizep)
if (sizep) {
*sizep = get_size_from_delta(p, w_curs, curpos);
if (*sizep == 0)
type = OBJ_BAD;
}
return type;
}
@ -1423,10 +1443,11 @@ static int unpack_object_header(struct packed_git *p,
* insane, so we know won't exceed what we have been given.
*/
base = use_pack(p, w_curs, *curpos, &left);
used = unpack_object_header_gently(base, left, &type, sizep);
if (!used)
die("object offset outside of pack file");
*curpos += used;
used = unpack_object_header_buffer(base, left, &type, sizep);
if (!used) {
type = OBJ_BAD;
} else
*curpos += used;
return type;
}
@ -1510,8 +1531,9 @@ static int packed_object_info(struct packed_git *p, off_t obj_offset,
*sizep = size;
break;
default:
die("pack %s contains unknown object type %d",
p->pack_name, type);
error("unknown object type %i at offset %"PRIuMAX" in %s",
type, (uintmax_t)obj_offset, p->pack_name);
type = OBJ_BAD;
}
unuse_pack(&w_curs);
return type;
@ -1675,9 +1697,12 @@ static void *unpack_delta_entry(struct packed_git *p,
* This is costly but should happen only in the presence
* of a corrupted pack, and is better than failing outright.
*/
struct revindex_entry *revidx = find_pack_revindex(p, base_offset);
const unsigned char *base_sha1 =
nth_packed_object_sha1(p, revidx->nr);
struct revindex_entry *revidx;
const unsigned char *base_sha1;
revidx = find_pack_revindex(p, base_offset);
if (!revidx)
return NULL;
base_sha1 = nth_packed_object_sha1(p, revidx->nr);
error("failed to read delta base object %s"
" at offset %"PRIuMAX" from %s",
sha1_to_hex(base_sha1), (uintmax_t)base_offset,
@ -1706,6 +1731,8 @@ static void *unpack_delta_entry(struct packed_git *p,
return result;
}
int do_check_packed_object_crc;
void *unpack_entry(struct packed_git *p, off_t obj_offset,
enum object_type *type, unsigned long *sizep)
{
@ -1713,6 +1740,19 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset,
off_t curpos = obj_offset;
void *data;
if (do_check_packed_object_crc && p->index_version > 1) {
struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
unsigned long len = revidx[1].offset - obj_offset;
if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {
const unsigned char *sha1 =
nth_packed_object_sha1(p, revidx->nr);
error("bad packed object CRC for %s",
sha1_to_hex(sha1));
mark_bad_packed_object(p, sha1);
return NULL;
}
}
*type = unpack_object_header(p, &w_curs, &curpos, sizep);
switch (*type) {
case OBJ_OFS_DELTA:
@ -1966,7 +2006,14 @@ int sha1_object_info(const unsigned char *sha1, unsigned long *sizep)
if (!find_pack_entry(sha1, &e, NULL))
return status;
}
return packed_object_info(e.p, e.offset, sizep);
status = packed_object_info(e.p, e.offset, sizep);
if (status < 0) {
mark_bad_packed_object(e.p, sha1);
status = sha1_object_info(sha1, sizep);
}
return status;
}
static void *read_packed_sha1(const unsigned char *sha1,

View file

@ -196,7 +196,8 @@ test_expect_success \
test_expect_success \
'[index v2] 5) pack-objects refuses to reuse corrupted data' \
'test_must_fail git pack-objects test-5 <obj-list'
'test_must_fail git pack-objects test-5 <obj-list &&
test_must_fail git pack-objects --no-reuse-object test-6 <obj-list'
test_expect_success \
'[index v2] 6) verify-pack detects CRC mismatch' \

View file

@ -41,11 +41,17 @@ create_new_pack() {
git verify-pack -v ${pack}.pack
}
# do_repack [<pack-objects options>...]
#
# Feed the three test blobs ($blob_1, $blob_2, $blob_3) to
# "git pack-objects", forwarding any arguments given to this function,
# and write the result under .git/objects/pack/.  On success, $pack is
# set to the path prefix of the new pack (without the .pack/.idx suffix),
# built from the name that git pack-objects prints on stdout.
#
# "$@" is quoted so that each forwarded option reaches git pack-objects
# as exactly one word, without field splitting or pathname expansion.
do_repack() {
	pack=$(printf "$blob_1\n$blob_2\n$blob_3\n" |
		git pack-objects "$@" .git/objects/pack/pack) &&
	pack=".git/objects/pack/pack-${pack}"
}
do_corrupt_object() {
ofs=`git show-index < ${pack}.idx | grep $1 | cut -f1 -d" "` &&
ofs=$(($ofs + $2)) &&
chmod +w ${pack}.pack &&
dd if=/dev/zero of=${pack}.pack count=1 bs=1 conv=notrunc seek=$ofs &&
dd of=${pack}.pack count=1 bs=1 conv=notrunc seek=$ofs &&
test_must_fail git verify-pack ${pack}.pack
}
@ -60,7 +66,7 @@ test_expect_success \
test_expect_success \
'create corruption in header of first object' \
'do_corrupt_object $blob_1 0 &&
'do_corrupt_object $blob_1 0 < /dev/zero &&
test_must_fail git cat-file blob $blob_1 > /dev/null &&
test_must_fail git cat-file blob $blob_2 > /dev/null &&
test_must_fail git cat-file blob $blob_3 > /dev/null'
@ -119,7 +125,7 @@ test_expect_success \
'create corruption in header of first delta' \
'create_new_pack &&
git prune-packed &&
do_corrupt_object $blob_2 0 &&
do_corrupt_object $blob_2 0 < /dev/zero &&
git cat-file blob $blob_1 > /dev/null &&
test_must_fail git cat-file blob $blob_2 > /dev/null &&
test_must_fail git cat-file blob $blob_3 > /dev/null'
@ -133,6 +139,15 @@ test_expect_success \
git cat-file blob $blob_2 > /dev/null &&
git cat-file blob $blob_3 > /dev/null'
test_expect_success \
'... and then a repack "clears" the corruption' \
'do_repack &&
git prune-packed &&
git verify-pack ${pack}.pack &&
git cat-file blob $blob_1 > /dev/null &&
git cat-file blob $blob_2 > /dev/null &&
git cat-file blob $blob_3 > /dev/null'
test_expect_success \
'create corruption in data of first delta' \
'create_new_pack &&
@ -152,11 +167,20 @@ test_expect_success \
git cat-file blob $blob_2 > /dev/null &&
git cat-file blob $blob_3 > /dev/null'
test_expect_success \
'... and then a repack "clears" the corruption' \
'do_repack &&
git prune-packed &&
git verify-pack ${pack}.pack &&
git cat-file blob $blob_1 > /dev/null &&
git cat-file blob $blob_2 > /dev/null &&
git cat-file blob $blob_3 > /dev/null'
test_expect_success \
'corruption in delta base reference of first delta (OBJ_REF_DELTA)' \
'create_new_pack &&
git prune-packed &&
do_corrupt_object $blob_2 2 &&
do_corrupt_object $blob_2 2 < /dev/zero &&
git cat-file blob $blob_1 > /dev/null &&
test_must_fail git cat-file blob $blob_2 > /dev/null &&
test_must_fail git cat-file blob $blob_3 > /dev/null'
@ -171,17 +195,75 @@ test_expect_success \
git cat-file blob $blob_3 > /dev/null'
test_expect_success \
'corruption in delta base reference of first delta (OBJ_OFS_DELTA)' \
'... and then a repack "clears" the corruption' \
'do_repack &&
git prune-packed &&
git verify-pack ${pack}.pack &&
git cat-file blob $blob_1 > /dev/null &&
git cat-file blob $blob_2 > /dev/null &&
git cat-file blob $blob_3 > /dev/null'
test_expect_success \
'corruption #0 in delta base reference of first delta (OBJ_OFS_DELTA)' \
'create_new_pack --delta-base-offset &&
git prune-packed &&
do_corrupt_object $blob_2 2 &&
do_corrupt_object $blob_2 2 < /dev/zero &&
git cat-file blob $blob_1 > /dev/null &&
test_must_fail git cat-file blob $blob_2 > /dev/null &&
test_must_fail git cat-file blob $blob_3 > /dev/null'
test_expect_success \
'... and a redundant pack allows for full recovery too' \
'... but having a loose copy allows for full recovery' \
'mv ${pack}.idx tmp &&
git hash-object -t blob -w file_2 &&
mv tmp ${pack}.idx &&
git cat-file blob $blob_1 > /dev/null &&
git cat-file blob $blob_2 > /dev/null &&
git cat-file blob $blob_3 > /dev/null'
test_expect_success \
'... and then a repack "clears" the corruption' \
'do_repack --delta-base-offset &&
git prune-packed &&
git verify-pack ${pack}.pack &&
git cat-file blob $blob_1 > /dev/null &&
git cat-file blob $blob_2 > /dev/null &&
git cat-file blob $blob_3 > /dev/null'
test_expect_success \
'corruption #1 in delta base reference of first delta (OBJ_OFS_DELTA)' \
'create_new_pack --delta-base-offset &&
git prune-packed &&
printf "\001" | do_corrupt_object $blob_2 2 &&
git cat-file blob $blob_1 > /dev/null &&
test_must_fail git cat-file blob $blob_2 > /dev/null &&
test_must_fail git cat-file blob $blob_3 > /dev/null'
test_expect_success \
'... but having a loose copy allows for full recovery' \
'mv ${pack}.idx tmp &&
git hash-object -t blob -w file_2 &&
mv tmp ${pack}.idx &&
git cat-file blob $blob_1 > /dev/null &&
git cat-file blob $blob_2 > /dev/null &&
git cat-file blob $blob_3 > /dev/null'
test_expect_success \
'... and then a repack "clears" the corruption' \
'do_repack --delta-base-offset &&
git prune-packed &&
git verify-pack ${pack}.pack &&
git cat-file blob $blob_1 > /dev/null &&
git cat-file blob $blob_2 > /dev/null &&
git cat-file blob $blob_3 > /dev/null'
test_expect_success \
'... and a redundant pack allows for full recovery too' \
'do_corrupt_object $blob_2 2 < /dev/zero &&
git cat-file blob $blob_1 > /dev/null &&
test_must_fail git cat-file blob $blob_2 > /dev/null &&
test_must_fail git cat-file blob $blob_3 > /dev/null &&
mv ${pack}.idx tmp &&
git hash-object -t blob -w file_1 &&
git hash-object -t blob -w file_2 &&
printf "$blob_1\n$blob_2\n" | git pack-objects .git/objects/pack/pack &&