git/builtin-unpack-objects.c
Martin Koegler b41860bf28 unpack-objects: prevent writing of inconsistent objects
This patch introduces a strict mode, which ensures that:
- no malformed object will be written
- no object with broken links will be written

The patch ensures this by delaying the write of all non blob object.
These object are written, after all objects they link to are written.

An error can only result in unreferenced objects.

Signed-off-by: Martin Koegler <mkoegler@auto.tuwien.ac.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-03-05 10:53:11 -08:00

522 lines
12 KiB
C

#include "builtin.h"
#include "cache.h"
#include "object.h"
#include "delta.h"
#include "pack.h"
#include "blob.h"
#include "commit.h"
#include "tag.h"
#include "tree.h"
#include "tree-walk.h"
#include "progress.h"
#include "decorate.h"
#include "fsck.h"
static int dry_run, quiet, recover, has_errors, strict;
static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] [--strict] < pack-file";
/* We always read in 4kB chunks. */
static unsigned char buffer[4096];
static unsigned int offset, len;
static off_t consumed_bytes;
static SHA_CTX ctx;
struct obj_buffer {
char *buffer;
unsigned long size;
};
static struct decoration obj_decorate;
static struct obj_buffer *lookup_object_buffer(struct object *base)
{
return lookup_decoration(&obj_decorate, base);
}
static void add_object_buffer(struct object *object, char *buffer, unsigned long size)
{
struct obj_buffer *obj;
obj = xcalloc(1, sizeof(struct obj_buffer));
obj->buffer = buffer;
obj->size = size;
if (add_decoration(&obj_decorate, object, obj))
die("object %s tried to add buffer twice!", sha1_to_hex(object->sha1));
}
/*
* Make sure at least "min" bytes are available in the buffer, and
* return the pointer to the buffer.
*/
static void *fill(int min)
{
if (min <= len)
return buffer + offset;
if (min > sizeof(buffer))
die("cannot fill %d bytes", min);
if (offset) {
SHA1_Update(&ctx, buffer, offset);
memmove(buffer, buffer + offset, len);
offset = 0;
}
do {
ssize_t ret = xread(0, buffer + len, sizeof(buffer) - len);
if (ret <= 0) {
if (!ret)
die("early EOF");
die("read error on input: %s", strerror(errno));
}
len += ret;
} while (len < min);
return buffer;
}
static void use(int bytes)
{
if (bytes > len)
die("used more bytes than were available");
len -= bytes;
offset += bytes;
/* make sure off_t is sufficiently large not to wrap */
if (consumed_bytes > consumed_bytes + bytes)
die("pack too large for current definition of off_t");
consumed_bytes += bytes;
}
static void *get_data(unsigned long size)
{
z_stream stream;
void *buf = xmalloc(size);
memset(&stream, 0, sizeof(stream));
stream.next_out = buf;
stream.avail_out = size;
stream.next_in = fill(1);
stream.avail_in = len;
inflateInit(&stream);
for (;;) {
int ret = inflate(&stream, 0);
use(len - stream.avail_in);
if (stream.total_out == size && ret == Z_STREAM_END)
break;
if (ret != Z_OK) {
error("inflate returned %d\n", ret);
free(buf);
buf = NULL;
if (!recover)
exit(1);
has_errors = 1;
break;
}
stream.next_in = fill(1);
stream.avail_in = len;
}
inflateEnd(&stream);
return buf;
}
struct delta_info {
unsigned char base_sha1[20];
unsigned nr;
off_t base_offset;
unsigned long size;
void *delta;
struct delta_info *next;
};
static struct delta_info *delta_list;
static void add_delta_to_list(unsigned nr, unsigned const char *base_sha1,
off_t base_offset,
void *delta, unsigned long size)
{
struct delta_info *info = xmalloc(sizeof(*info));
hashcpy(info->base_sha1, base_sha1);
info->base_offset = base_offset;
info->size = size;
info->delta = delta;
info->nr = nr;
info->next = delta_list;
delta_list = info;
}
struct obj_info {
off_t offset;
unsigned char sha1[20];
struct object *obj;
};
#define FLAG_OPEN (1u<<20)
#define FLAG_WRITTEN (1u<<21)
static struct obj_info *obj_list;
unsigned nr_objects;
static void write_cached_object(struct object *obj)
{
unsigned char sha1[20];
struct obj_buffer *obj_buf = lookup_object_buffer(obj);
if (write_sha1_file(obj_buf->buffer, obj_buf->size, typename(obj->type), sha1) < 0)
die("failed to write object %s", sha1_to_hex(obj->sha1));
obj->flags |= FLAG_WRITTEN;
}
static int check_object(struct object *obj, int type, void *data)
{
if (!obj)
return 0;
if (obj->flags & FLAG_WRITTEN)
return 1;
if (type != OBJ_ANY && obj->type != type)
die("object type mismatch");
if (!(obj->flags & FLAG_OPEN)) {
unsigned long size;
int type = sha1_object_info(obj->sha1, &size);
if (type != obj->type || type <= 0)
die("object of unexpected type");
obj->flags |= FLAG_WRITTEN;
return 1;
}
if (fsck_object(obj, 1, fsck_error_function))
die("Error in object");
if (!fsck_walk(obj, check_object, 0))
die("Error on reachable objects of %s", sha1_to_hex(obj->sha1));
write_cached_object(obj);
return 1;
}
static void write_rest(void)
{
unsigned i;
for (i = 0; i < nr_objects; i++)
check_object(obj_list[i].obj, OBJ_ANY, 0);
}
static void added_object(unsigned nr, enum object_type type,
void *data, unsigned long size);
static void write_object(unsigned nr, enum object_type type,
void *buf, unsigned long size)
{
added_object(nr, type, buf, size);
if (!strict) {
if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0)
die("failed to write object");
free(buf);
obj_list[nr].obj = 0;
} else if (type == OBJ_BLOB) {
struct blob *blob;
if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0)
die("failed to write object");
free(buf);
blob = lookup_blob(obj_list[nr].sha1);
if (blob)
blob->object.flags |= FLAG_WRITTEN;
else
die("invalid blob object");
obj_list[nr].obj = 0;
} else {
struct object *obj;
int eaten;
hash_sha1_file(buf, size, typename(type), obj_list[nr].sha1);
obj = parse_object_buffer(obj_list[nr].sha1, type, size, buf, &eaten);
if (!obj)
die("invalid %s", typename(type));
/* buf is stored via add_object_buffer and in obj, if its a tree or commit */
add_object_buffer(obj, buf, size);
obj->flags |= FLAG_OPEN;
obj_list[nr].obj = obj;
}
}
static void resolve_delta(unsigned nr, enum object_type type,
void *base, unsigned long base_size,
void *delta, unsigned long delta_size)
{
void *result;
unsigned long result_size;
result = patch_delta(base, base_size,
delta, delta_size,
&result_size);
if (!result)
die("failed to apply delta");
free(delta);
write_object(nr, type, result, result_size);
}
static void added_object(unsigned nr, enum object_type type,
void *data, unsigned long size)
{
struct delta_info **p = &delta_list;
struct delta_info *info;
while ((info = *p) != NULL) {
if (!hashcmp(info->base_sha1, obj_list[nr].sha1) ||
info->base_offset == obj_list[nr].offset) {
*p = info->next;
p = &delta_list;
resolve_delta(info->nr, type, data, size,
info->delta, info->size);
free(info);
continue;
}
p = &info->next;
}
}
static void unpack_non_delta_entry(enum object_type type, unsigned long size,
unsigned nr)
{
void *buf = get_data(size);
if (!dry_run && buf)
write_object(nr, type, buf, size);
else
free(buf);
}
static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
unsigned nr)
{
void *delta_data, *base;
unsigned long base_size;
unsigned char base_sha1[20];
struct object *obj;
if (type == OBJ_REF_DELTA) {
hashcpy(base_sha1, fill(20));
use(20);
delta_data = get_data(delta_size);
if (dry_run || !delta_data) {
free(delta_data);
return;
}
if (!has_sha1_file(base_sha1)) {
hashcpy(obj_list[nr].sha1, null_sha1);
add_delta_to_list(nr, base_sha1, 0, delta_data, delta_size);
return;
}
} else {
unsigned base_found = 0;
unsigned char *pack, c;
off_t base_offset;
unsigned lo, mid, hi;
pack = fill(1);
c = *pack;
use(1);
base_offset = c & 127;
while (c & 128) {
base_offset += 1;
if (!base_offset || MSB(base_offset, 7))
die("offset value overflow for delta base object");
pack = fill(1);
c = *pack;
use(1);
base_offset = (base_offset << 7) + (c & 127);
}
base_offset = obj_list[nr].offset - base_offset;
delta_data = get_data(delta_size);
if (dry_run || !delta_data) {
free(delta_data);
return;
}
lo = 0;
hi = nr;
while (lo < hi) {
mid = (lo + hi)/2;
if (base_offset < obj_list[mid].offset) {
hi = mid;
} else if (base_offset > obj_list[mid].offset) {
lo = mid + 1;
} else {
hashcpy(base_sha1, obj_list[mid].sha1);
base_found = !is_null_sha1(base_sha1);
break;
}
}
if (!base_found) {
/* The delta base object is itself a delta that
has not been resolved yet. */
hashcpy(obj_list[nr].sha1, null_sha1);
add_delta_to_list(nr, null_sha1, base_offset, delta_data, delta_size);
return;
}
}
obj = lookup_object(base_sha1);
if (obj) {
struct obj_buffer *obj_buf = lookup_object_buffer(obj);
if (obj_buf) {
resolve_delta(nr, obj->type, obj_buf->buffer, obj_buf->size, delta_data, delta_size);
return;
}
}
base = read_sha1_file(base_sha1, &type, &base_size);
if (!base) {
error("failed to read delta-pack base object %s",
sha1_to_hex(base_sha1));
if (!recover)
exit(1);
has_errors = 1;
return;
}
resolve_delta(nr, type, base, base_size, delta_data, delta_size);
free(base);
}
static void unpack_one(unsigned nr)
{
unsigned shift;
unsigned char *pack, c;
unsigned long size;
enum object_type type;
obj_list[nr].offset = consumed_bytes;
pack = fill(1);
c = *pack;
use(1);
type = (c >> 4) & 7;
size = (c & 15);
shift = 4;
while (c & 0x80) {
pack = fill(1);
c = *pack;
use(1);
size += (c & 0x7f) << shift;
shift += 7;
}
switch (type) {
case OBJ_COMMIT:
case OBJ_TREE:
case OBJ_BLOB:
case OBJ_TAG:
unpack_non_delta_entry(type, size, nr);
return;
case OBJ_REF_DELTA:
case OBJ_OFS_DELTA:
unpack_delta_entry(type, size, nr);
return;
default:
error("bad object type %d", type);
has_errors = 1;
if (recover)
return;
exit(1);
}
}
static void unpack_all(void)
{
int i;
struct progress *progress = NULL;
struct pack_header *hdr = fill(sizeof(struct pack_header));
nr_objects = ntohl(hdr->hdr_entries);
if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE)
die("bad pack file");
if (!pack_version_ok(hdr->hdr_version))
die("unknown pack file version %d", ntohl(hdr->hdr_version));
use(sizeof(struct pack_header));
if (!quiet)
progress = start_progress("Unpacking objects", nr_objects);
obj_list = xmalloc(nr_objects * sizeof(*obj_list));
memset(obj_list, 0, nr_objects * sizeof(*obj_list));
for (i = 0; i < nr_objects; i++) {
unpack_one(i);
display_progress(progress, i + 1);
}
stop_progress(&progress);
if (delta_list)
die("unresolved deltas left after unpacking");
}
int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
{
int i;
unsigned char sha1[20];
git_config(git_default_config);
quiet = !isatty(2);
for (i = 1 ; i < argc; i++) {
const char *arg = argv[i];
if (*arg == '-') {
if (!strcmp(arg, "-n")) {
dry_run = 1;
continue;
}
if (!strcmp(arg, "-q")) {
quiet = 1;
continue;
}
if (!strcmp(arg, "-r")) {
recover = 1;
continue;
}
if (!strcmp(arg, "--strict")) {
strict = 1;
continue;
}
if (!prefixcmp(arg, "--pack_header=")) {
struct pack_header *hdr;
char *c;
hdr = (struct pack_header *)buffer;
hdr->hdr_signature = htonl(PACK_SIGNATURE);
hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10));
if (*c != ',')
die("bad %s", arg);
hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10));
if (*c)
die("bad %s", arg);
len = sizeof(*hdr);
continue;
}
usage(unpack_usage);
}
/* We don't take any non-flag arguments now.. Maybe some day */
usage(unpack_usage);
}
SHA1_Init(&ctx);
unpack_all();
SHA1_Update(&ctx, buffer, offset);
SHA1_Final(sha1, &ctx);
if (strict)
write_rest();
if (hashcmp(fill(20), sha1))
die("final sha1 did not match");
use(20);
/* Write the last part of the buffer to stdout */
while (len) {
int ret = xwrite(1, buffer + offset, len);
if (ret <= 0)
break;
len -= ret;
offset += ret;
}
/* All done */
return has_errors;
}