git/fast-import.c
Shawn O. Pearce 6bb5b3291d Implemented branch handling and basic tree support in fast-import.
This provides the basic data structures needed to store trees in
memory while we are processing them for a branch.  What we are
attempting to do is track one complete tree for each branch that
the frontend has registered with us through the 'newb' (new_branch)
command.  When the frontend edits that tree through 'updf' or 'delf'
commands we'll mark the affected tree(s) as being dirty and recompute
their objects during 'comt' (commit).

Currently the protocol is decidedly _not_ user friendly.  I crashed
fast-import by giving it bad input data from Perl.  I may try to
improve upon it, or at least upon its error handling.

Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-01-14 02:15:03 -05:00

567 lines
13 KiB
C

#include "builtin.h"
#include "cache.h"
#include "object.h"
#include "blob.h"
#include "delta.h"
#include "pack.h"
#include "csum-file.h"
/*
 * One object known to this process.  Entries live inside an
 * object_entry_block and are chained per bucket of object_table.
 */
struct object_entry
{
/* Next entry in the same object_table bucket, or 0 at the end of the chain. */
struct object_entry *next;
/* Pack offset recorded by store_object(); insert_object() starts it at 0. */
unsigned long offset;
/* 20 byte object name used as the lookup key. */
unsigned char sha1[20];
};
/*
 * A chunk of object_entry slots allocated in one go by alloc_objects().
 * Blocks form a singly linked list headed by the global `blocks`.
 */
struct object_entry_block
{
/* Block that was the list head before this one was allocated. */
struct object_entry_block *next_block;
/* First slot of entries[] that has not been handed out yet. */
struct object_entry *next_free;
/* One past the last slot of entries[]; next_free == end means full. */
struct object_entry *end;
struct object_entry entries[FLEX_ARRAY]; /* more */
};
/*
 * Describes the object most recently written through store_object() with
 * this record; store_object() tries it as the base of the next delta.
 */
struct last_object
{
/* Content buffer of that object; replaced (and the old one freed) by store_object(). */
void *data;
/* Length of data in bytes. */
unsigned int len;
/* Incremented when a delta is written, reset to 0 when a full object is written. */
unsigned int depth;
/* Object name of data; written as the base reference of a delta entry. */
unsigned char sha1[20];
};
struct tree;
/*
 * One named entry of a tree, allocated with room for its name.
 */
struct tree_entry
{
/* Contents when the entry is itself a tree; 0 otherwise (see deep_copy_tree/store_tree). */
struct tree *tree;
/* Mode written in octal in front of the name by store_tree(). */
mode_t mode;
/* Object name; store_tree() only writes a tree whose sha1 equals null_sha1. */
unsigned char sha1[20];
/* NUL terminated entry name. */
char name[FLEX_ARRAY]; /* more */
};
/*
 * In-memory contents of one tree.
 */
struct tree
{
/* Delta base bookkeeping handed to store_object() when this tree is written. */
struct last_object last_tree;
/* Number of valid pointers in entries. */
unsigned long entry_count;
/* Array of separately allocated entries. */
struct tree_entry **entries;
};
/*
 * One branch declared by the frontend; all branches are linked from the
 * global `branches` list.
 */
struct branch
{
/* Next branch in the global list. */
struct branch *next_branch;
/* Root tree of the branch; commit() passes its address to store_tree(). */
struct tree_entry tree;
/*
 * Copied from the source branch by new_branch(), otherwise left zeroed.
 * NOTE(review): presumably the branch's latest commit - nothing visible sets it; confirm.
 */
unsigned char sha1[20];
/* Heap copy of the branch name made by new_branch(). */
const char *name;
};
/* Stats and misc. counters. */
/* store_object() only attempts a delta while last->depth is below this. */
static int max_depth = 10;
/* Total number of object_entry slots allocated by alloc_objects(). */
static unsigned long alloc_count;
/* Number of branches created by new_branch(). */
static unsigned long branch_count;
/* Objects written to the pack, and objects skipped because already known. */
static unsigned long object_count;
static unsigned long duplicate_count;
/* The same two counters, indexed by object type. */
static unsigned long object_count_by_type[9];
static unsigned long duplicate_count_by_type[9];
/* The .pack file */
/* Descriptor opened in main(). */
static int pack_fd;
/* Number of bytes written to the pack so far, i.e. the offset of the next object. */
static unsigned long pack_offset;
/* Checksum computed and appended by fixup_header_footer(); also written to the index. */
static unsigned char pack_sha1[20];
/* Table of objects we've written. */
/* Head of the allocation block list; new entries come from this block. */
struct object_entry_block *blocks;
/* Hash buckets selected by the first two bytes of the object name. */
struct object_entry *object_table[1 << 16];
/* Our last blob */
struct last_object last_blob;
/* Branch data */
/* Head of the list of declared branches. */
struct branch *branches;
/* Branch selected by set_branch(); 0 until the first selection. */
struct branch *current_branch;
/*
 * Allocate a block with room for cnt object entries and make it the new
 * head of the global block list.  Every slot of the new block is free.
 */
static void alloc_objects(int cnt)
{
	size_t bytes = sizeof(struct object_entry_block)
		+ cnt * sizeof(struct object_entry);
	struct object_entry_block *blk = xmalloc(bytes);

	/* The free region initially spans the whole entries array. */
	blk->next_free = blk->entries;
	blk->end = blk->entries + cnt;

	/* Push the block onto the front of the list. */
	blk->next_block = blocks;
	blocks = blk;

	alloc_count += cnt;
}
/*
 * Hand out the next free entry of the head block, allocating another
 * block of 1000 entries first when the head block is full.  Only the
 * sha1 field of the returned entry is initialized.
 */
static struct object_entry* new_object(unsigned char *sha1)
{
	struct object_entry *slot;

	if (blocks->end == blocks->next_free)
		alloc_objects(1000);

	/* alloc_objects() may have replaced the head, so re-read it here. */
	slot = blocks->next_free;
	blocks->next_free = slot + 1;

	memcpy(slot->sha1, sha1, sizeof(slot->sha1));
	return slot;
}
/*
 * Find the table entry for sha1, creating it if it does not exist yet.
 *
 * The bucket is chosen from the first two bytes of sha1.  A newly created
 * entry is appended to the end of its bucket chain with offset 0.
 */
static struct object_entry* insert_object(unsigned char *sha1)
{
	unsigned int bucket = sha1[0] << 8 | sha1[1];
	struct object_entry **link;
	struct object_entry *fresh;

	/* Walk the chain through the links so the tail link is at hand. */
	for (link = &object_table[bucket]; *link; link = &(*link)->next) {
		if (memcmp(sha1, (*link)->sha1, sizeof((*link)->sha1)) == 0)
			return *link;
	}

	fresh = new_object(sha1);
	fresh->next = 0;
	fresh->offset = 0;
	*link = fresh;
	return fresh;
}
/*
 * Read until length bytes have arrived, xread() fails or reports
 * end of input.
 *
 * Returns the number of bytes read (less than length only at end of
 * input), or the negative xread() result on error.
 */
static ssize_t yread(int fd, void *buffer, size_t length)
{
	char *dst = buffer;
	ssize_t total = 0;

	while (total < length) {
		ssize_t got = xread(fd, dst + total, length - total);

		if (got < 0)
			return got;
		if (!got)
			break;
		total += got;
	}
	return total;
}
/*
 * Write until length bytes have been accepted, or xwrite() fails or
 * makes no progress.
 *
 * Returns the number of bytes written (less than length only when
 * xwrite() returned 0), or the negative xwrite() result on error.
 */
static ssize_t ywrite(int fd, void *buffer, size_t length)
{
	char *src = buffer;
	ssize_t total = 0;

	while (total < length) {
		ssize_t put = xwrite(fd, src + total, length - total);

		if (put < 0)
			return put;
		if (!put)
			break;
		total += put;
	}
	return total;
}
/*
 * Read one length-prefixed string from stdin.
 *
 * The input is a 4 byte length, read directly into an integer (so in the
 * byte order of this host), followed by that many bytes of string data
 * without a terminator.
 *
 * Returns a pointer to a static buffer that the next call overwrites, or
 * 0 when the length is zero.  Dies on short input or when the string does
 * not fit into PATH_MAX - 1 bytes.
 */
static const char* read_string()
{
	static char sn[PATH_MAX];
	/*
	 * Must be a 4 byte type: reading 4 bytes into a wider integer
	 * leaves its remaining bytes uninitialized.
	 */
	unsigned int slen;
	ssize_t got;

	if (yread(0, &slen, 4) != 4)
		die("Can't obtain string");
	if (!slen)
		return 0;
	if (slen > (PATH_MAX - 1))
		die("Can't handle excessive string length %lu", (unsigned long)slen);
	got = yread(0, sn, slen);
	if (got < 0 || (size_t)got != slen)
		die("Can't obtain string of length %lu", (unsigned long)slen);
	sn[slen] = 0;
	return sn;
}
/*
 * Like read_string(), but an absent (zero length) string is fatal.
 * The result points at read_string()'s buffer.
 */
static const char* read_required_string()
{
	const char *value = read_string();

	if (value == 0)
		die("Expected string command parameter, didn't find one");
	return value;
}
/*
 * Encode a pack entry header for an object of the given type and size.
 *
 * The first byte holds the type shifted left by 4 plus the low 4 bits of
 * size.  The remaining size bits follow 7 at a time, least significant
 * group first.  Every byte except the last has bit 0x80 set.
 *
 * hdr must have room for the encoded bytes.  Returns how many bytes were
 * written.  Dies when type is outside OBJ_COMMIT..OBJ_DELTA.
 */
static unsigned long encode_header(
	enum object_type type,
	unsigned long size,
	unsigned char *hdr)
{
	unsigned char *out = hdr;
	unsigned char byte;

	if (type < OBJ_COMMIT || type > OBJ_DELTA)
		die("bad type %d", type);

	byte = (type << 4) | (size & 15);
	for (size >>= 4; size; size >>= 7) {
		/* More size bits follow, so flag the pending byte as continued. */
		*out++ = byte | 0x80;
		byte = size & 0x7f;
	}
	*out++ = byte;
	return out - hdr;
}
/*
 * Hash one object and, unless it is already in the object table, append
 * it to the pack file.
 *
 * type    - object type; used in the hashed header and the pack entry.
 * dat     - object content, datlen bytes long.
 * last    - previously stored object to try as a delta base; updated to
 *           describe dat when dat is written.  Must not be null.
 * sha1out - when non-null, receives the 20 byte object name.
 *
 * Returns 1 when the object was written; dat is then kept in last->data
 * and freed by a later call.  Returns 0 when the object was already
 * known; dat is not retained (the callers in this file free it).
 * Dies when writing to the pack fails.
 */
static int store_object(
enum object_type type,
void *dat,
unsigned long datlen,
struct last_object *last,
unsigned char *sha1out)
{
void *out, *delta;
struct object_entry *e;
unsigned char hdr[96];
unsigned char sha1[20];
unsigned long hdrlen, deltalen;
SHA_CTX c;
z_stream s;
/* The name is the SHA-1 of "<type> <len>" including its NUL (hence + 1), then the content. */
hdrlen = sprintf((char*)hdr,"%s %lu",type_names[type],datlen) + 1;
SHA1_Init(&c);
SHA1_Update(&c, hdr, hdrlen);
SHA1_Update(&c, dat, datlen);
SHA1_Final(sha1, &c);
if (sha1out)
memcpy(sha1out, sha1, sizeof(sha1));
e = insert_object(sha1);
/* insert_object() creates entries with offset 0, so nonzero means already written. */
if (e->offset) {
duplicate_count++;
duplicate_count_by_type[type]++;
return 0;
}
e->offset = pack_offset;
object_count++;
object_count_by_type[type]++;
/* Try a delta against the previous object only while its depth is below max_depth. */
if (last->data && last->depth < max_depth)
delta = diff_delta(last->data, last->len,
dat, datlen,
&deltalen, 0);
else
delta = 0;
memset(&s, 0, sizeof(s));
/* NOTE(review): the result of deflateInit() is not checked. */
deflateInit(&s, zlib_compression_level);
if (delta) {
/* Delta entry: OBJ_DELTA header followed by the 20 byte name of the base object. */
last->depth++;
s.next_in = delta;
s.avail_in = deltalen;
hdrlen = encode_header(OBJ_DELTA, deltalen, hdr);
if (ywrite(pack_fd, hdr, hdrlen) != hdrlen)
die("Can't write object header: %s", strerror(errno));
if (ywrite(pack_fd, last->sha1, sizeof(sha1)) != sizeof(sha1))
die("Can't write object base: %s", strerror(errno));
pack_offset += hdrlen + sizeof(sha1);
} else {
/* Full entry: header carrying the real type, and a fresh delta chain. */
last->depth = 0;
s.next_in = dat;
s.avail_in = datlen;
hdrlen = encode_header(type, datlen, hdr);
if (ywrite(pack_fd, hdr, hdrlen) != hdrlen)
die("Can't write object header: %s", strerror(errno));
pack_offset += hdrlen;
}
/* The output buffer is sized from deflateBound() for the pending input. */
s.avail_out = deflateBound(&s, s.avail_in);
s.next_out = out = xmalloc(s.avail_out);
while (deflate(&s, Z_FINISH) == Z_OK)
/* nothing */;
deflateEnd(&s);
if (ywrite(pack_fd, out, s.total_out) != s.total_out)
die("Failed writing compressed data %s", strerror(errno));
pack_offset += s.total_out;
free(out);
if (delta)
free(delta);
/* dat becomes the base for the next call; the previous base is released. */
if (last->data)
free(last->data);
last->data = dat;
last->len = datlen;
memcpy(last->sha1, sha1, sizeof(sha1));
return 1;
}
/*
 * Write the 12 byte pack header: the magic "PACK", the version in network
 * byte order and a zero object count, and set pack_offset to the first
 * byte after it.  fixup_header_footer() later overwrites the count.
 * Dies when a write fails.
 */
static void init_pack_header()
{
	const char* magic = "PACK";
	/*
	 * The 4 byte fields use a 4 byte type, like the offsets in
	 * write_index().  Writing the first 4 bytes of a wider integer
	 * emits the wrong half on 64-bit big-endian hosts.
	 */
	unsigned int version = htonl(3);
	unsigned int zero = 0;

	if (ywrite(pack_fd, (char*)magic, 4) != 4)
		die("Can't write pack magic: %s", strerror(errno));
	if (ywrite(pack_fd, &version, 4) != 4)
		die("Can't write pack version: %s", strerror(errno));
	if (ywrite(pack_fd, &zero, 4) != 4)
		die("Can't write 0 object count: %s", strerror(errno));
	pack_offset = 4 * 3;
}
/*
 * Finish the pack file: store the real object count in the header and
 * append the SHA-1 of everything before it as the trailing checksum.
 *
 * The file is re-read from the start.  The first 8 header bytes are hashed
 * as they are, the count field is hashed and overwritten in place, and the
 * rest of the file is hashed in 128 KiB chunks.  The checksum is left in
 * pack_sha1.  Dies on any seek, read or write failure.
 */
static void fixup_header_footer()
{
	SHA_CTX c;
	char hdr[8];
	/* 4 byte type so that exactly the converted count is hashed and written. */
	unsigned int cnt;
	char *buf;
	/* Signed, so that a failed xread() is not mistaken for a huge chunk. */
	ssize_t n;

	if (lseek(pack_fd, 0, SEEK_SET) != 0)
		die("Failed seeking to start: %s", strerror(errno));

	SHA1_Init(&c);
	if (yread(pack_fd, hdr, 8) != 8)
		die("Failed reading header: %s", strerror(errno));
	SHA1_Update(&c, hdr, 8);

	cnt = htonl(object_count);
	SHA1_Update(&c, &cnt, 4);
	if (ywrite(pack_fd, &cnt, 4) != 4)
		die("Failed writing object count: %s", strerror(errno));

	buf = xmalloc(128 * 1024);
	for (;;) {
		n = xread(pack_fd, buf, 128 * 1024);
		if (n < 0)
			die("Failed reading pack data: %s", strerror(errno));
		if (n == 0)
			break;
		SHA1_Update(&c, buf, n);
	}
	free(buf);

	/* The read loop stopped at end of file, so the checksum is appended. */
	SHA1_Final(pack_sha1, &c);
	if (ywrite(pack_fd, pack_sha1, sizeof(pack_sha1)) != sizeof(pack_sha1))
		die("Failed writing pack checksum: %s", strerror(errno));
}
/*
 * qsort() comparison for arrays of object_entry pointers: orders the
 * entries by the bytes of their sha1.
 */
static int oecmp (const void *_a, const void *_b)
{
	struct object_entry * const *lhs = _a;
	struct object_entry * const *rhs = _b;

	return memcmp((*lhs)->sha1, (*rhs)->sha1, sizeof((*lhs)->sha1));
}
/*
 * Write the index file idx_name for the objects in the object table.
 *
 * Layout written: a 256 entry fan-out table, then for every object in
 * sha1 order its 4 byte pack offset and 20 byte name (all integers in
 * network byte order), then pack_sha1.  The file is written and closed
 * through the sha1file API.
 */
static void write_index(const char *idx_name)
{
	struct sha1file *f;
	struct object_entry **idx, **last, **c;
	struct object_entry_block *o;
	unsigned int array[256];
	int i;

	/* Build the sorted table of object IDs. */
	idx = xmalloc(object_count * sizeof(struct object_entry*));
	c = idx;
	for (o = blocks; o; o = o->next_block) {
		struct object_entry *e = o->entries;

		while (e != o->next_free)
			*c++ = e++;
	}
	last = idx + object_count;
	qsort(idx, object_count, sizeof(struct object_entry*), oecmp);

	/*
	 * Generate the fan-out array: slot i counts the objects whose first
	 * name byte is <= i.  The cursor only ever moves forward because
	 * the table is sorted.
	 */
	c = idx;
	for (i = 0; i < 256; i++) {
		while (c < last && (*c)->sha1[0] == i)
			c++;
		array[i] = htonl(c - idx);
	}

	f = sha1create("%s", idx_name);
	sha1write(f, array, 256 * sizeof(int));
	for (c = idx; c != last; c++) {
		struct object_entry *e = *c;
		unsigned int offset = htonl(e->offset);

		sha1write(f, &offset, 4);
		sha1write(f, e->sha1, sizeof(e->sha1));
	}
	sha1write(f, pack_sha1, sizeof(pack_sha1));
	sha1close(f, NULL, 1);
	free(idx);
}
/*
 * Handle the 'blob' command: read a 4 byte length (in the byte order of
 * this host) and that many bytes of content from stdin, then store the
 * content as a blob.
 *
 * The buffer is handed to store_object(), which keeps it when the blob is
 * written; it is freed here when the blob turned out to be a duplicate.
 * Dies on short input.
 */
static void new_blob()
{
	/*
	 * Must be a 4 byte type: reading 4 bytes into a wider integer
	 * leaves its remaining bytes uninitialized.
	 */
	unsigned int datlen;
	void *dat;
	ssize_t got;

	if (yread(0, &datlen, 4) != 4)
		die("Can't obtain blob length");
	dat = xmalloc(datlen);
	got = yread(0, dat, datlen);
	/* Test the sign first so an error is never compared as unsigned. */
	if (got < 0 || (size_t)got != datlen)
		die("Can't obtain %lu bytes of blob data", (unsigned long)datlen);
	if (!store_object(OBJ_BLOB, dat, datlen, &last_blob, 0))
		free(dat);
}
/*
 * Return the declared branch called name.  Dies when no branch in the
 * global list has that name, so the result is never null.
 */
static struct branch* lookup_branch(const char *name)
{
	struct branch *b = branches;

	while (b) {
		if (strcmp(name, b->name) == 0)
			return b;
		b = b->next_branch;
	}
	die("No branch named '%s' has been declared", name);
}
/*
 * Return a newly allocated, fully independent copy of t: its last_tree
 * bookkeeping (including the data buffer), its entry array, every entry
 * and, recursively, every subtree.
 */
static struct tree* deep_copy_tree (struct tree *t)
{
	/*
	 * Zero-filled so that last_tree is a valid "no previous object"
	 * record when t has none.  store_object() tests and frees
	 * last_tree.data, so it must never be left uninitialized.
	 */
	struct tree *r = xcalloc(1, sizeof(struct tree));
	unsigned long i;

	if (t->last_tree.data) {
		r->last_tree.data = xmalloc(t->last_tree.len);
		r->last_tree.len = t->last_tree.len;
		r->last_tree.depth = t->last_tree.depth;
		memcpy(r->last_tree.data, t->last_tree.data, t->last_tree.len);
		memcpy(r->last_tree.sha1, t->last_tree.sha1, sizeof(t->last_tree.sha1));
	}

	r->entry_count = t->entry_count;
	r->entries = xmalloc(t->entry_count * sizeof(struct tree_entry*));
	for (i = 0; i < t->entry_count; i++) {
		struct tree_entry *a = t->entries[i];
		struct tree_entry *b;

		/* Entries carry their name inline, so size each one for its name. */
		b = xmalloc(sizeof(struct tree_entry) + strlen(a->name) + 1);
		b->tree = a->tree ? deep_copy_tree(a->tree) : 0;
		b->mode = a->mode;
		memcpy(b->sha1, a->sha1, sizeof(a->sha1));
		strcpy(b->name, a->name);
		r->entries[i] = b;
	}
	return r;
}
/*
 * Write the tree behind entry e to the pack and record its name in
 * e->sha1.
 *
 * Nothing happens unless e->sha1 is still all zeros (null_sha1).  Each
 * child is serialized as "<octal mode> <name>", a NUL and its 20 byte
 * name.  A child that is itself a tree is stored first, so that its sha1
 * is taken after that recursive call.
 */
static void store_tree (struct tree_entry *e)
{
	struct tree *t = e->tree;
	unsigned long maxlen = 0, i;
	char *buf, *c;

	if (memcmp(null_sha1, e->sha1, sizeof(e->sha1)) != 0)
		return;

	/* 32 bytes per entry budgets for mode, space, NUL and sha1; the name comes on top. */
	for (i = 0; i < t->entry_count; i++)
		maxlen += 32 + strlen(t->entries[i]->name);

	buf = xmalloc(maxlen);
	c = buf;
	for (i = 0; i < t->entry_count; i++) {
		struct tree_entry *child = t->entries[i];

		/* + 1 keeps the NUL that sprintf() wrote after the name. */
		c += sprintf(c, "%o %s", child->mode, child->name) + 1;
		if (child->tree)
			store_tree(child);
		memcpy(c, child->sha1, sizeof(child->sha1));
		c += sizeof(child->sha1);
	}

	/* store_object() keeps buf unless the tree already exists in the pack. */
	if (!store_object(OBJ_TREE, buf, c - buf, &t->last_tree, e->sha1))
		free(buf);
}
/*
 * Handle the 'newb' command: read the new branch's name and an optional
 * source branch name from stdin and add the branch to the global list.
 *
 * With a source branch, the new branch starts as a deep copy of the
 * source's tree and takes over its tree and branch sha1.  Without one it
 * starts with an empty tree and zeroed sha1 fields.
 * Dies when the source branch is unknown or memory runs out.
 */
static void new_branch()
{
	struct branch *nb = xcalloc(1, sizeof(struct branch));
	const char *source_name;

	/*
	 * read_string() reuses a single static buffer, so the name has to
	 * be copied before the next string is read.
	 */
	nb->name = strdup(read_required_string());
	if (!nb->name)
		die("Out of memory duplicating branch name");

	source_name = read_string();
	if (source_name) {
		struct branch *sb = lookup_branch(source_name);

		nb->tree.tree = deep_copy_tree(sb->tree.tree);
		memcpy(nb->tree.sha1, sb->tree.sha1, sizeof(sb->tree.sha1));
		memcpy(nb->sha1, sb->sha1, sizeof(sb->sha1));
	} else {
		nb->tree.tree = xcalloc(1, sizeof(struct tree));
		nb->tree.tree->entries = xmalloc(8*sizeof(struct tree_entry*));
	}

	nb->next_branch = branches;
	branches = nb;
	branch_count++;
}
/*
 * Handle the 'setb' command: read a branch name from stdin and make that
 * branch the current one.  Dies when the name is missing or unknown.
 */
static void set_branch()
{
	const char *name = read_required_string();

	current_branch = lookup_branch(name);
}
/*
 * Handle the 'comt' command: write the current branch's root tree (and,
 * through store_tree(), its subtrees) to the pack.
 * Dies when no branch has been selected yet.
 */
static void commit()
{
	/* current_branch stays null until the first 'setb' command. */
	if (!current_branch)
		die("No branch has been selected; 'setb' must come before 'comt'");
	store_tree(&current_branch->tree);
}
/*
 * Entry point.  argv[1] is the base name of the output files and argv[2]
 * an estimate of how many objects will be imported.
 *
 * Creates <base>.pack (which must not exist yet), executes the 4 byte
 * commands read from stdin until input ends, then finalizes the pack,
 * writes <base>.idx and prints statistics to stderr.
 */
int main(int argc, const char **argv)
{
	const char *base_name;
	int est_obj_cnt;
	char *pack_name;
	char *idx_name;
	struct stat sb;

	if (argc < 3)
		die("Usage: fast-import <base-name> <estimated-object-count>");
	base_name = argv[1];
	est_obj_cnt = atoi(argv[2]);
	/* A negative count would wrap to a huge allocation size. */
	if (est_obj_cnt < 0)
		die("Invalid estimated object count %s", argv[2]);

	/* Room for the suffix and the terminating NUL. */
	pack_name = xmalloc(strlen(base_name) + 6);
	sprintf(pack_name, "%s.pack", base_name);
	idx_name = xmalloc(strlen(base_name) + 5);
	sprintf(idx_name, "%s.idx", base_name);

	pack_fd = open(pack_name, O_RDWR|O_CREAT|O_EXCL, 0666);
	if (pack_fd < 0)
		die("Can't create %s: %s", pack_name, strerror(errno));

	alloc_objects(est_obj_cnt);
	init_pack_header();
	for (;;) {
		/*
		 * Must be a 4 byte type: reading 4 bytes into a wider
		 * integer leaves its remaining bytes uninitialized and no
		 * case label could match reliably.
		 */
		unsigned int cmd;

		if (yread(0, &cmd, 4) != 4)
			break;

		/*
		 * The value of a multi-character constant is implementation
		 * defined, and the raw read depends on host byte order.
		 */
		switch (cmd) {
		case 'blob': new_blob(); break;
		case 'newb': new_branch(); break;
		case 'setb': set_branch(); break;
		case 'comt': commit(); break;
		default:
			die("Invalid command %lu", (unsigned long)cmd);
		}
	}
	fixup_header_footer();
	close(pack_fd);
	write_index(idx_name);

	fprintf(stderr, "%s statistics:\n", argv[0]);
	fprintf(stderr, "---------------------------------------------------\n");
	fprintf(stderr, "Alloc'd objects: %10lu (%10lu overflow )\n", alloc_count, alloc_count - est_obj_cnt);
	fprintf(stderr, "Total objects: %10lu (%10lu duplicates)\n", object_count, duplicate_count);
	fprintf(stderr, " blobs : %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_BLOB], duplicate_count_by_type[OBJ_BLOB]);
	fprintf(stderr, " trees : %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_TREE], duplicate_count_by_type[OBJ_TREE]);
	fprintf(stderr, " commits: %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_COMMIT], duplicate_count_by_type[OBJ_COMMIT]);
	fprintf(stderr, " tags : %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_TAG], duplicate_count_by_type[OBJ_TAG]);
	fprintf(stderr, "Total branches: %10lu\n", branch_count);
	fprintf(stderr, "---------------------------------------------------\n");
	/* Sizes are informational only; skip a line rather than print garbage. */
	if (!stat(pack_name, &sb))
		fprintf(stderr, "Pack size: %10lu KiB\n", (unsigned long)(sb.st_size/1024));
	if (!stat(idx_name, &sb))
		fprintf(stderr, "Index size: %10lu KiB\n", (unsigned long)(sb.st_size/1024));
	fprintf(stderr, "\n");
	return 0;
}