From 8bcce30126b90af83c1291e072f74950e73a2584 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Sun, 6 Aug 2006 13:51:39 -0400 Subject: [PATCH] Added automatic index generation to fast-import. Signed-off-by: Shawn O. Pearce --- fast-import.c | 182 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 163 insertions(+), 19 deletions(-) diff --git a/fast-import.c b/fast-import.c index 416ba5c7c6..0d95118499 100644 --- a/fast-import.c +++ b/fast-import.c @@ -8,11 +8,75 @@ static int max_depth = 10; static unsigned long object_count; +static unsigned long duplicate_count; +static unsigned long packoff; +static unsigned long overflow_count; static int packfd; static int current_depth; static void *lastdat; static unsigned long lastdatlen; static unsigned char lastsha1[20]; +static unsigned char packsha1[20]; +/* One entry per distinct object: bucket-chain link, offset of the object in the pack, and its SHA-1. */ +struct object_entry +{ + struct object_entry *next; + unsigned long offset; + unsigned char sha1[20]; +}; +/* Entry allocated individually by new_object() once the preallocated pool is used up; chained through next from the overflow list head. */ +struct overflow_object_entry +{ + struct overflow_object_entry *next; + struct object_entry oe; +}; +/* pool_start, pool_next and pool_end bound the preallocated entry pool set up in main(); table is indexed by the first two SHA-1 bytes (see insert_object). */ +struct object_entry *pool_start; +struct object_entry *pool_next; +struct object_entry *pool_end; +struct overflow_object_entry *overflow; +struct object_entry *table[1 << 16]; +/* Hand out the next pool entry, or xmalloc an overflow entry when the pool is exhausted, and copy sha1 into it. The next and offset fields are left for the caller to fill in. */ +static struct object_entry* new_object(unsigned char *sha1) +{ + if (pool_next != pool_end) { + struct object_entry *e = pool_next++; + memcpy(e->sha1, sha1, sizeof(e->sha1)); + return e; + } else { + struct overflow_object_entry *e; + + e = xmalloc(sizeof(struct overflow_object_entry)); + e->next = overflow; + memcpy(e->oe.sha1, sha1, sizeof(e->oe.sha1)); + overflow = e; + overflow_count++; + return &e->oe; + } +} +/* Walk the bucket chain for sha1 and return the existing entry if one matches; otherwise append a new entry with offset 0 to the chain and return that. */ +static struct object_entry* insert_object(unsigned char *sha1) +{ + unsigned int h = sha1[0] << 8 | sha1[1]; + struct object_entry *e = table[h]; + struct object_entry *p = 0; + + while (e) { + if (!memcmp(sha1, e->sha1, sizeof(e->sha1))) + return e; + p = e; + e = e->next; + } + + e = new_object(sha1); + e->next = 0; + e->offset = 0; + if (p) +
p->next = e; + else + table[h] = e; + return e; +} static ssize_t yread(int fd, void *buffer, size_t length) { @@ -66,7 +130,7 @@ static unsigned long encode_header(enum object_type type, unsigned long size, un return n; } -static void write_blob (void *dat, unsigned long datlen) +static void write_blob(void *dat, unsigned long datlen) { z_stream s; void *out, *delta; @@ -92,6 +156,7 @@ static void write_blob (void *dat, unsigned long datlen) die("Can't write object header: %s", strerror(errno)); if (ywrite(packfd, lastsha1, sizeof(lastsha1)) != sizeof(lastsha1)) die("Can't write object base: %s", strerror(errno)); + packoff += hdrlen + sizeof(lastsha1);/* advance past the object header and base SHA-1 just written to packfd */ } else { current_depth = 0; s.next_in = dat; @@ -99,6 +164,7 @@ static void write_blob (void *dat, unsigned long datlen) hdrlen = encode_header(OBJ_BLOB, datlen, hdr); if (ywrite(packfd, hdr, hdrlen) != hdrlen) die("Can't write object header: %s", strerror(errno)); + packoff += hdrlen; } s.avail_out = deflateBound(&s, s.avail_in); @@ -109,13 +175,14 @@ static void write_blob (void *dat, unsigned long datlen) if (ywrite(packfd, out, s.total_out) != s.total_out) die("Failed writing compressed data %s", strerror(errno)); + packoff += s.total_out; free(out); if (delta) free(delta); } -static void init_pack_header () +static void init_pack_header() { const char* magic = "PACK"; unsigned long version = 2; @@ -129,13 +196,13 @@ static void init_pack_header () die("Can't write pack version: %s", strerror(errno)); if (ywrite(packfd, &zero, 4) != 4) die("Can't write 0 object count: %s", strerror(errno)); + packoff = 4 * 3;/* bytes of pack header written so far; presumably magic, version and object count at 4 bytes each (only part of this function is visible in the hunk) */ } -static void fixup_header_footer () +static void fixup_header_footer() { SHA_CTX c; char hdr[8]; - unsigned char sha1[20]; unsigned long cnt; char *buf; size_t n; @@ -148,7 +215,6 @@ static void fixup_header_footer () die("Failed reading header: %s", strerror(errno)); SHA1_Update(&c, hdr, 8); -fprintf(stderr, "%lu objects\n", object_count); cnt = htonl(object_count); SHA1_Update(&c, &cnt, 4); if
(ywrite(packfd, &cnt, 4) != 4) @@ -163,16 +229,81 @@ fprintf(stderr, "%lu objects\n", object_count); } free(buf); - SHA1_Final(sha1, &c); - if (ywrite(packfd, sha1, sizeof(sha1)) != sizeof(sha1)) + SHA1_Final(packsha1, &c);/* checksum now lives in the file-scope packsha1 so write_index() can append it to the index */ + if (ywrite(packfd, packsha1, sizeof(packsha1)) != sizeof(packsha1)) die("Failed writing pack checksum: %s", strerror(errno)); } -int main (int argc, const char **argv) +static int oecmp (const void *_a, const void *_b)/* qsort() comparator: each element is a struct object_entry pointer; orders entries by their SHA-1 bytes */ { - packfd = open(argv[1], O_RDWR|O_CREAT|O_TRUNC, 0666); + struct object_entry *a = *((struct object_entry**)_a); + struct object_entry *b = *((struct object_entry**)_b); + return memcmp(a->sha1, b->sha1, sizeof(a->sha1)); +} +/* Write the index file idx_name: the 256-entry fan-out array, then for each object in sorted SHA-1 order a 4-byte network-order pack offset followed by its SHA-1, then the pack checksum from packsha1. */ +static void write_index(const char *idx_name) +{ + struct sha1file *f; + struct object_entry **idx, **c, **last; + struct object_entry *e; + struct overflow_object_entry *o; + unsigned int array[256]; + int i; + + /* Build the sorted table of object IDs. */ + idx = xmalloc(object_count * sizeof(struct object_entry*)); + c = idx; + for (e = pool_start; e != pool_next; e++) + *c++ = e; + for (o = overflow; o; o = o->next) + *c++ = &o->oe; + last = idx + object_count; + qsort(idx, object_count, sizeof(struct object_entry*), oecmp); + + /* Generate the fan-out array. 
*/ + c = idx; + for (i = 0; i < 256; i++) { + struct object_entry **next = c;;/* NOTE(review): the second ';' is a stray empty statement */ + while (next < last) { + if ((*next)->sha1[0] != i) + break; + next++; + } + array[i] = htonl(next - idx);/* cumulative count of sorted entries whose first SHA-1 byte is <= i, stored in network byte order */ + c = next; + } + + f = sha1create("%s", idx_name); + sha1write(f, array, 256 * sizeof(int)); + for (c = idx; c != last; c++) { + unsigned int offset = htonl((*c)->offset); + sha1write(f, &offset, 4); + sha1write(f, (*c)->sha1, sizeof((*c)->sha1)); + } + sha1write(f, packsha1, sizeof(packsha1)); + sha1close(f, NULL, 1); + free(idx); +} + +int main(int argc, const char **argv)/* argv[1] is the base name used to build the .pack and .idx file names; argv[2] is the estimated object count that sizes the entry pool. NOTE(review): argc is not checked before either is read. */ +{ + const char *base_name = argv[1]; + int est_obj_cnt = atoi(argv[2]); + char *pack_name; + char *idx_name; + + pack_name = xmalloc(strlen(base_name) + 6); + sprintf(pack_name, "%s.pack", base_name); + idx_name = xmalloc(strlen(base_name) + 5); + sprintf(idx_name, "%s.idx", base_name); + + packfd = open(pack_name, O_RDWR|O_CREAT|O_TRUNC, 0666); if (packfd < 0) - die("Can't create pack file %s: %s", argv[1], strerror(errno)); + die("Can't create pack file %s: %s", pack_name, strerror(errno)); + + pool_start = xmalloc(est_obj_cnt * sizeof(struct object_entry)); + pool_next = pool_start; + pool_end = pool_start + est_obj_cnt; init_pack_header(); for (;;) { @@ -182,8 +313,10 @@ int main (int argc, const char **argv) char hdr[128]; unsigned char sha1[20]; SHA_CTX c; + struct object_entry *e; if (yread(0, &datlen, 4) != 4) + break; dat = xmalloc(datlen); @@ -196,19 +329,30 @@ int main (int argc, const char **argv) SHA1_Update(&c, dat, datlen); SHA1_Final(sha1, &c); - write_blob(dat, datlen); - object_count++; - printf("%s\n", sha1_to_hex(sha1)); - fflush(stdout); + e = insert_object(sha1); + if (!e->offset) {/* offset 0 marks an entry insert_object() just created; init_pack_header() sets packoff to 12, so a stored offset is never 0 */ + e->offset = packoff; + write_blob(dat, datlen); + object_count++; + printf("%s\n", sha1_to_hex(sha1)); + fflush(stdout); - if (lastdat) - free(lastdat); - lastdat = dat; - lastdatlen = datlen; - memcpy(lastsha1, sha1, sizeof(sha1)); + if (lastdat) + free(lastdat); + lastdat = dat; + lastdatlen = datlen; +
memcpy(lastsha1, sha1, sizeof(sha1)); + } else {/* SHA-1 already has an entry with an offset: count the duplicate and free the data without writing it to the pack */ + duplicate_count++; + free(dat); + } } fixup_header_footer(); close(packfd); + write_index(idx_name); + + fprintf(stderr, "%lu objects, %lu duplicates, %lu pool overflow\n", + object_count, duplicate_count, overflow_count); return 0; }