Merge branch 'np/fast-import-idx-v2'

* np/fast-import-idx-v2:
  fast-import: use the diff_delta() max_delta_size argument
  fast-import: honor pack.indexversion and pack.packsizelimit config vars
  fast-import: make default pack size unlimited
  fast-import: use write_idx_file() instead of custom code
  fast-import: use sha1write() for pack data
  fast-import: start using struct pack_idx_entry

Acked-by: Shawn O. Pearce <spearce@spearce.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
commit 44e0f45035
Junio C Hamano, 2010-02-17 15:28:25 -08:00
2 changed files with 84 additions and 95 deletions

Documentation/git-fast-import.txt

@@ -45,10 +45,7 @@ OPTIONS
--max-pack-size=<n>::
Maximum size of each output packfile.
The default is 4 GiB as that is the maximum allowed
packfile size (due to file format limitations). Some
importers may wish to lower this, such as to ensure the
resulting packfiles fit on CDs.
The default is unlimited.
--big-file-threshold=<n>::
Maximum size of a blob that fast-import will attempt to

fast-import.c

@@ -164,12 +164,11 @@ Format of STDIN stream:
struct object_entry
{
struct pack_idx_entry idx;
struct object_entry *next;
uint32_t offset;
uint32_t type : TYPE_BITS,
pack_id : PACK_ID_BITS,
depth : DEPTH_BITS;
unsigned char sha1[20];
};
struct object_entry_pool
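The hunk above is the heart of the series: fast-import's per-object record drops its private sha1[20] and 32-bit offset fields and instead embeds the generic index entry used by the rest of the pack machinery, which is what lets the later patches hand the object table straight to write_idx_file() and also allows the old 4 GiB default cap to go away (offsets become off_t). A minimal sketch of what is being embedded, assuming the struct pack_idx_entry layout from pack.h of this era:

#include <stdint.h>
#include <sys/types.h>

/*
 * Sketch only, not part of the commit: assumed layout of the generic
 * index entry from pack.h.  Embedding it as "idx" is why e->sha1
 * becomes e->idx.sha1, e->offset becomes e->idx.offset (now an off_t
 * rather than a uint32_t), and why e->idx.crc32 is available for the
 * version-2 index written later.
 */
struct pack_idx_entry {
	unsigned char sha1[20];	/* object name */
	uint32_t crc32;		/* CRC-32 of the packed representation */
	off_t offset;		/* where the object starts in the packfile */
};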
@@ -192,7 +191,7 @@ struct mark_set
struct last_object
{
struct strbuf data;
uint32_t offset;
off_t offset;
unsigned int depth;
unsigned no_swap : 1;
};
@@ -280,7 +279,7 @@ struct recent_command
/* Configured limits on output */
static unsigned long max_depth = 10;
static off_t max_packsize = (1LL << 32) - 1;
static off_t max_packsize;
static uintmax_t big_file_threshold = 512 * 1024 * 1024;
static int force_update;
static int pack_compression_level = Z_DEFAULT_COMPRESSION;
@@ -313,9 +312,10 @@ static struct atom_str **atom_table;
/* The .pack file being generated */
static unsigned int pack_id;
static struct sha1file *pack_file;
static struct packed_git *pack_data;
static struct packed_git **all_packs;
static unsigned long pack_size;
static off_t pack_size;
/* Table of objects we've written. */
static unsigned int object_entry_alloc = 5000;
@@ -521,7 +521,7 @@ static struct object_entry *new_object(unsigned char *sha1)
alloc_objects(object_entry_alloc);
e = blocks->next_free++;
hashcpy(e->sha1, sha1);
hashcpy(e->idx.sha1, sha1);
return e;
}
@@ -530,7 +530,7 @@ static struct object_entry *find_object(unsigned char *sha1)
unsigned int h = sha1[0] << 8 | sha1[1];
struct object_entry *e;
for (e = object_table[h]; e; e = e->next)
if (!hashcmp(sha1, e->sha1))
if (!hashcmp(sha1, e->idx.sha1))
return e;
return NULL;
}
@@ -542,7 +542,7 @@ static struct object_entry *insert_object(unsigned char *sha1)
struct object_entry *p = NULL;
while (e) {
if (!hashcmp(sha1, e->sha1))
if (!hashcmp(sha1, e->idx.sha1))
return e;
p = e;
e = e->next;
@@ -550,7 +550,7 @@ static struct object_entry *insert_object(unsigned char *sha1)
e = new_object(sha1);
e->next = NULL;
e->offset = 0;
e->idx.offset = 0;
if (p)
p->next = e;
else
@@ -839,11 +839,12 @@ static void start_packfile(void)
p = xcalloc(1, sizeof(*p) + strlen(tmpfile) + 2);
strcpy(p->pack_name, tmpfile);
p->pack_fd = pack_fd;
pack_file = sha1fd(pack_fd, p->pack_name);
hdr.hdr_signature = htonl(PACK_SIGNATURE);
hdr.hdr_version = htonl(2);
hdr.hdr_entries = 0;
write_or_die(p->pack_fd, &hdr, sizeof(hdr));
sha1write(pack_file, &hdr, sizeof(hdr));
pack_data = p;
pack_size = sizeof(hdr);
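start_packfile() now wraps the pack file descriptor in a sha1file via sha1fd(), so the header above and every later pack byte go through sha1write(). That maintains a running SHA-1 of the pack contents, letting end_packfile() obtain the trailer hash from sha1close() instead of re-reading the file, and it is also the plumbing behind the per-object CRC-32 bracketing shown further down. A small sketch of the lifecycle, using only the csum-file calls that appear in this diff (the wrapper function itself is illustrative):

/*
 * Sketch only: the csum-file lifecycle the series moves fast-import
 * onto.  sha1fd/sha1write/sha1flush/sha1close come from csum-file.h;
 * the helper wrapping them is illustrative.
 */
static void pack_write_lifecycle(int pack_fd, const char *pack_name,
				 const void *hdr, size_t hdrlen,
				 unsigned char trailer_sha1[20])
{
	struct sha1file *f = sha1fd(pack_fd, pack_name);

	sha1write(f, hdr, hdrlen);	/* buffered write; feeds the SHA-1 */
	/* ... every object is written through sha1write() as well ... */
	sha1flush(f);			/* push buffered bytes to the fd when
					 * the file must be read back */

	/* flags == 0: report the final SHA-1 without appending it; as in
	 * end_packfile() below, fixup_pack_header_footer() then patches
	 * the object count and writes the 20-byte trailer. */
	sha1close(f, trailer_sha1, 0);
}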
@@ -853,67 +854,30 @@ static void start_packfile(void)
all_packs[pack_id] = p;
}
static int oecmp (const void *a_, const void *b_)
static const char *create_index(void)
{
struct object_entry *a = *((struct object_entry**)a_);
struct object_entry *b = *((struct object_entry**)b_);
return hashcmp(a->sha1, b->sha1);
}
static char *create_index(void)
{
static char tmpfile[PATH_MAX];
git_SHA_CTX ctx;
struct sha1file *f;
struct object_entry **idx, **c, **last, *e;
const char *tmpfile;
struct pack_idx_entry **idx, **c, **last;
struct object_entry *e;
struct object_entry_pool *o;
uint32_t array[256];
int i, idx_fd;
/* Build the sorted table of object IDs. */
idx = xmalloc(object_count * sizeof(struct object_entry*));
/* Build the table of object IDs. */
idx = xmalloc(object_count * sizeof(*idx));
c = idx;
for (o = blocks; o; o = o->next_pool)
for (e = o->next_free; e-- != o->entries;)
if (pack_id == e->pack_id)
*c++ = e;
*c++ = &e->idx;
last = idx + object_count;
if (c != last)
die("internal consistency error creating the index");
qsort(idx, object_count, sizeof(struct object_entry*), oecmp);
/* Generate the fan-out array. */
c = idx;
for (i = 0; i < 256; i++) {
struct object_entry **next = c;
while (next < last) {
if ((*next)->sha1[0] != i)
break;
next++;
}
array[i] = htonl(next - idx);
c = next;
}
idx_fd = odb_mkstemp(tmpfile, sizeof(tmpfile),
"pack/tmp_idx_XXXXXX");
f = sha1fd(idx_fd, tmpfile);
sha1write(f, array, 256 * sizeof(int));
git_SHA1_Init(&ctx);
for (c = idx; c != last; c++) {
uint32_t offset = htonl((*c)->offset);
sha1write(f, &offset, 4);
sha1write(f, (*c)->sha1, sizeof((*c)->sha1));
git_SHA1_Update(&ctx, (*c)->sha1, 20);
}
sha1write(f, pack_data->sha1, sizeof(pack_data->sha1));
sha1close(f, NULL, CSUM_FSYNC);
tmpfile = write_idx_file(NULL, idx, object_count, pack_data->sha1);
free(idx);
git_SHA1_Final(pack_data->sha1, &ctx);
return tmpfile;
}
static char *keep_pack(char *curr_index_name)
static char *keep_pack(const char *curr_index_name)
{
static char name[PATH_MAX];
static const char *keep_msg = "fast-import";
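With the hunk above, create_index() stops sorting, building the 256-entry fan-out, and hashing the table by hand; it merely collects struct pack_idx_entry pointers for the objects of the current pack and delegates to write_idx_file(), which (as assumed from pack-write.c of this era) sorts the entries itself, writes a version 1 or version 2 index according to pack.indexversion, and, when given a NULL name, creates a temporary index file and returns an allocated path, which is why keep_pack() below gains a free(). A condensed sketch of the new calling convention (the wrapper is illustrative):

/*
 * Sketch only: the calling convention create_index() now relies on.
 * write_idx_file() is the real helper from pack-write.c; the wrapper
 * and its arguments are illustrative.
 */
static const char *index_current_pack(struct pack_idx_entry **entries,
				      int nr, unsigned char *pack_sha1)
{
	/* No pre-sorting, fan-out, or SHA-1 bookkeeping here any more:
	 * write_idx_file() sorts by object name, honors pack.indexversion
	 * when picking index v1 or v2, and appends the trailing checksums. */
	const char *tmp_idx = write_idx_file(NULL, entries, nr, pack_sha1);

	/* A NULL name makes it create pack/tmp_idx_XXXXXX and return an
	 * allocated path; keep_pack() frees it after the rename. */
	return tmp_idx;
}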
@@ -935,6 +899,7 @@ static char *keep_pack(char *curr_index_name)
get_object_directory(), sha1_to_hex(pack_data->sha1));
if (move_temp_to_file(curr_index_name, name))
die("cannot store index file");
free((void *)curr_index_name);
return name;
}
@@ -957,15 +922,17 @@ static void end_packfile(void)
clear_delta_base_cache();
if (object_count) {
unsigned char cur_pack_sha1[20];
char *idx_name;
int i;
struct branch *b;
struct tag *t;
close_pack_windows(pack_data);
sha1close(pack_file, cur_pack_sha1, 0);
fixup_pack_header_footer(pack_data->pack_fd, pack_data->sha1,
pack_data->pack_name, object_count,
NULL, 0);
cur_pack_sha1, pack_size);
close(pack_data->pack_fd);
idx_name = keep_pack(create_index());
@@ -1063,25 +1030,21 @@ static int store_object(
e = insert_object(sha1);
if (mark)
insert_mark(mark, e);
if (e->offset) {
if (e->idx.offset) {
duplicate_count_by_type[type]++;
return 1;
} else if (find_sha1_pack(sha1, packed_git)) {
e->type = type;
e->pack_id = MAX_PACK_ID;
e->offset = 1; /* just not zero! */
e->idx.offset = 1; /* just not zero! */
duplicate_count_by_type[type]++;
return 1;
}
if (last && last->data.buf && last->depth < max_depth) {
if (last && last->data.buf && last->depth < max_depth && dat->len > 20) {
delta = diff_delta(last->data.buf, last->data.len,
dat->buf, dat->len,
&deltalen, 0);
if (delta && deltalen >= dat->len) {
free(delta);
delta = NULL;
}
&deltalen, dat->len - 20);
} else
delta = NULL;
@@ -1101,7 +1064,7 @@ static int store_object(
deflateEnd(&s);
/* Determine if we should auto-checkpoint. */
if ((pack_size + 60 + s.total_out) > max_packsize
if ((max_packsize && (pack_size + 60 + s.total_out) > max_packsize)
|| (pack_size + 60 + s.total_out) < pack_size) {
/* This new object needs to *not* have the current pack_id. */
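Since the default pack size is now unlimited, a max_packsize of zero must never force a checkpoint, hence the added max_packsize && guard; the unchanged second comparison still catches the case where adding the object would wrap the running off_t pack size. A paraphrase of the test (illustrative helper; the 60-byte slack mirrors the headroom used above):

#include <sys/types.h>

/* Sketch only: the auto-checkpoint test used by store_object() and
 * stream_blob() after this change. */
static int needs_new_packfile(off_t pack_size, off_t object_size, off_t limit)
{
	off_t projected = pack_size + 60 + object_size;	/* header/trailer slack */

	return (limit && projected > limit)	/* a limit of 0 means unlimited */
	    || projected < pack_size;		/* the addition wrapped around */
}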
@@ -1127,36 +1090,40 @@ static int store_object(
e->type = type;
e->pack_id = pack_id;
e->offset = pack_size;
e->idx.offset = pack_size;
object_count++;
object_count_by_type[type]++;
crc32_begin(pack_file);
if (delta) {
unsigned long ofs = e->offset - last->offset;
off_t ofs = e->idx.offset - last->offset;
unsigned pos = sizeof(hdr) - 1;
delta_count_by_type[type]++;
e->depth = last->depth + 1;
hdrlen = encode_header(OBJ_OFS_DELTA, deltalen, hdr);
write_or_die(pack_data->pack_fd, hdr, hdrlen);
sha1write(pack_file, hdr, hdrlen);
pack_size += hdrlen;
hdr[pos] = ofs & 127;
while (ofs >>= 7)
hdr[--pos] = 128 | (--ofs & 127);
write_or_die(pack_data->pack_fd, hdr + pos, sizeof(hdr) - pos);
sha1write(pack_file, hdr + pos, sizeof(hdr) - pos);
pack_size += sizeof(hdr) - pos;
} else {
e->depth = 0;
hdrlen = encode_header(type, dat->len, hdr);
write_or_die(pack_data->pack_fd, hdr, hdrlen);
sha1write(pack_file, hdr, hdrlen);
pack_size += hdrlen;
}
write_or_die(pack_data->pack_fd, out, s.total_out);
sha1write(pack_file, out, s.total_out);
pack_size += s.total_out;
e->idx.crc32 = crc32_end(pack_file);
free(out);
free(delta);
if (last) {
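The OBJ_OFS_DELTA header written above stores the distance back to the delta base as a variable-length, big-endian base-128 quantity in which every continuation byte implicitly adds one; that is what the "--ofs" in the encoder (and the matching "+ 1" in pack readers) is about. A self-contained sketch of the encoding and its inverse (helper names are illustrative; the format follows the pack documentation). For example, a distance of 300 encodes as the two bytes 0x81 0x2c:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Sketch only: the OFS_DELTA back-distance encoding used above, plus
 * its inverse.  Each continuation byte implicitly adds one, hence the
 * "--ofs" when encoding and the "+ 1" when decoding. */
static size_t encode_ofs(uint64_t ofs, unsigned char out[10])
{
	unsigned pos = 9;

	out[pos] = ofs & 127;
	while (ofs >>= 7)
		out[--pos] = 128 | (--ofs & 127);
	memmove(out, out + pos, 10 - pos);
	return 10 - pos;
}

static uint64_t decode_ofs(const unsigned char *in)
{
	uint64_t ofs = *in & 127;

	while (*in++ & 128)
		ofs = ((ofs + 1) << 7) | (*in & 127);
	return ofs;
}

static void ofs_example(void)
{
	unsigned char buf[10];

	assert(encode_ofs(300, buf) == 2);	/* 0x81 0x2c */
	assert(buf[0] == 0x81 && buf[1] == 0x2c);
	assert(decode_ofs(buf) == 300);
}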
@@ -1165,18 +1132,23 @@ static int store_object(
} else {
strbuf_swap(&last->data, dat);
}
last->offset = e->offset;
last->offset = e->idx.offset;
last->depth = e->depth;
}
return 0;
}
static void truncate_pack(off_t to)
static void truncate_pack(off_t to, git_SHA_CTX *ctx)
{
if (ftruncate(pack_data->pack_fd, to)
|| lseek(pack_data->pack_fd, to, SEEK_SET) != to)
die_errno("cannot truncate pack to skip duplicate");
pack_size = to;
/* yes this is a layering violation */
pack_file->total = to;
pack_file->offset = 0;
pack_file->ctx = *ctx;
}
static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
@@ -1189,16 +1161,21 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
unsigned long hdrlen;
off_t offset;
git_SHA_CTX c;
git_SHA_CTX pack_file_ctx;
z_stream s;
int status = Z_OK;
/* Determine if we should auto-checkpoint. */
if ((pack_size + 60 + len) > max_packsize
if ((max_packsize && (pack_size + 60 + len) > max_packsize)
|| (pack_size + 60 + len) < pack_size)
cycle_packfile();
offset = pack_size;
/* preserve the pack_file SHA1 ctx in case we have to truncate later */
sha1flush(pack_file);
pack_file_ctx = pack_file->ctx;
hdrlen = snprintf((char *)out_buf, out_sz, "blob %" PRIuMAX, len) + 1;
if (out_sz <= hdrlen)
die("impossibly large object header");
@@ -1206,6 +1183,8 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
git_SHA1_Init(&c);
git_SHA1_Update(&c, out_buf, hdrlen);
crc32_begin(pack_file);
memset(&s, 0, sizeof(s));
deflateInit(&s, pack_compression_level);
@@ -1233,7 +1212,7 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
if (!s.avail_out || status == Z_STREAM_END) {
size_t n = s.next_out - out_buf;
write_or_die(pack_data->pack_fd, out_buf, n);
sha1write(pack_file, out_buf, n);
pack_size += n;
s.next_out = out_buf;
s.avail_out = out_sz;
@@ -1259,22 +1238,23 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
if (mark)
insert_mark(mark, e);
if (e->offset) {
if (e->idx.offset) {
duplicate_count_by_type[OBJ_BLOB]++;
truncate_pack(offset);
truncate_pack(offset, &pack_file_ctx);
} else if (find_sha1_pack(sha1, packed_git)) {
e->type = OBJ_BLOB;
e->pack_id = MAX_PACK_ID;
e->offset = 1; /* just not zero! */
e->idx.offset = 1; /* just not zero! */
duplicate_count_by_type[OBJ_BLOB]++;
truncate_pack(offset);
truncate_pack(offset, &pack_file_ctx);
} else {
e->depth = 0;
e->type = OBJ_BLOB;
e->pack_id = pack_id;
e->offset = offset;
e->idx.offset = offset;
e->idx.crc32 = crc32_end(pack_file);
object_count++;
object_count_by_type[OBJ_BLOB]++;
}
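Both the regular store_object() path and the streaming path above now bracket each object's bytes with crc32_begin()/crc32_end() and record the result in e->idx.crc32. A version-2 pack index stores exactly this CRC-32 of the compressed on-disk data, so it can later be verified or copied during repacking without inflating the object. A minimal sketch of the bracketing (the wrapper is illustrative; the crc32_* and sha1write calls are the ones used above):

/* Sketch only: the per-object CRC bracketing that feeds e->idx.crc32
 * for the version-2 index. */
static uint32_t write_object_bytes(struct sha1file *pack_file,
				   const void *hdr, size_t hdrlen,
				   const void *body, size_t bodylen)
{
	crc32_begin(pack_file);			/* start checksumming here */
	sha1write(pack_file, hdr, hdrlen);	/* the header bytes count too */
	sha1write(pack_file, body, bodylen);
	return crc32_end(pack_file);		/* stored in e->idx.crc32 */
}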
@@ -1317,6 +1297,7 @@ static void *gfi_unpack_entry(
* the newly written data.
*/
close_pack_windows(p);
sha1flush(pack_file);
/* We have to offer 20 bytes additional on the end of
* the packfile as the core unpacker code assumes the
@@ -1326,7 +1307,7 @@
*/
p->pack_size = pack_size + 20;
}
return unpack_entry(p, oe->offset, &type, sizep);
return unpack_entry(p, oe->idx.offset, &type, sizep);
}
static const char *get_mode(const char *str, uint16_t *modep)
@@ -1457,7 +1438,7 @@ static void store_tree(struct tree_entry *root)
if (S_ISDIR(root->versions[0].mode) && le && le->pack_id == pack_id) {
mktree(t, 0, &old_tree);
lo.data = old_tree;
lo.offset = le->offset;
lo.offset = le->idx.offset;
lo.depth = t->delta_depth;
}
@@ -1715,7 +1696,7 @@ static void dump_marks_helper(FILE *f,
for (k = 0; k < 1024; k++) {
if (m->data.marked[k])
fprintf(f, ":%" PRIuMAX " %s\n", base + k,
sha1_to_hex(m->data.marked[k]->sha1));
sha1_to_hex(m->data.marked[k]->idx.sha1));
}
}
}
@@ -1798,7 +1779,7 @@ static void read_marks(void)
e = insert_object(sha1);
e->type = type;
e->pack_id = MAX_PACK_ID;
e->offset = 1; /* just not zero! */
e->idx.offset = 1; /* just not zero! */
}
insert_mark(mark, e);
}
@@ -2183,7 +2164,7 @@ static void file_change_m(struct branch *b)
if (*p == ':') {
char *x;
oe = find_mark(strtoumax(p + 1, &x, 10));
hashcpy(sha1, oe->sha1);
hashcpy(sha1, oe->idx.sha1);
p = x;
} else if (!prefixcmp(p, "inline")) {
inline_data = 1;
@@ -2316,7 +2297,7 @@ static void note_change_n(struct branch *b, unsigned char old_fanout)
if (*p == ':') {
char *x;
oe = find_mark(strtoumax(p + 1, &x, 10));
hashcpy(sha1, oe->sha1);
hashcpy(sha1, oe->idx.sha1);
p = x;
} else if (!prefixcmp(p, "inline")) {
inline_data = 1;
@@ -2339,7 +2320,7 @@ static void note_change_n(struct branch *b, unsigned char old_fanout)
struct object_entry *commit_oe = find_mark(commit_mark);
if (commit_oe->type != OBJ_COMMIT)
die("Mark :%" PRIuMAX " not a commit", commit_mark);
hashcpy(commit_sha1, commit_oe->sha1);
hashcpy(commit_sha1, commit_oe->idx.sha1);
} else if (!get_sha1(p, commit_sha1)) {
unsigned long size;
char *buf = read_object_with_reference(commit_sha1,
@@ -2446,7 +2427,7 @@ static int parse_from(struct branch *b)
struct object_entry *oe = find_mark(idnum);
if (oe->type != OBJ_COMMIT)
die("Mark :%" PRIuMAX " not a commit", idnum);
hashcpy(b->sha1, oe->sha1);
hashcpy(b->sha1, oe->idx.sha1);
if (oe->pack_id != MAX_PACK_ID) {
unsigned long size;
char *buf = gfi_unpack_entry(oe, &size);
@@ -2481,7 +2462,7 @@ static struct hash_list *parse_merge(unsigned int *count)
struct object_entry *oe = find_mark(idnum);
if (oe->type != OBJ_COMMIT)
die("Mark :%" PRIuMAX " not a commit", idnum);
hashcpy(n->sha1, oe->sha1);
hashcpy(n->sha1, oe->idx.sha1);
} else if (!get_sha1(from, n->sha1)) {
unsigned long size;
char *buf = read_object_with_reference(n->sha1,
@@ -2639,7 +2620,7 @@ static void parse_new_tag(void)
from_mark = strtoumax(from + 1, NULL, 10);
oe = find_mark(from_mark);
type = oe->type;
hashcpy(sha1, oe->sha1);
hashcpy(sha1, oe->idx.sha1);
} else if (!get_sha1(from, sha1)) {
unsigned long size;
char *buf;
@@ -2891,6 +2872,17 @@ static int git_pack_config(const char *k, const char *v, void *cb)
pack_compression_seen = 1;
return 0;
}
if (!strcmp(k, "pack.indexversion")) {
pack_idx_default_version = git_config_int(k, v);
if (pack_idx_default_version > 2)
die("bad pack.indexversion=%"PRIu32,
pack_idx_default_version);
return 0;
}
if (!strcmp(k, "pack.packsizelimit")) {
max_packsize = git_config_ulong(k, v);
return 0;
}
if (!strcmp(k, "core.bigfilethreshold")) {
long n = git_config_int(k, v);
big_file_threshold = 0 < n ? n : 0;