From 7111feede9c5905199ba48645fadc369faca5711 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Mon, 14 Aug 2006 02:50:18 -0400 Subject: [PATCH] Implement blob ID validation in fast-import. When accepting revision SHA1 IDs from the frontend, verify the SHA1 actually refers to a blob and is known to exist. It's an error to use a SHA1 in a tree if the blob doesn't exist as this would cause git-fsck-objects to report a missing blob should the pack get closed without the blob being appended into it or a subsequent pack. So right now we'll just ask that the frontend "pre-declare" any blobs it wants to use in a tree before it can use them. Signed-off-by: Shawn O. Pearce --- fast-import.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/fast-import.c b/fast-import.c index 4605b7469b..95b84f57e5 100644 --- a/fast-import.c +++ b/fast-import.c @@ -70,6 +70,7 @@ Format of STDIN stream: struct object_entry { struct object_entry *next; + enum object_type type; unsigned long offset; unsigned char sha1[20]; }; @@ -528,6 +529,7 @@ static int store_object( duplicate_count_by_type[type]++; return 1; } + e->type = type; e->offset = pack_offset; object_count++; object_count_by_type[type]++; @@ -713,7 +715,7 @@ static int tree_content_set( } if (!S_ISDIR(e->mode)) { e->tree = new_tree_content(8); - e->mode = 040000; + e->mode = S_IFDIR; } if (!e->tree) load_tree(e); @@ -732,7 +734,7 @@ static int tree_content_set( t->entries[t->entry_count++] = e; if (slash1) { e->tree = new_tree_content(8); - e->mode = 040000; + e->mode = S_IFDIR; tree_content_set(e, slash1 + 1, sha1, mode); } else { e->tree = NULL; @@ -948,16 +950,28 @@ static void load_branch(struct branch *b) static void file_change_m(struct branch *b) { const char *path = read_path(); + struct object_entry *oe; char hexsha1[41]; unsigned char sha1[20]; + char type[20]; yread(0, hexsha1, 40); hexsha1[40] = 0; if (get_sha1_hex(hexsha1, sha1)) die("Invalid sha1 %s for %s", hexsha1, 
path); + oe = find_object(sha1); + if (oe) { + if (oe->type != OBJ_BLOB) + die("%s is a %s not a blob (for %s)", hexsha1, type_names[oe->type], path); + } else { + if (sha1_object_info(sha1, type, NULL)) + die("No blob %s for %s", hexsha1, path); + if (strcmp(blob_type, type)) + die("%s is a %s not a blob (for %s)", hexsha1, type, path); + } - tree_content_set(&b->branch_tree, path, sha1, 0100644); + tree_content_set(&b->branch_tree, path, sha1, S_IFREG | 0644); } static void file_change_d(struct branch *b) @@ -986,6 +1000,10 @@ static void cmd_new_commit() c = body + max_hdr_len; yread(0, c, acmsglen); + /* oddly enough this is all that fsck-objects cares about */ + if (memcmp(c, "author ", 7)) + die("Invalid commit format on branch %s", name); + /* file_change* */ for (;;) { unsigned char cmd; @@ -1104,7 +1122,9 @@ int main(int argc, const char **argv) fprintf(stderr, " tags : %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_TAG], duplicate_count_by_type[OBJ_TAG]); fprintf(stderr, "Total branches: %10lu\n", branch_count); fprintf(stderr, "Total atoms: %10u\n", atom_cnt); - fprintf(stderr, "Memory pools: %10lu MiB\n", total_allocd/(1024*1024)); + fprintf(stderr, "Memory total: %10lu KiB\n", (total_allocd + alloc_count*sizeof(struct object_entry))/1024); + fprintf(stderr, " pools: %10lu KiB\n", total_allocd/1024); + fprintf(stderr, " objects: %10lu KiB\n", (alloc_count*sizeof(struct object_entry))/1024); fprintf(stderr, "---------------------------------------------------\n"); stat(pack_name, &sb);