From f9b54e2630f4c839fbc4195693d66e790b09371b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 17 Mar 2013 04:33:12 -0400 Subject: [PATCH 1/2] fast-export: rename handle_object function The handle_object function is rather vaguely named; it only operates on blobs, and its purpose is to export the blob to the output stream. Let's call it "export_blob" to make it more clear what it does. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fast-export.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/builtin/fast-export.c b/builtin/fast-export.c index 12220ad8da..9e89925312 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -113,7 +113,7 @@ static void show_progress(void) printf("progress %d objects\n", counter); } -static void handle_object(const unsigned char *sha1) +static void export_blob(const unsigned char *sha1) { unsigned long size; enum object_type type; @@ -312,7 +312,7 @@ static void handle_commit(struct commit *commit, struct rev_info *rev) /* Export the referenced blobs, and remember the marks. */ for (i = 0; i < diff_queued_diff.nr; i++) if (!S_ISGITLINK(diff_queued_diff.queue[i]->two->mode)) - handle_object(diff_queued_diff.queue[i]->two->sha1); + export_blob(diff_queued_diff.queue[i]->two->sha1); mark_next_object(&commit->object); if (!is_encoding_utf8(encoding)) @@ -509,7 +509,7 @@ static void get_tags_and_duplicates(struct object_array *pending, commit = (struct commit *)tag; break; case OBJ_BLOB: - handle_object(tag->object.sha1); + export_blob(tag->object.sha1); continue; default: /* OBJ_TAG (nested tags) is already handled */ warning("Tag points to object of unexpected type %s, skipping.", From 30b939c33ad5b8a9dfbe3fe5aafc36d89c40409f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 17 Mar 2013 04:38:57 -0400 Subject: [PATCH 2/2] fast-export: do not load blob objects twice When fast-export wants to export a blob object, it first calls parse_object to get a "struct object" and check whether we have already shown the object. If we haven't shown it, we then use read_sha1_file to pull it from disk and write it out. That means we load each blob from disk twice: once for parse_object to find its type and check its sha1, and a second time when we actually output it. We can drop this to a single load by using lookup_object to check the SHOWN flag, and then checking the signature on and outputting a single buffer. This provides modest speedups on git.git (best-of-five, "git fast-export HEAD >/dev/null"): [before] [after] real 0m14.347s real 0m13.780s user 0m14.084s user 0m13.620s sys 0m0.208s sys 0m0.100s and somewhat more on more blob-heavy repos (this is a repository full of media files): [before] [after] real 0m52.236s real 0m44.451s user 0m50.568s user 0m43.000s sys 0m1.536s sys 0m1.284s Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fast-export.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/builtin/fast-export.c b/builtin/fast-export.c index 9e89925312..ed2916595f 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -119,6 +119,7 @@ static void export_blob(const unsigned char *sha1) enum object_type type; char *buf; struct object *object; + int eaten; if (no_data) return; @@ -126,16 +127,18 @@ static void export_blob(const unsigned char *sha1) if (is_null_sha1(sha1)) return; - object = parse_object(sha1); - if (!object) - die ("Could not read blob %s", sha1_to_hex(sha1)); - - if (object->flags & SHOWN) + object = lookup_object(sha1); + if (object && object->flags & SHOWN) return; buf = read_sha1_file(sha1, &type, &size); if (!buf) die ("Could not read blob %s", sha1_to_hex(sha1)); + if (check_sha1_signature(sha1, buf, size, typename(type)) < 0) + die("sha1 mismatch in blob %s", sha1_to_hex(sha1)); + object = parse_object_buffer(sha1, type, size, buf, &eaten); + if (!object) + die("Could not read blob %s", sha1_to_hex(sha1)); mark_next_object(object); @@ -147,7 +150,8 @@ static void export_blob(const unsigned char *sha1) show_progress(); object->flags |= SHOWN; - free(buf); + if (!eaten) + free(buf); } static int depth_first(const void *a_, const void *b_)