git/fetch.c
Sergey Vlasov a82d07e5e6 [PATCH] fetch.c: Make process() look at each object only once
The process() function is very often called multiple times for the
same object (because lots of trees refer to the same blobs), but it
did not have a fast check for this.  As a result, a lot of useless
calls to has_sha1_file() and parse_object() were made before
discovering that nothing needs to be done.

This patch adds the SEEN flag which is used in process() to make it
look at each object only once.  When testing git-local-fetch on the
repository of GIT, this gives a 14x improvement in CPU usage (mainly
because the redundant calls to parse_object() are now avoided -
parse_object() always unpacks and parses the object data, even if it
was already parsed before).

Signed-off-by: Sergey Vlasov <vsu@altlinux.ru>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-09-22 21:52:11 -07:00

247 lines
4.9 KiB
C

#include "fetch.h"
#include "cache.h"
#include "commit.h"
#include "tree.h"
#include "tag.h"
#include "blob.h"
#include "refs.h"
/* Name of the ref that pull() writes the fetched SHA-1 to on success; NULL means write nothing. */
const char *write_ref = NULL;
/* When set together with write_ref, pull() locks write_ref against this value before walking. */
const unsigned char *current_ref = NULL;
/* Non-zero: process_commit() also walks the commit's tree (cleared after the first commit unless get_all). */
int get_tree = 0;
/* Non-zero: process_commit() also walks the commit's parents. */
int get_history = 0;
/* Non-zero: keep get_tree enabled for every commit walked, not only the first one. */
int get_all = 0;
/* Non-zero: pull_say() prints its progress messages to stderr. */
int get_verbosely = 0;
/* SHA-1 of the commit most recently walked; used by report_missing() for context. */
static unsigned char current_commit_sha1[20];
/*
 * Print a progress message to stderr, but only when get_verbosely is set.
 *
 * fmt is used directly as the fprintf() format and is expected to consume
 * exactly one string argument, which is supplied by hex.
 */
void pull_say(const char *fmt, const char *hex)
{
	if (!get_verbosely)
		return;
	fprintf(stderr, fmt, hex);
}
/*
 * Report on stderr that a needed object could not be obtained.
 *
 * what    - human-readable kind of the object (e.g. its type name)
 * missing - SHA-1 of the object that could not be obtained
 *
 * The message also names the commit recorded in current_commit_sha1.
 */
static void report_missing(const char *what, const unsigned char *missing)
{
	char missing_hex[41];

	/*
	 * Take a private copy before sha1_to_hex() is called a second time
	 * below (presumably it returns a shared static buffer -- confirm
	 * against its definition).  The copy is bounded by the buffer size.
	 */
	snprintf(missing_hex, sizeof(missing_hex), "%s", sha1_to_hex(missing));
	fprintf(stderr,
		"Cannot obtain needed %s %s\nwhile processing commit %s.\n",
		what, missing_hex, sha1_to_hex(current_commit_sha1));
}
static int process(struct object *obj);

/*
 * Parse a tree and hand each of its entries to process().
 *
 * The entry list is detached from the tree (tree->entries is set to NULL)
 * and every list node is freed here, including on the failure path, so the
 * detached list is never leaked.
 *
 * Returns 0 on success, -1 if the tree cannot be parsed or if process()
 * fails for any entry.  After the first failure no further entries are
 * processed; the remaining nodes are only released.
 */
static int process_tree(struct tree *tree)
{
	struct tree_entry_list *entry;
	int ret = 0;

	if (parse_tree(tree))
		return -1;

	entry = tree->entries;
	tree->entries = NULL;
	while (entry) {
		struct tree_entry_list *next = entry->next;

		if (!ret && process(entry->item.any))
			ret = -1;
		free(entry);
		entry = next;
	}
	return ret;
}
/* Bits stored in object->flags by this file. */
/* Set by mark_complete() on commits found via local refs; process_commit() stops at such commits. */
#define COMPLETE 1U
/* Set by process() on a queued object that is not yet present locally. */
#define TO_FETCH 2U
/* Set by process() on a queued object that is already present locally. */
#define TO_SCAN 4U
/* Set by process_object() so that each object's references are walked at most once. */
#define SCANNED 8U
/* Set by process() on first visit so that repeated calls for the same object return immediately. */
#define SEEN 16U
/* Commits marked COMPLETE, inserted by date in mark_complete() and consumed in process_commit(). */
static struct commit_list *complete = NULL;
/*
 * Walk one commit: queue its tree and/or parents according to the
 * get_tree / get_history / get_all settings.
 *
 * Before doing so, entries of the `complete` list whose date is not older
 * than this commit are popped via pop_most_recent_commit() with the
 * COMPLETE mark (presumably this propagates COMPLETE to their parents --
 * confirm against its definition).  A commit that ends up COMPLETE needs
 * no further work.
 *
 * Returns 0 on success, -1 if parsing or any process() call fails.
 */
static int process_commit(struct commit *commit)
{
	struct commit_list *parent;

	if (parse_commit(commit) != 0)
		return -1;

	while (complete != NULL && commit->date <= complete->item->date)
		pop_most_recent_commit(&complete, COMPLETE);

	if ((commit->object.flags & COMPLETE) != 0)
		return 0;

	/* Remember which commit is being walked for report_missing(). */
	memcpy(current_commit_sha1, commit->object.sha1, 20);
	pull_say("walk %s\n", sha1_to_hex(commit->object.sha1));

	if (get_tree) {
		if (process(&commit->tree->object) != 0)
			return -1;
		/* Without get_all only the first walked commit's tree is wanted. */
		if (!get_all)
			get_tree = 0;
	}

	if (!get_history)
		return 0;

	for (parent = commit->parents; parent != NULL; parent = parent->next) {
		if (process(&parent->item->object) != 0)
			return -1;
	}
	return 0;
}
/*
 * Parse a tag and hand the object it points at to process().
 *
 * Returns -1 if the tag cannot be parsed, otherwise the result of
 * process() on the tagged object.
 */
static int process_tag(struct tag *tag)
{
	int parse_failed = parse_tag(tag);

	if (parse_failed)
		return -1;
	return process(tag->tagged);
}
/* Head of the list of objects queued by process() and drained by loop(). */
static struct object_list *process_queue = NULL;
/* Points at the link where the next queued object is inserted (the tail of process_queue). */
static struct object_list **process_queue_end = &process_queue;
/*
 * Walk the references of one object, dispatching on its type.
 *
 * Each object is handled at most once: the SCANNED flag is set on entry
 * and a second call for the same object returns 0 immediately.  Blobs
 * reference nothing and need no further work.
 *
 * Returns 0 on success, -1 if the type-specific walker fails, or the
 * result of error() when the object's type is not one handled here.
 */
static int process_object(struct object *obj)
{
	if (obj->flags & SCANNED)
		return 0;
	obj->flags |= SCANNED;

	if (obj->type == commit_type) {
		if (process_commit((struct commit *)obj))
			return -1;
		return 0;
	}
	if (obj->type == tree_type) {
		if (process_tree((struct tree *)obj))
			return -1;
		return 0;
	}
	if (obj->type == blob_type) {
		return 0;
	}
	if (obj->type == tag_type) {
		if (process_tag((struct tag *)obj))
			return -1;
		return 0;
	}
	/*
	 * obj->type may still be NULL here (loop() does not check whether
	 * parse_object() succeeded); never hand a NULL pointer to "%s".
	 */
	return error("Unable to determine requirements "
		     "of type %s for %s",
		     obj->type ? obj->type : "unknown",
		     sha1_to_hex(obj->sha1));
}
/*
 * Queue an object for later handling by loop(), looking at each object
 * only once (guarded by the SEEN flag).
 *
 * An object that is already present locally is parsed and queued with
 * TO_SCAN; one that is not present is queued with TO_FETCH and announced
 * to prefetch().  Objects already scanned, queued or marked COMPLETE are
 * not queued again.
 *
 * Always returns 0.
 */
static int process(struct object *obj)
{
	unsigned int queue_flag;

	if (obj->flags & SEEN)
		return 0;
	obj->flags |= SEEN;

	if (has_sha1_file(obj->sha1)) {
		/* We already have it, so it only needs to be scanned. */
		parse_object(obj->sha1);
		if (obj->flags & (SCANNED | TO_SCAN))
			return 0;
		queue_flag = TO_SCAN;
	} else {
		if (obj->flags & (COMPLETE | TO_FETCH))
			return 0;
		queue_flag = TO_FETCH;
	}

	/* Append at the tail and advance the tail pointer. */
	object_list_insert(obj, process_queue_end);
	process_queue_end = &(*process_queue_end)->next;
	obj->flags |= queue_flag;

	if (queue_flag == TO_FETCH)
		prefetch(obj->sha1);
	return 0;
}
/*
 * Drain process_queue: fetch every queued object that is not yet present
 * locally, then walk it with process_object() (which may queue more).
 *
 * Returns 0 once the queue is empty, -1 if an object cannot be fetched
 * (after reporting it via report_missing()) or if process_object() fails.
 */
static int loop(void)
{
	while (process_queue) {
		struct object_list *head = process_queue;
		struct object *obj = head->item;

		/* Unlink and release the list node; obj itself stays alive. */
		process_queue = head->next;
		free(head);
		if (!process_queue)
			process_queue_end = &process_queue;

		/*
		 * An object without TO_SCAN was queued because it had to be
		 * fetched first.
		 */
		if (!(obj->flags & TO_SCAN) &&
		    !has_sha1_file(obj->sha1) && fetch(obj->sha1)) {
			const char *what = obj->type ? obj->type : "object";

			report_missing(what, obj->sha1);
			return -1;
		}

		/* Type still unknown: parse the object now that it is available. */
		if (!obj->type)
			parse_object(obj->sha1);
		if (process_object(obj))
			return -1;
	}
	return 0;
}
/*
 * Resolve target into sha1.
 *
 * target is first tried with get_sha1_hex(); if that fails and target
 * passes check_ref_format(), it is resolved with fetch_ref().
 *
 * Returns 0 when sha1 has been filled in, -1 otherwise.
 */
static int interpret_target(char *target, unsigned char *sha1)
{
	if (get_sha1_hex(target, sha1) == 0)
		return 0;
	if (check_ref_format(target) == 0 && fetch_ref(target, sha1) == 0)
		return 0;
	return -1;
}
/*
 * for_each_ref() callback: if sha1 resolves to a commit, flag it COMPLETE
 * and insert it by date into the `complete` list.
 *
 * path is unused.  Always returns 0.
 */
static int mark_complete(const char *path, const unsigned char *sha1)
{
	struct commit *commit = lookup_commit_reference_gently(sha1, 1);

	if (!commit)
		return 0;

	commit->object.flags |= COMPLETE;
	insert_by_date(commit, &complete);
	return 0;
}
/*
 * Fetch target and everything it requires, then optionally record the
 * result in write_ref.
 *
 * target - something interpret_target() can resolve to a SHA-1
 *
 * Returns 0 on success; -1 if the ref cannot be locked or if queuing or
 * walking fails; the result of error() if target cannot be interpreted.
 *
 * NOTE(review): the descriptor returned by lock_ref_sha1() is not released
 * on the failure paths below, and the results of write_ref_sha1() /
 * write_ref_sha1_unlocked() are not checked -- confirm whether that is
 * intended.
 */
int pull(char *target)
{
	unsigned char sha1[20];
	int fd = -1;

	save_commit_buffer = 0;

	if (write_ref && current_ref) {
		fd = lock_ref_sha1(write_ref, current_ref);
		if (fd < 0)
			return -1;
	}

	/* Mark the commits found via existing refs as COMPLETE. */
	for_each_ref(mark_complete);

	if (interpret_target(target, sha1))
		return error("Could not interpret %s as something to pull",
			     target);

	if (process(lookup_unknown_object(sha1)) || loop())
		return -1;

	if (!write_ref)
		return 0;

	if (current_ref)
		write_ref_sha1(write_ref, fd, sha1);
	else
		write_ref_sha1_unlocked(write_ref, sha1);
	return 0;
}