git/http-walker.c
Tay Ray Chuan b8caac2b8a http*: add http_get_info_packs
http-push.c and http-walker.c no longer have to use fetch_index or
setup_index; they simply need to use http_get_info_packs, a new http
method, in their fetch_indices implementations.

Move fetch_index() and rename to fetch_pack_index() in http.c; this
method is not meant to be used outside of http.c. It invokes
end_url_with_slash with base_url; apart from that change, the code is
identical.

Move setup_index() and rename to fetch_and_setup_pack_index() in
http.c; this method is not meant to be used outside of http.c.

Do not immediately set ret to 0 in http-walker.c::fetch_indices();
instead do it in the HTTP_MISSING_TARGET case, to make it clear that
the HTTP_OK and HTTP_MISSING_TARGET cases both return 0.

Signed-off-by: Tay Ray Chuan <rctay89@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-06-06 11:03:11 -07:00

#include "cache.h"
#include "commit.h"
#include "pack.h"
#include "walker.h"
#include "http.h"
#define PREV_BUF_SIZE 4096
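
/*
 * One candidate object store to fetch from: the remote repository
 * itself or one of its alternates, discovered via
 * objects/info/http-alternates (or the plain info/alternates).
 */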
struct alt_base
{
char *base;
int got_indices;
struct packed_git *packs;
struct alt_base *next;
};
enum object_request_state {
WAITING,
ABORTED,
ACTIVE,
COMPLETE,
};
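
/*
 * Tracks a single in-flight loose-object download: the temporary
 * file it is written to, the zlib stream and SHA-1 context used to
 * verify it, and the curl slot driving the transfer.
 */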
struct object_request
{
struct walker *walker;
unsigned char sha1[20];
struct alt_base *repo;
char *url;
char filename[PATH_MAX];
char tmpfile[PATH_MAX];
int local;
enum object_request_state state;
CURLcode curl_result;
char errorstr[CURL_ERROR_SIZE];
long http_code;
unsigned char real_sha1[20];
git_SHA_CTX c;
z_stream stream;
int zret;
int rename;
struct active_request_slot *slot;
struct object_request *next;
};
struct alternates_request {
struct walker *walker;
const char *base;
char *url;
struct strbuf *buffer;
struct active_request_slot *slot;
int http_specific;
};
struct walker_data {
const char *url;
int got_alternates;
struct alt_base *alt;
};
static struct object_request *object_queue_head;
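
/*
 * CURLOPT_WRITEFUNCTION callback for loose objects: append the raw
 * (deflated) bytes to the temporary file, then inflate them and feed
 * the result to the SHA-1 context so the object can be verified
 * without a second pass over the data.
 */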
static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
void *data)
{
unsigned char expn[4096];
size_t size = eltsize * nmemb;
int posn = 0;
struct object_request *obj_req = (struct object_request *)data;
do {
ssize_t retval = xwrite(obj_req->local,
(char *) ptr + posn, size - posn);
if (retval < 0)
return posn;
posn += retval;
} while (posn < size);
obj_req->stream.avail_in = size;
obj_req->stream.next_in = ptr;
do {
obj_req->stream.next_out = expn;
obj_req->stream.avail_out = sizeof(expn);
obj_req->zret = git_inflate(&obj_req->stream, Z_SYNC_FLUSH);
git_SHA1_Update(&obj_req->c, expn,
sizeof(expn) - obj_req->stream.avail_out);
} while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
data_received++;
return size;
}
static void fetch_alternates(struct walker *walker, const char *base);
static void process_object_response(void *callback_data);
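
/*
 * Kick off the download of one loose object.  Any previously
 * downloaded partial data is replayed through fwrite_sha1_file() so
 * the transfer can resume with an HTTP Range request; the request is
 * then queued on a curl slot with process_object_response() as its
 * completion callback.
 */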
static void start_object_request(struct walker *walker,
struct object_request *obj_req)
{
char *hex = sha1_to_hex(obj_req->sha1);
char prevfile[PATH_MAX];
char *url;
char *posn;
int prevlocal;
unsigned char prev_buf[PREV_BUF_SIZE];
ssize_t prev_read = 0;
long prev_posn = 0;
char range[RANGE_HEADER_SIZE];
struct curl_slist *range_header = NULL;
struct active_request_slot *slot;
snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
unlink_or_warn(prevfile);
rename(obj_req->tmpfile, prevfile);
unlink_or_warn(obj_req->tmpfile);
if (obj_req->local != -1)
error("fd leakage in start: %d", obj_req->local);
obj_req->local = open(obj_req->tmpfile,
O_WRONLY | O_CREAT | O_EXCL, 0666);
/*
* This could have failed due to the "lazy directory creation";
* try to mkdir the last path component.
*/
if (obj_req->local < 0 && errno == ENOENT) {
char *dir = strrchr(obj_req->tmpfile, '/');
if (dir) {
*dir = 0;
mkdir(obj_req->tmpfile, 0777);
*dir = '/';
}
obj_req->local = open(obj_req->tmpfile,
O_WRONLY | O_CREAT | O_EXCL, 0666);
}
if (obj_req->local < 0) {
obj_req->state = ABORTED;
error("Couldn't create temporary file %s for %s: %s",
obj_req->tmpfile, obj_req->filename, strerror(errno));
return;
}
memset(&obj_req->stream, 0, sizeof(obj_req->stream));
git_inflate_init(&obj_req->stream);
git_SHA1_Init(&obj_req->c);
url = xmalloc(strlen(obj_req->repo->base) + 51);
obj_req->url = xmalloc(strlen(obj_req->repo->base) + 51);
strcpy(url, obj_req->repo->base);
posn = url + strlen(obj_req->repo->base);
strcpy(posn, "/objects/");
posn += 9;
memcpy(posn, hex, 2);
posn += 2;
*(posn++) = '/';
strcpy(posn, hex + 2);
strcpy(obj_req->url, url);
/*
* If a previous temp file is present, process what was already
* fetched.
*/
prevlocal = open(prevfile, O_RDONLY);
if (prevlocal != -1) {
do {
prev_read = xread(prevlocal, prev_buf, PREV_BUF_SIZE);
if (prev_read>0) {
if (fwrite_sha1_file(prev_buf,
1,
prev_read,
obj_req) == prev_read)
prev_posn += prev_read;
else
prev_read = -1;
}
} while (prev_read > 0);
close(prevlocal);
}
unlink_or_warn(prevfile);
/*
* Reset inflate/SHA1 if there was an error reading the previous temp
* file; also rewind to the beginning of the local file.
*/
if (prev_read == -1) {
memset(&obj_req->stream, 0, sizeof(obj_req->stream));
git_inflate_init(&obj_req->stream);
git_SHA1_Init(&obj_req->c);
if (prev_posn>0) {
prev_posn = 0;
lseek(obj_req->local, 0, SEEK_SET);
ftruncate(obj_req->local, 0);
}
}
slot = get_active_slot();
slot->callback_func = process_object_response;
slot->callback_data = obj_req;
obj_req->slot = slot;
curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
curl_easy_setopt(slot->curl, CURLOPT_URL, url);
curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
/*
* If we have successfully processed data from a previous fetch
* attempt, only fetch the data we don't already have.
*/
if (prev_posn>0) {
if (walker->get_verbosely)
fprintf(stderr,
"Resuming fetch of object %s at byte %ld\n",
hex, prev_posn);
sprintf(range, "Range: bytes=%ld-", prev_posn);
range_header = curl_slist_append(range_header, range);
curl_easy_setopt(slot->curl,
CURLOPT_HTTPHEADER, range_header);
}
/* Try to get the request started, abort the request on error */
obj_req->state = ACTIVE;
if (!start_active_slot(slot)) {
obj_req->state = ABORTED;
obj_req->slot = NULL;
close(obj_req->local);
obj_req->local = -1;
free(obj_req->url);
return;
}
}
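
/*
 * Wrap up a completed object download: make sure the inflated data
 * ended cleanly and its SHA-1 matches the requested object name
 * before moving the temporary file into place.
 */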
static void finish_object_request(struct object_request *obj_req)
{
struct stat st;
close(obj_req->local);
obj_req->local = -1;
if (obj_req->http_code == 416) {
fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
} else if (obj_req->curl_result != CURLE_OK) {
if (stat(obj_req->tmpfile, &st) == 0)
if (st.st_size == 0)
unlink_or_warn(obj_req->tmpfile);
return;
}
git_inflate_end(&obj_req->stream);
git_SHA1_Final(obj_req->real_sha1, &obj_req->c);
if (obj_req->zret != Z_STREAM_END) {
unlink_or_warn(obj_req->tmpfile);
return;
}
if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
unlink_or_warn(obj_req->tmpfile);
return;
}
obj_req->rename =
move_temp_to_file(obj_req->tmpfile, obj_req->filename);
if (obj_req->rename == 0)
walker_say(obj_req->walker, "got %s\n", sha1_to_hex(obj_req->sha1));
}
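
/*
 * Slot callback invoked when a transfer finishes.  If the object was
 * not found on this server, fetch the alternates list (if we have
 * not already) and retry against the next known repository before
 * finishing the request.
 */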
static void process_object_response(void *callback_data)
{
struct object_request *obj_req =
(struct object_request *)callback_data;
struct walker *walker = obj_req->walker;
struct walker_data *data = walker->data;
struct alt_base *alt = data->alt;
obj_req->curl_result = obj_req->slot->curl_result;
obj_req->http_code = obj_req->slot->http_code;
obj_req->slot = NULL;
obj_req->state = COMPLETE;
/* Use alternates if necessary */
if (missing_target(obj_req)) {
fetch_alternates(walker, alt->base);
if (obj_req->repo->next != NULL) {
obj_req->repo =
obj_req->repo->next;
close(obj_req->local);
obj_req->local = -1;
start_object_request(walker, obj_req);
return;
}
}
finish_object_request(obj_req);
}
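
/* Unlink the request from the object queue and free it. */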
static void release_object_request(struct object_request *obj_req)
{
struct object_request *entry = object_queue_head;
if (obj_req->local != -1)
error("fd leakage in release: %d", obj_req->local);
if (obj_req == object_queue_head) {
object_queue_head = obj_req->next;
} else {
while (entry->next != NULL && entry->next != obj_req)
entry = entry->next;
if (entry->next == obj_req)
entry->next = entry->next->next;
}
free(obj_req->url);
free(obj_req);
}
#ifdef USE_CURL_MULTI
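/*
 * curl-multi fill callback: start the next queued request whose
 * object we do not already have.  Returns 1 if a request was
 * started, 0 if the queue is drained.
 */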
static int fill_active_slot(struct walker *walker)
{
struct object_request *obj_req;
for (obj_req = object_queue_head; obj_req; obj_req = obj_req->next) {
if (obj_req->state == WAITING) {
if (has_sha1_file(obj_req->sha1))
obj_req->state = COMPLETE;
else {
start_object_request(walker, obj_req);
return 1;
}
}
}
return 0;
}
#endif
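
/*
 * Queue a download request for the given object.  With curl-multi
 * the transfer may start right away via fill_active_slots();
 * otherwise it is started later by fetch_object().
 */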
static void prefetch(struct walker *walker, unsigned char *sha1)
{
struct object_request *newreq;
struct object_request *tail;
struct walker_data *data = walker->data;
char *filename = sha1_file_name(sha1);
newreq = xmalloc(sizeof(*newreq));
newreq->walker = walker;
hashcpy(newreq->sha1, sha1);
newreq->repo = data->alt;
newreq->url = NULL;
newreq->local = -1;
newreq->state = WAITING;
snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
"%s.temp", filename);
newreq->slot = NULL;
newreq->next = NULL;
http_is_verbose = walker->get_verbosely;
if (object_queue_head == NULL) {
object_queue_head = newreq;
} else {
tail = object_queue_head;
while (tail->next != NULL)
tail = tail->next;
tail->next = newreq;
}
#ifdef USE_CURL_MULTI
fill_active_slots();
step_active_slots();
#endif
}
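
/*
 * Parse the body of objects/info/http-alternates (or the plain
 * info/alternates fallback).  Each line may be an absolute path on
 * the same server, a relative "../" path, or (for http-alternates)
 * a full URL; each accepted entry becomes a new alt_base appended to
 * the list.
 */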
static void process_alternates_response(void *callback_data)
{
struct alternates_request *alt_req =
(struct alternates_request *)callback_data;
struct walker *walker = alt_req->walker;
struct walker_data *cdata = walker->data;
struct active_request_slot *slot = alt_req->slot;
struct alt_base *tail = cdata->alt;
const char *base = alt_req->base;
static const char null_byte = '\0';
char *data;
int i = 0;
if (alt_req->http_specific) {
if (slot->curl_result != CURLE_OK ||
!alt_req->buffer->len) {
/* Try reusing the slot to get non-http alternates */
alt_req->http_specific = 0;
sprintf(alt_req->url, "%s/objects/info/alternates",
base);
curl_easy_setopt(slot->curl, CURLOPT_URL,
alt_req->url);
active_requests++;
slot->in_use = 1;
if (slot->finished != NULL)
(*slot->finished) = 0;
if (!start_active_slot(slot)) {
cdata->got_alternates = -1;
slot->in_use = 0;
if (slot->finished != NULL)
(*slot->finished) = 1;
}
return;
}
} else if (slot->curl_result != CURLE_OK) {
if (!missing_target(slot)) {
cdata->got_alternates = -1;
return;
}
}
fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
alt_req->buffer->len--;
data = alt_req->buffer->buf;
while (i < alt_req->buffer->len) {
int posn = i;
while (posn < alt_req->buffer->len && data[posn] != '\n')
posn++;
if (data[posn] == '\n') {
int okay = 0;
int serverlen = 0;
struct alt_base *newalt;
char *target = NULL;
if (data[i] == '/') {
/*
* This counts
* http://git.host/pub/scm/linux.git/
* -----------here^
* so memcpy(dst, base, serverlen) will
* copy up to "...git.host".
*/
const char *colon_ss = strstr(base,"://");
if (colon_ss) {
serverlen = (strchr(colon_ss + 3, '/')
- base);
okay = 1;
}
} else if (!memcmp(data + i, "../", 3)) {
/*
* Relative URL; chop the corresponding
* number of subpath from base (and ../
* from data), and concatenate the result.
*
* The code first drops ../ from data, and
* then drops one ../ from data and one path
* from base. IOW, one extra ../ is dropped
* from data than path is dropped from base.
*
* This is not wrong. The alternate in
* http://git.host/pub/scm/linux.git/
* to borrow from
* http://git.host/pub/scm/linus.git/
* is ../../linus.git/objects/. You need
* two ../../ to borrow from your direct
* neighbour.
*/
i += 3;
serverlen = strlen(base);
while (i + 2 < posn &&
!memcmp(data + i, "../", 3)) {
do {
serverlen--;
} while (serverlen &&
base[serverlen - 1] != '/');
i += 3;
}
/* If the server got removed, give up. */
okay = strchr(base, ':') - base + 3 <
serverlen;
} else if (alt_req->http_specific) {
char *colon = strchr(data + i, ':');
char *slash = strchr(data + i, '/');
if (colon && slash && colon < data + posn &&
slash < data + posn && colon < slash) {
okay = 1;
}
}
/* skip "objects\n" at end */
if (okay) {
target = xmalloc(serverlen + posn - i - 6);
memcpy(target, base, serverlen);
memcpy(target + serverlen, data + i,
posn - i - 7);
target[serverlen + posn - i - 7] = 0;
if (walker->get_verbosely)
fprintf(stderr,
"Also look at %s\n", target);
newalt = xmalloc(sizeof(*newalt));
newalt->next = NULL;
newalt->base = target;
newalt->got_indices = 0;
newalt->packs = NULL;
while (tail->next != NULL)
tail = tail->next;
tail->next = newalt;
}
}
i = posn + 1;
}
cdata->got_alternates = 1;
}
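
/*
 * Fetch and process the alternates list for "base", unless another
 * request has already done so (or is doing so right now).  The
 * outcome is recorded in walker_data::got_alternates: 1 on success,
 * -1 on failure.
 */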
static void fetch_alternates(struct walker *walker, const char *base)
{
struct strbuf buffer = STRBUF_INIT;
char *url;
struct active_request_slot *slot;
struct alternates_request alt_req;
struct walker_data *cdata = walker->data;
/*
* If another request has already started fetching alternates,
* wait for them to arrive and return to processing this request's
* curl message
*/
#ifdef USE_CURL_MULTI
while (cdata->got_alternates == 0) {
step_active_slots();
}
#endif
/* Nothing to do if they've already been fetched */
if (cdata->got_alternates == 1)
return;
/* Start the fetch */
cdata->got_alternates = 0;
if (walker->get_verbosely)
fprintf(stderr, "Getting alternates list for %s\n", base);
url = xmalloc(strlen(base) + 31);
sprintf(url, "%s/objects/info/http-alternates", base);
/*
* Use a callback to process the result, since another request
* may fail and need to have alternates loaded before continuing
*/
slot = get_active_slot();
slot->callback_func = process_alternates_response;
alt_req.walker = walker;
slot->callback_data = &alt_req;
curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
curl_easy_setopt(slot->curl, CURLOPT_URL, url);
alt_req.base = base;
alt_req.url = url;
alt_req.buffer = &buffer;
alt_req.http_specific = 1;
alt_req.slot = slot;
if (start_active_slot(slot))
run_active_slot(slot);
else
cdata->got_alternates = -1;
strbuf_release(&buffer);
free(url);
}
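
/*
 * Learn what packs the remote has, via http_get_info_packs().  Both
 * HTTP_OK and HTTP_MISSING_TARGET count as success: a missing
 * objects/info/packs simply means there are no packs to consider.
 */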
static int fetch_indices(struct walker *walker, struct alt_base *repo)
{
int ret;
if (repo->got_indices)
return 0;
if (walker->get_verbosely)
fprintf(stderr, "Getting pack list for %s\n", repo->base);
switch (http_get_info_packs(repo->base, &repo->packs)) {
case HTTP_OK:
case HTTP_MISSING_TARGET:
repo->got_indices = 1;
ret = 0;
break;
default:
repo->got_indices = 0;
ret = -1;
}
return ret;
}
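
/*
 * Download the pack that contains the given object, resuming a
 * partial transfer if a .temp file is already present, then verify
 * and install it.
 */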
static int fetch_pack(struct walker *walker, struct alt_base *repo, unsigned char *sha1)
{
char *url;
struct packed_git *target;
struct packed_git **lst;
FILE *packfile;
char *filename;
char tmpfile[PATH_MAX];
int ret;
long prev_posn = 0;
char range[RANGE_HEADER_SIZE];
struct curl_slist *range_header = NULL;
struct active_request_slot *slot;
struct slot_results results;
if (fetch_indices(walker, repo))
return -1;
target = find_sha1_pack(sha1, repo->packs);
if (!target)
return -1;
if (walker->get_verbosely) {
fprintf(stderr, "Getting pack %s\n",
sha1_to_hex(target->sha1));
fprintf(stderr, " which contains %s\n",
sha1_to_hex(sha1));
}
url = xmalloc(strlen(repo->base) + 65);
sprintf(url, "%s/objects/pack/pack-%s.pack",
repo->base, sha1_to_hex(target->sha1));
filename = sha1_pack_name(target->sha1);
snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
packfile = fopen(tmpfile, "a");
if (!packfile)
return error("Unable to open local file %s for pack",
tmpfile);
slot = get_active_slot();
slot->results = &results;
curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
curl_easy_setopt(slot->curl, CURLOPT_URL, url);
curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
slot->local = packfile;
/*
* If there is data present from a previous transfer attempt,
* resume where it left off
*/
prev_posn = ftell(packfile);
if (prev_posn>0) {
if (walker->get_verbosely)
fprintf(stderr,
"Resuming fetch of pack %s at byte %ld\n",
sha1_to_hex(target->sha1), prev_posn);
sprintf(range, "Range: bytes=%ld-", prev_posn);
range_header = curl_slist_append(range_header, range);
curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
}
if (start_active_slot(slot)) {
run_active_slot(slot);
if (results.curl_result != CURLE_OK) {
fclose(packfile);
slot->local = NULL;
return error("Unable to get pack file %s\n%s", url,
curl_errorstr);
}
} else {
fclose(packfile);
slot->local = NULL;
return error("Unable to start request");
}
target->pack_size = ftell(packfile);
fclose(packfile);
slot->local = NULL;
ret = move_temp_to_file(tmpfile, filename);
if (ret)
return ret;
lst = &repo->packs;
while (*lst != target)
lst = &((*lst)->next);
*lst = (*lst)->next;
if (verify_pack(target))
return -1;
install_packed_git(target);
return 0;
}
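
/*
 * Abandon an in-flight request: close and remove its temporary file,
 * release its curl slot and drop it from the queue.
 */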
static void abort_object_request(struct object_request *obj_req)
{
if (obj_req->local >= 0) {
close(obj_req->local);
obj_req->local = -1;
}
unlink_or_warn(obj_req->tmpfile);
if (obj_req->slot) {
release_active_slot(obj_req->slot);
obj_req->slot = NULL;
}
release_object_request(obj_req);
}
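
/*
 * Wait for the queued request for this object to finish (starting it
 * ourselves when curl-multi is not available) and report the result.
 */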
static int fetch_object(struct walker *walker, struct alt_base *repo, unsigned char *sha1)
{
char *hex = sha1_to_hex(sha1);
int ret = 0;
struct object_request *obj_req = object_queue_head;
while (obj_req != NULL && hashcmp(obj_req->sha1, sha1))
obj_req = obj_req->next;
if (obj_req == NULL)
return error("Couldn't find request for %s in the queue", hex);
if (has_sha1_file(obj_req->sha1)) {
abort_object_request(obj_req);
return 0;
}
#ifdef USE_CURL_MULTI
while (obj_req->state == WAITING)
step_active_slots();
#else
start_object_request(walker, obj_req);
#endif
while (obj_req->state == ACTIVE)
run_active_slot(obj_req->slot);
if (obj_req->local != -1) {
close(obj_req->local);
obj_req->local = -1;
}
if (obj_req->state == ABORTED) {
ret = error("Request for %s aborted", hex);
} else if (obj_req->curl_result != CURLE_OK &&
obj_req->http_code != 416) {
if (missing_target(obj_req))
ret = -1; /* Be silent, it is probably in a pack. */
else
ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
obj_req->errorstr, obj_req->curl_result,
obj_req->http_code, hex);
} else if (obj_req->zret != Z_STREAM_END) {
walker->corrupt_object_found++;
ret = error("File %s (%s) corrupt", hex, obj_req->url);
} else if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
ret = error("File %s has bad hash", hex);
} else if (obj_req->rename < 0) {
ret = error("unable to write sha1 filename %s",
obj_req->filename);
}
release_object_request(obj_req);
return ret;
}
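
/*
 * Walker entry point for a single object: try it as a loose object
 * first, then look for a pack containing it on each known
 * repository, learning about new alternates along the way.
 */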
static int fetch(struct walker *walker, unsigned char *sha1)
{
struct walker_data *data = walker->data;
struct alt_base *altbase = data->alt;
if (!fetch_object(walker, altbase, sha1))
return 0;
while (altbase) {
if (!fetch_pack(walker, altbase, sha1))
return 0;
fetch_alternates(walker, data->alt->base);
altbase = altbase->next;
}
return error("Unable to find %s under %s", sha1_to_hex(sha1),
data->alt->base);
}
static int fetch_ref(struct walker *walker, struct ref *ref)
{
struct walker_data *data = walker->data;
return http_fetch_ref(data->alt->base, ref);
}
static void cleanup(struct walker *walker)
{
http_cleanup();
}
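
/*
 * Create a walker for the given URL: initialize the http layer, set
 * up the primary alt_base (with any trailing slashes stripped) and
 * wire up the walker callbacks.
 */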
struct walker *get_http_walker(const char *url, struct remote *remote)
{
char *s;
struct walker_data *data = xmalloc(sizeof(struct walker_data));
struct walker *walker = xmalloc(sizeof(struct walker));
http_init(remote);
data->alt = xmalloc(sizeof(*data->alt));
data->alt->base = xmalloc(strlen(url) + 1);
strcpy(data->alt->base, url);
for (s = data->alt->base + strlen(data->alt->base) - 1; *s == '/'; --s)
*s = 0;
data->alt->got_indices = 0;
data->alt->packs = NULL;
data->alt->next = NULL;
data->got_alternates = -1;
walker->corrupt_object_found = 0;
walker->fetch = fetch;
walker->fetch_ref = fetch_ref;
walker->prefetch = prefetch;
walker->cleanup = cleanup;
walker->data = data;
#ifdef USE_CURL_MULTI
add_fill_function(walker, (int (*)(void *)) fill_active_slot);
#endif
return walker;
}