diff --git a/Documentation/git-http-backend.txt b/Documentation/git-http-backend.txt
index 3ca18c4de5..9268fb6b1e 100644
--- a/Documentation/git-http-backend.txt
+++ b/Documentation/git-http-backend.txt
@@ -255,6 +255,15 @@ The GIT_HTTP_EXPORT_ALL environmental variable may be passed to
 'git-http-backend' to bypass the check for the "git-daemon-export-ok"
 file in each repository before allowing export of that repository.
 
+The `GIT_HTTP_MAX_REQUEST_BUFFER` environment variable (or the
+`http.maxRequestBuffer` config variable) may be set to change the
+largest ref negotiation request that git will handle during a fetch; any
+fetch requiring a larger buffer will not succeed. This value should not
+normally need to be changed, but may be helpful if you are fetching from
+a repository with an extremely large number of refs. The value can be
+specified with a unit (e.g., `100M` for 100 megabytes). The default is
+10 megabytes.
+
 The backend process sets GIT_COMMITTER_NAME to '$REMOTE_USER' and
 GIT_COMMITTER_EMAIL to '$\{REMOTE_USER}@http.$\{REMOTE_ADDR\}',
 ensuring that any reflogs created by 'git-receive-pack' contain some
diff --git a/http-backend.c b/http-backend.c
index b6c0484fb2..6bf139b768 100644
--- a/http-backend.c
+++ b/http-backend.c
@@ -13,18 +13,20 @@ static const char content_type[] = "Content-Type";
 static const char content_length[] = "Content-Length";
 static const char last_modified[] = "Last-Modified";
 static int getanyfile = 1;
+static unsigned long max_request_buffer = 10 * 1024 * 1024;
 static struct string_list *query_params;
 
 struct rpc_service {
 	const char *name;
 	const char *config_name;
+	unsigned buffer_input : 1;
 	signed enabled : 2;
 };
 
 static struct rpc_service rpc_service[] = {
-	{ "upload-pack", "uploadpack", 1 },
-	{ "receive-pack", "receivepack", -1 },
+	{ "upload-pack", "uploadpack", 1, 1 },
+	{ "receive-pack", "receivepack", 0, -1 },
 };
 
 static struct string_list *get_parameters(void)
@@ -225,6 +227,7 @@ static void http_config(void)
 	struct strbuf var = STRBUF_INIT;
 
 	git_config_get_bool("http.getanyfile", &getanyfile);
+	git_config_get_ulong("http.maxrequestbuffer", &max_request_buffer);
 
 	for (i = 0; i < ARRAY_SIZE(rpc_service); i++) {
 		struct rpc_service *svc = &rpc_service[i];
@@ -266,9 +269,52 @@ static struct rpc_service *select_service(const char *name)
 	return svc;
 }
 
-static void inflate_request(const char *prog_name, int out)
+/*
+ * This is basically strbuf_read(), except that if we
+ * hit max_request_buffer we die (we'd rather reject a
+ * maliciously large request than chew up infinite memory).
+ */
+static ssize_t read_request(int fd, unsigned char **out)
+{
+	size_t len = 0, alloc = 8192;
+	unsigned char *buf = xmalloc(alloc);
+
+	if (max_request_buffer < alloc)
+		max_request_buffer = alloc;
+
+	while (1) {
+		ssize_t cnt;
+
+		cnt = read_in_full(fd, buf + len, alloc - len);
+		if (cnt < 0) {
+			free(buf);
+			return -1;
+		}
+
+		/* partial read from read_in_full means we hit EOF */
+		len += cnt;
+		if (len < alloc) {
+			*out = buf;
+			return len;
+		}
+
+		/* otherwise, grow and try again (if we can) */
+		if (alloc == max_request_buffer)
+			die("request was larger than our maximum size (%lu);"
+			    " try setting GIT_HTTP_MAX_REQUEST_BUFFER",
+			    max_request_buffer);
+
+		alloc = alloc_nr(alloc);
+		if (alloc > max_request_buffer)
+			alloc = max_request_buffer;
+		REALLOC_ARRAY(buf, alloc);
+	}
+}
+
+static void inflate_request(const char *prog_name, int out, int buffer_input)
 {
 	git_zstream stream;
+	unsigned char *full_request = NULL;
 	unsigned char in_buf[8192];
 	unsigned char out_buf[8192];
 	unsigned long cnt = 0;
@@ -277,11 +323,21 @@ static void inflate_request(const char *prog_name, int out)
 	git_inflate_init_gzip_only(&stream);
 
 	while (1) {
-		ssize_t n = xread(0, in_buf, sizeof(in_buf));
+		ssize_t n;
+
+		if (buffer_input) {
+			if (full_request)
+				n = 0; /* nothing left to read */
+			else
+				n = read_request(0, &full_request);
+			stream.next_in = full_request;
+		} else {
+			n = xread(0, in_buf, sizeof(in_buf));
+			stream.next_in = in_buf;
+		}
+
 		if (n <= 0)
 			die("request ended in the middle of the gzip stream");
-
-		stream.next_in = in_buf;
 		stream.avail_in = n;
 
 		while (0 < stream.avail_in) {
@@ -307,9 +363,22 @@ done:
 	git_inflate_end(&stream);
 	close(out);
+	free(full_request);
 }
 
-static void run_service(const char **argv)
+static void copy_request(const char *prog_name, int out)
+{
+	unsigned char *buf;
+	ssize_t n = read_request(0, &buf);
+	if (n < 0)
+		die_errno("error reading request body");
+	if (write_in_full(out, buf, n) != n)
+		die("%s aborted reading request", prog_name);
+	close(out);
+	free(buf);
+}
+
+static void run_service(const char **argv, int buffer_input)
 {
 	const char *encoding = getenv("HTTP_CONTENT_ENCODING");
 	const char *user = getenv("REMOTE_USER");
@@ -334,7 +403,7 @@ static void run_service(const char **argv)
 			 "GIT_COMMITTER_EMAIL=%s@http.%s", user, host);
 
 	cld.argv = argv;
-	if (gzipped_request)
+	if (buffer_input || gzipped_request)
 		cld.in = -1;
 	cld.git_cmd = 1;
 	if (start_command(&cld))
@@ -342,7 +411,9 @@ static void run_service(const char **argv)
 	close(1);
 	if (gzipped_request)
-		inflate_request(argv[0], cld.in);
+		inflate_request(argv[0], cld.in, buffer_input);
+	else if (buffer_input)
+		copy_request(argv[0], cld.in);
 	else
 		close(0);
 
@@ -392,7 +463,7 @@ static void get_info_refs(char *arg)
 		packet_flush(1);
 
 		argv[0] = svc->name;
-		run_service(argv);
+		run_service(argv, 0);
 
 	} else {
 		select_getanyfile();
@@ -496,25 +567,28 @@ static void service_rpc(char *service_name)
 	end_headers();
 
 	argv[0] = svc->name;
-	run_service(argv);
+	run_service(argv, svc->buffer_input);
 	strbuf_release(&buf);
 }
 
+static int dead;
 static NORETURN void die_webcgi(const char *err, va_list params)
 {
-	static int dead;
+	if (dead <= 1) {
+		vreportf("fatal: ", err, params);
 
-	if (!dead) {
-		dead = 1;
 		http_status(500, "Internal Server Error");
 		hdr_nocache();
 		end_headers();
-
-		vreportf("fatal: ", err, params);
 	}
 	exit(0); /* we successfully reported a failure ;-) */
 }
 
+static int die_webcgi_recursing(void)
+{
+	return dead++ > 1;
+}
+
 static char *getdir(void)
 {
 	struct strbuf buf = STRBUF_INIT;
@@ -569,6 +643,7 @@ int main(int argc, char **argv)
 
 	git_extract_argv0_path(argv[0]);
 	set_die_routine(die_webcgi);
+	set_die_is_recursing_routine(die_webcgi_recursing);
 
 	if (!method)
 		die("No REQUEST_METHOD from server");
@@ -619,6 +694,9 @@ int main(int argc, char **argv)
 		not_found("Repository not exported: '%s'", dir);
 
 	http_config();
+	max_request_buffer = git_env_ulong("GIT_HTTP_MAX_REQUEST_BUFFER",
+					   max_request_buffer);
+
 	cmd->imp(cmd_arg);
 	return 0;
 }
diff --git a/t/t5551-http-fetch-smart.sh b/t/t5551-http-fetch-smart.sh
index 2b29311901..58207d8825 100755
--- a/t/t5551-http-fetch-smart.sh
+++ b/t/t5551-http-fetch-smart.sh
@@ -218,27 +218,35 @@ test_expect_success 'transfer.hiderefs works over smart-http' '
 	git -C hidden.git rev-parse --verify b
 '
 
-test_expect_success 'create 2,000 tags in the repo' '
-	(
-	cd "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
-	for i in $(test_seq 2000)
+# create an arbitrary number of tags, numbered from tag-$1 to tag-$2
+create_tags () {
+	rm -f marks &&
+	for i in $(test_seq "$1" "$2")
 	do
-		echo "commit refs/heads/too-many-refs"
-		echo "mark :$i"
-		echo "committer git <git@example.com> $i +0000"
-		echo "data 0"
-		echo "M 644 inline bla.txt"
-		echo "data 4"
-		echo "bla"
+		# don't use here-doc, because it requires a process
+		# per loop iteration
+		echo "commit refs/heads/too-many-refs-$1" &&
+		echo "mark :$i" &&
+		echo "committer git <git@example.com> $i +0000" &&
+		echo "data 0" &&
+		echo "M 644 inline bla.txt" &&
+		echo "data 4" &&
+		echo "bla" &&
 		# make every commit dangling by always
 		# rewinding the branch after each commit
-		echo "reset refs/heads/too-many-refs"
-		echo "from :1"
+		echo "reset refs/heads/too-many-refs-$1" &&
+		echo "from :$1"
 	done | git fast-import --export-marks=marks &&
 
 	# now assign tags to all the dangling commits we created above
 	tag=$(perl -e "print \"bla\" x 30") &&
 	sed -e "s|^:\([^ ]*\) \(.*\)$|\2 refs/tags/$tag-\1|" <marks >packed-refs
+}
+
+test_expect_success 'create 2,000 tags in the repo' '
+	(
+	cd "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
+	create_tags 1 2000
 	)
 '
 
@@ -259,5 +267,20 @@ test_expect_success 'large fetch-pack requests can be split across POSTs' '
 	test_line_count = 2 posts
 '
 
+test_expect_success EXPENSIVE 'http can handle enormous ref negotiation' '
+	(
+	cd "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
+	create_tags 2001 50000
+	) &&
+	git -C too-many-refs fetch -q --tags &&
+	(
+	cd "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
+	create_tags 50001 100000
+	) &&
+	git -C too-many-refs fetch -q --tags &&
+	git -C too-many-refs for-each-ref refs/tags >tags &&
+	test_line_count = 100000 tags
+'
+
 stop_httpd
 test_done
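
A note on using the new knob, not part of the patch itself: the sketch
below shows the two ways to raise the limit. The `100M` value and the
Apache `SetEnv` spelling are illustrative only; the precedence follows
from main() above, which applies git_env_ulong() after http_config(),
so the environment overrides the config when both are set.

    # Per-repository configuration, read by http_config():
    git config http.maxRequestBuffer 100M

    # Or in the backend's CGI environment, which wins over the config;
    # under Apache that could be, e.g.:
    #   SetEnv GIT_HTTP_MAX_REQUEST_BUFFER 100M

Note also that the new 100,000-tag test is guarded by the EXPENSIVE
prerequisite, which t/test-lib.sh sets only when GIT_TEST_LONG is set,
so a plain run of the suite skips it:

    # Run t5551 with the EXPENSIVE test enabled:
    cd t && GIT_TEST_LONG=1 ./t5551-http-fetch-smart.sh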