fsck: convert gitmodules url to URL passed to curl

In 07259e74ec (fsck: detect gitmodules URLs with embedded newlines,
2020-03-11), git fsck learned to check whether URLs in .gitmodules could
be understood by the credential machinery when they are handled by
git-remote-curl.

However, the check is overbroad: it checks all URLs instead of only
URLs that would be passed to git-remote-curl. In principle a git:// or
file:/// URL does not need to follow the same conventions as an http://
URL; in particular, git:// and file:// protocols are not succeptible to
issues in the credential API because they do not support attaching
credentials.

In the HTTP case, the URL in .gitmodules does not always match the URL
that would be passed to git-remote-curl and the credential machinery:
Git's URL syntax allows specifying a remote helper followed by a "::"
delimiter and a URL to be passed to it, so that

	git ls-remote http::https://example.com/repo.git

invokes git-remote-http with https://example.com/repo.git as its URL
argument. With today's checks, that distinction does not make a
difference, but for a check we are about to introduce (for empty URL
schemes) it will matter.

.gitmodules files also support relative URLs. To ensure coverage for the
https based embedded-newline attack, urldecode and check them directly
for embedded newlines.

Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Reviewed-by: Jeff King <peff@peff.net>
This commit is contained in:
Jonathan Nieder 2020-04-18 20:52:34 -07:00
parent 8ba8ed568e
commit a2b26ffb1a
2 changed files with 118 additions and 5 deletions

94
fsck.c
View file

@ -7,6 +7,7 @@
#include "tag.h"
#include "fsck.h"
#include "refs.h"
#include "url.h"
#include "utf8.h"
#include "sha1-array.h"
#include "decorate.h"
@ -942,17 +943,100 @@ static int fsck_tag(struct tag *tag, const char *data,
return fsck_tag_buffer(tag, data, size, options);
}
/*
* Like builtin/submodule--helper.c's starts_with_dot_slash, but without
* relying on the platform-dependent is_dir_sep helper.
*
* This is for use in checking whether a submodule URL is interpreted as
* relative to the current directory on any platform, since \ is a
* directory separator on Windows but not on other platforms.
*/
static int starts_with_dot_slash(const char *str)
{
return str[0] == '.' && (str[1] == '/' || str[1] == '\\');
}
/*
* Like starts_with_dot_slash, this is a variant of submodule--helper's
* helper of the same name with the twist that it accepts backslash as a
* directory separator even on non-Windows platforms.
*/
static int starts_with_dot_dot_slash(const char *str)
{
return str[0] == '.' && starts_with_dot_slash(str + 1);
}
static int submodule_url_is_relative(const char *url)
{
return starts_with_dot_slash(url) || starts_with_dot_dot_slash(url);
}
/*
* Check whether a transport is implemented by git-remote-curl.
*
* If it is, returns 1 and writes the URL that would be passed to
* git-remote-curl to the "out" parameter.
*
* Otherwise, returns 0 and leaves "out" untouched.
*
* Examples:
* http::https://example.com/repo.git -> 1, https://example.com/repo.git
* https://example.com/repo.git -> 1, https://example.com/repo.git
* git://example.com/repo.git -> 0
*
* This is for use in checking for previously exploitable bugs that
* required a submodule URL to be passed to git-remote-curl.
*/
static int url_to_curl_url(const char *url, const char **out)
{
/*
* We don't need to check for case-aliases, "http.exe", and so
* on because in the default configuration, is_transport_allowed
* prevents URLs with those schemes from being cloned
* automatically.
*/
if (skip_prefix(url, "http::", out) ||
skip_prefix(url, "https::", out) ||
skip_prefix(url, "ftp::", out) ||
skip_prefix(url, "ftps::", out))
return 1;
if (starts_with(url, "http://") ||
starts_with(url, "https://") ||
starts_with(url, "ftp://") ||
starts_with(url, "ftps://")) {
*out = url;
return 1;
}
return 0;
}
static int check_submodule_url(const char *url)
{
struct credential c = CREDENTIAL_INIT;
int ret;
const char *curl_url;
if (looks_like_command_line_option(url))
return -1;
ret = credential_from_url_gently(&c, url, 1);
credential_clear(&c);
return ret;
if (submodule_url_is_relative(url)) {
/*
* This could be appended to an http URL and url-decoded;
* check for malicious characters.
*/
char *decoded = url_decode(url);
int has_nl = !!strchr(decoded, '\n');
free(decoded);
if (has_nl)
return -1;
}
else if (url_to_curl_url(url, &curl_url)) {
struct credential c = CREDENTIAL_INIT;
int ret = credential_from_url_gently(&c, curl_url, 1);
credential_clear(&c);
return ret;
}
return 0;
}
struct fsck_gitmodules_data {

View file

@ -60,6 +60,20 @@ test_expect_success 'trailing backslash is handled correctly' '
test_i18ngrep ! "unknown option" err
'
test_expect_success 'fsck permits embedded newline with unrecognized scheme' '
git checkout --orphan newscheme &&
cat >.gitmodules <<-\EOF &&
[submodule "foo"]
url = "data://acjbkd%0akajfdickajkd"
EOF
git add .gitmodules &&
git commit -m "gitmodules with unrecognized scheme" &&
test_when_finished "rm -rf dst" &&
git init --bare dst &&
git -C dst config transfer.fsckObjects true &&
git push dst HEAD
'
test_expect_success 'fsck rejects embedded newline in url' '
# create an orphan branch to avoid existing .gitmodules objects
git checkout --orphan newline &&
@ -76,4 +90,19 @@ test_expect_success 'fsck rejects embedded newline in url' '
grep gitmodulesUrl err
'
test_expect_success 'fsck rejects embedded newline in relative url' '
git checkout --orphan relative-newline &&
cat >.gitmodules <<-\EOF &&
[submodule "foo"]
url = "./%0ahost=two.example.com/foo.git"
EOF
git add .gitmodules &&
git commit -m "relative url with newline" &&
test_when_finished "rm -rf dst" &&
git init --bare dst &&
git -C dst config transfer.fsckObjects true &&
test_must_fail git push dst HEAD 2>err &&
grep gitmodulesUrl err
'
test_done