diff --git a/fsck.c b/fsck.c index 1ad02fcdfa..8ded0a473a 100644 --- a/fsck.c +++ b/fsck.c @@ -20,7 +20,6 @@ #include "packfile.h" #include "submodule-config.h" #include "config.h" -#include "credential.h" #include "help.h" static ssize_t max_tree_entry_len = 4096; @@ -1047,138 +1046,6 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer, return ret; } -static int starts_with_dot_slash(const char *const path) -{ - return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_SLASH | - PATH_MATCH_XPLATFORM); -} - -static int starts_with_dot_dot_slash(const char *const path) -{ - return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_DOT_SLASH | - PATH_MATCH_XPLATFORM); -} - -static int submodule_url_is_relative(const char *url) -{ - return starts_with_dot_slash(url) || starts_with_dot_dot_slash(url); -} - -/* - * Count directory components that a relative submodule URL should chop - * from the remote_url it is to be resolved against. - * - * In other words, this counts "../" components at the start of a - * submodule URL. - * - * Returns the number of directory components to chop and writes a - * pointer to the next character of url after all leading "./" and - * "../" components to out. - */ -static int count_leading_dotdots(const char *url, const char **out) -{ - int result = 0; - while (1) { - if (starts_with_dot_dot_slash(url)) { - result++; - url += strlen("../"); - continue; - } - if (starts_with_dot_slash(url)) { - url += strlen("./"); - continue; - } - *out = url; - return result; - } -} -/* - * Check whether a transport is implemented by git-remote-curl. - * - * If it is, returns 1 and writes the URL that would be passed to - * git-remote-curl to the "out" parameter. - * - * Otherwise, returns 0 and leaves "out" untouched. - * - * Examples: - * http::https://example.com/repo.git -> 1, https://example.com/repo.git - * https://example.com/repo.git -> 1, https://example.com/repo.git - * git://example.com/repo.git -> 0 - * - * This is for use in checking for previously exploitable bugs that - * required a submodule URL to be passed to git-remote-curl. - */ -static int url_to_curl_url(const char *url, const char **out) -{ - /* - * We don't need to check for case-aliases, "http.exe", and so - * on because in the default configuration, is_transport_allowed - * prevents URLs with those schemes from being cloned - * automatically. - */ - if (skip_prefix(url, "http::", out) || - skip_prefix(url, "https::", out) || - skip_prefix(url, "ftp::", out) || - skip_prefix(url, "ftps::", out)) - return 1; - if (starts_with(url, "http://") || - starts_with(url, "https://") || - starts_with(url, "ftp://") || - starts_with(url, "ftps://")) { - *out = url; - return 1; - } - return 0; -} - -static int check_submodule_url(const char *url) -{ - const char *curl_url; - - if (looks_like_command_line_option(url)) - return -1; - - if (submodule_url_is_relative(url) || starts_with(url, "git://")) { - char *decoded; - const char *next; - int has_nl; - - /* - * This could be appended to an http URL and url-decoded; - * check for malicious characters. - */ - decoded = url_decode(url); - has_nl = !!strchr(decoded, '\n'); - - free(decoded); - if (has_nl) - return -1; - - /* - * URLs which escape their root via "../" can overwrite - * the host field and previous components, resolving to - * URLs like https::example.com/submodule.git and - * https:///example.com/submodule.git that were - * susceptible to CVE-2020-11008. - */ - if (count_leading_dotdots(url, &next) > 0 && - (*next == ':' || *next == '/')) - return -1; - } - - else if (url_to_curl_url(url, &curl_url)) { - struct credential c = CREDENTIAL_INIT; - int ret = 0; - if (credential_from_url_gently(&c, curl_url, 1) || - !*c.host) - ret = -1; - credential_clear(&c); - return ret; - } - - return 0; -} - struct fsck_gitmodules_data { const struct object_id *oid; struct fsck_options *options; diff --git a/submodule-config.c b/submodule-config.c index f4dd482abc..54130f6a38 100644 --- a/submodule-config.c +++ b/submodule-config.c @@ -14,6 +14,8 @@ #include "parse-options.h" #include "thread-utils.h" #include "tree-walk.h" +#include "url.h" +#include "urlmatch.h" /* * submodule cache lookup structure @@ -228,6 +230,144 @@ int check_submodule_name(const char *name) return 0; } +static int starts_with_dot_slash(const char *const path) +{ + return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_SLASH | + PATH_MATCH_XPLATFORM); +} + +static int starts_with_dot_dot_slash(const char *const path) +{ + return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_DOT_SLASH | + PATH_MATCH_XPLATFORM); +} + +static int submodule_url_is_relative(const char *url) +{ + return starts_with_dot_slash(url) || starts_with_dot_dot_slash(url); +} + +/* + * Count directory components that a relative submodule URL should chop + * from the remote_url it is to be resolved against. + * + * In other words, this counts "../" components at the start of a + * submodule URL. + * + * Returns the number of directory components to chop and writes a + * pointer to the next character of url after all leading "./" and + * "../" components to out. + */ +static int count_leading_dotdots(const char *url, const char **out) +{ + int result = 0; + while (1) { + if (starts_with_dot_dot_slash(url)) { + result++; + url += strlen("../"); + continue; + } + if (starts_with_dot_slash(url)) { + url += strlen("./"); + continue; + } + *out = url; + return result; + } +} +/* + * Check whether a transport is implemented by git-remote-curl. + * + * If it is, returns 1 and writes the URL that would be passed to + * git-remote-curl to the "out" parameter. + * + * Otherwise, returns 0 and leaves "out" untouched. + * + * Examples: + * http::https://example.com/repo.git -> 1, https://example.com/repo.git + * https://example.com/repo.git -> 1, https://example.com/repo.git + * git://example.com/repo.git -> 0 + * + * This is for use in checking for previously exploitable bugs that + * required a submodule URL to be passed to git-remote-curl. + */ +static int url_to_curl_url(const char *url, const char **out) +{ + /* + * We don't need to check for case-aliases, "http.exe", and so + * on because in the default configuration, is_transport_allowed + * prevents URLs with those schemes from being cloned + * automatically. + */ + if (skip_prefix(url, "http::", out) || + skip_prefix(url, "https::", out) || + skip_prefix(url, "ftp::", out) || + skip_prefix(url, "ftps::", out)) + return 1; + if (starts_with(url, "http://") || + starts_with(url, "https://") || + starts_with(url, "ftp://") || + starts_with(url, "ftps://")) { + *out = url; + return 1; + } + return 0; +} + +int check_submodule_url(const char *url) +{ + const char *curl_url; + + if (looks_like_command_line_option(url)) + return -1; + + if (submodule_url_is_relative(url) || starts_with(url, "git://")) { + char *decoded; + const char *next; + int has_nl; + + /* + * This could be appended to an http URL and url-decoded; + * check for malicious characters. + */ + decoded = url_decode(url); + has_nl = !!strchr(decoded, '\n'); + + free(decoded); + if (has_nl) + return -1; + + /* + * URLs which escape their root via "../" can overwrite + * the host field and previous components, resolving to + * URLs like https::example.com/submodule.git and + * https:///example.com/submodule.git that were + * susceptible to CVE-2020-11008. + */ + if (count_leading_dotdots(url, &next) > 0 && + (*next == ':' || *next == '/')) + return -1; + } + + else if (url_to_curl_url(url, &curl_url)) { + int ret = 0; + char *normalized = url_normalize(curl_url, NULL); + if (normalized) { + char *decoded = url_decode(normalized); + if (strchr(decoded, '\n')) + ret = -1; + free(normalized); + free(decoded); + } else { + ret = -1; + } + + return ret; + } + + return 0; +} + static int name_and_item_from_var(const char *var, struct strbuf *name, struct strbuf *item) { diff --git a/submodule-config.h b/submodule-config.h index 958f320ac6..b6133af71b 100644 --- a/submodule-config.h +++ b/submodule-config.h @@ -89,6 +89,9 @@ int config_set_in_gitmodules_file_gently(const char *key, const char *value); */ int check_submodule_name(const char *name); +/* Returns 0 if the URL valid per RFC3986 and -1 otherwise. */ +int check_submodule_url(const char *url); + /* * Note: these helper functions exist solely to maintain backward * compatibility with 'fetch' and 'update_clone' storing configuration in diff --git a/t/helper/test-submodule.c b/t/helper/test-submodule.c index 50c154d037..7197969a08 100644 --- a/t/helper/test-submodule.c +++ b/t/helper/test-submodule.c @@ -9,12 +9,19 @@ #include "submodule.h" #define TEST_TOOL_CHECK_NAME_USAGE \ - "test-tool submodule check-name " + "test-tool submodule check-name" static const char *submodule_check_name_usage[] = { TEST_TOOL_CHECK_NAME_USAGE, NULL }; +#define TEST_TOOL_CHECK_URL_USAGE \ + "test-tool submodule check-url" +static const char *submodule_check_url_usage[] = { + TEST_TOOL_CHECK_URL_USAGE, + NULL +}; + #define TEST_TOOL_IS_ACTIVE_USAGE \ "test-tool submodule is-active " static const char *submodule_is_active_usage[] = { @@ -31,31 +38,26 @@ static const char *submodule_resolve_relative_url_usage[] = { static const char *submodule_usage[] = { TEST_TOOL_CHECK_NAME_USAGE, + TEST_TOOL_CHECK_URL_USAGE, TEST_TOOL_IS_ACTIVE_USAGE, TEST_TOOL_RESOLVE_RELATIVE_URL_USAGE, NULL }; +typedef int (*check_fn_t)(const char *); + /* - * Exit non-zero if any of the submodule names given on the command line is - * invalid. If no names are given, filter stdin to print only valid names - * (which is primarily intended for testing). + * Apply 'check_fn' to each line of stdin, printing values that pass the check + * to stdout. */ -static int check_name(int argc, const char **argv) +static int check_submodule(check_fn_t check_fn) { - if (argc > 1) { - while (*++argv) { - if (check_submodule_name(*argv) < 0) - return 1; - } - } else { - struct strbuf buf = STRBUF_INIT; - while (strbuf_getline(&buf, stdin) != EOF) { - if (!check_submodule_name(buf.buf)) - printf("%s\n", buf.buf); - } - strbuf_release(&buf); + struct strbuf buf = STRBUF_INIT; + while (strbuf_getline(&buf, stdin) != EOF) { + if (!check_fn(buf.buf)) + printf("%s\n", buf.buf); } + strbuf_release(&buf); return 0; } @@ -69,7 +71,20 @@ static int cmd__submodule_check_name(int argc, const char **argv) if (argc) usage_with_options(submodule_check_name_usage, options); - return check_name(argc, argv); + return check_submodule(check_submodule_name); +} + +static int cmd__submodule_check_url(int argc, const char **argv) +{ + struct option options[] = { + OPT_END() + }; + argc = parse_options(argc, argv, "test-tools", options, + submodule_check_url_usage, 0); + if (argc) + usage_with_options(submodule_check_url_usage, options); + + return check_submodule(check_submodule_url); } static int cmd__submodule_is_active(int argc, const char **argv) @@ -195,6 +210,7 @@ static int cmd__submodule_config_writeable(int argc, const char **argv UNUSED) static struct test_cmd cmds[] = { { "check-name", cmd__submodule_check_name }, + { "check-url", cmd__submodule_check_url }, { "is-active", cmd__submodule_is_active }, { "resolve-relative-url", cmd__submodule_resolve_relative_url}, { "config-list", cmd__submodule_config_list }, diff --git a/t/t7450-bad-git-dotfiles.sh b/t/t7450-bad-git-dotfiles.sh index 35a31acd4d..46d4fb0354 100755 --- a/t/t7450-bad-git-dotfiles.sh +++ b/t/t7450-bad-git-dotfiles.sh @@ -45,6 +45,32 @@ test_expect_success 'check names' ' test_cmp expect actual ' +test_expect_success 'check urls' ' + cat >expect <<-\EOF && + ./bar/baz/foo.git + https://example.com/foo.git + http://example.com:80/deeper/foo.git + EOF + + test-tool submodule check-url >actual <<-\EOF && + ./bar/baz/foo.git + https://example.com/foo.git + http://example.com:80/deeper/foo.git + -a./foo + ../../..//test/foo.git + ../../../../../:localhost:8080/foo.git + ..\../.\../:example.com/foo.git + ./%0ahost=example.com/foo.git + https://one.example.com/evil?%0ahost=two.example.com + https:///example.com/foo.git + http://example.com:test/foo.git + https::example.com/foo.git + http:::example.com/foo.git + EOF + + test_cmp expect actual +' + test_expect_success 'create innocent subrepo' ' git init innocent && git -C innocent commit --allow-empty -m foo