From 13320ff610b4083341175c4f8e636a1bc52145f5 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Thu, 18 Jan 2024 01:55:15 +0000 Subject: [PATCH 1/4] submodule-config.h: move check_submodule_url Move 'check_submodule_url' out of 'fsck.c' and into 'submodule-config.h' as a public method, similar to 'check_submodule_name'. With the function now accessible outside of 'fsck', it can be used in a later commit to extend 'test-tool submodule' to check the validity of submodule URLs as it does with names in the 'check-name' subcommand. Other than its location, no changes are made to 'check_submodule_url' in this patch. Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- fsck.c | 133 -------------------------------------------- submodule-config.c | 134 +++++++++++++++++++++++++++++++++++++++++++++ submodule-config.h | 3 + 3 files changed, 137 insertions(+), 133 deletions(-) diff --git a/fsck.c b/fsck.c index 6a0bbc5087..129bc0630d 100644 --- a/fsck.c +++ b/fsck.c @@ -21,7 +21,6 @@ #include "packfile.h" #include "submodule-config.h" #include "config.h" -#include "credential.h" #include "help.h" static ssize_t max_tree_entry_len = 4096; @@ -1048,138 +1047,6 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer, return ret; } -static int starts_with_dot_slash(const char *const path) -{ - return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_SLASH | - PATH_MATCH_XPLATFORM); -} - -static int starts_with_dot_dot_slash(const char *const path) -{ - return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_DOT_SLASH | - PATH_MATCH_XPLATFORM); -} - -static int submodule_url_is_relative(const char *url) -{ - return starts_with_dot_slash(url) || starts_with_dot_dot_slash(url); -} - -/* - * Count directory components that a relative submodule URL should chop - * from the remote_url it is to be resolved against. - * - * In other words, this counts "../" components at the start of a - * submodule URL. 
- * - * Returns the number of directory components to chop and writes a - * pointer to the next character of url after all leading "./" and - * "../" components to out. - */ -static int count_leading_dotdots(const char *url, const char **out) -{ - int result = 0; - while (1) { - if (starts_with_dot_dot_slash(url)) { - result++; - url += strlen("../"); - continue; - } - if (starts_with_dot_slash(url)) { - url += strlen("./"); - continue; - } - *out = url; - return result; - } -} -/* - * Check whether a transport is implemented by git-remote-curl. - * - * If it is, returns 1 and writes the URL that would be passed to - * git-remote-curl to the "out" parameter. - * - * Otherwise, returns 0 and leaves "out" untouched. - * - * Examples: - * http::https://example.com/repo.git -> 1, https://example.com/repo.git - * https://example.com/repo.git -> 1, https://example.com/repo.git - * git://example.com/repo.git -> 0 - * - * This is for use in checking for previously exploitable bugs that - * required a submodule URL to be passed to git-remote-curl. - */ -static int url_to_curl_url(const char *url, const char **out) -{ - /* - * We don't need to check for case-aliases, "http.exe", and so - * on because in the default configuration, is_transport_allowed - * prevents URLs with those schemes from being cloned - * automatically. 
- */ - if (skip_prefix(url, "http::", out) || - skip_prefix(url, "https::", out) || - skip_prefix(url, "ftp::", out) || - skip_prefix(url, "ftps::", out)) - return 1; - if (starts_with(url, "http://") || - starts_with(url, "https://") || - starts_with(url, "ftp://") || - starts_with(url, "ftps://")) { - *out = url; - return 1; - } - return 0; -} - -static int check_submodule_url(const char *url) -{ - const char *curl_url; - - if (looks_like_command_line_option(url)) - return -1; - - if (submodule_url_is_relative(url) || starts_with(url, "git://")) { - char *decoded; - const char *next; - int has_nl; - - /* - * This could be appended to an http URL and url-decoded; - * check for malicious characters. - */ - decoded = url_decode(url); - has_nl = !!strchr(decoded, '\n'); - - free(decoded); - if (has_nl) - return -1; - - /* - * URLs which escape their root via "../" can overwrite - * the host field and previous components, resolving to - * URLs like https::example.com/submodule.git and - * https:///example.com/submodule.git that were - * susceptible to CVE-2020-11008. 
- */ - if (count_leading_dotdots(url, &next) > 0 && - (*next == ':' || *next == '/')) - return -1; - } - - else if (url_to_curl_url(url, &curl_url)) { - struct credential c = CREDENTIAL_INIT; - int ret = 0; - if (credential_from_url_gently(&c, curl_url, 1) || - !*c.host) - ret = -1; - credential_clear(&c); - return ret; - } - - return 0; -} - struct fsck_gitmodules_data { const struct object_id *oid; struct fsck_options *options; diff --git a/submodule-config.c b/submodule-config.c index 6a48fd12f6..cbec13b3a2 100644 --- a/submodule-config.c +++ b/submodule-config.c @@ -14,6 +14,8 @@ #include "parse-options.h" #include "thread-utils.h" #include "tree-walk.h" +#include "url.h" +#include "credential.h" /* * submodule cache lookup structure @@ -228,6 +230,138 @@ int check_submodule_name(const char *name) return 0; } +static int starts_with_dot_slash(const char *const path) +{ + return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_SLASH | + PATH_MATCH_XPLATFORM); +} + +static int starts_with_dot_dot_slash(const char *const path) +{ + return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_DOT_SLASH | + PATH_MATCH_XPLATFORM); +} + +static int submodule_url_is_relative(const char *url) +{ + return starts_with_dot_slash(url) || starts_with_dot_dot_slash(url); +} + +/* + * Count directory components that a relative submodule URL should chop + * from the remote_url it is to be resolved against. + * + * In other words, this counts "../" components at the start of a + * submodule URL. + * + * Returns the number of directory components to chop and writes a + * pointer to the next character of url after all leading "./" and + * "../" components to out. 
+ */ +static int count_leading_dotdots(const char *url, const char **out) +{ + int result = 0; + while (1) { + if (starts_with_dot_dot_slash(url)) { + result++; + url += strlen("../"); + continue; + } + if (starts_with_dot_slash(url)) { + url += strlen("./"); + continue; + } + *out = url; + return result; + } +} +/* + * Check whether a transport is implemented by git-remote-curl. + * + * If it is, returns 1 and writes the URL that would be passed to + * git-remote-curl to the "out" parameter. + * + * Otherwise, returns 0 and leaves "out" untouched. + * + * Examples: + * http::https://example.com/repo.git -> 1, https://example.com/repo.git + * https://example.com/repo.git -> 1, https://example.com/repo.git + * git://example.com/repo.git -> 0 + * + * This is for use in checking for previously exploitable bugs that + * required a submodule URL to be passed to git-remote-curl. + */ +static int url_to_curl_url(const char *url, const char **out) +{ + /* + * We don't need to check for case-aliases, "http.exe", and so + * on because in the default configuration, is_transport_allowed + * prevents URLs with those schemes from being cloned + * automatically. + */ + if (skip_prefix(url, "http::", out) || + skip_prefix(url, "https::", out) || + skip_prefix(url, "ftp::", out) || + skip_prefix(url, "ftps::", out)) + return 1; + if (starts_with(url, "http://") || + starts_with(url, "https://") || + starts_with(url, "ftp://") || + starts_with(url, "ftps://")) { + *out = url; + return 1; + } + return 0; +} + +int check_submodule_url(const char *url) +{ + const char *curl_url; + + if (looks_like_command_line_option(url)) + return -1; + + if (submodule_url_is_relative(url) || starts_with(url, "git://")) { + char *decoded; + const char *next; + int has_nl; + + /* + * This could be appended to an http URL and url-decoded; + * check for malicious characters. 
+ */ + decoded = url_decode(url); + has_nl = !!strchr(decoded, '\n'); + + free(decoded); + if (has_nl) + return -1; + + /* + * URLs which escape their root via "../" can overwrite + * the host field and previous components, resolving to + * URLs like https::example.com/submodule.git and + * https:///example.com/submodule.git that were + * susceptible to CVE-2020-11008. + */ + if (count_leading_dotdots(url, &next) > 0 && + (*next == ':' || *next == '/')) + return -1; + } + + else if (url_to_curl_url(url, &curl_url)) { + struct credential c = CREDENTIAL_INIT; + int ret = 0; + if (credential_from_url_gently(&c, curl_url, 1) || + !*c.host) + ret = -1; + credential_clear(&c); + return ret; + } + + return 0; +} + static int name_and_item_from_var(const char *var, struct strbuf *name, struct strbuf *item) { diff --git a/submodule-config.h b/submodule-config.h index 2a37689cc2..b5785686f5 100644 --- a/submodule-config.h +++ b/submodule-config.h @@ -91,6 +91,9 @@ int config_set_in_gitmodules_file_gently(const char *key, const char *value); */ int check_submodule_name(const char *name); +/* Returns 0 if the URL is valid per RFC3986 and -1 otherwise. */ +int check_submodule_url(const char *url); + /* * Note: these helper functions exist solely to maintain backward * compatibility with 'fetch' and 'update_clone' storing configuration in From 6af2c4ad45083df07b81ebb2c449f97f0bb69315 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Thu, 18 Jan 2024 01:55:16 +0000 Subject: [PATCH 2/4] test-submodule: remove command line handling for check-name The 'check-name' subcommand to 'test-tool submodule' is documented as being able to take a command line argument '<name>'. However, this does not work - and has never worked - because 'argc > 0' triggers the usage message in 'cmd__submodule_check_name()'. 
To simplify the helper and avoid future confusion around proper use of the subcommand, remove any references to command line arguments for 'check-name' in usage strings and handling in 'check_name()'. Helped-by: Jeff King Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- t/helper/test-submodule.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/t/helper/test-submodule.c b/t/helper/test-submodule.c index 356e0a26c5..b266820739 100644 --- a/t/helper/test-submodule.c +++ b/t/helper/test-submodule.c @@ -8,7 +8,7 @@ #include "submodule.h" #define TEST_TOOL_CHECK_NAME_USAGE \ - "test-tool submodule check-name <name>" + "test-tool submodule check-name" static const char *submodule_check_name_usage[] = { TEST_TOOL_CHECK_NAME_USAGE, NULL @@ -35,26 +35,15 @@ static const char *submodule_usage[] = { NULL }; -/* - * Exit non-zero if any of the submodule names given on the command line is - * invalid. If no names are given, filter stdin to print only valid names - * (which is primarily intended for testing). - */ -static int check_name(int argc, const char **argv) +/* Filter stdin to print only valid names. 
*/ +static int check_name(void) { - if (argc > 1) { - while (*++argv) { - if (check_submodule_name(*argv) < 0) - return 1; - } - } else { - struct strbuf buf = STRBUF_INIT; - while (strbuf_getline(&buf, stdin) != EOF) { - if (!check_submodule_name(buf.buf)) - printf("%s\n", buf.buf); - } - strbuf_release(&buf); + struct strbuf buf = STRBUF_INIT; + while (strbuf_getline(&buf, stdin) != EOF) { + if (!check_submodule_name(buf.buf)) + printf("%s\n", buf.buf); } + strbuf_release(&buf); return 0; } @@ -68,7 +57,7 @@ static int cmd__submodule_check_name(int argc, const char **argv) if (argc) usage_with_options(submodule_check_name_usage, options); - return check_name(argc, argv); + return check_name(); } static int cmd__submodule_is_active(int argc, const char **argv) From 7e2fc39d8c02048e9dddcba1b1b6786a8088a1a8 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Thu, 18 Jan 2024 01:55:17 +0000 Subject: [PATCH 3/4] t7450: test submodule urls Add tests to 't7450-bad-git-dotfiles.sh' to check the validity of different submodule URLs. To verify this directly (without setting up test repositories & submodules), add a 'check-url' subcommand to 'test-tool submodule' that calls 'check_submodule_url' in the same way that 'check-name' calls 'check_submodule_name'. Add two tests to separately address cases where the URL check correctly filters out invalid URLs and cases where the check misses invalid URLs. Mark the latter ("url check misses invalid cases") with 'test_expect_failure' to indicate that this is currently broken, which will be fixed in the next step. 
Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- t/helper/test-submodule.c | 35 +++++++++++++++++++++++++++++++---- t/t7450-bad-git-dotfiles.sh | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 4 deletions(-) diff --git a/t/helper/test-submodule.c b/t/helper/test-submodule.c index b266820739..b7b2fb6e44 100644 --- a/t/helper/test-submodule.c +++ b/t/helper/test-submodule.c @@ -14,6 +14,13 @@ static const char *submodule_check_name_usage[] = { NULL }; +#define TEST_TOOL_CHECK_URL_USAGE \ + "test-tool submodule check-url" +static const char *submodule_check_url_usage[] = { + TEST_TOOL_CHECK_URL_USAGE, + NULL +}; + #define TEST_TOOL_IS_ACTIVE_USAGE \ "test-tool submodule is-active <name>" static const char *submodule_is_active_usage[] = { @@ -30,17 +37,23 @@ static const char *submodule_resolve_relative_url_usage[] = { static const char *submodule_usage[] = { TEST_TOOL_CHECK_NAME_USAGE, + TEST_TOOL_CHECK_URL_USAGE, TEST_TOOL_IS_ACTIVE_USAGE, TEST_TOOL_RESOLVE_RELATIVE_URL_USAGE, NULL }; -/* Filter stdin to print only valid names. */ -static int check_name(void) +typedef int (*check_fn_t)(const char *); + +/* + * Apply 'check_fn' to each line of stdin, printing values that pass the check + * to stdout. 
+ */ +static int check_submodule(check_fn_t check_fn) { struct strbuf buf = STRBUF_INIT; while (strbuf_getline(&buf, stdin) != EOF) { - if (!check_submodule_name(buf.buf)) + if (!check_fn(buf.buf)) printf("%s\n", buf.buf); } strbuf_release(&buf); @@ -57,7 +70,20 @@ static int cmd__submodule_check_name(int argc, const char **argv) if (argc) usage_with_options(submodule_check_name_usage, options); - return check_name(); + return check_submodule(check_submodule_name); +} + +static int cmd__submodule_check_url(int argc, const char **argv) +{ + struct option options[] = { + OPT_END() + }; + argc = parse_options(argc, argv, "test-tools", options, + submodule_check_url_usage, 0); + if (argc) + usage_with_options(submodule_check_url_usage, options); + + return check_submodule(check_submodule_url); } static int cmd__submodule_is_active(int argc, const char **argv) @@ -183,6 +209,7 @@ static int cmd__submodule_config_writeable(int argc, const char **argv UNUSED) static struct test_cmd cmds[] = { { "check-name", cmd__submodule_check_name }, + { "check-url", cmd__submodule_check_url }, { "is-active", cmd__submodule_is_active }, { "resolve-relative-url", cmd__submodule_resolve_relative_url}, { "config-list", cmd__submodule_config_list }, diff --git a/t/t7450-bad-git-dotfiles.sh b/t/t7450-bad-git-dotfiles.sh index 35a31acd4d..c73b1c92ec 100755 --- a/t/t7450-bad-git-dotfiles.sh +++ b/t/t7450-bad-git-dotfiles.sh @@ -45,6 +45,41 @@ test_expect_success 'check names' ' test_cmp expect actual ' +test_expect_success 'check urls' ' + cat >expect <<-\EOF && + ./bar/baz/foo.git + https://example.com/foo.git + http://example.com:80/deeper/foo.git + EOF + + test-tool submodule check-url >actual <<-\EOF && + ./bar/baz/foo.git + https://example.com/foo.git + http://example.com:80/deeper/foo.git + -a./foo + ../../..//test/foo.git + ../../../../../:localhost:8080/foo.git + ..\../.\../:example.com/foo.git + ./%0ahost=example.com/foo.git + https://one.example.com/evil?%0ahost=two.example.com + 
https:///example.com/foo.git + https::example.com/foo.git + http:::example.com/foo.git + EOF + + test_cmp expect actual +' + +# NEEDSWORK: the URL checked here is not valid (and will not work as a remote if +# a user attempts to clone it), but the fsck check passes. +test_expect_failure 'url check misses invalid cases' ' + test-tool submodule check-url >actual <<-\EOF && + http://example.com:test/foo.git + EOF + + test_must_be_empty actual +' + test_expect_success 'create innocent subrepo' ' git init innocent && git -C innocent commit --allow-empty -m foo From 8430b438f628f2f0df08622a550e750158167f28 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Thu, 18 Jan 2024 01:55:18 +0000 Subject: [PATCH 4/4] submodule-config.c: strengthen URL fsck check Update the validation of "curl URL" submodule URLs (i.e. those that specify an "http[s]" or "ftp[s]" protocol) in 'check_submodule_url()' to catch more invalid URLs. The existing validation using 'credential_from_url_gently()' parses certain URLs incorrectly, leading to invalid submodule URLs passing 'git fsck' checks. Conversely, 'url_normalize()' - used to validate remote URLs in 'remote_get()' - correctly identifies the invalid URLs missed by 'credential_from_url_gently()'. To catch more invalid cases, replace 'credential_from_url_gently()' with 'url_normalize()' followed by a 'url_decode()' and a check for newlines (mirroring 'check_url_component()' in the 'credential_from_url_gently()' validation). 
Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- submodule-config.c | 16 +++++++++++----- t/t7450-bad-git-dotfiles.sh | 11 +---------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/submodule-config.c b/submodule-config.c index cbec13b3a2..e9b94cb28d 100644 --- a/submodule-config.c +++ b/submodule-config.c @@ -15,7 +15,7 @@ #include "thread-utils.h" #include "tree-walk.h" #include "url.h" -#include "credential.h" +#include "urlmatch.h" /* * submodule cache lookup structure @@ -350,12 +350,18 @@ int check_submodule_url(const char *url) } else if (url_to_curl_url(url, &curl_url)) { - struct credential c = CREDENTIAL_INIT; int ret = 0; - if (credential_from_url_gently(&c, curl_url, 1) || - !*c.host) + char *normalized = url_normalize(curl_url, NULL); + if (normalized) { + char *decoded = url_decode(normalized); + if (strchr(decoded, '\n')) + ret = -1; + free(normalized); + free(decoded); + } else { ret = -1; - credential_clear(&c); + } + return ret; } diff --git a/t/t7450-bad-git-dotfiles.sh b/t/t7450-bad-git-dotfiles.sh index c73b1c92ec..46d4fb0354 100755 --- a/t/t7450-bad-git-dotfiles.sh +++ b/t/t7450-bad-git-dotfiles.sh @@ -63,6 +63,7 @@ test_expect_success 'check urls' ' ./%0ahost=example.com/foo.git https://one.example.com/evil?%0ahost=two.example.com https:///example.com/foo.git + http://example.com:test/foo.git https::example.com/foo.git http:::example.com/foo.git EOF @@ -70,16 +71,6 @@ test_expect_success 'check urls' ' test_cmp expect actual ' -# NEEDSWORK: the URL checked here is not valid (and will not work as a remote if -# a user attempts to clone it), but the fsck check passes. -test_expect_failure 'url check misses invalid cases' ' - test-tool submodule check-url >actual <<-\EOF && - http://example.com:test/foo.git - EOF - - test_must_be_empty actual -' - test_expect_success 'create innocent subrepo' ' git init innocent && git -C innocent commit --allow-empty -m foo