Merge branch 'vd/fsck-submodule-url-test'

Tighten URL checks fsck makes in a URL recorded for submodules.

* vd/fsck-submodule-url-test:
  submodule-config.c: strengthen URL fsck check
  t7450: test submodule urls
  test-submodule: remove command line handling for check-name
  submodule-config.h: move check_submodule_url
This commit is contained in:
Junio C Hamano 2024-01-26 08:54:47 -08:00
commit 76bd1294d8
5 changed files with 203 additions and 151 deletions

133
fsck.c
View file

@ -20,7 +20,6 @@
#include "packfile.h"
#include "submodule-config.h"
#include "config.h"
#include "credential.h"
#include "help.h"
static ssize_t max_tree_entry_len = 4096;
@ -1047,138 +1046,6 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
return ret;
}
static int starts_with_dot_slash(const char *const path)
{
return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_SLASH |
PATH_MATCH_XPLATFORM);
}
static int starts_with_dot_dot_slash(const char *const path)
{
return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_DOT_SLASH |
PATH_MATCH_XPLATFORM);
}
static int submodule_url_is_relative(const char *url)
{
return starts_with_dot_slash(url) || starts_with_dot_dot_slash(url);
}
/*
* Count directory components that a relative submodule URL should chop
* from the remote_url it is to be resolved against.
*
* In other words, this counts "../" components at the start of a
* submodule URL.
*
* Returns the number of directory components to chop and writes a
* pointer to the next character of url after all leading "./" and
* "../" components to out.
*/
static int count_leading_dotdots(const char *url, const char **out)
{
int result = 0;
while (1) {
if (starts_with_dot_dot_slash(url)) {
result++;
url += strlen("../");
continue;
}
if (starts_with_dot_slash(url)) {
url += strlen("./");
continue;
}
*out = url;
return result;
}
}
/*
* Check whether a transport is implemented by git-remote-curl.
*
* If it is, returns 1 and writes the URL that would be passed to
* git-remote-curl to the "out" parameter.
*
* Otherwise, returns 0 and leaves "out" untouched.
*
* Examples:
* http::https://example.com/repo.git -> 1, https://example.com/repo.git
* https://example.com/repo.git -> 1, https://example.com/repo.git
* git://example.com/repo.git -> 0
*
* This is for use in checking for previously exploitable bugs that
* required a submodule URL to be passed to git-remote-curl.
*/
static int url_to_curl_url(const char *url, const char **out)
{
/*
* We don't need to check for case-aliases, "http.exe", and so
* on because in the default configuration, is_transport_allowed
* prevents URLs with those schemes from being cloned
* automatically.
*/
if (skip_prefix(url, "http::", out) ||
skip_prefix(url, "https::", out) ||
skip_prefix(url, "ftp::", out) ||
skip_prefix(url, "ftps::", out))
return 1;
if (starts_with(url, "http://") ||
starts_with(url, "https://") ||
starts_with(url, "ftp://") ||
starts_with(url, "ftps://")) {
*out = url;
return 1;
}
return 0;
}
static int check_submodule_url(const char *url)
{
const char *curl_url;
if (looks_like_command_line_option(url))
return -1;
if (submodule_url_is_relative(url) || starts_with(url, "git://")) {
char *decoded;
const char *next;
int has_nl;
/*
* This could be appended to an http URL and url-decoded;
* check for malicious characters.
*/
decoded = url_decode(url);
has_nl = !!strchr(decoded, '\n');
free(decoded);
if (has_nl)
return -1;
/*
* URLs which escape their root via "../" can overwrite
* the host field and previous components, resolving to
* URLs like https::example.com/submodule.git and
* https:///example.com/submodule.git that were
* susceptible to CVE-2020-11008.
*/
if (count_leading_dotdots(url, &next) > 0 &&
(*next == ':' || *next == '/'))
return -1;
}
else if (url_to_curl_url(url, &curl_url)) {
struct credential c = CREDENTIAL_INIT;
int ret = 0;
if (credential_from_url_gently(&c, curl_url, 1) ||
!*c.host)
ret = -1;
credential_clear(&c);
return ret;
}
return 0;
}
struct fsck_gitmodules_data {
const struct object_id *oid;
struct fsck_options *options;

View file

@ -14,6 +14,8 @@
#include "parse-options.h"
#include "thread-utils.h"
#include "tree-walk.h"
#include "url.h"
#include "urlmatch.h"
/*
* submodule cache lookup structure
@ -228,6 +230,144 @@ int check_submodule_name(const char *name)
return 0;
}
static int starts_with_dot_slash(const char *const path)
{
return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_SLASH |
PATH_MATCH_XPLATFORM);
}
static int starts_with_dot_dot_slash(const char *const path)
{
return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_DOT_SLASH |
PATH_MATCH_XPLATFORM);
}
static int submodule_url_is_relative(const char *url)
{
return starts_with_dot_slash(url) || starts_with_dot_dot_slash(url);
}
/*
* Count directory components that a relative submodule URL should chop
* from the remote_url it is to be resolved against.
*
* In other words, this counts "../" components at the start of a
* submodule URL.
*
* Returns the number of directory components to chop and writes a
* pointer to the next character of url after all leading "./" and
* "../" components to out.
*/
static int count_leading_dotdots(const char *url, const char **out)
{
int result = 0;
while (1) {
if (starts_with_dot_dot_slash(url)) {
result++;
url += strlen("../");
continue;
}
if (starts_with_dot_slash(url)) {
url += strlen("./");
continue;
}
*out = url;
return result;
}
}
/*
* Check whether a transport is implemented by git-remote-curl.
*
* If it is, returns 1 and writes the URL that would be passed to
* git-remote-curl to the "out" parameter.
*
* Otherwise, returns 0 and leaves "out" untouched.
*
* Examples:
* http::https://example.com/repo.git -> 1, https://example.com/repo.git
* https://example.com/repo.git -> 1, https://example.com/repo.git
* git://example.com/repo.git -> 0
*
* This is for use in checking for previously exploitable bugs that
* required a submodule URL to be passed to git-remote-curl.
*/
static int url_to_curl_url(const char *url, const char **out)
{
/*
* We don't need to check for case-aliases, "http.exe", and so
* on because in the default configuration, is_transport_allowed
* prevents URLs with those schemes from being cloned
* automatically.
*/
if (skip_prefix(url, "http::", out) ||
skip_prefix(url, "https::", out) ||
skip_prefix(url, "ftp::", out) ||
skip_prefix(url, "ftps::", out))
return 1;
if (starts_with(url, "http://") ||
starts_with(url, "https://") ||
starts_with(url, "ftp://") ||
starts_with(url, "ftps://")) {
*out = url;
return 1;
}
return 0;
}
int check_submodule_url(const char *url)
{
const char *curl_url;
if (looks_like_command_line_option(url))
return -1;
if (submodule_url_is_relative(url) || starts_with(url, "git://")) {
char *decoded;
const char *next;
int has_nl;
/*
* This could be appended to an http URL and url-decoded;
* check for malicious characters.
*/
decoded = url_decode(url);
has_nl = !!strchr(decoded, '\n');
free(decoded);
if (has_nl)
return -1;
/*
* URLs which escape their root via "../" can overwrite
* the host field and previous components, resolving to
* URLs like https::example.com/submodule.git and
* https:///example.com/submodule.git that were
* susceptible to CVE-2020-11008.
*/
if (count_leading_dotdots(url, &next) > 0 &&
(*next == ':' || *next == '/'))
return -1;
}
else if (url_to_curl_url(url, &curl_url)) {
int ret = 0;
char *normalized = url_normalize(curl_url, NULL);
if (normalized) {
char *decoded = url_decode(normalized);
if (strchr(decoded, '\n'))
ret = -1;
free(normalized);
free(decoded);
} else {
ret = -1;
}
return ret;
}
return 0;
}
static int name_and_item_from_var(const char *var, struct strbuf *name,
struct strbuf *item)
{

View file

@ -89,6 +89,9 @@ int config_set_in_gitmodules_file_gently(const char *key, const char *value);
*/
int check_submodule_name(const char *name);
/* Returns 0 if the URL valid per RFC3986 and -1 otherwise. */
int check_submodule_url(const char *url);
/*
* Note: these helper functions exist solely to maintain backward
* compatibility with 'fetch' and 'update_clone' storing configuration in

View file

@ -9,12 +9,19 @@
#include "submodule.h"
#define TEST_TOOL_CHECK_NAME_USAGE \
"test-tool submodule check-name <name>"
"test-tool submodule check-name"
static const char *submodule_check_name_usage[] = {
TEST_TOOL_CHECK_NAME_USAGE,
NULL
};
#define TEST_TOOL_CHECK_URL_USAGE \
"test-tool submodule check-url"
static const char *submodule_check_url_usage[] = {
TEST_TOOL_CHECK_URL_USAGE,
NULL
};
#define TEST_TOOL_IS_ACTIVE_USAGE \
"test-tool submodule is-active <name>"
static const char *submodule_is_active_usage[] = {
@ -31,31 +38,26 @@ static const char *submodule_resolve_relative_url_usage[] = {
static const char *submodule_usage[] = {
TEST_TOOL_CHECK_NAME_USAGE,
TEST_TOOL_CHECK_URL_USAGE,
TEST_TOOL_IS_ACTIVE_USAGE,
TEST_TOOL_RESOLVE_RELATIVE_URL_USAGE,
NULL
};
typedef int (*check_fn_t)(const char *);
/*
* Exit non-zero if any of the submodule names given on the command line is
* invalid. If no names are given, filter stdin to print only valid names
* (which is primarily intended for testing).
* Apply 'check_fn' to each line of stdin, printing values that pass the check
* to stdout.
*/
static int check_name(int argc, const char **argv)
static int check_submodule(check_fn_t check_fn)
{
if (argc > 1) {
while (*++argv) {
if (check_submodule_name(*argv) < 0)
return 1;
}
} else {
struct strbuf buf = STRBUF_INIT;
while (strbuf_getline(&buf, stdin) != EOF) {
if (!check_submodule_name(buf.buf))
if (!check_fn(buf.buf))
printf("%s\n", buf.buf);
}
strbuf_release(&buf);
}
return 0;
}
@ -69,7 +71,20 @@ static int cmd__submodule_check_name(int argc, const char **argv)
if (argc)
usage_with_options(submodule_check_name_usage, options);
return check_name(argc, argv);
return check_submodule(check_submodule_name);
}
static int cmd__submodule_check_url(int argc, const char **argv)
{
struct option options[] = {
OPT_END()
};
argc = parse_options(argc, argv, "test-tools", options,
submodule_check_url_usage, 0);
if (argc)
usage_with_options(submodule_check_url_usage, options);
return check_submodule(check_submodule_url);
}
static int cmd__submodule_is_active(int argc, const char **argv)
@ -195,6 +210,7 @@ static int cmd__submodule_config_writeable(int argc, const char **argv UNUSED)
static struct test_cmd cmds[] = {
{ "check-name", cmd__submodule_check_name },
{ "check-url", cmd__submodule_check_url },
{ "is-active", cmd__submodule_is_active },
{ "resolve-relative-url", cmd__submodule_resolve_relative_url},
{ "config-list", cmd__submodule_config_list },

View file

@ -45,6 +45,32 @@ test_expect_success 'check names' '
test_cmp expect actual
'
test_expect_success 'check urls' '
cat >expect <<-\EOF &&
./bar/baz/foo.git
https://example.com/foo.git
http://example.com:80/deeper/foo.git
EOF
test-tool submodule check-url >actual <<-\EOF &&
./bar/baz/foo.git
https://example.com/foo.git
http://example.com:80/deeper/foo.git
-a./foo
../../..//test/foo.git
../../../../../:localhost:8080/foo.git
..\../.\../:example.com/foo.git
./%0ahost=example.com/foo.git
https://one.example.com/evil?%0ahost=two.example.com
https:///example.com/foo.git
http://example.com:test/foo.git
https::example.com/foo.git
http:::example.com/foo.git
EOF
test_cmp expect actual
'
test_expect_success 'create innocent subrepo' '
git init innocent &&
git -C innocent commit --allow-empty -m foo