diff --git a/Documentation/RelNotes/2.13.7.txt b/Documentation/RelNotes/2.13.7.txt new file mode 100644 index 0000000000..09fc01406c --- /dev/null +++ b/Documentation/RelNotes/2.13.7.txt @@ -0,0 +1,20 @@ +Git v2.13.7 Release Notes +========================= + +Fixes since v2.13.6 +------------------- + + * Submodule "names" come from the untrusted .gitmodules file, but we + blindly append them to $GIT_DIR/modules to create our on-disk repo + paths. This means you can do bad things by putting "../" into the + name. We now enforce some rules for submodule names which will cause + Git to ignore these malicious names (CVE-2018-11235). + + Credit for finding this vulnerability and the proof of concept from + which the test script was adapted goes to Etienne Stalmans. + + * It was possible to trick the code that sanity-checks paths on NTFS + into reading random piece of memory (CVE-2018-11233). + +Credit for fixing for these bugs goes to Jeff King, Johannes +Schindelin and others. diff --git a/Documentation/RelNotes/2.14.4.txt b/Documentation/RelNotes/2.14.4.txt new file mode 100644 index 0000000000..97755a89d9 --- /dev/null +++ b/Documentation/RelNotes/2.14.4.txt @@ -0,0 +1,5 @@ +Git v2.14.4 Release Notes +========================= + +This release is to forward-port the fixes made in the v2.13.7 version +of Git. See its release notes for details. diff --git a/Documentation/RelNotes/2.15.2.txt b/Documentation/RelNotes/2.15.2.txt index 9f7e28f8a2..b480e56b68 100644 --- a/Documentation/RelNotes/2.15.2.txt +++ b/Documentation/RelNotes/2.15.2.txt @@ -43,5 +43,8 @@ Fixes since v2.15.1 * Clarify and enhance documentation for "merge-base --fork-point", as it was clear what it computed but not why/what for. + * This release also contains the fixes made in the v2.13.7 version of + Git. See its release notes for details. + Also contains various documentation updates and code clean-ups. diff --git a/Documentation/RelNotes/2.16.4.txt b/Documentation/RelNotes/2.16.4.txt new file mode 100644 index 0000000000..6be538ba30 --- /dev/null +++ b/Documentation/RelNotes/2.16.4.txt @@ -0,0 +1,5 @@ +Git v2.16.4 Release Notes +========================= + +This release is to forward-port the fixes made in the v2.13.7 version +of Git. See its release notes for details. diff --git a/Documentation/RelNotes/2.17.1.txt b/Documentation/RelNotes/2.17.1.txt new file mode 100644 index 0000000000..e01384fe8e --- /dev/null +++ b/Documentation/RelNotes/2.17.1.txt @@ -0,0 +1,16 @@ +Git v2.17.1 Release Notes +========================= + +Fixes since v2.17 +----------------- + + * This release contains the same fixes made in the v2.13.7 version of + Git, covering CVE-2018-11233 and 11235, and forward-ported to + v2.14.4, v2.15.2 and v2.16.4 releases. See release notes to + v2.13.7 for details. + + * In addition to the above fixes, this release has support on the + server side to reject pushes to repositories that attempt to create + such problematic .gitmodules file etc. as tracked contents, to help + hosting sites protect their customers by preventing malicious + contents from spreading. diff --git a/apply.c b/apply.c index 7e5792c996..d80b26bc33 100644 --- a/apply.c +++ b/apply.c @@ -3860,9 +3860,9 @@ static int check_unsafe_path(struct patch *patch) if (!patch->is_delete) new_name = patch->new_name; - if (old_name && !verify_path(old_name)) + if (old_name && !verify_path(old_name, patch->old_mode)) return error(_("invalid path '%s'"), old_name); - if (new_name && !verify_path(new_name)) + if (new_name && !verify_path(new_name, patch->new_mode)) return error(_("invalid path '%s'"), new_name); return 0; } diff --git a/builtin/fsck.c b/builtin/fsck.c index 9d59d7d5a2..916109ac1c 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -340,7 +340,7 @@ static void check_connectivity(void) } } -static int fsck_obj(struct object *obj) +static int fsck_obj(struct object *obj, void *buffer, unsigned long size) { int err; @@ -354,7 +354,7 @@ static int fsck_obj(struct object *obj) if (fsck_walk(obj, NULL, &fsck_obj_options)) objerror(obj, "broken links"); - err = fsck_object(obj, NULL, 0, &fsck_obj_options); + err = fsck_object(obj, buffer, size, &fsck_obj_options); if (err) goto out; @@ -399,7 +399,7 @@ static int fsck_obj_buffer(const struct object_id *oid, enum object_type type, } obj->flags &= ~(REACHABLE | SEEN); obj->flags |= HAS_OBJ; - return fsck_obj(obj); + return fsck_obj(obj, buffer, size); } static int default_refs; @@ -507,44 +507,42 @@ static void get_default_heads(void) } } -static struct object *parse_loose_object(const struct object_id *oid, - const char *path) -{ - struct object *obj; - void *contents; - enum object_type type; - unsigned long size; - int eaten; - - if (read_loose_object(path, oid, &type, &size, &contents) < 0) - return NULL; - - if (!contents && type != OBJ_BLOB) - die("BUG: read_loose_object streamed a non-blob"); - - obj = parse_object_buffer(oid, type, size, contents, &eaten); - - if (!eaten) - free(contents); - return obj; -} - static int fsck_loose(const struct object_id *oid, const char *path, void *data) { - struct object *obj = parse_loose_object(oid, path); + struct object *obj; + enum object_type type; + unsigned long size; + void *contents; + int eaten; - if (!obj) { + if (read_loose_object(path, oid, &type, &size, &contents) < 0) { errors_found |= ERROR_OBJECT; error("%s: object corrupt or missing: %s", oid_to_hex(oid), path); return 0; /* keep checking other objects */ } + if (!contents && type != OBJ_BLOB) + BUG("read_loose_object streamed a non-blob"); + + obj = parse_object_buffer(oid, type, size, contents, &eaten); + if (!obj) { + errors_found |= ERROR_OBJECT; + error("%s: object could not be parsed: %s", + oid_to_hex(oid), path); + if (!eaten) + free(contents); + return 0; /* keep checking other objects */ + } + obj->flags &= ~(REACHABLE | SEEN); obj->flags |= HAS_OBJ; - if (fsck_obj(obj)) + if (fsck_obj(obj, contents, size)) errors_found |= ERROR_OBJECT; - return 0; + + if (!eaten) + free(contents); + return 0; /* keep checking other objects, even if we saw an error */ } static int fsck_cruft(const char *basename, const char *path, void *data) @@ -756,6 +754,9 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) } stop_progress(&progress); } + + if (fsck_finish(&fsck_obj_options)) + errors_found |= ERROR_OBJECT; } for (i = 0; i < argc; i++) { diff --git a/builtin/index-pack.c b/builtin/index-pack.c index e2f670bef9..59130e8ecb 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -837,6 +837,9 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, blob->object.flags |= FLAG_CHECKED; else die(_("invalid blob object %s"), oid_to_hex(oid)); + if (do_fsck_object && + fsck_object(&blob->object, (void *)data, size, &fsck_options)) + die(_("fsck error in packed object")); } else { struct object *obj; int eaten; @@ -854,7 +857,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, die(_("invalid %s"), type_name(type)); if (do_fsck_object && fsck_object(obj, buf, size, &fsck_options)) - die(_("Error in object")); + die(_("fsck error in packed object")); if (strict && fsck_walk(obj, NULL, &fsck_options)) die(_("Not all child objects of %s are reachable"), oid_to_hex(&obj->oid)); @@ -1479,6 +1482,9 @@ static void final(const char *final_pack_name, const char *curr_pack_name, } else chmod(final_index_name, 0444); + if (do_fsck_object) + add_packed_git(final_index_name, strlen(final_index_name), 0); + if (!from_stdin) { printf("%s\n", sha1_to_hex(hash)); } else { @@ -1820,6 +1826,10 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) pack_hash); else close(input_fd); + + if (do_fsck_object && fsck_finish(&fsck_options)) + die(_("fsck error in pack objects")); + free(objects); strbuf_release(&index_name_buf); if (pack_name == NULL) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index c2403a915f..df841d4ab3 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -1825,6 +1825,29 @@ static int is_active(int argc, const char **argv, const char *prefix) return !is_submodule_active(the_repository, argv[1]); } +/* + * Exit non-zero if any of the submodule names given on the command line is + * invalid. If no names are given, filter stdin to print only valid names + * (which is primarily intended for testing). + */ +static int check_name(int argc, const char **argv, const char *prefix) +{ + if (argc > 1) { + while (*++argv) { + if (check_submodule_name(*argv) < 0) + return 1; + } + } else { + struct strbuf buf = STRBUF_INIT; + while (strbuf_getline(&buf, stdin) != EOF) { + if (!check_submodule_name(buf.buf)) + printf("%s\n", buf.buf); + } + strbuf_release(&buf); + } + return 0; +} + #define SUPPORT_SUPER_PREFIX (1<<0) struct cmd_struct { @@ -1850,6 +1873,7 @@ static struct cmd_struct commands[] = { {"push-check", push_check, 0}, {"absorb-git-dirs", absorb_git_dirs, SUPPORT_SUPER_PREFIX}, {"is-active", is_active, 0}, + {"check-name", check_name, 0}, }; int cmd_submodule__helper(int argc, const char **argv, const char *prefix) diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index cfe9019f80..6e81ca8ca2 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -210,7 +210,7 @@ static int check_object(struct object *obj, int type, void *data, struct fsck_op if (!obj_buf) die("Whoops! Cannot find object '%s'", oid_to_hex(&obj->oid)); if (fsck_object(obj, obj_buf->buffer, obj_buf->size, &fsck_options)) - die("Error in object"); + die("fsck error in packed object"); fsck_options.walk = check_object; if (fsck_walk(obj, NULL, &fsck_options)) die("Error on reachable objects of %s", oid_to_hex(&obj->oid)); @@ -572,8 +572,11 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix) unpack_all(); the_hash_algo->update_fn(&ctx, buffer, offset); the_hash_algo->final_fn(oid.hash, &ctx); - if (strict) + if (strict) { write_rest(); + if (fsck_finish(&fsck_options)) + die(_("fsck error in pack objects")); + } if (hashcmp(fill(the_hash_algo->rawsz), oid.hash)) die("final sha1 did not match"); use(the_hash_algo->rawsz); diff --git a/builtin/update-index.c b/builtin/update-index.c index 10d070a76f..6598bc06ad 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -364,10 +364,9 @@ static int process_directory(const char *path, int len, struct stat *st) return error("%s: is a directory - add files inside instead", path); } -static int process_path(const char *path) +static int process_path(const char *path, struct stat *st, int stat_errno) { int pos, len; - struct stat st; const struct cache_entry *ce; len = strlen(path); @@ -391,13 +390,13 @@ static int process_path(const char *path) * First things first: get the stat information, to decide * what to do about the pathname! */ - if (lstat(path, &st) < 0) - return process_lstat_error(path, errno); + if (stat_errno) + return process_lstat_error(path, stat_errno); - if (S_ISDIR(st.st_mode)) - return process_directory(path, len, &st); + if (S_ISDIR(st->st_mode)) + return process_directory(path, len, st); - return add_one_path(ce, path, len, &st); + return add_one_path(ce, path, len, st); } static int add_cacheinfo(unsigned int mode, const struct object_id *oid, @@ -406,7 +405,7 @@ static int add_cacheinfo(unsigned int mode, const struct object_id *oid, int size, len, option; struct cache_entry *ce; - if (!verify_path(path)) + if (!verify_path(path, mode)) return error("Invalid path '%s'", path); len = strlen(path); @@ -449,7 +448,18 @@ static void chmod_path(char flip, const char *path) static void update_one(const char *path) { - if (!verify_path(path)) { + int stat_errno = 0; + struct stat st; + + if (mark_valid_only || mark_skip_worktree_only || force_remove || + mark_fsmonitor_only) + st.st_mode = 0; + else if (lstat(path, &st) < 0) { + st.st_mode = 0; + stat_errno = errno; + } /* else stat is valid */ + + if (!verify_path(path, st.st_mode)) { fprintf(stderr, "Ignoring path %s\n", path); return; } @@ -475,7 +485,7 @@ static void update_one(const char *path) report("remove '%s'", path); return; } - if (process_path(path)) + if (process_path(path, &st, stat_errno)) die("Unable to process path %s", path); report("add '%s'", path); } @@ -545,7 +555,7 @@ static void read_index_info(int nul_term_line) path_name = uq.buf; } - if (!verify_path(path_name)) { + if (!verify_path(path_name, mode)) { fprintf(stderr, "Ignoring path %s\n", path_name); continue; } diff --git a/cache.h b/cache.h index 6dedf3c4f9..3bbe4f8845 100644 --- a/cache.h +++ b/cache.h @@ -642,7 +642,7 @@ extern int unmerged_index(const struct index_state *); */ extern int index_has_changes(struct strbuf *sb); -extern int verify_path(const char *path); +extern int verify_path(const char *path, unsigned mode); extern int strcmp_offset(const char *s1, const char *s2, size_t *first_change); extern int index_dir_exists(struct index_state *istate, const char *name, int namelen); extern void adjust_dirname_case(struct index_state *istate, char *name); @@ -1168,7 +1168,15 @@ int normalize_path_copy(char *dst, const char *src); int longest_ancestor_length(const char *path, struct string_list *prefixes); char *strip_path_suffix(const char *path, const char *suffix); int daemon_avoid_alias(const char *path); -extern int is_ntfs_dotgit(const char *name); + +/* + * These functions match their is_hfs_dotgit() counterparts; see utf8.h for + * details. + */ +int is_ntfs_dotgit(const char *name); +int is_ntfs_dotgitmodules(const char *name); +int is_ntfs_dotgitignore(const char *name); +int is_ntfs_dotgitattributes(const char *name); /* * Returns true iff "str" could be confused as a command-line option when diff --git a/dir.c b/dir.c index be08d3d296..7ca730fac4 100644 --- a/dir.c +++ b/dir.c @@ -2993,7 +2993,7 @@ void untracked_cache_invalidate_path(struct index_state *istate, { if (!istate->untracked || !istate->untracked->root) return; - if (!safe_path && !verify_path(path)) + if (!safe_path && !verify_path(path, 0)) return; invalidate_one_component(istate->untracked, istate->untracked->root, path, strlen(path)); diff --git a/fsck.c b/fsck.c index 640422a6c6..4db2277ab8 100644 --- a/fsck.c +++ b/fsck.c @@ -10,6 +10,13 @@ #include "utf8.h" #include "sha1-array.h" #include "decorate.h" +#include "oidset.h" +#include "packfile.h" +#include "submodule-config.h" +#include "config.h" + +static struct oidset gitmodules_found = OIDSET_INIT; +static struct oidset gitmodules_done = OIDSET_INIT; #define FSCK_FATAL -1 #define FSCK_INFO -2 @@ -44,6 +51,7 @@ FUNC(MISSING_TAG_ENTRY, ERROR) \ FUNC(MISSING_TAG_OBJECT, ERROR) \ FUNC(MISSING_TREE, ERROR) \ + FUNC(MISSING_TREE_OBJECT, ERROR) \ FUNC(MISSING_TYPE, ERROR) \ FUNC(MISSING_TYPE_ENTRY, ERROR) \ FUNC(MULTIPLE_AUTHORS, ERROR) \ @@ -51,6 +59,11 @@ FUNC(TREE_NOT_SORTED, ERROR) \ FUNC(UNKNOWN_TYPE, ERROR) \ FUNC(ZERO_PADDED_DATE, ERROR) \ + FUNC(GITMODULES_MISSING, ERROR) \ + FUNC(GITMODULES_BLOB, ERROR) \ + FUNC(GITMODULES_PARSE, ERROR) \ + FUNC(GITMODULES_NAME, ERROR) \ + FUNC(GITMODULES_SYMLINK, ERROR) \ /* warnings */ \ FUNC(BAD_FILEMODE, WARN) \ FUNC(EMPTY_NAME, WARN) \ @@ -563,10 +576,18 @@ static int fsck_tree(struct tree *item, struct fsck_options *options) has_empty_name |= !*name; has_dot |= !strcmp(name, "."); has_dotdot |= !strcmp(name, ".."); - has_dotgit |= (!strcmp(name, ".git") || - is_hfs_dotgit(name) || - is_ntfs_dotgit(name)); + has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name); has_zero_pad |= *(char *)desc.buffer == '0'; + + if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) { + if (!S_ISLNK(mode)) + oidset_insert(&gitmodules_found, oid); + else + retval += report(options, &item->object, + FSCK_MSG_GITMODULES_SYMLINK, + ".gitmodules is a symbolic link"); + } + if (update_tree_entry_gently(&desc)) { retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree"); break; @@ -903,6 +924,66 @@ static int fsck_tag(struct tag *tag, const char *data, return fsck_tag_buffer(tag, data, size, options); } +struct fsck_gitmodules_data { + struct object *obj; + struct fsck_options *options; + int ret; +}; + +static int fsck_gitmodules_fn(const char *var, const char *value, void *vdata) +{ + struct fsck_gitmodules_data *data = vdata; + const char *subsection, *key; + int subsection_len; + char *name; + + if (parse_config_key(var, "submodule", &subsection, &subsection_len, &key) < 0 || + !subsection) + return 0; + + name = xmemdupz(subsection, subsection_len); + if (check_submodule_name(name) < 0) + data->ret |= report(data->options, data->obj, + FSCK_MSG_GITMODULES_NAME, + "disallowed submodule name: %s", + name); + free(name); + + return 0; +} + +static int fsck_blob(struct blob *blob, const char *buf, + unsigned long size, struct fsck_options *options) +{ + struct fsck_gitmodules_data data; + + if (!oidset_contains(&gitmodules_found, &blob->object.oid)) + return 0; + oidset_insert(&gitmodules_done, &blob->object.oid); + + if (!buf) { + /* + * A missing buffer here is a sign that the caller found the + * blob too gigantic to load into memory. Let's just consider + * that an error. + */ + return report(options, &blob->object, + FSCK_MSG_GITMODULES_PARSE, + ".gitmodules too large to parse"); + } + + data.obj = &blob->object; + data.options = options; + data.ret = 0; + if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB, + ".gitmodules", buf, size, &data)) + data.ret |= report(options, &blob->object, + FSCK_MSG_GITMODULES_PARSE, + "could not parse gitmodules blob"); + + return data.ret; +} + int fsck_object(struct object *obj, void *data, unsigned long size, struct fsck_options *options) { @@ -910,7 +991,7 @@ int fsck_object(struct object *obj, void *data, unsigned long size, return report(options, obj, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck"); if (obj->type == OBJ_BLOB) - return 0; + return fsck_blob((struct blob *)obj, data, size, options); if (obj->type == OBJ_TREE) return fsck_tree((struct tree *) obj, options); if (obj->type == OBJ_COMMIT) @@ -934,3 +1015,52 @@ int fsck_error_function(struct fsck_options *o, error("object %s: %s", describe_object(o, obj), message); return 1; } + +int fsck_finish(struct fsck_options *options) +{ + int ret = 0; + struct oidset_iter iter; + const struct object_id *oid; + + oidset_iter_init(&gitmodules_found, &iter); + while ((oid = oidset_iter_next(&iter))) { + struct blob *blob; + enum object_type type; + unsigned long size; + char *buf; + + if (oidset_contains(&gitmodules_done, oid)) + continue; + + blob = lookup_blob(oid); + if (!blob) { + ret |= report(options, &blob->object, + FSCK_MSG_GITMODULES_BLOB, + "non-blob found at .gitmodules"); + continue; + } + + buf = read_object_file(oid, &type, &size); + if (!buf) { + if (is_promisor_object(&blob->object.oid)) + continue; + ret |= report(options, &blob->object, + FSCK_MSG_GITMODULES_MISSING, + "unable to read .gitmodules blob"); + continue; + } + + if (type == OBJ_BLOB) + ret |= fsck_blob(blob, buf, size, options); + else + ret |= report(options, &blob->object, + FSCK_MSG_GITMODULES_BLOB, + "non-blob found at .gitmodules"); + free(buf); + } + + + oidset_clear(&gitmodules_found); + oidset_clear(&gitmodules_done); + return ret; +} diff --git a/fsck.h b/fsck.h index 4525510d99..c3cf5e0034 100644 --- a/fsck.h +++ b/fsck.h @@ -53,4 +53,11 @@ int fsck_walk(struct object *obj, void *data, struct fsck_options *options); int fsck_object(struct object *obj, void *data, unsigned long size, struct fsck_options *options); +/* + * Some fsck checks are context-dependent, and may end up queued; run this + * after completing all fsck_object() calls in order to resolve any remaining + * checks. + */ +int fsck_finish(struct fsck_options *options); + #endif diff --git a/git-compat-util.h b/git-compat-util.h index f9e4c5f9bc..94a108c03e 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1006,6 +1006,23 @@ static inline int sane_iscase(int x, int is_lower) return (x & 0x20) == 0; } +/* + * Like skip_prefix, but compare case-insensitively. Note that the comparison + * is done via tolower(), so it is strictly ASCII (no multi-byte characters or + * locale-specific conversions). + */ +static inline int skip_iprefix(const char *str, const char *prefix, + const char **out) +{ + do { + if (!*prefix) { + *out = str; + return 1; + } + } while (tolower(*str++) == tolower(*prefix++)); + return 0; +} + static inline int strtoul_ui(char const *s, int base, unsigned int *result) { unsigned long ul; diff --git a/git-submodule.sh b/git-submodule.sh index 24914963ca..7e27d0a0da 100755 --- a/git-submodule.sh +++ b/git-submodule.sh @@ -229,6 +229,11 @@ Use -f if you really want to add it." >&2 sm_name="$sm_path" fi + if ! git submodule--helper check-name "$sm_name" + then + die "$(eval_gettext "'$sm_name' is not a valid submodule name")" + fi + # perhaps the path exists and is already a git repo, else clone it if test -e "$sm_path" then diff --git a/path.c b/path.c index 3308b7b958..7f109f6618 100644 --- a/path.c +++ b/path.c @@ -1306,7 +1306,7 @@ static int only_spaces_and_periods(const char *path, size_t len, size_t skip) int is_ntfs_dotgit(const char *name) { - int len; + size_t len; for (len = 0; ; len++) if (!name[len] || name[len] == '\\' || is_dir_sep(name[len])) { @@ -1323,6 +1323,90 @@ int is_ntfs_dotgit(const char *name) } } +static int is_ntfs_dot_generic(const char *name, + const char *dotgit_name, + size_t len, + const char *dotgit_ntfs_shortname_prefix) +{ + int saw_tilde; + size_t i; + + if ((name[0] == '.' && !strncasecmp(name + 1, dotgit_name, len))) { + i = len + 1; +only_spaces_and_periods: + for (;;) { + char c = name[i++]; + if (!c) + return 1; + if (c != ' ' && c != '.') + return 0; + } + } + + /* + * Is it a regular NTFS short name, i.e. shortened to 6 characters, + * followed by ~1, ... ~4? + */ + if (!strncasecmp(name, dotgit_name, 6) && name[6] == '~' && + name[7] >= '1' && name[7] <= '4') { + i = 8; + goto only_spaces_and_periods; + } + + /* + * Is it a fall-back NTFS short name (for details, see + * https://en.wikipedia.org/wiki/8.3_filename? + */ + for (i = 0, saw_tilde = 0; i < 8; i++) + if (name[i] == '\0') + return 0; + else if (saw_tilde) { + if (name[i] < '0' || name[i] > '9') + return 0; + } else if (name[i] == '~') { + if (name[++i] < '1' || name[i] > '9') + return 0; + saw_tilde = 1; + } else if (i >= 6) + return 0; + else if (name[i] < 0) { + /* + * We know our needles contain only ASCII, so we clamp + * here to make the results of tolower() sane. + */ + return 0; + } else if (tolower(name[i]) != dotgit_ntfs_shortname_prefix[i]) + return 0; + + goto only_spaces_and_periods; +} + +/* + * Inline helper to make sure compiler resolves strlen() on literals at + * compile time. + */ +static inline int is_ntfs_dot_str(const char *name, const char *dotgit_name, + const char *dotgit_ntfs_shortname_prefix) +{ + return is_ntfs_dot_generic(name, dotgit_name, strlen(dotgit_name), + dotgit_ntfs_shortname_prefix); +} + +int is_ntfs_dotgitmodules(const char *name) +{ + return is_ntfs_dot_str(name, "gitmodules", "gi7eba"); +} + +int is_ntfs_dotgitignore(const char *name) +{ + return is_ntfs_dot_str(name, "gitignore", "gi250a"); +} + +int is_ntfs_dotgitattributes(const char *name) +{ + return is_ntfs_dot_str(name, "gitattributes", "gi7d29"); +} + int looks_like_command_line_option(const char *str) { return str && str[0] == '-'; diff --git a/read-cache.c b/read-cache.c index fa3df2e72e..cebd24849d 100644 --- a/read-cache.c +++ b/read-cache.c @@ -752,7 +752,7 @@ struct cache_entry *make_cache_entry(unsigned int mode, int size, len; struct cache_entry *ce, *ret; - if (!verify_path(path)) { + if (!verify_path(path, mode)) { error("Invalid path '%s'", path); return NULL; } @@ -817,7 +817,7 @@ int ce_same_name(const struct cache_entry *a, const struct cache_entry *b) * Also, we don't want double slashes or slashes at the * end that can make pathnames ambiguous. */ -static int verify_dotfile(const char *rest) +static int verify_dotfile(const char *rest, unsigned mode) { /* * The first character was '.', but that @@ -831,8 +831,13 @@ static int verify_dotfile(const char *rest) switch (*rest) { /* - * ".git" followed by NUL or slash is bad. This - * shares the path end test with the ".." case. + * ".git" followed by NUL or slash is bad. Note that we match + * case-insensitively here, even if ignore_case is not set. + * This outlaws ".GIT" everywhere out of an abundance of caution, + * since there's really no good reason to allow it. + * + * Once we've seen ".git", we can also find ".gitmodules", etc (also + * case-insensitively). */ case 'g': case 'G': @@ -840,8 +845,15 @@ static int verify_dotfile(const char *rest) break; if (rest[2] != 't' && rest[2] != 'T') break; - rest += 2; - /* fallthrough */ + if (rest[3] == '\0' || is_dir_sep(rest[3])) + return 0; + if (S_ISLNK(mode)) { + rest += 3; + if (skip_iprefix(rest, "modules", &rest) && + (*rest == '\0' || is_dir_sep(*rest))) + return 0; + } + break; case '.': if (rest[1] == '\0' || is_dir_sep(rest[1])) return 0; @@ -849,7 +861,7 @@ static int verify_dotfile(const char *rest) return 1; } -int verify_path(const char *path) +int verify_path(const char *path, unsigned mode) { char c; @@ -862,12 +874,25 @@ int verify_path(const char *path) return 1; if (is_dir_sep(c)) { inside: - if (protect_hfs && is_hfs_dotgit(path)) - return 0; - if (protect_ntfs && is_ntfs_dotgit(path)) - return 0; + if (protect_hfs) { + if (is_hfs_dotgit(path)) + return 0; + if (S_ISLNK(mode)) { + if (is_hfs_dotgitmodules(path)) + return 0; + } + } + if (protect_ntfs) { + if (is_ntfs_dotgit(path)) + return 0; + if (S_ISLNK(mode)) { + if (is_ntfs_dotgitmodules(path)) + return 0; + } + } + c = *path++; - if ((c == '.' && !verify_dotfile(path)) || + if ((c == '.' && !verify_dotfile(path, mode)) || is_dir_sep(c) || c == '\0') return 0; } @@ -1184,7 +1209,7 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e if (!ok_to_add) return -1; - if (!verify_path(ce->name)) + if (!verify_path(ce->name, ce->ce_mode)) return error("Invalid path '%s'", ce->name); if (!skip_df_check && diff --git a/sha1-file.c b/sha1-file.c index f66059ed7d..e47098eff2 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -2232,7 +2232,7 @@ int read_loose_object(const char *path, goto out; } - if (*type == OBJ_BLOB) { + if (*type == OBJ_BLOB && *size > big_file_threshold) { if (check_stream_sha1(&stream, hdr, *size, path, expected_oid->hash) < 0) goto out; } else { diff --git a/submodule-config.c b/submodule-config.c index d87c3ff63a..90d29348d3 100644 --- a/submodule-config.c +++ b/submodule-config.c @@ -190,6 +190,31 @@ static struct submodule *cache_lookup_name(struct submodule_cache *cache, return NULL; } +int check_submodule_name(const char *name) +{ + /* Disallow empty names */ + if (!*name) + return -1; + + /* + * Look for '..' as a path component. Check both '/' and '\\' as + * separators rather than is_dir_sep(), because we want the name rules + * to be consistent across platforms. + */ + goto in_component; /* always start inside component */ + while (*name) { + char c = *name++; + if (c == '/' || c == '\\') { +in_component: + if (name[0] == '.' && name[1] == '.' && + (!name[2] || name[2] == '/' || name[2] == '\\')) + return -1; + } + } + + return 0; +} + static int name_and_item_from_var(const char *var, struct strbuf *name, struct strbuf *item) { @@ -201,6 +226,12 @@ static int name_and_item_from_var(const char *var, struct strbuf *name, return 0; strbuf_add(name, subsection, subsection_len); + if (check_submodule_name(name->buf) < 0) { + warning(_("ignoring suspicious submodule name: %s"), name->buf); + strbuf_release(name); + return 0; + } + strbuf_addstr(item, key); return 1; diff --git a/submodule-config.h b/submodule-config.h index 6f686184e8..21273f56a3 100644 --- a/submodule-config.h +++ b/submodule-config.h @@ -47,4 +47,11 @@ const struct submodule *submodule_from_path(struct repository *r, const char *path); void submodule_free(struct repository *r); +/* + * Returns 0 if the name is syntactically acceptable as a submodule "name" + * (e.g., that may be found in the subsection of a .gitmodules file) and -1 + * otherwise. + */ +int check_submodule_name(const char *name); + #endif /* SUBMODULE_CONFIG_H */ diff --git a/t/helper/test-path-utils.c b/t/helper/test-path-utils.c index e115d44ac2..ae091d9b3e 100644 --- a/t/helper/test-path-utils.c +++ b/t/helper/test-path-utils.c @@ -1,6 +1,7 @@ #include "test-tool.h" #include "cache.h" #include "string-list.h" +#include "utf8.h" /* * A "string_list_each_func_t" function that normalizes an entry from @@ -171,6 +172,11 @@ static struct test_data dirname_data[] = { { NULL, NULL } }; +static int is_dotgitmodules(const char *path) +{ + return is_hfs_dotgitmodules(path) || is_ntfs_dotgitmodules(path); +} + int cmd__path_utils(int argc, const char **argv) { if (argc == 3 && !strcmp(argv[1], "normalize_path_copy")) { @@ -271,6 +277,20 @@ int cmd__path_utils(int argc, const char **argv) if (argc == 2 && !strcmp(argv[1], "dirname")) return test_function(dirname_data, posix_dirname, argv[1]); + if (argc > 2 && !strcmp(argv[1], "is_dotgitmodules")) { + int res = 0, expect = 1, i; + for (i = 2; i < argc; i++) + if (!strcmp("--not", argv[i])) + expect = !expect; + else if (expect != is_dotgitmodules(argv[i])) + res = error("'%s' is %s.gitmodules", argv[i], + expect ? "not " : ""); + else + fprintf(stderr, "ok: '%s' is %s.gitmodules\n", + argv[i], expect ? "" : "not "); + return !!res; + } + fprintf(stderr, "%s: unknown function name: %s\n", argv[0], argv[1] ? argv[1] : "(there was none)"); return 1; diff --git a/t/lib-pack.sh b/t/lib-pack.sh index 501078249d..c4d907a450 100644 --- a/t/lib-pack.sh +++ b/t/lib-pack.sh @@ -79,6 +79,18 @@ pack_obj () { ;; esac + # If it's not a delta, we can convince pack-objects to generate a pack + # with just our entry, and then strip off the header (12 bytes) and + # trailer (20 bytes). + if test -z "$2" + then + echo "$1" | git pack-objects --stdout >pack_obj.tmp && + size=$(wc -c &2 "BUG: don't know how to print $1${2:+ (from $2)}" return 1 } diff --git a/t/t0060-path-utils.sh b/t/t0060-path-utils.sh index f46e3c4995..21a8b53132 100755 --- a/t/t0060-path-utils.sh +++ b/t/t0060-path-utils.sh @@ -349,4 +349,90 @@ test_submodule_relative_url "(null)" "ssh://hostname:22/repo" "../subrepo" "ssh: test_submodule_relative_url "(null)" "user@host:path/to/repo" "../subrepo" "user@host:path/to/subrepo" test_submodule_relative_url "(null)" "user@host:repo" "../subrepo" "user@host:subrepo" +test_expect_success 'match .gitmodules' ' + test-tool path-utils is_dotgitmodules \ + .gitmodules \ + \ + .git${u200c}modules \ + \ + .Gitmodules \ + .gitmoduleS \ + \ + ".gitmodules " \ + ".gitmodules." \ + ".gitmodules " \ + ".gitmodules. " \ + ".gitmodules ." \ + ".gitmodules.." \ + ".gitmodules " \ + ".gitmodules. " \ + ".gitmodules . " \ + ".gitmodules ." \ + \ + ".Gitmodules " \ + ".Gitmodules." \ + ".Gitmodules " \ + ".Gitmodules. " \ + ".Gitmodules ." \ + ".Gitmodules.." \ + ".Gitmodules " \ + ".Gitmodules. " \ + ".Gitmodules . " \ + ".Gitmodules ." \ + \ + GITMOD~1 \ + gitmod~1 \ + GITMOD~2 \ + gitmod~3 \ + GITMOD~4 \ + \ + "GITMOD~1 " \ + "gitmod~2." \ + "GITMOD~3 " \ + "gitmod~4. " \ + "GITMOD~1 ." \ + "gitmod~2 " \ + "GITMOD~3. " \ + "gitmod~4 . " \ + \ + GI7EBA~1 \ + gi7eba~9 \ + \ + GI7EB~10 \ + GI7EB~11 \ + GI7EB~99 \ + GI7EB~10 \ + GI7E~100 \ + GI7E~101 \ + GI7E~999 \ + ~1000000 \ + ~9999999 \ + \ + --not \ + ".gitmodules x" \ + ".gitmodules .x" \ + \ + " .gitmodules" \ + \ + ..gitmodules \ + \ + gitmodules \ + \ + .gitmodule \ + \ + ".gitmodules x " \ + ".gitmodules .x" \ + \ + GI7EBA~ \ + GI7EBA~0 \ + GI7EBA~~1 \ + GI7EBA~X \ + Gx7EBA~1 \ + GI7EBX~1 \ + \ + GI7EB~1 \ + GI7EB~01 \ + GI7EB~1X +' + test_done diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh new file mode 100755 index 0000000000..a770d92a55 --- /dev/null +++ b/t/t7415-submodule-names.sh @@ -0,0 +1,154 @@ +#!/bin/sh + +test_description='check handling of .. in submodule names + +Exercise the name-checking function on a variety of names, and then give a +real-world setup that confirms we catch this in practice. +' +. ./test-lib.sh +. "$TEST_DIRECTORY"/lib-pack.sh + +test_expect_success 'check names' ' + cat >expect <<-\EOF && + valid + valid/with/paths + EOF + + git submodule--helper check-name >actual <<-\EOF && + valid + valid/with/paths + + ../foo + /../foo + ..\foo + \..\foo + foo/.. + foo/../ + foo\.. + foo\..\ + foo/../bar + EOF + + test_cmp expect actual +' + +test_expect_success 'create innocent subrepo' ' + git init innocent && + git -C innocent commit --allow-empty -m foo +' + +test_expect_success 'submodule add refuses invalid names' ' + test_must_fail \ + git submodule add --name ../../modules/evil "$PWD/innocent" evil +' + +test_expect_success 'add evil submodule' ' + git submodule add "$PWD/innocent" evil && + + mkdir modules && + cp -r .git/modules/evil modules && + write_script modules/evil/hooks/post-checkout <<-\EOF && + echo >&2 "RUNNING POST CHECKOUT" + EOF + + git config -f .gitmodules submodule.evil.update checkout && + git config -f .gitmodules --rename-section \ + submodule.evil submodule.../../modules/evil && + git add modules && + git commit -am evil +' + +# This step seems like it shouldn't be necessary, since the payload is +# contained entirely in the evil submodule. But due to the vagaries of the +# submodule code, checking out the evil module will fail unless ".git/modules" +# exists. Adding another submodule (with a name that sorts before "evil") is an +# easy way to make sure this is the case in the victim clone. +test_expect_success 'add other submodule' ' + git submodule add "$PWD/innocent" another-module && + git add another-module && + git commit -am another +' + +test_expect_success 'clone evil superproject' ' + git clone --recurse-submodules . victim >output 2>&1 && + ! grep "RUNNING POST CHECKOUT" output +' + +test_expect_success 'fsck detects evil superproject' ' + test_must_fail git fsck +' + +test_expect_success 'transfer.fsckObjects detects evil superproject (unpack)' ' + rm -rf dst.git && + git init --bare dst.git && + git -C dst.git config transfer.fsckObjects true && + test_must_fail git push dst.git HEAD +' + +test_expect_success 'transfer.fsckObjects detects evil superproject (index)' ' + rm -rf dst.git && + git init --bare dst.git && + git -C dst.git config transfer.fsckObjects true && + git -C dst.git config transfer.unpackLimit 1 && + test_must_fail git push dst.git HEAD +' + +# Normally our packs contain commits followed by trees followed by blobs. This +# reverses the order, which requires backtracking to find the context of a +# blob. We'll start with a fresh gitmodules-only tree to make it simpler. +test_expect_success 'create oddly ordered pack' ' + git checkout --orphan odd && + git rm -rf --cached . && + git add .gitmodules && + git commit -m odd && + { + pack_header 3 && + pack_obj $(git rev-parse HEAD:.gitmodules) && + pack_obj $(git rev-parse HEAD^{tree}) && + pack_obj $(git rev-parse HEAD) + } >odd.pack && + pack_trailer odd.pack +' + +test_expect_success 'transfer.fsckObjects handles odd pack (unpack)' ' + rm -rf dst.git && + git init --bare dst.git && + test_must_fail git -C dst.git unpack-objects --strict output && + grep gitmodulesSymlink output + ) +' + +test_done diff --git a/utf8.c b/utf8.c index 0fcc6487e3..d55e20c641 100644 --- a/utf8.c +++ b/utf8.c @@ -681,28 +681,33 @@ static ucs_char_t next_hfs_char(const char **in) } } -int is_hfs_dotgit(const char *path) +static int is_hfs_dot_generic(const char *path, + const char *needle, size_t needle_len) { ucs_char_t c; c = next_hfs_char(&path); if (c != '.') return 0; - c = next_hfs_char(&path); /* * there's a great deal of other case-folding that occurs - * in HFS+, but this is enough to catch anything that will - * convert to ".git" + * in HFS+, but this is enough to catch our fairly vanilla + * hard-coded needles. */ - if (c != 'g' && c != 'G') - return 0; - c = next_hfs_char(&path); - if (c != 'i' && c != 'I') - return 0; - c = next_hfs_char(&path); - if (c != 't' && c != 'T') - return 0; + for (; needle_len > 0; needle++, needle_len--) { + c = next_hfs_char(&path); + + /* + * We know our needles contain only ASCII, so we clamp here to + * make the results of tolower() sane. + */ + if (c > 127) + return 0; + if (tolower(c) != *needle) + return 0; + } + c = next_hfs_char(&path); if (c && !is_dir_sep(c)) return 0; @@ -710,6 +715,35 @@ int is_hfs_dotgit(const char *path) return 1; } +/* + * Inline wrapper to make sure the compiler resolves strlen() on literals at + * compile time. + */ +static inline int is_hfs_dot_str(const char *path, const char *needle) +{ + return is_hfs_dot_generic(path, needle, strlen(needle)); +} + +int is_hfs_dotgit(const char *path) +{ + return is_hfs_dot_str(path, "git"); +} + +int is_hfs_dotgitmodules(const char *path) +{ + return is_hfs_dot_str(path, "gitmodules"); +} + +int is_hfs_dotgitignore(const char *path) +{ + return is_hfs_dot_str(path, "gitignore"); +} + +int is_hfs_dotgitattributes(const char *path) +{ + return is_hfs_dot_str(path, "gitattributes"); +} + const char utf8_bom[] = "\357\273\277"; int skip_utf8_bom(char **text, size_t len) diff --git a/utf8.h b/utf8.h index cce654a64a..db73a2d8d3 100644 --- a/utf8.h +++ b/utf8.h @@ -52,8 +52,13 @@ int mbs_chrlen(const char **text, size_t *remainder_p, const char *encoding); * The path should be NUL-terminated, but we will match variants of both ".git\0" * and ".git/..." (but _not_ ".../.git"). This makes it suitable for both fsck * and verify_path(). + * + * Likewise, the is_hfs_dotgitfoo() variants look for ".gitfoo". */ int is_hfs_dotgit(const char *path); +int is_hfs_dotgitmodules(const char *path); +int is_hfs_dotgitignore(const char *path); +int is_hfs_dotgitattributes(const char *path); typedef enum { ALIGN_LEFT,