From 83115ac4a811ef37318bc0e68a5e8b229751a88f Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Mon, 4 May 2015 03:25:13 -0400 Subject: [PATCH 1/4] git-hash-object.txt: document --literally option Document the git-hash-object --literally option added by 5ba9a93 (hash-object: add --literally option, 2014-09-11). While here, also correct a minor typesetting oversight. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- Documentation/git-hash-object.txt | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Documentation/git-hash-object.txt b/Documentation/git-hash-object.txt index 02c1f12685..0c75f3b610 100644 --- a/Documentation/git-hash-object.txt +++ b/Documentation/git-hash-object.txt @@ -9,7 +9,7 @@ git-hash-object - Compute object ID and optionally creates a blob from a file SYNOPSIS -------- [verse] -'git hash-object' [-t <type>] [-w] [--path=<file>|--no-filters] [--stdin] [--] <file>... +'git hash-object' [-t <type>] [-w] [--path=<file>|--no-filters] [--stdin [--literally]] [--] <file>... 'git hash-object' [-t <type>] [-w] --stdin-paths [--no-filters] < <list-of-paths> DESCRIPTION @@ -51,7 +51,13 @@ OPTIONS Hash the contents as is, ignoring any input filter that would have been chosen by the attributes mechanism, including the end-of-line conversion. If the file is read from standard input then this - is always implied, unless the --path option is given. + is always implied, unless the `--path` option is given. + +--literally:: + Allow `--stdin` to hash any garbage into a loose object which might not + otherwise pass standard object parsing or git-fsck checks. Useful for + stress-testing Git itself or reproducing characteristics of corrupt or + bogus objects encountered in the wild.
GIT --- From 0c3db67cc8137cebea5b1a9c3c7fc379ef8ffda6 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Mon, 4 May 2015 03:25:15 -0400 Subject: [PATCH 2/4] hash-object --literally: fix buffer overrun with extra-long object type "hash-object" learned in 5ba9a93 (hash-object: add --literally option, 2014-09-11) to allow crafting a corrupt/broken object of unknown type. When the user-provided type is particularly long, however, it can overflow the relatively small stack-based character array handed to write_sha1_file_prepare() by hash_sha1_file() and write_sha1_file(), leading to stack corruption (and crash). Introduce a custom helper to allow arbitrarily long typenames just for "hash-object --literally". [jc: Eric's original used a strbuf in the more common codepaths, and I rewrote it to avoid penalizing the non-literally code. Bugs are mine] Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- builtin/hash-object.c | 4 +--- cache.h | 1 + sha1_file.c | 21 +++++++++++++++++++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/builtin/hash-object.c b/builtin/hash-object.c index 6158363318..17e8bfdc44 100644 --- a/builtin/hash-object.c +++ b/builtin/hash-object.c @@ -22,10 +22,8 @@ static int hash_literally(unsigned char *sha1, int fd, const char *type, unsigne if (strbuf_read(&buf, fd, 4096) < 0) ret = -1; - else if (flags & HASH_WRITE_OBJECT) - ret = write_sha1_file(buf.buf, buf.len, type, sha1); else - ret = hash_sha1_file(buf.buf, buf.len, type, sha1); + ret = hash_sha1_file_literally(buf.buf, buf.len, type, sha1, flags); strbuf_release(&buf); return ret; } diff --git a/cache.h b/cache.h index dfa1a5696d..e037cadf4c 100644 --- a/cache.h +++ b/cache.h @@ -888,6 +888,7 @@ static inline const unsigned char *lookup_replace_object_extended(const unsigned extern int sha1_object_info(const unsigned char *, unsigned long *); extern int hash_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1); extern int 
write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *return_sha1); +extern int hash_sha1_file_literally(const void *buf, unsigned long len, const char *type, unsigned char *sha1, unsigned flags); extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *); extern int force_object_loose(const unsigned char *sha1, time_t mtime); extern int git_open_noatime(const char *name); diff --git a/sha1_file.c b/sha1_file.c index c08c0cbea8..dc940e63c4 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -2962,6 +2962,27 @@ int write_sha1_file(const void *buf, unsigned long len, const char *type, unsign return write_loose_object(sha1, hdr, hdrlen, buf, len, 0); } +int hash_sha1_file_literally(const void *buf, unsigned long len, const char *type, + unsigned char *sha1, unsigned flags) +{ + char *header; + int hdrlen, status = 0; + + /* type string, SP, %lu of the length plus NUL must fit this */ + header = xmalloc(strlen(type) + 32); + write_sha1_file_prepare(buf, len, type, sha1, header, &hdrlen); + + if (!(flags & HASH_WRITE_OBJECT)) + goto cleanup; + if (has_sha1_file(sha1)) + goto cleanup; + status = write_loose_object(sha1, header, hdrlen, buf, len, 0); + +cleanup: + free(header); + return status; +} + int force_object_loose(const unsigned char *sha1, time_t mtime) { void *buf; From 383c3427afa3201eb05e931825c5c2f20616b58b Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Mon, 4 May 2015 03:25:14 -0400 Subject: [PATCH 3/4] t1007: add hash-object --literally tests git-hash-object learned a --literally option in 5ba9a93 (hash-object: add --literally option, 2014-09-11). Check that --literally allows object creation with a bogus type, with two type strings whose length is reasonably short and very long. 
Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/t1007-hash-object.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/t/t1007-hash-object.sh b/t/t1007-hash-object.sh index f83df8eb8b..7c3dcfb70c 100755 --- a/t/t1007-hash-object.sh +++ b/t/t1007-hash-object.sh @@ -201,4 +201,15 @@ test_expect_success 'corrupt tag' ' test_must_fail git hash-object -t tag --stdin Date: Mon, 4 May 2015 11:08:10 -0700 Subject: [PATCH 4/4] write_sha1_file(): do not use a separate sha1[] array In the beginning, write_sha1_file() did not have a way to tell the caller the name of the object it wrote. This was changed in d6d3f9d0 (This implements the new "recursive tree" write-tree., 2005-04-09) by adding the "returnsha1" parameter to the function so that the callers who are interested in the value can optionally pass a pointer to receive it. It turns out that all callers do want to know the name of the object that was just written. Nobody passes a NULL to this parameter, hence it is not necessary to use a separate sha1[] array to receive the result from write_sha1_file_prepare(), and copy the result to the returnsha1 supplied by the caller. Signed-off-by: Junio C Hamano --- sha1_file.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sha1_file.c b/sha1_file.c index dc940e63c4..5d320753c3 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -2945,9 +2945,8 @@ static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen, return move_temp_to_file(tmp_file, filename); } -int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *returnsha1) +int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1) { - unsigned char sha1[20]; char hdr[32]; int hdrlen; @@ -2955,8 +2954,6 @@ int write_sha1_file(const void *buf, unsigned long len, const char *type, unsign * it out into .git/objects/??/?{38} file.
*/ write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen); - if (returnsha1) - hashcpy(returnsha1, sha1); if (has_sha1_file(sha1)) return 0; return write_loose_object(sha1, hdr, hdrlen, buf, len, 0);