From fe8e3b71805cd13d139b62fa5a0c75387568c9ea Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 10 Sep 2014 15:52:44 +0200 Subject: [PATCH 1/6] Refactor type_from_string() to allow continuing after detecting an error In the next commits, we will enhance the fsck_tag() function to check tag objects more thoroughly. To this end, we need a function to verify that a given string is a valid object type, but that does not die() in the negative case. While at it, prepare type_from_string() for counted strings, i.e. strings with an explicitly specified length rather than a NUL termination. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- object.c | 11 +++++++++-- object.h | 3 ++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/object.c b/object.c index a16b9f9e93..aedac243f0 100644 --- a/object.c +++ b/object.c @@ -33,13 +33,20 @@ const char *typename(unsigned int type) return object_type_strings[type]; } -int type_from_string(const char *str) +int type_from_string_gently(const char *str, ssize_t len, int gentle) { int i; + if (len < 0) + len = strlen(str); + for (i = 1; i < ARRAY_SIZE(object_type_strings); i++) - if (!strcmp(str, object_type_strings[i])) + if (!strncmp(str, object_type_strings[i], len)) return i; + + if (gentle) + return -1; + die("invalid object type \"%s\"", str); } diff --git a/object.h b/object.h index 5e8d8ee548..e028ced74c 100644 --- a/object.h +++ b/object.h @@ -53,7 +53,8 @@ struct object { }; extern const char *typename(unsigned int type); -extern int type_from_string(const char *str); +extern int type_from_string_gently(const char *str, ssize_t, int gentle); +#define type_from_string(str) type_from_string_gently(str, -1, 0) /* * Return the current number of buckets in the object hashmap. From 90a398bbd72477d5d228818db5665fdfcf13431b Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 10 Sep 2014 15:52:51 +0200 Subject: [PATCH 2/6] fsck_object(): allow passing object data separately from the object itself When fsck'ing an incoming pack, we need to fsck objects that cannot be read via read_sha1_file() because they are not local yet (and might even be rejected if transfer.fsckobjects is set to 'true'). For commits, there is a hack in place: we basically cache commit objects' buffers anyway, but the same is not true, say, for tag objects. By refactoring fsck_object() to take the object buffer and size as optional arguments -- optional, because we still fall back to the previous method to look at the cached commit objects if the caller passes NULL -- we prepare the machinery for the upcoming handling of tag objects. The assumption that such buffers are inherently NUL terminated is now wrong, of course, hence we pass the size of the buffer so that we can add a sanity check later, to prevent running past the end of the buffer. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- builtin/fsck.c | 2 +- builtin/index-pack.c | 3 ++- builtin/unpack-objects.c | 14 ++++++++++---- fsck.c | 24 +++++++++++++++--------- fsck.h | 4 +++- 5 files changed, 31 insertions(+), 16 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index d42a27da89..d9f4e6e38c 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -298,7 +298,7 @@ static int fsck_obj(struct object *obj) if (fsck_walk(obj, mark_used, NULL)) objerror(obj, "broken links"); - if (fsck_object(obj, check_strict, fsck_error_func)) + if (fsck_object(obj, NULL, 0, check_strict, fsck_error_func)) return -1; if (obj->type == OBJ_TREE) { diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 5568a5bc3b..f2465ff18e 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -773,7 +773,8 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, if (!obj) die(_("invalid %s"), typename(type)); if (do_fsck_object && - fsck_object(obj, 1, fsck_error_function)) + fsck_object(obj, buf, size, 1, + fsck_error_function)) die(_("Error in object")); if (fsck_walk(obj, mark_link, NULL)) die(_("Not all child objects of %s are reachable"), sha1_to_hex(obj->sha1)); diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 99cde45879..855d94b90b 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -164,10 +164,10 @@ static unsigned nr_objects; * Called only from check_object() after it verified this object * is Ok. */ -static void write_cached_object(struct object *obj) +static void write_cached_object(struct object *obj, struct obj_buffer *obj_buf) { unsigned char sha1[20]; - struct obj_buffer *obj_buf = lookup_object_buffer(obj); + if (write_sha1_file(obj_buf->buffer, obj_buf->size, typename(obj->type), sha1) < 0) die("failed to write object %s", sha1_to_hex(obj->sha1)); obj->flags |= FLAG_WRITTEN; @@ -180,6 +180,8 @@ static void write_cached_object(struct object *obj) */ static int check_object(struct object *obj, int type, void *data) { + struct obj_buffer *obj_buf; + if (!obj) return 1; @@ -198,11 +200,15 @@ static int check_object(struct object *obj, int type, void *data) return 0; } - if (fsck_object(obj, 1, fsck_error_function)) + obj_buf = lookup_object_buffer(obj); + if (!obj_buf) + die("Whoops! Cannot find object '%s'", sha1_to_hex(obj->sha1)); + if (fsck_object(obj, obj_buf->buffer, obj_buf->size, 1, + fsck_error_function)) die("Error in object"); if (fsck_walk(obj, check_object, NULL)) die("Error on reachable objects of %s", sha1_to_hex(obj->sha1)); - write_cached_object(obj); + write_cached_object(obj, obj_buf); return 0; } diff --git a/fsck.c b/fsck.c index 56156fff44..dd77628958 100644 --- a/fsck.c +++ b/fsck.c @@ -277,7 +277,7 @@ static int fsck_ident(const char **ident, struct object *obj, fsck_error error_f } static int fsck_commit_buffer(struct commit *commit, const char *buffer, - fsck_error error_func) + unsigned long size, fsck_error error_func) { unsigned char tree_sha1[20], sha1[20]; struct commit_graft *graft; @@ -322,15 +322,18 @@ static int fsck_commit_buffer(struct commit *commit, const char *buffer, return 0; } -static int fsck_commit(struct commit *commit, fsck_error error_func) +static int fsck_commit(struct commit *commit, const char *data, + unsigned long size, fsck_error error_func) { - const char *buffer = get_commit_buffer(commit, NULL); - int ret = fsck_commit_buffer(commit, buffer, error_func); - unuse_commit_buffer(commit, buffer); + const char *buffer = data ? data : get_commit_buffer(commit, &size); + int ret = fsck_commit_buffer(commit, buffer, size, error_func); + if (!data) + unuse_commit_buffer(commit, buffer); return ret; } -static int fsck_tag(struct tag *tag, fsck_error error_func) +static int fsck_tag(struct tag *tag, const char *data, + unsigned long size, fsck_error error_func) { struct object *tagged = tag->tagged; @@ -339,7 +342,8 @@ static int fsck_tag(struct tag *tag, fsck_error error_func) return 0; } -int fsck_object(struct object *obj, int strict, fsck_error error_func) +int fsck_object(struct object *obj, void *data, unsigned long size, + int strict, fsck_error error_func) { if (!obj) return error_func(obj, FSCK_ERROR, "no valid object to fsck"); @@ -349,9 +353,11 @@ int fsck_object(struct object *obj, int strict, fsck_error error_func) if (obj->type == OBJ_TREE) return fsck_tree((struct tree *) obj, strict, error_func); if (obj->type == OBJ_COMMIT) - return fsck_commit((struct commit *) obj, error_func); + return fsck_commit((struct commit *) obj, (const char *) data, + size, error_func); if (obj->type == OBJ_TAG) - return fsck_tag((struct tag *) obj, error_func); + return fsck_tag((struct tag *) obj, (const char *) data, + size, error_func); return error_func(obj, FSCK_ERROR, "unknown type '%d' (internal fsck error)", obj->type); diff --git a/fsck.h b/fsck.h index 1e4f527318..d1e6387a44 100644 --- a/fsck.h +++ b/fsck.h @@ -28,6 +28,8 @@ int fsck_error_function(struct object *obj, int type, const char *fmt, ...); * 0 everything OK */ int fsck_walk(struct object *obj, fsck_walk_func walk, void *data); -int fsck_object(struct object *obj, int strict, fsck_error error_func); +/* If NULL is passed for data, we assume the object is local and read it. */ +int fsck_object(struct object *obj, void *data, unsigned long size, + int strict, fsck_error error_func); #endif From 4d0d89755e82c40df88cf94d84031978f8eac827 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 11 Sep 2014 16:26:33 +0200 Subject: [PATCH 3/6] Make sure fsck_commit_buffer() does not run out of the buffer So far, we assumed that the buffer is NUL terminated, but this is not a safe assumption, now that we opened the fsck_object() API to pass a buffer directly. So let's make sure that there is at least an empty line in the buffer. That way, our checks would fail if the empty line was encountered prematurely, and consequently we can get away with the current string comparisons even with non-NUL-terminated buffers are passed to fsck_object(). Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- fsck.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/fsck.c b/fsck.c index dd77628958..73da6f8147 100644 --- a/fsck.c +++ b/fsck.c @@ -237,6 +237,26 @@ static int fsck_tree(struct tree *item, int strict, fsck_error error_func) return retval; } +static int require_end_of_header(const void *data, unsigned long size, + struct object *obj, fsck_error error_func) +{ + const char *buffer = (const char *)data; + unsigned long i; + + for (i = 0; i < size; i++) { + switch (buffer[i]) { + case '\0': + return error_func(obj, FSCK_ERROR, + "unterminated header: NUL at offset %d", i); + case '\n': + if (i + 1 < size && buffer[i + 1] == '\n') + return 0; + } + } + + return error_func(obj, FSCK_ERROR, "unterminated header"); +} + static int fsck_ident(const char **ident, struct object *obj, fsck_error error_func) { char *end; @@ -284,6 +304,9 @@ static int fsck_commit_buffer(struct commit *commit, const char *buffer, unsigned parent_count, parent_line_count = 0; int err; + if (require_end_of_header(buffer, size, &commit->object, error_func)) + return -1; + if (!skip_prefix(buffer, "tree ", &buffer)) return error_func(&commit->object, FSCK_ERROR, "invalid format - expected 'tree' line"); if (get_sha1_hex(buffer, tree_sha1) || buffer[40] != '\n') From cec097be3a98be3773ddadf6824de03b849758d7 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 11 Sep 2014 16:26:38 +0200 Subject: [PATCH 4/6] fsck: check tag objects' headers We inspect commit objects pretty much in detail in git-fsck, but we just glanced over the tag objects. Let's be stricter. Since we do not want to limit 'tag' lines unduly, values that would fail the refname check only result in warnings, not errors. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- fsck.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 85 insertions(+), 1 deletion(-) diff --git a/fsck.c b/fsck.c index 73da6f8147..2fffa434a5 100644 --- a/fsck.c +++ b/fsck.c @@ -6,6 +6,7 @@ #include "commit.h" #include "tag.h" #include "fsck.h" +#include "refs.h" static int fsck_walk_tree(struct tree *tree, fsck_walk_func walk, void *data) { @@ -355,6 +356,88 @@ static int fsck_commit(struct commit *commit, const char *data, return ret; } +static int fsck_tag_buffer(struct tag *tag, const char *data, + unsigned long size, fsck_error error_func) +{ + unsigned char sha1[20]; + int ret = 0; + const char *buffer; + char *to_free = NULL, *eol; + struct strbuf sb = STRBUF_INIT; + + if (data) + buffer = data; + else { + enum object_type type; + + buffer = to_free = + read_sha1_file(tag->object.sha1, &type, &size); + if (!buffer) + return error_func(&tag->object, FSCK_ERROR, + "cannot read tag object"); + + if (type != OBJ_TAG) { + ret = error_func(&tag->object, FSCK_ERROR, + "expected tag got %s", + typename(type)); + goto done; + } + } + + if (require_end_of_header(buffer, size, &tag->object, error_func)) + goto done; + + if (!skip_prefix(buffer, "object ", &buffer)) { + ret = error_func(&tag->object, FSCK_ERROR, "invalid format - expected 'object' line"); + goto done; + } + if (get_sha1_hex(buffer, sha1) || buffer[40] != '\n') { + ret = error_func(&tag->object, FSCK_ERROR, "invalid 'object' line format - bad sha1"); + goto done; + } + buffer += 41; + + if (!skip_prefix(buffer, "type ", &buffer)) { + ret = error_func(&tag->object, FSCK_ERROR, "invalid format - expected 'type' line"); + goto done; + } + eol = strchr(buffer, '\n'); + if (!eol) { + ret = error_func(&tag->object, FSCK_ERROR, "invalid format - unexpected end after 'type' line"); + goto done; + } + if (type_from_string_gently(buffer, eol - buffer, 1) < 0) + ret = error_func(&tag->object, FSCK_ERROR, "invalid 'type' value"); + if (ret) + goto done; + buffer = eol + 1; + + if (!skip_prefix(buffer, "tag ", &buffer)) { + ret = error_func(&tag->object, FSCK_ERROR, "invalid format - expected 'tag' line"); + goto done; + } + eol = strchr(buffer, '\n'); + if (!eol) { + ret = error_func(&tag->object, FSCK_ERROR, "invalid format - unexpected end after 'type' line"); + goto done; + } + strbuf_addf(&sb, "refs/tags/%.*s", (int)(eol - buffer), buffer); + if (check_refname_format(sb.buf, 0)) + error_func(&tag->object, FSCK_WARN, "invalid 'tag' name: %s", buffer); + buffer = eol + 1; + + if (!skip_prefix(buffer, "tagger ", &buffer)) + /* early tags do not contain 'tagger' lines; warn only */ + error_func(&tag->object, FSCK_WARN, "invalid format - expected 'tagger' line"); + else + ret = fsck_ident(&buffer, &tag->object, error_func); + +done: + strbuf_release(&sb); + free(to_free); + return ret; +} + static int fsck_tag(struct tag *tag, const char *data, unsigned long size, fsck_error error_func) { @@ -362,7 +445,8 @@ static int fsck_tag(struct tag *tag, const char *data, if (!tagged) return error_func(&tag->object, FSCK_ERROR, "could not load tagged object"); - return 0; + + return fsck_tag_buffer(tag, data, size, error_func); } int fsck_object(struct object *obj, void *data, unsigned long size, From 90e3e5f0574480cb873cca1c7b968dd1516c05d2 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 11 Sep 2014 16:26:41 +0200 Subject: [PATCH 5/6] Add regression tests for stricter tag fsck'ing The intent of the new test case is to catch general breakages in the fsck_tag() function, not so much to test it extensively, trying to strike the proper balance between thoroughness and speed. While it *would* have been nice to test the code path where fsck_object() encounters an invalid tag object, this is not possible using git fsck: tag objects are parsed already before fsck'ing (and the parser already fails upon such objects). Even worse: we would not even be able write out invalid tag objects because git hash-object parses those objects, too, unless we resorted to really ugly hacks such as using something like this in the unit tests (essentially depending on Perl *and* Compress::Zlib): hash_invalid_object () { contents="$(printf '%s %d\0%s' "$1" ${#2} "$2")" && sha1=$(echo "$contents" | test-sha1) && suffix=${sha1#??} && mkdir -p .git/objects/${sha1%$suffix} && echo "$contents" | perl -MCompress::Zlib -e 'undef $/; print compress(<>)' \ > .git/objects/${sha1%$suffix}/$suffix && echo $sha1 } Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t1450-fsck.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index 8c739c9613..1b96b4045b 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -194,6 +194,25 @@ test_expect_success 'tag pointing to something else than its type' ' test_must_fail git fsck --tags ' +test_expect_success 'tag with incorrect tag name & missing tagger' ' + sha=$(git rev-parse HEAD) && + cat >wrong-tag <<-EOF && + object $sha + type commit + tag wrong name format + + This is an invalid tag. + EOF + + tag=$(git hash-object -t tag -w --stdin .git/refs/tags/wrong && + test_when_finished "git update-ref -d refs/tags/wrong" && + git fsck --tags 2>out && + grep "invalid .tag. name" out && + grep "expected .tagger. line" out +' + test_expect_success 'cleaned up' ' git fsck >actual 2>&1 && test_cmp empty actual From f99b7af661f89865f918e52223a3bdaf312a0de0 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 12 Sep 2014 10:08:16 +0200 Subject: [PATCH 6/6] Make sure that index-pack --strict checks tag objects One of the most important use cases for the strict tag object checking is when transfer.fsckobjects is set to true to catch invalid objects early on. This new regression test essentially tests the same code path by directly calling 'index-pack --strict' on a pack containing an tag object without a 'tagger' line. Technically, this test is not enough: it only exercises a code path that *warns*, not one that *fails*. The reason is that hash-object and pack-objects both insist on parsing the tag objects and would fail on invalid tag objects at this time. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t5302-pack-index.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/t/t5302-pack-index.sh b/t/t5302-pack-index.sh index 4bbb718751..61bc8da560 100755 --- a/t/t5302-pack-index.sh +++ b/t/t5302-pack-index.sh @@ -243,4 +243,23 @@ test_expect_success 'running index-pack in the object store' ' test -f .git/objects/pack/pack-${pack1}.idx ' +test_expect_success 'index-pack --strict warns upon missing tagger in tag' ' + sha=$(git rev-parse HEAD) && + cat >wrong-tag <err && + grep "^error:.* expected .tagger. line" err +' + test_done