From 993a21b0a05bf2e2063c58e5722c29f5747e39d4 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 17 Jul 2016 12:59:44 +0200 Subject: [PATCH 1/4] fsck: refactor how to describe objects In many places, we refer to objects via their SHA-1s. Let's abstract that into a function. For the moment, it does nothing else than what we did previously: print out the 40-digit hex string. But that will change over the course of the next patches. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- builtin/fsck.c | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index 3f27456883..87df191e49 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -40,6 +40,11 @@ static int show_dangling = 1; #define ERROR_PACK 04 #define ERROR_REFS 010 +static const char *describe_object(struct object *obj) +{ + return oid_to_hex(&obj->oid); +} + static int fsck_config(const char *var, const char *value, void *cb) { if (strcmp(var, "fsck.skiplist") == 0) { @@ -67,7 +72,7 @@ static void objreport(struct object *obj, const char *msg_type, const char *err) { fprintf(stderr, "%s in %s %s: %s\n", - msg_type, typename(obj->type), oid_to_hex(&obj->oid), err); + msg_type, typename(obj->type), describe_object(obj), err); } static int objerror(struct object *obj, const char *err) @@ -97,7 +102,7 @@ static int mark_object(struct object *obj, int type, void *data, struct fsck_opt if (!obj) { /* ... these references to parent->fld are safe here */ printf("broken link from %7s %s\n", - typename(parent->type), oid_to_hex(&parent->oid)); + typename(parent->type), describe_object(parent)); printf("broken link from %7s %s\n", (type == OBJ_ANY ? "unknown" : typename(type)), "unknown"); errors_found |= ERROR_REACHABLE; @@ -114,9 +119,9 @@ static int mark_object(struct object *obj, int type, void *data, struct fsck_opt if (!(obj->flags & HAS_OBJ)) { if (parent && !has_object_file(&obj->oid)) { printf("broken link from %7s %s\n", - typename(parent->type), oid_to_hex(&parent->oid)); + typename(parent->type), describe_object(parent)); printf(" to %7s %s\n", - typename(obj->type), oid_to_hex(&obj->oid)); + typename(obj->type), describe_object(obj)); errors_found |= ERROR_REACHABLE; } return 1; @@ -190,7 +195,8 @@ static void check_reachable_object(struct object *obj) return; /* it is in pack - forget about it */ if (connectivity_only && has_object_file(&obj->oid)) return; - printf("missing %s %s\n", typename(obj->type), oid_to_hex(&obj->oid)); + printf("missing %s %s\n", typename(obj->type), + describe_object(obj)); errors_found |= ERROR_REACHABLE; return; } @@ -215,7 +221,8 @@ static void check_unreachable_object(struct object *obj) * since this is something that is prunable. */ if (show_unreachable) { - printf("unreachable %s %s\n", typename(obj->type), oid_to_hex(&obj->oid)); + printf("unreachable %s %s\n", typename(obj->type), + describe_object(obj)); return; } @@ -234,11 +241,11 @@ static void check_unreachable_object(struct object *obj) if (!obj->used) { if (show_dangling) printf("dangling %s %s\n", typename(obj->type), - oid_to_hex(&obj->oid)); + describe_object(obj)); if (write_lost_and_found) { char *filename = git_pathdup("lost-found/%s/%s", obj->type == OBJ_COMMIT ? "commit" : "other", - oid_to_hex(&obj->oid)); + describe_object(obj)); FILE *f; if (safe_create_leading_directories_const(filename)) { @@ -252,7 +259,7 @@ static void check_unreachable_object(struct object *obj) if (stream_blob_to_fd(fileno(f), obj->oid.hash, NULL, 1)) die_errno("Could not write '%s'", filename); } else - fprintf(f, "%s\n", oid_to_hex(&obj->oid)); + fprintf(f, "%s\n", describe_object(obj)); if (fclose(f)) die_errno("Could not finish '%s'", filename); @@ -271,7 +278,7 @@ static void check_unreachable_object(struct object *obj) static void check_object(struct object *obj) { if (verbose) - fprintf(stderr, "Checking %s\n", oid_to_hex(&obj->oid)); + fprintf(stderr, "Checking %s\n", describe_object(obj)); if (obj->flags & REACHABLE) check_reachable_object(obj); @@ -307,7 +314,7 @@ static int fsck_obj(struct object *obj) if (verbose) fprintf(stderr, "Checking %s %s\n", - typename(obj->type), oid_to_hex(&obj->oid)); + typename(obj->type), describe_object(obj)); if (fsck_walk(obj, NULL, &fsck_obj_options)) objerror(obj, "broken links"); @@ -326,15 +333,17 @@ static int fsck_obj(struct object *obj) free_commit_buffer(commit); if (!commit->parents && show_root) - printf("root %s\n", oid_to_hex(&commit->object.oid)); + printf("root %s\n", describe_object(&commit->object)); } if (obj->type == OBJ_TAG) { struct tag *tag = (struct tag *) obj; if (show_tags && tag->tagged) { - printf("tagged %s %s", typename(tag->tagged->type), oid_to_hex(&tag->tagged->oid)); - printf(" (%s) in %s\n", tag->tag, oid_to_hex(&tag->object.oid)); + printf("tagged %s %s", typename(tag->tagged->type), + describe_object(tag->tagged)); + printf(" (%s) in %s\n", tag->tag, + describe_object(&tag->object)); } } From 7b35efd734e501f9e4692768a8b6aea818c0c93e Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 17 Jul 2016 12:59:49 +0200 Subject: [PATCH 2/4] fsck_walk(): optionally name objects on the go MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If fsck_options->name_objects is initialized, and if it already has name(s) for the object(s) that are to be the starting point(s) for fsck_walk(), then that function will now add names for the objects that were walked. This will be highly useful for teaching git-fsck to identify root causes for broken links, which is the task for the next patch in this series. Note that this patch opts for decorating the objects with plain strings instead of full-blown structs (à la `struct rev_name` in the code of the `git name-rev` command), for several reasons: - the code is much simpler than if it had to work with structs that describe arbitrarily long names such as "master~14^2~5:builtin/am.c", - the string processing is actually quite light-weight compared to the rest of fsck's operation, - the caller of fsck_walk() is expected to provide names for the starting points, and using plain and simple strings is just the easiest way to do that. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- fsck.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--- fsck.h | 1 + 2 files changed, 84 insertions(+), 4 deletions(-) diff --git a/fsck.c b/fsck.c index 05315451c5..6ed90ec15e 100644 --- a/fsck.c +++ b/fsck.c @@ -9,6 +9,7 @@ #include "refs.h" #include "utf8.h" #include "sha1-array.h" +#include "decorate.h" #define FSCK_FATAL -1 #define FSCK_INFO -2 @@ -297,25 +298,64 @@ static int report(struct fsck_options *options, struct object *object, return result; } +static char *get_object_name(struct fsck_options *options, struct object *obj) +{ + if (!options->object_names) + return NULL; + return lookup_decoration(options->object_names, obj); +} + +static void put_object_name(struct fsck_options *options, struct object *obj, + const char *fmt, ...) +{ + va_list ap; + struct strbuf buf = STRBUF_INIT; + char *existing; + + if (!options->object_names) + return; + existing = lookup_decoration(options->object_names, obj); + if (existing) + return; + va_start(ap, fmt); + strbuf_vaddf(&buf, fmt, ap); + add_decoration(options->object_names, obj, strbuf_detach(&buf, NULL)); + va_end(ap); +} + static int fsck_walk_tree(struct tree *tree, void *data, struct fsck_options *options) { struct tree_desc desc; struct name_entry entry; int res = 0; + const char *name; if (parse_tree(tree)) return -1; + name = get_object_name(options, &tree->object); init_tree_desc(&desc, tree->buffer, tree->size); while (tree_entry(&desc, &entry)) { + struct object *obj; int result; if (S_ISGITLINK(entry.mode)) continue; - if (S_ISDIR(entry.mode)) - result = options->walk(&lookup_tree(entry.oid->hash)->object, OBJ_TREE, data, options); - else if (S_ISREG(entry.mode) || S_ISLNK(entry.mode)) - result = options->walk(&lookup_blob(entry.oid->hash)->object, OBJ_BLOB, data, options); + + if (S_ISDIR(entry.mode)) { + obj = &lookup_tree(entry.oid->hash)->object; + if (name) + put_object_name(options, obj, "%s%s/", name, + entry.path); + result = options->walk(obj, OBJ_TREE, data, options); + } + else if (S_ISREG(entry.mode) || S_ISLNK(entry.mode)) { + obj = &lookup_blob(entry.oid->hash)->object; + if (name) + put_object_name(options, obj, "%s%s", name, + entry.path); + result = options->walk(obj, OBJ_BLOB, data, options); + } else { result = error("in tree %s: entry %s has bad mode %.6o", oid_to_hex(&tree->object.oid), entry.path, entry.mode); @@ -330,20 +370,55 @@ static int fsck_walk_tree(struct tree *tree, void *data, struct fsck_options *op static int fsck_walk_commit(struct commit *commit, void *data, struct fsck_options *options) { + int counter = 0, generation = 0, name_prefix_len = 0; struct commit_list *parents; int res; int result; + const char *name; if (parse_commit(commit)) return -1; + name = get_object_name(options, &commit->object); + if (name) + put_object_name(options, &commit->tree->object, "%s:", name); + result = options->walk((struct object *)commit->tree, OBJ_TREE, data, options); if (result < 0) return result; res = result; parents = commit->parents; + if (name && parents) { + int len = strlen(name), power; + + if (len && name[len - 1] == '^') { + generation = 1; + name_prefix_len = len - 1; + } + else { /* parse ~ suffix */ + for (generation = 0, power = 1; + len && isdigit(name[len - 1]); + power *= 10) + generation += power * (name[--len] - '0'); + if (power > 1 && len && name[len - 1] == '~') + name_prefix_len = len - 1; + } + } + while (parents) { + if (name) { + struct object *obj = &parents->item->object; + + if (++counter > 1) + put_object_name(options, obj, "%s^%d", + name, counter); + else if (generation > 0) + put_object_name(options, obj, "%.*s~%d", + name_prefix_len, name, generation + 1); + else + put_object_name(options, obj, "%s^", name); + } result = options->walk((struct object *)parents->item, OBJ_COMMIT, data, options); if (result < 0) return result; @@ -356,8 +431,12 @@ static int fsck_walk_commit(struct commit *commit, void *data, struct fsck_optio static int fsck_walk_tag(struct tag *tag, void *data, struct fsck_options *options) { + char *name = get_object_name(options, &tag->object); + if (parse_tag(tag)) return -1; + if (name) + put_object_name(options, tag->tagged, "%s", name); return options->walk(tag->tagged, OBJ_ANY, data, options); } diff --git a/fsck.h b/fsck.h index dded84b5f9..26c0d41eab 100644 --- a/fsck.h +++ b/fsck.h @@ -33,6 +33,7 @@ struct fsck_options { unsigned strict:1; int *msg_type; struct sha1_array *skiplist; + struct decoration *object_names; }; #define FSCK_OPTIONS_DEFAULT { NULL, fsck_error_function, 0, NULL } From 1cd772cc4124e43b14231dcaeae8a5dddf4ffdb9 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 17 Jul 2016 12:59:57 +0200 Subject: [PATCH 3/4] fsck: give the error function a chance to see the fsck_options We will need this in the next commit, where fsck will be taught to optionally name the objects when reporting issues about them. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- builtin/fsck.c | 3 ++- fsck.c | 5 +++-- fsck.h | 6 ++++-- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index 87df191e49..6c9d598853 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -82,7 +82,8 @@ static int objerror(struct object *obj, const char *err) return -1; } -static int fsck_error_func(struct object *obj, int type, const char *message) +static int fsck_error_func(struct fsck_options *o, + struct object *obj, int type, const char *message) { objreport(obj, (type == FSCK_WARN) ? "warning" : "error", message); return (type == FSCK_WARN) ? 0 : 1; diff --git a/fsck.c b/fsck.c index 6ed90ec15e..828c5c6484 100644 --- a/fsck.c +++ b/fsck.c @@ -291,7 +291,7 @@ static int report(struct fsck_options *options, struct object *object, va_start(ap, fmt); strbuf_vaddf(&sb, fmt, ap); - result = options->error_func(object, msg_type, sb.buf); + result = options->error_func(options, object, msg_type, sb.buf); strbuf_release(&sb); va_end(ap); @@ -897,7 +897,8 @@ int fsck_object(struct object *obj, void *data, unsigned long size, obj->type); } -int fsck_error_function(struct object *obj, int msg_type, const char *message) +int fsck_error_function(struct fsck_options *o, + struct object *obj, int msg_type, const char *message) { if (msg_type == FSCK_WARN) { warning("object %s: %s", oid_to_hex(&obj->oid), message); diff --git a/fsck.h b/fsck.h index 26c0d41eab..1891c1863b 100644 --- a/fsck.h +++ b/fsck.h @@ -23,9 +23,11 @@ int is_valid_msg_type(const char *msg_id, const char *msg_type); typedef int (*fsck_walk_func)(struct object *obj, int type, void *data, struct fsck_options *options); /* callback for fsck_object, type is FSCK_ERROR or FSCK_WARN */ -typedef int (*fsck_error)(struct object *obj, int type, const char *message); +typedef int (*fsck_error)(struct fsck_options *o, + struct object *obj, int type, const char *message); -int fsck_error_function(struct object *obj, int type, const char *message); +int fsck_error_function(struct fsck_options *o, + struct object *obj, int type, const char *message); struct fsck_options { fsck_walk_func walk; From 90cf590f53f2939a47ca7b397e270e8228699829 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 17 Jul 2016 13:00:02 +0200 Subject: [PATCH 4/4] fsck: optionally show more helpful info for broken links When reporting broken links between commits/trees/blobs, it would be quite helpful at times if the user would be told how the object is supposed to be reachable. With the new --name-objects option, git-fsck will try to do exactly that: name the objects in a way that shows how they are reachable. For example, when some reflog got corrupted and a blob is missing that should not be, the user might want to remove the corresponding reflog entry. This option helps them find that entry: `git fsck` will now report something like this: broken link from tree b5eb6ff... (refs/stash@{}~37:) to blob ec5cf80... Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- Documentation/git-fsck.txt | 9 +++++++- builtin/fsck.c | 42 ++++++++++++++++++++++++++++++++++---- fsck.c | 21 +++++++++++++++---- t/t1450-fsck.sh | 22 ++++++++++++++++++++ 4 files changed, 85 insertions(+), 9 deletions(-) diff --git a/Documentation/git-fsck.txt b/Documentation/git-fsck.txt index 7fc68eb319..b9f060e3b2 100644 --- a/Documentation/git-fsck.txt +++ b/Documentation/git-fsck.txt @@ -11,7 +11,8 @@ SYNOPSIS [verse] 'git fsck' [--tags] [--root] [--unreachable] [--cache] [--no-reflogs] [--[no-]full] [--strict] [--verbose] [--lost-found] - [--[no-]dangling] [--[no-]progress] [--connectivity-only] [*] + [--[no-]dangling] [--[no-]progress] [--connectivity-only] + [--[no-]name-objects] [*] DESCRIPTION ----------- @@ -82,6 +83,12 @@ index file, all SHA-1 references in `refs` namespace, and all reflogs a blob, the contents are written into the file, rather than its object name. +--name-objects:: + When displaying names of reachable objects, in addition to the + SHA-1 also display a name that describes *how* they are reachable, + compatible with linkgit:git-rev-parse[1], e.g. + `HEAD@{1234567890}~25^2:src/`. + --[no-]progress:: Progress status is reported on the standard error stream by default when it is attached to a terminal, unless diff --git a/builtin/fsck.c b/builtin/fsck.c index 6c9d598853..c6d17e63fd 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -13,6 +13,7 @@ #include "dir.h" #include "progress.h" #include "streaming.h" +#include "decorate.h" #define REACHABLE 0x0001 #define SEEN 0x0002 @@ -35,6 +36,7 @@ static int write_lost_and_found; static int verbose; static int show_progress = -1; static int show_dangling = 1; +static int name_objects; #define ERROR_OBJECT 01 #define ERROR_REACHABLE 02 #define ERROR_PACK 04 @@ -42,7 +44,16 @@ static int show_dangling = 1; static const char *describe_object(struct object *obj) { - return oid_to_hex(&obj->oid); + static struct strbuf buf = STRBUF_INIT; + char *name = name_objects ? + lookup_decoration(fsck_walk_options.object_names, obj) : NULL; + + strbuf_reset(&buf); + strbuf_addstr(&buf, oid_to_hex(&obj->oid)); + if (name) + strbuf_addf(&buf, " (%s)", name); + + return buf.buf; } static int fsck_config(const char *var, const char *value, void *cb) @@ -378,13 +389,18 @@ static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type, static int default_refs; -static void fsck_handle_reflog_sha1(const char *refname, unsigned char *sha1) +static void fsck_handle_reflog_sha1(const char *refname, unsigned char *sha1, + unsigned long timestamp) { struct object *obj; if (!is_null_sha1(sha1)) { obj = lookup_object(sha1); if (obj) { + if (timestamp && name_objects) + add_decoration(fsck_walk_options.object_names, + obj, + xstrfmt("%s@{%ld}", refname, timestamp)); obj->used = 1; mark_object_reachable(obj); } else { @@ -404,8 +420,8 @@ static int fsck_handle_reflog_ent(unsigned char *osha1, unsigned char *nsha1, fprintf(stderr, "Checking reflog %s->%s\n", sha1_to_hex(osha1), sha1_to_hex(nsha1)); - fsck_handle_reflog_sha1(refname, osha1); - fsck_handle_reflog_sha1(refname, nsha1); + fsck_handle_reflog_sha1(refname, osha1, 0); + fsck_handle_reflog_sha1(refname, nsha1, timestamp); return 0; } @@ -434,6 +450,9 @@ static int fsck_handle_ref(const char *refname, const struct object_id *oid, } default_refs++; obj->used = 1; + if (name_objects) + add_decoration(fsck_walk_options.object_names, + obj, xstrdup(refname)); mark_object_reachable(obj); return 0; @@ -549,6 +568,9 @@ static int fsck_cache_tree(struct cache_tree *it) return 1; } obj->used = 1; + if (name_objects) + add_decoration(fsck_walk_options.object_names, + obj, xstrdup(":")); mark_object_reachable(obj); if (obj->type != OBJ_TREE) err |= objerror(obj, "non-tree in cache-tree"); @@ -577,6 +599,7 @@ static struct option fsck_opts[] = { OPT_BOOL(0, "lost-found", &write_lost_and_found, N_("write dangling objects in .git/lost-found")), OPT_BOOL(0, "progress", &show_progress, N_("show progress")), + OPT_BOOL(0, "name-objects", &name_objects, N_("show verbose names for reachable objects")), OPT_END(), }; @@ -606,6 +629,10 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) include_reflogs = 0; } + if (name_objects) + fsck_walk_options.object_names = + xcalloc(1, sizeof(struct decoration)); + git_config(fsck_config, NULL); fsck_head_link(); @@ -661,6 +688,9 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) continue; obj->used = 1; + if (name_objects) + add_decoration(fsck_walk_options.object_names, + obj, xstrdup(arg)); mark_object_reachable(obj); heads++; continue; @@ -693,6 +723,10 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) continue; obj = &blob->object; obj->used = 1; + if (name_objects) + add_decoration(fsck_walk_options.object_names, + obj, + xstrfmt(":%s", active_cache[i]->name)); mark_object_reachable(obj); } if (active_cache_tree) diff --git a/fsck.c b/fsck.c index 828c5c6484..c9cf3de8d3 100644 --- a/fsck.c +++ b/fsck.c @@ -323,6 +323,19 @@ static void put_object_name(struct fsck_options *options, struct object *obj, va_end(ap); } +static const char *describe_object(struct fsck_options *o, struct object *obj) +{ + static struct strbuf buf = STRBUF_INIT; + char *name; + + strbuf_reset(&buf); + strbuf_addstr(&buf, oid_to_hex(&obj->oid)); + if (o->object_names && (name = lookup_decoration(o->object_names, obj))) + strbuf_addf(&buf, " (%s)", name); + + return buf.buf; +} + static int fsck_walk_tree(struct tree *tree, void *data, struct fsck_options *options) { struct tree_desc desc; @@ -358,7 +371,7 @@ static int fsck_walk_tree(struct tree *tree, void *data, struct fsck_options *op } else { result = error("in tree %s: entry %s has bad mode %.6o", - oid_to_hex(&tree->object.oid), entry.path, entry.mode); + describe_object(options, &tree->object), entry.path, entry.mode); } if (result < 0) return result; @@ -454,7 +467,7 @@ int fsck_walk(struct object *obj, void *data, struct fsck_options *options) case OBJ_TAG: return fsck_walk_tag((struct tag *)obj, data, options); default: - error("Unknown object type for %s", oid_to_hex(&obj->oid)); + error("Unknown object type for %s", describe_object(options, obj)); return -1; } } @@ -901,9 +914,9 @@ int fsck_error_function(struct fsck_options *o, struct object *obj, int msg_type, const char *message) { if (msg_type == FSCK_WARN) { - warning("object %s: %s", oid_to_hex(&obj->oid), message); + warning("object %s: %s", describe_object(o, obj), message); return 0; } - error("object %s: %s", oid_to_hex(&obj->oid), message); + error("object %s: %s", describe_object(o, obj), message); return 1; } diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index 7ee8ea004f..8f52da2771 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -523,4 +523,26 @@ test_expect_success 'fsck --connectivity-only' ' ) ' +remove_loose_object () { + sha1="$(git rev-parse "$1")" && + remainder=${sha1#??} && + firsttwo=${sha1%$remainder} && + rm .git/objects/$firsttwo/$remainder +} + +test_expect_success 'fsck --name-objects' ' + rm -rf name-objects && + git init name-objects && + ( + cd name-objects && + test_commit julius caesar.t && + test_commit augustus && + test_commit caesar && + remove_loose_object $(git rev-parse julius:caesar.t) && + test_must_fail git fsck --name-objects >out && + tree=$(git rev-parse --verify julius:) && + grep "$tree (\(refs/heads/master\|HEAD\)@{[0-9]*}:" out + ) +' + test_done