fsck: report dangling objects with --connectivity-only

When --connectivity-only is in effect and dangling or lost-and-found
output was requested, check_connectivity() goes back over every loose
and packed object and, for each one that was loaded (HAS_OBJ) but not
marked REACHABLE, walks its referents with mark_used. The object's type
is looked up first so that fsck_walk() is not handed OBJ_NONE, which
would make it parse the object (blobs included).

The documentation for --connectivity-only is rewritten to match, and
the dangling-object test is split into a setup step plus one check with
and one without --connectivity-only.

diff --git a/Documentation/git-fsck.txt b/Documentation/git-fsck.txt
index 55950d9eea9..e0eae642c10 100644
--- a/Documentation/git-fsck.txt
+++ b/Documentation/git-fsck.txt
@@ -62,9 +62,17 @@ index file, all SHA-1 references in `refs` namespace, and all reflogs
 	with --no-full.
 
 --connectivity-only::
-	Check only the connectivity of tags, commits and tree objects. By
-	avoiding to unpack blobs, this speeds up the operation, at the
-	expense of missing corrupt objects or other problematic issues.
+	Check only the connectivity of reachable objects, making sure
+	that any objects referenced by a reachable tag, commit, or tree
+	are present. This speeds up the operation by avoiding reading
+	blobs entirely (though it does still check that referenced blobs
+	exist). This will detect corruption in commits and trees, but
+	not do any semantic checks (e.g., for format errors). Corruption
+	in blob objects will not be detected at all.
++
+Unreachable tags, commits, and trees will also be accessed to find the
+tips of dangling segments of history. Use `--no-dangling` if you don't
+care about this output and want to speed it up further.
 
 --strict::
 	Enable more strict checking, namely to catch a file mode
diff --git a/builtin/fsck.c b/builtin/fsck.c
index bb4227bebc6..d26fb0a0447 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -235,6 +235,48 @@ static int mark_used(struct object *obj, int type, void *data, struct fsck_optio
 	return 0;
 }
 
+static void mark_unreachable_referents(const struct object_id *oid)
+{
+	struct fsck_options options = FSCK_OPTIONS_DEFAULT;
+	struct object *obj = lookup_object(the_repository, oid->hash);
+
+	if (!obj || !(obj->flags & HAS_OBJ))
+		return; /* not part of our original set */
+	if (obj->flags & REACHABLE)
+		return; /* reachable objects already traversed */
+
+	/*
+	 * Avoid passing OBJ_NONE to fsck_walk, which will parse the object
+	 * (and we want to avoid parsing blobs).
+	 */
+	if (obj->type == OBJ_NONE) {
+		enum object_type type = oid_object_info(the_repository,
+							&obj->oid, NULL);
+		if (type > 0)
+			object_as_type(the_repository, obj, type, 0);
+	}
+
+	options.walk = mark_used;
+	fsck_walk(obj, NULL, &options);
+}
+
+static int mark_loose_unreachable_referents(const struct object_id *oid,
+					    const char *path,
+					    void *data)
+{
+	mark_unreachable_referents(oid);
+	return 0;
+}
+
+static int mark_packed_unreachable_referents(const struct object_id *oid,
+					     struct packed_git *pack,
+					     uint32_t pos,
+					     void *data)
+{
+	mark_unreachable_referents(oid);
+	return 0;
+}
+
 /*
  * Check a single reachable object
  */
@@ -347,6 +389,26 @@ static void check_connectivity(void)
 	/* Traverse the pending reachable objects */
 	traverse_reachable();
 
+	/*
+	 * With --connectivity-only, we won't have actually opened and marked
+	 * unreachable objects with USED. Do that now to make --dangling, etc
+	 * accurate.
+	 */
+	if (connectivity_only && (show_dangling || write_lost_and_found)) {
+		/*
+		 * Even though we already have a "struct object" for each of
+		 * these in memory, we must not iterate over the internal
+		 * object hash as we do below. Our loop would potentially
+		 * resize the hash, making our iteration invalid.
+		 *
+		 * Instead, we'll just go back to the source list of objects,
+		 * and ignore any that weren't present in our earlier
+		 * traversal.
+		 */
+		for_each_loose_object(mark_loose_unreachable_referents, NULL, 0);
+		for_each_packed_object(mark_packed_unreachable_referents, NULL, 0);
+	}
+
 	/* Look up all the requirements, warn about missing objects.. */
 	max = get_max_object_index();
 	if (verbose)
diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh
index c61f9721413..49f08d5b9c0 100755
--- a/t/t1450-fsck.sh
+++ b/t/t1450-fsck.sh
@@ -740,7 +740,7 @@ test_expect_success 'fsck detects truncated loose object' '
 # for each of type, we have one version which is referenced by another object
 # (and so while unreachable, not dangling), and another variant which really is
 # dangling.
-test_expect_success 'fsck notices dangling objects' '
+test_expect_success 'create dangling-object repository' '
 	git init dangling &&
 	(
 		cd dangling &&
@@ -751,12 +751,17 @@ test_expect_success 'fsck notices dangling objects' '
 		commit=$(git commit-tree $tree) &&
 		dcommit=$(git commit-tree -p $commit $tree) &&
 
-		cat >expect <<-EOF &&
+		cat >expect <<-EOF
 		dangling blob $dblob
 		dangling commit $dcommit
 		dangling tree $dtree
 		EOF
+	)
+'
 
+test_expect_success 'fsck notices dangling objects' '
+	(
+		cd dangling &&
 		git fsck >actual &&
 		# the output order is non-deterministic, as it comes from a hash
 		sort <actual >actual.sorted &&
@@ -764,6 +769,16 @@ test_expect_success 'fsck notices dangling objects' '
 	)
 '
 
+test_expect_success 'fsck --connectivity-only notices dangling objects' '
+	(
+		cd dangling &&
+		git fsck --connectivity-only >actual &&
+		# the output order is non-deterministic, as it comes from a hash
+		sort <actual >actual.sorted &&
+		test_i18ncmp expect actual.sorted
+	)
+'
+
 test_expect_success 'fsck $name notices bogus $name' '
 	test_must_fail git fsck bogus &&
 	test_must_fail git fsck $ZERO_OID