diff --git a/builtin/fsck.c b/builtin/fsck.c index 3e67203f9c..75e836e2fd 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -205,8 +205,6 @@ static void check_reachable_object(struct object *obj) if (!(obj->flags & HAS_OBJ)) { if (has_sha1_pack(obj->oid.hash)) return; /* it is in pack - forget about it */ - if (connectivity_only && has_object_file(&obj->oid)) - return; printf("missing %s %s\n", typename(obj->type), describe_object(obj)); errors_found |= ERROR_REACHABLE; @@ -584,6 +582,79 @@ static int fsck_cache_tree(struct cache_tree *it) return err; } +static void mark_object_for_connectivity(const unsigned char *sha1) +{ + struct object *obj = lookup_object(sha1); + + /* + * Setting the object type here isn't strictly necessary for a + * connectivity check. In most cases, our walk will expect a certain + * type (e.g., a tree referencing a blob) and will use lookup_blob() to + * assign the type. But doing it here has two advantages: + * + * 1. When the fsck_walk code looks at objects that _don't_ come from + * links (e.g., the tip of a ref), it may complain about the + * "unknown object type". + * + * 2. This serves as a nice cross-check that the graph links are + * sane. So --connectivity-only does not check that the bits of + * blobs are not corrupted, but it _does_ check that 100644 tree + * entries point to blobs, and so forth. + * + * Unfortunately we can't just use parse_object() here, because the + * whole point of --connectivity-only is to avoid reading the object + * data more than necessary. + */ + if (!obj || obj->type == OBJ_NONE) { + enum object_type type = sha1_object_info(sha1, NULL); + switch (type) { + case OBJ_BAD: + error("%s: unable to read object type", + sha1_to_hex(sha1)); + break; + case OBJ_COMMIT: + obj = (struct object *)lookup_commit(sha1); + break; + case OBJ_TREE: + obj = (struct object *)lookup_tree(sha1); + break; + case OBJ_BLOB: + obj = (struct object *)lookup_blob(sha1); + break; + case OBJ_TAG: + obj = (struct object *)lookup_tag(sha1); + break; + default: + error("%s: unknown object type %d", + sha1_to_hex(sha1), type); + } + + if (!obj || obj->type == OBJ_NONE) { + errors_found |= ERROR_OBJECT; + return; + } + } + + obj->flags |= HAS_OBJ; +} + +static int mark_loose_for_connectivity(const unsigned char *sha1, + const char *path, + void *data) +{ + mark_object_for_connectivity(sha1); + return 0; +} + +static int mark_packed_for_connectivity(const unsigned char *sha1, + struct packed_git *pack, + uint32_t pos, + void *data) +{ + mark_object_for_connectivity(sha1); + return 0; +} + static char const * const fsck_usage[] = { N_("git fsck [] [...]"), NULL @@ -640,38 +711,41 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) git_config(fsck_config, NULL); fsck_head_link(); - if (!connectivity_only) { + if (connectivity_only) { + for_each_loose_object(mark_loose_for_connectivity, NULL, 0); + for_each_packed_object(mark_packed_for_connectivity, NULL, 0); + } else { fsck_object_dir(get_object_directory()); prepare_alt_odb(); for (alt = alt_odb_list; alt; alt = alt->next) fsck_object_dir(alt->path); - } - if (check_full) { - struct packed_git *p; - uint32_t total = 0, count = 0; - struct progress *progress = NULL; + if (check_full) { + struct packed_git *p; + uint32_t total = 0, count = 0; + struct progress *progress = NULL; - prepare_packed_git(); + prepare_packed_git(); - if (show_progress) { - for (p = packed_git; p; p = p->next) { - if (open_pack_index(p)) - continue; - total += p->num_objects; + if (show_progress) { + for (p = packed_git; p; p = p->next) { + if (open_pack_index(p)) + continue; + total += p->num_objects; + } + + progress = start_progress(_("Checking objects"), total); } - - progress = start_progress(_("Checking objects"), total); + for (p = packed_git; p; p = p->next) { + /* verify gives error messages itself */ + if (verify_pack(p, fsck_obj_buffer, + progress, count)) + errors_found |= ERROR_PACK; + count += p->num_objects; + } + stop_progress(&progress); } - for (p = packed_git; p; p = p->next) { - /* verify gives error messages itself */ - if (verify_pack(p, fsck_obj_buffer, - progress, count)) - errors_found |= ERROR_PACK; - count += p->num_objects; - } - stop_progress(&progress); } heads = 0; diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index e88ec7747b..4d1c3ba664 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -523,9 +523,21 @@ test_expect_success 'fsck --connectivity-only' ' touch empty && git add empty && test_commit empty && + + # Drop the index now; we want to be sure that we + # recursively notice the broken objects + # because they are reachable from refs, not because + # they are in the index. + rm -f .git/index && + + # corrupt the blob, but in a way that we can still identify + # its type. That lets us see that --connectivity-only is + # not actually looking at the contents, but leaves it + # free to examine the type if it chooses. empty=.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 && - rm -f $empty && - echo invalid >$empty && + blob=$(echo unrelated | git hash-object -w --stdin) && + mv $(sha1_file $blob) $empty && + test_must_fail git fsck --strict && git fsck --strict --connectivity-only && tree=$(git rev-parse HEAD:) && @@ -537,6 +549,19 @@ test_expect_success 'fsck --connectivity-only' ' ) ' +test_expect_success 'fsck --connectivity-only with explicit head' ' + rm -rf connectivity-only && + git init connectivity-only && + ( + cd connectivity-only && + test_commit foo && + rm -f .git/index && + tree=$(git rev-parse HEAD^{tree}) && + remove_object $(git rev-parse HEAD:foo.t) && + test_must_fail git fsck --connectivity-only $tree + ) +' + remove_loose_object () { sha1="$(git rev-parse "$1")" && remainder=${sha1#??} &&