Merge branch 'jk/upload-pack-skip-hash-check'

The server side that responds to "git fetch" and "git clone"
requests has been optimized by allowing it to send objects in its
object store without recomputing and validating the object names.

* jk/upload-pack-skip-hash-check:
  t1060: check partial clone of misnamed blob
  parse_object(): check commit-graph when skip_hash set
  upload-pack: skip parse-object re-hashing of "want" objects
  parse_object(): allow skipping hash check
This commit is contained in:
Junio C Hamano 2022-09-13 11:38:23 -07:00
commit 8b2f027e20
6 changed files with 56 additions and 20 deletions

View file

@ -263,8 +263,11 @@ struct object *parse_object_or_die(const struct object_id *oid,
die(_("unable to parse object: %s"), name ? name : oid_to_hex(oid));
}
struct object *parse_object(struct repository *r, const struct object_id *oid)
struct object *parse_object_with_flags(struct repository *r,
const struct object_id *oid,
enum parse_object_flags flags)
{
int skip_hash = !!(flags & PARSE_OBJECT_SKIP_HASH_CHECK);
unsigned long size;
enum object_type type;
int eaten;
@ -276,10 +279,16 @@ struct object *parse_object(struct repository *r, const struct object_id *oid)
if (obj && obj->parsed)
return obj;
if (skip_hash) {
struct commit *commit = lookup_commit_in_graph(r, repl);
if (commit)
return &commit->object;
}
if ((obj && obj->type == OBJ_BLOB && repo_has_object_file(r, oid)) ||
(!obj && repo_has_object_file(r, oid) &&
oid_object_info(r, oid, NULL) == OBJ_BLOB)) {
if (stream_object_signature(r, repl) < 0) {
if (!skip_hash && stream_object_signature(r, repl) < 0) {
error(_("hash mismatch %s"), oid_to_hex(oid));
return NULL;
}
@ -289,7 +298,8 @@ struct object *parse_object(struct repository *r, const struct object_id *oid)
buffer = repo_read_object_file(r, oid, &type, &size);
if (buffer) {
if (check_object_signature(r, repl, buffer, size, type) < 0) {
if (!skip_hash &&
check_object_signature(r, repl, buffer, size, type) < 0) {
free(buffer);
error(_("hash mismatch %s"), oid_to_hex(repl));
return NULL;
@ -304,6 +314,11 @@ struct object *parse_object(struct repository *r, const struct object_id *oid)
return NULL;
}
/*
 * Parse the object named by oid with no parse flags set: this simply
 * delegates to parse_object_with_flags() with flags == 0, so none of the
 * PARSE_OBJECT_SKIP_HASH_CHECK shortcuts are taken.
 */
struct object *parse_object(struct repository *r, const struct object_id *oid)
{
return parse_object_with_flags(r, oid, 0);
}
struct object_list *object_list_insert(struct object *item,
struct object_list **list_p)
{

View file

@ -128,7 +128,13 @@ void *object_as_type(struct object *obj, enum object_type type, int quiet);
*
* Returns NULL if the object is missing or corrupt.
*/
/*
 * Flags accepted by parse_object_with_flags(); values are distinct bits.
 *
 * PARSE_OBJECT_SKIP_HASH_CHECK: do not verify that the object's contents
 * match its name (the stream_object_signature() and
 * check_object_signature() calls are skipped). With this flag set, a
 * commit found via lookup_commit_in_graph() is returned directly, before
 * any object data is read.
 */
enum parse_object_flags {
PARSE_OBJECT_SKIP_HASH_CHECK = 1 << 0,
};
struct object *parse_object(struct repository *r, const struct object_id *oid);
/*
 * Variant of parse_object() whose behavior is adjusted by "flags", a
 * bitwise OR of "enum parse_object_flags" values. Passing 0 is exactly
 * what parse_object() does.
 */
struct object *parse_object_with_flags(struct repository *r,
const struct object_id *oid,
enum parse_object_flags flags);
/*
* Like parse_object, but will die() instead of returning NULL. If the

View file

@ -373,18 +373,10 @@ static struct object *get_reference(struct rev_info *revs, const char *name,
unsigned int flags)
{
struct object *object;
struct commit *commit;
/*
* If the repository has commit graphs, we try to opportunistically
* look up the object ID in those graphs. Like this, we can avoid
* parsing commit data from disk.
*/
commit = lookup_commit_in_graph(revs->repo, oid);
if (commit)
object = &commit->object;
else
object = parse_object(revs->repo, oid);
object = parse_object_with_flags(revs->repo, oid,
revs->verify_objects ? 0 :
PARSE_OBJECT_SKIP_HASH_CHECK);
if (!object) {
if (revs->ignore_missing)

View file

@ -139,4 +139,11 @@ test_expect_success 'internal tree objects are not "missing"' '
)
'
# Clone the "misnamed" repository (set up outside this test; presumably it
# holds a blob stored under the wrong object name) with blobs filtered out,
# then expect the checkout in the clone to fail.
test_expect_success 'partial clone of corrupted repository' '
	test_config -C misnamed uploadpack.allowFilter true &&
	git clone --no-local --no-checkout --filter=blob:none \
		misnamed corrupt-partial &&
	test_must_fail git -C corrupt-partial checkout --force
'
test_done

View file

@ -507,6 +507,26 @@ test_expect_success 'rev-list --verify-objects with bad sha1' '
test_i18ngrep -q "error: hash mismatch $(dirname $new)$(test_oid ff_2)" out
'
# An actual bit corruption is more likely than swapped commits, but
# this provides an easy way to have commits which don't match their purported
# hashes, but which aren't so broken we can't read them at all.
# Exchange the loose object files of HEAD and HEAD^ so that each file holds
# the contents of the other commit, then expect --verify-objects to fail.
test_expect_success 'rev-list --verify-objects notices swapped commits' '
	git init swapped-commits &&
	(
		cd swapped-commits &&
		test_commit one &&
		test_commit two &&
		tip_oid=$(git rev-parse HEAD) &&
		parent_oid=$(git rev-parse HEAD^) &&
		tip_path=.git/objects/$(test_oid_to_path $tip_oid) &&
		parent_path=.git/objects/$(test_oid_to_path $parent_oid) &&
		mv $tip_path tmp &&
		mv $parent_path $tip_path &&
		mv tmp $parent_path &&
		test_must_fail git rev-list --verify-objects HEAD
	)
'
test_expect_success 'force fsck to ignore double author' '
git cat-file commit HEAD >basis &&
sed "s/^author .*/&,&/" <basis | tr , \\n >multiple-authors &&

View file

@ -1409,18 +1409,14 @@ static int parse_want(struct packet_writer *writer, const char *line,
const char *arg;
if (skip_prefix(line, "want ", &arg)) {
struct object_id oid;
struct commit *commit;
struct object *o;
if (get_oid_hex(arg, &oid))
die("git upload-pack: protocol error, "
"expected to get oid, not '%s'", line);
commit = lookup_commit_in_graph(the_repository, &oid);
if (commit)
o = &commit->object;
else
o = parse_object(the_repository, &oid);
o = parse_object_with_flags(the_repository, &oid,
PARSE_OBJECT_SKIP_HASH_CHECK);
if (!o) {
packet_writer_error(writer,