From d0badf87978dceef4567bb032a74ea1f1a42632c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 21 Feb 2020 21:47:27 +0000 Subject: [PATCH 1/2] partial-clone: demonstrate bugs in partial fetch While testing partial clone, I noticed some odd behavior. I was testing a way of running 'git init', followed by manually configuring the remote for partial clone, and then running 'git fetch'. Astonishingly, I saw the 'git fetch' process start asking the server for multiple rounds of pack-file downloads! When tweaking the situation a little more, I discovered that I could cause the remote to hang up with an error. Add two tests that demonstrate these two issues. In the first test, we find that when fetching with blob filters from a repository that previously did not have any tags, the 'git fetch --tags origin' command fails because the server sends "multiple filter-specs cannot be combined". This only happens when using protocol v2. In the second test, we see that a 'git fetch origin' request with several ref updates results in multiple pack-file downloads. This must be due to Git trying to fault-in the objects pointed by the refs. What makes this matter particularly nasty is that this goes through the do_oid_object_info_extended() method, so there are no "haves" in the negotiation. This leads the remote to send every reachable commit and tree from each new ref, providing a quadratic amount of data transfer! This test is fixed if we revert 6462d5eb9a (fetch: remove fetch_if_missing=0, 2019-11-05), but that revert causes other test failures. The real fix will need more care. The tests are ordered in this way because if I swap the test order the tag test will succeed instead of fail. I believe this is because somehow we need the srv.bare repo to not have any tags when we clone, but then have tags in our next fetch. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t5616-partial-clone.sh | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index fea56cda6d..022f1018ac 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -374,6 +374,37 @@ test_expect_success 'fetch lazy-fetches only to resolve deltas, protocol v2' ' grep "want $(cat hash)" trace ' +# The following two tests must be in this order, or else +# the first will not fail. It is important that the srv.bare +# repository did not have tags during clone, but has tags +# in the fetch. + +test_expect_failure 'verify fetch succeeds when asking for new tags' ' + git clone --filter=blob:none "file://$(pwd)/srv.bare" tag-test && + for i in I J K + do + test_commit -C src $i && + git -C src branch $i || return 1 + done && + git -C srv.bare fetch --tags origin +refs/heads/*:refs/heads/* && + git -C tag-test -c protocol.version=2 fetch --tags origin +' + +test_expect_failure 'verify fetch downloads only one pack when updating refs' ' + git clone --filter=blob:none "file://$(pwd)/srv.bare" pack-test && + ls pack-test/.git/objects/pack/*pack >pack-list && + test_line_count = 2 pack-list && + for i in A B C + do + test_commit -C src $i && + git -C src branch $i || return 1 + done && + git -C srv.bare fetch origin +refs/heads/*:refs/heads/* && + git -C pack-test fetch origin && + ls pack-test/.git/objects/pack/*pack >pack-list && + test_line_count = 3 pack-list +' + . "$TEST_DIRECTORY"/lib-httpd.sh start_httpd From 3e96c66805f17d7345eba4733bc80c6fe646b95f Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 21 Feb 2020 21:47:28 +0000 Subject: [PATCH 2/2] partial-clone: avoid fetching when looking for objects When using partial clone, find_non_local_tags() in builtin/fetch.c checks each remote tag to see if its object also exists locally. There is no expectation that the object exist locally, but this function nevertheless triggers a lazy fetch if the object does not exist. This can be extremely expensive when asking for a commit, as we are completely removed from the context of the non-existent object and thus supply no "haves" in the request. 6462d5eb9a (fetch: remove fetch_if_missing=0, 2019-11-05) removed a global variable that prevented these fetches in favor of a bitflag. However, some object existence checks were not updated to use this flag. Update find_non_local_tags() to use OBJECT_INFO_SKIP_FETCH_OBJECT in addition to OBJECT_INFO_QUICK. The _QUICK option only prevents repreparing the pack-file structures. We need to be extremely careful about supplying _SKIP_FETCH_OBJECT when we expect an object to not exist due to updated refs. This resolves a broken test in t5616-partial-clone.sh. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/fetch.c | 10 +++++----- t/t5616-partial-clone.sh | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index b4c6d921d0..fd69c4f69d 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -335,6 +335,7 @@ static void find_non_local_tags(const struct ref *refs, struct string_list_item *remote_ref_item; const struct ref *ref; struct refname_hash_entry *item = NULL; + const int quick_flags = OBJECT_INFO_QUICK | OBJECT_INFO_SKIP_FETCH_OBJECT; refname_hash_init(&existing_refs); refname_hash_init(&remote_refs); @@ -353,10 +354,9 @@ static void find_non_local_tags(const struct ref *refs, */ if (ends_with(ref->name, "^{}")) { if (item && - !has_object_file_with_flags(&ref->old_oid, - OBJECT_INFO_QUICK) && + !has_object_file_with_flags(&ref->old_oid, quick_flags) && !oidset_contains(&fetch_oids, &ref->old_oid) && - !has_object_file_with_flags(&item->oid, OBJECT_INFO_QUICK) && + !has_object_file_with_flags(&item->oid, quick_flags) && !oidset_contains(&fetch_oids, &item->oid)) clear_item(item); item = NULL; @@ -370,7 +370,7 @@ static void find_non_local_tags(const struct ref *refs, * fetch. */ if (item && - !has_object_file_with_flags(&item->oid, OBJECT_INFO_QUICK) && + !has_object_file_with_flags(&item->oid, quick_flags) && !oidset_contains(&fetch_oids, &item->oid)) clear_item(item); @@ -391,7 +391,7 @@ static void find_non_local_tags(const struct ref *refs, * checked to see if it needs fetching. */ if (item && - !has_object_file_with_flags(&item->oid, OBJECT_INFO_QUICK) && + !has_object_file_with_flags(&item->oid, quick_flags) && !oidset_contains(&fetch_oids, &item->oid)) clear_item(item); diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index 022f1018ac..da3ad4e32a 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -390,7 +390,7 @@ test_expect_failure 'verify fetch succeeds when asking for new tags' ' git -C tag-test -c protocol.version=2 fetch --tags origin ' -test_expect_failure 'verify fetch downloads only one pack when updating refs' ' +test_expect_success 'verify fetch downloads only one pack when updating refs' ' git clone --filter=blob:none "file://$(pwd)/srv.bare" pack-test && ls pack-test/.git/objects/pack/*pack >pack-list && test_line_count = 2 pack-list &&