repack: avoid loosening promisor objects in partial clones

When `git repack -A -d` is run in a partial clone, `pack-objects`
is invoked twice: once to repack all promisor objects, and once to
repack all non-promisor objects. The latter `pack-objects` invocation
is with --exclude-promisor-objects and --unpack-unreachable, which
loosens all objects unused during this invocation. Unfortunately,
this includes promisor objects.

Because the -d argument to `git repack` subsequently deletes all loose
objects also in packs, these just-loosened promisor objects will be
immediately deleted. However, this extra disk churn is unnecessary in
the first place.  For example, in a newly-cloned partial repo that
filters all blob objects (e.g. `--filter=blob:none`), `repack` ends up
unpacking all trees and commits into the filesystem because every
object, in this particular case, is a promisor object. Depending on
the repo size, this increases the disk usage considerably: In my copy
of the linux.git, the object directory peaked 26GB of more disk usage.

In order to avoid this extra disk churn, pass the names of the promisor
packfiles as --keep-pack arguments to the second invocation of
`pack-objects`. This informs `pack-objects` that the promisor objects
are already in a safe packfile and, therefore, do not need to be
loosened.

For testing, we need to validate whether any object was loosened.
However, the "evidence" (loosened objects) is deleted during the
process which prevents us from inspecting the object directory.
Instead, let's teach `pack-objects` to count loosened objects and
emit via trace2 thus allowing inspecting the debug events after the
process is finished. This new event is used on the added regression
test.

Lastly, add a new perf test to evaluate the performance impact
made by this changes (tested on git.git):

     Test          HEAD^                 HEAD
     ----------------------------------------------------------
     5600.3: gc    134.38(41.93+90.95)   7.80(6.72+1.35) -94.2%

For a bigger repository, such as linux.git, the improvement is
even bigger:

     Test          HEAD^                     HEAD
     -------------------------------------------------------------------
     5600.3: gc    6833.00(918.07+3162.74)   268.79(227.02+39.18) -96.1%

These improvements are particular big because every object in the
newly-cloned partial repository is a promisor object.

Reported-by: SZEDER Gábor <szeder.dev@gmail.com>
Helped-by: Jeff King <peff@peff.net>
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Rafael Silva <rafaeloliveira.cs@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Rafael Silva 2021-04-21 21:32:12 +02:00 committed by Junio C Hamano
parent c1fa951d7e
commit a643157d5a
4 changed files with 26 additions and 3 deletions

View file

@ -3479,6 +3479,7 @@ static void loosen_unused_packed_objects(void)
{
struct packed_git *p;
uint32_t i;
uint32_t loosened_objects_nr = 0;
struct object_id oid;
for (p = get_all_packs(the_repository); p; p = p->next) {
@ -3492,11 +3493,16 @@ static void loosen_unused_packed_objects(void)
nth_packed_object_id(&oid, p, i);
if (!packlist_find(&to_pack, &oid) &&
!has_sha1_pack_kept_or_nonlocal(&oid) &&
!loosened_object_can_be_discarded(&oid, p->mtime))
!loosened_object_can_be_discarded(&oid, p->mtime)) {
if (force_object_loose(&oid, p->mtime))
die(_("unable to force loose object"));
loosened_objects_nr++;
}
}
}
trace2_data_intmax("pack-objects", the_repository,
"loosen_unused_packed_objects/loosened", loosened_objects_nr);
}
/*

View file

@ -20,7 +20,7 @@ static int delta_base_offset = 1;
static int pack_kept_objects = -1;
static int write_bitmaps = -1;
static int use_delta_islands;
static char *packdir, *packtmp;
static char *packdir, *packtmp_name, *packtmp;
static const char *const git_repack_usage[] = {
N_("git repack [<options>]"),
@ -530,7 +530,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
}
packdir = mkpathdup("%s/pack", get_object_directory());
packtmp = mkpathdup("%s/.tmp-%d-pack", packdir, (int)getpid());
packtmp_name = xstrfmt(".tmp-%d-pack", (int)getpid());
packtmp = mkpathdup("%s/%s", packdir, packtmp_name);
sigchain_push_common(remove_pack_on_signal);
@ -573,6 +574,10 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
repack_promisor_objects(&po_args, &names);
if (existing_packs.nr && delete_redundant) {
for_each_string_list_item(item, &names) {
strvec_pushf(&cmd.args, "--keep-pack=%s-%s.pack",
packtmp_name, item->string);
}
if (unpack_unreachable) {
strvec_pushf(&cmd.args,
"--unpack-unreachable=%s",

View file

@ -35,4 +35,8 @@ test_perf 'count non-promisor commits' '
git -C bare.git rev-list --all --count --exclude-promisor-objects
'
test_perf 'gc' '
git -C bare.git gc
'
test_done

View file

@ -548,6 +548,14 @@ test_expect_success 'fetch from a partial clone, protocol v2' '
grep "version 2" trace
'
test_expect_success 'repack does not loosen promisor objects' '
rm -rf client trace &&
git clone --bare --filter=blob:none "file://$(pwd)/srv.bare" client &&
test_when_finished "rm -rf client trace" &&
GIT_TRACE2_PERF="$(pwd)/trace" git -C client repack -A -d &&
grep "loosen_unused_packed_objects/loosened:0" trace
'
. "$TEST_DIRECTORY"/lib-httpd.sh
start_httpd