Merge branch 'tb/multi-cruft-pack'

Use of --max-pack-size to allow multiple packfiles to be created is
now supported even when we are sending unreachable objects to cruft
packs.

* tb/multi-cruft-pack:
  Documentation/gitformat-pack.txt: drop mixed version section
  Documentation/gitformat-pack.txt: remove multi-cruft packs alternative
  builtin/pack-objects.c: support `--max-pack-size` with `--cruft`
  builtin/pack-objects.c: remove unnecessary strbuf_reset()
This commit is contained in:
Junio C Hamano 2023-09-07 15:06:07 -07:00
commit 8af5aac986
5 changed files with 49 additions and 53 deletions

View file

@ -116,9 +116,7 @@ unreachable object whose mtime is newer than the `--cruft-expiration`).
+
Incompatible with `--unpack-unreachable`, `--keep-unreachable`,
`--pack-loose-unreachable`, `--stdin-packs`, as well as any other
options which imply `--revs`. Also incompatible with `--max-pack-size`;
when this option is set, the maximum pack size is not inferred from
`pack.packSizeLimit`.
options which imply `--revs`.
--cruft-expiration=<approxidate>::
If specified, objects are eliminated from the cruft pack if they

View file

@ -588,51 +588,17 @@ later on.
It is linkgit:git-gc[1] that is typically responsible for removing expired
unreachable objects.
=== Caution for mixed-version environments
Repositories that have cruft packs in them will continue to work with any older
version of Git. Note, however, that previous versions of Git which do not
understand the `.mtimes` file will use the cruft pack's mtime as the mtime for
all of the objects in it. In other words, do not expect older (pre-cruft pack)
versions of Git to interpret or even read the contents of the `.mtimes` file.
Note that having mixed versions of Git GC-ing the same repository can lead to
unreachable objects never being completely pruned. This can happen under the
following circumstances:
- An older version of Git running GC explodes the contents of an existing
cruft pack loose, using the cruft pack's mtime.
- A newer version running GC collects those loose objects into a cruft pack,
where the .mtime file reflects the loose object's actual mtimes, but the
cruft pack mtime is "now".
Repeating this process will lead to unreachable objects not getting pruned as a
result of repeatedly resetting the objects' mtimes to the present time.
If you are GC-ing repositories in a mixed version environment, consider omitting
the `--cruft` option when using linkgit:git-repack[1] and linkgit:git-gc[1], and
setting the `gc.cruftPacks` configuration to "false" until all writers
understand cruft packs.
=== Alternatives
Notable alternatives to this design include:
- The location of the per-object mtime data, and
- Storing unreachable objects in multiple cruft packs.
- The location of the per-object mtime data.
On the location of mtime data, a new auxiliary file tied to the pack was chosen
to avoid complicating the `.idx` format. If the `.idx` format were ever to gain
support for optional chunks of data, it may make sense to consolidate the
`.mtimes` format into the `.idx` itself.
Storing unreachable objects among multiple cruft packs (e.g., creating a new
cruft pack during each repacking operation including only unreachable objects
which aren't already stored in an earlier cruft pack) is significantly more
complicated to construct, and so aren't pursued here. The obvious drawback to
the current implementation is that the entire cruft pack must be re-written from
scratch.
GIT
---
Part of the linkgit:git[1] suite

View file

@ -3603,7 +3603,6 @@ static void read_cruft_objects(void)
string_list_append(&discard_packs, buf.buf + 1);
else
string_list_append(&fresh_packs, buf.buf);
strbuf_reset(&buf);
}
string_list_sort(&discard_packs);
@ -4383,7 +4382,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
if (!HAVE_THREADS && delta_search_threads != 1)
warning(_("no threads support, ignoring --threads"));
if (!pack_to_stdout && !pack_size_limit && !cruft)
if (!pack_to_stdout && !pack_size_limit)
pack_size_limit = pack_size_limit_cfg;
if (pack_to_stdout && pack_size_limit)
die(_("--max-pack-size cannot be used to build a pack for transfer"));
@ -4415,8 +4414,6 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
die(_("cannot use internal rev list with --cruft"));
if (stdin_packs)
die(_("cannot use --stdin-packs with --cruft"));
if (pack_size_limit)
die(_("cannot use --max-pack-size with --cruft"));
}
/*

View file

@ -720,7 +720,6 @@ static int write_cruft_pack(const struct pack_objects_args *args,
strvec_push(&cmd.args, "--honor-pack-keep");
strvec_push(&cmd.args, "--non-empty");
strvec_push(&cmd.args, "--max-pack-size=0");
cmd.in = -1;
@ -1048,6 +1047,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
cruft_po_args.depth = po_args.depth;
if (!cruft_po_args.threads)
cruft_po_args.threads = po_args.threads;
if (!cruft_po_args.max_pack_size)
cruft_po_args.max_pack_size = po_args.max_pack_size;
cruft_po_args.local = po_args.local;
cruft_po_args.quiet = po_args.quiet;

View file

@ -573,23 +573,54 @@ test_expect_success 'cruft repack with no reachable objects' '
)
'
test_expect_success 'cruft repack ignores --max-pack-size' '
write_blob () {
test-tool genrandom "$@" >in &&
git hash-object -w -t blob in
}
find_pack () {
for idx in $(ls $packdir/pack-*.idx)
do
git show-index <$idx >out &&
if grep -q "$1" out
then
echo $idx
fi || return 1
done
}
test_expect_success 'cruft repack with --max-pack-size' '
git init max-pack-size &&
(
cd max-pack-size &&
test_commit base &&
# two cruft objects which exceed the maximum pack size
test-tool genrandom foo 1048576 | git hash-object --stdin -w &&
test-tool genrandom bar 1048576 | git hash-object --stdin -w &&
foo=$(write_blob foo 1048576) &&
bar=$(write_blob bar 1048576) &&
test-tool chmtime --get -1000 \
"$objdir/$(test_oid_to_path $foo)" >foo.mtime &&
test-tool chmtime --get -2000 \
"$objdir/$(test_oid_to_path $bar)" >bar.mtime &&
git repack --cruft --max-pack-size=1M &&
find $packdir -name "*.mtimes" >cruft &&
test_line_count = 1 cruft &&
test-tool pack-mtimes "$(basename "$(cat cruft)")" >objects &&
test_line_count = 2 objects
test_line_count = 2 cruft &&
foo_mtimes="$(basename $(find_pack $foo) .idx).mtimes" &&
bar_mtimes="$(basename $(find_pack $bar) .idx).mtimes" &&
test-tool pack-mtimes $foo_mtimes >foo.actual &&
test-tool pack-mtimes $bar_mtimes >bar.actual &&
echo "$foo $(cat foo.mtime)" >foo.expect &&
echo "$bar $(cat bar.mtime)" >bar.expect &&
test_cmp foo.expect foo.actual &&
test_cmp bar.expect bar.actual &&
test "$foo_mtimes" != "$bar_mtimes"
)
'
test_expect_success 'cruft repack ignores pack.packSizeLimit' '
test_expect_success 'cruft repack with pack.packSizeLimit' '
(
cd max-pack-size &&
# repack everything back together to remove the existing cruft
@ -599,9 +630,12 @@ test_expect_success 'cruft repack ignores pack.packSizeLimit' '
# ensure the same post condition is met when --max-pack-size
# would otherwise be inferred from the configuration
find $packdir -name "*.mtimes" >cruft &&
test_line_count = 1 cruft &&
test-tool pack-mtimes "$(basename "$(cat cruft)")" >objects &&
test_line_count = 2 objects
test_line_count = 2 cruft &&
for pack in $(cat cruft)
do
test-tool pack-mtimes "$(basename $pack)" >objects &&
test_line_count = 1 objects || return 1
done
)
'