Merge branch 'tb/geom-repack-with-keep-and-max'

Teach "git repack --geometric" work better with "--keep-pack" and
avoid corrupting the repository when packsize limit is used.

* tb/geom-repack-with-keep-and-max:
  builtin/repack.c: ensure that `names` is sorted
  t7703: demonstrate object corruption with pack.packSizeLimit
  repack: respect --keep-pack with geometric repack
This commit is contained in:
Junio C Hamano 2022-06-03 14:30:36 -07:00
commit 16a0e92ddc
2 changed files with 114 additions and 14 deletions

View file

@ -137,6 +137,8 @@ static void collect_pack_filenames(struct string_list *fname_nonkept_list,
string_list_append_nodup(fname_nonkept_list, fname);
}
closedir(dir);
string_list_sort(fname_kept_list);
}
static void remove_redundant_pack(const char *dir_name, const char *base_name)
@ -332,17 +334,38 @@ static int geometry_cmp(const void *va, const void *vb)
return 0;
}
static void init_pack_geometry(struct pack_geometry **geometry_p)
static void init_pack_geometry(struct pack_geometry **geometry_p,
struct string_list *existing_kept_packs)
{
struct packed_git *p;
struct pack_geometry *geometry;
struct strbuf buf = STRBUF_INIT;
*geometry_p = xcalloc(1, sizeof(struct pack_geometry));
geometry = *geometry_p;
for (p = get_all_packs(the_repository); p; p = p->next) {
if (!pack_kept_objects && p->pack_keep)
continue;
if (!pack_kept_objects) {
/*
* Any pack that has its pack_keep bit set will appear
* in existing_kept_packs below, but this saves us from
* doing a more expensive check.
*/
if (p->pack_keep)
continue;
/*
* The pack may be kept via the --keep-pack option;
* check 'existing_kept_packs' to determine whether to
* ignore it.
*/
strbuf_reset(&buf);
strbuf_addstr(&buf, pack_basename(p));
strbuf_strip_suffix(&buf, ".pack");
if (string_list_has_string(existing_kept_packs, buf.buf))
continue;
}
ALLOC_GROW(geometry->pack,
geometry->pack_nr + 1,
@ -353,6 +376,7 @@ static void init_pack_geometry(struct pack_geometry **geometry_p)
}
QSORT(geometry->pack, geometry->pack_nr, geometry_cmp);
strbuf_release(&buf);
}
static void split_pack_geometry(struct pack_geometry *geometry, int factor)
@ -714,17 +738,20 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
strbuf_release(&path);
}
if (geometric_factor) {
if (pack_everything)
die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a");
init_pack_geometry(&geometry);
split_pack_geometry(geometry, geometric_factor);
}
packdir = mkpathdup("%s/pack", get_object_directory());
packtmp_name = xstrfmt(".tmp-%d-pack", (int)getpid());
packtmp = mkpathdup("%s/%s", packdir, packtmp_name);
collect_pack_filenames(&existing_nonkept_packs, &existing_kept_packs,
&keep_pack_list);
if (geometric_factor) {
if (pack_everything)
die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a");
init_pack_geometry(&geometry, &existing_kept_packs);
split_pack_geometry(geometry, geometric_factor);
}
sigchain_push_common(remove_pack_on_signal);
prepare_pack_objects(&cmd, &po_args);
@ -764,9 +791,6 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
if (use_delta_islands)
strvec_push(&cmd.args, "--delta-islands");
collect_pack_filenames(&existing_nonkept_packs, &existing_kept_packs,
&keep_pack_list);
if (pack_everything & ALL_INTO_ONE) {
repack_promisor_objects(&po_args, &names);
@ -832,6 +856,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
if (!names.nr && !po_args.quiet)
printf_ln(_("Nothing new to pack."));
string_list_sort(&names);
for_each_string_list_item(item, &names) {
item->util = (void *)(uintptr_t)populate_pack_exts(item->string);
}
@ -872,7 +898,6 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
if (delete_redundant && pack_everything & ALL_INTO_ONE) {
const int hexsz = the_hash_algo->hexsz;
string_list_sort(&names);
for_each_string_list_item(item, &existing_nonkept_packs) {
char *sha1;
size_t len = strlen(item->string);

View file

@ -7,6 +7,7 @@ test_description='git repack --geometric works correctly'
GIT_TEST_MULTI_PACK_INDEX=0
objdir=.git/objects
packdir=$objdir/pack
midx=$objdir/pack/multi-pack-index
test_expect_success '--geometric with no packs' '
@ -180,6 +181,34 @@ test_expect_success '--geometric ignores kept packs' '
)
'
test_expect_success '--geometric ignores --keep-pack packs' '
git init geometric &&
test_when_finished "rm -fr geometric" &&
(
cd geometric &&
# Create two equal-sized packs
test_commit kept && # 3 objects
git repack -d &&
test_commit pack && # 3 objects
git repack -d &&
find $objdir/pack -type f -name "*.pack" | sort >packs.before &&
git repack --geometric 2 -dm \
--keep-pack="$(basename "$(head -n 1 packs.before)")" >out &&
find $objdir/pack -type f -name "*.pack" | sort >packs.after &&
# Packs should not have changed (only one non-kept pack, no
# loose objects), but $midx should now exist.
grep "Nothing new to pack" out &&
test_path_is_file $midx &&
test_cmp packs.before packs.after &&
git fsck
)
'
test_expect_success '--geometric chooses largest MIDX preferred pack' '
git init geometric &&
test_when_finished "rm -fr geometric" &&
@ -202,4 +231,50 @@ test_expect_success '--geometric chooses largest MIDX preferred pack' '
)
'
test_expect_success '--geometric with pack.packSizeLimit' '
git init pack-rewrite &&
test_when_finished "rm -fr pack-rewrite" &&
(
cd pack-rewrite &&
test-tool genrandom foo 1048576 >foo &&
test-tool genrandom bar 1048576 >bar &&
git add foo bar &&
test_tick &&
git commit -m base &&
git rev-parse HEAD:foo HEAD:bar >p1.objects &&
git rev-parse HEAD HEAD^{tree} >p2.objects &&
# These two packs each contain two objects, so the following
# `--geometric` repack will try to combine them.
p1="$(git pack-objects $packdir/pack <p1.objects)" &&
p2="$(git pack-objects $packdir/pack <p2.objects)" &&
# Remove any loose objects in packs, since we do not want extra
# copies around (which would mask over potential object
# corruption issues).
git prune-packed &&
# Both p1 and p2 will be rolled up, but pack-objects will write
# three packs:
#
# - one containing object "foo",
# - another containing object "bar",
# - a final pack containing the commit and tree objects
# (identical to p2 above)
git repack --geometric 2 -d --max-pack-size=1048576 &&
# Ensure `repack` can detect that the third pack it wrote
# (containing just the tree and commit objects) was identical to
# one that was below the geometric split, so that we can save it
# from deletion.
#
# If `repack` fails to do that, we will incorrectly delete p2,
# causing object corruption.
git fsck
)
'
test_done