Merge branch 'tb/multi-pack-reuse-fix'

Assorted fixes to multi-pack-index code paths.

* tb/multi-pack-reuse-fix:
  pack-revindex.c: guard against out-of-bounds pack lookups
  pack-bitmap.c: avoid uninitialized `pack_int_id` during reuse
  midx-write.c: do not read existing MIDX with `packs_to_include`
This commit is contained in:
Junio C Hamano 2024-06-20 15:45:09 -07:00
commit 393879d473
5 changed files with 103 additions and 11 deletions

View file

@ -101,13 +101,27 @@ struct write_midx_context {
}; };
static int should_include_pack(const struct write_midx_context *ctx, static int should_include_pack(const struct write_midx_context *ctx,
const char *file_name, const char *file_name)
int exclude_from_midx)
{ {
if (exclude_from_midx && ctx->m && midx_contains_pack(ctx->m, file_name)) /*
* Note that at most one of ctx->m and ctx->to_include are set,
* so we are testing midx_contains_pack() and
* string_list_has_string() independently (guarded by the
* appropriate NULL checks).
*
* We could support passing to_include while reusing an existing
* MIDX, but don't currently since the reuse process drags
* forward all packs from an existing MIDX (without checking
* whether or not they appear in the to_include list).
*
* If we added support for that, these next two conditional
* should be performed independently (likely checking
* to_include before the existing MIDX).
*/
if (ctx->m && midx_contains_pack(ctx->m, file_name))
return 0; return 0;
if (ctx->to_include && !string_list_has_string(ctx->to_include, else if (ctx->to_include &&
file_name)) !string_list_has_string(ctx->to_include, file_name))
return 0; return 0;
return 1; return 1;
} }
@ -121,7 +135,7 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
if (ends_with(file_name, ".idx")) { if (ends_with(file_name, ".idx")) {
display_progress(ctx->progress, ++ctx->pack_paths_checked); display_progress(ctx->progress, ++ctx->pack_paths_checked);
if (!should_include_pack(ctx, file_name, 1)) if (!should_include_pack(ctx, file_name))
return; return;
ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc); ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
@ -880,9 +894,6 @@ static int fill_packs_from_midx(struct write_midx_context *ctx,
uint32_t i; uint32_t i;
for (i = 0; i < ctx->m->num_packs; i++) { for (i = 0; i < ctx->m->num_packs; i++) {
if (!should_include_pack(ctx, ctx->m->pack_names[i], 0))
continue;
ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc); ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
if (flags & MIDX_WRITE_REV_INDEX || preferred_pack_name) { if (flags & MIDX_WRITE_REV_INDEX || preferred_pack_name) {
@ -937,7 +948,15 @@ static int write_midx_internal(const char *object_dir,
die_errno(_("unable to create leading directories of %s"), die_errno(_("unable to create leading directories of %s"),
midx_name.buf); midx_name.buf);
ctx.m = lookup_multi_pack_index(the_repository, object_dir); if (!packs_to_include) {
/*
* Only reference an existing MIDX when not filtering which
* packs to include, since all packs and objects are copied
* blindly from an existing MIDX if one is present.
*/
ctx.m = lookup_multi_pack_index(the_repository, object_dir);
}
if (ctx.m && !midx_checksum_valid(ctx.m)) { if (ctx.m && !midx_checksum_valid(ctx.m)) {
warning(_("ignoring existing multi-pack-index; checksum mismatch")); warning(_("ignoring existing multi-pack-index; checksum mismatch"));
ctx.m = NULL; ctx.m = NULL;
@ -946,7 +965,6 @@ static int write_midx_internal(const char *object_dir,
ctx.nr = 0; ctx.nr = 0;
ctx.alloc = ctx.m ? ctx.m->num_packs : 16; ctx.alloc = ctx.m ? ctx.m->num_packs : 16;
ctx.info = NULL; ctx.info = NULL;
ctx.to_include = packs_to_include;
ALLOC_ARRAY(ctx.info, ctx.alloc); ALLOC_ARRAY(ctx.info, ctx.alloc);
if (ctx.m && fill_packs_from_midx(&ctx, preferred_pack_name, if (ctx.m && fill_packs_from_midx(&ctx, preferred_pack_name,
@ -963,6 +981,8 @@ static int write_midx_internal(const char *object_dir,
else else
ctx.progress = NULL; ctx.progress = NULL;
ctx.to_include = packs_to_include;
for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &ctx); for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &ctx);
stop_progress(&ctx.progress); stop_progress(&ctx.progress);

View file

@ -2073,6 +2073,7 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
QSORT(packs, packs_nr, bitmapped_pack_cmp); QSORT(packs, packs_nr, bitmapped_pack_cmp);
} else { } else {
struct packed_git *pack; struct packed_git *pack;
uint32_t pack_int_id;
if (bitmap_is_midx(bitmap_git)) { if (bitmap_is_midx(bitmap_git)) {
uint32_t preferred_pack_pos; uint32_t preferred_pack_pos;
@ -2083,12 +2084,24 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
} }
pack = bitmap_git->midx->packs[preferred_pack_pos]; pack = bitmap_git->midx->packs[preferred_pack_pos];
pack_int_id = preferred_pack_pos;
} else { } else {
pack = bitmap_git->pack; pack = bitmap_git->pack;
/*
* Any value for 'pack_int_id' will do here. When we
* process the pack via try_partial_reuse(), we won't
* use the `pack_int_id` field since we have a non-MIDX
* bitmap.
*
* Use '-1' as a sentinel value to make it clear
* that we do not expect to read this field.
*/
pack_int_id = -1;
} }
ALLOC_GROW(packs, packs_nr + 1, packs_alloc); ALLOC_GROW(packs, packs_nr + 1, packs_alloc);
packs[packs_nr].p = pack; packs[packs_nr].p = pack;
packs[packs_nr].pack_int_id = pack_int_id;
packs[packs_nr].bitmap_nr = pack->num_objects; packs[packs_nr].bitmap_nr = pack->num_objects;
packs[packs_nr].bitmap_pos = 0; packs[packs_nr].bitmap_pos = 0;

View file

@ -527,6 +527,9 @@ static int midx_key_to_pack_pos(struct multi_pack_index *m,
{ {
uint32_t *found; uint32_t *found;
if (key->pack >= m->num_packs)
BUG("MIDX pack lookup out of bounds (%"PRIu32" >= %"PRIu32")",
key->pack, m->num_packs);
/* /*
* The preferred pack sorts first, so determine its identifier by * The preferred pack sorts first, so determine its identifier by
* looking at the first object in pseudo-pack order. * looking at the first object in pseudo-pack order.

View file

@ -551,4 +551,34 @@ do
' '
done done
test_expect_success 'remove one packfile between MIDX bitmap writes' '
git init remove-pack-between-writes &&
(
cd remove-pack-between-writes &&
test_commit A &&
test_commit B &&
test_commit C &&
# Create packs with the prefix "pack-A", "pack-B",
# "pack-C" to impose a lexicographic order on these
# packs so the pack being removed is always from the
# middle.
packdir=.git/objects/pack &&
A="$(echo A | git pack-objects $packdir/pack-A --revs)" &&
B="$(echo B | git pack-objects $packdir/pack-B --revs)" &&
C="$(echo C | git pack-objects $packdir/pack-C --revs)" &&
git multi-pack-index write --bitmap &&
cat >in <<-EOF &&
pack-A-$A.idx
pack-C-$C.idx
EOF
git multi-pack-index write --bitmap --stdin-packs <in &&
git rev-list --test-bitmap HEAD
)
'
test_done test_done

View file

@ -204,4 +204,30 @@ test_expect_success 'omit delta from uninteresting base (cross pack)' '
test_pack_objects_reused_all $(($objects_nr - 1)) $packs_nr test_pack_objects_reused_all $(($objects_nr - 1)) $packs_nr
' '
test_expect_success 'non-omitted delta in MIDX preferred pack' '
test_config pack.allowPackReuse single &&
cat >p1.objects <<-EOF &&
$(git rev-parse $base)
^$(git rev-parse $delta^)
EOF
cat >p2.objects <<-EOF &&
$(git rev-parse F)
EOF
p1="$(git pack-objects --revs $packdir/pack <p1.objects)" &&
p2="$(git pack-objects --revs $packdir/pack <p2.objects)" &&
cat >in <<-EOF &&
pack-$p1.idx
pack-$p2.idx
EOF
git multi-pack-index write --bitmap --stdin-packs \
--preferred-pack=pack-$p1.pack <in &&
git show-index <$packdir/pack-$p1.idx >expect &&
test_pack_objects_reused_all $(wc -l <expect) 1
'
test_done test_done