1
0
mirror of https://github.com/git/git synced 2024-06-30 22:54:27 +00:00
git/t/t5326-multi-pack-bitmaps.sh
Taylor Blau 0c5a62f14b midx-write.c: do not read existing MIDX with packs_to_include
Commit d6a8c58675 (midx-write.c: support reading an existing MIDX with
`packs_to_include`, 2024-05-29) changed the MIDX generation machinery to
support reading from an existing MIDX when writing a new one.

Unfortunately, the rest of the MIDX generation machinery is not prepared
to deal with such a change. For instance, the function responsible for
adding to the object ID fanout table from a MIDX source
(midx_fanout_add_midx_fanout()) will gladly add objects from an existing
MIDX for some fanout level regardless of whether or not those objects
came from packs that are to be included in the subsequent MIDX write.

This results in broken pseudo-pack object order (leading to incorrect
object traversal results) and segmentation faults, like so (generated by
running the added test prior to the changes in midx-write.c):

    #0  0x000055ee31393f47 in midx_pack_order (ctx=0x7ffdde205c70) at midx-write.c:590
    #1  0x000055ee31395a69 in write_midx_internal (object_dir=0x55ee32570440 ".git/objects",
        packs_to_include=0x7ffdde205e20, packs_to_drop=0x0, preferred_pack_name=0x0,
        refs_snapshot=0x0, flags=15) at midx-write.c:1171
    #2  0x000055ee31395f38 in write_midx_file_only (object_dir=0x55ee32570440 ".git/objects",
        packs_to_include=0x7ffdde205e20, preferred_pack_name=0x0, refs_snapshot=0x0, flags=15)
        at midx-write.c:1274
    [...]

In stack frame #0, the code on midx-write.c:590 is using the new pack ID
corresponding to some object which was added from the existing MIDX.
Importantly, the pack from which that object was selected in the
existing MIDX does not appear in the new MIDX as it was excluded via
`--stdin-packs`.

In this instance, the pack in question had pack ID "1" in the existing
MIDX, but since it was excluded from the new MIDX, we never filled in
that entry in the pack_perm table, resulting in:

    (gdb) p *ctx->pack_perm@2
    $1 = {0, 1515870810}

Which is what causes the segfault above when we try and read:

    struct pack_info *pack = &ctx->info[ctx->pack_perm[i]];
    if (pack->bitmap_pos == BITMAP_POS_UNKNOWN)
        pack->bitmap_pos = 0;

Fundamentally, we should be able to read information from an existing
MIDX when generating a new one. But in practice the midx-write.c code
assumes that we won't run into issues like the above with incongruent
pack IDs, and often makes those assumptions in extremely subtle and
fragile ways.

Instead, let's avoid reading from an existing MIDX altogether, and stick
with the pre-d6a8c58675 implementation. Harden against any regressions
in this area by adding a test which demonstrates these issues.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-06-11 16:08:28 -07:00

585 lines
15 KiB
Bash
Executable File

#!/bin/sh
# Test script covering bitmaps written alongside a multi-pack-index (MIDX).
test_description='exercise basic multi-pack bitmap functionality'
# Source the test library first, then the bitmap helper library.  Helpers and
# variables used below but not defined in this file (for example
# midx_bitmap_core, midx_checksum, $midx, $objdir) presumably come from one of
# these -- TODO confirm.
. ./test-lib.sh
. "${TEST_DIRECTORY}/lib-bitmap.sh"
# We'll be writing our own midx and bitmaps, so avoid getting confused by the
# automatic ones.
GIT_TEST_MULTI_PACK_INDEX=0
GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0
# This test exercises multi-pack bitmap functionality where the object order is
# stored and read from a special chunk within the MIDX, so use the default
# behavior here.
sane_unset GIT_TEST_MIDX_WRITE_REV
sane_unset GIT_TEST_MIDX_READ_RIDX
# bitmap_reuse_tests <from> <to> [<option>...]
#
# Defines three tests via test_expect_success: one that builds a bitmap of
# kind <from>, one that adds more history and writes a bitmap of kind <to>,
# and one that verifies both tips with `git rev-list --test-bitmap`.
# A kind of "MIDX" writes the bitmap with `git multi-pack-index write
# --bitmap`; any other value uses `git repack -Adb`.
#
# Any argument after <to> that equals "pack.writeBitmapLookupTable" turns the
# lookup table on for the generated tests.  Empty or unrecognized arguments
# are ignored.
#
# Side effects: assigns the global variables from, to, i and writeLookupTable
# (none of them is local), so callers that read writeLookupTable afterwards
# see the value computed here.  The generated setup test removes and
# re-initializes the "repo" directory.
bitmap_reuse_tests() {
from=$1
to=$2
writeLookupTable=false
# Drop <from> and <to> so that only the option arguments are scanned.  The
# previous `for i in $3-${$#}` was not a range over the positional
# parameters: it produced a single word starting with "$3-", which could
# never equal the option name, so the lookup table was never enabled here.
shift 2
for i in "$@"
do
case $i in
"pack.writeBitmapLookupTable") writeLookupTable=true;;
esac
done
# Each '"$writeLookupTable"' below leaves the single-quoted test body, splices
# in the value computed above, and re-enters the body.
test_expect_success "setup pack reuse tests ($from -> $to)" '
rm -fr repo &&
git init repo &&
(
cd repo &&
git config pack.writeBitmapLookupTable '"$writeLookupTable"' &&
test_commit_bulk 16 &&
git tag old-tip &&
git config core.multiPackIndex true &&
if test "MIDX" = "$from"
then
git repack -Ad &&
git multi-pack-index write --bitmap
else
git repack -Adb
fi
)
'
test_expect_success "build bitmap from existing ($from -> $to)" '
(
cd repo &&
git config pack.writeBitmapLookupTable '"$writeLookupTable"' &&
test_commit_bulk --id=further 16 &&
git tag new-tip &&
if test "MIDX" = "$to"
then
git repack -d &&
git multi-pack-index write --bitmap
else
git repack -Adb
fi
)
'
test_expect_success "verify resulting bitmaps ($from -> $to)" '
(
cd repo &&
git config pack.writeBitmapLookupTable '"$writeLookupTable"' &&
git for-each-ref &&
git rev-list --test-bitmap refs/tags/old-tip &&
git rev-list --test-bitmap refs/tags/new-tip
)
'
}
# test_midx_bitmap_cases [<option>...]
#
# Runs the shared battery of multi-pack bitmap tests once.  If any argument
# equals "pack.writeBitmapLookupTable", writeLookupTable is set to true and
# the option name is remembered in writeBitmapLookupTable so it can be passed
# on to bitmap_reuse_tests; otherwise writeLookupTable stays false.
#
# All variables assigned here (writeLookupTable, writeBitmapLookupTable, i)
# are globals.  The '"$writeLookupTable"' sequences leave the single-quoted
# test body and splice in the value writeLookupTable holds at the moment the
# test_expect_success line is reached.
#
# NOTE(review): bitmap_reuse_tests assigns the same global writeLookupTable,
# so every test defined after those calls uses whatever value
# bitmap_reuse_tests left behind rather than the one computed in the loop
# below -- confirm that the two always agree.
test_midx_bitmap_cases () {
writeLookupTable=false
writeBitmapLookupTable=
# Look for the single recognized option among the arguments.
for i in "$@"
do
case $i in
"pack.writeBitmapLookupTable")
writeLookupTable=true
writeBitmapLookupTable="$i"
;;
esac
done
# Start from a clean slate: this wipes everything in the current directory,
# including its .git, before re-initializing.
test_expect_success 'setup test_repository' '
rm -rf * .git &&
git init &&
git config pack.writeBitmapLookupTable '"$writeLookupTable"'
'
# Not defined in this file; presumably provided by lib-bitmap.sh -- TODO
# confirm.
midx_bitmap_core
# Exercise each combination of source and destination bitmap kind.  The third
# argument is the option name or an empty string.
bitmap_reuse_tests 'pack' 'MIDX' "$writeBitmapLookupTable"
bitmap_reuse_tests 'MIDX' 'pack' "$writeBitmapLookupTable"
bitmap_reuse_tests 'MIDX' 'MIDX' "$writeBitmapLookupTable"
# A pack holding only the "packed" commit object cannot be bitmapped: the
# MIDX write must fail with a closure error and leave no MIDX file behind.
test_expect_success 'missing object closure fails gracefully' '
rm -fr repo &&
git init repo &&
test_when_finished "rm -fr repo" &&
(
cd repo &&
git config pack.writeBitmapLookupTable '"$writeLookupTable"' &&
test_commit loose &&
test_commit packed &&
# Do not pass "--revs"; we want a pack without the "loose"
# commit.
git pack-objects $objdir/pack/pack <<-EOF &&
$(git rev-parse packed)
EOF
test_must_fail git multi-pack-index write --bitmap 2>err &&
grep "doesn.t have full closure" err &&
test_path_is_missing $midx
)
'
# Not defined in this file; presumably provided by lib-bitmap.sh -- TODO
# confirm.
midx_bitmap_partial_tests
# A bitmap left over from a deleted MIDX must be gone after a new MIDX and
# bitmap are written.
test_expect_success 'removing a MIDX clears stale bitmaps' '
rm -fr repo &&
git init repo &&
test_when_finished "rm -fr repo" &&
(
cd repo &&
git config pack.writeBitmapLookupTable '"$writeLookupTable"' &&
test_commit base &&
git repack &&
git multi-pack-index write --bitmap &&
# Write a MIDX and bitmap; remove the MIDX but leave the bitmap.
stale_bitmap=$midx-$(midx_checksum $objdir).bitmap &&
rm $midx &&
# Then write a new MIDX.
test_commit new &&
git repack &&
git multi-pack-index write --bitmap &&
test_path_is_file $midx &&
test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&
test_path_is_missing $stale_bitmap
)
'
# With 103 tagged commits, exactly one commit is expected to lack a bitmap
# ("before").  After tagging that commit under refs/tags/include/ and
# rewriting the MIDX with pack.preferBitmapTips pointing there, the list of
# commits without a bitmap ("after") must differ from "before".
test_expect_success 'pack.preferBitmapTips' '
git init repo &&
test_when_finished "rm -fr repo" &&
(
cd repo &&
git config pack.writeBitmapLookupTable '"$writeLookupTable"' &&
test_commit_bulk --message="%s" 103 &&
git log --format="%H" >commits.raw &&
sort <commits.raw >commits &&
git log --format="create refs/tags/%s %H" HEAD >refs &&
git update-ref --stdin <refs &&
git multi-pack-index write --bitmap &&
test_path_is_file $midx &&
test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&
test-tool bitmap list-commits | sort >bitmaps &&
comm -13 bitmaps commits >before &&
test_line_count = 1 before &&
perl -ne "printf(\"create refs/tags/include/%d \", $.); print" \
<before | git update-ref --stdin &&
rm -fr $midx-$(midx_checksum $objdir).bitmap &&
rm -fr $midx &&
git -c pack.preferBitmapTips=refs/tags/include \
multi-pack-index write --bitmap &&
test-tool bitmap list-commits | sort >bitmaps &&
comm -13 bitmaps commits >after &&
! test_cmp before after
)
'
# Without a snapshot both tagged commits get bitmaps; with a snapshot file
# listing only "one", the commit for "two" must no longer be listed.
test_expect_success 'writing a bitmap with --refs-snapshot' '
git init repo &&
test_when_finished "rm -fr repo" &&
(
cd repo &&
git config pack.writeBitmapLookupTable '"$writeLookupTable"' &&
test_commit one &&
test_commit two &&
git rev-parse one >snapshot &&
git repack -ad &&
# First, write a MIDX which see both refs/tags/one and
# refs/tags/two (causing both of those commits to receive
# bitmaps).
git multi-pack-index write --bitmap &&
test_path_is_file $midx &&
test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&
test-tool bitmap list-commits | sort >bitmaps &&
grep "$(git rev-parse one)" bitmaps &&
grep "$(git rev-parse two)" bitmaps &&
rm -fr $midx-$(midx_checksum $objdir).bitmap &&
rm -fr $midx &&
# Then again, but with a refs snapshot which only sees
# refs/tags/one.
git multi-pack-index write --bitmap --refs-snapshot=snapshot &&
test_path_is_file $midx &&
test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&
test-tool bitmap list-commits | sort >bitmaps &&
grep "$(git rev-parse one)" bitmaps &&
! grep "$(git rev-parse two)" bitmaps
)
'
# Same shape as the pack.preferBitmapTips test, but the preference is given
# through the snapshot file: the one commit that lacked a bitmap is written
# with a leading "+", and the un-bitmapped set must change as a result.
test_expect_success 'write a bitmap with --refs-snapshot (preferred tips)' '
git init repo &&
test_when_finished "rm -fr repo" &&
(
cd repo &&
git config pack.writeBitmapLookupTable '"$writeLookupTable"' &&
test_commit_bulk --message="%s" 103 &&
git log --format="%H" >commits.raw &&
sort <commits.raw >commits &&
git log --format="create refs/tags/%s %H" HEAD >refs &&
git update-ref --stdin <refs &&
git multi-pack-index write --bitmap &&
test_path_is_file $midx &&
test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&
test-tool bitmap list-commits | sort >bitmaps &&
comm -13 bitmaps commits >before &&
test_line_count = 1 before &&
(
grep -vf before commits.raw &&
# mark missing commits as preferred
sed "s/^/+/" before
) >snapshot &&
rm -fr $midx-$(midx_checksum $objdir).bitmap &&
rm -fr $midx &&
git multi-pack-index write --bitmap --refs-snapshot=snapshot &&
test-tool bitmap list-commits | sort >bitmaps &&
comm -13 bitmaps commits >after &&
! test_cmp before after
)
'
# Every line dumped by `test-tool bitmap dump-hashes` for the pack bitmap must
# also be present in the dump taken after the MIDX bitmap is written.
test_expect_success 'hash-cache values are propagated from pack bitmaps' '
rm -fr repo &&
git init repo &&
test_when_finished "rm -fr repo" &&
(
cd repo &&
git config pack.writeBitmapLookupTable '"$writeLookupTable"' &&
test_commit base &&
test_commit base2 &&
git repack -adb &&
test-tool bitmap dump-hashes >pack.raw &&
test_file_not_empty pack.raw &&
sort pack.raw >pack.hashes &&
test_commit new &&
git repack &&
git multi-pack-index write --bitmap &&
test-tool bitmap dump-hashes >midx.raw &&
sort midx.raw >midx.hashes &&
# ensure that every namehash in the pack bitmap can be found in
# the midx bitmap (i.e., that there are no oid-namehash pairs
# unique to the pack bitmap).
comm -23 pack.hashes midx.hashes >dropped.hashes &&
test_must_be_empty dropped.hashes
)
'
# Writing a MIDX over a single empty pack must succeed and create the MIDX,
# print the "bitmap without any objects" message, and create no .bitmap file.
test_expect_success 'no .bitmap is written without any objects' '
rm -fr repo &&
git init repo &&
test_when_finished "rm -fr repo" &&
(
cd repo &&
git config pack.writeBitmapLookupTable '"$writeLookupTable"' &&
empty="$(git pack-objects $objdir/pack/pack </dev/null)" &&
cat >packs <<-EOF &&
pack-$empty.idx
EOF
git multi-pack-index write --bitmap --stdin-packs \
<packs 2>err &&
grep "bitmap without any objects" err &&
test_path_is_file $midx &&
test_path_is_missing $midx-$(midx_checksum $objdir).bitmap
)
'
# With GIT_TEST_MIDX_READ_RIDX=0 set for the rev-list call, a bitmap-assisted
# rev-list must still succeed and must not print "ignoring extra bitmap file".
test_expect_success 'graceful fallback when missing reverse index' '
rm -fr repo &&
git init repo &&
test_when_finished "rm -fr repo" &&
(
cd repo &&
git config pack.writeBitmapLookupTable '"$writeLookupTable"' &&
test_commit base &&
# write a pack and MIDX bitmap containing base
git repack -adb &&
git multi-pack-index write --bitmap &&
GIT_TEST_MIDX_READ_RIDX=0 \
git rev-list --use-bitmap-index HEAD 2>err &&
! grep "ignoring extra bitmap file" err
)
'
}
# Run the shared cases twice: first with no arguments, which leaves the
# lookup table disabled, then with the lookup table option requested.
test_midx_bitmap_cases
test_midx_bitmap_cases "pack.writeBitmapLookupTable"
# With pack.writeBitmapLookupTable set to true, the trace2 event output
# captured while writing the MIDX bitmap must contain the label
# "writing_lookup_table".
test_expect_success 'multi-pack-index write writes lookup table if enabled' '
rm -fr repo &&
git init repo &&
test_when_finished "rm -fr repo" &&
(
cd repo &&
test_commit base &&
git config pack.writeBitmapLookupTable true &&
git repack -ad &&
GIT_TRACE2_EVENT="$(pwd)/trace" \
git multi-pack-index write --bitmap &&
grep "\"label\":\"writing_lookup_table\"" trace
)
'
# Writes a MIDX bitmap preferring pack p1, adds a third pack, then rewrites
# the MIDX preferring p2; a non-local clone must succeed after each write.
# Unlike most tests in this file, the repository directory is not removed
# when the test finishes.
test_expect_success 'preferred pack change with existing MIDX bitmap' '
git init preferred-pack-with-existing &&
(
cd preferred-pack-with-existing &&
test_commit base &&
test_commit other &&
git rev-list --objects --no-object-names base >p1.objects &&
git rev-list --objects --no-object-names other >p2.objects &&
p1="$(git pack-objects "$objdir/pack/pack" \
--delta-base-offset <p1.objects)" &&
p2="$(git pack-objects "$objdir/pack/pack" \
--delta-base-offset <p2.objects)" &&
# Generate a MIDX containing the first two packs,
# marking p1 as preferred, and ensure that it can be
# successfully cloned.
git multi-pack-index write --bitmap \
--preferred-pack="pack-$p1.pack" &&
test_path_is_file $midx &&
test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&
git clone --no-local . clone1 &&
# Then generate a new pack which sorts ahead of any
# existing pack (by tweaking the pack prefix).
test_commit foo &&
git pack-objects --all --unpacked $objdir/pack/pack0 &&
# Generate a new MIDX which changes the preferred pack
# to a pack contained in the existing MIDX.
git multi-pack-index write --bitmap \
--preferred-pack="pack-$p2.pack" &&
test_path_is_file $midx &&
test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&
# When the above circumstances are met, the preferred
# pack should change appropriately and clones should
# (still) succeed.
git clone --no-local . clone2
)
'
# After the branch is deleted, the commit is referenced only through the
# annotated tag "base"; it must still show up in the output of
# `test-tool bitmap list-commits` once the MIDX bitmap is written.
test_expect_success 'tagged commits are selected for bitmapping' '
rm -fr repo &&
git init repo &&
test_when_finished "rm -fr repo" &&
(
cd repo &&
test_commit --annotate base &&
git repack -d &&
# Remove refs/heads/main which points at the commit directly,
# leaving only a reference to the annotated tag.
git branch -M main &&
git checkout base &&
git branch -d main &&
git multi-pack-index write --bitmap &&
git rev-parse HEAD >want &&
test-tool bitmap list-commits >actual &&
grep $(cat want) actual
)
'
# Sets up a replace ref (A replaced by the tip of an orphan branch), repacks
# with a MIDX and bitmap, and checks that a bare non-local clone succeeds.
test_expect_success 'do not follow replace objects for MIDX bitmap' '
rm -fr repo &&
git init repo &&
test_when_finished "rm -fr repo" &&
(
cd repo &&
test_commit A &&
test_commit B &&
git checkout --orphan=orphan A &&
test_commit orphan &&
git replace A HEAD &&
git repack -ad --write-midx --write-bitmap-index &&
# generating reachability bitmaps with replace refs
# enabled will result in broken clones
git clone --no-local --bare . clone.git
)
'
# corrupt_file <path>
#
# Makes <path> writable for everyone and overwrites the five bytes starting
# at byte offset 12 with the text "bogus", leaving the rest of the file (and
# its length, when the file is long enough) untouched.
#
# Returns the status of chmod if that fails, otherwise the status of dd.
corrupt_file () {
	# Bail out with the status of chmod when the file cannot be made writable.
	chmod a+w "$1" || return
	# conv=notrunc keeps the bytes after the overwritten range in place.
	printf "bogus" |
	dd of="$1" bs=1 seek="12" conv=notrunc
}
# Checks that `git fsck` is silent while both the pack bitmap and the MIDX
# bitmap are intact, and reports an invalid checksum for the pack bitmap, for
# the MIDX bitmap, and for both once each has been damaged with corrupt_file.
#
# NOTE(review): `git init valid` creates a repository that the rest of the
# test never enters; every later command works on the .git directory of the
# current directory -- confirm whether that is intended.
test_expect_success 'git fsck correctly identifies good and bad bitmaps' '
git init valid &&
test_when_finished rm -rf valid &&
test_commit_bulk 20 &&
git repack -adbf &&
# Move pack-bitmap aside so it is not deleted
# in next repack.
packbitmap=$(ls .git/objects/pack/pack-*.bitmap) &&
mv "$packbitmap" "$packbitmap.bak" &&
test_commit_bulk 10 &&
git repack -b --write-midx &&
midxbitmap=$(ls .git/objects/pack/multi-pack-index-*.bitmap) &&
# Copy MIDX bitmap to backup. Copy pack bitmap from backup.
cp "$midxbitmap" "$midxbitmap.bak" &&
cp "$packbitmap.bak" "$packbitmap" &&
# fsck works at first
git fsck 2>err &&
test_must_be_empty err &&
corrupt_file "$packbitmap" &&
test_must_fail git fsck 2>err &&
grep "bitmap file '\''$packbitmap'\'' has invalid checksum" err &&
cp "$packbitmap.bak" "$packbitmap" &&
corrupt_file "$midxbitmap" &&
test_must_fail git fsck 2>err &&
grep "bitmap file '\''$midxbitmap'\'' has invalid checksum" err &&
corrupt_file "$packbitmap" &&
test_must_fail git fsck 2>err &&
grep "bitmap file '\''$midxbitmap'\'' has invalid checksum" err &&
grep "bitmap file '\''$packbitmap'\'' has invalid checksum" err
'
# Moves the MIDX and its bitmap aside, repacks everything into one pack, then
# restores the now-stale files.  A bitmap-assisted rev-list must still
# succeed, print "could not open pack" exactly twice, and count 6 objects.
# The `|| return 1` inside the loops makes a failed mv abort the test body,
# since a failure inside a for loop would otherwise not stop the && chain.
test_expect_success 'corrupt MIDX with bitmap causes fallback' '
git init corrupt-midx-bitmap &&
(
cd corrupt-midx-bitmap &&
test_commit first &&
git repack -d &&
test_commit second &&
git repack -d &&
git multi-pack-index write --bitmap &&
checksum=$(midx_checksum $objdir) &&
for f in $midx $midx-$checksum.bitmap
do
mv $f $f.bak || return 1
done &&
# pack everything together, invalidating the MIDX
git repack -ad &&
# then restore the now-stale MIDX
for f in $midx $midx-$checksum.bitmap
do
mv $f.bak $f || return 1
done &&
git rev-list --count --objects --use-bitmap-index HEAD >out 2>err &&
# should attempt opening the broken pack twice (once
# from the attempt to load it via the stale bitmap, and
# again when attempting to load it from the stale MIDX)
# before falling back to the non-MIDX case
test 2 -eq $(grep -c "could not open pack" err) &&
test 6 -eq $(cat out)
)
'
# Defines the same test once per pack.allowPackReuse value (single, multi).
# With GIT_TEST_MIDX_READ_BTMP=false set for the command, a bitmap-assisted
# pack-objects must succeed without writing anything to stderr.
# The title is double-quoted, so $allow_pack_reuse is expanded right here; the
# body is single-quoted, so its $allow_pack_reuse is only expanded when the
# body is evaluated -- presumably by test_expect_success while the loop
# variable still holds the same value; TODO confirm.
for allow_pack_reuse in single multi
do
test_expect_success "reading MIDX without BTMP chunk does not complain with $allow_pack_reuse pack reuse" '
test_when_finished "rm -rf midx-without-btmp" &&
git init midx-without-btmp &&
(
cd midx-without-btmp &&
test_commit initial &&
git repack -Adbl --write-bitmap-index --write-midx &&
GIT_TEST_MIDX_READ_BTMP=false git -c pack.allowPackReuse=$allow_pack_reuse \
pack-objects --all --use-bitmap-index --stdout </dev/null >/dev/null 2>err &&
test_must_be_empty err
)
'
done
# Writes a MIDX bitmap over three packs (A, B, C), then rewrites it with
# --stdin-packs listing only A and C, so that the middle pack of the existing
# MIDX is left out.  The second write and the following
# `git rev-list --test-bitmap HEAD` must both succeed.
test_expect_success 'remove one packfile between MIDX bitmap writes' '
git init remove-pack-between-writes &&
(
cd remove-pack-between-writes &&
test_commit A &&
test_commit B &&
test_commit C &&
# Create packs with the prefix "pack-A", "pack-B",
# "pack-C" to impose a lexicographic order on these
# packs so the pack being removed is always from the
# middle.
packdir=.git/objects/pack &&
A="$(echo A | git pack-objects $packdir/pack-A --revs)" &&
B="$(echo B | git pack-objects $packdir/pack-B --revs)" &&
C="$(echo C | git pack-objects $packdir/pack-C --revs)" &&
git multi-pack-index write --bitmap &&
cat >in <<-EOF &&
pack-A-$A.idx
pack-C-$C.idx
EOF
git multi-pack-index write --bitmap --stdin-packs <in &&
git rev-list --test-bitmap HEAD
)
'
# Signal the end of the test script to the test library.
test_done