Merge branch 'tb/gc-recent-object-hook'

"git pack-objects" learned to invoke a new hook program that
enumerates extra objects to be used as anchoring points to keep
otherwise unreachable objects in cruft packs.

* tb/gc-recent-object-hook:
  gc: introduce `gc.recentObjectsHook`
  reachable.c: extract `obj_is_recent()`
This commit is contained in:
Junio C Hamano 2023-06-23 11:21:17 -07:00
commit 58ecb2e383
5 changed files with 313 additions and 3 deletions

View file

@ -130,6 +130,21 @@ or rebase occurring. Since these changes are not part of the current
project most users will want to expire them sooner, which is why the
default is more aggressive than `gc.reflogExpire`.
gc.recentObjectsHook::
When considering whether or not to remove an object (either when
generating a cruft pack or storing unreachable objects as
loose), use the shell to execute the specified command(s).
Interpret their output as object IDs which Git will consider as
"recent", regardless of their age. By treating their mtimes as
"now", any objects (and their descendants) mentioned in the
output will be kept regardless of their true age.
+
Output must contain exactly one hex object ID per line, and nothing
else. Objects which cannot be found in the repository are ignored.
Multiple hooks are supported, but all must exit successfully, else the
operation (either generating a cruft pack or unpacking unreachable
objects) will be halted.
gc.rerereResolved::
Records of conflicted merges you resolved earlier are
kept for this many days when 'git rerere gc' is run.

View file

@ -16,6 +16,8 @@
#include "object-store.h"
#include "pack-bitmap.h"
#include "pack-mtimes.h"
#include "config.h"
#include "run-command.h"
struct connectivity_progress {
struct progress *progress;
@ -67,8 +69,77 @@ struct recent_data {
timestamp_t timestamp;
report_recent_object_fn *cb;
int ignore_in_core_kept_packs;
struct oidset extra_recent_oids;
int extra_recent_oids_loaded;
};
static int run_one_gc_recent_objects_hook(struct oidset *set,
const char *args)
{
struct child_process cmd = CHILD_PROCESS_INIT;
struct strbuf buf = STRBUF_INIT;
FILE *out;
int ret = 0;
cmd.use_shell = 1;
cmd.out = -1;
strvec_push(&cmd.args, args);
if (start_command(&cmd))
return -1;
out = xfdopen(cmd.out, "r");
while (strbuf_getline(&buf, out) != EOF) {
struct object_id oid;
const char *rest;
if (parse_oid_hex(buf.buf, &oid, &rest) || *rest) {
ret = error(_("invalid extra cruft tip: '%s'"), buf.buf);
break;
}
oidset_insert(set, &oid);
}
fclose(out);
ret |= finish_command(&cmd);
strbuf_release(&buf);
return ret;
}
/*
 * Populate data->extra_recent_oids by running each command configured
 * under "gc.recentobjectshook", in the order the config API lists them.
 *
 * The "loaded" flag is set before anything else, so it is set even when
 * no hook is configured. A non-zero result from any single hook is
 * fatal: the process dies rather than continuing with a partial set.
 */
static void load_gc_recent_objects(struct recent_data *data)
{
	const struct string_list *hooks;
	size_t idx = 0;

	data->extra_recent_oids_loaded = 1;

	/* a non-zero lookup result is treated as "nothing to run" */
	if (git_config_get_string_multi("gc.recentobjectshook", &hooks))
		return;

	while (idx < hooks->nr) {
		const char *command = hooks->items[idx++].string;

		if (run_one_gc_recent_objects_hook(&data->extra_recent_oids,
						   command))
			die(_("unable to enumerate additional recent objects"));
	}
}
/*
 * Decide whether `oid` counts as "recent" for this traversal.
 *
 * An object is recent when its `mtime` is strictly newer than
 * data->timestamp. Otherwise the answer is whether `oid` is a member of
 * data->extra_recent_oids; that set is filled on first need by
 * load_gc_recent_objects(), guarded by data->extra_recent_oids_loaded.
 *
 * Returns 1 for the mtime case, or the result of oidset_contains().
 */
static int obj_is_recent(const struct object_id *oid, timestamp_t mtime,
			 struct recent_data *data)
{
	if (mtime <= data->timestamp) {
		/* too old by mtime: fall back to the hook-provided set */
		if (!data->extra_recent_oids_loaded)
			load_gc_recent_objects(data);
		return oidset_contains(&data->extra_recent_oids, oid);
	}

	return 1;
}
static void add_recent_object(const struct object_id *oid,
struct packed_git *pack,
off_t offset,
@ -78,7 +149,7 @@ static void add_recent_object(const struct object_id *oid,
struct object *obj;
enum object_type type;
if (mtime <= data->timestamp)
if (!obj_is_recent(oid, mtime, data))
return;
/*
@ -193,16 +264,24 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
data.cb = cb;
data.ignore_in_core_kept_packs = ignore_in_core_kept_packs;
oidset_init(&data.extra_recent_oids, 0);
data.extra_recent_oids_loaded = 0;
r = for_each_loose_object(add_recent_loose, &data,
FOR_EACH_OBJECT_LOCAL_ONLY);
if (r)
return r;
goto done;
flags = FOR_EACH_OBJECT_LOCAL_ONLY | FOR_EACH_OBJECT_PACK_ORDER;
if (ignore_in_core_kept_packs)
flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
return for_each_packed_object(add_recent_packed, &data, flags);
r = for_each_packed_object(add_recent_packed, &data, flags);
done:
oidset_clear(&data.extra_recent_oids);
return r;
}
static int mark_object_seen(const struct object_id *oid,

View file

@ -350,4 +350,18 @@ test_expect_success 'old reachable-from-recent retained with bitmaps' '
test_must_fail git cat-file -e $to_drop
'
# Check that "git prune --expire=now" does not remove an object whose ID is
# printed by a gc.recentObjectsHook command, even after the object's file has
# had its mtime changed with "test-tool chmtime".
#
# The hook script is written with an unquoted here-document delimiter, so
# $BLOB is expanded when the script is created and the script echoes a fixed
# object ID. The final "git cat-file -p" fails the test if the blob is gone.
#
# NOTE(review): add_blob, $BLOB and $BLOB_FILE are defined outside this view;
# presumably add_blob writes a fresh unreachable loose blob -- confirm.
test_expect_success 'gc.recentObjectsHook' '
add_blob &&
test-tool chmtime =-86500 $BLOB_FILE &&
write_script precious-objects <<-EOF &&
echo $BLOB
EOF
test_config gc.recentObjectsHook ./precious-objects &&
git prune --expire=now &&
git cat-file -p $BLOB
'
test_done

View file

@ -739,4 +739,175 @@ test_expect_success 'cruft objects are freshend via loose' '
)
'
# Exercise gc.recentObjectsHook together with "git repack --cruft".
#
# Phase 1: with a hook that prints the tip of the aged "cruft.old" history,
# the cruft pack (located through its .mtimes file) must contain exactly the
# objects listed by rev-list for cruft.old and cruft.new.
# Phase 2: after the hook is unset, a second repack must produce a cruft pack
# holding only the objects listed for cruft.new, and every object that
# dropped out of the expected list must no longer be readable with
# "git cat-file -t".
#
# NOTE(review): $objdir is not assigned inside this test; presumably it is
# set earlier in the script -- confirm.
test_expect_success 'gc.recentObjectsHook' '
git init repo &&
test_when_finished "rm -fr repo" &&
(
cd repo &&
# Create a handful of objects.
#
# - one reachable commit, "base", designated for the reachable
# pack
# - one unreachable commit, "cruft.discard", which is marked
# for deletion
# - one unreachable commit, "cruft.old", which would be marked
# for deletion, but is rescued as an extra cruft tip
# - one unreachable commit, "cruft.new", which is not marked
# for deletion
test_commit base &&
git branch -M main &&
git checkout --orphan discard &&
git rm -fr . &&
test_commit --no-tag cruft.discard &&
git checkout --orphan old &&
git rm -fr . &&
test_commit --no-tag cruft.old &&
cruft_old="$(git rev-parse HEAD)" &&
git checkout --orphan new &&
git rm -fr . &&
test_commit --no-tag cruft.new &&
cruft_new="$(git rev-parse HEAD)" &&
git checkout main &&
git branch -D discard old new &&
git reflog expire --all --expire=all &&
# mark cruft.old with an mtime that is many minutes
# older than the expiration period, and mark cruft.new
# with an mtime that is in the future (and thus not
# eligible for pruning).
test-tool chmtime -2000 "$objdir/$(test_oid_to_path $cruft_old)" &&
test-tool chmtime +1000 "$objdir/$(test_oid_to_path $cruft_new)" &&
# Write the list of cruft objects we expect to
# accumulate, which is comprised of everything reachable
# from cruft.old and cruft.new, but not cruft.discard.
git rev-list --objects --no-object-names \
$cruft_old $cruft_new >cruft.raw &&
sort cruft.raw >cruft.expect &&
# Write the script to list extra tips, which are limited
# to cruft.old, in this case.
write_script extra-tips <<-EOF &&
echo $cruft_old
EOF
git config gc.recentObjectsHook ./extra-tips &&
git repack --cruft --cruft-expiration=now -d &&
mtimes="$(ls .git/objects/pack/pack-*.mtimes)" &&
git show-index <${mtimes%.mtimes}.idx >cruft &&
cut -d" " -f2 cruft | sort >cruft.actual &&
test_cmp cruft.expect cruft.actual &&
# Ensure that the "old" objects are removed after
# dropping the gc.recentObjectsHook hook.
git config --unset gc.recentObjectsHook &&
git repack --cruft --cruft-expiration=now -d &&
mtimes="$(ls .git/objects/pack/pack-*.mtimes)" &&
git show-index <${mtimes%.mtimes}.idx >cruft &&
cut -d" " -f2 cruft | sort >cruft.actual &&
git rev-list --objects --no-object-names $cruft_new >cruft.raw &&
cp cruft.expect cruft.old &&
sort cruft.raw >cruft.expect &&
test_cmp cruft.expect cruft.actual &&
# ensure objects which are no longer in the cruft pack were
# removed from the repository
for object in $(comm -13 cruft.expect cruft.old)
do
test_must_fail git cat-file -t $object || return 1
done
)
'
# Exercise several gc.recentObjectsHook values at once.
#
# Two hook scripts each print one unreachable tip (cruft.a and cruft.b); with
# both configured via "git config --add", the cruft pack must contain exactly
# the objects rev-list reports for the two tips. A third hook that just runs
# "false" is then added: the repack must fail, print the "unable to enumerate
# additional recent objects" message on stderr, and leave the .mtimes file
# found after the earlier successful repack in place.
test_expect_success 'multi-valued gc.recentObjectsHook' '
git init repo &&
test_when_finished "rm -fr repo" &&
(
cd repo &&
test_commit base &&
git branch -M main &&
git checkout --orphan cruft.a &&
git rm -fr . &&
test_commit --no-tag cruft.a &&
cruft_a="$(git rev-parse HEAD)" &&
git checkout --orphan cruft.b &&
git rm -fr . &&
test_commit --no-tag cruft.b &&
cruft_b="$(git rev-parse HEAD)" &&
git checkout main &&
git branch -D cruft.a cruft.b &&
git reflog expire --all --expire=all &&
echo "echo $cruft_a" | write_script extra-tips.a &&
echo "echo $cruft_b" | write_script extra-tips.b &&
echo "false" | write_script extra-tips.c &&
git rev-list --objects --no-object-names $cruft_a $cruft_b \
>cruft.raw &&
sort cruft.raw >cruft.expect &&
# ensure that each extra cruft tip is saved by its
# respective hook
git config --add gc.recentObjectsHook ./extra-tips.a &&
git config --add gc.recentObjectsHook ./extra-tips.b &&
git repack --cruft --cruft-expiration=now -d &&
mtimes="$(ls .git/objects/pack/pack-*.mtimes)" &&
git show-index <${mtimes%.mtimes}.idx >cruft &&
cut -d" " -f2 cruft | sort >cruft.actual &&
test_cmp cruft.expect cruft.actual &&
# ensure that a dirty exit halts cruft pack generation
git config --add gc.recentObjectsHook ./extra-tips.c &&
test_must_fail git repack --cruft --cruft-expiration=now -d 2>err &&
grep "unable to enumerate additional recent objects" err &&
# and that the existing cruft pack is left alone
test_path_is_file "$mtimes"
)
'
# Check that a gc.recentObjectsHook command may name a blob, not only a
# commit.
#
# An unreachable blob is written, its loose file is aged with chmtime, and a
# hook script echoing its object ID is configured. After "git repack --cruft
# --cruft-expiration=now -d", the object-ID column of the cruft pack index
# must consist of that single blob ID.
#
# NOTE(review): $objdir is not assigned inside this test; presumably it is
# set earlier in the script -- confirm.
test_expect_success 'additional cruft blobs via gc.recentObjectsHook' '
git init repo &&
test_when_finished "rm -fr repo" &&
(
cd repo &&
test_commit base &&
blob=$(echo "unreachable" | git hash-object -w --stdin) &&
# mark the unreachable blob we wrote above as having
# aged out of the retention period
test-tool chmtime -2000 "$objdir/$(test_oid_to_path $blob)" &&
# Write the script to list extra tips, which is just the
# extra blob as above.
write_script extra-tips <<-EOF &&
echo $blob
EOF
git config gc.recentObjectsHook ./extra-tips &&
git repack --cruft --cruft-expiration=now -d &&
mtimes="$(ls .git/objects/pack/pack-*.mtimes)" &&
git show-index <${mtimes%.mtimes}.idx >cruft &&
cut -d" " -f2 cruft >actual &&
echo $blob >expect &&
test_cmp expect actual
)
'
test_done

View file

@ -113,6 +113,37 @@ test_expect_success 'do not bother loosening old objects' '
test_must_fail git cat-file -p $obj2
'
# Exercise gc.recentObjectsHook with
# "git repack -A -d --unpack-unreachable=1.hour.ago".
#
# Three blobs are each packed into their own pack and the loose copies are
# removed with prune-packed. An annotated tag pointing at obj2 is created and
# the hook script echoes the tag object ID. The packs holding obj2 and obj3
# get their mtimes changed with chmtime; the pack holding obj1 is left alone.
# After the repack, obj1, obj2 and the tag object must still be readable,
# while obj3 must be gone.
#
# NOTE(review): "git tag -a" also creates the ref obj2-tag, so obj2 may be
# reachable from that ref regardless of the hook; confirm that this test
# would actually fail if the hook were not honored.
test_expect_success 'gc.recentObjectsHook' '
obj1=$(echo one | git hash-object -w --stdin) &&
obj2=$(echo two | git hash-object -w --stdin) &&
obj3=$(echo three | git hash-object -w --stdin) &&
pack1=$(echo $obj1 | git pack-objects .git/objects/pack/pack) &&
pack2=$(echo $obj2 | git pack-objects .git/objects/pack/pack) &&
pack3=$(echo $obj3 | git pack-objects .git/objects/pack/pack) &&
git prune-packed &&
git cat-file -p $obj1 &&
git cat-file -p $obj2 &&
git cat-file -p $obj3 &&
git tag -a -m tag obj2-tag $obj2 &&
obj2_tag="$(git rev-parse obj2-tag)" &&
write_script precious-objects <<-EOF &&
echo $obj2_tag
EOF
git config gc.recentObjectsHook ./precious-objects &&
test-tool chmtime =-86400 .git/objects/pack/pack-$pack2.pack &&
test-tool chmtime =-86400 .git/objects/pack/pack-$pack3.pack &&
git repack -A -d --unpack-unreachable=1.hour.ago &&
git cat-file -p $obj1 &&
git cat-file -p $obj2 &&
git cat-file -p $obj2_tag &&
test_must_fail git cat-file -p $obj3
'
test_expect_success 'keep packed objects found only in index' '
echo my-unique-content >file &&
git add file &&