mirror of
https://github.com/git/git
synced 2024-10-02 14:45:21 +00:00
Merge branch 'tb/gc-recent-object-hook'

"git pack-objects" learned to invoke a new hook program that enumerates
extra objects to be used as anchoring points to keep otherwise
unreachable objects in cruft packs.

* tb/gc-recent-object-hook:
  gc: introduce `gc.recentObjectsHook`
  reachable.c: extract `obj_is_recent()`
This commit is contained in:
commit
58ecb2e383
|
@ -130,6 +130,21 @@ or rebase occurring. Since these changes are not part of the current
|
|||
project most users will want to expire them sooner, which is why the
|
||||
default is more aggressive than `gc.reflogExpire`.
|
||||
|
||||
gc.recentObjectsHook::
|
||||
When considering whether or not to remove an object (either when
|
||||
generating a cruft pack or storing unreachable objects as
|
||||
loose), use the shell to execute the specified command(s).
|
||||
Interpret their output as object IDs which Git will consider as
|
||||
"recent", regardless of their age. Because Git treats their mtimes as
|
||||
"now", any objects (and their descendants) mentioned in the
|
||||
output will be kept regardless of their true age.
|
||||
+
|
||||
Output must contain exactly one hex object ID per line, and nothing
|
||||
else. Objects which cannot be found in the repository are ignored.
|
||||
Multiple hooks are supported, but all must exit successfully, else the
|
||||
operation (either generating a cruft pack or unpacking unreachable
|
||||
objects) will be halted.
|
||||
|
||||
gc.rerereResolved::
|
||||
Records of conflicted merges you resolved earlier are
|
||||
kept for this many days when 'git rerere gc' is run.
|
||||
|
|
85
reachable.c
85
reachable.c
|
@ -16,6 +16,8 @@
|
|||
#include "object-store.h"
|
||||
#include "pack-bitmap.h"
|
||||
#include "pack-mtimes.h"
|
||||
#include "config.h"
|
||||
#include "run-command.h"
|
||||
|
||||
struct connectivity_progress {
|
||||
struct progress *progress;
|
||||
|
@ -67,8 +69,77 @@ struct recent_data {
|
|||
timestamp_t timestamp;
|
||||
report_recent_object_fn *cb;
|
||||
int ignore_in_core_kept_packs;
|
||||
|
||||
struct oidset extra_recent_oids;
|
||||
int extra_recent_oids_loaded;
|
||||
};
|
||||
|
||||
static int run_one_gc_recent_objects_hook(struct oidset *set,
|
||||
const char *args)
|
||||
{
|
||||
struct child_process cmd = CHILD_PROCESS_INIT;
|
||||
struct strbuf buf = STRBUF_INIT;
|
||||
FILE *out;
|
||||
int ret = 0;
|
||||
|
||||
cmd.use_shell = 1;
|
||||
cmd.out = -1;
|
||||
|
||||
strvec_push(&cmd.args, args);
|
||||
|
||||
if (start_command(&cmd))
|
||||
return -1;
|
||||
|
||||
out = xfdopen(cmd.out, "r");
|
||||
while (strbuf_getline(&buf, out) != EOF) {
|
||||
struct object_id oid;
|
||||
const char *rest;
|
||||
|
||||
if (parse_oid_hex(buf.buf, &oid, &rest) || *rest) {
|
||||
ret = error(_("invalid extra cruft tip: '%s'"), buf.buf);
|
||||
break;
|
||||
}
|
||||
|
||||
oidset_insert(set, &oid);
|
||||
}
|
||||
|
||||
fclose(out);
|
||||
ret |= finish_command(&cmd);
|
||||
|
||||
strbuf_release(&buf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void load_gc_recent_objects(struct recent_data *data)
|
||||
{
|
||||
const struct string_list *programs;
|
||||
int ret = 0;
|
||||
size_t i;
|
||||
|
||||
data->extra_recent_oids_loaded = 1;
|
||||
|
||||
if (git_config_get_string_multi("gc.recentobjectshook", &programs))
|
||||
return;
|
||||
|
||||
for (i = 0; i < programs->nr; i++) {
|
||||
ret = run_one_gc_recent_objects_hook(&data->extra_recent_oids,
|
||||
programs->items[i].string);
|
||||
if (ret)
|
||||
die(_("unable to enumerate additional recent objects"));
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Tell whether the object `oid`, whose modification time is `mtime`,
 * counts as recent for `data`.
 *
 * An mtime strictly greater than data->timestamp is recent outright.
 * Otherwise the answer is whether `oid` is a member of
 * data->extra_recent_oids; that set is populated with
 * load_gc_recent_objects() the first time it is needed.
 *
 * Returns 1 when mtime > data->timestamp, else the result of
 * oidset_contains().
 */
static int obj_is_recent(const struct object_id *oid, timestamp_t mtime,
			 struct recent_data *data)
{
	if (mtime <= data->timestamp) {
		/* too old by mtime: consult the hook-provided set instead */
		if (!data->extra_recent_oids_loaded)
			load_gc_recent_objects(data);
		return oidset_contains(&data->extra_recent_oids, oid);
	}

	return 1;
}
|
||||
|
||||
static void add_recent_object(const struct object_id *oid,
|
||||
struct packed_git *pack,
|
||||
off_t offset,
|
||||
|
@ -78,7 +149,7 @@ static void add_recent_object(const struct object_id *oid,
|
|||
struct object *obj;
|
||||
enum object_type type;
|
||||
|
||||
if (mtime <= data->timestamp)
|
||||
if (!obj_is_recent(oid, mtime, data))
|
||||
return;
|
||||
|
||||
/*
|
||||
|
@ -193,16 +264,24 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
|
|||
data.cb = cb;
|
||||
data.ignore_in_core_kept_packs = ignore_in_core_kept_packs;
|
||||
|
||||
oidset_init(&data.extra_recent_oids, 0);
|
||||
data.extra_recent_oids_loaded = 0;
|
||||
|
||||
r = for_each_loose_object(add_recent_loose, &data,
|
||||
FOR_EACH_OBJECT_LOCAL_ONLY);
|
||||
if (r)
|
||||
return r;
|
||||
goto done;
|
||||
|
||||
flags = FOR_EACH_OBJECT_LOCAL_ONLY | FOR_EACH_OBJECT_PACK_ORDER;
|
||||
if (ignore_in_core_kept_packs)
|
||||
flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
|
||||
|
||||
return for_each_packed_object(add_recent_packed, &data, flags);
|
||||
r = for_each_packed_object(add_recent_packed, &data, flags);
|
||||
|
||||
done:
|
||||
oidset_clear(&data.extra_recent_oids);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static int mark_object_seen(const struct object_id *oid,
|
||||
|
|
|
@ -350,4 +350,18 @@ test_expect_success 'old reachable-from-recent retained with bitmaps' '
|
|||
test_must_fail git cat-file -e $to_drop
|
||||
'
|
||||
|
||||
test_expect_success 'gc.recentObjectsHook' '
|
||||
add_blob &&
|
||||
test-tool chmtime =-86500 $BLOB_FILE &&
|
||||
|
||||
write_script precious-objects <<-EOF &&
|
||||
echo $BLOB
|
||||
EOF
|
||||
test_config gc.recentObjectsHook ./precious-objects &&
|
||||
|
||||
git prune --expire=now &&
|
||||
|
||||
git cat-file -p $BLOB
|
||||
'
|
||||
|
||||
test_done
|
||||
|
|
|
@ -739,4 +739,175 @@ test_expect_success 'cruft objects are freshend via loose' '
|
|||
)
|
||||
'
|
||||
|
||||
test_expect_success 'gc.recentObjectsHook' '
|
||||
git init repo &&
|
||||
test_when_finished "rm -fr repo" &&
|
||||
(
|
||||
cd repo &&
|
||||
|
||||
# Create a handful of objects.
|
||||
#
|
||||
# - one reachable commit, "base", designated for the reachable
|
||||
# pack
|
||||
# - one unreachable commit, "cruft.discard", which is marked
|
||||
# for deletion
|
||||
# - one unreachable commit, "cruft.old", which would be marked
|
||||
# for deletion, but is rescued as an extra cruft tip
|
||||
# - one unreachable commit, "cruft.new", which is not marked
|
||||
# for deletion
|
||||
test_commit base &&
|
||||
git branch -M main &&
|
||||
|
||||
git checkout --orphan discard &&
|
||||
git rm -fr . &&
|
||||
test_commit --no-tag cruft.discard &&
|
||||
|
||||
git checkout --orphan old &&
|
||||
git rm -fr . &&
|
||||
test_commit --no-tag cruft.old &&
|
||||
cruft_old="$(git rev-parse HEAD)" &&
|
||||
|
||||
git checkout --orphan new &&
|
||||
git rm -fr . &&
|
||||
test_commit --no-tag cruft.new &&
|
||||
cruft_new="$(git rev-parse HEAD)" &&
|
||||
|
||||
git checkout main &&
|
||||
git branch -D discard old new &&
|
||||
git reflog expire --all --expire=all &&
|
||||
|
||||
# mark cruft.old with an mtime that is many minutes
|
||||
# older than the expiration period, and mark cruft.new
|
||||
# with an mtime that is in the future (and thus not
|
||||
# eligible for pruning).
|
||||
test-tool chmtime -2000 "$objdir/$(test_oid_to_path $cruft_old)" &&
|
||||
test-tool chmtime +1000 "$objdir/$(test_oid_to_path $cruft_new)" &&
|
||||
|
||||
# Write the list of cruft objects we expect to
|
||||
# accumulate, which is comprised of everything reachable
|
||||
# from cruft.old and cruft.new, but not cruft.discard.
|
||||
git rev-list --objects --no-object-names \
|
||||
$cruft_old $cruft_new >cruft.raw &&
|
||||
sort cruft.raw >cruft.expect &&
|
||||
|
||||
# Write the script to list extra tips, which are limited
|
||||
# to cruft.old, in this case.
|
||||
write_script extra-tips <<-EOF &&
|
||||
echo $cruft_old
|
||||
EOF
|
||||
git config gc.recentObjectsHook ./extra-tips &&
|
||||
|
||||
git repack --cruft --cruft-expiration=now -d &&
|
||||
|
||||
mtimes="$(ls .git/objects/pack/pack-*.mtimes)" &&
|
||||
git show-index <${mtimes%.mtimes}.idx >cruft &&
|
||||
cut -d" " -f2 cruft | sort >cruft.actual &&
|
||||
test_cmp cruft.expect cruft.actual &&
|
||||
|
||||
# Ensure that the "old" objects are removed after
|
||||
# dropping the gc.recentObjectsHook hook.
|
||||
git config --unset gc.recentObjectsHook &&
|
||||
git repack --cruft --cruft-expiration=now -d &&
|
||||
|
||||
mtimes="$(ls .git/objects/pack/pack-*.mtimes)" &&
|
||||
git show-index <${mtimes%.mtimes}.idx >cruft &&
|
||||
cut -d" " -f2 cruft | sort >cruft.actual &&
|
||||
|
||||
git rev-list --objects --no-object-names $cruft_new >cruft.raw &&
|
||||
cp cruft.expect cruft.old &&
|
||||
sort cruft.raw >cruft.expect &&
|
||||
test_cmp cruft.expect cruft.actual &&
|
||||
|
||||
# ensure objects which are no longer in the cruft pack were
|
||||
# removed from the repository
|
||||
for object in $(comm -13 cruft.expect cruft.old)
|
||||
do
|
||||
test_must_fail git cat-file -t $object || return 1
|
||||
done
|
||||
)
|
||||
'
|
||||
|
||||
test_expect_success 'multi-valued gc.recentObjectsHook' '
|
||||
git init repo &&
|
||||
test_when_finished "rm -fr repo" &&
|
||||
(
|
||||
cd repo &&
|
||||
|
||||
test_commit base &&
|
||||
git branch -M main &&
|
||||
|
||||
git checkout --orphan cruft.a &&
|
||||
git rm -fr . &&
|
||||
test_commit --no-tag cruft.a &&
|
||||
cruft_a="$(git rev-parse HEAD)" &&
|
||||
|
||||
git checkout --orphan cruft.b &&
|
||||
git rm -fr . &&
|
||||
test_commit --no-tag cruft.b &&
|
||||
cruft_b="$(git rev-parse HEAD)" &&
|
||||
|
||||
git checkout main &&
|
||||
git branch -D cruft.a cruft.b &&
|
||||
git reflog expire --all --expire=all &&
|
||||
|
||||
echo "echo $cruft_a" | write_script extra-tips.a &&
|
||||
echo "echo $cruft_b" | write_script extra-tips.b &&
|
||||
echo "false" | write_script extra-tips.c &&
|
||||
|
||||
git rev-list --objects --no-object-names $cruft_a $cruft_b \
|
||||
>cruft.raw &&
|
||||
sort cruft.raw >cruft.expect &&
|
||||
|
||||
# ensure that each extra cruft tip is saved by its
|
||||
# respective hook
|
||||
git config --add gc.recentObjectsHook ./extra-tips.a &&
|
||||
git config --add gc.recentObjectsHook ./extra-tips.b &&
|
||||
git repack --cruft --cruft-expiration=now -d &&
|
||||
|
||||
mtimes="$(ls .git/objects/pack/pack-*.mtimes)" &&
|
||||
git show-index <${mtimes%.mtimes}.idx >cruft &&
|
||||
cut -d" " -f2 cruft | sort >cruft.actual &&
|
||||
test_cmp cruft.expect cruft.actual &&
|
||||
|
||||
# ensure that a dirty exit halts cruft pack generation
|
||||
git config --add gc.recentObjectsHook ./extra-tips.c &&
|
||||
test_must_fail git repack --cruft --cruft-expiration=now -d 2>err &&
|
||||
grep "unable to enumerate additional recent objects" err &&
|
||||
|
||||
# and that the existing cruft pack is left alone
|
||||
test_path_is_file "$mtimes"
|
||||
)
|
||||
'
|
||||
|
||||
test_expect_success 'additional cruft blobs via gc.recentObjectsHook' '
|
||||
git init repo &&
|
||||
test_when_finished "rm -fr repo" &&
|
||||
(
|
||||
cd repo &&
|
||||
|
||||
test_commit base &&
|
||||
|
||||
blob=$(echo "unreachable" | git hash-object -w --stdin) &&
|
||||
|
||||
# mark the unreachable blob we wrote above as having
|
||||
# aged out of the retention period
|
||||
test-tool chmtime -2000 "$objdir/$(test_oid_to_path $blob)" &&
|
||||
|
||||
# Write the script to list extra tips, which is just the
|
||||
# extra blob as above.
|
||||
write_script extra-tips <<-EOF &&
|
||||
echo $blob
|
||||
EOF
|
||||
git config gc.recentObjectsHook ./extra-tips &&
|
||||
|
||||
git repack --cruft --cruft-expiration=now -d &&
|
||||
|
||||
mtimes="$(ls .git/objects/pack/pack-*.mtimes)" &&
|
||||
git show-index <${mtimes%.mtimes}.idx >cruft &&
|
||||
cut -d" " -f2 cruft >actual &&
|
||||
echo $blob >expect &&
|
||||
test_cmp expect actual
|
||||
)
|
||||
'
|
||||
|
||||
test_done
|
||||
|
|
|
@ -113,6 +113,37 @@ test_expect_success 'do not bother loosening old objects' '
|
|||
test_must_fail git cat-file -p $obj2
|
||||
'
|
||||
|
||||
test_expect_success 'gc.recentObjectsHook' '
|
||||
obj1=$(echo one | git hash-object -w --stdin) &&
|
||||
obj2=$(echo two | git hash-object -w --stdin) &&
|
||||
obj3=$(echo three | git hash-object -w --stdin) &&
|
||||
pack1=$(echo $obj1 | git pack-objects .git/objects/pack/pack) &&
|
||||
pack2=$(echo $obj2 | git pack-objects .git/objects/pack/pack) &&
|
||||
pack3=$(echo $obj3 | git pack-objects .git/objects/pack/pack) &&
|
||||
git prune-packed &&
|
||||
|
||||
git cat-file -p $obj1 &&
|
||||
git cat-file -p $obj2 &&
|
||||
git cat-file -p $obj3 &&
|
||||
|
||||
git tag -a -m tag obj2-tag $obj2 &&
|
||||
obj2_tag="$(git rev-parse obj2-tag)" &&
|
||||
|
||||
write_script precious-objects <<-EOF &&
|
||||
echo $obj2_tag
|
||||
EOF
|
||||
git config gc.recentObjectsHook ./precious-objects &&
|
||||
|
||||
test-tool chmtime =-86400 .git/objects/pack/pack-$pack2.pack &&
|
||||
test-tool chmtime =-86400 .git/objects/pack/pack-$pack3.pack &&
|
||||
git repack -A -d --unpack-unreachable=1.hour.ago &&
|
||||
|
||||
git cat-file -p $obj1 &&
|
||||
git cat-file -p $obj2 &&
|
||||
git cat-file -p $obj2_tag &&
|
||||
test_must_fail git cat-file -p $obj3
|
||||
'
|
||||
|
||||
test_expect_success 'keep packed objects found only in index' '
|
||||
echo my-unique-content >file &&
|
||||
git add file &&
|
||||
|
|
Loading…
Reference in a new issue