From 5b92477f896f147d02bd2e9168a780940b57cfc5 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Fri, 20 May 2022 19:18:14 -0400 Subject: [PATCH] builtin/gc.c: conditionally avoid pruning objects via loose Expose the new `git repack --cruft` mode from `git gc` via a new opt-in flag. When invoked like `git gc --cruft`, `git gc` will avoid exploding unreachable objects as loose ones, and instead create a cruft pack and `.mtimes` file. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/config/gc.txt | 21 +++++++++++++------- Documentation/git-gc.txt | 5 +++++ builtin/gc.c | 10 +++++++++- t/t5329-pack-objects-cruft.sh | 37 +++++++++++++++++++++++++++++++++++ 4 files changed, 65 insertions(+), 8 deletions(-) diff --git a/Documentation/config/gc.txt b/Documentation/config/gc.txt index c834e07991..38fea076a2 100644 --- a/Documentation/config/gc.txt +++ b/Documentation/config/gc.txt @@ -81,14 +81,21 @@ gc.packRefs:: to enable it within all non-bare repos or it can be set to a boolean value. The default is `true`. +gc.cruftPacks:: + Store unreachable objects in a cruft pack (see + linkgit:git-repack[1]) instead of as loose objects. The default + is `false`. + gc.pruneExpire:: - When 'git gc' is run, it will call 'prune --expire 2.weeks.ago'. - Override the grace period with this config variable. The value - "now" may be used to disable this grace period and always prune - unreachable objects immediately, or "never" may be used to - suppress pruning. This feature helps prevent corruption when - 'git gc' runs concurrently with another process writing to the - repository; see the "NOTES" section of linkgit:git-gc[1]. + When 'git gc' is run, it will call 'prune --expire 2.weeks.ago' + (and 'repack --cruft --cruft-expiration 2.weeks.ago' if using + cruft packs via `gc.cruftPacks` or `--cruft`). Override the + grace period with this config variable. The value "now" may be + used to disable this grace period and always prune unreachable + objects immediately, or "never" may be used to suppress pruning. + This feature helps prevent corruption when 'git gc' runs + concurrently with another process writing to the repository; see + the "NOTES" section of linkgit:git-gc[1]. gc.worktreePruneExpire:: When 'git gc' is run, it calls diff --git a/Documentation/git-gc.txt b/Documentation/git-gc.txt index 853967dea0..ba4e67700e 100644 --- a/Documentation/git-gc.txt +++ b/Documentation/git-gc.txt @@ -54,6 +54,11 @@ other housekeeping tasks (e.g. rerere, working trees, reflog...) will be performed as well. +--cruft:: + When expiring unreachable objects, pack them separately into a + cruft pack instead of storing the loose objects as loose + objects. + --prune=:: Prune loose objects older than date (default is 2 weeks ago, overridable by the config variable `gc.pruneExpire`). diff --git a/builtin/gc.c b/builtin/gc.c index b335cffa33..4d995e85e9 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -42,6 +42,7 @@ static const char * const builtin_gc_usage[] = { static int pack_refs = 1; static int prune_reflogs = 1; +static int cruft_packs = 0; static int aggressive_depth = 50; static int aggressive_window = 250; static int gc_auto_threshold = 6700; @@ -152,6 +153,7 @@ static void gc_config(void) git_config_get_int("gc.auto", &gc_auto_threshold); git_config_get_int("gc.autopacklimit", &gc_auto_pack_limit); git_config_get_bool("gc.autodetach", &detach_auto); + git_config_get_bool("gc.cruftpacks", &cruft_packs); git_config_get_expiry("gc.pruneexpire", &prune_expire); git_config_get_expiry("gc.worktreepruneexpire", &prune_worktrees_expire); git_config_get_expiry("gc.logexpiry", &gc_log_expire); @@ -331,7 +333,11 @@ static void add_repack_all_option(struct string_list *keep_pack) { if (prune_expire && !strcmp(prune_expire, "now")) strvec_push(&repack, "-a"); - else { + else if (cruft_packs) { + strvec_push(&repack, "--cruft"); + if (prune_expire) + strvec_pushf(&repack, "--cruft-expiration=%s", prune_expire); + } else { strvec_push(&repack, "-A"); if (prune_expire) strvec_pushf(&repack, "--unpack-unreachable=%s", prune_expire); @@ -551,6 +557,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix) { OPTION_STRING, 0, "prune", &prune_expire, N_("date"), N_("prune unreferenced objects"), PARSE_OPT_OPTARG, NULL, (intptr_t)prune_expire }, + OPT_BOOL(0, "cruft", &cruft_packs, N_("pack unreferenced objects separately")), OPT_BOOL(0, "aggressive", &aggressive, N_("be more thorough (increased runtime)")), OPT_BOOL_F(0, "auto", &auto_gc, N_("enable auto-gc mode"), PARSE_OPT_NOCOMPLETE), @@ -670,6 +677,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix) die(FAILED_RUN, repack.v[0]); if (prune_expire) { + /* run `git prune` even if using cruft packs */ strvec_push(&prune, prune_expire); if (quiet) strvec_push(&prune, "--no-progress"); diff --git a/t/t5329-pack-objects-cruft.sh b/t/t5329-pack-objects-cruft.sh index 8de87afce2..70a6a9553c 100755 --- a/t/t5329-pack-objects-cruft.sh +++ b/t/t5329-pack-objects-cruft.sh @@ -429,6 +429,43 @@ test_expect_success 'loose objects mtimes upsert others' ' ) ' +test_expect_success 'expiring cruft objects with git gc' ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit reachable && + git branch -M main && + git checkout --orphan other && + test_commit unreachable && + + git checkout main && + git branch -D other && + git tag -d unreachable && + # objects are not cruft if they are contained in the reflogs + git reflog expire --all --expire=all && + + git rev-list --objects --all --no-object-names >reachable.raw && + git cat-file --batch-all-objects --batch-check="%(objectname)" >objects && + sort reachable && + comm -13 reachable objects >unreachable && + + git repack --cruft -d && + + mtimes=$(ls .git/objects/pack/pack-*.mtimes) && + test_path_is_file $mtimes && + + git gc --cruft --prune=now && + + git cat-file --batch-all-objects --batch-check="%(objectname)" >objects && + + comm -23 unreachable objects >removed && + test_cmp unreachable removed && + test_path_is_missing $mtimes + ) +' + test_expect_success 'cruft packs are not included in geometric repack' ' git init repo && test_when_finished "rm -fr repo" &&