Merge branch 'ps/pack-refs-auto-heuristics'

"git pack-refs --auto" for the files backend was too aggressive,
which has been a bit tamed.

* ps/pack-refs-auto-heuristics:
  refs/files: use heuristic to decide whether to repack with `--auto`
  t0601: merge tests for auto-packing of refs
  wrapper: introduce `log2u()`
This commit is contained in:
Junio C Hamano 2024-09-12 11:47:23 -07:00
commit 143682ec43
6 changed files with 194 additions and 27 deletions

View file

@ -1130,16 +1130,6 @@ enum bisect_error bisect_next_all(struct repository *r, const char *prefix)
return res;
}
static inline int log2i(int n)
{
int log2 = 0;
for (; n > 1; n >>= 1)
log2++;
return log2;
}
static inline int exp2i(int n)
{
return 1 << n;
@ -1162,7 +1152,7 @@ int estimate_bisect_steps(int all)
if (all < 3)
return 0;
n = log2i(all);
n = log2u(all);
e = exp2i(n);
x = all - e;

View file

@ -1311,6 +1311,68 @@ static int should_pack_ref(struct files_ref_store *refs,
return 0;
}
static int should_pack_refs(struct files_ref_store *refs,
struct pack_refs_opts *opts)
{
struct ref_iterator *iter;
size_t packed_size;
size_t refcount = 0;
size_t limit;
int ret;
if (!(opts->flags & PACK_REFS_AUTO))
return 1;
ret = packed_refs_size(refs->packed_ref_store, &packed_size);
if (ret < 0)
die("cannot determine packed-refs size");
/*
* Packing loose references into the packed-refs file scales with the
* number of references we're about to write. We thus decide whether we
* repack refs by weighing the current size of the packed-refs file
* against the number of loose references. This is done such that we do
* not repack too often on repositories with a huge number of
* references, where we can expect a lot of churn in the number of
* references.
*
* As a heuristic, we repack if the number of loose references in the
* repository exceeds `log2(nr_packed_refs) * 5`, where we estimate
* `nr_packed_refs = packed_size / 100`, which scales as following:
*
* - 1kB ~ 10 packed refs: 16 refs
* - 10kB ~ 100 packed refs: 33 refs
* - 100kB ~ 1k packed refs: 49 refs
* - 1MB ~ 10k packed refs: 66 refs
* - 10MB ~ 100k packed refs: 82 refs
* - 100MB ~ 1m packed refs: 99 refs
*
* We thus allow roughly 16 additional loose refs per factor of ten of
* packed refs. This heuristic may be tweaked in the future, but should
* serve as a sufficiently good first iteration.
*/
limit = log2u(packed_size / 100) * 5;
if (limit < 16)
limit = 16;
iter = cache_ref_iterator_begin(get_loose_ref_cache(refs, 0), NULL,
refs->base.repo, 0);
while ((ret = ref_iterator_advance(iter)) == ITER_OK) {
if (should_pack_ref(refs, iter->refname, iter->oid,
iter->flags, opts))
refcount++;
if (refcount >= limit) {
ref_iterator_abort(iter);
return 1;
}
}
if (ret != ITER_DONE)
die("error while iterating over references");
return 0;
}
static int files_pack_refs(struct ref_store *ref_store,
struct pack_refs_opts *opts)
{
@ -1323,6 +1385,9 @@ static int files_pack_refs(struct ref_store *ref_store,
struct strbuf err = STRBUF_INIT;
struct ref_transaction *transaction;
if (!should_pack_refs(refs, opts))
return 0;
transaction = ref_store_transaction_begin(refs->packed_ref_store, &err);
if (!transaction)
return -1;

View file

@ -1250,6 +1250,24 @@ int packed_refs_is_locked(struct ref_store *ref_store)
return is_lock_file_locked(&refs->lock);
}
int packed_refs_size(struct ref_store *ref_store,
size_t *out)
{
struct packed_ref_store *refs = packed_downcast(ref_store, REF_STORE_READ,
"packed_refs_size");
struct stat st;
if (stat(refs->path, &st) < 0) {
if (errno != ENOENT)
return -1;
*out = 0;
return 0;
}
*out = st.st_size;
return 0;
}
/*
* The packed-refs header line that we write out. Perhaps other traits
* will be added later.

View file

@ -27,6 +27,13 @@ int packed_refs_lock(struct ref_store *ref_store, int flags, struct strbuf *err)
void packed_refs_unlock(struct ref_store *ref_store);
int packed_refs_is_locked(struct ref_store *ref_store);
/*
* Obtain the size of the `packed-refs` file. Reports `0` as size in case there
* is no packed-refs file. Returns 0 on success, negative otherwise.
*/
int packed_refs_size(struct ref_store *ref_store,
size_t *out);
/*
* Return true if `transaction` really needs to be carried out against
* the specified packed_ref_store, or false if it can be skipped

View file

@ -161,13 +161,6 @@ test_expect_success 'test --exclude takes precedence over --include' '
git pack-refs --include "refs/heads/pack*" --exclude "refs/heads/pack*" &&
test -f .git/refs/heads/dont_pack5'
test_expect_success '--auto packs and prunes refs as usual' '
git branch auto &&
test_path_is_file .git/refs/heads/auto &&
git pack-refs --auto --all &&
test_path_is_missing .git/refs/heads/auto
'
test_expect_success 'see if up-to-date packed refs are preserved' '
git branch q &&
git pack-refs --all --prune &&
@ -367,14 +360,90 @@ test_expect_success 'pack-refs does not drop broken refs during deletion' '
test_cmp expect actual
'
test_expect_success 'maintenance --auto unconditionally packs loose refs' '
git update-ref refs/heads/something HEAD &&
test_path_is_file .git/refs/heads/something &&
git rev-parse refs/heads/something >expect &&
git maintenance run --task=pack-refs --auto &&
test_path_is_missing .git/refs/heads/something &&
git rev-parse refs/heads/something >actual &&
test_cmp expect actual
'
for command in "git pack-refs --all --auto" "git maintenance run --task=pack-refs --auto"
do
test_expect_success "$command does not repack below 16 refs without packed-refs" '
test_when_finished "rm -rf repo" &&
git init repo &&
(
cd repo &&
git config set maintenance.auto false &&
git commit --allow-empty --message "initial" &&
# Create 14 additional references, which brings us to
# 15 together with the default branch.
printf "create refs/heads/loose-%d HEAD\n" $(test_seq 14) >stdin &&
git update-ref --stdin <stdin &&
test_path_is_missing .git/packed-refs &&
git pack-refs --auto --all &&
test_path_is_missing .git/packed-refs &&
# Create the 16th reference, which should cause us to repack.
git update-ref refs/heads/loose-15 HEAD &&
git pack-refs --auto --all &&
test_path_is_file .git/packed-refs
)
'
test_expect_success "$command does not repack below 16 refs with small packed-refs" '
test_when_finished "rm -rf repo" &&
git init repo &&
(
cd repo &&
git config set maintenance.auto false &&
git commit --allow-empty --message "initial" &&
git pack-refs --all &&
test_line_count = 2 .git/packed-refs &&
# Create 15 loose references.
printf "create refs/heads/loose-%d HEAD\n" $(test_seq 15) >stdin &&
git update-ref --stdin <stdin &&
git pack-refs --auto --all &&
test_line_count = 2 .git/packed-refs &&
# Create the 16th loose reference, which should cause us to repack.
git update-ref refs/heads/loose-17 HEAD &&
git pack-refs --auto --all &&
test_line_count = 18 .git/packed-refs
)
'
test_expect_success "$command scales with size of packed-refs" '
test_when_finished "rm -rf repo" &&
git init repo &&
(
cd repo &&
git config set maintenance.auto false &&
git commit --allow-empty --message "initial" &&
# Create 99 packed refs. This should cause the heuristic
# to require more than the minimum amount of loose refs.
test_seq 99 |
while read i
do
printf "create refs/heads/packed-%d HEAD\n" $i || return 1
done >stdin &&
git update-ref --stdin <stdin &&
git pack-refs --all &&
test_line_count = 101 .git/packed-refs &&
# Create 24 loose refs, which should not yet cause us to repack.
printf "create refs/heads/loose-%d HEAD\n" $(test_seq 24) >stdin &&
git update-ref --stdin <stdin &&
git pack-refs --auto --all &&
test_line_count = 101 .git/packed-refs &&
# Create another handful of refs to cross the border.
# Note that we explicitly do not check for strict
# boundaries here, as this also depends on the size of
# the object hash.
printf "create refs/heads/addn-%d HEAD\n" $(test_seq 10) >stdin &&
git update-ref --stdin <stdin &&
git pack-refs --auto --all &&
test_line_count = 135 .git/packed-refs
)
'
done
test_done

View file

@ -140,4 +140,22 @@ int csprng_bytes(void *buf, size_t len);
*/
uint32_t git_rand(void);
/* Provide log2 of the given `size_t`. */
static inline unsigned log2u(uintmax_t sz)
{
unsigned l = 0;
/*
* Technically this isn't required, but it helps the compiler optimize
* this to a `bsr` instruction.
*/
if (!sz)
return 0;
for (; sz; sz >>= 1)
l++;
return l - 1;
}
#endif /* WRAPPER_H */