2023-05-16 06:33:57 +00:00
|
|
|
#include "git-compat-util.h"
|
2023-03-21 06:26:03 +00:00
|
|
|
#include "environment.h"
|
2023-02-24 00:09:27 +00:00
|
|
|
#include "hex.h"
|
2014-10-01 10:28:42 +00:00
|
|
|
#include "lockfile.h"
|
2006-04-23 23:52:20 +00:00
|
|
|
#include "tree.h"
|
2009-04-20 10:58:18 +00:00
|
|
|
#include "tree-walk.h"
|
2006-04-23 23:52:20 +00:00
|
|
|
#include "cache-tree.h"
|
2022-04-05 05:20:10 +00:00
|
|
|
#include "bulk-checkin.h"
|
2023-04-11 07:41:53 +00:00
|
|
|
#include "object-file.h"
|
2023-05-16 06:34:06 +00:00
|
|
|
#include "object-store-ll.h"
|
2023-05-16 06:33:56 +00:00
|
|
|
#include "read-cache-ll.h"
|
2018-08-18 14:41:28 +00:00
|
|
|
#include "replace-object.h"
|
2019-06-25 13:40:31 +00:00
|
|
|
#include "promisor-remote.h"
|
sparse-index: convert from full to sparse
If we have a full index, then we can convert it to a sparse index by
replacing directories outside of the sparse cone with sparse directory
entries. The convert_to_sparse() method does this, when the situation is
appropriate.
For now, we avoid converting the index to a sparse index if:
1. the index is split.
2. the index is already sparse.
3. sparse-checkout is disabled.
4. sparse-checkout does not use cone mode.
Finally, we currently limit the conversion to when the
GIT_TEST_SPARSE_INDEX environment variable is enabled. A mode using Git
config will be added in a later change.
The trickiest thing about this conversion is that we might not be able
to mark a directory as a sparse directory just because it is outside the
sparse cone. There might be unmerged files within that directory, so we
need to look for those. Also, if there is some strange reason why a file
is not marked with CE_SKIP_WORKTREE, then we should give up on
converting that directory. There is still hope that some of its
subdirectories might be able to convert to sparse, so we keep looking
deeper.
The conversion process is assisted by the cache-tree extension. This is
calculated from the full index if it does not already exist. We then
abandon the cache-tree as it no longer applies to the newly-sparse
index. Thus, this cache-tree will be recalculated in every
sparse-full-sparse round-trip until we integrate the cache-tree
extension with the sparse index.
Some Git commands use the index after writing it. For example, 'git add'
will update the index, then write it to disk, then read its entries to
report information. To keep the in-memory index in a full state after
writing, we re-expand it to a full one after the write. This is wasteful
for commands that only write the index and do not read from it again,
but that is only the case until we make those commands "sparse aware."
We can compare the behavior of the sparse-index in
t1092-sparse-checkout-compatibility.sh by using GIT_TEST_SPARSE_INDEX=1
when operating on the 'sparse-index' repo. We can also compare the two
sparse repos directly, such as comparing their indexes (when expanded to
full in the case of the 'sparse-index' repo). We also verify that the
index is actually populated with sparse directory entries.
The 'checkout and reset (mixed)' test is marked for failure when
comparing a sparse repo to a full repo, but we can compare the two
sparse-checkout cases directly to ensure that we are not changing the
behavior when using a sparse index.
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-03-30 13:10:55 +00:00
|
|
|
#include "sparse-index.h"
|
2023-04-11 03:00:38 +00:00
|
|
|
#include "trace.h"
|
|
|
|
#include "trace2.h"
|
2006-04-23 23:52:20 +00:00
|
|
|
|
2019-06-19 21:05:58 +00:00
|
|
|
#ifndef DEBUG_CACHE_TREE
|
|
|
|
#define DEBUG_CACHE_TREE 0
|
2006-10-30 23:29:53 +00:00
|
|
|
#endif
|
2006-04-23 23:52:20 +00:00
|
|
|
|
|
|
|
struct cache_tree *cache_tree(void)
|
|
|
|
{
|
|
|
|
struct cache_tree *it = xcalloc(1, sizeof(struct cache_tree));
|
|
|
|
it->entry_count = -1;
|
|
|
|
return it;
|
|
|
|
}
|
|
|
|
|
2006-04-25 04:18:58 +00:00
|
|
|
void cache_tree_free(struct cache_tree **it_p)
|
2006-04-23 23:52:20 +00:00
|
|
|
{
|
|
|
|
int i;
|
2006-04-25 04:18:58 +00:00
|
|
|
struct cache_tree *it = *it_p;
|
2006-04-23 23:52:20 +00:00
|
|
|
|
|
|
|
if (!it)
|
|
|
|
return;
|
|
|
|
for (i = 0; i < it->subtree_nr; i++)
|
2010-09-06 21:40:16 +00:00
|
|
|
if (it->down[i]) {
|
2006-04-26 00:40:02 +00:00
|
|
|
cache_tree_free(&it->down[i]->cache_tree);
|
2010-09-06 21:40:16 +00:00
|
|
|
free(it->down[i]);
|
|
|
|
}
|
2006-04-23 23:52:20 +00:00
|
|
|
free(it->down);
|
|
|
|
free(it);
|
2006-04-25 04:18:58 +00:00
|
|
|
*it_p = NULL;
|
2006-04-23 23:52:20 +00:00
|
|
|
}
|
|
|
|
|
2006-04-26 00:40:02 +00:00
|
|
|
/*
 * Ordering used for subtree names: a shorter name always sorts before
 * a longer one, and only names of equal length are compared by their
 * bytes.
 *
 * Returns -1 / 1 when the lengths differ, otherwise the result of
 * memcmp() over the common length (zero when the names are equal).
 */
static int subtree_name_cmp(const char *one, int onelen,
			    const char *two, int twolen)
{
	if (onelen != twolen)
		return onelen < twolen ? -1 : 1;
	return memcmp(one, two, onelen);
}
|
|
|
|
|
2021-01-23 19:58:13 +00:00
|
|
|
int cache_tree_subtree_pos(struct cache_tree *it, const char *path, int pathlen)
|
2006-04-26 00:40:02 +00:00
|
|
|
{
|
|
|
|
struct cache_tree_sub **down = it->down;
|
|
|
|
int lo, hi;
|
|
|
|
lo = 0;
|
|
|
|
hi = it->subtree_nr;
|
|
|
|
while (lo < hi) {
|
2017-10-08 18:29:37 +00:00
|
|
|
int mi = lo + (hi - lo) / 2;
|
2006-04-26 00:40:02 +00:00
|
|
|
struct cache_tree_sub *mdl = down[mi];
|
|
|
|
int cmp = subtree_name_cmp(path, pathlen,
|
|
|
|
mdl->name, mdl->namelen);
|
|
|
|
if (!cmp)
|
|
|
|
return mi;
|
|
|
|
if (cmp < 0)
|
|
|
|
hi = mi;
|
|
|
|
else
|
|
|
|
lo = mi + 1;
|
|
|
|
}
|
|
|
|
return -lo-1;
|
|
|
|
}
|
|
|
|
|
2006-04-23 23:52:20 +00:00
|
|
|
static struct cache_tree_sub *find_subtree(struct cache_tree *it,
|
|
|
|
const char *path,
|
|
|
|
int pathlen,
|
|
|
|
int create)
|
|
|
|
{
|
|
|
|
struct cache_tree_sub *down;
|
2021-01-23 19:58:13 +00:00
|
|
|
int pos = cache_tree_subtree_pos(it, path, pathlen);
|
2006-04-26 00:40:02 +00:00
|
|
|
if (0 <= pos)
|
|
|
|
return it->down[pos];
|
2006-04-23 23:52:20 +00:00
|
|
|
if (!create)
|
|
|
|
return NULL;
|
2006-04-26 00:40:02 +00:00
|
|
|
|
|
|
|
pos = -pos-1;
|
2014-03-03 22:31:51 +00:00
|
|
|
ALLOC_GROW(it->down, it->subtree_nr + 1, it->subtree_alloc);
|
2006-04-26 00:40:02 +00:00
|
|
|
it->subtree_nr++;
|
|
|
|
|
2016-02-22 22:44:32 +00:00
|
|
|
FLEX_ALLOC_MEM(down, name, path, pathlen);
|
2006-04-26 00:40:02 +00:00
|
|
|
down->cache_tree = NULL;
|
2006-04-23 23:52:20 +00:00
|
|
|
down->namelen = pathlen;
|
2006-04-26 00:40:02 +00:00
|
|
|
|
|
|
|
if (pos < it->subtree_nr)
|
2018-01-22 17:50:09 +00:00
|
|
|
MOVE_ARRAY(it->down + pos + 1, it->down + pos,
|
|
|
|
it->subtree_nr - pos - 1);
|
2006-04-26 00:40:02 +00:00
|
|
|
it->down[pos] = down;
|
2006-04-23 23:52:20 +00:00
|
|
|
return down;
|
|
|
|
}
|
|
|
|
|
2006-04-27 08:33:07 +00:00
|
|
|
/*
 * Return the subtree record of "it" named by the NUL-terminated
 * string "path", creating the record when it does not exist yet.
 */
struct cache_tree_sub *cache_tree_sub(struct cache_tree *it, const char *path)
{
	return find_subtree(it, path, strlen(path), 1);
}
|
|
|
|
|
2014-06-13 12:19:31 +00:00
|
|
|
static int do_invalidate_path(struct cache_tree *it, const char *path)
|
2006-04-23 23:52:20 +00:00
|
|
|
{
|
|
|
|
/* a/b/c
|
|
|
|
* ==> invalidate self
|
|
|
|
* ==> find "a", have it invalidate "b/c"
|
|
|
|
* a
|
|
|
|
* ==> invalidate self
|
|
|
|
* ==> if "a" exists as a subtree, remove it.
|
|
|
|
*/
|
|
|
|
const char *slash;
|
|
|
|
int namelen;
|
|
|
|
struct cache_tree_sub *down;
|
|
|
|
|
2019-06-19 21:05:58 +00:00
|
|
|
#if DEBUG_CACHE_TREE
|
2006-05-03 23:10:45 +00:00
|
|
|
fprintf(stderr, "cache-tree invalidate <%s>\n", path);
|
|
|
|
#endif
|
|
|
|
|
2006-04-23 23:52:20 +00:00
|
|
|
if (!it)
|
2014-06-13 12:19:31 +00:00
|
|
|
return 0;
|
2014-03-08 06:48:31 +00:00
|
|
|
slash = strchrnul(path, '/');
|
|
|
|
namelen = slash - path;
|
2006-04-23 23:52:20 +00:00
|
|
|
it->entry_count = -1;
|
2014-03-08 06:48:31 +00:00
|
|
|
if (!*slash) {
|
2006-04-26 00:40:02 +00:00
|
|
|
int pos;
|
2021-01-23 19:58:13 +00:00
|
|
|
pos = cache_tree_subtree_pos(it, path, namelen);
|
2006-04-26 00:40:02 +00:00
|
|
|
if (0 <= pos) {
|
|
|
|
cache_tree_free(&it->down[pos]->cache_tree);
|
|
|
|
free(it->down[pos]);
|
2006-04-23 23:52:20 +00:00
|
|
|
/* 0 1 2 3 4 5
|
|
|
|
* ^ ^subtree_nr = 6
|
2006-04-26 00:40:02 +00:00
|
|
|
* pos
|
2006-04-23 23:52:20 +00:00
|
|
|
* move 4 and 5 up one place (2 entries)
|
2006-04-26 00:40:02 +00:00
|
|
|
* 2 = 6 - 3 - 1 = subtree_nr - pos - 1
|
2006-04-23 23:52:20 +00:00
|
|
|
*/
|
2017-07-15 20:00:45 +00:00
|
|
|
MOVE_ARRAY(it->down + pos, it->down + pos + 1,
|
|
|
|
it->subtree_nr - pos - 1);
|
2006-04-23 23:52:20 +00:00
|
|
|
it->subtree_nr--;
|
|
|
|
}
|
2014-06-13 12:19:31 +00:00
|
|
|
return 1;
|
2006-04-23 23:52:20 +00:00
|
|
|
}
|
|
|
|
down = find_subtree(it, path, namelen, 0);
|
|
|
|
if (down)
|
2014-06-13 12:19:31 +00:00
|
|
|
do_invalidate_path(down->cache_tree, slash + 1);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
void cache_tree_invalidate_path(struct index_state *istate, const char *path)
|
|
|
|
{
|
|
|
|
if (do_invalidate_path(istate->cache_tree, path))
|
|
|
|
istate->cache_changed |= CACHE_TREE_CHANGED;
|
2006-04-23 23:52:20 +00:00
|
|
|
}
|
|
|
|
|
2021-01-23 19:58:12 +00:00
|
|
|
static int verify_cache(struct index_state *istate, int flags)
|
2006-04-23 23:52:20 +00:00
|
|
|
{
|
2021-01-23 19:58:12 +00:00
|
|
|
unsigned i, funny;
|
2012-01-16 02:36:46 +00:00
|
|
|
int silent = flags & WRITE_TREE_SILENT;
|
2006-04-23 23:52:20 +00:00
|
|
|
|
|
|
|
/* Verify that the tree is merged */
|
|
|
|
funny = 0;
|
2021-01-23 19:58:12 +00:00
|
|
|
for (i = 0; i < istate->cache_nr; i++) {
|
|
|
|
const struct cache_entry *ce = istate->cache[i];
|
commit: ignore intent-to-add entries instead of refusing
Originally, "git add -N" was introduced to help users from forgetting to
add new files to the index before they ran "git commit -a". As an attempt
to help them further so that they do not forget to say "-a", "git commit"
to commit the index as-is was taught to error out, reminding the user that
they may have forgotten to add the final contents of the paths before
running the command.
This turned out to be a false "safety" that is useless. If the user made
changes to already tracked paths and paths added with "git add -N", and
then ran "git add" to register the final contents of the paths added with
"git add -N", "git commit" will happily create a commit out of the index,
without including the local changes made to the already tracked paths. It
was not a useful "safety" measure to prevent "forgetful" mistakes from
happening.
It turns out that this behaviour is not just a useless false "safety", but
actively hurts use cases of "git add -N" that were discovered later and
have become popular, namely, to tell Git to be aware of these paths added
by "git add -N", so that commands like "git status" and "git diff" would
include them in their output, even though the user is not interested in
including them in the next commit they are going to make.
Fix this ancient UI mistake, and instead make a commit from the index
ignoring the paths added by "git add -N" without adding real contents.
Based on the work by Nguyễn Thái Ngọc Duy, and helped by injection of
sanity from Jonathan Nieder and others on the Git mailing list.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-02-07 19:55:48 +00:00
|
|
|
if (ce_stage(ce)) {
|
2011-12-06 17:43:37 +00:00
|
|
|
if (silent)
|
|
|
|
return -1;
|
2006-04-23 23:52:20 +00:00
|
|
|
if (10 < ++funny) {
|
|
|
|
fprintf(stderr, "...\n");
|
|
|
|
break;
|
|
|
|
}
|
2012-12-16 04:15:25 +00:00
|
|
|
fprintf(stderr, "%s: unmerged (%s)\n",
|
2016-09-05 20:07:52 +00:00
|
|
|
ce->name, oid_to_hex(&ce->oid));
|
2006-04-23 23:52:20 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if (funny)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
/* Also verify that the cache does not have path and path/file
|
|
|
|
* at the same time. At this point we know the cache has only
|
|
|
|
* stage 0 entries.
|
|
|
|
*/
|
|
|
|
funny = 0;
|
2021-01-23 19:58:12 +00:00
|
|
|
for (i = 0; i + 1 < istate->cache_nr; i++) {
|
2006-04-23 23:52:20 +00:00
|
|
|
/* path/file always comes after path because of the way
|
|
|
|
* the cache is sorted. Also path can appear only once,
|
|
|
|
* which means conflicting one would immediately follow.
|
|
|
|
*/
|
2021-01-23 19:58:12 +00:00
|
|
|
const struct cache_entry *this_ce = istate->cache[i];
|
|
|
|
const struct cache_entry *next_ce = istate->cache[i + 1];
|
2021-01-07 16:32:10 +00:00
|
|
|
const char *this_name = this_ce->name;
|
|
|
|
const char *next_name = next_ce->name;
|
|
|
|
int this_len = ce_namelen(this_ce);
|
|
|
|
if (this_len < ce_namelen(next_ce) &&
|
2021-01-07 16:32:11 +00:00
|
|
|
next_name[this_len] == '/' &&
|
|
|
|
strncmp(this_name, next_name, this_len) == 0) {
|
2006-04-23 23:52:20 +00:00
|
|
|
if (10 < ++funny) {
|
|
|
|
fprintf(stderr, "...\n");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
fprintf(stderr, "You have both %s and %s\n",
|
|
|
|
this_name, next_name);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (funny)
|
|
|
|
return -1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void discard_unused_subtrees(struct cache_tree *it)
|
|
|
|
{
|
|
|
|
struct cache_tree_sub **down = it->down;
|
|
|
|
int nr = it->subtree_nr;
|
|
|
|
int dst, src;
|
|
|
|
for (dst = src = 0; src < nr; src++) {
|
|
|
|
struct cache_tree_sub *s = down[src];
|
|
|
|
if (s->used)
|
|
|
|
down[dst++] = s;
|
|
|
|
else {
|
2006-04-25 04:18:58 +00:00
|
|
|
cache_tree_free(&s->cache_tree);
|
2006-04-23 23:52:20 +00:00
|
|
|
free(s);
|
|
|
|
it->subtree_nr--;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-04-25 04:18:58 +00:00
|
|
|
int cache_tree_fully_valid(struct cache_tree *it)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
if (!it)
|
|
|
|
return 0;
|
2023-03-28 13:58:50 +00:00
|
|
|
if (it->entry_count < 0 || !repo_has_object_file(the_repository, &it->oid))
|
2006-04-25 04:18:58 +00:00
|
|
|
return 0;
|
|
|
|
for (i = 0; i < it->subtree_nr; i++) {
|
|
|
|
if (!cache_tree_fully_valid(it->down[i]->cache_tree))
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2021-07-23 18:52:23 +00:00
|
|
|
static int must_check_existence(const struct cache_entry *ce)
|
|
|
|
{
|
2023-03-28 13:58:53 +00:00
|
|
|
return !(repo_has_promisor_remote(the_repository) && ce_skip_worktree(ce));
|
2021-07-23 18:52:23 +00:00
|
|
|
}
|
|
|
|
|
2006-04-23 23:52:20 +00:00
|
|
|
/*
 * Bring the cache-tree node "it" up to date for one directory level.
 *
 * "cache" points at the first of "entries" index entries still to be
 * looked at, and the first "baselen" bytes of "base" are the path
 * prefix of the directory that "it" describes ("" for the top level).
 * Subdirectories are handled by recursing, after which the tree
 * object for this level is hashed (and, depending on "flags",
 * written out).
 *
 * "flags" is a combination of WRITE_TREE_* bits; WRITE_TREE_DRY_RUN
 * and WRITE_TREE_REPAIR must not be given together.
 *
 * On return *skip_count holds the number of CE_REMOVE entries seen at
 * this level and below.
 *
 * Returns the number of index entries that belong to this level
 * (it->entry_count when the node was already valid and could be
 * reused), or a negative value on failure.
 */
static int update_one(struct cache_tree *it,
		      struct cache_entry **cache,
		      int entries,
		      const char *base,
		      int baselen,
		      int *skip_count,
		      int flags)
{
	struct strbuf buffer;
	int missing_ok = flags & WRITE_TREE_MISSING_OK;
	int dryrun = flags & WRITE_TREE_DRY_RUN;
	int repair = flags & WRITE_TREE_REPAIR;
	int to_invalidate = 0;
	int i;

	assert(!(dryrun && repair));

	*skip_count = 0;

	/*
	 * If the first entry of this region is a sparse directory
	 * entry corresponding exactly to 'base', then this cache_tree
	 * struct is a "leaf" in the data structure, pointing to the
	 * tree OID specified in the entry.
	 */
	if (entries > 0) {
		const struct cache_entry *ce = cache[0];

		if (S_ISSPARSEDIR(ce->ce_mode) &&
		    ce->ce_namelen == baselen &&
		    !strncmp(ce->name, base, baselen)) {
			it->entry_count = 1;
			oidcpy(&it->oid, &ce->oid);
			return 1;
		}
	}

	/* A still-valid node whose tree object exists is reused as is. */
	if (0 <= it->entry_count && repo_has_object_file(the_repository, &it->oid))
		return it->entry_count;

	/*
	 * We first scan for subtrees and update them; we start by
	 * marking existing subtrees -- the ones that are unmarked
	 * should not be in the result.
	 */
	for (i = 0; i < it->subtree_nr; i++)
		it->down[i]->used = 0;

	/*
	 * Find the subtrees and update them.
	 */
	i = 0;
	while (i < entries) {
		const struct cache_entry *ce = cache[i];
		struct cache_tree_sub *sub;
		const char *path, *slash;
		int pathlen, sublen, subcnt, subskip;

		path = ce->name;
		pathlen = ce_namelen(ce);
		if (pathlen <= baselen || memcmp(base, path, baselen))
			break;	/* at the end of this level */

		slash = strchr(path + baselen, '/');
		if (!slash) {
			/* A plain entry at this level; nothing to recurse into. */
			i++;
			continue;
		}
		/*
		 * a/bbb/c (base = a/, slash = /c)
		 * ==>
		 * path+baselen = bbb/c, sublen = 3
		 */
		sublen = slash - (path + baselen);
		sub = find_subtree(it, path + baselen, sublen, 1);
		if (!sub->cache_tree)
			sub->cache_tree = cache_tree();
		subcnt = update_one(sub->cache_tree,
				    cache + i, entries - i,
				    path,
				    baselen + sublen + 1,
				    &subskip,
				    flags);
		if (subcnt < 0)
			return subcnt;
		/*
		 * A subtree that claims zero entries would keep "i" from
		 * advancing and make this loop spin forever.  A subtree we
		 * create always holds at least the entry being processed,
		 * so this can only come from a bogus cache-tree record read
		 * from the on-disk index; report it instead of looping.
		 */
		if (!subcnt)
			die("index cache-tree records empty sub-tree");
		i += subcnt;
		sub->count = subcnt; /* to be used in the next loop */
		*skip_count += subskip;
		sub->used = 1;
	}

	discard_unused_subtrees(it);

	/*
	 * Then write out the tree object for this level.
	 */
	strbuf_init(&buffer, 8192);

	i = 0;
	while (i < entries) {
		const struct cache_entry *ce = cache[i];
		struct cache_tree_sub *sub = NULL;
		const char *path, *slash;
		int pathlen, entlen;
		const struct object_id *oid;
		unsigned mode;
		int expected_missing = 0;
		int contains_ita = 0;
		int ce_missing_ok;

		path = ce->name;
		pathlen = ce_namelen(ce);
		if (pathlen <= baselen || memcmp(base, path, baselen))
			break;	/* at the end of this level */

		slash = strchr(path + baselen, '/');
		if (slash) {
			/*
			 * A subdirectory: it contributes one tree entry and
			 * covers the sub->count index entries recorded by
			 * the first loop.
			 */
			entlen = slash - (path + baselen);
			sub = find_subtree(it, path + baselen, entlen, 0);
			if (!sub)
				die("cache-tree.c: '%.*s' in '%s' not found",
				    entlen, path + baselen, path);
			i += sub->count;
			oid = &sub->cache_tree->oid;
			mode = S_IFDIR;
			/*
			 * A subtree left invalid (negative entry_count)
			 * makes this level invalid as well, and its tree
			 * object may legitimately be missing.
			 */
			contains_ita = sub->cache_tree->entry_count < 0;
			if (contains_ita) {
				to_invalidate = 1;
				expected_missing = 1;
			}
		}
		else {
			oid = &ce->oid;
			mode = ce->ce_mode;
			entlen = pathlen - baselen;
			i++;
		}

		/*
		 * The null object name must never be recorded in a tree,
		 * not even for gitlinks or with WRITE_TREE_MISSING_OK, so
		 * it is rejected before the existence check is consulted.
		 */
		ce_missing_ok = mode == S_IFGITLINK || missing_ok ||
			!must_check_existence(ce);
		if (is_null_oid(oid) ||
		    (!ce_missing_ok && !repo_has_object_file(the_repository, oid))) {
			strbuf_release(&buffer);
			if (expected_missing)
				return -1;
			return error("invalid object %06o %s for '%.*s'",
				mode, oid_to_hex(oid), entlen+baselen, path);
		}

		/*
		 * CE_REMOVE entries are removed before the index is
		 * written to disk. Skip them to remain consistent
		 * with the future on-disk index.
		 */
		if (ce->ce_flags & CE_REMOVE) {
			*skip_count = *skip_count + 1;
			continue;
		}

		/*
		 * CE_INTENT_TO_ADD entries exist in on-disk index but
		 * they are not part of generated trees. Invalidate up
		 * to root to force cache-tree users to read elsewhere.
		 */
		if (!sub && ce_intent_to_add(ce)) {
			to_invalidate = 1;
			continue;
		}

		/*
		 * "sub" can be an empty tree if all subentries are i-t-a.
		 */
		if (contains_ita && is_empty_tree_oid(oid))
			continue;

		/* Append one tree entry: "<octal mode> <name>\0<raw hash>". */
		strbuf_grow(&buffer, entlen + 100);
		strbuf_addf(&buffer, "%o %.*s%c", mode, entlen, path + baselen, '\0');
		strbuf_add(&buffer, oid->hash, the_hash_algo->rawsz);

#if DEBUG_CACHE_TREE
		fprintf(stderr, "cache-tree update-one %o %.*s\n",
			mode, entlen, path + baselen);
#endif
	}

	if (repair) {
		/*
		 * Repair mode only hashes the buffer; the result is
		 * adopted when that object is already present, otherwise
		 * this node is left invalid.
		 */
		struct object_id oid;
		hash_object_file(the_hash_algo, buffer.buf, buffer.len,
				 OBJ_TREE, &oid);
		if (repo_has_object_file_with_flags(the_repository, &oid, OBJECT_INFO_SKIP_FETCH_OBJECT))
			oidcpy(&it->oid, &oid);
		else
			to_invalidate = 1;
	} else if (dryrun) {
		/* Dry run: compute the object name without writing it. */
		hash_object_file(the_hash_algo, buffer.buf, buffer.len,
				 OBJ_TREE, &it->oid);
	} else if (write_object_file_flags(buffer.buf, buffer.len, OBJ_TREE,
					   &it->oid, flags & WRITE_TREE_SILENT
					   ? HASH_SILENT : 0)) {
		strbuf_release(&buffer);
		return -1;
	}

	strbuf_release(&buffer);
	/* Entries skipped as CE_REMOVE do not count towards entry_count. */
	it->entry_count = to_invalidate ? -1 : i - *skip_count;
#if DEBUG_CACHE_TREE
	fprintf(stderr, "cache-tree update-one (%d ent, %d subtree) %s\n",
		it->entry_count, it->subtree_nr,
		oid_to_hex(&it->oid));
#endif
	return i;
}
|
|
|
|
|
2014-06-13 12:19:32 +00:00
|
|
|
int cache_tree_update(struct index_state *istate, int flags)
|
2006-04-23 23:52:20 +00:00
|
|
|
{
|
2021-01-23 19:58:11 +00:00
|
|
|
int skip, i;
|
|
|
|
|
2021-01-23 19:58:12 +00:00
|
|
|
i = verify_cache(istate, flags);
|
2014-06-13 12:19:32 +00:00
|
|
|
|
2006-04-23 23:52:20 +00:00
|
|
|
if (i)
|
|
|
|
return i;
|
2021-01-23 19:58:11 +00:00
|
|
|
|
|
|
|
if (!istate->cache_tree)
|
|
|
|
istate->cache_tree = cache_tree();
|
|
|
|
|
2023-03-28 13:58:53 +00:00
|
|
|
if (!(flags & WRITE_TREE_MISSING_OK) && repo_has_promisor_remote(the_repository))
|
2021-07-23 18:52:23 +00:00
|
|
|
prefetch_cache_entries(istate, must_check_existence);
|
|
|
|
|
2018-08-18 14:41:23 +00:00
|
|
|
trace_performance_enter();
|
2021-01-04 03:09:12 +00:00
|
|
|
trace2_region_enter("cache_tree", "update", the_repository);
|
2022-04-05 05:20:10 +00:00
|
|
|
begin_odb_transaction();
|
2021-01-23 19:58:11 +00:00
|
|
|
i = update_one(istate->cache_tree, istate->cache, istate->cache_nr,
|
|
|
|
"", 0, &skip, flags);
|
2022-04-05 05:20:10 +00:00
|
|
|
end_odb_transaction();
|
2021-01-04 03:09:12 +00:00
|
|
|
trace2_region_leave("cache_tree", "update", the_repository);
|
2018-08-18 14:41:23 +00:00
|
|
|
trace_performance_leave("cache_tree_update");
|
2006-04-23 23:52:20 +00:00
|
|
|
if (i < 0)
|
|
|
|
return i;
|
2014-06-13 12:19:32 +00:00
|
|
|
istate->cache_changed |= CACHE_TREE_CHANGED;
|
2006-04-23 23:52:20 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2007-09-25 08:22:44 +00:00
|
|
|
static void write_one(struct strbuf *buffer, struct cache_tree *it,
|
2018-12-06 15:42:06 +00:00
|
|
|
const char *path, int pathlen)
|
2006-04-23 23:52:20 +00:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
/* One "cache-tree" entry consists of the following:
|
|
|
|
* path (NUL terminated)
|
|
|
|
* entry_count, subtree_nr ("%d %d\n")
|
|
|
|
* tree-sha1 (missing if invalid)
|
|
|
|
* subtree_nr "cache-tree" entries for subtrees.
|
|
|
|
*/
|
2007-09-06 11:20:11 +00:00
|
|
|
strbuf_grow(buffer, pathlen + 100);
|
|
|
|
strbuf_add(buffer, path, pathlen);
|
|
|
|
strbuf_addf(buffer, "%c%d %d\n", 0, it->entry_count, it->subtree_nr);
|
2006-04-23 23:52:20 +00:00
|
|
|
|
2019-06-19 21:05:58 +00:00
|
|
|
#if DEBUG_CACHE_TREE
|
2006-04-23 23:52:20 +00:00
|
|
|
if (0 <= it->entry_count)
|
|
|
|
fprintf(stderr, "cache-tree <%.*s> (%d ent, %d subtree) %s\n",
|
|
|
|
pathlen, path, it->entry_count, it->subtree_nr,
|
2017-05-01 02:28:56 +00:00
|
|
|
oid_to_hex(&it->oid));
|
2006-04-23 23:52:20 +00:00
|
|
|
else
|
|
|
|
fprintf(stderr, "cache-tree <%.*s> (%d subtree) invalid\n",
|
|
|
|
pathlen, path, it->subtree_nr);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (0 <= it->entry_count) {
|
2018-03-12 02:27:24 +00:00
|
|
|
strbuf_add(buffer, it->oid.hash, the_hash_algo->rawsz);
|
2006-04-23 23:52:20 +00:00
|
|
|
}
|
|
|
|
for (i = 0; i < it->subtree_nr; i++) {
|
|
|
|
struct cache_tree_sub *down = it->down[i];
|
2006-04-26 00:40:02 +00:00
|
|
|
if (i) {
|
|
|
|
struct cache_tree_sub *prev = it->down[i-1];
|
|
|
|
if (subtree_name_cmp(down->name, down->namelen,
|
|
|
|
prev->name, prev->namelen) <= 0)
|
|
|
|
die("fatal - unsorted cache subtree");
|
|
|
|
}
|
2007-09-25 08:22:44 +00:00
|
|
|
write_one(buffer, down->cache_tree, down->name, down->namelen);
|
2006-04-23 23:52:20 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-09-25 08:22:44 +00:00
|
|
|
void cache_tree_write(struct strbuf *sb, struct cache_tree *root)
|
2006-04-23 23:52:20 +00:00
|
|
|
{
|
2021-01-04 03:09:13 +00:00
|
|
|
trace2_region_enter("cache_tree", "write", the_repository);
|
2007-09-25 08:22:44 +00:00
|
|
|
write_one(sb, root, "", 0);
|
2021-01-04 03:09:13 +00:00
|
|
|
trace2_region_leave("cache_tree", "write", the_repository);
|
2006-04-23 23:52:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static struct cache_tree *read_one(const char **buffer, unsigned long *size_p)
|
|
|
|
{
|
|
|
|
const char *buf = *buffer;
|
|
|
|
unsigned long size = *size_p;
|
2006-05-02 01:31:02 +00:00
|
|
|
const char *cp;
|
|
|
|
char *ep;
|
2006-04-23 23:52:20 +00:00
|
|
|
struct cache_tree *it;
|
|
|
|
int i, subtree_nr;
|
2018-03-12 02:27:24 +00:00
|
|
|
const unsigned rawsz = the_hash_algo->rawsz;
|
2006-04-23 23:52:20 +00:00
|
|
|
|
|
|
|
it = NULL;
|
|
|
|
/* skip name, but make sure name exists */
|
|
|
|
while (size && *buf) {
|
|
|
|
size--;
|
|
|
|
buf++;
|
|
|
|
}
|
|
|
|
if (!size)
|
|
|
|
goto free_return;
|
|
|
|
buf++; size--;
|
|
|
|
it = cache_tree();
|
2006-05-02 01:31:02 +00:00
|
|
|
|
|
|
|
cp = buf;
|
|
|
|
it->entry_count = strtol(cp, &ep, 10);
|
|
|
|
if (cp == ep)
|
|
|
|
goto free_return;
|
|
|
|
cp = ep;
|
|
|
|
subtree_nr = strtol(cp, &ep, 10);
|
|
|
|
if (cp == ep)
|
2006-04-23 23:52:20 +00:00
|
|
|
goto free_return;
|
|
|
|
while (size && *buf && *buf != '\n') {
|
|
|
|
size--;
|
|
|
|
buf++;
|
|
|
|
}
|
|
|
|
if (!size)
|
|
|
|
goto free_return;
|
|
|
|
buf++; size--;
|
|
|
|
if (0 <= it->entry_count) {
|
2018-03-12 02:27:24 +00:00
|
|
|
if (size < rawsz)
|
2006-04-23 23:52:20 +00:00
|
|
|
goto free_return;
|
2018-05-02 00:25:29 +00:00
|
|
|
oidread(&it->oid, (const unsigned char *)buf);
|
2018-03-12 02:27:24 +00:00
|
|
|
buf += rawsz;
|
|
|
|
size -= rawsz;
|
2006-04-23 23:52:20 +00:00
|
|
|
}
|
|
|
|
|
2019-06-19 21:05:58 +00:00
|
|
|
#if DEBUG_CACHE_TREE
|
2006-04-23 23:52:20 +00:00
|
|
|
if (0 <= it->entry_count)
|
|
|
|
fprintf(stderr, "cache-tree <%s> (%d ent, %d subtree) %s\n",
|
|
|
|
*buffer, it->entry_count, subtree_nr,
|
2017-05-01 02:28:56 +00:00
|
|
|
oid_to_hex(&it->oid));
|
2006-04-23 23:52:20 +00:00
|
|
|
else
|
|
|
|
fprintf(stderr, "cache-tree <%s> (%d subtrees) invalid\n",
|
|
|
|
*buffer, subtree_nr);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Just a heuristic -- we do not add directories that often but
|
|
|
|
* we do not want to have to extend it immediately when we do,
|
|
|
|
* hence +2.
|
|
|
|
*/
|
|
|
|
it->subtree_alloc = subtree_nr + 2;
|
2021-03-13 16:17:22 +00:00
|
|
|
CALLOC_ARRAY(it->down, it->subtree_alloc);
|
2006-04-23 23:52:20 +00:00
|
|
|
for (i = 0; i < subtree_nr; i++) {
|
|
|
|
/* read each subtree */
|
|
|
|
struct cache_tree *sub;
|
2006-04-26 00:40:02 +00:00
|
|
|
struct cache_tree_sub *subtree;
|
2006-04-23 23:52:20 +00:00
|
|
|
const char *name = buf;
|
2006-04-27 08:33:07 +00:00
|
|
|
|
2006-04-23 23:52:20 +00:00
|
|
|
sub = read_one(&buf, &size);
|
|
|
|
if (!sub)
|
|
|
|
goto free_return;
|
2006-04-27 08:33:07 +00:00
|
|
|
subtree = cache_tree_sub(it, name);
|
2006-04-26 00:40:02 +00:00
|
|
|
subtree->cache_tree = sub;
|
2006-04-23 23:52:20 +00:00
|
|
|
}
|
|
|
|
if (subtree_nr != it->subtree_nr)
|
|
|
|
die("cache-tree: internal error");
|
|
|
|
*buffer = buf;
|
|
|
|
*size_p = size;
|
|
|
|
return it;
|
|
|
|
|
|
|
|
free_return:
|
2006-04-25 04:18:58 +00:00
|
|
|
cache_tree_free(&it);
|
2006-04-23 23:52:20 +00:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2006-04-25 04:18:58 +00:00
|
|
|
struct cache_tree *cache_tree_read(const char *buffer, unsigned long size)
|
2006-04-23 23:52:20 +00:00
|
|
|
{
|
2021-01-04 03:09:13 +00:00
|
|
|
struct cache_tree *result;
|
|
|
|
|
2006-04-25 04:18:58 +00:00
|
|
|
if (buffer[0])
|
2006-04-23 23:52:20 +00:00
|
|
|
return NULL; /* not the whole tree */
|
2021-01-04 03:09:13 +00:00
|
|
|
|
|
|
|
trace2_region_enter("cache_tree", "read", the_repository);
|
|
|
|
result = read_one(&buffer, &size);
|
|
|
|
trace2_region_leave("cache_tree", "read", the_repository);
|
|
|
|
|
|
|
|
return result;
|
2006-04-23 23:52:20 +00:00
|
|
|
}
|
2006-04-26 08:20:50 +00:00
|
|
|
|
2008-07-16 10:42:10 +00:00
|
|
|
static struct cache_tree *cache_tree_find(struct cache_tree *it, const char *path)
|
2006-04-26 08:20:50 +00:00
|
|
|
{
|
2009-05-20 22:53:57 +00:00
|
|
|
if (!it)
|
|
|
|
return NULL;
|
2006-04-26 08:20:50 +00:00
|
|
|
while (*path) {
|
|
|
|
const char *slash;
|
|
|
|
struct cache_tree_sub *sub;
|
|
|
|
|
2014-03-05 17:26:26 +00:00
|
|
|
slash = strchrnul(path, '/');
|
2014-03-05 17:26:27 +00:00
|
|
|
/*
|
|
|
|
* Between path and slash is the name of the subtree
|
|
|
|
* to look for.
|
2006-04-26 08:20:50 +00:00
|
|
|
*/
|
|
|
|
sub = find_subtree(it, path, slash - path, 0);
|
|
|
|
if (!sub)
|
|
|
|
return NULL;
|
|
|
|
it = sub->cache_tree;
|
2014-03-05 17:26:30 +00:00
|
|
|
|
2006-04-26 08:20:50 +00:00
|
|
|
path = slash;
|
2014-03-05 17:26:30 +00:00
|
|
|
while (*path == '/')
|
|
|
|
path++;
|
2006-04-26 08:20:50 +00:00
|
|
|
}
|
|
|
|
return it;
|
|
|
|
}
|
2008-01-11 06:49:35 +00:00
|
|
|
|
2019-08-17 18:41:32 +00:00
|
|
|
static int write_index_as_tree_internal(struct object_id *oid,
|
|
|
|
struct index_state *index_state,
|
|
|
|
int cache_tree_valid,
|
|
|
|
int flags,
|
|
|
|
const char *prefix)
|
|
|
|
{
|
|
|
|
if (flags & WRITE_TREE_IGNORE_CACHE_TREE) {
|
|
|
|
cache_tree_free(&index_state->cache_tree);
|
|
|
|
cache_tree_valid = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!cache_tree_valid && cache_tree_update(index_state, flags) < 0)
|
|
|
|
return WRITE_TREE_UNMERGED_INDEX;
|
|
|
|
|
|
|
|
if (prefix) {
|
|
|
|
struct cache_tree *subtree;
|
|
|
|
subtree = cache_tree_find(index_state->cache_tree, prefix);
|
|
|
|
if (!subtree)
|
|
|
|
return WRITE_TREE_PREFIX_ERROR;
|
|
|
|
oidcpy(oid, &subtree->oid);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
oidcpy(oid, &index_state->cache_tree->oid);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct tree* write_in_core_index_as_tree(struct repository *repo) {
|
|
|
|
struct object_id o;
|
|
|
|
int was_valid, ret;
|
|
|
|
|
|
|
|
struct index_state *index_state = repo->index;
|
|
|
|
was_valid = index_state->cache_tree &&
|
|
|
|
cache_tree_fully_valid(index_state->cache_tree);
|
|
|
|
|
|
|
|
ret = write_index_as_tree_internal(&o, index_state, was_valid, 0, NULL);
|
|
|
|
if (ret == WRITE_TREE_UNMERGED_INDEX) {
|
|
|
|
int i;
|
2022-06-02 12:25:37 +00:00
|
|
|
bug("there are unmerged index entries:");
|
2019-08-17 18:41:32 +00:00
|
|
|
for (i = 0; i < index_state->cache_nr; i++) {
|
|
|
|
const struct cache_entry *ce = index_state->cache[i];
|
|
|
|
if (ce_stage(ce))
|
2022-06-02 12:25:37 +00:00
|
|
|
bug("%d %.*s", ce_stage(ce),
|
|
|
|
(int)ce_namelen(ce), ce->name);
|
2019-08-17 18:41:32 +00:00
|
|
|
}
|
2022-06-02 12:25:37 +00:00
|
|
|
BUG("unmerged index entries when writing in-core index");
|
2019-08-17 18:41:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return lookup_tree(repo, &index_state->cache_tree->oid);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2018-03-12 02:27:23 +00:00
|
|
|
int write_index_as_tree(struct object_id *oid, struct index_state *index_state, const char *index_path, int flags, const char *prefix)
|
2008-01-11 06:49:35 +00:00
|
|
|
{
|
2017-10-05 20:32:08 +00:00
|
|
|
int entries, was_valid;
|
2017-09-05 12:15:21 +00:00
|
|
|
struct lock_file lock_file = LOCK_INIT;
|
2019-08-17 18:41:32 +00:00
|
|
|
int ret;
|
2008-01-11 06:49:35 +00:00
|
|
|
|
2017-10-05 20:32:08 +00:00
|
|
|
hold_lock_file_for_update(&lock_file, index_path, LOCK_DIE_ON_ERROR);
|
2008-01-11 06:49:35 +00:00
|
|
|
|
read-cache: fix reading the shared index for other repos
read_index_from() takes a path argument for the location of the index
file. For reading the shared index in split index mode however it just
ignores that path argument, and reads it from the gitdir of the current
repository.
This works as long as an index in the_repository is read. Once that
changes, such as when we read the index of a submodule, or of a
different working tree than the current one, the gitdir of
the_repository will no longer contain the appropriate shared index,
and git will fail to read it.
For example t3007-ls-files-recurse-submodules.sh was broken with
GIT_TEST_SPLIT_INDEX set in 188dce131f ("ls-files: use repository
object", 2017-06-22), and t7814-grep-recurse-submodules.sh was also
broken in a similar manner, probably by introducing struct repository
there, although I didn't track down the exact commit for that.
be489d02d2 ("revision.c: --indexed-objects add objects from all
worktrees", 2017-08-23) breaks with split index mode in a similar
manner, not erroring out when it can't read the index, but instead
carrying on with pruning, without taking the index of the worktree into
account.
Fix this by passing an additional gitdir parameter to read_index_from,
to indicate where it should look for and read the shared index from.
read_cache_from() defaults to using the gitdir of the_repository. As it
is mostly a convenience macro, having to pass get_git_dir() for every
call seems overkill, and if necessary users can have more control by
using read_index_from().
Helped-by: Brandon Williams <bmwill@google.com>
Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-01-07 22:30:13 +00:00
|
|
|
entries = read_index_from(index_state, index_path, get_git_dir());
|
2017-09-05 12:14:07 +00:00
|
|
|
if (entries < 0) {
|
|
|
|
ret = WRITE_TREE_UNREADABLE_INDEX;
|
|
|
|
goto out;
|
|
|
|
}
|
2008-01-11 06:49:35 +00:00
|
|
|
|
2019-08-17 18:41:32 +00:00
|
|
|
was_valid = !(flags & WRITE_TREE_IGNORE_CACHE_TREE) &&
|
|
|
|
index_state->cache_tree &&
|
|
|
|
cache_tree_fully_valid(index_state->cache_tree);
|
2008-01-11 06:49:35 +00:00
|
|
|
|
2019-08-17 18:41:32 +00:00
|
|
|
ret = write_index_as_tree_internal(oid, index_state, was_valid, flags,
|
|
|
|
prefix);
|
|
|
|
if (!ret && !was_valid) {
|
2017-10-05 20:32:08 +00:00
|
|
|
write_locked_index(index_state, &lock_file, COMMIT_LOCK);
|
2008-01-11 06:49:35 +00:00
|
|
|
/* Not being able to write is fine -- we are only interested
|
|
|
|
* in updating the cache-tree part, and if the next caller
|
|
|
|
* ends up using the old index with unupdated cache-tree part
|
|
|
|
* it misses the work we did here, but that is just a
|
|
|
|
* performance penalty and not a big deal.
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
|
2017-09-05 12:14:07 +00:00
|
|
|
out:
|
2017-10-05 20:32:08 +00:00
|
|
|
rollback_lock_file(&lock_file);
|
2017-09-05 12:14:07 +00:00
|
|
|
return ret;
|
2008-01-11 06:49:35 +00:00
|
|
|
}
|
2009-04-20 10:58:18 +00:00
|
|
|
|
2021-11-29 15:52:41 +00:00
|
|
|
static void prime_cache_tree_sparse_dir(struct cache_tree *it,
|
|
|
|
struct tree *tree)
|
|
|
|
{
|
|
|
|
|
|
|
|
oidcpy(&it->oid, &tree->object.oid);
|
|
|
|
it->entry_count = 1;
|
|
|
|
}
|
|
|
|
|
2018-11-10 05:49:02 +00:00
|
|
|
static void prime_cache_tree_rec(struct repository *r,
|
|
|
|
struct cache_tree *it,
|
2021-11-29 15:52:41 +00:00
|
|
|
struct tree *tree,
|
|
|
|
struct strbuf *tree_path)
|
2009-04-20 10:58:18 +00:00
|
|
|
{
|
|
|
|
struct tree_desc desc;
|
|
|
|
struct name_entry entry;
|
|
|
|
int cnt;
|
2023-02-10 20:20:30 +00:00
|
|
|
size_t base_path_len = tree_path->len;
|
2009-04-20 10:58:18 +00:00
|
|
|
|
2017-05-01 02:28:56 +00:00
|
|
|
oidcpy(&it->oid, &tree->object.oid);
|
2021-11-29 15:52:41 +00:00
|
|
|
|
2009-04-20 10:58:18 +00:00
|
|
|
init_tree_desc(&desc, tree->buffer, tree->size);
|
|
|
|
cnt = 0;
|
|
|
|
while (tree_entry(&desc, &entry)) {
|
|
|
|
if (!S_ISDIR(entry.mode))
|
|
|
|
cnt++;
|
|
|
|
else {
|
|
|
|
struct cache_tree_sub *sub;
|
2019-01-15 00:39:44 +00:00
|
|
|
struct tree *subtree = lookup_tree(r, &entry.oid);
|
2021-11-29 15:52:41 +00:00
|
|
|
|
2009-04-20 10:58:18 +00:00
|
|
|
if (!subtree->object.parsed)
|
|
|
|
parse_tree(subtree);
|
|
|
|
sub = cache_tree_sub(it, entry.path);
|
|
|
|
sub->cache_tree = cache_tree();
|
2021-11-29 15:52:41 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Recursively-constructed subtree path is only needed when working
|
|
|
|
* in a sparse index (where it's used to determine whether the
|
|
|
|
* subtree is a sparse directory in the index).
|
|
|
|
*/
|
|
|
|
if (r->index->sparse_index) {
|
|
|
|
strbuf_setlen(tree_path, base_path_len);
|
|
|
|
strbuf_add(tree_path, entry.path, entry.pathlen);
|
|
|
|
strbuf_addch(tree_path, '/');
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If a sparse index is in use, the directory being processed may be
|
|
|
|
* sparse. To confirm that, we can check whether an entry with that
|
|
|
|
* exact name exists in the index. If it does, the created subtree
|
|
|
|
* should be sparse. Otherwise, cache tree expansion should continue
|
|
|
|
* as normal.
|
|
|
|
*/
|
|
|
|
if (r->index->sparse_index &&
|
|
|
|
index_entry_exists(r->index, tree_path->buf, tree_path->len))
|
|
|
|
prime_cache_tree_sparse_dir(sub->cache_tree, subtree);
|
|
|
|
else
|
|
|
|
prime_cache_tree_rec(r, sub->cache_tree, subtree, tree_path);
|
2009-04-20 10:58:18 +00:00
|
|
|
cnt += sub->cache_tree->entry_count;
|
|
|
|
}
|
|
|
|
}
|
2021-11-29 15:52:41 +00:00
|
|
|
|
2009-04-20 10:58:18 +00:00
|
|
|
it->entry_count = cnt;
|
|
|
|
}
|
|
|
|
|
2018-11-10 05:49:02 +00:00
|
|
|
void prime_cache_tree(struct repository *r,
|
|
|
|
struct index_state *istate,
|
|
|
|
struct tree *tree)
|
2009-04-20 10:58:18 +00:00
|
|
|
{
|
2021-11-29 15:52:41 +00:00
|
|
|
struct strbuf tree_path = STRBUF_INIT;
|
|
|
|
|
libs: use "struct repository *" argument, not "the_repository"
As can easily be seen from grepping in our sources, we had these uses
of "the_repository" in various library code in cases where the
function in question was already getting a "struct repository *"
argument. Let's use that argument instead.
Out of these changes only the changes to "cache-tree.c",
"commit-reach.c", "shallow.c" and "upload-pack.c" would have cleanly
applied before the migration away from the "repo_*()" wrapper macros
in the preceding commits.
The rest aren't new, as we'd previously implicitly refer to
"the_repository", but it's now more obvious that we were doing the
wrong thing all along, and should have used the parameter instead.
The change to change "get_index_format_default(the_repository)" in
"read-cache.c" to use the "r" variable instead should arguably have
been part of [1], or in the subsequent cleanup in [2]. Let's do it
here, as can be seen from the initial code in [3] it's not important
that we use "the_repository" there, but would prefer to always use the
current repository.
This change excludes the "the_repository" use in "upload-pack.c"'s
upload_pack_advertise(), as the in-flight [4] makes that change.
1. ee1f0c242ef (read-cache: add index.skipHash config option,
2023-01-06)
2. 6269f8eaad0 (treewide: always have a valid "index_state.repo"
member, 2023-01-17)
3. 7211b9e7534 (repo-settings: consolidate some config settings,
2019-08-13)
4. <Y/hbUsGPVNAxTdmS@coredump.intra.peff.net>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-28 13:58:58 +00:00
|
|
|
trace2_region_enter("cache-tree", "prime_cache_tree", r);
|
2014-06-13 12:19:33 +00:00
|
|
|
cache_tree_free(&istate->cache_tree);
|
|
|
|
istate->cache_tree = cache_tree();
|
2021-01-04 03:09:14 +00:00
|
|
|
|
2021-11-29 15:52:41 +00:00
|
|
|
prime_cache_tree_rec(r, istate->cache_tree, tree, &tree_path);
|
|
|
|
strbuf_release(&tree_path);
|
2014-06-13 12:19:33 +00:00
|
|
|
istate->cache_changed |= CACHE_TREE_CHANGED;
|
libs: use "struct repository *" argument, not "the_repository"
As can easily be seen from grepping in our sources, we had these uses
of "the_repository" in various library code in cases where the
function in question was already getting a "struct repository *"
argument. Let's use that argument instead.
Out of these changes only the changes to "cache-tree.c",
"commit-reach.c", "shallow.c" and "upload-pack.c" would have cleanly
applied before the migration away from the "repo_*()" wrapper macros
in the preceding commits.
The rest aren't new, as we'd previously implicitly refer to
"the_repository", but it's now more obvious that we were doing the
wrong thing all along, and should have used the parameter instead.
The change to change "get_index_format_default(the_repository)" in
"read-cache.c" to use the "r" variable instead should arguably have
been part of [1], or in the subsequent cleanup in [2]. Let's do it
here, as can be seen from the initial code in [3] it's not important
that we use "the_repository" there, but would prefer to always use the
current repository.
This change excludes the "the_repository" use in "upload-pack.c"'s
upload_pack_advertise(), as the in-flight [4] makes that change.
1. ee1f0c242ef (read-cache: add index.skipHash config option,
2023-01-06)
2. 6269f8eaad0 (treewide: always have a valid "index_state.repo"
member, 2023-01-17)
3. 7211b9e7534 (repo-settings: consolidate some config settings,
2019-08-13)
4. <Y/hbUsGPVNAxTdmS@coredump.intra.peff.net>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-28 13:58:58 +00:00
|
|
|
trace2_region_leave("cache-tree", "prime_cache_tree", r);
|
2009-04-20 10:58:18 +00:00
|
|
|
}
|
Optimize "diff-index --cached" using cache-tree
When running "diff-index --cached" after making a change to only a small
portion of the index, there is no point unpacking unchanged subtrees into
the index recursively, only to find that all entries match anyway. Tweak
unpack_trees() logic that is used to read in the tree object to catch the
case where the tree entry we are looking at matches the index as a whole
by looking at the cache-tree.
As an exercise, after modifying a few paths in the kernel tree, here are
a few numbers on my Athlon 64X2 3800+:
(without patch, hot cache)
$ /usr/bin/time git diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.07user 0.02system 0:00.09elapsed 102%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+9407minor)pagefaults 0swaps
(with patch, hot cache)
$ /usr/bin/time ../git.git/git-diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.02user 0.00system 0:00.02elapsed 103%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+2446minor)pagefaults 0swaps
Cold cache numbers are very impressive, but it does not matter very much
in practice:
(without patch, cold cache)
$ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches'
$ /usr/bin/time git diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.06user 0.17system 0:10.26elapsed 2%CPU (0avgtext+0avgdata 0maxresident)k
247032inputs+0outputs (1172major+8237minor)pagefaults 0swaps
(with patch, cold cache)
$ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches'
$ /usr/bin/time ../git.git/git-diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.02user 0.01system 0:01.01elapsed 3%CPU (0avgtext+0avgdata 0maxresident)k
18440inputs+0outputs (79major+2369minor)pagefaults 0swaps
This of course helps "git status" as well.
(without patch, hot cache)
$ /usr/bin/time ../git.git/git-status >/dev/null
0.17user 0.18system 0:00.35elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+5336outputs (0major+10970minor)pagefaults 0swaps
(with patch, hot cache)
$ /usr/bin/time ../git.git/git-status >/dev/null
0.10user 0.16system 0:00.27elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+5336outputs (0major+3921minor)pagefaults 0swaps
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-05-20 22:57:22 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* find the cache_tree that corresponds to the current level without
|
|
|
|
* exploding the full path into textual form. The root of the
|
|
|
|
* cache tree is given as "root", and our current level is "info".
|
|
|
|
* (1) When at root level, info->prev is NULL, so it is "root" itself.
|
|
|
|
* (2) Otherwise, find the cache_tree that corresponds to one level
|
|
|
|
* above us, and find ourselves in there.
|
|
|
|
*/
|
|
|
|
static struct cache_tree *find_cache_tree_from_traversal(struct cache_tree *root,
|
|
|
|
struct traverse_info *info)
|
|
|
|
{
|
|
|
|
struct cache_tree *our_parent;
|
|
|
|
|
|
|
|
if (!info->prev)
|
|
|
|
return root;
|
|
|
|
our_parent = find_cache_tree_from_traversal(root, info->prev);
|
2019-07-31 04:38:15 +00:00
|
|
|
return cache_tree_find(our_parent, info->name);
|
Optimize "diff-index --cached" using cache-tree
When running "diff-index --cached" after making a change to only a small
portion of the index, there is no point unpacking unchanged subtrees into
the index recursively, only to find that all entries match anyway. Tweak
unpack_trees() logic that is used to read in the tree object to catch the
case where the tree entry we are looking at matches the index as a whole
by looking at the cache-tree.
As an exercise, after modifying a few paths in the kernel tree, here are
a few numbers on my Athlon 64X2 3800+:
(without patch, hot cache)
$ /usr/bin/time git diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.07user 0.02system 0:00.09elapsed 102%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+9407minor)pagefaults 0swaps
(with patch, hot cache)
$ /usr/bin/time ../git.git/git-diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.02user 0.00system 0:00.02elapsed 103%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+2446minor)pagefaults 0swaps
Cold cache numbers are very impressive, but it does not matter very much
in practice:
(without patch, cold cache)
$ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches'
$ /usr/bin/time git diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.06user 0.17system 0:10.26elapsed 2%CPU (0avgtext+0avgdata 0maxresident)k
247032inputs+0outputs (1172major+8237minor)pagefaults 0swaps
(with patch, cold cache)
$ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches'
$ /usr/bin/time ../git.git/git-diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.02user 0.01system 0:01.01elapsed 3%CPU (0avgtext+0avgdata 0maxresident)k
18440inputs+0outputs (79major+2369minor)pagefaults 0swaps
This of course helps "git status" as well.
(without patch, hot cache)
$ /usr/bin/time ../git.git/git-status >/dev/null
0.17user 0.18system 0:00.35elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+5336outputs (0major+10970minor)pagefaults 0swaps
(with patch, hot cache)
$ /usr/bin/time ../git.git/git-status >/dev/null
0.10user 0.16system 0:00.27elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+5336outputs (0major+3921minor)pagefaults 0swaps
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-05-20 22:57:22 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int cache_tree_matches_traversal(struct cache_tree *root,
|
|
|
|
struct name_entry *ent,
|
|
|
|
struct traverse_info *info)
|
|
|
|
{
|
|
|
|
struct cache_tree *it;
|
|
|
|
|
|
|
|
it = find_cache_tree_from_traversal(root, info);
|
|
|
|
it = cache_tree_find(it, ent->path);
|
2019-01-15 00:39:44 +00:00
|
|
|
if (it && it->entry_count > 0 && oideq(&ent->oid, &it->oid))
|
Optimize "diff-index --cached" using cache-tree
When running "diff-index --cached" after making a change to only a small
portion of the index, there is no point unpacking unchanged subtrees into
the index recursively, only to find that all entries match anyway. Tweak
unpack_trees() logic that is used to read in the tree object to catch the
case where the tree entry we are looking at matches the index as a whole
by looking at the cache-tree.
As an exercise, after modifying a few paths in the kernel tree, here are
a few numbers on my Athlon 64X2 3800+:
(without patch, hot cache)
$ /usr/bin/time git diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.07user 0.02system 0:00.09elapsed 102%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+9407minor)pagefaults 0swaps
(with patch, hot cache)
$ /usr/bin/time ../git.git/git-diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.02user 0.00system 0:00.02elapsed 103%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+2446minor)pagefaults 0swaps
Cold cache numbers are very impressive, but it does not matter very much
in practice:
(without patch, cold cache)
$ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches'
$ /usr/bin/time git diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.06user 0.17system 0:10.26elapsed 2%CPU (0avgtext+0avgdata 0maxresident)k
247032inputs+0outputs (1172major+8237minor)pagefaults 0swaps
(with patch, cold cache)
$ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches'
$ /usr/bin/time ../git.git/git-diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.02user 0.01system 0:01.01elapsed 3%CPU (0avgtext+0avgdata 0maxresident)k
18440inputs+0outputs (79major+2369minor)pagefaults 0swaps
This of course helps "git status" as well.
(without patch, hot cache)
$ /usr/bin/time ../git.git/git-status >/dev/null
0.17user 0.18system 0:00.35elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+5336outputs (0major+10970minor)pagefaults 0swaps
(with patch, hot cache)
$ /usr/bin/time ../git.git/git-status >/dev/null
0.10user 0.16system 0:00.27elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+5336outputs (0major+3921minor)pagefaults 0swaps
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-05-20 22:57:22 +00:00
|
|
|
return it->entry_count;
|
|
|
|
return 0;
|
|
|
|
}
|
2011-12-06 17:43:37 +00:00
|
|
|
|
2022-08-20 09:02:48 +00:00
|
|
|
static void verify_one_sparse(struct index_state *istate,
|
2021-03-30 13:11:03 +00:00
|
|
|
struct strbuf *path,
|
|
|
|
int pos)
|
|
|
|
{
|
|
|
|
struct cache_entry *ce = istate->cache[pos];
|
|
|
|
|
|
|
|
if (!S_ISSPARSEDIR(ce->ce_mode))
|
|
|
|
BUG("directory '%s' is present in index, but not sparse",
|
|
|
|
path->buf);
|
|
|
|
}
|
|
|
|
|
2021-10-07 18:07:21 +00:00
|
|
|
/*
|
|
|
|
* Returns:
|
|
|
|
* 0 - Verification completed.
|
|
|
|
* 1 - Restart verification - a call to ensure_full_index() freed the cache
|
|
|
|
* tree that is being verified and verification needs to be restarted from
|
|
|
|
* the new toplevel cache tree.
|
|
|
|
*/
|
|
|
|
static int verify_one(struct repository *r,
|
|
|
|
struct index_state *istate,
|
|
|
|
struct cache_tree *it,
|
|
|
|
struct strbuf *path)
|
2018-08-18 14:41:28 +00:00
|
|
|
{
|
|
|
|
int i, pos, len = path->len;
|
|
|
|
struct strbuf tree_buf = STRBUF_INIT;
|
|
|
|
struct object_id new_oid;
|
|
|
|
|
|
|
|
for (i = 0; i < it->subtree_nr; i++) {
|
|
|
|
strbuf_addf(path, "%s/", it->down[i]->name);
|
2021-10-07 18:07:21 +00:00
|
|
|
if (verify_one(r, istate, it->down[i]->cache_tree, path))
|
|
|
|
return 1;
|
2018-08-18 14:41:28 +00:00
|
|
|
strbuf_setlen(path, len);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (it->entry_count < 0 ||
|
|
|
|
/* no verification on tests (t7003) that replace trees */
|
2018-11-10 05:49:02 +00:00
|
|
|
lookup_replace_object(r, &it->oid) != &it->oid)
|
2021-10-07 18:07:21 +00:00
|
|
|
return 0;
|
2018-08-18 14:41:28 +00:00
|
|
|
|
|
|
|
if (path->len) {
|
2021-10-07 18:07:21 +00:00
|
|
|
/*
|
|
|
|
* If the index is sparse and the cache tree is not
|
|
|
|
* index_name_pos() may trigger ensure_full_index() which will
|
|
|
|
* free the tree that is being verified.
|
|
|
|
*/
|
|
|
|
int is_sparse = istate->sparse_index;
|
2018-08-18 14:41:28 +00:00
|
|
|
pos = index_name_pos(istate, path->buf, path->len);
|
2021-10-07 18:07:21 +00:00
|
|
|
if (is_sparse && !istate->sparse_index)
|
|
|
|
return 1;
|
2021-03-30 13:11:03 +00:00
|
|
|
|
|
|
|
if (pos >= 0) {
|
2022-08-20 09:02:48 +00:00
|
|
|
verify_one_sparse(istate, path, pos);
|
2021-10-07 18:07:21 +00:00
|
|
|
return 0;
|
2021-03-30 13:11:03 +00:00
|
|
|
}
|
|
|
|
|
2018-08-18 14:41:28 +00:00
|
|
|
pos = -pos - 1;
|
|
|
|
} else {
|
|
|
|
pos = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
i = 0;
|
|
|
|
while (i < it->entry_count) {
|
|
|
|
struct cache_entry *ce = istate->cache[pos + i];
|
|
|
|
const char *slash;
|
|
|
|
struct cache_tree_sub *sub = NULL;
|
|
|
|
const struct object_id *oid;
|
|
|
|
const char *name;
|
|
|
|
unsigned mode;
|
|
|
|
int entlen;
|
|
|
|
|
|
|
|
if (ce->ce_flags & (CE_STAGEMASK | CE_INTENT_TO_ADD | CE_REMOVE))
|
|
|
|
BUG("%s with flags 0x%x should not be in cache-tree",
|
|
|
|
ce->name, ce->ce_flags);
|
|
|
|
name = ce->name + path->len;
|
|
|
|
slash = strchr(name, '/');
|
|
|
|
if (slash) {
|
|
|
|
entlen = slash - name;
|
|
|
|
sub = find_subtree(it, ce->name + path->len, entlen, 0);
|
|
|
|
if (!sub || sub->cache_tree->entry_count < 0)
|
|
|
|
BUG("bad subtree '%.*s'", entlen, name);
|
|
|
|
oid = &sub->cache_tree->oid;
|
|
|
|
mode = S_IFDIR;
|
|
|
|
i += sub->cache_tree->entry_count;
|
|
|
|
} else {
|
|
|
|
oid = &ce->oid;
|
|
|
|
mode = ce->ce_mode;
|
|
|
|
entlen = ce_namelen(ce) - path->len;
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
strbuf_addf(&tree_buf, "%o %.*s%c", mode, entlen, name, '\0');
|
2020-01-30 20:32:18 +00:00
|
|
|
strbuf_add(&tree_buf, oid->hash, r->hash_algo->rawsz);
|
2018-08-18 14:41:28 +00:00
|
|
|
}
|
2022-02-04 23:48:32 +00:00
|
|
|
hash_object_file(r->hash_algo, tree_buf.buf, tree_buf.len, OBJ_TREE,
|
2020-01-30 20:32:22 +00:00
|
|
|
&new_oid);
|
2018-10-02 21:19:21 +00:00
|
|
|
if (!oideq(&new_oid, &it->oid))
|
2018-08-18 14:41:28 +00:00
|
|
|
BUG("cache-tree for path %.*s does not match. "
|
|
|
|
"Expected %s got %s", len, path->buf,
|
|
|
|
oid_to_hex(&new_oid), oid_to_hex(&it->oid));
|
|
|
|
strbuf_setlen(path, len);
|
|
|
|
strbuf_release(&tree_buf);
|
2021-10-07 18:07:21 +00:00
|
|
|
return 0;
|
2018-08-18 14:41:28 +00:00
|
|
|
}
|
|
|
|
|
2018-11-10 05:49:02 +00:00
|
|
|
void cache_tree_verify(struct repository *r, struct index_state *istate)
|
2018-08-18 14:41:28 +00:00
|
|
|
{
|
|
|
|
struct strbuf path = STRBUF_INIT;
|
|
|
|
|
|
|
|
if (!istate->cache_tree)
|
|
|
|
return;
|
2021-10-07 18:07:21 +00:00
|
|
|
if (verify_one(r, istate, istate->cache_tree, &path)) {
|
|
|
|
strbuf_reset(&path);
|
|
|
|
if (verify_one(r, istate, istate->cache_tree, &path))
|
|
|
|
BUG("ensure_full_index() called twice while verifying cache tree");
|
|
|
|
}
|
2018-08-18 14:41:28 +00:00
|
|
|
strbuf_release(&path);
|
|
|
|
}
|