resolve_gitlink_ref: ignore non-repository paths

When we want to look up a submodule ref, we use
get_ref_cache(path) to find or auto-create its ref cache.
But if we feed a path that isn't actually a git repository,
we blindly create the ref cache, and then may die deeper in
the code when we try to access it. This is a problem because
many callers speculatively feed us a path that looks vaguely
like a repository, and expect us to tell them when it is
not.

This patch teaches resolve_gitlink_ref to reject
non-repository paths without creating a ref_cache. This
avoids the die(), and also performs better if you have a
large number of these faux-submodule directories (because
the ref_cache lookup is linear, under the assumption that
there won't be a large number of submodules).

To accomplish this, we also break get_ref_cache into two
pieces: the lookup and auto-creation (the latter is lumped
into create_ref_cache). This lets us first cheaply ask our
cache "is it a submodule we know about?" If so, we can avoid
repeating our filesystem lookup. So lookups of real
submodules are not penalized; they examine the submodule's
.git directory only once.

The test in t3000 demonstrates a case where this improves
correctness (we used to just die). The new perf case in
p7300 shows off the speed improvement in an admittedly
pathological repository:

Test                  HEAD^               HEAD
----------------------------------------------------------------
7300.4: ls-files -o   66.97(66.15+0.87)   0.33(0.08+0.24) -99.5%

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Jeff King 2016-01-22 17:29:30 -05:00 committed by Junio C Hamano
parent ffd036b128
commit a2d5156c2b
3 changed files with 45 additions and 16 deletions

View file

@ -933,6 +933,10 @@ static void clear_loose_ref_cache(struct ref_cache *refs)
}
}
/*
* Create a new submodule ref cache and add it to the internal
* set of caches.
*/
static struct ref_cache *create_ref_cache(const char *submodule)
{
int len;
@ -942,9 +946,24 @@ static struct ref_cache *create_ref_cache(const char *submodule)
len = strlen(submodule) + 1;
refs = xcalloc(1, sizeof(struct ref_cache) + len);
memcpy(refs->name, submodule, len);
refs->next = submodule_ref_caches;
submodule_ref_caches = refs;
return refs;
}
/*
 * Find the ref cache registered for `submodule` without creating one.
 * A NULL or empty name yields the address of the file-level `ref_cache`
 * object. Otherwise the `submodule_ref_caches` list is walked and the
 * entry whose name matches exactly is returned, or NULL if none matches.
 */
static struct ref_cache *lookup_ref_cache(const char *submodule)
{
	struct ref_cache *cur = submodule_ref_caches;

	if (submodule == NULL || *submodule == '\0')
		return &ref_cache;

	/* Linear scan over the list of known submodule caches. */
	while (cur != NULL) {
		if (strcmp(submodule, cur->name) == 0)
			return cur;
		cur = cur->next;
	}
	return NULL;
}
/*
* Return a pointer to a ref_cache for the specified submodule. For
* the main repository, use submodule==NULL. The returned structure
@ -953,18 +972,9 @@ static struct ref_cache *create_ref_cache(const char *submodule)
*/
static struct ref_cache *get_ref_cache(const char *submodule)
{
/*
 * NOTE(review): this span looks like diff output with the removed and
 * added bodies interleaved (note the two declarations of `refs`); the
 * statements from here through "submodule_ref_caches = refs;" appear to
 * be the pre-change body -- confirm against the actual file.
 */
struct ref_cache *refs;
if (!submodule || !*submodule)
return &ref_cache;
for (refs = submodule_ref_caches; refs; refs = refs->next)
if (!strcmp(submodule, refs->name))
return refs;
refs = create_ref_cache(submodule);
refs->next = submodule_ref_caches;
submodule_ref_caches = refs;
/*
 * Reuse the cache that lookup_ref_cache() finds; only when it returns
 * NULL is a new one made with create_ref_cache().
 */
struct ref_cache *refs = lookup_ref_cache(submodule);
if (!refs)
refs = create_ref_cache(submodule);
return refs;
}
@ -1336,16 +1346,24 @@ static int resolve_gitlink_ref_recursive(struct ref_cache *refs,
int resolve_gitlink_ref(const char *path, const char *refname, unsigned char *sha1)
{
int len = strlen(path), retval;
char *submodule;
struct strbuf submodule = STRBUF_INIT;
struct ref_cache *refs;
while (len && path[len-1] == '/')
len--;
if (!len)
return -1;
submodule = xstrndup(path, len);
refs = get_ref_cache(submodule);
free(submodule);
strbuf_add(&submodule, path, len);
refs = lookup_ref_cache(submodule.buf);
if (!refs) {
if (!is_nonbare_repository_dir(&submodule)) {
strbuf_release(&submodule);
return -1;
}
refs = create_ref_cache(submodule.buf);
}
strbuf_release(&submodule);
retval = resolve_gitlink_ref_recursive(refs, refname, sha1, 0);
return retval;

View file

@ -28,4 +28,8 @@ test_perf 'clean many untracked sub dirs, ignore nested git' '
git clean -n -q -f -f -d 100000_sub_dirs/
'
# Time a plain "git ls-files -o" run.
test_perf 'ls-files -o' '
git ls-files -o
'
test_done

View file

@ -65,6 +65,13 @@ test_expect_success '--no-empty-directory hides empty directory' '
test_cmp expected3 output
'
# Create a directory whose ".git" is a regular file containing "foo", then
# check that "git ls-files -o" still succeeds and that its output matches
# expected1.
test_expect_success 'ls-files --others handles non-submodule .git' '
mkdir not-a-submodule &&
echo foo >not-a-submodule/.git &&
git ls-files -o >output &&
test_cmp expected1 output
'
test_expect_success SYMLINKS 'ls-files --others with symlinked submodule' '
git init super &&
git init sub &&