fetch: ensure submodule objects fetched

Currently when git-fetch is asked to recurse into submodules, it dispatches
a plain "git-fetch -C <submodule-dir>" (with some submodule-related options
such as the prefix and the recursing strategy), but without any information
about the remote or the tip that should be fetched.

But this default fetch is not sufficient, as a newly fetched commit in
the superproject could point to a commit in the submodule that is not
in the default refspec. This is common in workflows like Gerrit's.
When fetching a Gerrit change under review (from refs/changes/??), the
commits in that change likely point to submodule commits that have not
been merged to a branch yet.

Fetch a submodule object by id if the object that the superproject
points to cannot be found. For now this object is fetched from the
'origin' remote as we defer getting the default remote to a later patch.

A list of new submodule commits is already generated in certain
conditions (by check_for_new_submodule_commits()); this new feature
invokes that function in more situations.

The submodule checks were done only when a ref in the superproject
changed; extend these checks to also be performed when fetching
into FETCH_HEAD for completeness, and add a test for that too.

Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Stefan Beller 2018-12-06 13:26:55 -08:00 committed by Junio C Hamano
parent a62387b3fc
commit be76c21282
3 changed files with 297 additions and 39 deletions

View file

@ -763,9 +763,6 @@ static int update_local_ref(struct ref *ref,
what = _("[new ref]"); what = _("[new ref]");
} }
if ((recurse_submodules != RECURSE_SUBMODULES_OFF) &&
(recurse_submodules != RECURSE_SUBMODULES_ON))
check_for_new_submodule_commits(&ref->new_oid);
r = s_update_ref(msg, ref, 0); r = s_update_ref(msg, ref, 0);
format_display(display, r ? '!' : '*', what, format_display(display, r ? '!' : '*', what,
r ? _("unable to update local ref") : NULL, r ? _("unable to update local ref") : NULL,
@ -779,9 +776,6 @@ static int update_local_ref(struct ref *ref,
strbuf_add_unique_abbrev(&quickref, &current->object.oid, DEFAULT_ABBREV); strbuf_add_unique_abbrev(&quickref, &current->object.oid, DEFAULT_ABBREV);
strbuf_addstr(&quickref, ".."); strbuf_addstr(&quickref, "..");
strbuf_add_unique_abbrev(&quickref, &ref->new_oid, DEFAULT_ABBREV); strbuf_add_unique_abbrev(&quickref, &ref->new_oid, DEFAULT_ABBREV);
if ((recurse_submodules != RECURSE_SUBMODULES_OFF) &&
(recurse_submodules != RECURSE_SUBMODULES_ON))
check_for_new_submodule_commits(&ref->new_oid);
r = s_update_ref("fast-forward", ref, 1); r = s_update_ref("fast-forward", ref, 1);
format_display(display, r ? '!' : ' ', quickref.buf, format_display(display, r ? '!' : ' ', quickref.buf,
r ? _("unable to update local ref") : NULL, r ? _("unable to update local ref") : NULL,
@ -794,9 +788,6 @@ static int update_local_ref(struct ref *ref,
strbuf_add_unique_abbrev(&quickref, &current->object.oid, DEFAULT_ABBREV); strbuf_add_unique_abbrev(&quickref, &current->object.oid, DEFAULT_ABBREV);
strbuf_addstr(&quickref, "..."); strbuf_addstr(&quickref, "...");
strbuf_add_unique_abbrev(&quickref, &ref->new_oid, DEFAULT_ABBREV); strbuf_add_unique_abbrev(&quickref, &ref->new_oid, DEFAULT_ABBREV);
if ((recurse_submodules != RECURSE_SUBMODULES_OFF) &&
(recurse_submodules != RECURSE_SUBMODULES_ON))
check_for_new_submodule_commits(&ref->new_oid);
r = s_update_ref("forced-update", ref, 1); r = s_update_ref("forced-update", ref, 1);
format_display(display, r ? '!' : '+', quickref.buf, format_display(display, r ? '!' : '+', quickref.buf,
r ? _("unable to update local ref") : _("forced update"), r ? _("unable to update local ref") : _("forced update"),
@ -892,6 +883,8 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
ref->force = rm->peer_ref->force; ref->force = rm->peer_ref->force;
} }
if (recurse_submodules != RECURSE_SUBMODULES_OFF)
check_for_new_submodule_commits(&rm->old_oid);
if (!strcmp(rm->name, "HEAD")) { if (!strcmp(rm->name, "HEAD")) {
kind = ""; kind = "";

View file

@ -1231,8 +1231,14 @@ struct submodule_parallel_fetch {
int result; int result;
struct string_list changed_submodule_names; struct string_list changed_submodule_names;
/* Pending fetches by OIDs */
struct fetch_task **oid_fetch_tasks;
int oid_fetch_tasks_nr, oid_fetch_tasks_alloc;
}; };
#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0, STRING_LIST_INIT_DUP } #define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0, \
STRING_LIST_INIT_DUP, \
NULL, 0, 0}
static int get_fetch_recurse_config(const struct submodule *submodule, static int get_fetch_recurse_config(const struct submodule *submodule,
struct submodule_parallel_fetch *spf) struct submodule_parallel_fetch *spf)
@ -1259,6 +1265,76 @@ static int get_fetch_recurse_config(const struct submodule *submodule,
return spf->default_option; return spf->default_option;
} }
/*
 * Fetch in progress (if callback data) or
 * pending (if in oid_fetch_tasks in struct submodule_parallel_fetch)
 *
 * A task is created by fetch_task_create() and its resources are dropped
 * by fetch_task_release(); the latter does not free the task itself.
 */
struct fetch_task {
/*
 * Submodule repository handle. Left NULL by fetch_task_create(); filled in
 * from get_submodule_repo_for() in get_next_submodule() and cleared and
 * freed by fetch_task_release().
 */
struct repository *repo;
/*
 * Submodule description: either the result of submodule_from_path() or a
 * stand-in allocated by get_non_gitmodules_submodule().
 */
const struct submodule *sub;
unsigned free_sub : 1; /* Do we need to free the submodule? */
/*
 * Set by fetch_finish() to the oid_array stored in the util field of the
 * matching changed_submodule_names entry. A non-NULL value also marks the
 * task as already queued for its second, by-OID fetch. Not freed by
 * fetch_task_release().
 */
struct oid_array *commits; /* Ensure these commits are fetched */
};
/**
 * Build a stand-in submodule entry for a gitlink that has no .gitmodules
 * entry and therefore cannot be looked up via the regular submodule-config.
 *
 * Both the `name` and `path` of the result are set to whatever
 * default_name_or_path() returns for `path`; every other field is zeroed.
 *
 * Returns NULL when default_name_or_path() yields no name. Otherwise the
 * result is heap-allocated; fetch_task_release() frees it for tasks that
 * have `free_sub` set.
 */
static const struct submodule *get_non_gitmodules_submodule(const char *path)
{
	const char *fallback_name = default_name_or_path(path);
	struct submodule *fake;

	if (!fallback_name)
		return NULL;

	fake = xmalloc(sizeof(*fake));
	memset(fake, 0, sizeof(*fake));
	fake->name = fallback_name;
	fake->path = fallback_name;

	return fake;
}
/*
 * Allocate a zero-initialized fetch_task for the gitlink at `path` in
 * repository `r` and attach a submodule description to it.
 *
 * The description comes from submodule_from_path(); if that finds nothing,
 * a stand-in from get_non_gitmodules_submodule() is used and `free_sub` is
 * set so that fetch_task_release() frees it later.
 *
 * Returns NULL (with nothing left allocated) when neither lookup yields a
 * submodule. The `repo` and `commits` fields of a returned task are NULL.
 */
static struct fetch_task *fetch_task_create(struct repository *r,
					    const char *path)
{
	struct fetch_task *new_task = xmalloc(sizeof(*new_task));

	memset(new_task, 0, sizeof(*new_task));

	new_task->sub = submodule_from_path(r, &null_oid, path);
	if (new_task->sub)
		return new_task;

	/*
	 * No entry in .gitmodules? Technically not a submodule,
	 * but historically we supported repositories that happen to be
	 * in-place where a gitlink is. Keep supporting them.
	 */
	new_task->sub = get_non_gitmodules_submodule(path);
	if (!new_task->sub) {
		free(new_task);
		return NULL;
	}
	new_task->free_sub = 1;

	return new_task;
}
/*
 * Drop everything a fetch_task holds: the submodule description (freed only
 * if `free_sub` says the task allocated it) and the submodule repository.
 *
 * The task structure itself is NOT freed, and `commits` is left untouched.
 * Afterwards `sub` and `repo` are NULL and `free_sub` is 0, so calling this
 * again on the same task does nothing further.
 */
static void fetch_task_release(struct fetch_task *p)
{
	if (p->free_sub) {
		free((void *)p->sub);
		p->free_sub = 0;
	}
	p->sub = NULL;

	if (p->repo) {
		repo_clear(p->repo);
		FREE_AND_NULL(p->repo);
	}
}
static struct repository *get_submodule_repo_for(struct repository *r, static struct repository *get_submodule_repo_for(struct repository *r,
const struct submodule *sub) const struct submodule *sub)
{ {
@ -1286,39 +1362,29 @@ static struct repository *get_submodule_repo_for(struct repository *r,
static int get_next_submodule(struct child_process *cp, static int get_next_submodule(struct child_process *cp,
struct strbuf *err, void *data, void **task_cb) struct strbuf *err, void *data, void **task_cb)
{ {
int ret = 0;
struct submodule_parallel_fetch *spf = data; struct submodule_parallel_fetch *spf = data;
for (; spf->count < spf->r->index->cache_nr; spf->count++) { for (; spf->count < spf->r->index->cache_nr; spf->count++) {
struct strbuf submodule_prefix = STRBUF_INIT;
const struct cache_entry *ce = spf->r->index->cache[spf->count]; const struct cache_entry *ce = spf->r->index->cache[spf->count];
const char *default_argv; const char *default_argv;
const struct submodule *submodule; struct fetch_task *task;
struct repository *repo;
struct submodule default_submodule = SUBMODULE_INIT;
if (!S_ISGITLINK(ce->ce_mode)) if (!S_ISGITLINK(ce->ce_mode))
continue; continue;
submodule = submodule_from_path(spf->r, &null_oid, ce->name); task = fetch_task_create(spf->r, ce->name);
if (!submodule) { if (!task)
const char *name = default_name_or_path(ce->name); continue;
if (name) {
default_submodule.path = name;
default_submodule.name = name;
submodule = &default_submodule;
}
}
switch (get_fetch_recurse_config(submodule, spf)) switch (get_fetch_recurse_config(task->sub, spf))
{ {
default: default:
case RECURSE_SUBMODULES_DEFAULT: case RECURSE_SUBMODULES_DEFAULT:
case RECURSE_SUBMODULES_ON_DEMAND: case RECURSE_SUBMODULES_ON_DEMAND:
if (!submodule || if (!task->sub ||
!string_list_lookup( !string_list_lookup(
&spf->changed_submodule_names, &spf->changed_submodule_names,
submodule->name)) task->sub->name))
continue; continue;
default_argv = "on-demand"; default_argv = "on-demand";
break; break;
@ -1329,11 +1395,11 @@ static int get_next_submodule(struct child_process *cp,
continue; continue;
} }
strbuf_addf(&submodule_prefix, "%s%s/", spf->prefix, ce->name); task->repo = get_submodule_repo_for(spf->r, task->sub);
repo = get_submodule_repo_for(spf->r, submodule); if (task->repo) {
if (repo) { struct strbuf submodule_prefix = STRBUF_INIT;
child_process_init(cp); child_process_init(cp);
cp->dir = xstrdup(repo->gitdir); cp->dir = task->repo->gitdir;
prepare_submodule_repo_env_in_gitdir(&cp->env_array); prepare_submodule_repo_env_in_gitdir(&cp->env_array);
cp->git_cmd = 1; cp->git_cmd = 1;
if (!spf->quiet) if (!spf->quiet)
@ -1343,12 +1409,22 @@ static int get_next_submodule(struct child_process *cp,
argv_array_pushv(&cp->args, spf->args.argv); argv_array_pushv(&cp->args, spf->args.argv);
argv_array_push(&cp->args, default_argv); argv_array_push(&cp->args, default_argv);
argv_array_push(&cp->args, "--submodule-prefix"); argv_array_push(&cp->args, "--submodule-prefix");
strbuf_addf(&submodule_prefix, "%s%s/",
spf->prefix,
task->sub->path);
argv_array_push(&cp->args, submodule_prefix.buf); argv_array_push(&cp->args, submodule_prefix.buf);
repo_clear(repo); spf->count++;
free(repo); *task_cb = task;
ret = 1;
strbuf_release(&submodule_prefix);
return 1;
} else { } else {
fetch_task_release(task);
free(task);
/* /*
* An empty directory is normal, * An empty directory is normal,
* the submodule is not initialized * the submodule is not initialized
@ -1361,12 +1437,38 @@ static int get_next_submodule(struct child_process *cp,
ce->name); ce->name);
} }
} }
}
if (spf->oid_fetch_tasks_nr) {
struct fetch_task *task =
spf->oid_fetch_tasks[spf->oid_fetch_tasks_nr - 1];
struct strbuf submodule_prefix = STRBUF_INIT;
spf->oid_fetch_tasks_nr--;
strbuf_addf(&submodule_prefix, "%s%s/",
spf->prefix, task->sub->path);
child_process_init(cp);
prepare_submodule_repo_env_in_gitdir(&cp->env_array);
cp->git_cmd = 1;
cp->dir = task->repo->gitdir;
argv_array_init(&cp->args);
argv_array_pushv(&cp->args, spf->args.argv);
argv_array_push(&cp->args, "on-demand");
argv_array_push(&cp->args, "--submodule-prefix");
argv_array_push(&cp->args, submodule_prefix.buf);
/* NEEDSWORK: have get_default_remote from submodule--helper */
argv_array_push(&cp->args, "origin");
oid_array_for_each_unique(task->commits,
append_oid_to_argv, &cp->args);
*task_cb = task;
strbuf_release(&submodule_prefix); strbuf_release(&submodule_prefix);
if (ret) {
spf->count++;
return 1; return 1;
} }
}
return 0; return 0;
} }
@ -1374,20 +1476,66 @@ static int fetch_start_failure(struct strbuf *err,
void *cb, void *task_cb) void *cb, void *task_cb)
{ {
struct submodule_parallel_fetch *spf = cb; struct submodule_parallel_fetch *spf = cb;
struct fetch_task *task = task_cb;
spf->result = 1; spf->result = 1;
fetch_task_release(task);
return 0; return 0;
} }
/*
 * Callback handed to oid_array_filter() by fetch_finish(); `data` is the
 * submodule's struct repository.
 *
 * Returns non-zero when oid_object_info() does not report `oid` as a commit
 * in that repository (missing, or present with another type), zero when it
 * is a commit there.
 * NOTE(review): presumably oid_array_filter() keeps the entries for which
 * this returns non-zero -- confirm against its definition.
 */
static int commit_missing_in_sub(const struct object_id *oid, void *data)
{
	struct repository *subrepo = data;

	return oid_object_info(subrepo, oid, NULL) != OBJ_COMMIT;
}
static int fetch_finish(int retvalue, struct strbuf *err, static int fetch_finish(int retvalue, struct strbuf *err,
void *cb, void *task_cb) void *cb, void *task_cb)
{ {
struct submodule_parallel_fetch *spf = cb; struct submodule_parallel_fetch *spf = cb;
struct fetch_task *task = task_cb;
struct string_list_item *it;
struct oid_array *commits;
if (retvalue) if (retvalue)
spf->result = 1; spf->result = 1;
if (!task || !task->sub)
BUG("callback cookie bogus");
/* Is this the second time we process this submodule? */
if (task->commits)
goto out;
it = string_list_lookup(&spf->changed_submodule_names, task->sub->name);
if (!it)
/* Could be an unchanged submodule, not contained in the list */
goto out;
commits = it->util;
oid_array_filter(commits,
commit_missing_in_sub,
task->repo);
/* Are there commits we want, but do not exist? */
if (commits->nr) {
task->commits = commits;
ALLOC_GROW(spf->oid_fetch_tasks,
spf->oid_fetch_tasks_nr + 1,
spf->oid_fetch_tasks_alloc);
spf->oid_fetch_tasks[spf->oid_fetch_tasks_nr] = task;
spf->oid_fetch_tasks_nr++;
return 0;
}
out:
fetch_task_release(task);
return 0; return 0;
} }

View file

@ -600,4 +600,121 @@ test_expect_success "fetch new commits when submodule got renamed" '
test_cmp expect actual test_cmp expect actual
' '
# Adds a second submodule (sub1), then creates one commit in each submodule
# that is only reachable from refs/changes/* (not from refs/heads/*). The
# superproject commit recording both is published as refs/changes/3.
# Fetching that ref into a local branch downstream with --recurse-submodules
# must make both submodule commits available (cat-file -t succeeds) so that
# the recursive checkout of FETCH_HEAD works.
test_expect_success "fetch new submodule commits on-demand outside standard refspec" '
# add a second submodule and ensure it is around in downstream first
git clone submodule sub1 &&
git submodule add ./sub1 &&
git commit -m "adding a second submodule" &&
git -C downstream pull &&
git -C downstream submodule update --init --recursive &&
git checkout --detach &&
C=$(git -C submodule commit-tree -m "new change outside refs/heads" HEAD^{tree}) &&
git -C submodule update-ref refs/changes/1 $C &&
git update-index --cacheinfo 160000 $C submodule &&
test_tick &&
D=$(git -C sub1 commit-tree -m "new change outside refs/heads" HEAD^{tree}) &&
git -C sub1 update-ref refs/changes/2 $D &&
git update-index --cacheinfo 160000 $D sub1 &&
git commit -m "updated submodules outside of refs/heads" &&
E=$(git rev-parse HEAD) &&
git update-ref refs/changes/3 $E &&
(
cd downstream &&
git fetch --recurse-submodules origin refs/changes/3:refs/heads/my_branch &&
git -C submodule cat-file -t $C &&
git -C sub1 cat-file -t $D &&
git checkout --recurse-submodules FETCH_HEAD
)
'
# Same scenario as the preceding test, but the superproject ref
# (refs/changes/6) is fetched without a destination refspec, so the result
# is only recorded in FETCH_HEAD. The submodule commits it points to
# must still be fetched.
test_expect_success 'fetch new submodule commit on-demand in FETCH_HEAD' '
# depends on the previous test for setup
C=$(git -C submodule commit-tree -m "another change outside refs/heads" HEAD^{tree}) &&
git -C submodule update-ref refs/changes/4 $C &&
git update-index --cacheinfo 160000 $C submodule &&
test_tick &&
D=$(git -C sub1 commit-tree -m "another change outside refs/heads" HEAD^{tree}) &&
git -C sub1 update-ref refs/changes/5 $D &&
git update-index --cacheinfo 160000 $D sub1 &&
git commit -m "updated submodules outside of refs/heads" &&
E=$(git rev-parse HEAD) &&
git update-ref refs/changes/6 $E &&
(
cd downstream &&
git fetch --recurse-submodules origin refs/changes/6 &&
git -C submodule cat-file -t $C &&
git -C sub1 cat-file -t $D &&
git checkout --recurse-submodules FETCH_HEAD
)
'
# Removes the submodule.sub1 section from .gitmodules so that sub1 remains
# a gitlink without a .gitmodules entry, then repeats the FETCH_HEAD
# scenario (refs/changes/9). Both the configured submodule and the
# unconfigured sub1 must still receive the commits the superproject
# points to.
test_expect_success 'fetch new submodule commits on-demand without .gitmodules entry' '
# depends on the previous test for setup
git config -f .gitmodules --remove-section submodule.sub1 &&
git add .gitmodules &&
git commit -m "delete gitmodules file" &&
git checkout -B master &&
git -C downstream fetch &&
git -C downstream checkout origin/master &&
C=$(git -C submodule commit-tree -m "yet another change outside refs/heads" HEAD^{tree}) &&
git -C submodule update-ref refs/changes/7 $C &&
git update-index --cacheinfo 160000 $C submodule &&
test_tick &&
D=$(git -C sub1 commit-tree -m "yet another change outside refs/heads" HEAD^{tree}) &&
git -C sub1 update-ref refs/changes/8 $D &&
git update-index --cacheinfo 160000 $D sub1 &&
git commit -m "updated submodules outside of refs/heads" &&
E=$(git rev-parse HEAD) &&
git update-ref refs/changes/9 $E &&
(
cd downstream &&
git fetch --recurse-submodules origin refs/changes/9 &&
git -C submodule cat-file -t $C &&
git -C sub1 cat-file -t $D &&
git checkout --recurse-submodules FETCH_HEAD
)
'
# Creates three successive superproject commits that point sub1 at three
# different commits (D, E, F), each reachable only from refs/changes/* in
# sub1. Only the last superproject commit is published (refs/changes/13).
# After fetching it downstream, all three sub1 commits must be present,
# including the two referenced only by intermediate superproject commits.
test_expect_success 'fetch new submodule commit intermittently referenced by superproject' '
# depends on the previous test for setup
D=$(git -C sub1 commit-tree -m "change 10 outside refs/heads" HEAD^{tree}) &&
E=$(git -C sub1 commit-tree -m "change 11 outside refs/heads" HEAD^{tree}) &&
F=$(git -C sub1 commit-tree -m "change 12 outside refs/heads" HEAD^{tree}) &&
git -C sub1 update-ref refs/changes/10 $D &&
git update-index --cacheinfo 160000 $D sub1 &&
git commit -m "updated submodules outside of refs/heads" &&
git -C sub1 update-ref refs/changes/11 $E &&
git update-index --cacheinfo 160000 $E sub1 &&
git commit -m "updated submodules outside of refs/heads" &&
git -C sub1 update-ref refs/changes/12 $F &&
git update-index --cacheinfo 160000 $F sub1 &&
git commit -m "updated submodules outside of refs/heads" &&
G=$(git rev-parse HEAD) &&
git update-ref refs/changes/13 $G &&
(
cd downstream &&
git fetch --recurse-submodules origin refs/changes/13 &&
git -C sub1 cat-file -t $D &&
git -C sub1 cat-file -t $E &&
git -C sub1 cat-file -t $F
)
'
test_done test_done