#include "git-compat-util.h"
#include "gettext.h"
#include "hex.h"
#include "object-store-ll.h"
#include "promisor-remote.h"
#include "config.h"
#include "trace2.h"
#include "transport.h"
#include "strvec.h"
#include "packfile.h"
2019-06-25 13:40:37 +00:00
|
|
|
|
2021-06-17 17:13:23 +00:00
|
|
|
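
/*
 * Per-repository list of promisor remotes, kept in the order in which
 * they will be tried, plus a tail pointer for O(1) appends.
 */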
struct promisor_remote_config {
	struct promisor_remote *promisors;
	struct promisor_remote **promisors_tail;
};
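
/*
 * Fetch the given objects from the given remote by spawning a
 * subprocess roughly equivalent to:
 *
 *	git -c fetch.negotiationAlgorithm=noop fetch <remote> \
 *		--no-tags --no-write-fetch-head --recurse-submodules=no \
 *		--filter=blob:none --stdin
 *
 * with the object IDs fed to it on stdin, one per line. Returns 0 on
 * success, -1 if the subprocess fails.
 */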
static int fetch_objects(struct repository *repo,
			 const char *remote_name,
			 const struct object_id *oids,
			 int oid_nr)
{
	struct child_process child = CHILD_PROCESS_INIT;
	int i;
	FILE *child_in;

	child.git_cmd = 1;
	child.in = -1;
	if (repo != the_repository)
		prepare_other_repo_env(&child.env, repo->gitdir);
	strvec_pushl(&child.args, "-c", "fetch.negotiationAlgorithm=noop",
		     "fetch", remote_name, "--no-tags",
		     "--no-write-fetch-head", "--recurse-submodules=no",
		     "--filter=blob:none", "--stdin", NULL);
	if (start_command(&child))
		die(_("promisor-remote: unable to fork off fetch subprocess"));
	child_in = xfdopen(child.in, "w");

	trace2_data_intmax("promisor", repo, "fetch_count", oid_nr);

	for (i = 0; i < oid_nr; i++) {
		if (fputs(oid_to_hex(&oids[i]), child_in) < 0)
			die_errno(_("promisor-remote: could not write to fetch subprocess"));
		if (fputc('\n', child_in) < 0)
			die_errno(_("promisor-remote: could not write to fetch subprocess"));
	}

	if (fclose(child_in) < 0)
		die_errno(_("promisor-remote: could not close stdin to fetch subprocess"));
	return finish_command(&child) ? -1 : 0;
}
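
/*
 * Append a new promisor remote with the given name to the config's
 * list; a name starting with '/' is rejected with a warning and NULL
 * is returned.
 */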
static struct promisor_remote *promisor_remote_new(struct promisor_remote_config *config,
						   const char *remote_name)
{
	struct promisor_remote *r;

	if (*remote_name == '/') {
		warning(_("promisor remote name cannot begin with '/': %s"),
			remote_name);
		return NULL;
	}

	FLEX_ALLOC_STR(r, name, remote_name);

	*config->promisors_tail = r;
	config->promisors_tail = &r->next;

	return r;
}
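
/*
 * Find the promisor remote with the given name in the config's list,
 * optionally reporting the element just before it via "previous"
 * (NULL if it is the head of the list).
 */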
static struct promisor_remote *promisor_remote_lookup(struct promisor_remote_config *config,
						      const char *remote_name,
						      struct promisor_remote **previous)
{
	struct promisor_remote *r, *p;

	for (p = NULL, r = config->promisors; r; p = r, r = r->next)
		if (!strcmp(r->name, remote_name)) {
			if (previous)
				*previous = p;
			return r;
		}

	return NULL;
}
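
/*
 * Move r to the tail of the list, i.e. the position that
 * promisor_remote_get_direct() tries last; "previous" must be the
 * element just before r, or NULL if r is the head of the list.
 */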
static void promisor_remote_move_to_tail(struct promisor_remote_config *config,
					 struct promisor_remote *r,
					 struct promisor_remote *previous)
{
	if (!r->next)
		return;

	if (previous)
		previous->next = r->next;
	else
		config->promisors = r->next ? r->next : r;
	r->next = NULL;
	*config->promisors_tail = r;
	config->promisors_tail = &r->next;
}
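
/*
 * Config callback (a config_fn_t) that registers a promisor remote for
 * each "remote.<name>.promisor" key that is true, and records
 * "remote.<name>.partialclonefilter" as that remote's filter. The
 * config context is accepted only to match the config_fn_t signature
 * and is unused here.
 */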
static int promisor_remote_config(const char *var, const char *value,
				  const struct config_context *ctx UNUSED,
				  void *data)
{
	struct promisor_remote_config *config = data;
	const char *name;
	size_t namelen;
	const char *subkey;

	if (parse_config_key(var, "remote", &name, &namelen, &subkey) < 0)
		return 0;

	if (!strcmp(subkey, "promisor")) {
		char *remote_name;

		if (!git_config_bool(var, value))
			return 0;

		remote_name = xmemdupz(name, namelen);

		if (!promisor_remote_lookup(config, remote_name, NULL))
			promisor_remote_new(config, remote_name);

		free(remote_name);
		return 0;
	}
	if (!strcmp(subkey, "partialclonefilter")) {
		struct promisor_remote *r;
		char *remote_name = xmemdupz(name, namelen);

		r = promisor_remote_lookup(config, remote_name, NULL);
		if (!r)
			r = promisor_remote_new(config, remote_name);

		free(remote_name);

		if (!r)
			return 0;

		return git_config_string(&r->partial_clone_filter, var, value);
	}

	return 0;
}
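
/*
 * Lazily set up the promisor remote list for the repository: read it
 * from the config once, and make sure the remote named by
 * extensions.partialClone is present and at the tail of the list.
 */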
static void promisor_remote_init(struct repository *r)
{
	struct promisor_remote_config *config;

	if (r->promisor_remote_config)
		return;
	config = r->promisor_remote_config =
		xcalloc(1, sizeof(*r->promisor_remote_config));
	config->promisors_tail = &config->promisors;

	repo_config(r, promisor_remote_config, config);

	if (r->repository_format_partial_clone) {
		struct promisor_remote *o, *previous;

		o = promisor_remote_lookup(config,
					   r->repository_format_partial_clone,
					   &previous);
		if (o)
			promisor_remote_move_to_tail(config, o, previous);
		else
			promisor_remote_new(config, r->repository_format_partial_clone);
	}
}
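
/*
 * Free all entries in the given config's list and reset it to empty.
 */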
void promisor_remote_clear(struct promisor_remote_config *config)
{
	while (config->promisors) {
		struct promisor_remote *r = config->promisors;
		config->promisors = config->promisors->next;
		free(r);
	}

	config->promisors_tail = &config->promisors;
}
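
/*
 * Discard the current list and rebuild it from the repository's
 * config.
 */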
void repo_promisor_remote_reinit(struct repository *r)
{
	promisor_remote_clear(r->promisor_remote_config);
	FREE_AND_NULL(r->promisor_remote_config);
	promisor_remote_init(r);
}
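
/*
 * Return the promisor remote with the given name, or the first
 * promisor remote (if any) when remote_name is NULL; initializes the
 * list on first use.
 */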
struct promisor_remote *repo_promisor_remote_find(struct repository *r,
						  const char *remote_name)
{
	promisor_remote_init(r);

	if (!remote_name)
		return r->promisor_remote_config->promisors;

	return promisor_remote_lookup(r->promisor_remote_config, remote_name, NULL);
}
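
/*
 * Return 1 if the repository has at least one promisor remote
 * configured, 0 otherwise.
 */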
int repo_has_promisor_remote(struct repository *r)
{
	return !!repo_promisor_remote_find(r, NULL);
}
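
/*
 * Rewrite *oids to contain only the objects that are still missing
 * locally (checked without triggering another fetch) and return how
 * many remain; the old array is freed if to_free is set.
 */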
static int remove_fetched_oids(struct repository *repo,
			       struct object_id **oids,
			       int oid_nr, int to_free)
{
	int i, remaining_nr = 0;
	int *remaining = xcalloc(oid_nr, sizeof(*remaining));
	struct object_id *old_oids = *oids;
	struct object_id *new_oids;

	for (i = 0; i < oid_nr; i++)
		if (oid_object_info_extended(repo, &old_oids[i], NULL,
					     OBJECT_INFO_SKIP_FETCH_OBJECT)) {
			remaining[i] = 1;
			remaining_nr++;
		}

	if (remaining_nr) {
		int j = 0;
		CALLOC_ARRAY(new_oids, remaining_nr);
		for (i = 0; i < oid_nr; i++)
			if (remaining[i])
				oidcpy(&new_oids[j++], &old_oids[i]);
		*oids = new_oids;
		if (to_free)
			free(old_oids);
	}

	free(remaining);

	return remaining_nr;
}
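
/*
 * Prefetch the given objects, trying each promisor remote in turn, so
 * that callers that know they will need several missing objects can
 * avoid many sequential one-object fetches. If some objects cannot be
 * fetched from any promisor remote, die if at least one of them is
 * known to be a promisor object; otherwise the user could be misled
 * into thinking the promisor remote should have it, when a missing
 * non-promisor object may instead indicate repository corruption.
 *
 * A typical caller (a sketch; the variable names are assumed, not
 * from this file) batches the missing object IDs into one call:
 *
 *	struct oid_array to_fetch = OID_ARRAY_INIT;
 *	... oid_array_append(&to_fetch, &oid) for each missing oid ...
 *	promisor_remote_get_direct(the_repository,
 *				   to_fetch.oid, to_fetch.nr);
 */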
void promisor_remote_get_direct(struct repository *repo,
				const struct object_id *oids,
				int oid_nr)
{
	struct promisor_remote *r;
	struct object_id *remaining_oids = (struct object_id *)oids;
	int remaining_nr = oid_nr;
	int to_free = 0;
	int i;

	if (oid_nr == 0)
		return;

	promisor_remote_init(repo);

	for (r = repo->promisor_remote_config->promisors; r; r = r->next) {
		if (fetch_objects(repo, r->name, remaining_oids, remaining_nr) < 0) {
			/* No point filtering a single object; try the next remote. */
			if (remaining_nr == 1)
				continue;
			/* Keep only the objects that are still missing locally. */
			remaining_nr = remove_fetched_oids(repo, &remaining_oids,
							   remaining_nr, to_free);
			if (remaining_nr) {
				to_free = 1;
				continue;
			}
		}
		goto all_fetched;
	}

	for (i = 0; i < remaining_nr; i++) {
		if (is_promisor_object(&remaining_oids[i]))
			die(_("could not fetch %s from promisor remote"),
			    oid_to_hex(&remaining_oids[i]));
	}

all_fetched:
	if (to_free)
		free(remaining_oids);
}