2007-09-11 03:03:25 +00:00
|
|
|
/*
|
|
|
|
* "git fetch"
|
|
|
|
*/
|
2023-05-16 06:33:57 +00:00
|
|
|
#include "builtin.h"
|
2023-04-11 03:00:39 +00:00
|
|
|
#include "advice.h"
|
2017-06-14 18:07:36 +00:00
|
|
|
#include "config.h"
|
2023-03-21 06:25:54 +00:00
|
|
|
#include "gettext.h"
|
2023-03-21 06:26:03 +00:00
|
|
|
#include "environment.h"
|
2023-02-24 00:09:27 +00:00
|
|
|
#include "hex.h"
|
2017-12-12 19:53:52 +00:00
|
|
|
#include "repository.h"
|
2007-09-11 03:03:25 +00:00
|
|
|
#include "refs.h"
|
2018-05-16 22:57:48 +00:00
|
|
|
#include "refspec.h"
|
2023-04-11 07:41:49 +00:00
|
|
|
#include "object-name.h"
|
2023-05-16 06:34:06 +00:00
|
|
|
#include "object-store-ll.h"
|
2019-09-15 21:18:02 +00:00
|
|
|
#include "oidset.h"
|
2023-04-11 03:00:42 +00:00
|
|
|
#include "oid-array.h"
|
2007-09-11 03:03:25 +00:00
|
|
|
#include "commit.h"
|
2008-07-21 18:03:49 +00:00
|
|
|
#include "string-list.h"
|
2007-09-11 03:03:25 +00:00
|
|
|
#include "remote.h"
|
|
|
|
#include "transport.h"
|
2007-11-11 07:29:47 +00:00
|
|
|
#include "run-command.h"
|
2007-12-04 07:25:47 +00:00
|
|
|
#include "parse-options.h"
|
chain kill signals for cleanup functions
If a piece of code wanted to do some cleanup before exiting
(e.g., cleaning up a lockfile or a tempfile), our usual
strategy was to install a signal handler that did something
like this:
do_cleanup(); /* actual work */
signal(signo, SIG_DFL); /* restore previous behavior */
raise(signo); /* deliver signal, killing ourselves */
For a single handler, this works fine. However, if we want
to clean up two _different_ things, we run into a problem.
The most recently installed handler will run, but when it
removes itself as a handler, it doesn't put back the first
handler.
This patch introduces sigchain, a tiny library for handling
a stack of signal handlers. You sigchain_push each handler,
and use sigchain_pop to restore whoever was before you in
the stack.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-01-22 06:02:35 +00:00
|
|
|
#include "sigchain.h"
|
2015-08-18 00:22:00 +00:00
|
|
|
#include "submodule-config.h"
|
2010-11-12 12:54:52 +00:00
|
|
|
#include "submodule.h"
|
2011-09-02 23:33:22 +00:00
|
|
|
#include "connected.h"
|
2020-07-28 20:23:39 +00:00
|
|
|
#include "strvec.h"
|
2016-07-01 16:03:30 +00:00
|
|
|
#include "utf8.h"
|
2023-04-11 07:41:59 +00:00
|
|
|
#include "pager.h"
|
2023-05-16 06:33:59 +00:00
|
|
|
#include "path.h"
|
2023-04-22 20:17:14 +00:00
|
|
|
#include "pkt-line.h"
|
2017-12-08 15:58:44 +00:00
|
|
|
#include "list-objects-filter-options.h"
|
2018-07-20 16:33:04 +00:00
|
|
|
#include "commit-reach.h"
|
2019-08-19 09:11:20 +00:00
|
|
|
#include "branch.h"
|
2019-06-25 13:40:31 +00:00
|
|
|
#include "promisor-remote.h"
|
2019-09-03 02:22:02 +00:00
|
|
|
#include "commit-graph.h"
|
2020-04-30 19:48:50 +00:00
|
|
|
#include "shallow.h"
|
2023-04-11 03:00:38 +00:00
|
|
|
#include "trace.h"
|
|
|
|
#include "trace2.h"
|
2023-01-31 13:29:17 +00:00
|
|
|
#include "bundle-uri.h"
|
2007-09-11 03:03:25 +00:00
|
|
|
|
2019-06-18 20:25:27 +00:00
|
|
|
/*
 * Delay threshold expressed in milliseconds: 10 * 1000 ms, i.e. 10 seconds.
 * NOTE(review): judging by the name, a warning is shown once the
 * forced-update check has taken longer than this; confirm against the code
 * that compares against it.
 */
#define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000)
|
|
|
|
|
2007-12-04 07:25:47 +00:00
|
|
|
static const char * const builtin_fetch_usage[] = {
|
2012-08-20 12:32:09 +00:00
|
|
|
N_("git fetch [<options>] [<repository> [<refspec>...]]"),
|
|
|
|
N_("git fetch [<options>] <group>"),
|
|
|
|
N_("git fetch --multiple [<options>] [(<repository> | <group>)...]"),
|
|
|
|
N_("git fetch --all [<options>]"),
|
2007-12-04 07:25:47 +00:00
|
|
|
NULL
|
|
|
|
};
|
2007-09-11 03:03:25 +00:00
|
|
|
|
2007-12-04 07:25:47 +00:00
|
|
|
/*
 * Values for the file-scope "tags" setting. The numeric values are spelled
 * out explicitly; TAGS_DEFAULT is the value "tags" is initialized with.
 */
enum {
	TAGS_UNSET = 0,
	TAGS_DEFAULT = 1,
	TAGS_SET = 2
};
|
|
|
|
|
2023-05-10 12:34:24 +00:00
|
|
|
/*
 * Output formats used when reporting reference updates.
 */
enum display_format {
	DISPLAY_FORMAT_FULL,
	DISPLAY_FORMAT_COMPACT,
	/*
	 * Machine-parseable "porcelain" format intended for scripts. Each
	 * reference update is reported as one line:
	 *
	 *   <flag> <old-object-id> <new-object-id> <local-reference>\n
	 *
	 * The flags are the same ones the human-readable format uses
	 * (fast-forward, deletion, rejected update, ...). The format is
	 * unambiguous because object IDs have fixed known widths and cannot
	 * contain spaces, and references cannot contain newlines.
	 *
	 * Unlike the human-readable output, this data is printed to stdout
	 * rather than stderr so that error messages and progress meters do
	 * not interfere with it.
	 *
	 * Known restrictions: the remote a reference was fetched from is not
	 * part of the output, and the format cannot be combined with
	 * `--recurse-submodules` because updates would be ambiguous without
	 * also printing the repository. A future "porcelain-v2" format could
	 * add that information if a use case comes up.
	 */
	DISPLAY_FORMAT_PORCELAIN,
};
|
|
|
|
|
2023-03-20 12:35:20 +00:00
|
|
|
struct display_state {
|
2023-03-20 12:35:40 +00:00
|
|
|
struct strbuf buf;
|
|
|
|
|
2023-03-20 12:35:20 +00:00
|
|
|
int refcol_width;
|
2023-05-10 12:34:24 +00:00
|
|
|
enum display_format format;
|
2023-03-20 12:35:36 +00:00
|
|
|
|
|
|
|
char *url;
|
|
|
|
int url_len, shown_url;
|
2023-03-20 12:35:20 +00:00
|
|
|
};
|
|
|
|
|
2019-06-18 20:25:27 +00:00
|
|
|
/*
 * NOTE(review): presumably accumulates time, in milliseconds, related to the
 * forced-update check — confirm against the code that updates it.
 */
static uint64_t forced_updates_ms = 0;
static int prefetch = 0;
/* -1 means the user has not specified whether to prune. */
static int prune = -1; /* unspecified */
#define PRUNE_BY_DEFAULT 0 /* do we prune by default? */
|
|
|
|
|
fetch: add a --prune-tags option and fetch.pruneTags config
Add a --prune-tags option to git-fetch, along with fetch.pruneTags
config option and a -P shorthand (-p is --prune). This allows for
doing any of:
git fetch -p -P
git fetch --prune --prune-tags
git fetch -p -P origin
git fetch --prune --prune-tags origin
Or simply:
git config fetch.prune true &&
git config fetch.pruneTags true &&
git fetch
Instead of the much more verbose:
git fetch --prune origin 'refs/tags/*:refs/tags/*' '+refs/heads/*:refs/remotes/origin/*'
Before this feature it was painful to support the use-case of pulling
from a repo which is having both its branches *and* tags deleted
regularly, and have our local references to reflect upstream.
At work we create deployment tags in the repo for each rollout, and
there's *lots* of those, so they're archived within weeks for
performance reasons.
Without this change it's hard to centrally configure such repos in
/etc/gitconfig (on servers that are only used for working with
them). You need to set fetch.prune=true globally, and then for each
repo:
git -C {} config --replace-all remote.origin.fetch "refs/tags/*:refs/tags/*" "^\+*refs/tags/\*:refs/tags/\*$"
Now I can simply set fetch.pruneTags=true in /etc/gitconfig as well,
and users running "git pull" will automatically get the pruning
semantics I want.
Even though "git remote" has corresponding "prune" and "update
--prune" subcommands I'm intentionally not adding a corresponding
prune-tags or "update --prune --prune-tags" mode to that command.
It's advertised (as noted in my recent "git remote doc: correct
dangerous lies about what prune does") as only modifying remote
tracking references, whereas any --prune-tags option is always going
to modify what from the user's perspective is a local copy of the tag,
since there's no such thing as a remote tracking tag.
Ideally add_prune_tags_to_fetch_refspec() would be something that
would use ALLOC_GROW() to grow the 'fetch' member of the 'remote'
struct. Instead I'm realloc-ing remote->fetch and adding the
tag_refspec to the end.
The reason is that parse_{fetch,push}_refspec which allocate the
refspec (ultimately remote->fetch) struct are called in many places that
don't have access to a 'remote' struct. It would be hard to change all
their callsites to be amenable to carry around the bookkeeping
variables required for dynamic allocation.
All the other callers of the API first incrementally construct the
string version of the refspec in remote->fetch_refspec via
add_fetch_refspec(), before finally calling parse_fetch_refspec() via
some variation of remote_get().
It's less of a pain to deal with the one special case that needs to
modify already constructed refspecs than to chase down and change all
the other callsites. The API I'm adding is intentionally not
generalized because if we add more of these we'd probably want to
re-visit how this is done.
See my "Re: [BUG] git remote prune removes local tags, depending on
fetch config" (87po6ahx87.fsf@evledraar.gmail.com;
https://public-inbox.org/git/87po6ahx87.fsf@evledraar.gmail.com/) for
more background info.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-02-09 20:32:15 +00:00
|
|
|
/* -1 means the user has not specified whether to prune tags. */
static int prune_tags = -1; /* unspecified */
#define PRUNE_TAGS_BY_DEFAULT 0 /* do we prune tags by default? */

/* File-scope flags; those without an initializer start out as 0. */
static int append, dry_run, force, keep, update_head_ok;
static int write_fetch_head = 1;
static int verbosity, deepen_relative, set_upstream, refetch;
static int progress = -1;
|
2023-05-10 12:34:32 +00:00
|
|
|
/* "tags" starts out as TAGS_DEFAULT; update_shallow and deepen start out as 0. */
static int tags = TAGS_DEFAULT, update_shallow, deepen;
|
fetch: implement support for atomic reference updates
When executing a fetch, git will currently allocate one reference
transaction per reference update and directly commit it. This means that
fetches are non-atomic: even if some of the reference updates fail,
others may still succeed and modify local references.
This is fine in many scenarios, but this strategy has its downsides.
- The view of remote references may be inconsistent and may show a
bastardized state of the remote repository.
- Batching together updates may improve performance in certain
scenarios. While the impact probably isn't as pronounced with loose
references, the upcoming reftable backend may benefit as it needs to
write fewer files in case the update is batched.
- The reference-update hook is currently being executed twice per
updated reference. While this doesn't matter when there is no such
hook, we have seen severe performance regressions when doing a
git-fetch(1) with reference-transaction hook when the remote
repository has hundreds of thousands of references.
Similar to `git push --atomic`, this commit thus introduces atomic
fetches. Instead of allocating one reference transaction per updated
reference, it causes us to only allocate a single transaction and commit
it as soon as all updates were received. If locking of any reference
fails, then we abort the complete transaction and don't update any
reference, which gives us an all-or-nothing fetch.
Note that this may not completely fix the first of above downsides, as
the consistent view also depends on the server-side. If the server
doesn't have a consistent view of its own references during the
reference negotiation phase, then the client would get the same
inconsistent view the server has. This is a separate problem though and,
if it actually exists, can be fixed at a later point.
This commit also changes the way we write FETCH_HEAD in case `--atomic`
is passed. Instead of writing changes as we go, we need to accumulate
all changes first and only commit them at the end when we know that all
reference updates succeeded. Ideally, we'd just do so via a temporary
file so that we don't need to carry all updates in-memory. This isn't
trivially doable though considering the `--append` mode, where we do not
truncate the file but simply append to it. And given that we support
concurrent processes appending to FETCH_HEAD at the same time without
any loss of data, seeding the temporary file with current contents of
FETCH_HEAD initially and then doing a rename wouldn't work either. So
this commit implements the simple strategy of buffering all changes and
appending them to the file on commit.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-12 12:27:52 +00:00
|
|
|
static int atomic_fetch;
|
2016-02-03 04:09:14 +00:00
|
|
|
static enum transport_family family;
|
2007-11-11 07:29:47 +00:00
|
|
|
static const char *depth;
|
2016-06-12 10:53:59 +00:00
|
|
|
static const char *deepen_since;
|
2007-12-04 07:25:47 +00:00
|
|
|
static const char *upload_pack;
|
2016-06-12 10:54:04 +00:00
|
|
|
static struct string_list deepen_not = STRING_LIST_INIT_NODUP;
|
2007-12-04 07:25:46 +00:00
|
|
|
static struct strbuf default_rla = STRBUF_INIT;
|
2013-08-07 22:38:45 +00:00
|
|
|
static struct transport *gtransport;
|
fetch: work around "transport-take-over" hack
A Git-aware "connect" transport allows the "transport_take_over" to
redirect generic transport requests like fetch(), push_refs() and
get_refs_list() to the native Git transport handling methods. The
take-over process replaces transport->data with a fake data that
these method implementations understand.
While this hack works OK for a single request, it breaks when the
transport needs to make more than one request. transport->data
that used to hold necessary information for the specific helper to
work correctly is destroyed during the take-over process.
One codepath where this matters is "git fetch" in auto-follow mode;
when it does not get all the tags that ought to point at the history
it got (which can be determined by looking at the peeled tags in the
initial advertisement) from the primary transfer, it internally
makes a second request to complete the fetch. Because "take-over"
hack has already destroyed the data necessary to talk to the
transport helper by the time this happens, the second request cannot
make a request to the helper to make another connection to fetch
these additional tags.
Mark such a transport as "cannot_reuse", and use a separate
transport to perform the backfill fetch in order to work around
this breakage.
Note that this problem does not manifest itself when running t5802,
because our upload-pack gives you all the necessary auto-followed
tags during the primary transfer. You would need to step through
"git fetch" in a debugger, stop immediately after the primary
transfer finishes and writes these auto-followed tags, remove the
tag references and repack/prune the repository to convince the
"find-non-local-tags" procedure that the primary transfer failed to
give us all the necessary tags, and then let it continue, in order
to trigger the bug in the secondary transfer this patch fixes.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-07 22:47:18 +00:00
|
|
|
static struct transport *gsecondary;
|
2018-05-16 22:58:05 +00:00
|
|
|
static struct refspec refmap = REFSPEC_INIT_FETCH;
|
list-objects-filter: add and use initializers
In 7e2619d8ff (list_objects_filter_options: plug leak of filter_spec
strings, 2022-09-08), we noted that the filter_spec string_list was
inconsistent in how it handled memory ownership of strings stored in the
list. The fix there was a bit of a band-aid to set the "strdup_strings"
variable right before adding anything.
That works OK, and it lets the users of the API continue to
zero-initialize the struct. But it makes the code a bit hard to follow
and accident-prone, as any other spots appending the filter_spec need to
think about whether to set the strdup_strings value, too (there's one
such spot in partial_clone_get_default_filter_spec(), which is probably
a possible memory leak).
So let's do that full cleanup now. We'll introduce a
LIST_OBJECTS_FILTER_INIT macro and matching function, and use them as
appropriate (though it is for the "_options" struct, this matches the
corresponding list_objects_filter_release() function).
This is harder than it seems! Many other structs, like
git_transport_data, embed the filter struct. So they need to initialize
it themselves even if the rest of the enclosing struct is OK with
zero-initialization. I found all of the relevant spots by grepping
manually for declarations of list_objects_filter_options. And then doing
so recursively for structs which embed it, and ones which embed those,
and so on.
I'm pretty sure I got everything, but there's no change that would alert
the compiler if any topics in flight added new declarations. To catch
this case, we now double-check in the parsing function that things were
initialized as expected and BUG() if appropriate.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-11 05:03:07 +00:00
|
|
|
static struct list_objects_filter_options filter_options = LIST_OBJECTS_FILTER_INIT;
|
2018-04-23 22:46:24 +00:00
|
|
|
static struct string_list server_options = STRING_LIST_INIT_DUP;
|
2018-07-02 22:39:44 +00:00
|
|
|
static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP;
|
2007-09-14 07:31:25 +00:00
|
|
|
|
2023-05-10 12:34:28 +00:00
|
|
|
struct fetch_config {
|
|
|
|
enum display_format display_format;
|
2023-05-17 11:48:56 +00:00
|
|
|
int prune;
|
2023-05-17 11:49:00 +00:00
|
|
|
int prune_tags;
|
2023-05-17 11:49:04 +00:00
|
|
|
int show_forced_updates;
|
2023-05-17 11:49:08 +00:00
|
|
|
int recurse_submodules;
|
2023-05-17 11:49:13 +00:00
|
|
|
int parallel;
|
2023-05-17 11:49:17 +00:00
|
|
|
int submodule_fetch_jobs;
|
2023-05-10 12:34:28 +00:00
|
|
|
};
|
2007-09-14 07:31:25 +00:00
|
|
|
|
config: add ctx arg to config_fn_t
Add a new "const struct config_context *ctx" arg to config_fn_t to hold
additional information about the config iteration operation.
config_context has a "struct key_value_info kvi" member that holds
metadata about the config source being read (e.g. what kind of config
source it is, the filename, etc). In this series, we're only interested
in .kvi, so we could have just used "struct key_value_info" as an arg,
but config_context makes it possible to add/adjust members in the future
without changing the config_fn_t signature. We could also consider other
ways of organizing the args (e.g. moving the config name and value into
config_context or key_value_info), but in my experiments, the
incremental benefit doesn't justify the added complexity (e.g. a
config_fn_t will sometimes invoke another config_fn_t but with a
different config value).
In subsequent commits, the .kvi member will replace the global "struct
config_reader" in config.c, making config iteration a global-free
operation. It requires much more work for the machinery to provide
meaningful values of .kvi, so for now, merely change the signature and
call sites, pass NULL as a placeholder value, and don't rely on the arg
in any meaningful way.
Most of the changes are performed by
contrib/coccinelle/config_fn_ctx.pending.cocci, which, for every
config_fn_t:
- Modifies the signature to accept "const struct config_context *ctx"
- Passes "ctx" to any inner config_fn_t, if needed
- Adds UNUSED attributes to "ctx", if needed
Most config_fn_t instances are easily identified by seeing if they are
called by the various config functions. Most of the remaining ones are
manually named in the .cocci patch. Manual cleanups are still needed,
but the majority of it is trivial; it's either adjusting config_fn_t
that the .cocci patch didn't catch, or adding forward declarations of
"struct config_context ctx" to make the signatures make sense.
The non-trivial changes are in cases where we are invoking a config_fn_t
outside of config machinery, and we now need to decide what value of
"ctx" to pass. These cases are:
- trace2/tr2_cfg.c:tr2_cfg_set_fl()
This is indirectly called by git_config_set() so that the trace2
machinery can notice the new config values and update its settings
using the tr2 config parsing function, i.e. tr2_cfg_cb().
- builtin/checkout.c:checkout_main()
This calls git_xmerge_config() as a shorthand for parsing a CLI arg.
This might be worth refactoring away in the future, since
git_xmerge_config() can call git_default_config(), which can do much
more than just parsing.
Handle them by creating a KVI_INIT macro that initializes "struct
key_value_info" to a reasonable default, and use that to construct the
"ctx" arg.
Signed-off-by: Glen Choo <chooglen@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-28 19:26:22 +00:00
|
|
|
static int git_fetch_config(const char *k, const char *v,
|
|
|
|
const struct config_context *ctx, void *cb)
|
2013-07-13 09:36:24 +00:00
|
|
|
{
|
2023-05-10 12:34:28 +00:00
|
|
|
struct fetch_config *fetch_config = cb;
|
|
|
|
|
2013-07-13 09:36:24 +00:00
|
|
|
if (!strcmp(k, "fetch.prune")) {
|
2023-05-17 11:48:56 +00:00
|
|
|
fetch_config->prune = git_config_bool(k, v);
|
2013-07-13 09:36:24 +00:00
|
|
|
return 0;
|
|
|
|
}
|
2017-06-01 00:30:50 +00:00
|
|
|
|
fetch: add a --prune-tags option and fetch.pruneTags config
Add a --prune-tags option to git-fetch, along with fetch.pruneTags
config option and a -P shorthand (-p is --prune). This allows for
doing any of:
git fetch -p -P
git fetch --prune --prune-tags
git fetch -p -P origin
git fetch --prune --prune-tags origin
Or simply:
git config fetch.prune true &&
git config fetch.pruneTags true &&
git fetch
Instead of the much more verbose:
git fetch --prune origin 'refs/tags/*:refs/tags/*' '+refs/heads/*:refs/remotes/origin/*'
Before this feature it was painful to support the use-case of pulling
from a repo which is having both its branches *and* tags deleted
regularly, and have our local references to reflect upstream.
At work we create deployment tags in the repo for each rollout, and
there's *lots* of those, so they're archived within weeks for
performance reasons.
Without this change it's hard to centrally configure such repos in
/etc/gitconfig (on servers that are only used for working with
them). You need to set fetch.prune=true globally, and then for each
repo:
git -C {} config --replace-all remote.origin.fetch "refs/tags/*:refs/tags/*" "^\+*refs/tags/\*:refs/tags/\*$"
Now I can simply set fetch.pruneTags=true in /etc/gitconfig as well,
and users running "git pull" will automatically get the pruning
semantics I want.
Even though "git remote" has corresponding "prune" and "update
--prune" subcommands I'm intentionally not adding a corresponding
prune-tags or "update --prune --prune-tags" mode to that command.
It's advertised (as noted in my recent "git remote doc: correct
dangerous lies about what prune does") as only modifying remote
tracking references, whereas any --prune-tags option is always going
to modify what from the user's perspective is a local copy of the tag,
since there's no such thing as a remote tracking tag.
Ideally add_prune_tags_to_fetch_refspec() would be something that
would use ALLOC_GROW() to grow the 'fetch' member of the 'remote'
struct. Instead I'm realloc-ing remote->fetch and adding the
tag_refspec to the end.
The reason is that parse_{fetch,push}_refspec which allocate the
refspec (ultimately remote->fetch) struct are called in many places that
don't have access to a 'remote' struct. It would be hard to change all
their callsites to be amenable to carry around the bookkeeping
variables required for dynamic allocation.
All the other callers of the API first incrementally construct the
string version of the refspec in remote->fetch_refspec via
add_fetch_refspec(), before finally calling parse_fetch_refspec() via
some variation of remote_get().
It's less of a pain to deal with the one special case that needs to
modify already constructed refspecs than to chase down and change all
the other callsites. The API I'm adding is intentionally not
generalized because if we add more of these we'd probably want to
re-visit how this is done.
See my "Re: [BUG] git remote prune removes local tags, depending on
fetch config" (87po6ahx87.fsf@evledraar.gmail.com;
https://public-inbox.org/git/87po6ahx87.fsf@evledraar.gmail.com/) for
more background info.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-02-09 20:32:15 +00:00
|
|
|
if (!strcmp(k, "fetch.prunetags")) {
|
2023-05-17 11:49:00 +00:00
|
|
|
fetch_config->prune_tags = git_config_bool(k, v);
|
fetch: add a --prune-tags option and fetch.pruneTags config
Add a --prune-tags option to git-fetch, along with fetch.pruneTags
config option and a -P shorthand (-p is --prune). This allows for
doing any of:
git fetch -p -P
git fetch --prune --prune-tags
git fetch -p -P origin
git fetch --prune --prune-tags origin
Or simply:
git config fetch.prune true &&
git config fetch.pruneTags true &&
git fetch
Instead of the much more verbose:
git fetch --prune origin 'refs/tags/*:refs/tags/*' '+refs/heads/*:refs/remotes/origin/*'
Before this feature it was painful to support the use-case of pulling
from a repo which is having both its branches *and* tags deleted
regularly, and have our local references to reflect upstream.
At work we create deployment tags in the repo for each rollout, and
there's *lots* of those, so they're archived within weeks for
performance reasons.
Without this change it's hard to centrally configure such repos in
/etc/gitconfig (on servers that are only used for working with
them). You need to set fetch.prune=true globally, and then for each
repo:
git -C {} config --replace-all remote.origin.fetch "refs/tags/*:refs/tags/*" "^\+*refs/tags/\*:refs/tags/\*$"
Now I can simply set fetch.pruneTags=true in /etc/gitconfig as well,
and users running "git pull" will automatically get the pruning
semantics I want.
Even though "git remote" has corresponding "prune" and "update
--prune" subcommands I'm intentionally not adding a corresponding
prune-tags or "update --prune --prune-tags" mode to that command.
It's advertised (as noted in my recent "git remote doc: correct
dangerous lies about what prune does") as only modifying remote
tracking references, whereas any --prune-tags option is always going
to modify what from the user's perspective is a local copy of the tag,
since there's no such thing as a remote tracking tag.
Ideally add_prune_tags_to_fetch_refspec() would be something that
would use ALLOC_GROW() to grow the 'fetch' member of the 'remote'
struct. Instead I'm realloc-ing remote->fetch and adding the
tag_refspec to the end.
The reason is that parse_{fetch,push}_refspec which allocate the
refspec (ultimately remote->fetch) struct are called in many places that
don't have access to a 'remote' struct. It would be hard to change all
their callsites to be amenable to carry around the bookkeeping
variables required for dynamic allocation.
All the other callers of the API first incrementally construct the
string version of the refspec in remote->fetch_refspec via
add_fetch_refspec(), before finally calling parse_fetch_refspec() via
some variation of remote_get().
It's less of a pain to deal with the one special case that needs to
modify already constructed refspecs than to chase down and change all
the other callsites. The API I'm adding is intentionally not
generalized because if we add more of these we'd probably want to
re-visit how this is done.
See my "Re: [BUG] git remote prune removes local tags, depending on
fetch config" (87po6ahx87.fsf@evledraar.gmail.com;
https://public-inbox.org/git/87po6ahx87.fsf@evledraar.gmail.com/) for
more background info.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-02-09 20:32:15 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-06-18 20:25:26 +00:00
|
|
|
if (!strcmp(k, "fetch.showforcedupdates")) {
|
2023-05-17 11:49:04 +00:00
|
|
|
fetch_config->show_forced_updates = git_config_bool(k, v);
|
2019-06-18 20:25:26 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-06-01 00:30:50 +00:00
|
|
|
if (!strcmp(k, "submodule.recurse")) {
|
|
|
|
int r = git_config_bool(k, v) ?
|
|
|
|
RECURSE_SUBMODULES_ON : RECURSE_SUBMODULES_OFF;
|
2023-05-17 11:49:08 +00:00
|
|
|
fetch_config->recurse_submodules = r;
|
2017-06-01 00:30:50 +00:00
|
|
|
}
|
|
|
|
|
2017-08-02 19:49:18 +00:00
|
|
|
if (!strcmp(k, "submodule.fetchjobs")) {
|
config: pass kvi to die_bad_number()
Plumb "struct key_value_info" through all code paths that end in
die_bad_number(), which lets us remove the helper functions that read
analogous values from "struct config_reader". As a result, nothing reads
config_reader.config_kvi any more, so remove that too.
In config.c, this requires changing the signature of
git_configset_get_value() to 'return' "kvi" in an out parameter so that
git_configset_get_<type>() can pass it to git_config_<type>(). Only
numeric types will use "kvi", so for non-numeric types (e.g.
git_configset_get_string()), pass NULL to indicate that the out
parameter isn't needed.
Outside of config.c, config callbacks now need to pass "ctx->kvi" to any
of the git_config_<type>() functions that parse a config string into a
number type. Included is a .cocci patch to make that refactor.
The only exceptional case is builtin/config.c, where git_config_<type>()
is called outside of a config callback (namely, on user-provided input),
so config source information has never been available. In this case,
die_bad_number() defaults to a generic, but perfectly descriptive
message. Let's provide a safe, non-NULL value for "kvi" anyway, but make sure
not to change the message.
Signed-off-by: Glen Choo <chooglen@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-28 19:26:27 +00:00
|
|
|
fetch_config->submodule_fetch_jobs = parse_submodule_fetchjobs(k, v, ctx->kvi);
|
2017-08-02 19:49:18 +00:00
|
|
|
return 0;
|
2017-08-02 19:49:19 +00:00
|
|
|
} else if (!strcmp(k, "fetch.recursesubmodules")) {
|
2023-05-17 11:49:08 +00:00
|
|
|
fetch_config->recurse_submodules = parse_fetch_recurse_submodules_arg(k, v);
|
2017-08-02 19:49:19 +00:00
|
|
|
return 0;
|
2017-08-02 19:49:18 +00:00
|
|
|
}
|
|
|
|
|
2019-10-05 18:46:40 +00:00
|
|
|
if (!strcmp(k, "fetch.parallel")) {
|
config: pass kvi to die_bad_number()
Plumb "struct key_value_info" through all code paths that end in
die_bad_number(), which lets us remove the helper functions that read
analogous values from "struct config_reader". As a result, nothing reads
config_reader.config_kvi any more, so remove that too.
In config.c, this requires changing the signature of
git_configset_get_value() to 'return' "kvi" in an out parameter so that
git_configset_get_<type>() can pass it to git_config_<type>(). Only
numeric types will use "kvi", so for non-numeric types (e.g.
git_configset_get_string()), pass NULL to indicate that the out
parameter isn't needed.
Outside of config.c, config callbacks now need to pass "ctx->kvi" to any
of the git_config_<type>() functions that parse a config string into a
number type. Included is a .cocci patch to make that refactor.
The only exceptional case is builtin/config.c, where git_config_<type>()
is called outside of a config callback (namely, on user-provided input),
so config source information has never been available. In this case,
die_bad_number() defaults to a generic, but perfectly descriptive
message. Let's provide a safe, non-NULL value for "kvi" anyway, but make sure
not to change the message.
Signed-off-by: Glen Choo <chooglen@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-28 19:26:27 +00:00
|
|
|
fetch_config->parallel = git_config_int(k, v, ctx->kvi);
|
2023-05-17 11:49:13 +00:00
|
|
|
if (fetch_config->parallel < 0)
|
2019-10-05 18:46:40 +00:00
|
|
|
die(_("fetch.parallel cannot be negative"));
|
2023-05-17 11:49:13 +00:00
|
|
|
if (!fetch_config->parallel)
|
|
|
|
fetch_config->parallel = online_cpus();
|
2019-10-05 18:46:40 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2023-05-10 12:34:28 +00:00
|
|
|
if (!strcmp(k, "fetch.output")) {
|
|
|
|
if (!v)
|
|
|
|
return config_error_nonbool(k);
|
|
|
|
else if (!strcasecmp(v, "full"))
|
|
|
|
fetch_config->display_format = DISPLAY_FORMAT_FULL;
|
|
|
|
else if (!strcasecmp(v, "compact"))
|
|
|
|
fetch_config->display_format = DISPLAY_FORMAT_COMPACT;
|
|
|
|
else
|
|
|
|
die(_("invalid value for '%s': '%s'"),
|
|
|
|
"fetch.output", v);
|
|
|
|
}
|
|
|
|
|
config: add ctx arg to config_fn_t
Add a new "const struct config_context *ctx" arg to config_fn_t to hold
additional information about the config iteration operation.
config_context has a "struct key_value_info kvi" member that holds
metadata about the config source being read (e.g. what kind of config
source it is, the filename, etc). In this series, we're only interested
in .kvi, so we could have just used "struct key_value_info" as an arg,
but config_context makes it possible to add/adjust members in the future
without changing the config_fn_t signature. We could also consider other
ways of organizing the args (e.g. moving the config name and value into
config_context or key_value_info), but in my experiments, the
incremental benefit doesn't justify the added complexity (e.g. a
config_fn_t will sometimes invoke another config_fn_t but with a
different config value).
In subsequent commits, the .kvi member will replace the global "struct
config_reader" in config.c, making config iteration a global-free
operation. It requires much more work for the machinery to provide
meaningful values of .kvi, so for now, merely change the signature and
call sites, pass NULL as a placeholder value, and don't rely on the arg
in any meaningful way.
Most of the changes are performed by
contrib/coccinelle/config_fn_ctx.pending.cocci, which, for every
config_fn_t:
- Modifies the signature to accept "const struct config_context *ctx"
- Passes "ctx" to any inner config_fn_t, if needed
- Adds UNUSED attributes to "ctx", if needed
Most config_fn_t instances are easily identified by seeing if they are
called by the various config functions. Most of the remaining ones are
manually named in the .cocci patch. Manual cleanups are still needed,
but the majority of it is trivial; it's either adjusting config_fn_t
that the .cocci patch didn't catch, or adding forward declarations of
"struct config_context ctx" to make the signatures make sense.
The non-trivial changes are in cases where we are invoking a config_fn_t
outside of config machinery, and we now need to decide what value of
"ctx" to pass. These cases are:
- trace2/tr2_cfg.c:tr2_cfg_set_fl()
This is indirectly called by git_config_set() so that the trace2
machinery can notice the new config values and update its settings
using the tr2 config parsing function, i.e. tr2_cfg_cb().
- builtin/checkout.c:checkout_main()
This calls git_xmerge_config() as a shorthand for parsing a CLI arg.
This might be worth refactoring away in the future, since
git_xmerge_config() can call git_default_config(), which can do much
more than just parsing.
Handle them by creating a KVI_INIT macro that initializes "struct
key_value_info" to a reasonable default, and use that to construct the
"ctx" arg.
Signed-off-by: Glen Choo <chooglen@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-28 19:26:22 +00:00
|
|
|
return git_default_config(k, v, ctx, cb);
|
2013-07-13 09:36:24 +00:00
|
|
|
}
|
|
|
|
|
2014-05-29 22:21:31 +00:00
|
|
|
static int parse_refmap_arg(const struct option *opt, const char *arg, int unset)
|
|
|
|
{
|
assert NOARG/NONEG behavior of parse-options callbacks
When we define a parse-options callback, the flags we put in the option
struct must match what the callback expects. For example, a callback
which does not handle the "unset" parameter should only be used with
PARSE_OPT_NONEG. But since the callback and the option struct are not
defined next to each other, it's easy to get this wrong (as earlier
patches in this series show).
Fortunately, the compiler can help us here: compiling with
-Wunused-parameters can show us which callbacks ignore their "unset"
parameters (and likewise, ones that ignore "arg" expect to be triggered
with PARSE_OPT_NOARG).
But after we've inspected a callback and determined that all of its
callers use the right flags, what do we do next? We'd like to silence
the compiler warning, but do so in a way that will catch any wrong calls
in the future.
We can do that by actually checking those variables and asserting that
they match our expectations. Because this is such a common pattern,
we'll introduce some helper macros. The resulting messages aren't
as descriptive as we could make them, but the file/line information from
BUG() is enough to identify the problem (and anyway, the point is that
these should never be seen).
Each of the annotated callbacks in this patch triggers
-Wunused-parameters, and was manually inspected to make sure all callers
use the correct options (so none of these BUGs should be triggerable).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-11-05 06:45:42 +00:00
|
|
|
BUG_ON_OPT_NEG(unset);
|
|
|
|
|
2014-05-29 22:21:31 +00:00
|
|
|
/*
|
|
|
|
* "git fetch --refmap='' origin foo"
|
|
|
|
* can be used to tell the command not to store anywhere
|
|
|
|
*/
|
parse-options: prefer opt->value to globals in callbacks
We have several parse-options callbacks that ignore their "opt"
parameters entirely. This is a little unusual, as we'd normally put the
result of the parsing into opt->value. In the case of these callbacks,
though, they directly manipulate global variables instead (and in
most cases the caller sets opt->value to NULL in the OPT_CALLBACK
declaration).
The immediate symptom we'd like to deal with is that the unused "opt"
variables trigger -Wunused-parameter. But how to fix that is debatable.
One option is to annotate them with UNUSED. But another is to have the
caller pass in the appropriate variable via opt->value, and use it. That
has the benefit of making the callbacks reusable (in theory at least),
and makes it clear from the OPT_CALLBACK declaration which variables
will be affected (doubly so for the cases in builtin/fast-export.c,
where we do set opt->value, but it is completely ignored!).
The slight downside is that we lose type safety, since they're now
passing through void pointers.
I went with the "just use them" approach here. The loss of type safety
is unfortunate, but that is already an issue with most of the other
callbacks. If we want to try to address that, we should do so more
consistently (and this patch would prepare these callbacks for whatever
we choose to do there).
Note that in the cases in builtin/fast-export.c, we are passing
anonymous enums. We'll have to give them names so that we can declare
the appropriate pointer type within the callbacks.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-08-31 21:21:07 +00:00
|
|
|
refspec_append(opt->value, arg);
|
2018-05-16 22:58:05 +00:00
|
|
|
|
2014-05-29 22:21:31 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
fetch: fix deadlock when cleaning up lockfiles in async signals
When fetching packfiles, we write a bunch of lockfiles for the packfiles
we're writing into the repository. In order to not leave behind any
cruft in case we exit or receive a signal, we register both an exit
handler as well as signal handlers for common signals like SIGINT. These
handlers will then unlink the locks and free the data structure tracking
them. We have observed a deadlock in this logic though:
(gdb) bt
#0 __lll_lock_wait_private () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:95
#1 0x00007f4932bea2cd in _int_free (av=0x7f4932f2eb20 <main_arena>, p=0x3e3e4200, have_lock=0) at malloc.c:3969
#2 0x00007f4932bee58c in __GI___libc_free (mem=<optimized out>) at malloc.c:2975
#3 0x0000000000662ab1 in string_list_clear ()
#4 0x000000000044f5bc in unlock_pack_on_signal ()
#5 <signal handler called>
#6 _int_free (av=0x7f4932f2eb20 <main_arena>, p=<optimized out>, have_lock=0) at malloc.c:4024
#7 0x00007f4932bee58c in __GI___libc_free (mem=<optimized out>) at malloc.c:2975
#8 0x000000000065afd5 in strbuf_release ()
#9 0x000000000066ddb9 in delete_tempfile ()
#10 0x0000000000610d0b in files_transaction_cleanup.isra ()
#11 0x0000000000611718 in files_transaction_abort ()
#12 0x000000000060d2ef in ref_transaction_abort ()
#13 0x000000000060d441 in ref_transaction_prepare ()
#14 0x000000000060e0b5 in ref_transaction_commit ()
#15 0x00000000004511c2 in fetch_and_consume_refs ()
#16 0x000000000045279a in cmd_fetch ()
#17 0x0000000000407c48 in handle_builtin ()
#18 0x0000000000408df2 in cmd_main ()
#19 0x00000000004078b5 in main ()
The process was killed with a signal, which caused the signal handler to
kick in and try to free the data structures after we have unlinked the
locks. It then deadlocks while calling free(3P).
The root cause of this is that it is not allowed to call certain
functions in async-signal handlers, as specified by signal-safety(7).
Next to most I/O functions, this list of disallowed functions also
includes memory-handling functions like malloc(3P) and free(3P) because
they may not be reentrant. As a result, if we execute such functions in
the signal handler, then they may operate on inconsistent state and fail
in unexpected ways.
Fix this bug by not calling non-async-signal-safe functions when running
in the signal handler. We're about to re-raise the signal anyway and
will thus exit, so it's not much of a problem to keep the string list of
lockfiles untouched. Note that it's fine though to call unlink(2), so
we'll still clean up the lockfiles correctly.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Reviewed-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-01-07 10:55:47 +00:00
|
|
|
static void unlock_pack(unsigned int flags)
|
2007-09-14 07:31:25 +00:00
|
|
|
{
|
2013-08-07 22:38:45 +00:00
|
|
|
if (gtransport)
|
fetch: fix deadlock when cleaning up lockfiles in async signals
When fetching packfiles, we write a bunch of lockfiles for the packfiles
we're writing into the repository. In order to not leave behind any
cruft in case we exit or receive a signal, we register both an exit
handler as well as signal handlers for common signals like SIGINT. These
handlers will then unlink the locks and free the data structure tracking
them. We have observed a deadlock in this logic though:
(gdb) bt
#0 __lll_lock_wait_private () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:95
#1 0x00007f4932bea2cd in _int_free (av=0x7f4932f2eb20 <main_arena>, p=0x3e3e4200, have_lock=0) at malloc.c:3969
#2 0x00007f4932bee58c in __GI___libc_free (mem=<optimized out>) at malloc.c:2975
#3 0x0000000000662ab1 in string_list_clear ()
#4 0x000000000044f5bc in unlock_pack_on_signal ()
#5 <signal handler called>
#6 _int_free (av=0x7f4932f2eb20 <main_arena>, p=<optimized out>, have_lock=0) at malloc.c:4024
#7 0x00007f4932bee58c in __GI___libc_free (mem=<optimized out>) at malloc.c:2975
#8 0x000000000065afd5 in strbuf_release ()
#9 0x000000000066ddb9 in delete_tempfile ()
#10 0x0000000000610d0b in files_transaction_cleanup.isra ()
#11 0x0000000000611718 in files_transaction_abort ()
#12 0x000000000060d2ef in ref_transaction_abort ()
#13 0x000000000060d441 in ref_transaction_prepare ()
#14 0x000000000060e0b5 in ref_transaction_commit ()
#15 0x00000000004511c2 in fetch_and_consume_refs ()
#16 0x000000000045279a in cmd_fetch ()
#17 0x0000000000407c48 in handle_builtin ()
#18 0x0000000000408df2 in cmd_main ()
#19 0x00000000004078b5 in main ()
The process was killed with a signal, which caused the signal handler to
kick in and try free the data structures after we have unlinked the
locks. It then deadlocks while calling free(3P).
The root cause of this is that it is not allowed to call certain
functions in async-signal handlers, as specified by signal-safety(7).
Next to most I/O functions, this list of disallowed functions also
includes memory-handling functions like malloc(3P) and free(3P) because
they may not be reentrant. As a result, if we execute such functions in
the signal handler, then they may operate on inconistent state and fail
in unexpected ways.
Fix this bug by not calling non-async-signal-safe functions when running
in the signal handler. We're about to re-raise the signal anyway and
will thus exit, so it's not much of a problem to keep the string list of
lockfiles untouched. Note that it's fine though to call unlink(2), so
we'll still clean up the lockfiles correctly.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Reviewed-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-01-07 10:55:47 +00:00
|
|
|
transport_unlock_pack(gtransport, flags);
|
fetch: work around "transport-take-over" hack
A Git-aware "connect" transport allows the "transport_take_over" to
redirect generic transport requests like fetch(), push_refs() and
get_refs_list() to the native Git transport handling methods. The
take-over process replaces transport->data with a fake data that
these method implementations understand.
While this hack works OK for a single request, it breaks when the
transport needs to make more than one requests. transport->data
that used to hold necessary information for the specific helper to
work correctly is destroyed during the take-over process.
One codepath that this matters is "git fetch" in auto-follow mode;
when it does not get all the tags that ought to point at the history
it got (which can be determined by looking at the peeled tags in the
initial advertisement) from the primary transfer, it internally
makes a second request to complete the fetch. Because "take-over"
hack has already destroyed the data necessary to talk to the
transport helper by the time this happens, the second request cannot
make a request to the helper to make another connection to fetch
these additional tags.
Mark such a transport as "cannot_reuse", and use a separate
transport to perform the backfill fetch in order to work around
this breakage.
Note that this problem does not manifest itself when running t5802,
because our upload-pack gives you all the necessary auto-followed
tags during the primary transfer. You would need to step through
"git fetch" in a debugger, stop immediately after the primary
transfer finishes and writes these auto-followed tags, remove the
tag references and repack/prune the repository to convince the
"find-non-local-tags" procedure that the primary transfer failed to
give us all the necessary tags, and then let it continue, in order
to trigger the bug in the secondary transfer this patch fixes.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-07 22:47:18 +00:00
|
|
|
if (gsecondary)
|
fetch: fix deadlock when cleaning up lockfiles in async signals
When fetching packfiles, we write a bunch of lockfiles for the packfiles
we're writing into the repository. In order to not leave behind any
cruft in case we exit or receive a signal, we register both an exit
handler as well as signal handlers for common signals like SIGINT. These
handlers will then unlink the locks and free the data structure tracking
them. We have observed a deadlock in this logic though:
(gdb) bt
#0 __lll_lock_wait_private () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:95
#1 0x00007f4932bea2cd in _int_free (av=0x7f4932f2eb20 <main_arena>, p=0x3e3e4200, have_lock=0) at malloc.c:3969
#2 0x00007f4932bee58c in __GI___libc_free (mem=<optimized out>) at malloc.c:2975
#3 0x0000000000662ab1 in string_list_clear ()
#4 0x000000000044f5bc in unlock_pack_on_signal ()
#5 <signal handler called>
#6 _int_free (av=0x7f4932f2eb20 <main_arena>, p=<optimized out>, have_lock=0) at malloc.c:4024
#7 0x00007f4932bee58c in __GI___libc_free (mem=<optimized out>) at malloc.c:2975
#8 0x000000000065afd5 in strbuf_release ()
#9 0x000000000066ddb9 in delete_tempfile ()
#10 0x0000000000610d0b in files_transaction_cleanup.isra ()
#11 0x0000000000611718 in files_transaction_abort ()
#12 0x000000000060d2ef in ref_transaction_abort ()
#13 0x000000000060d441 in ref_transaction_prepare ()
#14 0x000000000060e0b5 in ref_transaction_commit ()
#15 0x00000000004511c2 in fetch_and_consume_refs ()
#16 0x000000000045279a in cmd_fetch ()
#17 0x0000000000407c48 in handle_builtin ()
#18 0x0000000000408df2 in cmd_main ()
#19 0x00000000004078b5 in main ()
The process was killed with a signal, which caused the signal handler to
kick in and try free the data structures after we have unlinked the
locks. It then deadlocks while calling free(3P).
The root cause of this is that it is not allowed to call certain
functions in async-signal handlers, as specified by signal-safety(7).
Next to most I/O functions, this list of disallowed functions also
includes memory-handling functions like malloc(3P) and free(3P) because
they may not be reentrant. As a result, if we execute such functions in
the signal handler, then they may operate on inconistent state and fail
in unexpected ways.
Fix this bug by not calling non-async-signal-safe functions when running
in the signal handler. We're about to re-raise the signal anyway and
will thus exit, so it's not much of a problem to keep the string list of
lockfiles untouched. Note that it's fine though to call unlink(2), so
we'll still clean up the lockfiles correctly.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Reviewed-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-01-07 10:55:47 +00:00
|
|
|
transport_unlock_pack(gsecondary, flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Exit-time cleanup hook: unlock the packs with no special flags
 * (i.e. not in signal-handler mode).
 */
static void unlock_pack_atexit(void)
{
	unlock_pack(0);
}
|
|
|
|
|
|
|
|
static void unlock_pack_on_signal(int signo)
|
|
|
|
{
|
fetch: fix deadlock when cleaning up lockfiles in async signals
When fetching packfiles, we write a bunch of lockfiles for the packfiles
we're writing into the repository. In order to not leave behind any
cruft in case we exit or receive a signal, we register both an exit
handler as well as signal handlers for common signals like SIGINT. These
handlers will then unlink the locks and free the data structure tracking
them. We have observed a deadlock in this logic though:
(gdb) bt
#0 __lll_lock_wait_private () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:95
#1 0x00007f4932bea2cd in _int_free (av=0x7f4932f2eb20 <main_arena>, p=0x3e3e4200, have_lock=0) at malloc.c:3969
#2 0x00007f4932bee58c in __GI___libc_free (mem=<optimized out>) at malloc.c:2975
#3 0x0000000000662ab1 in string_list_clear ()
#4 0x000000000044f5bc in unlock_pack_on_signal ()
#5 <signal handler called>
#6 _int_free (av=0x7f4932f2eb20 <main_arena>, p=<optimized out>, have_lock=0) at malloc.c:4024
#7 0x00007f4932bee58c in __GI___libc_free (mem=<optimized out>) at malloc.c:2975
#8 0x000000000065afd5 in strbuf_release ()
#9 0x000000000066ddb9 in delete_tempfile ()
#10 0x0000000000610d0b in files_transaction_cleanup.isra ()
#11 0x0000000000611718 in files_transaction_abort ()
#12 0x000000000060d2ef in ref_transaction_abort ()
#13 0x000000000060d441 in ref_transaction_prepare ()
#14 0x000000000060e0b5 in ref_transaction_commit ()
#15 0x00000000004511c2 in fetch_and_consume_refs ()
#16 0x000000000045279a in cmd_fetch ()
#17 0x0000000000407c48 in handle_builtin ()
#18 0x0000000000408df2 in cmd_main ()
#19 0x00000000004078b5 in main ()
The process was killed with a signal, which caused the signal handler to
kick in and try free the data structures after we have unlinked the
locks. It then deadlocks while calling free(3P).
The root cause of this is that it is not allowed to call certain
functions in async-signal handlers, as specified by signal-safety(7).
Next to most I/O functions, this list of disallowed functions also
includes memory-handling functions like malloc(3P) and free(3P) because
they may not be reentrant. As a result, if we execute such functions in
the signal handler, then they may operate on inconistent state and fail
in unexpected ways.
Fix this bug by not calling non-async-signal-safe functions when running
in the signal handler. We're about to re-raise the signal anyway and
will thus exit, so it's not much of a problem to keep the string list of
lockfiles untouched. Note that it's fine though to call unlink(2), so
we'll still clean up the lockfiles correctly.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Reviewed-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-01-07 10:55:47 +00:00
|
|
|
unlock_pack(TRANSPORT_UNLOCK_PACK_IN_SIGNAL_HANDLER);
|
chain kill signals for cleanup functions
If a piece of code wanted to do some cleanup before exiting
(e.g., cleaning up a lockfile or a tempfile), our usual
strategy was to install a signal handler that did something
like this:
do_cleanup(); /* actual work */
signal(signo, SIG_DFL); /* restore previous behavior */
raise(signo); /* deliver signal, killing ourselves */
For a single handler, this works fine. However, if we want
to clean up two _different_ things, we run into a problem.
The most recently installed handler will run, but when it
removes itself as a handler, it doesn't put back the first
handler.
This patch introduces sigchain, a tiny library for handling
a stack of signal handlers. You sigchain_push each handler,
and use sigchain_pop to restore whoever was before you in
the stack.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-01-22 06:02:35 +00:00
|
|
|
sigchain_pop(signo);
|
2007-09-14 07:31:25 +00:00
|
|
|
raise(signo);
|
|
|
|
}
|
2007-09-11 03:03:25 +00:00
|
|
|
|
2007-09-18 08:54:53 +00:00
|
|
|
static void add_merge_config(struct ref **head,
|
2007-10-30 01:05:40 +00:00
|
|
|
const struct ref *remote_refs,
|
2007-09-18 08:54:53 +00:00
|
|
|
struct branch *branch,
|
|
|
|
struct ref ***tail)
|
2007-09-11 03:03:25 +00:00
|
|
|
{
|
2007-09-18 08:54:53 +00:00
|
|
|
int i;
|
2007-09-11 03:03:25 +00:00
|
|
|
|
2007-09-18 08:54:53 +00:00
|
|
|
for (i = 0; i < branch->merge_nr; i++) {
|
|
|
|
struct ref *rm, **old_tail = *tail;
|
2018-05-16 22:57:49 +00:00
|
|
|
struct refspec_item refspec;
|
2007-09-18 08:54:53 +00:00
|
|
|
|
|
|
|
for (rm = *head; rm; rm = rm->next) {
|
|
|
|
if (branch_merge_matches(branch, i, rm->name)) {
|
2013-05-11 16:15:59 +00:00
|
|
|
rm->fetch_head_status = FETCH_HEAD_MERGE;
|
2007-09-18 08:54:53 +00:00
|
|
|
break;
|
|
|
|
}
|
2007-09-11 03:03:25 +00:00
|
|
|
}
|
2007-09-18 08:54:53 +00:00
|
|
|
if (rm)
|
|
|
|
continue;
|
|
|
|
|
2007-10-27 06:09:48 +00:00
|
|
|
/*
|
2010-11-02 15:31:23 +00:00
|
|
|
* Not fetched to a remote-tracking branch? We need to fetch
|
2007-09-18 08:54:53 +00:00
|
|
|
* it anyway to allow this branch's "branch.$name.merge"
|
2008-09-09 10:28:30 +00:00
|
|
|
* to be honored by 'git pull', but we do not have to
|
2007-10-27 06:09:48 +00:00
|
|
|
* fail if branch.$name.merge is misconfigured to point
|
|
|
|
* at a nonexisting branch. If we were indeed called by
|
2008-09-09 10:28:30 +00:00
|
|
|
* 'git pull', it will notice the misconfiguration because
|
2007-10-27 06:09:48 +00:00
|
|
|
* there is no entry in the resulting FETCH_HEAD marked
|
|
|
|
* for merging.
|
2007-09-18 08:54:53 +00:00
|
|
|
*/
|
2010-03-12 22:27:33 +00:00
|
|
|
memset(&refspec, 0, sizeof(refspec));
|
2007-09-18 08:54:53 +00:00
|
|
|
refspec.src = branch->merge[i]->src;
|
2007-10-27 06:09:48 +00:00
|
|
|
get_fetch_map(remote_refs, &refspec, tail, 1);
|
2007-09-18 08:54:53 +00:00
|
|
|
for (rm = *old_tail; rm; rm = rm->next)
|
2013-05-11 16:15:59 +00:00
|
|
|
rm->fetch_head_status = FETCH_HEAD_MERGE;
|
2007-09-11 03:03:25 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-09-15 21:18:02 +00:00
|
|
|
static void create_fetch_oidset(struct ref **head, struct oidset *out)
|
2013-10-30 05:32:55 +00:00
|
|
|
{
|
|
|
|
struct ref *rm = *head;
|
|
|
|
while (rm) {
|
2019-09-15 21:18:02 +00:00
|
|
|
oidset_insert(out, &rm->old_oid);
|
2013-10-30 05:32:55 +00:00
|
|
|
rm = rm->next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-25 20:25:04 +00:00
|
|
|
struct refname_hash_entry {
|
2019-10-06 23:30:43 +00:00
|
|
|
struct hashmap_entry ent;
|
2018-09-25 20:25:04 +00:00
|
|
|
struct object_id oid;
|
2019-06-04 02:13:30 +00:00
|
|
|
int ignore;
|
2018-09-25 20:25:04 +00:00
|
|
|
char refname[FLEX_ARRAY];
|
|
|
|
};
|
|
|
|
|
2022-08-25 17:09:48 +00:00
|
|
|
/*
 * Hashmap comparison function for struct refname_hash_entry.
 *
 * Compares the refname of "eptr" against "keydata" when a key string is
 * supplied, and against the refname of "entry_or_key" otherwise.
 * Returns the strcmp() result, i.e. 0 when the names are equal.
 */
static int refname_hash_entry_cmp(const void *hashmap_cmp_fn_data UNUSED,
				  const struct hashmap_entry *eptr,
				  const struct hashmap_entry *entry_or_key,
				  const void *keydata)
{
	const struct refname_hash_entry *lhs =
		container_of(eptr, const struct refname_hash_entry, ent);
	const struct refname_hash_entry *rhs =
		container_of(entry_or_key, const struct refname_hash_entry, ent);
	const char *other;

	if (keydata)
		other = keydata;
	else
		other = rhs->refname;

	return strcmp(lhs->refname, other);
}
|
|
|
|
|
|
|
|
static struct refname_hash_entry *refname_hash_add(struct hashmap *map,
|
|
|
|
const char *refname,
|
|
|
|
const struct object_id *oid)
|
|
|
|
{
|
|
|
|
struct refname_hash_entry *ent;
|
|
|
|
size_t len = strlen(refname);
|
|
|
|
|
|
|
|
FLEX_ALLOC_MEM(ent, refname, refname, len);
|
2019-10-06 23:30:27 +00:00
|
|
|
hashmap_entry_init(&ent->ent, strhash(refname));
|
2018-09-25 20:25:04 +00:00
|
|
|
oidcpy(&ent->oid, oid);
|
2019-10-06 23:30:29 +00:00
|
|
|
hashmap_add(map, &ent->ent);
|
2018-09-25 20:25:04 +00:00
|
|
|
return ent;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int add_one_refname(const char *refname,
|
|
|
|
const struct object_id *oid,
|
2022-08-25 17:09:48 +00:00
|
|
|
int flag UNUSED, void *cbdata)
|
2018-09-25 20:25:04 +00:00
|
|
|
{
|
|
|
|
struct hashmap *refname_map = cbdata;
|
|
|
|
|
|
|
|
(void) refname_hash_add(refname_map, refname, oid);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void refname_hash_init(struct hashmap *map)
|
|
|
|
{
|
|
|
|
hashmap_init(map, refname_hash_entry_cmp, NULL, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Return 1 if "map" contains an entry for "refname", 0 otherwise.
 *
 * The name itself is passed as the key data, so no temporary entry
 * needs to be built for the lookup.
 */
static int refname_hash_exists(struct hashmap *map, const char *refname)
{
	unsigned int hash = strhash(refname);

	return hashmap_get_from_hash(map, hash, refname) != NULL;
}
|
|
|
|
|
2019-06-04 02:13:28 +00:00
|
|
|
static void clear_item(struct refname_hash_entry *item)
|
|
|
|
{
|
2019-06-04 02:13:30 +00:00
|
|
|
item->ignore = 1;
|
2019-06-04 02:13:28 +00:00
|
|
|
}
|
|
|
|
|
2022-02-17 13:04:36 +00:00
|
|
|
|
|
|
|
static void add_already_queued_tags(const char *refname,
|
2023-08-29 23:45:37 +00:00
|
|
|
const struct object_id *old_oid UNUSED,
|
2022-02-17 13:04:36 +00:00
|
|
|
const struct object_id *new_oid,
|
|
|
|
void *cb_data)
|
|
|
|
{
|
|
|
|
struct hashmap *queued_tags = cb_data;
|
|
|
|
if (starts_with(refname, "refs/tags/") && new_oid)
|
|
|
|
(void) refname_hash_add(queued_tags, refname, new_oid);
|
|
|
|
}
|
|
|
|
|
2018-06-27 22:30:21 +00:00
|
|
|
static void find_non_local_tags(const struct ref *refs,
|
2022-02-17 13:04:36 +00:00
|
|
|
struct ref_transaction *transaction,
|
2018-06-27 22:30:21 +00:00
|
|
|
struct ref **head,
|
|
|
|
struct ref ***tail)
|
2013-10-30 05:32:55 +00:00
|
|
|
{
|
2018-09-25 20:25:04 +00:00
|
|
|
struct hashmap existing_refs;
|
|
|
|
struct hashmap remote_refs;
|
2019-09-15 21:18:02 +00:00
|
|
|
struct oidset fetch_oids = OIDSET_INIT;
|
2018-09-25 20:25:04 +00:00
|
|
|
struct string_list remote_refs_list = STRING_LIST_INIT_NODUP;
|
|
|
|
struct string_list_item *remote_ref_item;
|
2013-10-30 05:32:55 +00:00
|
|
|
const struct ref *ref;
|
2018-09-25 20:25:04 +00:00
|
|
|
struct refname_hash_entry *item = NULL;
|
2020-02-21 21:47:28 +00:00
|
|
|
const int quick_flags = OBJECT_INFO_QUICK | OBJECT_INFO_SKIP_FETCH_OBJECT;
|
2018-09-25 20:25:04 +00:00
|
|
|
|
|
|
|
refname_hash_init(&existing_refs);
|
|
|
|
refname_hash_init(&remote_refs);
|
2019-09-15 21:18:02 +00:00
|
|
|
create_fetch_oidset(head, &fetch_oids);
|
2013-10-30 05:32:55 +00:00
|
|
|
|
2018-09-25 20:25:04 +00:00
|
|
|
for_each_ref(add_one_refname, &existing_refs);
|
2022-02-17 13:04:36 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If we already have a transaction, then we need to filter out all
|
|
|
|
* tags which have already been queued up.
|
|
|
|
*/
|
|
|
|
if (transaction)
|
|
|
|
ref_transaction_for_each_queued_update(transaction,
|
|
|
|
add_already_queued_tags,
|
|
|
|
&existing_refs);
|
|
|
|
|
2018-06-27 22:30:21 +00:00
|
|
|
for (ref = refs; ref; ref = ref->next) {
|
2013-12-17 19:47:35 +00:00
|
|
|
if (!starts_with(ref->name, "refs/tags/"))
|
2013-10-30 05:32:55 +00:00
|
|
|
continue;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The peeled ref always follows the matching base
|
|
|
|
* ref, so if we see a peeled ref that we don't want
|
|
|
|
* to fetch then we can mark the ref entry in the list
|
|
|
|
* as one to ignore by setting util to NULL.
|
|
|
|
*/
|
2013-12-17 19:47:35 +00:00
|
|
|
if (ends_with(ref->name, "^{}")) {
|
fetch: use "quick" has_sha1_file for tag following
When we auto-follow tags in a fetch, we look at all of the
tags advertised by the remote and fetch ones where we don't
already have the tag, but we do have the object it peels to.
This involves a lot of calls to has_sha1_file(), some of
which we can reasonably expect to fail. Since 45e8a74
(has_sha1_file: re-check pack directory before giving up,
2013-08-30), this may cause many calls to
reprepare_packed_git(), which is potentially expensive.
This has gone unnoticed for several years because it
requires a fairly unique setup to matter:
1. You need to have a lot of packs on the client side to
make reprepare_packed_git() expensive (the most
expensive part is finding duplicates in an unsorted
list, which is currently quadratic).
2. You need a large number of tag refs on the server side
that are candidates for auto-following (i.e., that the
client doesn't have). Each one triggers a re-read of
the pack directory.
3. Under normal circumstances, the client would
auto-follow those tags and after one large fetch, (2)
would no longer be true. But if those tags point to
history which is disconnected from what the client
otherwise fetches, then it will never auto-follow, and
those candidates will impact it on every fetch.
So when all three are true, each fetch pays an extra
O(nr_tags * nr_packs^2) cost, mostly in string comparisons
on the pack names. This was exacerbated by 47bf4b0
(prepare_packed_git_one: refactor duplicate-pack check,
2014-06-30) which uses a slightly more expensive string
check, under the assumption that the duplicate check doesn't
happen very often (and it shouldn't; the real problem here
is how often we are calling reprepare_packed_git()).
This patch teaches fetch to use HAS_SHA1_QUICK to sacrifice
accuracy for speed, in cases where we might be racy with a
simultaneous repack. This is similar to the fix in 0eeb077
(index-pack: avoid excessive re-reading of pack directory,
2015-06-09). As with that case, it's OK for has_sha1_file()
occasionally say "no I don't have it" when we do, because
the worst case is not a corruption, but simply that we may
fail to auto-follow a tag that points to it.
Here are results from the included perf script, which sets
up a situation similar to the one described above:
Test HEAD^ HEAD
----------------------------------------------------------
5550.4: fetch 11.21(10.42+0.78) 0.08(0.04+0.02) -99.3%
Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-10-13 16:53:44 +00:00
|
|
|
if (item &&
|
2023-03-28 13:58:50 +00:00
|
|
|
!repo_has_object_file_with_flags(the_repository, &ref->old_oid, quick_flags) &&
|
2019-09-15 21:18:02 +00:00
|
|
|
!oidset_contains(&fetch_oids, &ref->old_oid) &&
|
2023-03-28 13:58:50 +00:00
|
|
|
!repo_has_object_file_with_flags(the_repository, &item->oid, quick_flags) &&
|
2019-09-15 21:18:02 +00:00
|
|
|
!oidset_contains(&fetch_oids, &item->oid))
|
2019-06-04 02:13:28 +00:00
|
|
|
clear_item(item);
|
2013-10-30 05:32:55 +00:00
|
|
|
item = NULL;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If item is non-NULL here, then we previously saw a
|
|
|
|
* ref not followed by a peeled reference, so we need
|
|
|
|
* to check if it is a lightweight tag that we want to
|
|
|
|
* fetch.
|
|
|
|
*/
|
fetch: use "quick" has_sha1_file for tag following
When we auto-follow tags in a fetch, we look at all of the
tags advertised by the remote and fetch ones where we don't
already have the tag, but we do have the object it peels to.
This involves a lot of calls to has_sha1_file(), some of
which we can reasonably expect to fail. Since 45e8a74
(has_sha1_file: re-check pack directory before giving up,
2013-08-30), this may cause many calls to
reprepare_packed_git(), which is potentially expensive.
This has gone unnoticed for several years because it
requires a fairly unique setup to matter:
1. You need to have a lot of packs on the client side to
make reprepare_packed_git() expensive (the most
expensive part is finding duplicates in an unsorted
list, which is currently quadratic).
2. You need a large number of tag refs on the server side
that are candidates for auto-following (i.e., that the
client doesn't have). Each one triggers a re-read of
the pack directory.
3. Under normal circumstances, the client would
auto-follow those tags and after one large fetch, (2)
would no longer be true. But if those tags point to
history which is disconnected from what the client
otherwise fetches, then it will never auto-follow, and
those candidates will impact it on every fetch.
So when all three are true, each fetch pays an extra
O(nr_tags * nr_packs^2) cost, mostly in string comparisons
on the pack names. This was exacerbated by 47bf4b0
(prepare_packed_git_one: refactor duplicate-pack check,
2014-06-30) which uses a slightly more expensive string
check, under the assumption that the duplicate check doesn't
happen very often (and it shouldn't; the real problem here
is how often we are calling reprepare_packed_git()).
This patch teaches fetch to use HAS_SHA1_QUICK to sacrifice
accuracy for speed, in cases where we might be racy with a
simultaneous repack. This is similar to the fix in 0eeb077
(index-pack: avoid excessive re-reading of pack directory,
2015-06-09). As with that case, it's OK for has_sha1_file()
occasionally say "no I don't have it" when we do, because
the worst case is not a corruption, but simply that we may
fail to auto-follow a tag that points to it.
Here are results from the included perf script, which sets
up a situation similar to the one described above:
Test HEAD^ HEAD
----------------------------------------------------------
5550.4: fetch 11.21(10.42+0.78) 0.08(0.04+0.02) -99.3%
Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-10-13 16:53:44 +00:00
|
|
|
if (item &&
|
2023-03-28 13:58:50 +00:00
|
|
|
!repo_has_object_file_with_flags(the_repository, &item->oid, quick_flags) &&
|
2019-09-15 21:18:02 +00:00
|
|
|
!oidset_contains(&fetch_oids, &item->oid))
|
2019-06-04 02:13:28 +00:00
|
|
|
clear_item(item);
|
2013-10-30 05:32:55 +00:00
|
|
|
|
|
|
|
item = NULL;
|
|
|
|
|
|
|
|
/* skip duplicates and refs that we already have */
|
2018-09-25 20:25:04 +00:00
|
|
|
if (refname_hash_exists(&remote_refs, ref->name) ||
|
|
|
|
refname_hash_exists(&existing_refs, ref->name))
|
2013-10-30 05:32:55 +00:00
|
|
|
continue;
|
|
|
|
|
2018-09-25 20:25:04 +00:00
|
|
|
item = refname_hash_add(&remote_refs, ref->name, &ref->old_oid);
|
|
|
|
string_list_insert(&remote_refs_list, ref->name);
|
2013-10-30 05:32:55 +00:00
|
|
|
}
|
2020-11-02 18:55:05 +00:00
|
|
|
hashmap_clear_and_free(&existing_refs, struct refname_hash_entry, ent);
|
2013-10-30 05:32:55 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We may have a final lightweight tag that needs to be
|
|
|
|
* checked to see if it needs fetching.
|
|
|
|
*/
|
fetch: use "quick" has_sha1_file for tag following
When we auto-follow tags in a fetch, we look at all of the
tags advertised by the remote and fetch ones where we don't
already have the tag, but we do have the object it peels to.
This involves a lot of calls to has_sha1_file(), some of
which we can reasonably expect to fail. Since 45e8a74
(has_sha1_file: re-check pack directory before giving up,
2013-08-30), this may cause many calls to
reprepare_packed_git(), which is potentially expensive.
This has gone unnoticed for several years because it
requires a fairly unique setup to matter:
1. You need to have a lot of packs on the client side to
make reprepare_packed_git() expensive (the most
expensive part is finding duplicates in an unsorted
list, which is currently quadratic).
2. You need a large number of tag refs on the server side
that are candidates for auto-following (i.e., that the
client doesn't have). Each one triggers a re-read of
the pack directory.
3. Under normal circumstances, the client would
auto-follow those tags and after one large fetch, (2)
would no longer be true. But if those tags point to
history which is disconnected from what the client
otherwise fetches, then it will never auto-follow, and
those candidates will impact it on every fetch.
So when all three are true, each fetch pays an extra
O(nr_tags * nr_packs^2) cost, mostly in string comparisons
on the pack names. This was exacerbated by 47bf4b0
(prepare_packed_git_one: refactor duplicate-pack check,
2014-06-30) which uses a slightly more expensive string
check, under the assumption that the duplicate check doesn't
happen very often (and it shouldn't; the real problem here
is how often we are calling reprepare_packed_git()).
This patch teaches fetch to use HAS_SHA1_QUICK to sacrifice
accuracy for speed, in cases where we might be racy with a
simultaneous repack. This is similar to the fix in 0eeb077
(index-pack: avoid excessive re-reading of pack directory,
2015-06-09). As with that case, it's OK for has_sha1_file()
occasionally say "no I don't have it" when we do, because
the worst case is not a corruption, but simply that we may
fail to auto-follow a tag that points to it.
Here are results from the included perf script, which sets
up a situation similar to the one described above:
Test HEAD^ HEAD
----------------------------------------------------------
5550.4: fetch 11.21(10.42+0.78) 0.08(0.04+0.02) -99.3%
Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-10-13 16:53:44 +00:00
|
|
|
if (item &&
|
2023-03-28 13:58:50 +00:00
|
|
|
!repo_has_object_file_with_flags(the_repository, &item->oid, quick_flags) &&
|
2019-09-15 21:18:02 +00:00
|
|
|
!oidset_contains(&fetch_oids, &item->oid))
|
2019-06-04 02:13:28 +00:00
|
|
|
clear_item(item);
|
2013-10-30 05:32:55 +00:00
|
|
|
|
|
|
|
/*
|
2018-09-25 20:25:04 +00:00
|
|
|
* For all the tags in the remote_refs_list,
|
2013-10-30 05:32:55 +00:00
|
|
|
* add them to the list of refs to be fetched
|
|
|
|
*/
|
2018-09-25 20:25:04 +00:00
|
|
|
for_each_string_list_item(remote_ref_item, &remote_refs_list) {
|
|
|
|
const char *refname = remote_ref_item->string;
|
2019-06-04 02:13:29 +00:00
|
|
|
struct ref *rm;
|
2019-10-06 23:30:36 +00:00
|
|
|
unsigned int hash = strhash(refname);
|
2018-09-25 20:25:04 +00:00
|
|
|
|
2019-10-06 23:30:36 +00:00
|
|
|
item = hashmap_get_entry_from_hash(&remote_refs, hash, refname,
|
|
|
|
struct refname_hash_entry, ent);
|
2018-09-25 20:25:04 +00:00
|
|
|
if (!item)
|
|
|
|
BUG("unseen remote ref?");
|
|
|
|
|
2013-10-30 05:32:55 +00:00
|
|
|
/* Unless we have already decided to ignore this item... */
|
2019-06-04 02:13:30 +00:00
|
|
|
if (item->ignore)
|
2019-06-04 02:13:29 +00:00
|
|
|
continue;
|
|
|
|
|
|
|
|
rm = alloc_ref(item->refname);
|
|
|
|
rm->peer_ref = alloc_ref(item->refname);
|
|
|
|
oidcpy(&rm->old_oid, &item->oid);
|
|
|
|
**tail = rm;
|
|
|
|
*tail = &rm->next;
|
2013-10-30 05:32:55 +00:00
|
|
|
}
|
2020-11-02 18:55:05 +00:00
|
|
|
hashmap_clear_and_free(&remote_refs, struct refname_hash_entry, ent);
|
2018-09-25 20:25:04 +00:00
|
|
|
string_list_clear(&remote_refs_list, 0);
|
2019-09-15 21:18:02 +00:00
|
|
|
oidset_clear(&fetch_oids);
|
2013-10-30 05:32:55 +00:00
|
|
|
}
|
2008-03-03 02:35:25 +00:00
|
|
|
|
2021-04-16 12:49:57 +00:00
|
|
|
static void filter_prefetch_refspec(struct refspec *rs)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (!prefetch)
|
|
|
|
return;
|
|
|
|
|
|
|
|
for (i = 0; i < rs->nr; i++) {
|
|
|
|
struct strbuf new_dst = STRBUF_INIT;
|
|
|
|
char *old_dst;
|
|
|
|
const char *sub = NULL;
|
|
|
|
|
|
|
|
if (rs->items[i].negative)
|
|
|
|
continue;
|
|
|
|
if (!rs->items[i].dst ||
|
|
|
|
(rs->items[i].src &&
|
2022-08-05 17:58:43 +00:00
|
|
|
!strncmp(rs->items[i].src,
|
|
|
|
ref_namespace[NAMESPACE_TAGS].ref,
|
|
|
|
strlen(ref_namespace[NAMESPACE_TAGS].ref)))) {
|
2021-04-16 12:49:57 +00:00
|
|
|
int j;
|
|
|
|
|
|
|
|
free(rs->items[i].src);
|
|
|
|
free(rs->items[i].dst);
|
|
|
|
|
|
|
|
for (j = i + 1; j < rs->nr; j++) {
|
|
|
|
rs->items[j - 1] = rs->items[j];
|
|
|
|
rs->raw[j - 1] = rs->raw[j];
|
|
|
|
}
|
|
|
|
rs->nr--;
|
|
|
|
i--;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
old_dst = rs->items[i].dst;
|
2022-08-05 17:58:43 +00:00
|
|
|
strbuf_addstr(&new_dst, ref_namespace[NAMESPACE_PREFETCH].ref);
|
2021-04-16 12:49:57 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If old_dst starts with "refs/", then place
|
|
|
|
* sub after that prefix. Otherwise, start at
|
|
|
|
* the beginning of the string.
|
|
|
|
*/
|
|
|
|
if (!skip_prefix(old_dst, "refs/", &sub))
|
|
|
|
sub = old_dst;
|
|
|
|
strbuf_addstr(&new_dst, sub);
|
|
|
|
|
|
|
|
rs->items[i].dst = strbuf_detach(&new_dst, NULL);
|
|
|
|
rs->items[i].force = 1;
|
|
|
|
|
|
|
|
free(old_dst);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-06-27 22:30:21 +00:00
|
|
|
static struct ref *get_ref_map(struct remote *remote,
|
|
|
|
const struct ref *remote_refs,
|
2018-05-16 22:58:08 +00:00
|
|
|
struct refspec *rs,
|
2013-10-23 15:50:38 +00:00
|
|
|
int tags, int *autotags)
|
2007-09-11 03:03:25 +00:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
struct ref *rm;
|
|
|
|
struct ref *ref_map = NULL;
|
|
|
|
struct ref **tail = &ref_map;
|
|
|
|
|
2013-10-30 05:32:59 +00:00
|
|
|
/* opportunistically-updated references: */
|
|
|
|
struct ref *orefs = NULL, **oref_tail = &orefs;
|
2007-09-11 03:03:25 +00:00
|
|
|
|
2018-09-25 20:25:04 +00:00
|
|
|
struct hashmap existing_refs;
|
2020-08-18 04:01:34 +00:00
|
|
|
int existing_refs_populated = 0;
|
2013-05-11 16:16:52 +00:00
|
|
|
|
2021-04-16 12:49:57 +00:00
|
|
|
filter_prefetch_refspec(rs);
|
|
|
|
if (remote)
|
|
|
|
filter_prefetch_refspec(&remote->fetch);
|
|
|
|
|
2018-05-16 22:58:08 +00:00
|
|
|
if (rs->nr) {
|
2014-05-29 22:21:31 +00:00
|
|
|
struct refspec *fetch_refspec;
|
|
|
|
|
2018-05-16 22:58:08 +00:00
|
|
|
for (i = 0; i < rs->nr; i++) {
|
|
|
|
get_fetch_map(remote_refs, &rs->items[i], &tail, 0);
|
|
|
|
if (rs->items[i].dst && rs->items[i].dst[0])
|
2007-09-11 03:03:25 +00:00
|
|
|
*autotags = 1;
|
|
|
|
}
|
2013-10-30 05:32:58 +00:00
|
|
|
/* Merge everything on the command line (but not --tags) */
|
2007-09-11 03:03:25 +00:00
|
|
|
for (rm = ref_map; rm; rm = rm->next)
|
2013-05-11 16:15:59 +00:00
|
|
|
rm->fetch_head_status = FETCH_HEAD_MERGE;
|
2013-05-11 16:16:52 +00:00
|
|
|
|
|
|
|
/*
|
2013-10-30 05:32:58 +00:00
|
|
|
* For any refs that we happen to be fetching via
|
|
|
|
* command-line arguments, the destination ref might
|
|
|
|
* have been missing or have been different than the
|
|
|
|
* remote-tracking ref that would be derived from the
|
|
|
|
* configured refspec. In these cases, we want to
|
|
|
|
* take the opportunity to update their configured
|
|
|
|
* remote-tracking reference. However, we do not want
|
|
|
|
* to mention these entries in FETCH_HEAD at all, as
|
|
|
|
* they would simply be duplicates of existing
|
|
|
|
* entries, so we set them FETCH_HEAD_IGNORE below.
|
|
|
|
*
|
|
|
|
* We compute these entries now, based only on the
|
|
|
|
* refspecs specified on the command line. But we add
|
|
|
|
* them to the list following the refspecs resulting
|
|
|
|
* from the tags option so that one of the latter,
|
|
|
|
* which has FETCH_HEAD_NOT_FOR_MERGE, is not removed
|
|
|
|
* by ref_remove_duplicates() in favor of one of these
|
|
|
|
* opportunistic entries with FETCH_HEAD_IGNORE.
|
2013-05-11 16:16:52 +00:00
|
|
|
*/
|
2018-05-16 22:58:08 +00:00
|
|
|
if (refmap.nr)
|
|
|
|
fetch_refspec = &refmap;
|
|
|
|
else
|
2018-06-27 22:30:21 +00:00
|
|
|
fetch_refspec = &remote->fetch;
|
2014-05-29 22:21:31 +00:00
|
|
|
|
2018-05-16 22:58:08 +00:00
|
|
|
for (i = 0; i < fetch_refspec->nr; i++)
|
|
|
|
get_fetch_map(ref_map, &fetch_refspec->items[i], &oref_tail, 1);
|
2018-05-16 22:58:05 +00:00
|
|
|
} else if (refmap.nr) {
|
2021-12-01 22:15:40 +00:00
|
|
|
die("--refmap option is only meaningful with command-line refspec(s)");
|
2007-09-11 03:03:25 +00:00
|
|
|
} else {
|
|
|
|
/* Use the defaults */
|
2007-09-18 08:54:53 +00:00
|
|
|
struct branch *branch = branch_get(NULL);
|
|
|
|
int has_merge = branch_has_merge_config(branch);
|
builtin/fetch.c: ignore merge config when not fetching from branch's remote
When 'git fetch' is supplied a single argument, it tries to match it
against a configured remote and then fetch the refs specified by the
named remote's fetchspec. Additionally, or alternatively, if the current
branch has a merge ref configured, and if the name of the remote supplied
to fetch matches the one in the branch's configuration, then git also adds
the merge ref to the list of refs to update.
If the argument to fetch does not specify a named remote, or if the name
supplied does not match the remote configured for the current branch, then
the current branch's merge configuration should not be considered.
git currently mishandles the case when the argument to fetch specifies a
GIT URL(i.e. not a named remote) and the current branch has a configured
merge ref. In this case, fetch should ignore the branch's merge ref and
attempt to fetch from the remote repository's HEAD branch. But, since
fetch only checks _whether_ the current branch has a merge ref configured,
and does _not_ check whether the branch's configured remote matches the
command line argument (until later), it will mistakenly enter the wrong
branch of an 'if' statement and will not fall back to fetch the HEAD branch.
The fetch ends up doing nothing and returns with a successful zero status.
Fix this by comparing the remote repository's name to the branch's remote
name, in addition to whether it has a configured merge ref, sooner, so that
fetch can correctly decide whether the branch's configuration is interesting
or not, and fall back to fetching from the remote's HEAD branch when
appropriate.
This fixes the test in t5510.
Signed-off-by: Brandon Casey <casey@nrlssc.navy.mil>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-08-25 17:52:56 +00:00
|
|
|
if (remote &&
|
2018-05-16 22:58:01 +00:00
|
|
|
(remote->fetch.nr ||
|
2010-09-09 18:56:36 +00:00
|
|
|
/* Note: has_merge implies non-NULL branch->remote_name */
|
builtin/fetch.c: ignore merge config when not fetching from branch's remote
When 'git fetch' is supplied a single argument, it tries to match it
against a configured remote and then fetch the refs specified by the
named remote's fetchspec. Additionally, or alternatively, if the current
branch has a merge ref configured, and if the name of the remote supplied
to fetch matches the one in the branch's configuration, then git also adds
the merge ref to the list of refs to update.
If the argument to fetch does not specify a named remote, or if the name
supplied does not match the remote configured for the current branch, then
the current branch's merge configuration should not be considered.
git currently mishandles the case when the argument to fetch specifies a
GIT URL(i.e. not a named remote) and the current branch has a configured
merge ref. In this case, fetch should ignore the branch's merge ref and
attempt to fetch from the remote repository's HEAD branch. But, since
fetch only checks _whether_ the current branch has a merge ref configured,
and does _not_ check whether the branch's configured remote matches the
command line argument (until later), it will mistakenly enter the wrong
branch of an 'if' statement and will not fall back to fetch the HEAD branch.
The fetch ends up doing nothing and returns with a successful zero status.
Fix this by comparing the remote repository's name to the branch's remote
name, in addition to whether it has a configured merge ref, sooner, so that
fetch can correctly decide whether the branch's configuration is interesting
or not, and fall back to fetching from the remote's HEAD branch when
appropriate.
This fixes the test in t5510.
Signed-off-by: Brandon Casey <casey@nrlssc.navy.mil>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-08-25 17:52:56 +00:00
|
|
|
(has_merge && !strcmp(branch->remote_name, remote->name)))) {
|
2018-05-16 22:58:01 +00:00
|
|
|
for (i = 0; i < remote->fetch.nr; i++) {
|
|
|
|
get_fetch_map(remote_refs, &remote->fetch.items[i], &tail, 0);
|
|
|
|
if (remote->fetch.items[i].dst &&
|
|
|
|
remote->fetch.items[i].dst[0])
|
2007-09-11 03:03:25 +00:00
|
|
|
*autotags = 1;
|
2007-09-18 08:54:53 +00:00
|
|
|
if (!i && !has_merge && ref_map &&
|
2018-05-16 22:58:01 +00:00
|
|
|
!remote->fetch.items[0].pattern)
|
2013-05-11 16:15:59 +00:00
|
|
|
ref_map->fetch_head_status = FETCH_HEAD_MERGE;
|
2007-09-11 03:03:25 +00:00
|
|
|
}
|
2007-10-11 00:47:55 +00:00
|
|
|
/*
|
|
|
|
* if the remote we're fetching from is the same
|
|
|
|
* as given in branch.<name>.remote, we add the
|
|
|
|
* ref given in branch.<name>.merge, too.
|
2010-09-09 18:56:36 +00:00
|
|
|
*
|
|
|
|
* Note: has_merge implies non-NULL branch->remote_name
|
2007-10-11 00:47:55 +00:00
|
|
|
*/
|
2007-10-27 06:09:48 +00:00
|
|
|
if (has_merge &&
|
|
|
|
!strcmp(branch->remote_name, remote->name))
|
2007-09-18 08:54:53 +00:00
|
|
|
add_merge_config(&ref_map, remote_refs, branch, &tail);
|
2021-04-16 12:49:57 +00:00
|
|
|
} else if (!prefetch) {
|
2007-09-11 03:03:25 +00:00
|
|
|
ref_map = get_remote_ref(remote_refs, "HEAD");
|
2007-10-27 06:09:48 +00:00
|
|
|
if (!ref_map)
|
2021-12-01 22:15:40 +00:00
|
|
|
die(_("couldn't find remote ref HEAD"));
|
2013-05-11 16:15:59 +00:00
|
|
|
ref_map->fetch_head_status = FETCH_HEAD_MERGE;
|
2008-03-03 02:34:51 +00:00
|
|
|
tail = &ref_map->next;
|
2007-09-11 03:03:25 +00:00
|
|
|
}
|
|
|
|
}
|
2013-10-30 05:32:58 +00:00
|
|
|
|
2013-10-30 05:32:59 +00:00
|
|
|
if (tags == TAGS_SET)
|
|
|
|
/* also fetch all tags */
|
|
|
|
get_fetch_map(remote_refs, tag_refspec, &tail, 0);
|
|
|
|
else if (tags == TAGS_DEFAULT && *autotags)
|
2022-02-17 13:04:36 +00:00
|
|
|
find_non_local_tags(remote_refs, NULL, &ref_map, &tail);
|
2013-10-30 05:32:58 +00:00
|
|
|
|
2013-10-30 05:32:59 +00:00
|
|
|
/* Now append any refs to be updated opportunistically: */
|
|
|
|
*tail = orefs;
|
|
|
|
for (rm = orefs; rm; rm = rm->next) {
|
|
|
|
rm->fetch_head_status = FETCH_HEAD_IGNORE;
|
|
|
|
tail = &rm->next;
|
|
|
|
}
|
|
|
|
|
refspec: add support for negative refspecs
Both fetch and push support pattern refspecs which allow fetching or
pushing references that match a specific pattern. Because these patterns
are globs, they have somewhat limited ability to express more complex
situations.
For example, suppose you wish to fetch all branches from a remote except
for a specific one. To allow this, you must setup a set of refspecs
which match only the branches you want. Because refspecs are either
explicit name matches, or simple globs, many patterns cannot be
expressed.
Add support for a new type of refspec, referred to as "negative"
refspecs. These are prefixed with a '^' and mean "exclude any ref
matching this refspec". They can only have one "side" which always
refers to the source. During a fetch, this refers to the name of the ref
on the remote. During a push, this refers to the name of the ref on the
local side.
With negative refspecs, users can express more complex patterns. For
example:
git fetch origin refs/heads/*:refs/remotes/origin/* ^refs/heads/dontwant
will fetch all branches on origin into remotes/origin, but will exclude
fetching the branch named dontwant.
Refspecs today are commutative, meaning that order doesn't expressly
matter. Rather than forcing an implied order, negative refspecs will
always be applied last. That is, in order to match, a ref must match at
least one positive refspec, and match none of the negative refspecs.
This is similar to how negative pathspecs work.
Signed-off-by: Jacob Keller <jacob.keller@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-30 21:25:29 +00:00
|
|
|
/*
|
|
|
|
* apply negative refspecs first, before we remove duplicates. This is
|
|
|
|
* necessary as negative refspecs might remove an otherwise conflicting
|
|
|
|
* duplicate.
|
|
|
|
*/
|
|
|
|
if (rs->nr)
|
|
|
|
ref_map = apply_negative_refspecs(ref_map, rs);
|
|
|
|
else
|
|
|
|
ref_map = apply_negative_refspecs(ref_map, &remote->fetch);
|
|
|
|
|
2018-06-27 22:30:19 +00:00
|
|
|
ref_map = ref_remove_duplicates(ref_map);
|
|
|
|
|
|
|
|
for (rm = ref_map; rm; rm = rm->next) {
|
|
|
|
if (rm->peer_ref) {
|
2018-09-25 20:25:04 +00:00
|
|
|
const char *refname = rm->peer_ref->name;
|
|
|
|
struct refname_hash_entry *peer_item;
|
2019-10-06 23:30:36 +00:00
|
|
|
unsigned int hash = strhash(refname);
|
2018-09-25 20:25:04 +00:00
|
|
|
|
2020-08-18 04:01:34 +00:00
|
|
|
if (!existing_refs_populated) {
|
|
|
|
refname_hash_init(&existing_refs);
|
|
|
|
for_each_ref(add_one_refname, &existing_refs);
|
|
|
|
existing_refs_populated = 1;
|
|
|
|
}
|
|
|
|
|
2019-10-06 23:30:36 +00:00
|
|
|
peer_item = hashmap_get_entry_from_hash(&existing_refs,
|
|
|
|
hash, refname,
|
|
|
|
struct refname_hash_entry, ent);
|
2018-06-27 22:30:19 +00:00
|
|
|
if (peer_item) {
|
2018-09-25 20:25:04 +00:00
|
|
|
struct object_id *old_oid = &peer_item->oid;
|
2018-06-27 22:30:19 +00:00
|
|
|
oidcpy(&rm->peer_ref->old_oid, old_oid);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-08-18 04:01:34 +00:00
|
|
|
if (existing_refs_populated)
|
2020-11-02 18:55:05 +00:00
|
|
|
hashmap_clear_and_free(&existing_refs, struct refname_hash_entry, ent);
|
2018-06-27 22:30:19 +00:00
|
|
|
|
|
|
|
return ref_map;
|
2007-09-11 03:03:25 +00:00
|
|
|
}
|
|
|
|
|
2009-05-25 10:40:54 +00:00
|
|
|
#define STORE_REF_ERROR_OTHER 1
|
|
|
|
#define STORE_REF_ERROR_DF_CONFLICT 2
|
|
|
|
|
2007-09-11 03:03:25 +00:00
|
|
|
static int s_update_ref(const char *action,
|
|
|
|
struct ref *ref,
|
2021-01-12 12:27:48 +00:00
|
|
|
struct ref_transaction *transaction,
|
2007-09-11 03:03:25 +00:00
|
|
|
int check_old)
|
|
|
|
{
|
2017-03-28 19:46:26 +00:00
|
|
|
char *msg;
|
2007-09-11 03:03:25 +00:00
|
|
|
char *rla = getenv("GIT_REFLOG_ACTION");
|
2021-01-12 12:27:48 +00:00
|
|
|
struct ref_transaction *our_transaction = NULL;
|
2014-04-28 20:49:07 +00:00
|
|
|
struct strbuf err = STRBUF_INIT;
|
2021-01-12 12:27:43 +00:00
|
|
|
int ret;
|
2007-09-11 03:03:25 +00:00
|
|
|
|
2009-11-10 08:19:43 +00:00
|
|
|
if (dry_run)
|
|
|
|
return 0;
|
2007-09-11 03:03:25 +00:00
|
|
|
if (!rla)
|
2007-12-04 07:25:46 +00:00
|
|
|
rla = default_rla.buf;
|
2017-03-28 19:46:26 +00:00
|
|
|
msg = xstrfmt("%s: %s", rla, action);
|
2014-04-28 20:49:07 +00:00
|
|
|
|
2021-01-12 12:27:48 +00:00
|
|
|
/*
|
|
|
|
* If no transaction was passed to us, we manage the transaction
|
|
|
|
* ourselves. Otherwise, we trust the caller to handle the transaction
|
|
|
|
* lifecycle.
|
|
|
|
*/
|
2021-01-12 12:27:43 +00:00
|
|
|
if (!transaction) {
|
2021-01-12 12:27:48 +00:00
|
|
|
transaction = our_transaction = ref_transaction_begin(&err);
|
|
|
|
if (!transaction) {
|
|
|
|
ret = STORE_REF_ERROR_OTHER;
|
|
|
|
goto out;
|
|
|
|
}
|
2021-01-12 12:27:43 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
ret = ref_transaction_update(transaction, ref->name, &ref->new_oid,
|
|
|
|
check_old ? &ref->old_oid : NULL,
|
|
|
|
0, msg, &err);
|
2014-04-28 20:49:07 +00:00
|
|
|
if (ret) {
|
2021-01-12 12:27:43 +00:00
|
|
|
ret = STORE_REF_ERROR_OTHER;
|
|
|
|
goto out;
|
2014-04-28 20:49:07 +00:00
|
|
|
}
|
|
|
|
|
2021-01-12 12:27:48 +00:00
|
|
|
if (our_transaction) {
|
|
|
|
switch (ref_transaction_commit(our_transaction, &err)) {
|
|
|
|
case 0:
|
|
|
|
break;
|
|
|
|
case TRANSACTION_NAME_CONFLICT:
|
|
|
|
ret = STORE_REF_ERROR_DF_CONFLICT;
|
|
|
|
goto out;
|
|
|
|
default:
|
|
|
|
ret = STORE_REF_ERROR_OTHER;
|
|
|
|
goto out;
|
|
|
|
}
|
2021-01-12 12:27:43 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
out:
|
2021-01-12 12:27:48 +00:00
|
|
|
ref_transaction_free(our_transaction);
|
2021-01-12 12:27:43 +00:00
|
|
|
if (ret)
|
|
|
|
error("%s", err.buf);
|
2014-04-28 20:49:07 +00:00
|
|
|
strbuf_release(&err);
|
2017-03-28 19:46:26 +00:00
|
|
|
free(msg);
|
2021-01-12 12:27:43 +00:00
|
|
|
return ret;
|
2007-09-11 03:03:25 +00:00
|
|
|
}
|
|
|
|
|
2023-05-10 12:34:20 +00:00
|
|
|
static int refcol_width(const struct ref *ref_map, int compact_format)
|
2016-07-01 16:03:30 +00:00
|
|
|
{
|
2023-05-10 12:34:20 +00:00
|
|
|
const struct ref *ref;
|
|
|
|
int max, width = 10;
|
2016-07-01 16:03:30 +00:00
|
|
|
|
2023-05-10 12:34:20 +00:00
|
|
|
max = term_columns();
|
|
|
|
if (compact_format)
|
|
|
|
max = max * 2 / 3;
|
2016-07-01 16:03:30 +00:00
|
|
|
|
2023-05-10 12:34:20 +00:00
|
|
|
for (ref = ref_map; ref; ref = ref->next) {
|
|
|
|
int rlen, llen = 0, len;
|
2016-07-01 16:03:31 +00:00
|
|
|
|
2023-05-10 12:34:20 +00:00
|
|
|
if (ref->status == REF_STATUS_REJECT_SHALLOW ||
|
|
|
|
!ref->peer_ref ||
|
|
|
|
!strcmp(ref->name, "HEAD"))
|
|
|
|
continue;
|
2016-07-01 16:03:30 +00:00
|
|
|
|
2023-05-10 12:34:20 +00:00
|
|
|
/* uptodate lines are only shown on high verbosity level */
|
|
|
|
if (verbosity <= 0 && oideq(&ref->peer_ref->old_oid, &ref->old_oid))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
rlen = utf8_strwidth(prettify_refname(ref->name));
|
|
|
|
if (!compact_format)
|
|
|
|
llen = utf8_strwidth(prettify_refname(ref->peer_ref->name));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* rough estimation to see if the output line is too long and
|
|
|
|
* should not be counted (we can't do precise calculation
|
|
|
|
* anyway because we don't know if the error explanation part
|
|
|
|
* will be printed in update_local_ref)
|
|
|
|
*/
|
|
|
|
len = 21 /* flag and summary */ + rlen + 4 /* -> */ + llen;
|
|
|
|
if (len >= max)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (width < rlen)
|
|
|
|
width = rlen;
|
2016-07-01 16:03:31 +00:00
|
|
|
}
|
2016-07-01 16:03:30 +00:00
|
|
|
|
2023-05-10 12:34:20 +00:00
|
|
|
return width;
|
2016-07-01 16:03:30 +00:00
|
|
|
}
|
|
|
|
|
2023-03-20 12:35:36 +00:00
|
|
|
static void display_state_init(struct display_state *display_state, struct ref *ref_map,
|
2023-05-10 12:34:28 +00:00
|
|
|
const char *raw_url, enum display_format format)
|
2016-07-01 16:03:30 +00:00
|
|
|
{
|
2023-03-20 12:35:36 +00:00
|
|
|
int i;
|
2016-07-01 16:03:31 +00:00
|
|
|
|
2023-03-20 12:35:20 +00:00
|
|
|
memset(display_state, 0, sizeof(*display_state));
|
2023-03-20 12:35:40 +00:00
|
|
|
strbuf_init(&display_state->buf, 0);
|
2023-05-10 12:34:28 +00:00
|
|
|
display_state->format = format;
|
2023-03-20 12:35:40 +00:00
|
|
|
|
2023-03-20 12:35:36 +00:00
|
|
|
if (raw_url)
|
|
|
|
display_state->url = transport_anonymize_url(raw_url);
|
|
|
|
else
|
|
|
|
display_state->url = xstrdup("foreign");
|
|
|
|
|
|
|
|
display_state->url_len = strlen(display_state->url);
|
|
|
|
for (i = display_state->url_len - 1; display_state->url[i] == '/' && 0 <= i; i--)
|
|
|
|
;
|
|
|
|
display_state->url_len = i + 1;
|
|
|
|
if (4 < i && !strncmp(".git", display_state->url + i - 3, 4))
|
|
|
|
display_state->url_len = i - 3;
|
2016-07-01 16:03:31 +00:00
|
|
|
|
fetch: skip formatting updated refs with `--quiet`
When fetching, Git will by default print a list of all updated refs in a
nicely formatted table. In order to come up with this table, Git needs
to iterate refs twice: first to determine the maximum column width, and
a second time to actually format these changed refs.
While this table will not be printed in case the user passes `--quiet`,
we still go out of our way and do all these steps. In fact, we even do
more work compared to not passing `--quiet`: without the flag, we will
skip all references in the column width computation which have not been
updated, but if it is set we will now compute widths for all refs.
Fix this issue by completely skipping both preparation of the format and
formatting data for display in case the user passes `--quiet`, improving
performance especially with many refs. The following benchmark shows a
nice speedup for a quiet mirror-fetch in a repository with 2.3M refs:
Benchmark #1: HEAD~: git-fetch
Time (mean ± σ): 26.929 s ± 0.145 s [User: 24.194 s, System: 4.656 s]
Range (min … max): 26.692 s … 27.068 s 5 runs
Benchmark #2: HEAD: git-fetch
Time (mean ± σ): 25.189 s ± 0.094 s [User: 22.556 s, System: 4.606 s]
Range (min … max): 25.070 s … 25.314 s 5 runs
Summary
'HEAD: git-fetch' ran
1.07 ± 0.01 times faster than 'HEAD~: git-fetch'
While at it, this patch also fixes `adjust_refcol_width()` such that it
skips unchanged refs in case the user passed `--quiet`, where verbosity
will be negative. While this function won't be called anymore if so,
this brings the comment in line with actual code. Furthermore, needless
`verbosity >= 0` checks are now removed in `store_updated_refs()`: we
never print to the `note` buffer anymore in case `verbosity < 0`, so we
won't end up in that code block anyway.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-08-30 10:54:26 +00:00
|
|
|
if (verbosity < 0)
|
|
|
|
return;
|
|
|
|
|
2023-05-10 12:34:24 +00:00
|
|
|
switch (display_state->format) {
|
|
|
|
case DISPLAY_FORMAT_FULL:
|
|
|
|
case DISPLAY_FORMAT_COMPACT:
|
|
|
|
display_state->refcol_width = refcol_width(ref_map,
|
|
|
|
display_state->format == DISPLAY_FORMAT_COMPACT);
|
|
|
|
break;
|
fetch: introduce machine-parseable "porcelain" output format
The output of git-fetch(1) is obviously designed for consumption by
users, only: we neatly columnize data, we abbreviate reference names, we
print neat arrows and we don't provide information about actual object
IDs that have changed. This makes the output format basically unusable
in the context of scripted invocations of git-fetch(1) that want to
learn about the exact changes that the command performs.
Introduce a new machine-parseable "porcelain" output format that is
supposed to fix this shortcoming. This output format is intended to
provide information about every reference that is about to be updated,
the old object ID that the reference has been pointing to and the new
object ID it will be updated to. Furthermore, the output format provides
the same flags as the human-readable format to indicate basic conditions
for each reference update like whether it was a fast-forward update, a
branch deletion, a rejected update or others.
The output format is quite simple:
```
<flag> <old-object-id> <new-object-id> <local-reference>\n
```
We assume two conditions which are generally true:
- The old and new object IDs have fixed known widths and cannot
contain spaces.
- References cannot contain newlines.
With these assumptions, the output format becomes unambiguously
parseable. Furthermore, given that this output is designed to be
consumed by scripts, the machine-readable data is printed to stdout
instead of stderr like the human-readable output is. This is mostly done
so that other data printed to stderr, like error messages or progress
meters, don't interfere with the parseable data.
A notable ommission here is that the output format does not include the
remote from which a reference was fetched, which might be important
information especially in the context of multi-remote fetches. But as
such a format would require us to print the remote for every single
reference update due to parallelizable fetches it feels wasteful for the
most likely usecase, which is when fetching from a single remote.
In a similar spirit, a second restriction is that this cannot be used
with `--recurse-submodules`. This is because any reference updates would
be ambiguous without also printing the repository in which the update
happens.
Considering that both multi-remote and submodule fetches are user-facing
features, using them in conjunction with `--porcelain` that is intended
for scripting purposes is likely not going to be useful in the majority
of cases. With that in mind these restrictions feel acceptable. If
usecases for either of these come up in the future though it is easy
enough to add a new "porcelain-v2" format that adds this information.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:36 +00:00
|
|
|
case DISPLAY_FORMAT_PORCELAIN:
|
|
|
|
/* We don't need to precompute anything here. */
|
|
|
|
break;
|
2023-05-10 12:34:24 +00:00
|
|
|
default:
|
|
|
|
BUG("unexpected display format %d", display_state->format);
|
2016-07-01 16:03:30 +00:00
|
|
|
}
|
|
|
|
}
|
2007-11-03 05:32:48 +00:00
|
|
|
|
2023-03-20 12:35:36 +00:00
|
|
|
static void display_state_release(struct display_state *display_state)
|
|
|
|
{
|
2023-03-20 12:35:40 +00:00
|
|
|
strbuf_release(&display_state->buf);
|
2023-03-20 12:35:36 +00:00
|
|
|
free(display_state->url);
|
|
|
|
}
|
|
|
|
|
2023-03-20 12:35:20 +00:00
|
|
|
static void print_remote_to_local(struct display_state *display_state,
|
2016-07-01 16:03:31 +00:00
|
|
|
const char *remote, const char *local)
|
|
|
|
{
|
2023-03-20 12:35:40 +00:00
|
|
|
strbuf_addf(&display_state->buf, "%-*s -> %s",
|
|
|
|
display_state->refcol_width, remote, local);
|
2016-07-01 16:03:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int find_and_replace(struct strbuf *haystack,
|
|
|
|
const char *needle,
|
|
|
|
const char *placeholder)
|
|
|
|
{
|
2019-01-25 09:51:22 +00:00
|
|
|
const char *p = NULL;
|
2016-07-01 16:03:31 +00:00
|
|
|
int plen, nlen;
|
|
|
|
|
2019-01-25 09:51:22 +00:00
|
|
|
nlen = strlen(needle);
|
|
|
|
if (ends_with(haystack->buf, needle))
|
|
|
|
p = haystack->buf + haystack->len - nlen;
|
|
|
|
else
|
|
|
|
p = strstr(haystack->buf, needle);
|
2016-07-01 16:03:31 +00:00
|
|
|
if (!p)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (p > haystack->buf && p[-1] != '/')
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
plen = strlen(p);
|
|
|
|
if (plen > nlen && p[nlen] != '/')
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
strbuf_splice(haystack, p - haystack->buf, nlen,
|
|
|
|
placeholder, strlen(placeholder));
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2023-03-20 12:35:40 +00:00
|
|
|
static void print_compact(struct display_state *display_state,
|
2016-07-01 16:03:31 +00:00
|
|
|
const char *remote, const char *local)
|
|
|
|
{
|
|
|
|
struct strbuf r = STRBUF_INIT;
|
|
|
|
struct strbuf l = STRBUF_INIT;
|
|
|
|
|
|
|
|
if (!strcmp(remote, local)) {
|
2023-03-20 12:35:40 +00:00
|
|
|
strbuf_addf(&display_state->buf, "%-*s -> *", display_state->refcol_width, remote);
|
2016-07-01 16:03:31 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
strbuf_addstr(&r, remote);
|
|
|
|
strbuf_addstr(&l, local);
|
|
|
|
|
|
|
|
if (!find_and_replace(&r, local, "*"))
|
|
|
|
find_and_replace(&l, remote, "*");
|
2023-03-20 12:35:40 +00:00
|
|
|
print_remote_to_local(display_state, r.buf, l.buf);
|
2016-07-01 16:03:31 +00:00
|
|
|
|
|
|
|
strbuf_release(&r);
|
|
|
|
strbuf_release(&l);
|
|
|
|
}
|
|
|
|
|
2023-03-20 12:35:40 +00:00
|
|
|
static void display_ref_update(struct display_state *display_state, char code,
|
|
|
|
const char *summary, const char *error,
|
|
|
|
const char *remote, const char *local,
|
fetch: introduce machine-parseable "porcelain" output format
The output of git-fetch(1) is obviously designed for consumption by
users, only: we neatly columnize data, we abbreviate reference names, we
print neat arrows and we don't provide information about actual object
IDs that have changed. This makes the output format basically unusable
in the context of scripted invocations of git-fetch(1) that want to
learn about the exact changes that the command performs.
Introduce a new machine-parseable "porcelain" output format that is
supposed to fix this shortcoming. This output format is intended to
provide information about every reference that is about to be updated,
the old object ID that the reference has been pointing to and the new
object ID it will be updated to. Furthermore, the output format provides
the same flags as the human-readable format to indicate basic conditions
for each reference update like whether it was a fast-forward update, a
branch deletion, a rejected update or others.
The output format is quite simple:
```
<flag> <old-object-id> <new-object-id> <local-reference>\n
```
We assume two conditions which are generally true:
- The old and new object IDs have fixed known widths and cannot
contain spaces.
- References cannot contain newlines.
With these assumptions, the output format becomes unambiguously
parseable. Furthermore, given that this output is designed to be
consumed by scripts, the machine-readable data is printed to stdout
instead of stderr like the human-readable output is. This is mostly done
so that other data printed to stderr, like error messages or progress
meters, don't interfere with the parseable data.
A notable ommission here is that the output format does not include the
remote from which a reference was fetched, which might be important
information especially in the context of multi-remote fetches. But as
such a format would require us to print the remote for every single
reference update due to parallelizable fetches it feels wasteful for the
most likely usecase, which is when fetching from a single remote.
In a similar spirit, a second restriction is that this cannot be used
with `--recurse-submodules`. This is because any reference updates would
be ambiguous without also printing the repository in which the update
happens.
Considering that both multi-remote and submodule fetches are user-facing
features, using them in conjunction with `--porcelain` that is intended
for scripting purposes is likely not going to be useful in the majority
of cases. With that in mind these restrictions feel acceptable. If
usecases for either of these come up in the future though it is easy
enough to add a new "porcelain-v2" format that adds this information.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:36 +00:00
|
|
|
const struct object_id *old_oid,
|
|
|
|
const struct object_id *new_oid,
|
2023-03-20 12:35:40 +00:00
|
|
|
int summary_width)
|
2016-06-26 05:58:07 +00:00
|
|
|
{
|
fetch: introduce machine-parseable "porcelain" output format
The output of git-fetch(1) is obviously designed for consumption by
users, only: we neatly columnize data, we abbreviate reference names, we
print neat arrows and we don't provide information about actual object
IDs that have changed. This makes the output format basically unusable
in the context of scripted invocations of git-fetch(1) that want to
learn about the exact changes that the command performs.
Introduce a new machine-parseable "porcelain" output format that is
supposed to fix this shortcoming. This output format is intended to
provide information about every reference that is about to be updated,
the old object ID that the reference has been pointing to and the new
object ID it will be updated to. Furthermore, the output format provides
the same flags as the human-readable format to indicate basic conditions
for each reference update like whether it was a fast-forward update, a
branch deletion, a rejected update or others.
The output format is quite simple:
```
<flag> <old-object-id> <new-object-id> <local-reference>\n
```
We assume two conditions which are generally true:
- The old and new object IDs have fixed known widths and cannot
contain spaces.
- References cannot contain newlines.
With these assumptions, the output format becomes unambiguously
parseable. Furthermore, given that this output is designed to be
consumed by scripts, the machine-readable data is printed to stdout
instead of stderr like the human-readable output is. This is mostly done
so that other data printed to stderr, like error messages or progress
meters, don't interfere with the parseable data.
A notable ommission here is that the output format does not include the
remote from which a reference was fetched, which might be important
information especially in the context of multi-remote fetches. But as
such a format would require us to print the remote for every single
reference update due to parallelizable fetches it feels wasteful for the
most likely usecase, which is when fetching from a single remote.
In a similar spirit, a second restriction is that this cannot be used
with `--recurse-submodules`. This is because any reference updates would
be ambiguous without also printing the repository in which the update
happens.
Considering that both multi-remote and submodule fetches are user-facing
features, using them in conjunction with `--porcelain` that is intended
for scripting purposes is likely not going to be useful in the majority
of cases. With that in mind these restrictions feel acceptable. If
usecases for either of these come up in the future though it is easy
enough to add a new "porcelain-v2" format that adds this information.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:36 +00:00
|
|
|
FILE *f = stderr;
|
fetch: skip formatting updated refs with `--quiet`
When fetching, Git will by default print a list of all updated refs in a
nicely formatted table. In order to come up with this table, Git needs
to iterate refs twice: first to determine the maximum column width, and
a second time to actually format these changed refs.
While this table will not be printed in case the user passes `--quiet`,
we still go out of our way and do all these steps. In fact, we even do
more work compared to not passing `--quiet`: without the flag, we will
skip all references in the column width computation which have not been
updated, but if it is set we will now compute widths for all refs.
Fix this issue by completely skipping both preparation of the format and
formatting data for display in case the user passes `--quiet`, improving
performance especially with many refs. The following benchmark shows a
nice speedup for a quiet mirror-fetch in a repository with 2.3M refs:
Benchmark #1: HEAD~: git-fetch
Time (mean ± σ): 26.929 s ± 0.145 s [User: 24.194 s, System: 4.656 s]
Range (min … max): 26.692 s … 27.068 s 5 runs
Benchmark #2: HEAD: git-fetch
Time (mean ± σ): 25.189 s ± 0.094 s [User: 22.556 s, System: 4.606 s]
Range (min … max): 25.070 s … 25.314 s 5 runs
Summary
'HEAD: git-fetch' ran
1.07 ± 0.01 times faster than 'HEAD~: git-fetch'
While at it, this patch also fixes `adjust_refcol_width()` such that it
skips unchanged refs in case the user passed `--quiet`, where verbosity
will be negative. While this function won't be called anymore if so,
this brings the comment in line with actual code. Furthermore, needless
`verbosity >= 0` checks are now removed in `store_updated_refs()`: we
never print to the `note` buffer anymore in case `verbosity < 0`, so we
won't end up in that code block anyway.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-08-30 10:54:26 +00:00
|
|
|
|
|
|
|
if (verbosity < 0)
|
|
|
|
return;
|
|
|
|
|
2023-03-20 12:35:40 +00:00
|
|
|
strbuf_reset(&display_state->buf);
|
|
|
|
|
2023-05-10 12:34:24 +00:00
|
|
|
switch (display_state->format) {
|
|
|
|
case DISPLAY_FORMAT_FULL:
|
|
|
|
case DISPLAY_FORMAT_COMPACT: {
|
|
|
|
int width;
|
2023-03-20 12:35:36 +00:00
|
|
|
|
2023-05-10 12:34:24 +00:00
|
|
|
if (!display_state->shown_url) {
|
|
|
|
strbuf_addf(&display_state->buf, _("From %.*s\n"),
|
|
|
|
display_state->url_len, display_state->url);
|
|
|
|
display_state->shown_url = 1;
|
|
|
|
}
|
2016-10-21 22:22:55 +00:00
|
|
|
|
2023-05-10 12:34:24 +00:00
|
|
|
width = (summary_width + strlen(summary) - gettext_width(summary));
|
|
|
|
remote = prettify_refname(remote);
|
|
|
|
local = prettify_refname(local);
|
|
|
|
|
|
|
|
strbuf_addf(&display_state->buf, " %c %-*s ", code, width, summary);
|
|
|
|
|
|
|
|
if (display_state->format != DISPLAY_FORMAT_COMPACT)
|
|
|
|
print_remote_to_local(display_state, remote, local);
|
|
|
|
else
|
|
|
|
print_compact(display_state, remote, local);
|
|
|
|
|
|
|
|
if (error)
|
|
|
|
strbuf_addf(&display_state->buf, " (%s)", error);
|
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
fetch: introduce machine-parseable "porcelain" output format
The output of git-fetch(1) is obviously designed for consumption by
users, only: we neatly columnize data, we abbreviate reference names, we
print neat arrows and we don't provide information about actual object
IDs that have changed. This makes the output format basically unusable
in the context of scripted invocations of git-fetch(1) that want to
learn about the exact changes that the command performs.
Introduce a new machine-parseable "porcelain" output format that is
supposed to fix this shortcoming. This output format is intended to
provide information about every reference that is about to be updated,
the old object ID that the reference has been pointing to and the new
object ID it will be updated to. Furthermore, the output format provides
the same flags as the human-readable format to indicate basic conditions
for each reference update like whether it was a fast-forward update, a
branch deletion, a rejected update or others.
The output format is quite simple:
```
<flag> <old-object-id> <new-object-id> <local-reference>\n
```
We assume two conditions which are generally true:
- The old and new object IDs have fixed known widths and cannot
contain spaces.
- References cannot contain newlines.
With these assumptions, the output format becomes unambiguously
parseable. Furthermore, given that this output is designed to be
consumed by scripts, the machine-readable data is printed to stdout
instead of stderr like the human-readable output is. This is mostly done
so that other data printed to stderr, like error messages or progress
meters, don't interfere with the parseable data.
A notable ommission here is that the output format does not include the
remote from which a reference was fetched, which might be important
information especially in the context of multi-remote fetches. But as
such a format would require us to print the remote for every single
reference update due to parallelizable fetches it feels wasteful for the
most likely usecase, which is when fetching from a single remote.
In a similar spirit, a second restriction is that this cannot be used
with `--recurse-submodules`. This is because any reference updates would
be ambiguous without also printing the repository in which the update
happens.
Considering that both multi-remote and submodule fetches are user-facing
features, using them in conjunction with `--porcelain` that is intended
for scripting purposes is likely not going to be useful in the majority
of cases. With that in mind these restrictions feel acceptable. If
usecases for either of these come up in the future though it is easy
enough to add a new "porcelain-v2" format that adds this information.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:36 +00:00
|
|
|
case DISPLAY_FORMAT_PORCELAIN:
|
|
|
|
strbuf_addf(&display_state->buf, "%c %s %s %s", code,
|
|
|
|
oid_to_hex(old_oid), oid_to_hex(new_oid), local);
|
|
|
|
f = stdout;
|
|
|
|
break;
|
2023-05-10 12:34:24 +00:00
|
|
|
default:
|
|
|
|
BUG("unexpected display format %d", display_state->format);
|
|
|
|
};
|
2023-03-20 12:35:40 +00:00
|
|
|
strbuf_addch(&display_state->buf, '\n');
|
|
|
|
|
fetch: introduce machine-parseable "porcelain" output format
The output of git-fetch(1) is obviously designed for consumption by
users, only: we neatly columnize data, we abbreviate reference names, we
print neat arrows and we don't provide information about actual object
IDs that have changed. This makes the output format basically unusable
in the context of scripted invocations of git-fetch(1) that want to
learn about the exact changes that the command performs.
Introduce a new machine-parseable "porcelain" output format that is
supposed to fix this shortcoming. This output format is intended to
provide information about every reference that is about to be updated,
the old object ID that the reference has been pointing to and the new
object ID it will be updated to. Furthermore, the output format provides
the same flags as the human-readable format to indicate basic conditions
for each reference update like whether it was a fast-forward update, a
branch deletion, a rejected update or others.
The output format is quite simple:
```
<flag> <old-object-id> <new-object-id> <local-reference>\n
```
We assume two conditions which are generally true:
- The old and new object IDs have fixed known widths and cannot
contain spaces.
- References cannot contain newlines.
With these assumptions, the output format becomes unambiguously
parseable. Furthermore, given that this output is designed to be
consumed by scripts, the machine-readable data is printed to stdout
instead of stderr like the human-readable output is. This is mostly done
so that other data printed to stderr, like error messages or progress
meters, don't interfere with the parseable data.
A notable ommission here is that the output format does not include the
remote from which a reference was fetched, which might be important
information especially in the context of multi-remote fetches. But as
such a format would require us to print the remote for every single
reference update due to parallelizable fetches it feels wasteful for the
most likely usecase, which is when fetching from a single remote.
In a similar spirit, a second restriction is that this cannot be used
with `--recurse-submodules`. This is because any reference updates would
be ambiguous without also printing the repository in which the update
happens.
Considering that both multi-remote and submodule fetches are user-facing
features, using them in conjunction with `--porcelain` that is intended
for scripting purposes is likely not going to be useful in the majority
of cases. With that in mind these restrictions feel acceptable. If
usecases for either of these come up in the future though it is easy
enough to add a new "porcelain-v2" format that adds this information.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:36 +00:00
|
|
|
fputs(display_state->buf.buf, f);
|
2016-06-26 05:58:07 +00:00
|
|
|
}
|
2007-11-03 05:32:48 +00:00
|
|
|
|
2007-09-11 03:03:25 +00:00
|
|
|
static int update_local_ref(struct ref *ref,
|
2021-01-12 12:27:48 +00:00
|
|
|
struct ref_transaction *transaction,
|
2023-03-20 12:35:20 +00:00
|
|
|
struct display_state *display_state,
|
fetch: print left-hand side when fetching HEAD:foo
`store_updated_refs()` parses the remote reference for two purposes:
- It gets used as a note when writing FETCH_HEAD.
- It is passed through to `display_ref_update()` to display
updated references in the following format:
```
* branch master -> master
```
In most cases, the parsed remote reference is the prettified reference
name and can thus be used for both cases. But if the remote reference is
HEAD, the parsed remote reference becomes empty. This is intended when
we write the FETCH_HEAD, where we skip writing the note in that case.
But when displaying the updated references this leads to inconsistent
output where the left-hand side of reference updates is missing in some
cases:
```
$ git fetch origin HEAD HEAD:explicit-head :implicit-head main
From https://github.com/git/git
* branch HEAD -> FETCH_HEAD
* [new ref] -> explicit-head
* [new ref] -> implicit-head
* branch main -> FETCH_HEAD
```
This behaviour has existed ever since the table-based output has been
introduced for git-fetch(1) via 165f390250 (git-fetch: more terse fetch
output, 2007-11-03) and was never explicitly documented either in the
commit message or in any of our tests. So while it may not be a bug per
se, it feels like a weird inconsistency and not like it was a conscious
design decision.
The logic of how we compute the remote reference name that we ultimately
pass to `display_ref_update()` is not easy to follow. There are three
different cases here:
- When the remote reference name is "HEAD" we set the remote
reference name to the empty string. This is the case that causes
the left-hand side to go missing, where we would indeed want to
print "HEAD" instead of the empty string. This is what
`prettify_refname()` would return.
- When the remote reference name has a well-known prefix then we
strip this prefix. This matches what `prettify_refname()` does.
- Otherwise, we keep the fully qualified reference name. This also
matches what `prettify_refname()` does.
As the return value of `prettify_refname()` would do the correct thing
for us in all three cases, we can thus fix the inconsistency by passing
through the full remote reference name to `display_ref_update()`, which
learns to call `prettify_refname()`. At the same time, this also
simplifies the code a bit.
Note that this patch also changes formatting of the block that computes
the "kind" (which is the category like "branch" or "tag") and "what"
(which is the prettified reference name like "master" or "v1.0")
variables. This is done on purpose so that it is part of the diff,
hopefully making the change easier to comprehend.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:15 +00:00
|
|
|
const struct ref *remote_ref,
|
2023-05-17 11:49:04 +00:00
|
|
|
int summary_width,
|
|
|
|
const struct fetch_config *config)
|
2007-09-11 03:03:25 +00:00
|
|
|
{
|
|
|
|
struct commit *current = NULL, *updated;
|
2019-06-18 20:25:27 +00:00
|
|
|
int fast_forward = 0;
|
2007-09-11 03:03:25 +00:00
|
|
|
|
2021-09-01 13:09:45 +00:00
|
|
|
if (!repo_has_object_file(the_repository, &ref->new_oid))
|
2015-11-10 02:22:20 +00:00
|
|
|
die(_("object %s not found"), oid_to_hex(&ref->new_oid));
|
2007-09-11 03:03:25 +00:00
|
|
|
|
convert "oidcmp() == 0" to oideq()
Using the more restrictive oideq() should, in the long run,
give the compiler more opportunities to optimize these
callsites. For now, this conversion should be a complete
noop with respect to the generated code.
The result is also perhaps a little more readable, as it
avoids the "zero is equal" idiom. Since it's so prevalent in
C, I think seasoned programmers tend not to even notice it
anymore, but it can sometimes make for awkward double
negations (e.g., we can drop a few !!oidcmp() instances
here).
This patch was generated almost entirely by the included
coccinelle patch. This mechanical conversion should be
completely safe, because we check explicitly for cases where
oidcmp() is compared to 0, which is what oideq() is doing
under the hood. Note that we don't have to catch "!oidcmp()"
separately; coccinelle's standard isomorphisms make sure the
two are treated equivalently.
I say "almost" because I did hand-edit the coccinelle output
to fix up a few style violations (it mostly keeps the
original formatting, but sometimes unwraps long lines).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-28 21:22:40 +00:00
|
|
|
if (oideq(&ref->old_oid, &ref->new_oid)) {
|
2008-11-15 00:14:24 +00:00
|
|
|
if (verbosity > 0)
|
2023-03-20 12:35:40 +00:00
|
|
|
display_ref_update(display_state, '=', _("[up to date]"), NULL,
|
fetch: introduce machine-parseable "porcelain" output format
The output of git-fetch(1) is obviously designed for consumption by
users, only: we neatly columnize data, we abbreviate reference names, we
print neat arrows and we don't provide information about actual object
IDs that have changed. This makes the output format basically unusable
in the context of scripted invocations of git-fetch(1) that want to
learn about the exact changes that the command performs.
Introduce a new machine-parseable "porcelain" output format that is
supposed to fix this shortcoming. This output format is intended to
provide information about every reference that is about to be updated,
the old object ID that the reference has been pointing to and the new
object ID it will be updated to. Furthermore, the output format provides
the same flags as the human-readable format to indicate basic conditions
for each reference update like whether it was a fast-forward update, a
branch deletion, a rejected update or others.
The output format is quite simple:
```
<flag> <old-object-id> <new-object-id> <local-reference>\n
```
We assume two conditions which are generally true:
- The old and new object IDs have fixed known widths and cannot
contain spaces.
- References cannot contain newlines.
With these assumptions, the output format becomes unambiguously
parseable. Furthermore, given that this output is designed to be
consumed by scripts, the machine-readable data is printed to stdout
instead of stderr like the human-readable output is. This is mostly done
so that other data printed to stderr, like error messages or progress
meters, don't interfere with the parseable data.
A notable omission here is that the output format does not include the
remote from which a reference was fetched, which might be important
information especially in the context of multi-remote fetches. But as
such a format would require us to print the remote for every single
reference update due to parallelizable fetches it feels wasteful for the
most likely usecase, which is when fetching from a single remote.
In a similar spirit, a second restriction is that this cannot be used
with `--recurse-submodules`. This is because any reference updates would
be ambiguous without also printing the repository in which the update
happens.
Considering that both multi-remote and submodule fetches are user-facing
features, using them in conjunction with `--porcelain` that is intended
for scripting purposes is likely not going to be useful in the majority
of cases. With that in mind these restrictions feel acceptable. If
usecases for either of these come up in the future though it is easy
enough to add a new "porcelain-v2" format that adds this information.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:36 +00:00
|
|
|
remote_ref->name, ref->name,
|
|
|
|
&ref->old_oid, &ref->new_oid, summary_width);
|
2007-09-11 03:03:25 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-12-01 22:15:44 +00:00
|
|
|
if (!update_head_ok &&
|
fetch: use new branch_checked_out() and add tests
When fetching refs from a remote, it is possible that the refspec will
cause us to overwrite a ref that is checked out in a worktree. The
cause use to overwrite a ref that is checked out in a worktree. The
existing logic in builtin/fetch.c uses a possibly-slow mechanism. Update
those sections to use the new, more efficient branch_checked_out()
helper.
These uses were not previously tested, so add a test case that can be
used for these kinds of collisions. There is only one test now, but more
tests will be added as other consumers of branch_checked_out() are
added.
Note that there are two uses in builtin/fetch.c, but only one of the
messages is tested. This is because the tested check is run before
completing the fetch, and the untested check is not reachable without
concurrent updates to the filesystem. Thus, it is beneficial to keep
that extra check for the sake of defense-in-depth. However, we should
not attempt to test the check, as setting up such a test is too
complicated to be worth the effort. This use in update_local_ref()
also requires a change in the error message because we no longer have
access to the worktree struct, only the path of the worktree. This error
is so rare that making a distinction between the two is not critical.
Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-06-14 19:27:31 +00:00
|
|
|
!is_null_oid(&ref->old_oid) &&
|
|
|
|
branch_checked_out(ref->name)) {
|
2007-09-11 03:03:25 +00:00
|
|
|
/*
|
|
|
|
* If this is the head, and it's not okay to update
|
|
|
|
* the head, and the old value of the head isn't empty...
|
|
|
|
*/
|
2023-03-20 12:35:40 +00:00
|
|
|
display_ref_update(display_state, '!', _("[rejected]"),
|
|
|
|
_("can't fetch into checked-out branch"),
|
fetch: introduce machine-parseable "porcelain" output format
The output of git-fetch(1) is obviously designed for consumption by
users, only: we neatly columnize data, we abbreviate reference names, we
print neat arrows and we don't provide information about actual object
IDs that have changed. This makes the output format basically unusable
in the context of scripted invocations of git-fetch(1) that want to
learn about the exact changes that the command performs.
Introduce a new machine-parseable "porcelain" output format that is
supposed to fix this shortcoming. This output format is intended to
provide information about every reference that is about to be updated,
the old object ID that the reference has been pointing to and the new
object ID it will be updated to. Furthermore, the output format provides
the same flags as the human-readable format to indicate basic conditions
for each reference update like whether it was a fast-forward update, a
branch deletion, a rejected update or others.
The output format is quite simple:
```
<flag> <old-object-id> <new-object-id> <local-reference>\n
```
We assume two conditions which are generally true:
- The old and new object IDs have fixed known widths and cannot
contain spaces.
- References cannot contain newlines.
With these assumptions, the output format becomes unambiguously
parseable. Furthermore, given that this output is designed to be
consumed by scripts, the machine-readable data is printed to stdout
instead of stderr like the human-readable output is. This is mostly done
so that other data printed to stderr, like error messages or progress
meters, don't interfere with the parseable data.
A notable omission here is that the output format does not include the
remote from which a reference was fetched, which might be important
information especially in the context of multi-remote fetches. But as
such a format would require us to print the remote for every single
reference update due to parallelizable fetches it feels wasteful for the
most likely usecase, which is when fetching from a single remote.
In a similar spirit, a second restriction is that this cannot be used
with `--recurse-submodules`. This is because any reference updates would
be ambiguous without also printing the repository in which the update
happens.
Considering that both multi-remote and submodule fetches are user-facing
features, using them in conjunction with `--porcelain` that is intended
for scripting purposes is likely not going to be useful in the majority
of cases. With that in mind these restrictions feel acceptable. If
usecases for either of these come up in the future though it is easy
enough to add a new "porcelain-v2" format that adds this information.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:36 +00:00
|
|
|
remote_ref->name, ref->name,
|
|
|
|
&ref->old_oid, &ref->new_oid, summary_width);
|
2007-09-11 03:03:25 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2015-11-10 02:22:20 +00:00
|
|
|
if (!is_null_oid(&ref->old_oid) &&
|
2013-11-30 20:55:40 +00:00
|
|
|
starts_with(ref->name, "refs/tags/")) {
|
fetch: stop clobbering existing tags without --force
Change "fetch" to treat "+" in refspecs (aka --force) to mean we
should clobber a local tag of the same name.
This changes the long-standing behavior of "fetch" added in
853a3697dc ("[PATCH] Multi-head fetch.", 2005-08-20). Before this
change, all tag fetches effectively had --force enabled. See the
git-fetch-script code in fast_forward_local() with the comment:
> Tags need not be pointing at commits so there is no way to
> guarantee "fast-forward" anyway.
That commit and the rest of the history of "fetch" shows that the
"+" (--force) part of refspecs was only conceived for branch updates,
while tags have accepted any changes from upstream unconditionally and
clobbered the local tag object. Changing this behavior has been
discussed as early as 2011[1].
The current behavior doesn't make sense to me, it easily results in
local tags accidentally being clobbered. We could namespace our tags
per-remote and not locally populate refs/tags/*, but as with my
97716d217c ("fetch: add a --prune-tags option and fetch.pruneTags
config", 2018-02-09) it's easier to work around the current
implementation than to fix the root cause.
So this change implements suggestion #1 from Jeff's 2011 E-Mail[1],
"fetch" now only clobbers the tag if either "+" is provided as part of
the refspec, or if "--force" is provided on the command-line.
This also makes it nicely symmetrical with how "tag" itself works when
creating tags. I.e. we refuse to clobber any existing tags unless
"--force" is supplied. Now we can refuse all such clobbering, whether
it would happen by clobbering a local tag with "tag", or by fetching
it from the remote with "fetch".
Ref updates outside refs/{tags,heads}/* are still not symmetrical
with how "git push" works, as discussed in the recently changed
pull-fetch-param.txt documentation. This change brings the two
divergent behaviors more into line with one another. I don't think
there's any reason "fetch" couldn't fully converge with the behavior
used by "push", but that's a topic for another change.
One of the tests added in 31b808a032 ("clone --single: limit the fetch
refspec to fetched branch", 2012-09-20) is being changed to use
--force where a clone would clobber a tag. This changes nothing about
the existing behavior of the test.
1. https://public-inbox.org/git/20111123221658.GA22313@sigill.intra.peff.net/
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-31 20:10:04 +00:00
|
|
|
if (force || ref->force) {
|
|
|
|
int r;
|
2021-01-12 12:27:48 +00:00
|
|
|
r = s_update_ref("updating tag", ref, transaction, 0);
|
2023-03-20 12:35:40 +00:00
|
|
|
display_ref_update(display_state, r ? '!' : 't', _("[tag update]"),
|
|
|
|
r ? _("unable to update local ref") : NULL,
|
fetch: introduce machine-parseable "porcelain" output format
The output of git-fetch(1) is obviously designed for consumption by
users, only: we neatly columnize data, we abbreviate reference names, we
print neat arrows and we don't provide information about actual object
IDs that have changed. This makes the output format basically unusable
in the context of scripted invocations of git-fetch(1) that want to
learn about the exact changes that the command performs.
Introduce a new machine-parseable "porcelain" output format that is
supposed to fix this shortcoming. This output format is intended to
provide information about every reference that is about to be updated,
the old object ID that the reference has been pointing to and the new
object ID it will be updated to. Furthermore, the output format provides
the same flags as the human-readable format to indicate basic conditions
for each reference update like whether it was a fast-forward update, a
branch deletion, a rejected update or others.
The output format is quite simple:
```
<flag> <old-object-id> <new-object-id> <local-reference>\n
```
We assume two conditions which are generally true:
- The old and new object IDs have fixed known widths and cannot
contain spaces.
- References cannot contain newlines.
With these assumptions, the output format becomes unambiguously
parseable. Furthermore, given that this output is designed to be
consumed by scripts, the machine-readable data is printed to stdout
instead of stderr like the human-readable output is. This is mostly done
so that other data printed to stderr, like error messages or progress
meters, don't interfere with the parseable data.
A notable omission here is that the output format does not include the
remote from which a reference was fetched, which might be important
information especially in the context of multi-remote fetches. But as
such a format would require us to print the remote for every single
reference update due to parallelizable fetches it feels wasteful for the
most likely usecase, which is when fetching from a single remote.
In a similar spirit, a second restriction is that this cannot be used
with `--recurse-submodules`. This is because any reference updates would
be ambiguous without also printing the repository in which the update
happens.
Considering that both multi-remote and submodule fetches are user-facing
features, using them in conjunction with `--porcelain` that is intended
for scripting purposes is likely not going to be useful in the majority
of cases. With that in mind these restrictions feel acceptable. If
usecases for either of these come up in the future though it is easy
enough to add a new "porcelain-v2" format that adds this information.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:36 +00:00
|
|
|
remote_ref->name, ref->name,
|
|
|
|
&ref->old_oid, &ref->new_oid, summary_width);
|
fetch: stop clobbering existing tags without --force
Change "fetch" to treat "+" in refspecs (aka --force) to mean we
should clobber a local tag of the same name.
This changes the long-standing behavior of "fetch" added in
853a3697dc ("[PATCH] Multi-head fetch.", 2005-08-20). Before this
change, all tag fetches effectively had --force enabled. See the
git-fetch-script code in fast_forward_local() with the comment:
> Tags need not be pointing at commits so there is no way to
> guarantee "fast-forward" anyway.
That commit and the rest of the history of "fetch" shows that the
"+" (--force) part of refspecs was only conceived for branch updates,
while tags have accepted any changes from upstream unconditionally and
clobbered the local tag object. Changing this behavior has been
discussed as early as 2011[1].
The current behavior doesn't make sense to me, it easily results in
local tags accidentally being clobbered. We could namespace our tags
per-remote and not locally populate refs/tags/*, but as with my
97716d217c ("fetch: add a --prune-tags option and fetch.pruneTags
config", 2018-02-09) it's easier to work around the current
implementation than to fix the root cause.
So this change implements suggestion #1 from Jeff's 2011 E-Mail[1],
"fetch" now only clobbers the tag if either "+" is provided as part of
the refspec, or if "--force" is provided on the command-line.
This also makes it nicely symmetrical with how "tag" itself works when
creating tags. I.e. we refuse to clobber any existing tags unless
"--force" is supplied. Now we can refuse all such clobbering, whether
it would happen by clobbering a local tag with "tag", or by fetching
it from the remote with "fetch".
Ref updates outside refs/{tags,heads}/* are still not symmetrical
with how "git push" works, as discussed in the recently changed
pull-fetch-param.txt documentation. This change brings the two
divergent behaviors more into line with one another. I don't think
there's any reason "fetch" couldn't fully converge with the behavior
used by "push", but that's a topic for another change.
One of the tests added in 31b808a032 ("clone --single: limit the fetch
refspec to fetched branch", 2012-09-20) is being changed to use
--force where a clone would clobber a tag. This changes nothing about
the existing behavior of the test.
1. https://public-inbox.org/git/20111123221658.GA22313@sigill.intra.peff.net/
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-31 20:10:04 +00:00
|
|
|
return r;
|
|
|
|
} else {
|
2023-03-20 12:35:40 +00:00
|
|
|
display_ref_update(display_state, '!', _("[rejected]"),
|
|
|
|
_("would clobber existing tag"),
|
fetch: introduce machine-parseable "porcelain" output format
The output of git-fetch(1) is obviously designed for consumption by
users, only: we neatly columnize data, we abbreviate reference names, we
print neat arrows and we don't provide information about actual object
IDs that have changed. This makes the output format basically unusable
in the context of scripted invocations of git-fetch(1) that want to
learn about the exact changes that the command performs.
Introduce a new machine-parseable "porcelain" output format that is
supposed to fix this shortcoming. This output format is intended to
provide information about every reference that is about to be updated,
the old object ID that the reference has been pointing to and the new
object ID it will be updated to. Furthermore, the output format provides
the same flags as the human-readable format to indicate basic conditions
for each reference update like whether it was a fast-forward update, a
branch deletion, a rejected update or others.
The output format is quite simple:
```
<flag> <old-object-id> <new-object-id> <local-reference>\n
```
We assume two conditions which are generally true:
- The old and new object IDs have fixed known widths and cannot
contain spaces.
- References cannot contain newlines.
With these assumptions, the output format becomes unambiguously
parseable. Furthermore, given that this output is designed to be
consumed by scripts, the machine-readable data is printed to stdout
instead of stderr like the human-readable output is. This is mostly done
so that other data printed to stderr, like error messages or progress
meters, don't interfere with the parseable data.
A notable omission here is that the output format does not include the
remote from which a reference was fetched, which might be important
information especially in the context of multi-remote fetches. But as
such a format would require us to print the remote for every single
reference update due to parallelizable fetches it feels wasteful for the
most likely usecase, which is when fetching from a single remote.
In a similar spirit, a second restriction is that this cannot be used
with `--recurse-submodules`. This is because any reference updates would
be ambiguous without also printing the repository in which the update
happens.
Considering that both multi-remote and submodule fetches are user-facing
features, using them in conjunction with `--porcelain` that is intended
for scripting purposes is likely not going to be useful in the majority
of cases. With that in mind these restrictions feel acceptable. If
usecases for either of these come up in the future though it is easy
enough to add a new "porcelain-v2" format that adds this information.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:36 +00:00
|
|
|
remote_ref->name, ref->name,
|
|
|
|
&ref->old_oid, &ref->new_oid, summary_width);
|
fetch: stop clobbering existing tags without --force
Change "fetch" to treat "+" in refspecs (aka --force) to mean we
should clobber a local tag of the same name.
This changes the long-standing behavior of "fetch" added in
853a3697dc ("[PATCH] Multi-head fetch.", 2005-08-20). Before this
change, all tag fetches effectively had --force enabled. See the
git-fetch-script code in fast_forward_local() with the comment:
> Tags need not be pointing at commits so there is no way to
> guarantee "fast-forward" anyway.
That commit and the rest of the history of "fetch" shows that the
"+" (--force) part of refspecs was only conceived for branch updates,
while tags have accepted any changes from upstream unconditionally and
clobbered the local tag object. Changing this behavior has been
discussed as early as 2011[1].
The current behavior doesn't make sense to me, it easily results in
local tags accidentally being clobbered. We could namespace our tags
per-remote and not locally populate refs/tags/*, but as with my
97716d217c ("fetch: add a --prune-tags option and fetch.pruneTags
config", 2018-02-09) it's easier to work around the current
implementation than to fix the root cause.
So this change implements suggestion #1 from Jeff's 2011 E-Mail[1],
"fetch" now only clobbers the tag if either "+" is provided as part of
the refspec, or if "--force" is provided on the command-line.
This also makes it nicely symmetrical with how "tag" itself works when
creating tags. I.e. we refuse to clobber any existing tags unless
"--force" is supplied. Now we can refuse all such clobbering, whether
it would happen by clobbering a local tag with "tag", or by fetching
it from the remote with "fetch".
Ref updates outside refs/{tags,heads}/* are still not symmetrical
with how "git push" works, as discussed in the recently changed
pull-fetch-param.txt documentation. This change brings the two
divergent behaviors more into line with one another. I don't think
there's any reason "fetch" couldn't fully converge with the behavior
used by "push", but that's a topic for another change.
One of the tests added in 31b808a032 ("clone --single: limit the fetch
refspec to fetched branch", 2012-09-20) is being changed to use
--force where a clone would clobber a tag. This changes nothing about
the existing behavior of the test.
1. https://public-inbox.org/git/20111123221658.GA22313@sigill.intra.peff.net/
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-31 20:10:04 +00:00
|
|
|
return 1;
|
|
|
|
}
|
2007-09-11 03:03:25 +00:00
|
|
|
}
|
|
|
|
|
2018-06-29 01:21:57 +00:00
|
|
|
current = lookup_commit_reference_gently(the_repository,
|
|
|
|
&ref->old_oid, 1);
|
|
|
|
updated = lookup_commit_reference_gently(the_repository,
|
|
|
|
&ref->new_oid, 1);
|
2007-09-11 03:03:25 +00:00
|
|
|
if (!current || !updated) {
|
2007-11-03 05:32:48 +00:00
|
|
|
const char *msg;
|
|
|
|
const char *what;
|
2008-06-27 03:59:50 +00:00
|
|
|
int r;
|
2012-04-16 22:08:50 +00:00
|
|
|
/*
|
|
|
|
* Nicely describe the new ref we're fetching.
|
|
|
|
* Base this on the remote's ref name, as it's
|
|
|
|
* more likely to follow a standard layout.
|
|
|
|
*/
|
2023-05-17 11:48:46 +00:00
|
|
|
if (starts_with(remote_ref->name, "refs/tags/")) {
|
2007-09-11 03:03:25 +00:00
|
|
|
msg = "storing tag";
|
2011-02-22 23:41:53 +00:00
|
|
|
what = _("[new tag]");
|
2023-05-17 11:48:46 +00:00
|
|
|
} else if (starts_with(remote_ref->name, "refs/heads/")) {
|
2007-09-11 03:03:25 +00:00
|
|
|
msg = "storing head";
|
2011-02-22 23:41:53 +00:00
|
|
|
what = _("[new branch]");
|
2012-04-16 22:08:50 +00:00
|
|
|
} else {
|
|
|
|
msg = "storing ref";
|
|
|
|
what = _("[new ref]");
|
2007-11-03 05:32:48 +00:00
|
|
|
}
|
|
|
|
|
2021-01-12 12:27:48 +00:00
|
|
|
r = s_update_ref(msg, ref, transaction, 0);
|
2023-03-20 12:35:40 +00:00
|
|
|
display_ref_update(display_state, r ? '!' : '*', what,
|
|
|
|
r ? _("unable to update local ref") : NULL,
|
fetch: introduce machine-parseable "porcelain" output format
The output of git-fetch(1) is obviously designed for consumption by
users, only: we neatly columnize data, we abbreviate reference names, we
print neat arrows and we don't provide information about actual object
IDs that have changed. This makes the output format basically unusable
in the context of scripted invocations of git-fetch(1) that want to
learn about the exact changes that the command performs.
Introduce a new machine-parseable "porcelain" output format that is
supposed to fix this shortcoming. This output format is intended to
provide information about every reference that is about to be updated,
the old object ID that the reference has been pointing to and the new
object ID it will be updated to. Furthermore, the output format provides
the same flags as the human-readable format to indicate basic conditions
for each reference update like whether it was a fast-forward update, a
branch deletion, a rejected update or others.
The output format is quite simple:
```
<flag> <old-object-id> <new-object-id> <local-reference>\n
```
We assume two conditions which are generally true:
- The old and new object IDs have fixed known widths and cannot
contain spaces.
- References cannot contain newlines.
With these assumptions, the output format becomes unambiguously
parseable. Furthermore, given that this output is designed to be
consumed by scripts, the machine-readable data is printed to stdout
instead of stderr like the human-readable output is. This is mostly done
so that other data printed to stderr, like error messages or progress
meters, don't interfere with the parseable data.
A notable omission here is that the output format does not include the
remote from which a reference was fetched, which might be important
information especially in the context of multi-remote fetches. But as
such a format would require us to print the remote for every single
reference update due to parallelizable fetches it feels wasteful for the
most likely usecase, which is when fetching from a single remote.
In a similar spirit, a second restriction is that this cannot be used
with `--recurse-submodules`. This is because any reference updates would
be ambiguous without also printing the repository in which the update
happens.
Considering that both multi-remote and submodule fetches are user-facing
features, using them in conjunction with `--porcelain` that is intended
for scripting purposes is likely not going to be useful in the majority
of cases. With that in mind these restrictions feel acceptable. If
usecases for either of these come up in the future though it is easy
enough to add a new "porcelain-v2" format that adds this information.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:36 +00:00
|
|
|
remote_ref->name, ref->name,
|
|
|
|
&ref->old_oid, &ref->new_oid, summary_width);
|
2008-06-27 03:59:50 +00:00
|
|
|
return r;
|
2007-09-11 03:03:25 +00:00
|
|
|
}
|
|
|
|
|
2023-05-17 11:49:04 +00:00
|
|
|
if (config->show_forced_updates) {
|
2019-06-18 20:25:27 +00:00
|
|
|
uint64_t t_before = getnanotime();
|
2023-03-28 13:58:47 +00:00
|
|
|
fast_forward = repo_in_merge_bases(the_repository, current,
|
|
|
|
updated);
|
2019-06-18 20:25:27 +00:00
|
|
|
forced_updates_ms += (getnanotime() - t_before) / 1000000;
|
|
|
|
} else {
|
|
|
|
fast_forward = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (fast_forward) {
|
2015-09-24 21:07:40 +00:00
|
|
|
struct strbuf quickref = STRBUF_INIT;
|
2008-06-27 03:59:50 +00:00
|
|
|
int r;
|
2019-06-18 20:25:26 +00:00
|
|
|
|
strbuf: convert strbuf_add_unique_abbrev to use struct object_id
Convert the declaration and definition of strbuf_add_unique_abbrev to
make it take a pointer to struct object_id. Predeclare the struct in
strbuf.h, as cache.h includes strbuf.h before it declares the struct,
and otherwise the struct declaration would have the wrong scope.
Apply the following semantic patch, along with the standard object_id
transforms, to adjust the callers:
@@
expression E1, E2, E3;
@@
- strbuf_add_unique_abbrev(E1, E2.hash, E3);
+ strbuf_add_unique_abbrev(E1, &E2, E3);
@@
expression E1, E2, E3;
@@
- strbuf_add_unique_abbrev(E1, E2->hash, E3);
+ strbuf_add_unique_abbrev(E1, E2, E3);
Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-03-12 02:27:28 +00:00
|
|
|
strbuf_add_unique_abbrev(&quickref, &current->object.oid, DEFAULT_ABBREV);
|
2015-09-24 21:07:40 +00:00
|
|
|
strbuf_addstr(&quickref, "..");
|
strbuf: convert strbuf_add_unique_abbrev to use struct object_id
Convert the declaration and definition of strbuf_add_unique_abbrev to
make it take a pointer to struct object_id. Predeclare the struct in
strbuf.h, as cache.h includes strbuf.h before it declares the struct,
and otherwise the struct declaration would have the wrong scope.
Apply the following semantic patch, along with the standard object_id
transforms, to adjust the callers:
@@
expression E1, E2, E3;
@@
- strbuf_add_unique_abbrev(E1, E2.hash, E3);
+ strbuf_add_unique_abbrev(E1, &E2, E3);
@@
expression E1, E2, E3;
@@
- strbuf_add_unique_abbrev(E1, E2->hash, E3);
+ strbuf_add_unique_abbrev(E1, E2, E3);
Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-03-12 02:27:28 +00:00
|
|
|
strbuf_add_unique_abbrev(&quickref, &ref->new_oid, DEFAULT_ABBREV);
|
2021-01-12 12:27:48 +00:00
|
|
|
r = s_update_ref("fast-forward", ref, transaction, 1);
|
2023-03-20 12:35:40 +00:00
|
|
|
display_ref_update(display_state, r ? '!' : ' ', quickref.buf,
|
|
|
|
r ? _("unable to update local ref") : NULL,
|
fetch: introduce machine-parseable "porcelain" output format
The output of git-fetch(1) is obviously designed for consumption by
users, only: we neatly columnize data, we abbreviate reference names, we
print neat arrows and we don't provide information about actual object
IDs that have changed. This makes the output format basically unusable
in the context of scripted invocations of git-fetch(1) that want to
learn about the exact changes that the command performs.
Introduce a new machine-parseable "porcelain" output format that is
supposed to fix this shortcoming. This output format is intended to
provide information about every reference that is about to be updated,
the old object ID that the reference has been pointing to and the new
object ID it will be updated to. Furthermore, the output format provides
the same flags as the human-readable format to indicate basic conditions
for each reference update like whether it was a fast-forward update, a
branch deletion, a rejected update or others.
The output format is quite simple:
```
<flag> <old-object-id> <new-object-id> <local-reference>\n
```
We assume two conditions which are generally true:
- The old and new object IDs have fixed known widths and cannot
contain spaces.
- References cannot contain newlines.
With these assumptions, the output format becomes unambiguously
parseable. Furthermore, given that this output is designed to be
consumed by scripts, the machine-readable data is printed to stdout
instead of stderr like the human-readable output is. This is mostly done
so that other data printed to stderr, like error messages or progress
meters, don't interfere with the parseable data.
A notable omission here is that the output format does not include the
remote from which a reference was fetched, which might be important
information especially in the context of multi-remote fetches. But as
such a format would require us to print the remote for every single
reference update due to parallelizable fetches it feels wasteful for the
most likely usecase, which is when fetching from a single remote.
In a similar spirit, a second restriction is that this cannot be used
with `--recurse-submodules`. This is because any reference updates would
be ambiguous without also printing the repository in which the update
happens.
Considering that both multi-remote and submodule fetches are user-facing
features, using them in conjunction with `--porcelain` that is intended
for scripting purposes is likely not going to be useful in the majority
of cases. With that in mind these restrictions feel acceptable. If
usecases for either of these come up in the future though it is easy
enough to add a new "porcelain-v2" format that adds this information.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:36 +00:00
|
|
|
remote_ref->name, ref->name,
|
|
|
|
&ref->old_oid, &ref->new_oid, summary_width);
|
2015-09-24 21:07:40 +00:00
|
|
|
strbuf_release(&quickref);
|
2008-06-27 03:59:50 +00:00
|
|
|
return r;
|
2007-11-03 05:32:48 +00:00
|
|
|
} else if (force || ref->force) {
|
2015-09-24 21:07:40 +00:00
|
|
|
struct strbuf quickref = STRBUF_INIT;
|
2008-06-27 03:59:50 +00:00
|
|
|
int r;
|
strbuf: convert strbuf_add_unique_abbrev to use struct object_id
Convert the declaration and definition of strbuf_add_unique_abbrev to
make it take a pointer to struct object_id. Predeclare the struct in
strbuf.h, as cache.h includes strbuf.h before it declares the struct,
and otherwise the struct declaration would have the wrong scope.
Apply the following semantic patch, along with the standard object_id
transforms, to adjust the callers:
@@
expression E1, E2, E3;
@@
- strbuf_add_unique_abbrev(E1, E2.hash, E3);
+ strbuf_add_unique_abbrev(E1, &E2, E3);
@@
expression E1, E2, E3;
@@
- strbuf_add_unique_abbrev(E1, E2->hash, E3);
+ strbuf_add_unique_abbrev(E1, E2, E3);
Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-03-12 02:27:28 +00:00
|
|
|
strbuf_add_unique_abbrev(&quickref, ¤t->object.oid, DEFAULT_ABBREV);
|
2015-09-24 21:07:40 +00:00
|
|
|
strbuf_addstr(&quickref, "...");
|
strbuf: convert strbuf_add_unique_abbrev to use struct object_id
Convert the declaration and definition of strbuf_add_unique_abbrev to
make it take a pointer to struct object_id. Predeclare the struct in
strbuf.h, as cache.h includes strbuf.h before it declares the struct,
and otherwise the struct declaration would have the wrong scope.
Apply the following semantic patch, along with the standard object_id
transforms, to adjust the callers:
@@
expression E1, E2, E3;
@@
- strbuf_add_unique_abbrev(E1, E2.hash, E3);
+ strbuf_add_unique_abbrev(E1, &E2, E3);
@@
expression E1, E2, E3;
@@
- strbuf_add_unique_abbrev(E1, E2->hash, E3);
+ strbuf_add_unique_abbrev(E1, E2, E3);
Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-03-12 02:27:28 +00:00
|
|
|
strbuf_add_unique_abbrev(&quickref, &ref->new_oid, DEFAULT_ABBREV);
|
2021-01-12 12:27:48 +00:00
|
|
|
r = s_update_ref("forced-update", ref, transaction, 1);
|
2023-03-20 12:35:40 +00:00
|
|
|
display_ref_update(display_state, r ? '!' : '+', quickref.buf,
|
|
|
|
r ? _("unable to update local ref") : _("forced update"),
|
fetch: introduce machine-parseable "porcelain" output format
The output of git-fetch(1) is obviously designed for consumption by
users, only: we neatly columnize data, we abbreviate reference names, we
print neat arrows and we don't provide information about actual object
IDs that have changed. This makes the output format basically unusable
in the context of scripted invocations of git-fetch(1) that want to
learn about the exact changes that the command performs.
Introduce a new machine-parseable "porcelain" output format that is
supposed to fix this shortcoming. This output format is intended to
provide information about every reference that is about to be updated,
the old object ID that the reference has been pointing to and the new
object ID it will be updated to. Furthermore, the output format provides
the same flags as the human-readable format to indicate basic conditions
for each reference update like whether it was a fast-forward update, a
branch deletion, a rejected update or others.
The output format is quite simple:
```
<flag> <old-object-id> <new-object-id> <local-reference>\n
```
We assume two conditions which are generally true:
- The old and new object IDs have fixed known widths and cannot
contain spaces.
- References cannot contain newlines.
With these assumptions, the output format becomes unambiguously
parseable. Furthermore, given that this output is designed to be
consumed by scripts, the machine-readable data is printed to stdout
instead of stderr like the human-readable output is. This is mostly done
so that other data printed to stderr, like error messages or progress
meters, don't interfere with the parseable data.
A notable omission here is that the output format does not include the
remote from which a reference was fetched, which might be important
information especially in the context of multi-remote fetches. But as
such a format would require us to print the remote for every single
reference update due to parallelizable fetches it feels wasteful for the
most likely usecase, which is when fetching from a single remote.
In a similar spirit, a second restriction is that this cannot be used
with `--recurse-submodules`. This is because any reference updates would
be ambiguous without also printing the repository in which the update
happens.
Considering that both multi-remote and submodule fetches are user-facing
features, using them in conjunction with `--porcelain` that is intended
for scripting purposes is likely not going to be useful in the majority
of cases. With that in mind these restrictions feel acceptable. If
usecases for either of these come up in the future though it is easy
enough to add a new "porcelain-v2" format that adds this information.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:36 +00:00
|
|
|
remote_ref->name, ref->name,
|
|
|
|
&ref->old_oid, &ref->new_oid, summary_width);
|
2015-09-24 21:07:40 +00:00
|
|
|
strbuf_release(&quickref);
|
2008-06-27 03:59:50 +00:00
|
|
|
return r;
|
2007-11-03 05:32:48 +00:00
|
|
|
} else {
|
2023-03-20 12:35:40 +00:00
|
|
|
display_ref_update(display_state, '!', _("[rejected]"), _("non-fast-forward"),
|
fetch: introduce machine-parseable "porcelain" output format
The output of git-fetch(1) is obviously designed for consumption by
users, only: we neatly columnize data, we abbreviate reference names, we
print neat arrows and we don't provide information about actual object
IDs that have changed. This makes the output format basically unusable
in the context of scripted invocations of git-fetch(1) that want to
learn about the exact changes that the command performs.
Introduce a new machine-parseable "porcelain" output format that is
supposed to fix this shortcoming. This output format is intended to
provide information about every reference that is about to be updated,
the old object ID that the reference has been pointing to and the new
object ID it will be updated to. Furthermore, the output format provides
the same flags as the human-readable format to indicate basic conditions
for each reference update like whether it was a fast-forward update, a
branch deletion, a rejected update or others.
The output format is quite simple:
```
<flag> <old-object-id> <new-object-id> <local-reference>\n
```
We assume two conditions which are generally true:
- The old and new object IDs have fixed known widths and cannot
contain spaces.
- References cannot contain newlines.
With these assumptions, the output format becomes unambiguously
parseable. Furthermore, given that this output is designed to be
consumed by scripts, the machine-readable data is printed to stdout
instead of stderr like the human-readable output is. This is mostly done
so that other data printed to stderr, like error messages or progress
meters, don't interfere with the parseable data.
A notable omission here is that the output format does not include the
remote from which a reference was fetched, which might be important
information especially in the context of multi-remote fetches. But as
such a format would require us to print the remote for every single
reference update due to parallelizable fetches it feels wasteful for the
most likely usecase, which is when fetching from a single remote.
In a similar spirit, a second restriction is that this cannot be used
with `--recurse-submodules`. This is because any reference updates would
be ambiguous without also printing the repository in which the update
happens.
Considering that both multi-remote and submodule fetches are user-facing
features, using them in conjunction with `--porcelain` that is intended
for scripting purposes is likely not going to be useful in the majority
of cases. With that in mind these restrictions feel acceptable. If
usecases for either of these come up in the future though it is easy
enough to add a new "porcelain-v2" format that adds this information.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:36 +00:00
|
|
|
remote_ref->name, ref->name,
|
|
|
|
&ref->old_oid, &ref->new_oid, summary_width);
|
2007-09-11 03:03:25 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-09-01 13:09:50 +00:00
|
|
|
static const struct object_id *iterate_ref_map(void *cb_data)
|
2011-09-01 22:43:35 +00:00
|
|
|
{
|
2011-09-02 23:22:47 +00:00
|
|
|
struct ref **rm = cb_data;
|
|
|
|
struct ref *ref = *rm;
|
2011-09-01 22:43:35 +00:00
|
|
|
|
2013-12-05 13:02:40 +00:00
|
|
|
while (ref && ref->status == REF_STATUS_REJECT_SHALLOW)
|
|
|
|
ref = ref->next;
|
2011-09-02 23:22:47 +00:00
|
|
|
if (!ref)
|
2021-09-01 13:09:50 +00:00
|
|
|
return NULL;
|
2011-09-02 23:22:47 +00:00
|
|
|
*rm = ref->next;
|
2021-09-01 13:09:50 +00:00
|
|
|
return &ref->old_oid;
|
2011-09-01 22:43:35 +00:00
|
|
|
}
|
|
|
|
|
2021-01-12 12:27:35 +00:00
|
|
|
/*
 * State used while writing FETCH_HEAD; set up by open_fetch_head() and
 * torn down by close_fetch_head().
 */
struct fetch_head {
|
|
|
|
/*
 * Stream opened in append mode by open_fetch_head(), or NULL when
 * FETCH_HEAD is not being written. The helpers in this file treat a
 * NULL fp as "do nothing".
 */
FILE *fp;
|
2021-01-12 12:27:39 +00:00
|
|
|
/*
 * Records formatted by append_fetch_head(). Written out and reset after
 * every record for a non-atomic fetch, otherwise kept until
 * commit_fetch_head(). Only initialized when fp is non-NULL.
 */
struct strbuf buf;
|
2021-01-12 12:27:35 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
static int open_fetch_head(struct fetch_head *fetch_head)
|
|
|
|
{
|
|
|
|
const char *filename = git_path_fetch_head(the_repository);
|
|
|
|
|
|
|
|
if (write_fetch_head) {
|
|
|
|
fetch_head->fp = fopen(filename, "a");
|
|
|
|
if (!fetch_head->fp)
|
2022-01-05 20:02:17 +00:00
|
|
|
return error_errno(_("cannot open '%s'"), filename);
|
2021-01-12 12:27:39 +00:00
|
|
|
strbuf_init(&fetch_head->buf, 0);
|
2021-01-12 12:27:35 +00:00
|
|
|
} else {
|
|
|
|
fetch_head->fp = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void append_fetch_head(struct fetch_head *fetch_head,
|
|
|
|
const struct object_id *old_oid,
|
|
|
|
enum fetch_head_status fetch_head_status,
|
|
|
|
const char *note,
|
|
|
|
const char *url, size_t url_len)
|
|
|
|
{
|
|
|
|
char old_oid_hex[GIT_MAX_HEXSZ + 1];
|
|
|
|
const char *merge_status_marker;
|
|
|
|
size_t i;
|
|
|
|
|
|
|
|
if (!fetch_head->fp)
|
|
|
|
return;
|
|
|
|
|
|
|
|
switch (fetch_head_status) {
|
|
|
|
case FETCH_HEAD_NOT_FOR_MERGE:
|
|
|
|
merge_status_marker = "not-for-merge";
|
|
|
|
break;
|
|
|
|
case FETCH_HEAD_MERGE:
|
|
|
|
merge_status_marker = "";
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
/* do not write anything to FETCH_HEAD */
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2021-01-12 12:27:39 +00:00
|
|
|
strbuf_addf(&fetch_head->buf, "%s\t%s\t%s",
|
|
|
|
oid_to_hex_r(old_oid_hex, old_oid), merge_status_marker, note);
|
2021-01-12 12:27:35 +00:00
|
|
|
for (i = 0; i < url_len; ++i)
|
|
|
|
if ('\n' == url[i])
|
2021-01-12 12:27:39 +00:00
|
|
|
strbuf_addstr(&fetch_head->buf, "\\n");
|
2021-01-12 12:27:35 +00:00
|
|
|
else
|
2021-01-12 12:27:39 +00:00
|
|
|
strbuf_addch(&fetch_head->buf, url[i]);
|
|
|
|
strbuf_addch(&fetch_head->buf, '\n');
|
|
|
|
|
fetch: implement support for atomic reference updates
When executing a fetch, then git will currently allocate one reference
transaction per reference update and directly commit it. This means that
fetches are non-atomic: even if some of the reference updates fail,
others may still succeed and modify local references.
This is fine in many scenarios, but this strategy has its downsides.
- The view of remote references may be inconsistent and may show a
bastardized state of the remote repository.
- Batching together updates may improve performance in certain
scenarios. While the impact probably isn't as pronounced with loose
references, the upcoming reftable backend may benefit as it needs to
write less files in case the update is batched.
- The reference-update hook is currently being executed twice per
updated reference. While this doesn't matter when there is no such
hook, we have seen severe performance regressions when doing a
git-fetch(1) with reference-transaction hook when the remote
repository has hundreds of thousands of references.
Similar to `git push --atomic`, this commit thus introduces atomic
fetches. Instead of allocating one reference transaction per updated
reference, it causes us to only allocate a single transaction and commit
it as soon as all updates were received. If locking of any reference
fails, then we abort the complete transaction and don't update any
reference, which gives us an all-or-nothing fetch.
Note that this may not completely fix the first of above downsides, as
the consistent view also depends on the server-side. If the server
doesn't have a consistent view of its own references during the
reference negotiation phase, then the client would get the same
inconsistent view the server has. This is a separate problem though and,
if it actually exists, can be fixed at a later point.
This commit also changes the way we write FETCH_HEAD in case `--atomic`
is passed. Instead of writing changes as we go, we need to accumulate
all changes first and only commit them at the end when we know that all
reference updates succeeded. Ideally, we'd just do so via a temporary
file so that we don't need to carry all updates in-memory. This isn't
trivially doable though considering the `--append` mode, where we do not
truncate the file but simply append to it. And given that we support
concurrent processes appending to FETCH_HEAD at the same time without
any loss of data, seeding the temporary file with current contents of
FETCH_HEAD initially and then doing a rename wouldn't work either. So
this commit implements the simple strategy of buffering all changes and
appending them to the file on commit.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-12 12:27:52 +00:00
|
|
|
/*
|
|
|
|
* When using an atomic fetch, we do not want to update FETCH_HEAD if
|
|
|
|
* any of the reference updates fails. We thus have to write all
|
|
|
|
* updates to a buffer first and only commit it as soon as all
|
|
|
|
* references have been successfully updated.
|
|
|
|
*/
|
|
|
|
if (!atomic_fetch) {
|
|
|
|
strbuf_write(&fetch_head->buf, fetch_head->fp);
|
|
|
|
strbuf_reset(&fetch_head->buf);
|
|
|
|
}
|
2021-01-12 12:27:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void commit_fetch_head(struct fetch_head *fetch_head)
|
|
|
|
{
|
fetch: implement support for atomic reference updates
When executing a fetch, then git will currently allocate one reference
transaction per reference update and directly commit it. This means that
fetches are non-atomic: even if some of the reference updates fail,
others may still succeed and modify local references.
This is fine in many scenarios, but this strategy has its downsides.
- The view of remote references may be inconsistent and may show a
bastardized state of the remote repository.
- Batching together updates may improve performance in certain
scenarios. While the impact probably isn't as pronounced with loose
references, the upcoming reftable backend may benefit as it needs to
write less files in case the update is batched.
- The reference-update hook is currently being executed twice per
updated reference. While this doesn't matter when there is no such
hook, we have seen severe performance regressions when doing a
git-fetch(1) with reference-transaction hook when the remote
repository has hundreds of thousands of references.
Similar to `git push --atomic`, this commit thus introduces atomic
fetches. Instead of allocating one reference transaction per updated
reference, it causes us to only allocate a single transaction and commit
it as soon as all updates were received. If locking of any reference
fails, then we abort the complete transaction and don't update any
reference, which gives us an all-or-nothing fetch.
Note that this may not completely fix the first of above downsides, as
the consistent view also depends on the server-side. If the server
doesn't have a consistent view of its own references during the
reference negotiation phase, then the client would get the same
inconsistent view the server has. This is a separate problem though and,
if it actually exists, can be fixed at a later point.
This commit also changes the way we write FETCH_HEAD in case `--atomic`
is passed. Instead of writing changes as we go, we need to accumulate
all changes first and only commit them at the end when we know that all
reference updates succeeded. Ideally, we'd just do so via a temporary
file so that we don't need to carry all updates in-memory. This isn't
trivially doable though considering the `--append` mode, where we do not
truncate the file but simply append to it. And given that we support
concurrent processes appending to FETCH_HEAD at the same time without
any loss of data, seeding the temporary file with current contents of
FETCH_HEAD initially and then doing a rename wouldn't work either. So
this commit implements the simple strategy of buffering all changes and
appending them to the file on commit.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-12 12:27:52 +00:00
|
|
|
if (!fetch_head->fp || !atomic_fetch)
|
|
|
|
return;
|
|
|
|
strbuf_write(&fetch_head->buf, fetch_head->fp);
|
2021-01-12 12:27:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void close_fetch_head(struct fetch_head *fetch_head)
|
|
|
|
{
|
|
|
|
if (!fetch_head->fp)
|
|
|
|
return;
|
|
|
|
|
|
|
|
fclose(fetch_head->fp);
|
2021-01-12 12:27:39 +00:00
|
|
|
strbuf_release(&fetch_head->buf);
|
2021-01-12 12:27:35 +00:00
|
|
|
}
|
|
|
|
|
2019-08-06 17:19:52 +00:00
|
|
|
/*
 * Message telling the user that the forced-update check is disabled and
 * how to turn it back on (command-line flag or fetch.showForcedUpdates).
 * The text is wrapped in N_(); presumably that only marks it for
 * translation and the use site still has to run it through _() -- confirm
 * where it is printed. The message has no trailing newline.
 */
static const char warn_show_forced_updates[] =
|
2021-12-01 22:15:40 +00:00
|
|
|
N_("fetch normally indicates which branches had a forced update,\n"
|
|
|
|
"but that check has been disabled; to re-enable, use '--show-forced-updates'\n"
|
|
|
|
"flag or run 'git config fetch.showForcedUpdates true'");
|
2019-08-06 17:19:52 +00:00
|
|
|
/*
 * Format string reporting how long the forced-update check took and how
 * to disable it. It contains a single "%.2f" conversion, so the caller
 * must supply one floating-point argument (a duration in seconds).
 * The text is wrapped in N_(); presumably the use site translates it with
 * _() before formatting -- confirm where it is printed. The message ends
 * with a newline.
 */
static const char warn_time_show_forced_updates[] =
|
2021-12-01 22:15:40 +00:00
|
|
|
N_("it took %.2f seconds to check forced updates; you can use\n"
|
2019-08-06 17:19:52 +00:00
|
|
|
"'--no-show-forced-updates' or run 'git config fetch.showForcedUpdates false'\n"
|
2021-12-01 22:15:40 +00:00
|
|
|
"to avoid this check\n");
|
2019-08-06 17:19:52 +00:00
|
|
|
|
2023-03-20 12:35:20 +00:00
|
|
|
static int store_updated_refs(struct display_state *display_state,
|
2023-03-20 12:35:36 +00:00
|
|
|
const char *remote_name,
|
2022-02-17 13:04:36 +00:00
|
|
|
int connectivity_checked,
|
|
|
|
struct ref_transaction *transaction, struct ref *ref_map,
|
2023-05-17 11:49:04 +00:00
|
|
|
struct fetch_head *fetch_head,
|
|
|
|
const struct fetch_config *config)
|
2007-09-11 03:03:25 +00:00
|
|
|
{
|
2023-03-20 12:35:36 +00:00
|
|
|
int rc = 0;
|
2022-07-05 13:46:59 +00:00
|
|
|
struct strbuf note = STRBUF_INIT;
|
2007-09-11 03:03:25 +00:00
|
|
|
const char *what, *kind;
|
|
|
|
struct ref *rm;
|
2013-05-11 16:15:59 +00:00
|
|
|
int want_status;
|
2022-02-10 12:28:16 +00:00
|
|
|
int summary_width = 0;
|
2007-09-11 03:03:25 +00:00
|
|
|
|
2022-02-10 12:28:16 +00:00
|
|
|
if (verbosity >= 0)
|
|
|
|
summary_width = transport_summary_width(ref_map);
|
|
|
|
|
fetch-pack: write shallow, then check connectivity
When fetching, connectivity is checked after the shallow file is
updated. There are 2 issues with this: (1) the connectivity check is
only performed up to ancestors of existing refs (which is not thorough
enough if we were deepening an existing ref in the first place), and (2)
there is no rollback of the shallow file if the connectivity check
fails.
To solve (1), update the connectivity check to check the ancestry chain
completely in the case of a deepening fetch by refraining from passing
"--not --all" when invoking rev-list in connected.c.
To solve (2), have fetch_pack() perform its own connectivity check
before updating the shallow file. To support existing use cases in which
"git fetch-pack" is used to download objects without much regard as to
the connectivity of the resulting objects with respect to the existing
repository, the connectivity check is only done if necessary (that is,
the fetch is not a clone, and the fetch involves shallow/deepen
functionality). "git fetch" still performs its own connectivity check,
preserving correctness but sometimes performing redundant work. This
redundancy is mitigated by the fact that fetch_pack() reports if it has
performed a connectivity check itself, and if the transport supports
connect or stateless-connect, it will bubble up that report so that "git
fetch" knows not to perform the connectivity check in such a case.
This was noticed when a user tried to deepen an existing repository by
fetching with --no-shallow from a server that did not send all necessary
objects - the connectivity check as run by "git fetch" succeeded, but a
subsequent "git fsck" failed.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-02 22:08:43 +00:00
|
|
|
if (!connectivity_checked) {
|
2020-01-12 04:15:25 +00:00
|
|
|
struct check_connected_options opt = CHECK_CONNECTED_INIT;
|
|
|
|
|
2023-02-12 09:04:26 +00:00
|
|
|
opt.exclude_hidden_refs_section = "fetch";
|
fetch-pack: write shallow, then check connectivity
When fetching, connectivity is checked after the shallow file is
updated. There are 2 issues with this: (1) the connectivity check is
only performed up to ancestors of existing refs (which is not thorough
enough if we were deepening an existing ref in the first place), and (2)
there is no rollback of the shallow file if the connectivity check
fails.
To solve (1), update the connectivity check to check the ancestry chain
completely in the case of a deepening fetch by refraining from passing
"--not --all" when invoking rev-list in connected.c.
To solve (2), have fetch_pack() perform its own connectivity check
before updating the shallow file. To support existing use cases in which
"git fetch-pack" is used to download objects without much regard as to
the connectivity of the resulting objects with respect to the existing
repository, the connectivity check is only done if necessary (that is,
the fetch is not a clone, and the fetch involves shallow/deepen
functionality). "git fetch" still performs its own connectivity check,
preserving correctness but sometimes performing redundant work. This
redundancy is mitigated by the fact that fetch_pack() reports if it has
performed a connectivity check itself, and if the transport supports
connect or stateless-connect, it will bubble up that report so that "git
fetch" knows not to perform the connectivity check in such a case.
This was noticed when a user tried to deepen an existing repository by
fetching with --no-shallow from a server that did not send all necessary
objects - the connectivity check as run by "git fetch" succeeded, but a
subsequent "git fsck" failed.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-02 22:08:43 +00:00
|
|
|
rm = ref_map;
|
2020-01-12 04:15:25 +00:00
|
|
|
if (check_connected(iterate_ref_map, &rm, &opt)) {
|
2023-03-20 12:35:36 +00:00
|
|
|
rc = error(_("%s did not send all necessary objects\n"),
|
|
|
|
display_state->url);
|
fetch-pack: write shallow, then check connectivity
When fetching, connectivity is checked after the shallow file is
updated. There are 2 issues with this: (1) the connectivity check is
only performed up to ancestors of existing refs (which is not thorough
enough if we were deepening an existing ref in the first place), and (2)
there is no rollback of the shallow file if the connectivity check
fails.
To solve (1), update the connectivity check to check the ancestry chain
completely in the case of a deepening fetch by refraining from passing
"--not --all" when invoking rev-list in connected.c.
To solve (2), have fetch_pack() perform its own connectivity check
before updating the shallow file. To support existing use cases in which
"git fetch-pack" is used to download objects without much regard as to
the connectivity of the resulting objects with respect to the existing
repository, the connectivity check is only done if necessary (that is,
the fetch is not a clone, and the fetch involves shallow/deepen
functionality). "git fetch" still performs its own connectivity check,
preserving correctness but sometimes performing redundant work. This
redundancy is mitigated by the fact that fetch_pack() reports if it has
performed a connectivity check itself, and if the transport supports
connect or stateless-connect, it will bubble up that report so that "git
fetch" knows not to perform the connectivity check in such a case.
This was noticed when a user tried to deepen an existing repository by
fetching with --no-shallow from a server that did not send all necessary
objects - the connectivity check as run by "git fetch" succeeded, but a
subsequent "git fsck" failed.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-02 22:08:43 +00:00
|
|
|
goto abort;
|
|
|
|
}
|
2011-10-07 07:40:22 +00:00
|
|
|
}
|
2011-09-01 22:43:35 +00:00
|
|
|
|
2011-12-26 16:16:56 +00:00
|
|
|
/*
|
2013-05-11 16:15:59 +00:00
|
|
|
* We do a pass for each fetch_head_status type in their enum order, so
|
|
|
|
* merged entries are written before not-for-merge. That lets readers
|
|
|
|
* use FETCH_HEAD as a refname to refer to the ref to be merged.
|
2011-12-26 16:16:56 +00:00
|
|
|
*/
|
2013-05-11 16:15:59 +00:00
|
|
|
for (want_status = FETCH_HEAD_MERGE;
|
|
|
|
want_status <= FETCH_HEAD_IGNORE;
|
|
|
|
want_status++) {
|
2011-12-26 16:16:56 +00:00
|
|
|
for (rm = ref_map; rm; rm = rm->next) {
|
|
|
|
struct ref *ref = NULL;
|
|
|
|
|
2013-12-05 13:02:40 +00:00
|
|
|
if (rm->status == REF_STATUS_REJECT_SHALLOW) {
|
|
|
|
if (want_status == FETCH_HEAD_MERGE)
|
2021-05-18 06:18:55 +00:00
|
|
|
warning(_("rejected %s because shallow roots are not allowed to be updated"),
|
2013-12-05 13:02:40 +00:00
|
|
|
rm->peer_ref ? rm->peer_ref->name : rm->name);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2021-09-01 13:09:41 +00:00
|
|
|
/*
|
fetch: avoid lookup of commits when not appending to FETCH_HEAD
When fetching from a remote repository we will by default write what has
been fetched into the special FETCH_HEAD reference. The order in which
references are written depends on whether the reference is for merge or
not, which, despite some other conditions, is also determined based on
whether the old object ID the reference is being updated from actually
exists in the repository.
To write FETCH_HEAD we thus loop through all references thrice: once for
the references that are about to be merged, once for the references that
are not for merge, and finally for all references that are ignored. For
every iteration, we then look up the old object ID to determine whether
the referenced object exists so that we can label it as "not-for-merge"
if it doesn't exist. It goes without saying that this can be expensive
in case where we are fetching a lot of references.
While this is hard to avoid in the case where we're writing FETCH_HEAD,
users can in fact ask us to skip this work via `--no-write-fetch-head`.
In that case, we do not care for the result of those lookups at all
because we don't have to order writes to FETCH_HEAD in the first place.
Skip this busywork in case we're not writing to FETCH_HEAD. The
following benchmark performs a mirror-fetch in a repository with about
two million references via `git fetch --prune --no-write-fetch-head
+refs/*:refs/*`:
Benchmark 1: HEAD~
Time (mean ± σ): 75.388 s ± 1.942 s [User: 71.103 s, System: 8.953 s]
Range (min … max): 73.184 s … 76.845 s 3 runs
Benchmark 2: HEAD
Time (mean ± σ): 69.486 s ± 1.016 s [User: 65.941 s, System: 8.806 s]
Range (min … max): 68.864 s … 70.659 s 3 runs
Summary
'HEAD' ran
1.08 ± 0.03 times faster than 'HEAD~'
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-03-01 09:33:41 +00:00
|
|
|
* When writing FETCH_HEAD we need to determine whether
|
|
|
|
* we already have the commit or not. If not, then the
|
|
|
|
* reference is not for merge and needs to be written
|
|
|
|
* to the reflog after other commits which we already
|
|
|
|
* have. We're not interested in this property though
|
|
|
|
* in case FETCH_HEAD is not to be updated, so we can
|
|
|
|
* skip the classification in that case.
|
2021-09-01 13:09:41 +00:00
|
|
|
*/
|
fetch: avoid lookup of commits when not appending to FETCH_HEAD
When fetching from a remote repository we will by default write what has
been fetched into the special FETCH_HEAD reference. The order in which
references are written depends on whether the reference is for merge or
not, which, despite some other conditions, is also determined based on
whether the old object ID the reference is being updated from actually
exists in the repository.
To write FETCH_HEAD we thus loop through all references thrice: once for
the references that are about to be merged, once for the references that
are not for merge, and finally for all references that are ignored. For
every iteration, we then look up the old object ID to determine whether
the referenced object exists so that we can label it as "not-for-merge"
if it doesn't exist. It goes without saying that this can be expensive
in case where we are fetching a lot of references.
While this is hard to avoid in the case where we're writing FETCH_HEAD,
users can in fact ask us to skip this work via `--no-write-fetch-head`.
In that case, we do not care for the result of those lookups at all
because we don't have to order writes to FETCH_HEAD in the first place.
Skip this busywork in case we're not writing to FETCH_HEAD. The
following benchmark performs a mirror-fetch in a repository with about
two million references via `git fetch --prune --no-write-fetch-head
+refs/*:refs/*`:
Benchmark 1: HEAD~
Time (mean ± σ): 75.388 s ± 1.942 s [User: 71.103 s, System: 8.953 s]
Range (min … max): 73.184 s … 76.845 s 3 runs
Benchmark 2: HEAD
Time (mean ± σ): 69.486 s ± 1.016 s [User: 65.941 s, System: 8.806 s]
Range (min … max): 68.864 s … 70.659 s 3 runs
Summary
'HEAD' ran
1.08 ± 0.03 times faster than 'HEAD~'
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-03-01 09:33:41 +00:00
|
|
|
if (fetch_head->fp) {
|
|
|
|
struct commit *commit = NULL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* References in "refs/tags/" are often going to point
|
|
|
|
* to annotated tags, which are not part of the
|
|
|
|
* commit-graph. We thus only try to look up refs in
|
|
|
|
* the graph which are not in that namespace to not
|
|
|
|
* regress performance in repositories with many
|
|
|
|
* annotated tags.
|
|
|
|
*/
|
|
|
|
if (!starts_with(rm->name, "refs/tags/"))
|
|
|
|
commit = lookup_commit_in_graph(the_repository, &rm->old_oid);
|
|
|
|
if (!commit) {
|
|
|
|
commit = lookup_commit_reference_gently(the_repository,
|
|
|
|
&rm->old_oid,
|
|
|
|
1);
|
|
|
|
if (!commit)
|
|
|
|
rm->fetch_head_status = FETCH_HEAD_NOT_FOR_MERGE;
|
|
|
|
}
|
2021-09-01 13:09:41 +00:00
|
|
|
}
|
2011-12-26 16:16:56 +00:00
|
|
|
|
2013-05-11 16:15:59 +00:00
|
|
|
if (rm->fetch_head_status != want_status)
|
2011-12-26 16:16:56 +00:00
|
|
|
continue;
|
|
|
|
|
|
|
|
if (rm->peer_ref) {
|
2015-09-24 21:08:09 +00:00
|
|
|
ref = alloc_ref(rm->peer_ref->name);
|
2015-11-10 02:22:20 +00:00
|
|
|
oidcpy(&ref->old_oid, &rm->peer_ref->old_oid);
|
|
|
|
oidcpy(&ref->new_oid, &rm->old_oid);
|
2011-12-26 16:16:56 +00:00
|
|
|
ref->force = rm->peer_ref->force;
|
|
|
|
}
|
2007-09-11 03:03:25 +00:00
|
|
|
|
2023-05-17 11:49:08 +00:00
|
|
|
if (config->recurse_submodules != RECURSE_SUBMODULES_OFF &&
|
fetch: do not look for submodule changes in unchanged refs
When fetching recursively with submodules, for each ref in the
superproject, we call check_for_new_submodule_commits() which collects all
the objects that have to be checked for submodule changes on
calculate_changed_submodule_paths(). On the first call, it also collects all
the existing refs for excluding them from the scan.
calculate_changed_submodule_paths() creates an argument array with all the
collected new objects, followed by --not and all the old objects. This argv
is passed to setup_revisions, which parses each argument, converts it back
to an oid and resolves the object. The parsing itself also does redundant
work, because it is treated like user input, while in fact it is a full
oid. So it needlessly attempts to look it up as ref (checks if it has ^, ~
etc.), checks if it is a file name etc.
For a repository with many refs, all of this is expensive. But if the fetch
in the superproject did not update the ref (i.e. the objects that are
required to exist in the submodule did not change), there is no need to
include it in the list.
Before commit be76c212 (fetch: ensure submodule objects fetched,
2018-12-06), submodule reference changes were only detected for refs that
were changed, but not for new refs. This commit covered also this case, but
what it did was to just include every ref.
This change should reduce the number of scanned refs by about half (except
the case of a no-op fetch, which will not scan any ref), because all the
existing refs will still be listed after --not.
The regression was reported here:
https://public-inbox.org/git/CAGHpTBKSUJzFSWc=uznSu2zB33qCSmKXM-iAjxRCpqNK5bnhRg@mail.gmail.com/
Signed-off-by: Orgad Shaneh <orgads@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-04 13:50:49 +00:00
|
|
|
(!rm->peer_ref || !oideq(&ref->old_oid, &ref->new_oid))) {
|
2018-12-06 21:26:55 +00:00
|
|
|
check_for_new_submodule_commits(&rm->old_oid);
|
fetch: do not look for submodule changes in unchanged refs
When fetching recursively with submodules, for each ref in the
superproject, we call check_for_new_submodule_commits() which collects all
the objects that have to be checked for submodule changes on
calculate_changed_submodule_paths(). On the first call, it also collects all
the existing refs for excluding them from the scan.
calculate_changed_submodule_paths() creates an argument array with all the
collected new objects, followed by --not and all the old objects. This argv
is passed to setup_revisions, which parses each argument, converts it back
to an oid and resolves the object. The parsing itself also does redundant
work, because it is treated like user input, while in fact it is a full
oid. So it needlessly attempts to look it up as ref (checks if it has ^, ~
etc.), checks if it is a file name etc.
For a repository with many refs, all of this is expensive. But if the fetch
in the superproject did not update the ref (i.e. the objects that are
required to exist in the submodule did not change), there is no need to
include it in the list.
Before commit be76c212 (fetch: ensure submodule objects fetched,
2018-12-06), submodule reference changes were only detected for refs that
were changed, but not for new refs. This commit covered also this case, but
what it did was to just include every ref.
This change should reduce the number of scanned refs by about half (except
the case of a no-op fetch, which will not scan any ref), because all the
existing refs will still be listed after --not.
The regression was reported here:
https://public-inbox.org/git/CAGHpTBKSUJzFSWc=uznSu2zB33qCSmKXM-iAjxRCpqNK5bnhRg@mail.gmail.com/
Signed-off-by: Orgad Shaneh <orgads@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-04 13:50:49 +00:00
|
|
|
}
|
2007-09-11 03:03:25 +00:00
|
|
|
|
2011-12-26 16:16:56 +00:00
|
|
|
if (!strcmp(rm->name, "HEAD")) {
|
|
|
|
kind = "";
|
|
|
|
what = "";
|
fetch: print left-hand side when fetching HEAD:foo
`store_updated_refs()` parses the remote reference for two purposes:
- It gets used as a note when writing FETCH_HEAD.
- It is passed through to `display_ref_update()` to display
updated references in the following format:
```
* branch master -> master
```
In most cases, the parsed remote reference is the prettified reference
name and can thus be used for both cases. But if the remote reference is
HEAD, the parsed remote reference becomes empty. This is intended when
we write the FETCH_HEAD, where we skip writing the note in that case.
But when displaying the updated references this leads to inconsistent
output where the left-hand side of reference updates is missing in some
cases:
```
$ git fetch origin HEAD HEAD:explicit-head :implicit-head main
From https://github.com/git/git
* branch HEAD -> FETCH_HEAD
* [new ref] -> explicit-head
* [new ref] -> implicit-head
* branch main -> FETCH_HEAD
```
This behaviour has existed ever since the table-based output has been
introduced for git-fetch(1) via 165f390250 (git-fetch: more terse fetch
output, 2007-11-03) and was never explicitly documented either in the
commit message or in any of our tests. So while it may not be a bug per
se, it feels like a weird inconsistency and not like it was a conscious
design decision.
The logic of how we compute the remote reference name that we ultimately
pass to `display_ref_update()` is not easy to follow. There are three
different cases here:
- When the remote reference name is "HEAD" we set the remote
reference name to the empty string. This is the case that causes
the left-hand side to go missing, where we would indeed want to
print "HEAD" instead of the empty string. This is what
`prettify_refname()` would return.
- When the remote reference name has a well-known prefix then we
strip this prefix. This matches what `prettify_refname()` does.
- Otherwise, we keep the fully qualified reference name. This also
matches what `prettify_refname()` does.
As the return value of `prettify_refname()` would do the correct thing
for us in all three cases, we can thus fix the inconsistency by passing
through the full remote reference name to `display_ref_update()`, which
learns to call `prettify_refname()`. At the same time, this also
simplifies the code a bit.
Note that this patch also changes formatting of the block that computes
the "kind" (which is the category like "branch" or "tag") and "what"
(which is the prettified reference name like "master" or "v1.0")
variables. This is done on purpose so that it is part of the diff,
hopefully making the change easier to comprehend.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:15 +00:00
|
|
|
} else if (skip_prefix(rm->name, "refs/heads/", &what)) {
|
2011-12-26 16:16:56 +00:00
|
|
|
kind = "branch";
|
fetch: print left-hand side when fetching HEAD:foo
`store_updated_refs()` parses the remote reference for two purposes:
- It gets used as a note when writing FETCH_HEAD.
- It is passed through to `display_ref_update()` to display
updated references in the following format:
```
* branch master -> master
```
In most cases, the parsed remote reference is the prettified reference
name and can thus be used for both cases. But if the remote reference is
HEAD, the parsed remote reference becomes empty. This is intended when
we write the FETCH_HEAD, where we skip writing the note in that case.
But when displaying the updated references this leads to inconsistent
output where the left-hand side of reference updates is missing in some
cases:
```
$ git fetch origin HEAD HEAD:explicit-head :implicit-head main
From https://github.com/git/git
* branch HEAD -> FETCH_HEAD
* [new ref] -> explicit-head
* [new ref] -> implicit-head
* branch main -> FETCH_HEAD
```
This behaviour has existed ever since the table-based output has been
introduced for git-fetch(1) via 165f390250 (git-fetch: more terse fetch
output, 2007-11-03) and was never explicitly documented either in the
commit message or in any of our tests. So while it may not be a bug per
se, it feels like a weird inconsistency and not like it was a conscious
design decision.
The logic of how we compute the remote reference name that we ultimately
pass to `display_ref_update()` is not easy to follow. There are three
different cases here:
- When the remote reference name is "HEAD" we set the remote
reference name to the empty string. This is the case that causes
the left-hand side to go missing, where we would indeed want to
print "HEAD" instead of the empty string. This is what
`prettify_refname()` would return.
- When the remote reference name has a well-known prefix then we
strip this prefix. This matches what `prettify_refname()` does.
- Otherwise, we keep the fully qualified reference name. This also
matches what `prettify_refname()` does.
As the return value of `prettify_refname()` would do the correct thing
for us in all three cases, we can thus fix the inconsistency by passing
through the full remote reference name to `display_ref_update()`, which
learns to call `prettify_refname()`. At the same time, this also
simplifies the code a bit.
Note that this patch also changes formatting of the block that computes
the "kind" (which is the category like "branch" or "tag") and "what"
(which is the prettified reference name like "master" or "v1.0")
variables. This is done on purpose so that it is part of the diff,
hopefully making the change easier to comprehend.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:15 +00:00
|
|
|
} else if (skip_prefix(rm->name, "refs/tags/", &what)) {
|
2011-12-26 16:16:56 +00:00
|
|
|
kind = "tag";
|
fetch: print left-hand side when fetching HEAD:foo
`store_updated_refs()` parses the remote reference for two purposes:
- It gets used as a note when writing FETCH_HEAD.
- It is passed through to `display_ref_update()` to display
updated references in the following format:
```
* branch master -> master
```
In most cases, the parsed remote reference is the prettified reference
name and can thus be used for both cases. But if the remote reference is
HEAD, the parsed remote reference becomes empty. This is intended when
we write the FETCH_HEAD, where we skip writing the note in that case.
But when displaying the updated references this leads to inconsistent
output where the left-hand side of reference updates is missing in some
cases:
```
$ git fetch origin HEAD HEAD:explicit-head :implicit-head main
From https://github.com/git/git
* branch HEAD -> FETCH_HEAD
* [new ref] -> explicit-head
* [new ref] -> implicit-head
* branch main -> FETCH_HEAD
```
This behaviour has existed ever since the table-based output has been
introduced for git-fetch(1) via 165f390250 (git-fetch: more terse fetch
output, 2007-11-03) and was never explicitly documented either in the
commit message or in any of our tests. So while it may not be a bug per
se, it feels like a weird inconsistency and not like it was a conscious
design decision.
The logic of how we compute the remote reference name that we ultimately
pass to `display_ref_update()` is not easy to follow. There are three
different cases here:
- When the remote reference name is "HEAD" we set the remote
reference name to the empty string. This is the case that causes
the left-hand side to go missing, where we would indeed want to
print "HEAD" instead of the empty string. This is what
`prettify_refname()` would return.
- When the remote reference name has a well-known prefix then we
strip this prefix. This matches what `prettify_refname()` does.
- Otherwise, we keep the fully qualified reference name. This also
matches what `prettify_refname()` does.
As the return value of `prettify_refname()` would do the correct thing
for us in all three cases, we can thus fix the inconsistency by passing
through the full remote reference name to `display_ref_update()`, which
learns to call `prettify_refname()`. At the same time, this also
simplifies the code a bit.
Note that this patch also changes formatting of the block that computes
the "kind" (which is the category like "branch" or "tag") and "what"
(which is the prettified reference name like "master" or "v1.0")
variables. This is done on purpose so that it is part of the diff,
hopefully making the change easier to comprehend.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:15 +00:00
|
|
|
} else if (skip_prefix(rm->name, "refs/remotes/", &what)) {
|
2011-12-26 16:16:56 +00:00
|
|
|
kind = "remote-tracking branch";
|
fetch: print left-hand side when fetching HEAD:foo
`store_updated_refs()` parses the remote reference for two purposes:
- It gets used as a note when writing FETCH_HEAD.
- It is passed through to `display_ref_update()` to display
updated references in the following format:
```
* branch master -> master
```
In most cases, the parsed remote reference is the prettified reference
name and can thus be used for both cases. But if the remote reference is
HEAD, the parsed remote reference becomes empty. This is intended when
we write the FETCH_HEAD, where we skip writing the note in that case.
But when displaying the updated references this leads to inconsistent
output where the left-hand side of reference updates is missing in some
cases:
```
$ git fetch origin HEAD HEAD:explicit-head :implicit-head main
From https://github.com/git/git
* branch HEAD -> FETCH_HEAD
* [new ref] -> explicit-head
* [new ref] -> implicit-head
* branch main -> FETCH_HEAD
```
This behaviour has existed ever since the table-based output has been
introduced for git-fetch(1) via 165f390250 (git-fetch: more terse fetch
output, 2007-11-03) and was never explicitly documented either in the
commit message or in any of our tests. So while it may not be a bug per
se, it feels like a weird inconsistency and not like it was a conscious
design decision.
The logic of how we compute the remote reference name that we ultimately
pass to `display_ref_update()` is not easy to follow. There are three
different cases here:
- When the remote reference name is "HEAD" we set the remote
reference name to the empty string. This is the case that causes
the left-hand side to go missing, where we would indeed want to
print "HEAD" instead of the empty string. This is what
`prettify_refname()` would return.
- When the remote reference name has a well-known prefix then we
strip this prefix. This matches what `prettify_refname()` does.
- Otherwise, we keep the fully qualified reference name. This also
matches what `prettify_refname()` does.
As the return value of `prettify_refname()` would do the correct thing
for us in all three cases, we can thus fix the inconsistency by passing
through the full remote reference name to `display_ref_update()`, which
learns to call `prettify_refname()`. At the same time, this also
simplifies the code a bit.
Note that this patch also changes formatting of the block that computes
the "kind" (which is the category like "branch" or "tag") and "what"
(which is the prettified reference name like "master" or "v1.0")
variables. This is done on purpose so that it is part of the diff,
hopefully making the change easier to comprehend.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:15 +00:00
|
|
|
} else {
|
2011-12-26 16:16:56 +00:00
|
|
|
kind = "";
|
|
|
|
what = rm->name;
|
|
|
|
}
|
2007-09-11 03:03:25 +00:00
|
|
|
|
2011-12-26 16:16:56 +00:00
|
|
|
strbuf_reset(¬e);
|
|
|
|
if (*what) {
|
|
|
|
if (*kind)
|
|
|
|
strbuf_addf(¬e, "%s ", kind);
|
|
|
|
strbuf_addf(¬e, "'%s' of ", what);
|
|
|
|
}
|
2021-01-12 12:27:35 +00:00
|
|
|
|
2022-02-17 13:04:24 +00:00
|
|
|
append_fetch_head(fetch_head, &rm->old_oid,
|
2021-01-12 12:27:35 +00:00
|
|
|
rm->fetch_head_status,
|
2023-03-20 12:35:36 +00:00
|
|
|
note.buf, display_state->url,
|
|
|
|
display_state->url_len);
|
2011-12-26 16:16:56 +00:00
|
|
|
|
|
|
|
if (ref) {
|
fetch: print left-hand side when fetching HEAD:foo
`store_updated_refs()` parses the remote reference for two purposes:
- It gets used as a note when writing FETCH_HEAD.
- It is passed through to `display_ref_update()` to display
updated references in the following format:
```
* branch master -> master
```
In most cases, the parsed remote reference is the prettified reference
name and can thus be used for both cases. But if the remote reference is
HEAD, the parsed remote reference becomes empty. This is intended when
we write the FETCH_HEAD, where we skip writing the note in that case.
But when displaying the updated references this leads to inconsistent
output where the left-hand side of reference updates is missing in some
cases:
```
$ git fetch origin HEAD HEAD:explicit-head :implicit-head main
From https://github.com/git/git
* branch HEAD -> FETCH_HEAD
* [new ref] -> explicit-head
* [new ref] -> implicit-head
* branch main -> FETCH_HEAD
```
This behaviour has existed ever since the table-based output has been
introduced for git-fetch(1) via 165f390250 (git-fetch: more terse fetch
output, 2007-11-03) and was never explicitly documented either in the
commit message or in any of our tests. So while it may not be a bug per
se, it feels like a weird inconsistency and not like it was a conscious
design decision.
The logic of how we compute the remote reference name that we ultimately
pass to `display_ref_update()` is not easy to follow. There are three
different cases here:
- When the remote reference name is "HEAD" we set the remote
reference name to the empty string. This is the case that causes
the left-hand side to go missing, where we would indeed want to
print "HEAD" instead of the empty string. This is what
`prettify_refname()` would return.
- When the remote reference name has a well-known prefix then we
strip this prefix. This matches what `prettify_refname()` does.
- Otherwise, we keep the fully qualified reference name. This also
matches what `prettify_refname()` does.
As the return value of `prettify_refname()` would do the correct thing
for us in all three cases, we can thus fix the inconsistency by passing
through the full remote reference name to `display_ref_update()`, which
learns to call `prettify_refname()`. At the same time, this also
simplifies the code a bit.
Note that this patch also changes formatting of the block that computes
the "kind" (which is the category like "branch" or "tag") and "what"
(which is the prettified reference name like "master" or "v1.0")
variables. This is done on purpose so that it is part of the diff,
hopefully making the change easier to comprehend.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:15 +00:00
|
|
|
rc |= update_local_ref(ref, transaction, display_state,
|
2023-05-17 11:49:04 +00:00
|
|
|
rm, summary_width, config);
|
2011-12-26 16:16:56 +00:00
|
|
|
free(ref);
|
2020-09-02 21:05:39 +00:00
|
|
|
} else if (write_fetch_head || dry_run) {
|
|
|
|
/*
|
|
|
|
* Display fetches written to FETCH_HEAD (or
|
|
|
|
* would be written to FETCH_HEAD, if --dry-run
|
|
|
|
* is set).
|
|
|
|
*/
|
2023-03-20 12:35:40 +00:00
|
|
|
display_ref_update(display_state, '*',
|
|
|
|
*kind ? kind : "branch", NULL,
|
fetch: print left-hand side when fetching HEAD:foo
`store_updated_refs()` parses the remote reference for two purposes:
- It gets used as a note when writing FETCH_HEAD.
- It is passed through to `display_ref_update()` to display
updated references in the following format:
```
* branch master -> master
```
In most cases, the parsed remote reference is the prettified reference
name and can thus be used for both cases. But if the remote reference is
HEAD, the parsed remote reference becomes empty. This is intended when
we write the FETCH_HEAD, where we skip writing the note in that case.
But when displaying the updated references this leads to inconsistent
output where the left-hand side of reference updates is missing in some
cases:
```
$ git fetch origin HEAD HEAD:explicit-head :implicit-head main
From https://github.com/git/git
* branch HEAD -> FETCH_HEAD
* [new ref] -> explicit-head
* [new ref] -> implicit-head
* branch main -> FETCH_HEAD
```
This behaviour has existed ever since the table-based output has been
introduced for git-fetch(1) via 165f390250 (git-fetch: more terse fetch
output, 2007-11-03) and was never explicitly documented either in the
commit message or in any of our tests. So while it may not be a bug per
se, it feels like a weird inconsistency and not like it was a conscious
design decision.
The logic of how we compute the remote reference name that we ultimately
pass to `display_ref_update()` is not easy to follow. There are three
different cases here:
- When the remote reference name is "HEAD" we set the remote
reference name to the empty string. This is the case that causes
the left-hand side to go missing, where we would indeed want to
print "HEAD" instead of the empty string. This is what
`prettify_refname()` would return.
- When the remote reference name has a well-known prefix then we
strip this prefix. This matches what `prettify_refname()` does.
- Otherwise, we keep the fully qualified reference name. This also
matches what `prettify_refname()` does.
As the return value of `prettify_refname()` would do the correct thing
for us in all three cases, we can thus fix the inconsistency by passing
through the full remote reference name to `display_ref_update()`, which
learns to call `prettify_refname()`. At the same time, this also
simplifies the code a bit.
Note that this patch also changes formatting of the block that computes
the "kind" (which is the category like "branch" or "tag") and "what"
(which is the prettified reference name like "master" or "v1.0")
variables. This is done on purpose so that it is part of the diff,
hopefully making the change easier to comprehend.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:15 +00:00
|
|
|
rm->name,
|
fetch: introduce machine-parseable "porcelain" output format
The output of git-fetch(1) is obviously designed for consumption by
users, only: we neatly columnize data, we abbreviate reference names, we
print neat arrows and we don't provide information about actual object
IDs that have changed. This makes the output format basically unusable
in the context of scripted invocations of git-fetch(1) that want to
learn about the exact changes that the command performs.
Introduce a new machine-parseable "porcelain" output format that is
supposed to fix this shortcoming. This output format is intended to
provide information about every reference that is about to be updated,
the old object ID that the reference has been pointing to and the new
object ID it will be updated to. Furthermore, the output format provides
the same flags as the human-readable format to indicate basic conditions
for each reference update like whether it was a fast-forward update, a
branch deletion, a rejected update or others.
The output format is quite simple:
```
<flag> <old-object-id> <new-object-id> <local-reference>\n
```
We assume two conditions which are generally true:
- The old and new object IDs have fixed known widths and cannot
contain spaces.
- References cannot contain newlines.
With these assumptions, the output format becomes unambiguously
parseable. Furthermore, given that this output is designed to be
consumed by scripts, the machine-readable data is printed to stdout
instead of stderr like the human-readable output is. This is mostly done
so that other data printed to stderr, like error messages or progress
meters, don't interfere with the parseable data.
A notable omission here is that the output format does not include the
remote from which a reference was fetched, which might be important
information especially in the context of multi-remote fetches. But as
such a format would require us to print the remote for every single
reference update due to parallelizable fetches it feels wasteful for the
most likely usecase, which is when fetching from a single remote.
In a similar spirit, a second restriction is that this cannot be used
with `--recurse-submodules`. This is because any reference updates would
be ambiguous without also printing the repository in which the update
happens.
Considering that both multi-remote and submodule fetches are user-facing
features, using them in conjunction with `--porcelain` that is intended
for scripting purposes is likely not going to be useful in the majority
of cases. With that in mind these restrictions feel acceptable. If
usecases for either of these come up in the future though it is easy
enough to add a new "porcelain-v2" format that adds this information.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:36 +00:00
|
|
|
"FETCH_HEAD",
|
|
|
|
&rm->new_oid, &rm->old_oid,
|
|
|
|
summary_width);
|
2007-11-03 05:32:48 +00:00
|
|
|
}
|
|
|
|
}
|
2007-09-11 03:03:25 +00:00
|
|
|
}
|
2011-10-07 07:40:22 +00:00
|
|
|
|
2009-05-25 10:40:54 +00:00
|
|
|
if (rc & STORE_REF_ERROR_DF_CONFLICT)
|
2011-02-22 23:41:51 +00:00
|
|
|
error(_("some local refs could not be updated; try running\n"
|
2008-06-27 04:01:41 +00:00
|
|
|
" 'git remote prune %s' to remove any old, conflicting "
|
2011-02-22 23:41:51 +00:00
|
|
|
"branches"), remote_name);
|
2011-10-07 07:40:22 +00:00
|
|
|
|
2021-08-23 10:44:00 +00:00
|
|
|
if (advice_enabled(ADVICE_FETCH_SHOW_FORCED_UPDATES)) {
|
2023-05-17 11:49:04 +00:00
|
|
|
if (!config->show_forced_updates) {
|
2019-08-06 17:19:52 +00:00
|
|
|
warning(_(warn_show_forced_updates));
|
2019-06-18 20:25:27 +00:00
|
|
|
} else if (forced_updates_ms > FORCED_UPDATES_DELAY_WARNING_IN_MS) {
|
2019-08-06 17:19:52 +00:00
|
|
|
warning(_(warn_time_show_forced_updates),
|
2019-06-18 20:25:27 +00:00
|
|
|
forced_updates_ms / 1000.0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-10-07 07:40:22 +00:00
|
|
|
abort:
|
2011-12-08 08:43:19 +00:00
|
|
|
strbuf_release(¬e);
|
2008-05-28 15:29:36 +00:00
|
|
|
return rc;
|
2007-09-11 03:03:25 +00:00
|
|
|
}
|
|
|
|
|
2007-11-11 07:29:47 +00:00
|
|
|
/*
|
|
|
|
* We would want to bypass the object transfer altogether if
|
quickfetch(): Prevent overflow of the rev-list command line
quickfetch() calls rev-list to check whether the objects we are about to
fetch are already present in the repo (if so, we can skip the object fetch).
However, when there are many (~1000) refs to be fetched, the rev-list
command line grows larger than the maximum command line size on some systems
(32K in Windows). This causes rev-list to fail, making quickfetch() return
non-zero, which unnecessarily triggers the transport machinery. This somehow
causes fetch to fail with an exit code.
By using the --stdin option to rev-list (and feeding the object list to its
standard input), we prevent the overflow of the rev-list command line,
which causes quickfetch(), and subsequently the overall fetch, to succeed.
However, using rev-list --stdin is not entirely straightforward: rev-list
terminates immediately when encountering an unknown object, which can
trigger SIGPIPE if we are still writing objects to its standard input.
We therefore temporarily ignore SIGPIPE so that the fetch process is not
terminated.
The patch also contains a testcase to verify the fix (note that before
the patch, the testcase would only fail on msysGit).
Signed-off-by: Johan Herland <johan@herland.net>
Improved-by: Johannes Sixt <j6t@kdbg.org>
Improved-by: Alex Riesen <raa.lkml@gmail.com>
Tested-by: Peter Krefting <peter@softwolves.pp.se>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-07-09 23:52:30 +00:00
|
|
|
* everything we are going to fetch already exists and is connected
|
2007-11-11 07:29:47 +00:00
|
|
|
* locally.
|
|
|
|
*/
|
2018-09-21 18:22:38 +00:00
|
|
|
static int check_exist_and_connected(struct ref *ref_map)
|
2007-11-11 07:29:47 +00:00
|
|
|
{
|
2011-09-02 23:22:47 +00:00
|
|
|
struct ref *rm = ref_map;
|
check_everything_connected: use a struct with named options
The number of variants of check_everything_connected has
grown over the years, so that the "real" function takes
several possibly-zero, possibly-NULL arguments. We hid the
complexity behind some wrapper functions, but this doesn't
scale well when we want to add new options.
If we add more wrapper variants to handle the new options,
then we can get a combinatorial explosion when those options
might be used together (right now nobody wants to use both
"shallow" and "transport" together, so we get by with just a
few wrappers).
If instead we add new parameters to each function, each of
which can have a default value, then callers who want the
defaults end up with confusing invocations like:
check_everything_connected(fn, 0, data, -1, 0, NULL);
where it is unclear which parameter is which (and every
caller needs to be updated when we add new options).
Instead, let's add a struct to hold all of the optional
parameters. This is a little more verbose for the callers
(who have to declare the struct and fill it in), but it
makes their code much easier to follow, because every option
is named as it is set (and unused options do not have to be
mentioned at all).
Note that we could also stick the iteration function and its
callback data into the option struct, too. But since those
are required for each call, by avoiding doing so, we can let
very simple callers just pass "NULL" for the options and not
worry about the struct at all.
While we're touching each site, let's also rename the
function to check_connected(). The existing name was quite
long, and not all of the wrappers even used the full name.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-15 10:30:40 +00:00
|
|
|
struct check_connected_options opt = CHECK_CONNECTED_INIT;
|
2018-09-21 18:22:38 +00:00
|
|
|
struct ref *r;
|
2011-09-02 23:22:47 +00:00
|
|
|
|
2007-11-11 07:29:47 +00:00
|
|
|
/*
|
|
|
|
* If we are deepening a shallow clone we already have these
|
|
|
|
* objects reachable. Running rev-list here will return with
|
|
|
|
* a good (0) exit status and we'll bypass the fetch that we
|
|
|
|
* really need to perform. Claiming failure now will ensure
|
|
|
|
* we perform the network exchange to deepen our history.
|
|
|
|
*/
|
2016-06-12 10:53:59 +00:00
|
|
|
if (deepen)
|
2007-11-11 07:29:47 +00:00
|
|
|
return -1;
|
2018-09-21 18:22:38 +00:00
|
|
|
|
2022-03-28 14:02:08 +00:00
|
|
|
/*
|
|
|
|
* Similarly, if we need to refetch, we always want to perform a full
|
|
|
|
* fetch ignoring existing objects.
|
|
|
|
*/
|
|
|
|
if (refetch)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
|
2018-09-21 18:22:38 +00:00
|
|
|
/*
|
|
|
|
* check_connected() allows objects to merely be promised, but
|
|
|
|
* we need all direct targets to exist.
|
|
|
|
*/
|
|
|
|
for (r = rm; r; r = r->next) {
|
2023-03-28 13:58:50 +00:00
|
|
|
if (!repo_has_object_file_with_flags(the_repository, &r->old_oid,
|
|
|
|
OBJECT_INFO_SKIP_FETCH_OBJECT))
|
2018-09-21 18:22:38 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
check_everything_connected: use a struct with named options
The number of variants of check_everything_connected has
grown over the years, so that the "real" function takes
several possibly-zero, possibly-NULL arguments. We hid the
complexity behind some wrapper functions, but this doesn't
scale well when we want to add new options.
If we add more wrapper variants to handle the new options,
then we can get a combinatorial explosion when those options
might be used together (right now nobody wants to use both
"shallow" and "transport" together, so we get by with just a
few wrappers).
If instead we add new parameters to each function, each of
which can have a default value, then callers who want the
defaults end up with confusing invocations like:
check_everything_connected(fn, 0, data, -1, 0, NULL);
where it is unclear which parameter is which (and every
caller needs updated when we add new options).
Instead, let's add a struct to hold all of the optional
parameters. This is a little more verbose for the callers
(who have to declare the struct and fill it in), but it
makes their code much easier to follow, because every option
is named as it is set (and unused options do not have to be
mentioned at all).
Note that we could also stick the iteration function and its
callback data into the option struct, too. But since those
are required for each call, by avoiding doing so, we can let
very simple callers just pass "NULL" for the options and not
worry about the struct at all.
While we're touching each site, let's also rename the
function to check_connected(). The existing name was quite
long, and not all of the wrappers even used the full name.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-15 10:30:40 +00:00
|
|
|
opt.quiet = 1;
|
2023-02-12 09:04:26 +00:00
|
|
|
opt.exclude_hidden_refs_section = "fetch";
|
check_everything_connected: use a struct with named options
The number of variants of check_everything_connected has
grown over the years, so that the "real" function takes
several possibly-zero, possibly-NULL arguments. We hid the
complexity behind some wrapper functions, but this doesn't
scale well when we want to add new options.
If we add more wrapper variants to handle the new options,
then we can get a combinatorial explosion when those options
might be used together (right now nobody wants to use both
"shallow" and "transport" together, so we get by with just a
few wrappers).
If instead we add new parameters to each function, each of
which can have a default value, then callers who want the
defaults end up with confusing invocations like:
check_everything_connected(fn, 0, data, -1, 0, NULL);
where it is unclear which parameter is which (and every
caller needs updated when we add new options).
Instead, let's add a struct to hold all of the optional
parameters. This is a little more verbose for the callers
(who have to declare the struct and fill it in), but it
makes their code much easier to follow, because every option
is named as it is set (and unused options do not have to be
mentioned at all).
Note that we could also stick the iteration function and its
callback data into the option struct, too. But since those
are required for each call, by avoiding doing so, we can let
very simple callers just pass "NULL" for the options and not
worry about the struct at all.
While we're touching each site, let's also rename the
function to check_connected(). The existing name was quite
long, and not all of the wrappers even used the full name.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-15 10:30:40 +00:00
|
|
|
return check_connected(iterate_ref_map, &rm, &opt);
|
2007-11-11 07:29:47 +00:00
|
|
|
}
|
|
|
|
|
2023-03-20 12:35:20 +00:00
|
|
|
static int fetch_and_consume_refs(struct display_state *display_state,
|
|
|
|
struct transport *transport,
|
2022-02-17 13:04:36 +00:00
|
|
|
struct ref_transaction *transaction,
|
2021-12-01 22:15:44 +00:00
|
|
|
struct ref *ref_map,
|
2023-05-17 11:49:04 +00:00
|
|
|
struct fetch_head *fetch_head,
|
|
|
|
const struct fetch_config *config)
|
2007-09-11 03:03:25 +00:00
|
|
|
{
|
2021-09-01 13:10:06 +00:00
|
|
|
int connectivity_checked = 1;
|
2021-09-01 13:09:58 +00:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We don't need to perform a fetch in case we can already satisfy all
|
|
|
|
* refs.
|
|
|
|
*/
|
|
|
|
ret = check_exist_and_connected(ref_map);
|
2019-10-02 23:49:28 +00:00
|
|
|
if (ret) {
|
|
|
|
trace2_region_enter("fetch", "fetch_refs", the_repository);
|
fetch-pack: unify ref in and out param
When a user fetches:
- at least one up-to-date ref and at least one non-up-to-date ref,
- using HTTP with protocol v0 (or something else that uses the fetch
command of a remote helper)
some refs might not be updated after the fetch.
This bug was introduced in commit 989b8c4452 ("fetch-pack: put shallow
info in output parameter", 2018-06-28) which allowed transports to
report the refs that they have fetched in a new out-parameter
"fetched_refs". If they do so, transport_fetch_refs() makes this
information available to its caller.
Users of "fetched_refs" rely on the following 3 properties:
(1) it is the complete list of refs that was passed to
transport_fetch_refs(),
(2) it has shallow information (REF_STATUS_REJECT_SHALLOW set if
relevant), and
(3) it has updated OIDs if ref-in-want was used (introduced after
989b8c4452).
In an effort to satisfy (1), whenever transport_fetch_refs()
filters the refs sent to the transport, it re-adds the filtered refs to
whatever the transport supplies before returning it to the user.
However, the implementation in 989b8c4452 unconditionally re-adds the
filtered refs without checking if the transport refrained from reporting
anything in "fetched_refs" (which it is allowed to do), resulting in an
incomplete list, no longer satisfying (1).
An earlier effort to resolve this [1] solved the issue by readding the
filtered refs only if the transport did not refrain from reporting in
"fetched_refs", but after further discussion, it seems that the better
solution is to revert the API change that introduced "fetched_refs".
This API change was first suggested as part of a ref-in-want
implementation that allowed for ref patterns and, thus, there could be
drastic differences between the input refs and the refs actually fetched
[2]; we eventually decided to only allow exact ref names, but this API
change remained even though its necessity was decreased.
Therefore, revert this API change by reverting commit 989b8c4452, and
make receive_wanted_refs() update the OIDs in the sought array (like how
update_shallow() updates shallow information in the sought array)
instead. A test is also included to show that the user-visible bug
discussed at the beginning of this commit message no longer exists.
[1] https://public-inbox.org/git/20180801171806.GA122458@google.com/
[2] https://public-inbox.org/git/86a128c5fb710a41791e7183207c4d64889f9307.1485381677.git.jonathantanmy@google.com/
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-01 20:13:20 +00:00
|
|
|
ret = transport_fetch_refs(transport, ref_map);
|
2019-10-02 23:49:28 +00:00
|
|
|
trace2_region_leave("fetch", "fetch_refs", the_repository);
|
2021-09-01 13:09:58 +00:00
|
|
|
if (ret)
|
|
|
|
goto out;
|
2021-09-01 13:10:06 +00:00
|
|
|
connectivity_checked = transport->smart_options ?
|
|
|
|
transport->smart_options->connectivity_checked : 0;
|
2019-10-02 23:49:28 +00:00
|
|
|
}
|
2018-06-27 22:30:20 +00:00
|
|
|
|
2019-10-02 23:49:28 +00:00
|
|
|
trace2_region_enter("fetch", "consume_refs", the_repository);
|
2023-03-20 12:35:36 +00:00
|
|
|
ret = store_updated_refs(display_state, transport->remote->name,
|
2022-02-17 13:04:36 +00:00
|
|
|
connectivity_checked, transaction, ref_map,
|
2023-05-17 11:49:04 +00:00
|
|
|
fetch_head, config);
|
2019-10-02 23:49:28 +00:00
|
|
|
trace2_region_leave("fetch", "consume_refs", the_repository);
|
2021-09-01 13:10:02 +00:00
|
|
|
|
|
|
|
out:
|
fetch: fix deadlock when cleaning up lockfiles in async signals
When fetching packfiles, we write a bunch of lockfiles for the packfiles
we're writing into the repository. In order to not leave behind any
cruft in case we exit or receive a signal, we register both an exit
handler as well as signal handlers for common signals like SIGINT. These
handlers will then unlink the locks and free the data structure tracking
them. We have observed a deadlock in this logic though:
(gdb) bt
#0 __lll_lock_wait_private () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:95
#1 0x00007f4932bea2cd in _int_free (av=0x7f4932f2eb20 <main_arena>, p=0x3e3e4200, have_lock=0) at malloc.c:3969
#2 0x00007f4932bee58c in __GI___libc_free (mem=<optimized out>) at malloc.c:2975
#3 0x0000000000662ab1 in string_list_clear ()
#4 0x000000000044f5bc in unlock_pack_on_signal ()
#5 <signal handler called>
#6 _int_free (av=0x7f4932f2eb20 <main_arena>, p=<optimized out>, have_lock=0) at malloc.c:4024
#7 0x00007f4932bee58c in __GI___libc_free (mem=<optimized out>) at malloc.c:2975
#8 0x000000000065afd5 in strbuf_release ()
#9 0x000000000066ddb9 in delete_tempfile ()
#10 0x0000000000610d0b in files_transaction_cleanup.isra ()
#11 0x0000000000611718 in files_transaction_abort ()
#12 0x000000000060d2ef in ref_transaction_abort ()
#13 0x000000000060d441 in ref_transaction_prepare ()
#14 0x000000000060e0b5 in ref_transaction_commit ()
#15 0x00000000004511c2 in fetch_and_consume_refs ()
#16 0x000000000045279a in cmd_fetch ()
#17 0x0000000000407c48 in handle_builtin ()
#18 0x0000000000408df2 in cmd_main ()
#19 0x00000000004078b5 in main ()
The process was killed with a signal, which caused the signal handler to
kick in and try free the data structures after we have unlinked the
locks. It then deadlocks while calling free(3P).
The root cause of this is that it is not allowed to call certain
functions in async-signal handlers, as specified by signal-safety(7).
Next to most I/O functions, this list of disallowed functions also
includes memory-handling functions like malloc(3P) and free(3P) because
they may not be reentrant. As a result, if we execute such functions in
the signal handler, then they may operate on inconistent state and fail
in unexpected ways.
Fix this bug by not calling non-async-signal-safe functions when running
in the signal handler. We're about to re-raise the signal anyway and
will thus exit, so it's not much of a problem to keep the string list of
lockfiles untouched. Note that it's fine though to call unlink(2), so
we'll still clean up the lockfiles correctly.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Reviewed-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-01-07 10:55:47 +00:00
|
|
|
transport_unlock_pack(transport, 0);
|
2007-09-11 03:03:25 +00:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2023-03-20 12:35:20 +00:00
|
|
|
static int prune_refs(struct display_state *display_state,
|
|
|
|
struct refspec *rs,
|
2022-02-17 13:04:41 +00:00
|
|
|
struct ref_transaction *transaction,
|
2023-03-20 12:35:36 +00:00
|
|
|
struct ref *ref_map)
|
2009-11-10 08:15:47 +00:00
|
|
|
{
|
2023-03-20 12:35:36 +00:00
|
|
|
int result = 0;
|
2018-05-16 22:58:10 +00:00
|
|
|
struct ref *ref, *stale_refs = get_stale_heads(rs, ref_map);
|
2022-02-17 13:04:41 +00:00
|
|
|
struct strbuf err = STRBUF_INIT;
|
2009-11-10 08:15:47 +00:00
|
|
|
const char *dangling_msg = dry_run
|
2012-04-23 12:30:25 +00:00
|
|
|
? _(" (%s will become dangling)")
|
|
|
|
: _(" (%s has become dangling)");
|
2009-11-10 08:15:47 +00:00
|
|
|
|
2015-06-22 14:02:59 +00:00
|
|
|
if (!dry_run) {
|
2022-02-17 13:04:41 +00:00
|
|
|
if (transaction) {
|
|
|
|
for (ref = stale_refs; ref; ref = ref->next) {
|
|
|
|
result = ref_transaction_delete(transaction, ref->name, NULL, 0,
|
|
|
|
"fetch: prune", &err);
|
|
|
|
if (result)
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
struct string_list refnames = STRING_LIST_INIT_NODUP;
|
2015-06-22 14:02:59 +00:00
|
|
|
|
2022-02-17 13:04:41 +00:00
|
|
|
for (ref = stale_refs; ref; ref = ref->next)
|
|
|
|
string_list_append(&refnames, ref->name);
|
2015-06-22 14:02:59 +00:00
|
|
|
|
2022-02-17 13:04:41 +00:00
|
|
|
result = delete_refs("fetch: prune", &refnames, 0);
|
|
|
|
string_list_clear(&refnames, 0);
|
|
|
|
}
|
2015-06-22 14:02:59 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (verbosity >= 0) {
|
2022-02-10 12:28:16 +00:00
|
|
|
int summary_width = transport_summary_width(stale_refs);
|
|
|
|
|
2015-06-22 14:02:59 +00:00
|
|
|
for (ref = stale_refs; ref; ref = ref->next) {
|
2023-03-20 12:35:40 +00:00
|
|
|
display_ref_update(display_state, '-', _("[deleted]"), NULL,
|
|
|
|
_("(none)"), ref->name,
|
fetch: introduce machine-parseable "porcelain" output format
The output of git-fetch(1) is obviously designed for consumption by
users, only: we neatly columnize data, we abbreviate reference names, we
print neat arrows and we don't provide information about actual object
IDs that have changed. This makes the output format basically unusable
in the context of scripted invocations of git-fetch(1) that want to
learn about the exact changes that the command performs.
Introduce a new machine-parseable "porcelain" output format that is
supposed to fix this shortcoming. This output format is intended to
provide information about every reference that is about to be updated,
the old object ID that the reference has been pointing to and the new
object ID it will be updated to. Furthermore, the output format provides
the same flags as the human-readable format to indicate basic conditions
for each reference update like whether it was a fast-forward update, a
branch deletion, a rejected update or others.
The output format is quite simple:
```
<flag> <old-object-id> <new-object-id> <local-reference>\n
```
We assume two conditions which are generally true:
- The old and new object IDs have fixed known widths and cannot
contain spaces.
- References cannot contain newlines.
With these assumptions, the output format becomes unambiguously
parseable. Furthermore, given that this output is designed to be
consumed by scripts, the machine-readable data is printed to stdout
instead of stderr like the human-readable output is. This is mostly done
so that other data printed to stderr, like error messages or progress
meters, don't interfere with the parseable data.
A notable ommission here is that the output format does not include the
remote from which a reference was fetched, which might be important
information especially in the context of multi-remote fetches. But as
such a format would require us to print the remote for every single
reference update due to parallelizable fetches it feels wasteful for the
most likely usecase, which is when fetching from a single remote.
In a similar spirit, a second restriction is that this cannot be used
with `--recurse-submodules`. This is because any reference updates would
be ambiguous without also printing the repository in which the update
happens.
Considering that both multi-remote and submodule fetches are user-facing
features, using them in conjunction with `--porcelain` that is intended
for scripting purposes is likely not going to be useful in the majority
of cases. With that in mind these restrictions feel acceptable. If
usecases for either of these come up in the future though it is easy
enough to add a new "porcelain-v2" format that adds this information.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:36 +00:00
|
|
|
&ref->new_oid, &ref->old_oid,
|
2023-03-20 12:35:40 +00:00
|
|
|
summary_width);
|
2009-11-10 08:15:47 +00:00
|
|
|
warn_dangling_symref(stderr, dangling_msg, ref->name);
|
|
|
|
}
|
|
|
|
}
|
2015-06-22 14:02:59 +00:00
|
|
|
|
2022-02-17 13:04:41 +00:00
|
|
|
cleanup:
|
|
|
|
strbuf_release(&err);
|
2009-11-10 08:15:47 +00:00
|
|
|
free_refs(stale_refs);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
fetch: use new branch_checked_out() and add tests
When fetching refs from a remote, it is possible that the refspec will
cause us to overwrite a ref that is checked out in a worktree. The
existing logic in builtin/fetch.c uses a possibly-slow mechanism. Update
those sections to use the new, more efficient branch_checked_out()
helper.
These uses were not previously tested, so add a test case that can be
used for these kinds of collisions. There is only one test now, but more
tests will be added as other consumers of branch_checked_out() are
added.
Note that there are two uses in builtin/fetch.c, but only one of the
messages is tested. This is because the tested check is run before
completing the fetch, and the untested check is not reachable without
concurrent updates to the filesystem. Thus, it is beneficial to keep
that extra check for the sake of defense-in-depth. However, we should
not attempt to test the check, as the effort required is too
complicated to be worth the effort. This use in update_local_ref()
also requires a change in the error message because we no longer have
access to the worktree struct, only the path of the worktree. This error
is so rare that making a distinction between the two is not critical.
Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-06-14 19:27:31 +00:00
|
|
|
static void check_not_current_branch(struct ref *ref_map)
|
2008-10-13 09:36:52 +00:00
|
|
|
{
|
fetch: use new branch_checked_out() and add tests
When fetching refs from a remote, it is possible that the refspec will
cause use to overwrite a ref that is checked out in a worktree. The
existing logic in builtin/fetch.c uses a possibly-slow mechanism. Update
those sections to use the new, more efficient branch_checked_out()
helper.
These uses were not previously tested, so add a test case that can be
used for these kinds of collisions. There is only one test now, but more
tests will be added as other consumers of branch_checked_out() are
added.
Note that there are two uses in builtin/fetch.c, but only one of the
messages is tested. This is because the tested check is run before
completing the fetch, and the untested check is not reachable without
concurrent updates to the filesystem. Thus, it is beneficial to keep
that extra check for the sake of defense-in-depth. However, we should
not attempt to test the check, as the effort required is too
complicated to be worth the effort. This use in update_local_ref()
also requires a change in the error message because we no longer have
access to the worktree struct, only the path of the worktree. This error
is so rare that making a distinction between the two is not critical.
Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-06-14 19:27:31 +00:00
|
|
|
const char *path;
|
2008-10-13 09:36:52 +00:00
|
|
|
for (; ref_map; ref_map = ref_map->next)
|
2021-12-01 22:15:44 +00:00
|
|
|
if (ref_map->peer_ref &&
|
2022-05-16 08:41:41 +00:00
|
|
|
starts_with(ref_map->peer_ref->name, "refs/heads/") &&
|
fetch: use new branch_checked_out() and add tests
When fetching refs from a remote, it is possible that the refspec will
cause use to overwrite a ref that is checked out in a worktree. The
existing logic in builtin/fetch.c uses a possibly-slow mechanism. Update
those sections to use the new, more efficient branch_checked_out()
helper.
These uses were not previously tested, so add a test case that can be
used for these kinds of collisions. There is only one test now, but more
tests will be added as other consumers of branch_checked_out() are
added.
Note that there are two uses in builtin/fetch.c, but only one of the
messages is tested. This is because the tested check is run before
completing the fetch, and the untested check is not reachable without
concurrent updates to the filesystem. Thus, it is beneficial to keep
that extra check for the sake of defense-in-depth. However, we should
not attempt to test the check, as the effort required is too
complicated to be worth the effort. This use in update_local_ref()
also requires a change in the error message because we no longer have
access to the worktree struct, only the path of the worktree. This error
is so rare that making a distinction between the two is not critical.
Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-06-14 19:27:31 +00:00
|
|
|
(path = branch_checked_out(ref_map->peer_ref->name)))
|
2021-12-01 22:15:44 +00:00
|
|
|
die(_("refusing to fetch into branch '%s' "
|
|
|
|
"checked out at '%s'"),
|
fetch: use new branch_checked_out() and add tests
When fetching refs from a remote, it is possible that the refspec will
cause use to overwrite a ref that is checked out in a worktree. The
existing logic in builtin/fetch.c uses a possibly-slow mechanism. Update
those sections to use the new, more efficient branch_checked_out()
helper.
These uses were not previously tested, so add a test case that can be
used for these kinds of collisions. There is only one test now, but more
tests will be added as other consumers of branch_checked_out() are
added.
Note that there are two uses in builtin/fetch.c, but only one of the
messages is tested. This is because the tested check is run before
completing the fetch, and the untested check is not reachable without
concurrent updates to the filesystem. Thus, it is beneficial to keep
that extra check for the sake of defense-in-depth. However, we should
not attempt to test the check, as the effort required is too
complicated to be worth the effort. This use in update_local_ref()
also requires a change in the error message because we no longer have
access to the worktree struct, only the path of the worktree. This error
is so rare that making a distinction between the two is not critical.
Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-06-14 19:27:31 +00:00
|
|
|
ref_map->peer_ref->name, path);
|
2008-10-13 09:36:52 +00:00
|
|
|
}
|
|
|
|
|
2010-02-24 19:02:05 +00:00
|
|
|
static int truncate_fetch_head(void)
|
|
|
|
{
|
2018-05-17 22:51:51 +00:00
|
|
|
const char *filename = git_path_fetch_head(the_repository);
|
Handle more file writes correctly in shared repos
In shared repositories, we have to be careful when writing files whose
permissions do not allow users other than the owner to write them.
In particular, we force the marks file of fast-export and the FETCH_HEAD
when fetching to be rewritten from scratch.
This commit does not touch other calls to fopen() that want to
write files:
- commands that write to working tree files (core.sharedRepository
does not affect permission bits of working tree files),
e.g. .rej file created by "apply --reject", result of applying a
previous conflict resolution by "rerere", "git merge-file".
- git am, when splitting mails (git-am correctly cleans up its directory
after finishing, so there is no need to share those files between users)
- git submodule clone, when writing the .git file, because the file
will not be overwritten
- git_terminal_prompt() in compat/terminal.c, because it is not writing to
a file at all
- git diff --output, because the output file is clearly not intended to be
shared between the users of the current repository
- git fast-import, when writing a crash report, because the reports' file
names are unique due to an embedded process ID
- mailinfo() in mailinfo.c, because the output is clearly not intended to
be shared between the users of the current repository
- check_or_regenerate_marks() in remote-testsvn.c, because this is only
used for Git's internal testing
- git fsck, when writing lost&found blobs (this should probably be
changed, but left as a low-hanging fruit for future contributors).
Note that this patch does not touch callers of write_file() and
write_file_gently(), which would benefit from the same scrutiny as
to usage in shared repositories. Most notable users are branch,
daemon, submodule & worktree, and a worrisome call in transport.c
when updating one ref (which ignores the shared flag).
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-01-11 18:35:54 +00:00
|
|
|
FILE *fp = fopen_for_writing(filename);
|
2010-02-24 19:02:05 +00:00
|
|
|
|
|
|
|
if (!fp)
|
2022-01-05 20:02:17 +00:00
|
|
|
return error_errno(_("cannot open '%s'"), filename);
|
2010-02-24 19:02:05 +00:00
|
|
|
fclose(fp);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-08-07 21:43:20 +00:00
|
|
|
static void set_option(struct transport *transport, const char *name, const char *value)
|
|
|
|
{
|
|
|
|
int r = transport_set_option(transport, name, value);
|
|
|
|
if (r < 0)
|
2021-12-01 22:15:40 +00:00
|
|
|
die(_("option \"%s\" value \"%s\" is not valid for %s"),
|
2013-08-07 21:43:20 +00:00
|
|
|
name, value, transport->url);
|
|
|
|
if (r > 0)
|
2021-12-01 22:15:40 +00:00
|
|
|
warning(_("option \"%s\" is ignored for %s\n"),
|
2013-08-07 21:43:20 +00:00
|
|
|
name, transport->url);
|
|
|
|
}
|
|
|
|
|
2018-07-02 22:39:44 +00:00
|
|
|
|
2022-08-25 17:09:48 +00:00
|
|
|
/*
 * Ref-iteration callback: append the ref's object ID to the oid_array
 * passed as cb_data.  Always returns 0.
 */
static int add_oid(const char *refname UNUSED,
		   const struct object_id *oid,
		   int flags UNUSED, void *cb_data)
{
	/* cb_data is the struct oid_array collecting the results. */
	oid_array_append(cb_data, oid);
	return 0;
}
|
|
|
|
|
|
|
|
static void add_negotiation_tips(struct git_transport_options *smart_options)
|
|
|
|
{
|
|
|
|
struct oid_array *oids = xcalloc(1, sizeof(*oids));
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < negotiation_tip.nr; i++) {
|
|
|
|
const char *s = negotiation_tip.items[i].string;
|
|
|
|
int old_nr;
|
|
|
|
if (!has_glob_specials(s)) {
|
|
|
|
struct object_id oid;
|
2023-03-28 13:58:46 +00:00
|
|
|
if (repo_get_oid(the_repository, s, &oid))
|
2021-07-15 17:44:32 +00:00
|
|
|
die(_("%s is not a valid object"), s);
|
|
|
|
if (!has_object(the_repository, &oid, 0))
|
|
|
|
die(_("the object %s does not exist"), s);
|
2018-07-02 22:39:44 +00:00
|
|
|
oid_array_append(oids, &oid);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
old_nr = oids->nr;
|
|
|
|
for_each_glob_ref(add_oid, s, oids);
|
|
|
|
if (old_nr == oids->nr)
|
2021-12-01 22:15:40 +00:00
|
|
|
warning("ignoring --negotiation-tip=%s because it does not match any refs",
|
2018-07-02 22:39:44 +00:00
|
|
|
s);
|
|
|
|
}
|
|
|
|
smart_options->negotiation_tips = oids;
|
|
|
|
}
|
|
|
|
|
2016-06-12 10:53:59 +00:00
|
|
|
static struct transport *prepare_transport(struct remote *remote, int deepen)
|
2013-08-07 21:43:20 +00:00
|
|
|
{
|
|
|
|
struct transport *transport;
|
2019-01-08 00:17:09 +00:00
|
|
|
|
2013-08-07 21:43:20 +00:00
|
|
|
transport = transport_get(remote, NULL);
|
|
|
|
transport_set_verbosity(transport, verbosity, progress);
|
2016-02-03 04:09:14 +00:00
|
|
|
transport->family = family;
|
2013-08-07 21:43:20 +00:00
|
|
|
if (upload_pack)
|
|
|
|
set_option(transport, TRANS_OPT_UPLOADPACK, upload_pack);
|
|
|
|
if (keep)
|
|
|
|
set_option(transport, TRANS_OPT_KEEP, "yes");
|
|
|
|
if (depth)
|
|
|
|
set_option(transport, TRANS_OPT_DEPTH, depth);
|
2016-06-12 10:53:59 +00:00
|
|
|
if (deepen && deepen_since)
|
|
|
|
set_option(transport, TRANS_OPT_DEEPEN_SINCE, deepen_since);
|
2016-06-12 10:54:04 +00:00
|
|
|
if (deepen && deepen_not.nr)
|
|
|
|
set_option(transport, TRANS_OPT_DEEPEN_NOT,
|
|
|
|
(const char *)&deepen_not);
|
fetch, upload-pack: --deepen=N extends shallow boundary by N commits
In git-fetch, --depth argument is always relative with the latest
remote refs. This makes it a bit difficult to cover this use case,
where the user wants to make the shallow history, say 3 levels
deeper. It would work if remote refs have not moved yet, but nobody
can guarantee that, especially when that use case is performed a
couple months after the last clone or "git fetch --depth". Also,
modifying shallow boundary using --depth does not work well with
clones created by --since or --not.
This patch fixes that. A new argument --deepen=<N> will add <N> more (*)
parent commits to the current history regardless of where remote refs
are.
Have/Want negotiation is still respected. So if remote refs move, the
server will send two chunks: one between "have" and "want" and another
to extend shallow history. In theory, the client could send no "want"s
in order to get the second chunk only. But the protocol does not allow
that. Either you send no want lines, which means ls-remote; or you
have to send at least one want line that carries deep-relative to the
server..
The main work was done by Dongcan Jiang. I fixed it up here and there.
And of course all the bugs belong to me.
(*) We could even support --deepen=<N> where <N> is negative. In that
case we can cut some history from the shallow clone. This operation
(and --depth=<shorter depth>) does not require interaction with remote
side (and more complicated to implement as a result).
Helped-by: Duy Nguyen <pclouds@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-12 10:54:09 +00:00
|
|
|
if (deepen_relative)
|
|
|
|
set_option(transport, TRANS_OPT_DEEPEN_RELATIVE, "yes");
|
2013-12-05 13:02:42 +00:00
|
|
|
if (update_shallow)
|
|
|
|
set_option(transport, TRANS_OPT_UPDATE_SHALLOW, "yes");
|
2022-03-28 14:02:08 +00:00
|
|
|
if (refetch)
|
|
|
|
set_option(transport, TRANS_OPT_REFETCH, "yes");
|
2017-12-08 15:58:44 +00:00
|
|
|
if (filter_options.choice) {
|
2019-06-27 22:54:10 +00:00
|
|
|
const char *spec =
|
|
|
|
expand_list_objects_filter_spec(&filter_options);
|
|
|
|
set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER, spec);
|
2017-12-08 15:58:44 +00:00
|
|
|
set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
|
|
|
|
}
|
2018-07-02 22:39:44 +00:00
|
|
|
if (negotiation_tip.nr) {
|
|
|
|
if (transport->smart_options)
|
|
|
|
add_negotiation_tips(transport->smart_options);
|
|
|
|
else
|
2021-12-01 22:15:40 +00:00
|
|
|
warning("ignoring --negotiation-tip because the protocol does not support it");
|
2018-07-02 22:39:44 +00:00
|
|
|
}
|
2013-08-07 21:43:20 +00:00
|
|
|
return transport;
|
|
|
|
}
|
|
|
|
|
2023-03-20 12:35:20 +00:00
|
|
|
static int backfill_tags(struct display_state *display_state,
|
|
|
|
struct transport *transport,
|
2022-02-17 13:04:36 +00:00
|
|
|
struct ref_transaction *transaction,
|
2022-02-17 13:04:28 +00:00
|
|
|
struct ref *ref_map,
|
2023-05-17 11:49:04 +00:00
|
|
|
struct fetch_head *fetch_head,
|
|
|
|
const struct fetch_config *config)
|
2013-08-07 22:14:45 +00:00
|
|
|
{
|
2022-02-17 13:04:28 +00:00
|
|
|
int retcode, cannot_reuse;
|
2016-06-12 10:53:59 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Once we have set TRANS_OPT_DEEPEN_SINCE, we can't unset it
|
|
|
|
* when remote helper is used (setting it to an empty string
|
|
|
|
* is not unsetting). We could extend the remote helper
|
|
|
|
* protocol for that, but for now, just force a new connection
|
2016-06-12 10:54:04 +00:00
|
|
|
* without deepen-since. Similar story for deepen-not.
|
2016-06-12 10:53:59 +00:00
|
|
|
*/
|
2016-06-12 10:54:04 +00:00
|
|
|
cannot_reuse = transport->cannot_reuse ||
|
|
|
|
deepen_since || deepen_not.nr;
|
2016-06-12 10:53:59 +00:00
|
|
|
if (cannot_reuse) {
|
|
|
|
gsecondary = prepare_transport(transport->remote, 0);
|
fetch: work around "transport-take-over" hack
A Git-aware "connect" transport allows the "transport_take_over" to
redirect generic transport requests like fetch(), push_refs() and
get_refs_list() to the native Git transport handling methods. The
take-over process replaces transport->data with a fake data that
these method implementations understand.
While this hack works OK for a single request, it breaks when the
transport needs to make more than one requests. transport->data
that used to hold necessary information for the specific helper to
work correctly is destroyed during the take-over process.
One codepath that this matters is "git fetch" in auto-follow mode;
when it does not get all the tags that ought to point at the history
it got (which can be determined by looking at the peeled tags in the
initial advertisement) from the primary transfer, it internally
makes a second request to complete the fetch. Because "take-over"
hack has already destroyed the data necessary to talk to the
transport helper by the time this happens, the second request cannot
make a request to the helper to make another connection to fetch
these additional tags.
Mark such a transport as "cannot_reuse", and use a separate
transport to perform the backfill fetch in order to work around
this breakage.
Note that this problem does not manifest itself when running t5802,
because our upload-pack gives you all the necessary auto-followed
tags during the primary transfer. You would need to step through
"git fetch" in a debugger, stop immediately after the primary
transfer finishes and writes these auto-followed tags, remove the
tag references and repack/prune the repository to convince the
"find-non-local-tags" procedure that the primary transfer failed to
give us all the necessary tags, and then let it continue, in order
to trigger the bug in the secondary transfer this patch fixes.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-07 22:47:18 +00:00
|
|
|
transport = gsecondary;
|
|
|
|
}
|
|
|
|
|
2013-08-07 22:14:45 +00:00
|
|
|
transport_set_option(transport, TRANS_OPT_FOLLOWTAGS, NULL);
|
|
|
|
transport_set_option(transport, TRANS_OPT_DEPTH, "0");
|
fetch, upload-pack: --deepen=N extends shallow boundary by N commits
In git-fetch, --depth argument is always relative with the latest
remote refs. This makes it a bit difficult to cover this use case,
where the user wants to make the shallow history, say 3 levels
deeper. It would work if remote refs have not moved yet, but nobody
can guarantee that, especially when that use case is performed a
couple months after the last clone or "git fetch --depth". Also,
modifying shallow boundary using --depth does not work well with
clones created by --since or --not.
This patch fixes that. A new argument --deepen=<N> will add <N> more (*)
parent commits to the current history regardless of where remote refs
are.
Have/Want negotiation is still respected. So if remote refs move, the
server will send two chunks: one between "have" and "want" and another
to extend shallow history. In theory, the client could send no "want"s
in order to get the second chunk only. But the protocol does not allow
that. Either you send no want lines, which means ls-remote; or you
have to send at least one want line that carries deep-relative to the
server..
The main work was done by Dongcan Jiang. I fixed it up here and there.
And of course all the bugs belong to me.
(*) We could even support --deepen=<N> where <N> is negative. In that
case we can cut some history from the shallow clone. This operation
(and --depth=<shorter depth>) does not require interaction with remote
side (and more complicated to implement as a result).
Helped-by: Duy Nguyen <pclouds@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-12 10:54:09 +00:00
|
|
|
transport_set_option(transport, TRANS_OPT_DEEPEN_RELATIVE, NULL);
|
2023-05-17 11:49:04 +00:00
|
|
|
retcode = fetch_and_consume_refs(display_state, transport, transaction, ref_map,
|
|
|
|
fetch_head, config);
|
fetch: work around "transport-take-over" hack
A Git-aware "connect" transport allows the "transport_take_over" to
redirect generic transport requests like fetch(), push_refs() and
get_refs_list() to the native Git transport handling methods. The
take-over process replaces transport->data with a fake data that
these method implementations understand.
While this hack works OK for a single request, it breaks when the
transport needs to make more than one requests. transport->data
that used to hold necessary information for the specific helper to
work correctly is destroyed during the take-over process.
One codepath that this matters is "git fetch" in auto-follow mode;
when it does not get all the tags that ought to point at the history
it got (which can be determined by looking at the peeled tags in the
initial advertisement) from the primary transfer, it internally
makes a second request to complete the fetch. Because "take-over"
hack has already destroyed the data necessary to talk to the
transport helper by the time this happens, the second request cannot
make a request to the helper to make another connection to fetch
these additional tags.
Mark such a transport as "cannot_reuse", and use a separate
transport to perform the backfill fetch in order to work around
this breakage.
Note that this problem does not manifest itself when running t5802,
because our upload-pack gives you all the necessary auto-followed
tags during the primary transfer. You would need to step through
"git fetch" in a debugger, stop immediately after the primary
transfer finishes and writes these auto-followed tags, remove the
tag references and repack/prune the repository to convince the
"find-non-local-tags" procedure that the primary transfer failed to
give us all the necessary tags, and then let it continue, in order
to trigger the bug in the secondary transfer this patch fixes.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-07 22:47:18 +00:00
|
|
|
|
|
|
|
if (gsecondary) {
|
|
|
|
transport_disconnect(gsecondary);
|
|
|
|
gsecondary = NULL;
|
|
|
|
}
|
2022-02-17 13:04:28 +00:00
|
|
|
|
|
|
|
return retcode;
|
2013-08-07 22:14:45 +00:00
|
|
|
}
|
|
|
|
|
2007-09-11 03:03:25 +00:00
|
|
|
static int do_fetch(struct transport *transport,
|
2023-05-10 12:34:28 +00:00
|
|
|
struct refspec *rs,
|
2023-05-17 11:48:51 +00:00
|
|
|
const struct fetch_config *config)
|
2007-09-11 03:03:25 +00:00
|
|
|
{
|
2022-02-17 13:04:36 +00:00
|
|
|
struct ref_transaction *transaction = NULL;
|
2022-02-17 13:04:20 +00:00
|
|
|
struct ref *ref_map = NULL;
|
2023-03-20 12:35:36 +00:00
|
|
|
struct display_state display_state = { 0 };
|
2007-09-11 03:03:25 +00:00
|
|
|
int autotags = (transport->remote->fetch_tags == 1);
|
2013-05-25 09:08:16 +00:00
|
|
|
int retcode = 0;
|
2018-06-27 22:30:21 +00:00
|
|
|
const struct ref *remote_refs;
|
2021-02-05 20:48:48 +00:00
|
|
|
struct transport_ls_refs_options transport_ls_refs_options =
|
|
|
|
TRANSPORT_LS_REFS_OPTIONS_INIT;
|
2018-09-27 19:24:07 +00:00
|
|
|
int must_list_refs = 1;
|
2022-02-17 13:04:24 +00:00
|
|
|
struct fetch_head fetch_head = { 0 };
|
2022-02-17 13:04:36 +00:00
|
|
|
struct strbuf err = STRBUF_INIT;
|
2009-10-25 21:28:12 +00:00
|
|
|
|
2010-08-11 22:57:20 +00:00
|
|
|
if (tags == TAGS_DEFAULT) {
|
|
|
|
if (transport->remote->fetch_tags == 2)
|
|
|
|
tags = TAGS_SET;
|
|
|
|
if (transport->remote->fetch_tags == -1)
|
|
|
|
tags = TAGS_UNSET;
|
|
|
|
}
|
2007-09-11 03:03:25 +00:00
|
|
|
|
|
|
|
/* if not appending, truncate FETCH_HEAD */
|
2020-08-18 14:25:22 +00:00
|
|
|
if (!append && write_fetch_head) {
|
2013-05-25 09:08:16 +00:00
|
|
|
retcode = truncate_fetch_head();
|
|
|
|
if (retcode)
|
|
|
|
goto cleanup;
|
2007-11-22 22:22:23 +00:00
|
|
|
}
|
2007-09-11 03:03:25 +00:00
|
|
|
|
2018-09-27 19:24:07 +00:00
|
|
|
if (rs->nr) {
|
|
|
|
int i;
|
|
|
|
|
2021-02-05 20:48:48 +00:00
|
|
|
refspec_ref_prefixes(rs, &transport_ls_refs_options.ref_prefixes);
|
2018-09-27 19:24:07 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We can avoid listing refs if all of them are exact
|
|
|
|
* OIDs
|
|
|
|
*/
|
|
|
|
must_list_refs = 0;
|
|
|
|
for (i = 0; i < rs->nr; i++) {
|
|
|
|
if (!rs->items[i].exact_sha1) {
|
|
|
|
must_list_refs = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
fetch: add branch.*.merge to default ref-prefix extension
When running "git pull" with no arguments, we'll do a default "git
fetch" and then try to merge the branch specified by the branch.*.merge
config. There's code in get_ref_map() to treat that "merge" branch as
something we want to fetch, even if it is not otherwise covered by the
default refspec.
This works fine with the v0 protocol, as the server tells us about all
of the refs, and get_ref_map() is the ultimate decider of what we fetch.
But in the v2 protocol, we send the ref-prefix extension to the server,
asking it to limit the ref advertisement. And we only tell it about the
default refspec for the remote; we don't mention the branch.*.merge
config at all.
This usually doesn't matter, because the default refspec matches
"refs/heads/*", which covers all branches. But if you explicitly use a
narrow refspec, then "git pull" on some branches may fail. The server
doesn't advertise the branch, so we don't fetch it, and "git pull"
thinks that it went away upstream.
We can fix this by including any branch.*.merge entries for the current
branch in the list of ref-prefixes we pass to the server. This only
needs to happen when using the default configured refspec (since
command-line refspecs are already added, and take precedence in deciding
what we fetch). We don't otherwise need to replicate any of the "what to
fetch" logic in get_ref_map(). These ref-prefixes are an optimization,
so it's OK if we tell the server to advertise the branch.*.merge ref,
even if we're not going to pull it. We'll just choose not to fetch it.
The test here is based on one constructed by Johannes. I modified the
branch names to trigger the ref-prefix issue (and be more descriptive),
and to confirm that "git pull" actually updated the local ref, which
should be more robust than just checking stderr.
Reported-by: Lana Deere <lana.deere@gmail.com>
Helped-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-08 19:26:09 +00:00
|
|
|
} else {
|
|
|
|
struct branch *branch = branch_get(NULL);
|
|
|
|
|
|
|
|
if (transport->remote->fetch.nr)
|
|
|
|
refspec_ref_prefixes(&transport->remote->fetch,
|
|
|
|
&transport_ls_refs_options.ref_prefixes);
|
|
|
|
if (branch_has_merge_config(branch) &&
|
|
|
|
!strcmp(branch->remote_name, transport->remote->name)) {
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < branch->merge_nr; i++) {
|
|
|
|
strvec_push(&transport_ls_refs_options.ref_prefixes,
|
|
|
|
branch->merge[i]->src);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2007-09-11 03:03:25 +00:00
|
|
|
|
2018-09-27 19:24:07 +00:00
|
|
|
if (tags == TAGS_SET || tags == TAGS_DEFAULT) {
|
|
|
|
must_list_refs = 1;
|
2021-02-05 20:48:48 +00:00
|
|
|
if (transport_ls_refs_options.ref_prefixes.nr)
|
|
|
|
strvec_push(&transport_ls_refs_options.ref_prefixes,
|
|
|
|
"refs/tags/");
|
2007-09-11 03:03:25 +00:00
|
|
|
}
|
|
|
|
|
2019-10-02 23:49:28 +00:00
|
|
|
if (must_list_refs) {
|
|
|
|
trace2_region_enter("fetch", "remote_refs", the_repository);
|
2021-02-05 20:48:48 +00:00
|
|
|
remote_refs = transport_get_remote_refs(transport,
|
|
|
|
&transport_ls_refs_options);
|
2019-10-02 23:49:28 +00:00
|
|
|
trace2_region_leave("fetch", "remote_refs", the_repository);
|
|
|
|
} else
|
2018-09-27 19:24:07 +00:00
|
|
|
remote_refs = NULL;
|
|
|
|
|
2022-02-05 00:08:14 +00:00
|
|
|
transport_ls_refs_options_release(&transport_ls_refs_options);
|
2018-06-27 22:30:21 +00:00
|
|
|
|
|
|
|
ref_map = get_ref_map(transport->remote, remote_refs, rs,
|
|
|
|
tags, &autotags);
|
2008-10-13 09:36:52 +00:00
|
|
|
if (!update_head_ok)
|
fetch: use new branch_checked_out() and add tests
When fetching refs from a remote, it is possible that the refspec will
cause use to overwrite a ref that is checked out in a worktree. The
existing logic in builtin/fetch.c uses a possibly-slow mechanism. Update
those sections to use the new, more efficient branch_checked_out()
helper.
These uses were not previously tested, so add a test case that can be
used for these kinds of collisions. There is only one test now, but more
tests will be added as other consumers of branch_checked_out() are
added.
Note that there are two uses in builtin/fetch.c, but only one of the
messages is tested. This is because the tested check is run before
completing the fetch, and the untested check is not reachable without
concurrent updates to the filesystem. Thus, it is beneficial to keep
that extra check for the sake of defense-in-depth. However, we should
not attempt to test the check, as the effort required is too
complicated to be worth the effort. This use in update_local_ref()
also requires a change in the error message because we no longer have
access to the worktree struct, only the path of the worktree. This error
is so rare that making a distinction between the two is not critical.
Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-06-14 19:27:31 +00:00
|
|
|
check_not_current_branch(ref_map);
|
2007-09-11 03:03:25 +00:00
|
|
|
|
2022-02-17 13:04:24 +00:00
|
|
|
retcode = open_fetch_head(&fetch_head);
|
|
|
|
if (retcode)
|
|
|
|
goto cleanup;
|
|
|
|
|
2023-05-17 11:48:51 +00:00
|
|
|
display_state_init(&display_state, ref_map, transport->url,
|
|
|
|
config->display_format);
|
2023-03-20 12:35:20 +00:00
|
|
|
|
2022-02-17 13:04:36 +00:00
|
|
|
if (atomic_fetch) {
|
|
|
|
transaction = ref_transaction_begin(&err);
|
|
|
|
if (!transaction) {
|
2023-12-17 14:11:34 +00:00
|
|
|
retcode = -1;
|
2022-02-17 13:04:36 +00:00
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-03-04 03:27:40 +00:00
|
|
|
if (tags == TAGS_DEFAULT && autotags)
|
|
|
|
transport_set_option(transport, TRANS_OPT_FOLLOWTAGS, "1");
|
2011-10-15 05:04:25 +00:00
|
|
|
if (prune) {
|
fetch --prune: prune only based on explicit refspecs
The old behavior of "fetch --prune" was to prune whatever was being
fetched. In particular, "fetch --prune --tags" caused tags not only
to be fetched, but also to be pruned. This is inappropriate because
there is only one tags namespace that is shared among the local
repository and all remotes. Therefore, if the user defines a local
tag and then runs "git fetch --prune --tags", then the local tag is
deleted. Moreover, "--prune" and "--tags" can also be configured via
fetch.prune / remote.<name>.prune and remote.<name>.tagopt, making it
even less obvious that an invocation of "git fetch" could result in
tag lossage.
Since the command "git remote update" invokes "git fetch", it had the
same problem.
The command "git remote prune", on the other hand, disregarded the
setting of remote.<name>.tagopt, and so its behavior was inconsistent
with that of the other commands.
So the old behavior made it too easy to lose tags. To fix this
problem, change "fetch --prune" to prune references based only on
refspecs specified explicitly by the user, either on the command line
or via remote.<name>.fetch. Thus, tags are no longer made subject to
pruning by the --tags option or the remote.<name>.tagopt setting.
However, tags *are* still subject to pruning if they are fetched as
part of a refspec, and that is good. For example:
* On the command line,
git fetch --prune 'refs/tags/*:refs/tags/*'
causes tags, and only tags, to be fetched and pruned, and is
therefore a simple way for the user to get the equivalent of the old
behavior of "--prune --tag".
* For a remote that was configured with the "--mirror" option, the
configuration is set to include
[remote "name"]
fetch = +refs/*:refs/*
, which causes tags to be subject to pruning along with all other
references. This is the behavior that will typically be desired for
a mirror.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-10-30 05:33:00 +00:00
|
|
|
/*
|
|
|
|
* We only prune based on refspecs specified
|
|
|
|
* explicitly (via command line or configuration); we
|
|
|
|
* don't care whether --tags was specified.
|
|
|
|
*/
|
2018-05-16 22:58:07 +00:00
|
|
|
if (rs->nr) {
|
2023-03-20 12:35:36 +00:00
|
|
|
retcode = prune_refs(&display_state, rs, transaction, ref_map);
|
2011-10-15 05:04:26 +00:00
|
|
|
} else {
|
2023-03-20 12:35:20 +00:00
|
|
|
retcode = prune_refs(&display_state, &transport->remote->fetch,
|
2023-03-20 12:35:36 +00:00
|
|
|
transaction, ref_map);
|
2011-10-15 05:04:26 +00:00
|
|
|
}
|
2022-01-31 13:30:47 +00:00
|
|
|
if (retcode != 0)
|
|
|
|
retcode = 1;
|
2011-10-15 05:04:25 +00:00
|
|
|
}
|
2022-02-17 13:04:24 +00:00
|
|
|
|
2023-05-17 11:49:04 +00:00
|
|
|
if (fetch_and_consume_refs(&display_state, transport, transaction, ref_map,
|
|
|
|
&fetch_head, config)) {
|
2014-01-03 02:28:52 +00:00
|
|
|
retcode = 1;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2019-08-19 09:11:20 +00:00
|
|
|
|
2022-02-17 13:04:20 +00:00
|
|
|
/*
|
|
|
|
* If neither --no-tags nor --tags was specified, do automated tag
|
|
|
|
* following.
|
|
|
|
*/
|
|
|
|
if (tags == TAGS_DEFAULT && autotags) {
|
|
|
|
struct ref *tags_ref_map = NULL, **tail = &tags_ref_map;
|
|
|
|
|
2022-02-17 13:04:36 +00:00
|
|
|
find_non_local_tags(remote_refs, transaction, &tags_ref_map, &tail);
|
2022-02-17 13:04:28 +00:00
|
|
|
if (tags_ref_map) {
|
|
|
|
/*
|
|
|
|
* If backfilling of tags fails then we want to tell
|
|
|
|
* the user so, but we have to continue regardless to
|
|
|
|
* populate upstream information of the references we
|
2022-02-17 13:04:36 +00:00
|
|
|
* have already fetched above. The exception though is
|
|
|
|
* when `--atomic` is passed: in that case we'll abort
|
|
|
|
* the transaction and don't commit anything.
|
2022-02-17 13:04:28 +00:00
|
|
|
*/
|
2023-03-20 12:35:20 +00:00
|
|
|
if (backfill_tags(&display_state, transport, transaction, tags_ref_map,
|
2023-05-17 11:49:04 +00:00
|
|
|
&fetch_head, config))
|
2022-02-17 13:04:28 +00:00
|
|
|
retcode = 1;
|
|
|
|
}
|
2022-02-17 13:04:20 +00:00
|
|
|
|
|
|
|
free_refs(tags_ref_map);
|
|
|
|
}
|
|
|
|
|
2022-02-17 13:04:36 +00:00
|
|
|
if (transaction) {
|
|
|
|
if (retcode)
|
|
|
|
goto cleanup;
|
|
|
|
|
|
|
|
retcode = ref_transaction_commit(transaction, &err);
|
|
|
|
if (retcode) {
|
|
|
|
ref_transaction_free(transaction);
|
|
|
|
transaction = NULL;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-02-17 13:04:24 +00:00
|
|
|
commit_fetch_head(&fetch_head);
|
|
|
|
|
2019-08-19 09:11:20 +00:00
|
|
|
if (set_upstream) {
|
|
|
|
struct branch *branch = branch_get("HEAD");
|
|
|
|
struct ref *rm;
|
|
|
|
struct ref *source_ref = NULL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We're setting the upstream configuration for the
|
2019-11-05 17:07:23 +00:00
|
|
|
* current branch. The relevant upstream is the
|
2019-08-19 09:11:20 +00:00
|
|
|
* fetched branch that is meant to be merged with the
|
|
|
|
* current one, i.e. the one fetched to FETCH_HEAD.
|
|
|
|
*
|
|
|
|
* When there are several such branches, consider the
|
|
|
|
* request ambiguous and err on the safe side by doing
|
|
|
|
* nothing and just emit a warning.
|
|
|
|
*/
|
|
|
|
for (rm = ref_map; rm; rm = rm->next) {
|
|
|
|
if (!rm->peer_ref) {
|
|
|
|
if (source_ref) {
|
2019-10-31 20:41:46 +00:00
|
|
|
warning(_("multiple branches detected, incompatible with --set-upstream"));
|
2022-02-17 13:04:20 +00:00
|
|
|
goto cleanup;
|
2019-08-19 09:11:20 +00:00
|
|
|
} else {
|
|
|
|
source_ref = rm;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (source_ref) {
|
pull, fetch: fix segfault in --set-upstream option
Fix a segfault in the --set-upstream option added in
24bc1a12926 (pull, fetch: add --set-upstream option, 2019-08-19) added
in v2.24.0.
The code added there did not do the same checking we do for "git
branch" itself since 8efb8899cfe (branch: segfault fixes and
validation, 2013-02-23), which in turn fixed the same sort of segfault
I'm fixing now in "git branch --set-upstream-to", see
6183d826ba6 (branch: introduce --set-upstream-to, 2012-08-20).
The warning message I'm adding here is an amalgamation of the error
added for "git branch" in 8efb8899cfe, and the error output
install_branch_config() itself emits, i.e. it trims "refs/heads/" from
the name and says "branch X on remote", not "branch refs/heads/X on
remote".
I think it would make more sense to simply die() here, but in the
other checks for --set-upstream added in 24bc1a12926 we issue a
warning() instead. Let's do the same here for consistency for now.
There was an earlier submitted alternate way of fixing this in [1],
due to that patch breaking threading with the original report at [2] I
didn't notice it before authoring this version. I think the more
detailed warning message here is better, and we should also have tests
for this behavior.
The --no-rebase option to "git pull" is needed as of the recently
merged 7d0daf3f12f (Merge branch 'en/pull-conflicting-options',
2021-08-30).
1. https://lore.kernel.org/git/20210706162238.575988-1-clemens@endorphin.org/
2. https://lore.kernel.org/git/CAG6gW_uHhfNiHGQDgGmb1byMqBA7xa8kuH1mP-wAPEe5Tmi2Ew@mail.gmail.com/
Reported-by: Clemens Fruhwirth <clemens@endorphin.org>
Reported-by: Jan Pokorný <poki@fnusa.cz>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-12-07 22:04:30 +00:00
|
|
|
if (!branch) {
|
|
|
|
const char *shortname = source_ref->name;
|
|
|
|
skip_prefix(shortname, "refs/heads/", &shortname);
|
|
|
|
|
|
|
|
warning(_("could not set upstream of HEAD to '%s' from '%s' when "
|
|
|
|
"it does not point to any branch."),
|
|
|
|
shortname, transport->remote->name);
|
2022-02-17 13:04:20 +00:00
|
|
|
goto cleanup;
|
pull, fetch: fix segfault in --set-upstream option
Fix a segfault in the --set-upstream option added in
24bc1a12926 (pull, fetch: add --set-upstream option, 2019-08-19) added
in v2.24.0.
The code added there did not do the same checking we do for "git
branch" itself since 8efb8899cfe (branch: segfault fixes and
validation, 2013-02-23), which in turn fixed the same sort of segfault
I'm fixing now in "git branch --set-upstream-to", see
6183d826ba6 (branch: introduce --set-upstream-to, 2012-08-20).
The warning message I'm adding here is an amalgamation of the error
added for "git branch" in 8efb8899cfe, and the error output
install_branch_config() itself emits, i.e. it trims "refs/heads/" from
the name and says "branch X on remote", not "branch refs/heads/X on
remote".
I think it would make more sense to simply die() here, but in the
other checks for --set-upstream added in 24bc1a12926 we issue a
warning() instead. Let's do the same here for consistency for now.
There was an earlier submitted alternate way of fixing this in [1],
due to that patch breaking threading with the original report at [2] I
didn't notice it before authoring this version. I think the more
detailed warning message here is better, and we should also have tests
for this behavior.
The --no-rebase option to "git pull" is needed as of the recently
merged 7d0daf3f12f (Merge branch 'en/pull-conflicting-options',
2021-08-30).
1. https://lore.kernel.org/git/20210706162238.575988-1-clemens@endorphin.org/
2. https://lore.kernel.org/git/CAG6gW_uHhfNiHGQDgGmb1byMqBA7xa8kuH1mP-wAPEe5Tmi2Ew@mail.gmail.com/
Reported-by: Clemens Fruhwirth <clemens@endorphin.org>
Reported-by: Jan Pokorný <poki@fnusa.cz>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-12-07 22:04:30 +00:00
|
|
|
}
|
|
|
|
|
2019-08-19 09:11:20 +00:00
|
|
|
if (!strcmp(source_ref->name, "HEAD") ||
|
|
|
|
starts_with(source_ref->name, "refs/heads/"))
|
|
|
|
install_branch_config(0,
|
|
|
|
branch->name,
|
|
|
|
transport->remote->name,
|
|
|
|
source_ref->name);
|
|
|
|
else if (starts_with(source_ref->name, "refs/remotes/"))
|
|
|
|
warning(_("not setting upstream for a remote remote-tracking branch"));
|
|
|
|
else if (starts_with(source_ref->name, "refs/tags/"))
|
|
|
|
warning(_("not setting upstream for a remote tag"));
|
|
|
|
else
|
|
|
|
warning(_("unknown branch type"));
|
|
|
|
} else {
|
2021-12-01 22:15:40 +00:00
|
|
|
warning(_("no source branch found;\n"
|
|
|
|
"you need to specify exactly one branch with the --set-upstream option"));
|
2019-08-19 09:11:20 +00:00
|
|
|
}
|
|
|
|
}
|
2007-09-11 03:03:25 +00:00
|
|
|
|
2021-12-01 22:15:44 +00:00
|
|
|
cleanup:
|
2023-12-17 14:11:34 +00:00
|
|
|
if (retcode) {
|
|
|
|
if (err.len) {
|
|
|
|
error("%s", err.buf);
|
|
|
|
strbuf_reset(&err);
|
|
|
|
}
|
|
|
|
if (transaction && ref_transaction_abort(transaction, &err) &&
|
|
|
|
err.len)
|
|
|
|
error("%s", err.buf);
|
2007-09-11 03:03:25 +00:00
|
|
|
}
|
|
|
|
|
2023-03-20 12:35:36 +00:00
|
|
|
display_state_release(&display_state);
|
2022-02-17 13:04:24 +00:00
|
|
|
close_fetch_head(&fetch_head);
|
2022-02-17 13:04:36 +00:00
|
|
|
strbuf_release(&err);
|
2022-02-17 13:04:20 +00:00
|
|
|
free_refs(ref_map);
|
2013-05-25 09:08:16 +00:00
|
|
|
return retcode;
|
2007-09-11 03:03:25 +00:00
|
|
|
}
|
|
|
|
|
2009-11-09 20:09:56 +00:00
|
|
|
static int get_one_remote_for_fetch(struct remote *remote, void *priv)
|
|
|
|
{
|
|
|
|
struct string_list *list = priv;
|
2009-11-09 20:11:06 +00:00
|
|
|
if (!remote->skip_default_update)
|
2010-06-25 23:41:38 +00:00
|
|
|
string_list_append(list, remote->name);
|
2009-11-09 20:09:56 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * State handed to get_remote_group() through its priv pointer while
 * looking up one "remotes.<name>" group in the configuration.
 */
struct remote_group_data {
	/* group name to match against the part of the key after "remotes." */
	const char *name;
	/* list that receives each whitespace-separated word of the value */
	struct string_list *list;
};
|
|
|
|
|
config: add ctx arg to config_fn_t
Add a new "const struct config_context *ctx" arg to config_fn_t to hold
additional information about the config iteration operation.
config_context has a "struct key_value_info kvi" member that holds
metadata about the config source being read (e.g. what kind of config
source it is, the filename, etc). In this series, we're only interested
in .kvi, so we could have just used "struct key_value_info" as an arg,
but config_context makes it possible to add/adjust members in the future
without changing the config_fn_t signature. We could also consider other
ways of organizing the args (e.g. moving the config name and value into
config_context or key_value_info), but in my experiments, the
incremental benefit doesn't justify the added complexity (e.g. a
config_fn_t will sometimes invoke another config_fn_t but with a
different config value).
In subsequent commits, the .kvi member will replace the global "struct
config_reader" in config.c, making config iteration a global-free
operation. It requires much more work for the machinery to provide
meaningful values of .kvi, so for now, merely change the signature and
call sites, pass NULL as a placeholder value, and don't rely on the arg
in any meaningful way.
Most of the changes are performed by
contrib/coccinelle/config_fn_ctx.pending.cocci, which, for every
config_fn_t:
- Modifies the signature to accept "const struct config_context *ctx"
- Passes "ctx" to any inner config_fn_t, if needed
- Adds UNUSED attributes to "ctx", if needed
Most config_fn_t instances are easily identified by seeing if they are
called by the various config functions. Most of the remaining ones are
manually named in the .cocci patch. Manual cleanups are still needed,
but the majority of it is trivial; it's either adjusting config_fn_t
that the .cocci patch didn't catch, or adding forward declarations of
"struct config_context ctx" to make the signatures make sense.
The non-trivial changes are in cases where we are invoking a config_fn_t
outside of config machinery, and we now need to decide what value of
"ctx" to pass. These cases are:
- trace2/tr2_cfg.c:tr2_cfg_set_fl()
This is indirectly called by git_config_set() so that the trace2
machinery can notice the new config values and update its settings
using the tr2 config parsing function, i.e. tr2_cfg_cb().
- builtin/checkout.c:checkout_main()
This calls git_xmerge_config() as a shorthand for parsing a CLI arg.
This might be worth refactoring away in the future, since
git_xmerge_config() can call git_default_config(), which can do much
more than just parsing.
Handle them by creating a KVI_INIT macro that initializes "struct
key_value_info" to a reasonable default, and use that to construct the
"ctx" arg.
Signed-off-by: Glen Choo <chooglen@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-28 19:26:22 +00:00
|
|
|
static int get_remote_group(const char *key, const char *value,
|
|
|
|
const struct config_context *ctx UNUSED,
|
|
|
|
void *priv)
|
2009-11-09 20:09:56 +00:00
|
|
|
{
|
|
|
|
struct remote_group_data *g = priv;
|
|
|
|
|
2015-07-28 21:08:21 +00:00
|
|
|
if (skip_prefix(key, "remotes.", &key) && !strcmp(key, g->name)) {
|
2009-11-09 20:09:56 +00:00
|
|
|
/* split list by white space */
|
|
|
|
while (*value) {
|
2015-07-28 21:08:20 +00:00
|
|
|
size_t wordlen = strcspn(value, " \t\n");
|
|
|
|
|
2015-07-28 21:08:19 +00:00
|
|
|
if (wordlen >= 1)
|
2016-06-14 18:28:56 +00:00
|
|
|
string_list_append_nodup(g->list,
|
2015-07-28 21:08:19 +00:00
|
|
|
xstrndup(value, wordlen));
|
|
|
|
value += wordlen + (value[wordlen] != '\0');
|
2009-11-09 20:09:56 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int add_remote_or_group(const char *name, struct string_list *list)
|
|
|
|
{
|
|
|
|
int prev_nr = list->nr;
|
2010-05-14 09:31:33 +00:00
|
|
|
struct remote_group_data g;
|
|
|
|
g.name = name; g.list = list;
|
2009-11-09 20:09:56 +00:00
|
|
|
|
|
|
|
git_config(get_remote_group, &g);
|
|
|
|
if (list->nr == prev_nr) {
|
2016-02-16 09:47:50 +00:00
|
|
|
struct remote *remote = remote_get(name);
|
2017-01-19 21:20:02 +00:00
|
|
|
if (!remote_is_configured(remote, 0))
|
2009-11-09 20:09:56 +00:00
|
|
|
return 0;
|
2010-06-25 23:41:38 +00:00
|
|
|
string_list_append(list, remote->name);
|
2009-11-09 20:09:56 +00:00
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
fetch: introduce machine-parseable "porcelain" output format
The output of git-fetch(1) is obviously designed for consumption by
users, only: we neatly columnize data, we abbreviate reference names, we
print neat arrows and we don't provide information about actual object
IDs that have changed. This makes the output format basically unusable
in the context of scripted invocations of git-fetch(1) that want to
learn about the exact changes that the command performs.
Introduce a new machine-parseable "porcelain" output format that is
supposed to fix this shortcoming. This output format is intended to
provide information about every reference that is about to be updated,
the old object ID that the reference has been pointing to and the new
object ID it will be updated to. Furthermore, the output format provides
the same flags as the human-readable format to indicate basic conditions
for each reference update like whether it was a fast-forward update, a
branch deletion, a rejected update or others.
The output format is quite simple:
```
<flag> <old-object-id> <new-object-id> <local-reference>\n
```
We assume two conditions which are generally true:
- The old and new object IDs have fixed known widths and cannot
contain spaces.
- References cannot contain newlines.
With these assumptions, the output format becomes unambiguously
parseable. Furthermore, given that this output is designed to be
consumed by scripts, the machine-readable data is printed to stdout
instead of stderr like the human-readable output is. This is mostly done
so that other data printed to stderr, like error messages or progress
meters, don't interfere with the parseable data.
A notable omission here is that the output format does not include the
remote from which a reference was fetched, which might be important
information especially in the context of multi-remote fetches. But as
such a format would require us to print the remote for every single
reference update due to parallelizable fetches it feels wasteful for the
most likely usecase, which is when fetching from a single remote.
In a similar spirit, a second restriction is that this cannot be used
with `--recurse-submodules`. This is because any reference updates would
be ambiguous without also printing the repository in which the update
happens.
Considering that both multi-remote and submodule fetches are user-facing
features, using them in conjunction with `--porcelain` that is intended
for scripting purposes is likely not going to be useful in the majority
of cases. With that in mind these restrictions feel acceptable. If
usecases for either of these come up in the future though it is easy
enough to add a new "porcelain-v2" format that adds this information.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:36 +00:00
|
|
|
static void add_options_to_argv(struct strvec *argv,
|
2023-05-17 11:48:51 +00:00
|
|
|
const struct fetch_config *config)
|
2009-11-09 20:09:56 +00:00
|
|
|
{
|
2009-11-10 08:19:43 +00:00
|
|
|
if (dry_run)
|
2020-07-28 20:24:27 +00:00
|
|
|
strvec_push(argv, "--dry-run");
|
2013-10-30 05:33:04 +00:00
|
|
|
if (prune != -1)
|
2020-07-28 20:24:27 +00:00
|
|
|
strvec_push(argv, prune ? "--prune" : "--no-prune");
|
fetch: add a --prune-tags option and fetch.pruneTags config
Add a --prune-tags option to git-fetch, along with fetch.pruneTags
config option and a -P shorthand (-p is --prune). This allows for
doing any of:
git fetch -p -P
git fetch --prune --prune-tags
git fetch -p -P origin
git fetch --prune --prune-tags origin
Or simply:
git config fetch.prune true &&
git config fetch.pruneTags true &&
git fetch
Instead of the much more verbose:
git fetch --prune origin 'refs/tags/*:refs/tags/*' '+refs/heads/*:refs/remotes/origin/*'
Before this feature it was painful to support the use-case of pulling
from a repo which is having both its branches *and* tags deleted
regularly, and have our local references to reflect upstream.
At work we create deployment tags in the repo for each rollout, and
there's *lots* of those, so they're archived within weeks for
performance reasons.
Without this change it's hard to centrally configure such repos in
/etc/gitconfig (on servers that are only used for working with
them). You need to set fetch.prune=true globally, and then for each
repo:
git -C {} config --replace-all remote.origin.fetch "refs/tags/*:refs/tags/*" "^\+*refs/tags/\*:refs/tags/\*$"
Now I can simply set fetch.pruneTags=true in /etc/gitconfig as well,
and users running "git pull" will automatically get the pruning
semantics I want.
Even though "git remote" has corresponding "prune" and "update
--prune" subcommands I'm intentionally not adding a corresponding
prune-tags or "update --prune --prune-tags" mode to that command.
It's advertised (as noted in my recent "git remote doc: correct
dangerous lies about what prune does") as only modifying remote
tracking references, whereas any --prune-tags option is always going
to modify what from the user's perspective is a local copy of the tag,
since there's no such thing as a remote tracking tag.
Ideally add_prune_tags_to_fetch_refspec() would be something that
would use ALLOC_GROW() to grow the 'fetch` member of the 'remote'
struct. Instead I'm realloc-ing remote->fetch and adding the
tag_refspec to the end.
The reason is that parse_{fetch,push}_refspec which allocate the
refspec (ultimately remote->fetch) struct are called many places that
don't have access to a 'remote' struct. It would be hard to change all
their callsites to be amenable to carry around the bookkeeping
variables required for dynamic allocation.
All the other callers of the API first incrementally construct the
string version of the refspec in remote->fetch_refspec via
add_fetch_refspec(), before finally calling parse_fetch_refspec() via
some variation of remote_get().
It's less of a pain to deal with the one special case that needs to
modify already constructed refspecs than to chase down and change all
the other callsites. The API I'm adding is intentionally not
generalized because if we add more of these we'd probably want to
re-visit how this is done.
See my "Re: [BUG] git remote prune removes local tags, depending on
fetch config" (87po6ahx87.fsf@evledraar.gmail.com;
https://public-inbox.org/git/87po6ahx87.fsf@evledraar.gmail.com/) for
more background info.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-02-09 20:32:15 +00:00
|
|
|
if (prune_tags != -1)
|
2020-07-28 20:24:27 +00:00
|
|
|
strvec_push(argv, prune_tags ? "--prune-tags" : "--no-prune-tags");
|
2010-02-24 18:22:06 +00:00
|
|
|
if (update_head_ok)
|
2020-07-28 20:24:27 +00:00
|
|
|
strvec_push(argv, "--update-head-ok");
|
2010-02-24 18:22:06 +00:00
|
|
|
if (force)
|
2020-07-28 20:24:27 +00:00
|
|
|
strvec_push(argv, "--force");
|
2010-02-24 18:22:06 +00:00
|
|
|
if (keep)
|
2020-07-28 20:24:27 +00:00
|
|
|
strvec_push(argv, "--keep");
|
2023-05-17 11:49:08 +00:00
|
|
|
if (config->recurse_submodules == RECURSE_SUBMODULES_ON)
|
2020-07-28 20:24:27 +00:00
|
|
|
strvec_push(argv, "--recurse-submodules");
|
2023-05-17 11:49:08 +00:00
|
|
|
else if (config->recurse_submodules == RECURSE_SUBMODULES_OFF)
|
2023-05-10 12:34:02 +00:00
|
|
|
strvec_push(argv, "--no-recurse-submodules");
|
2023-05-17 11:49:08 +00:00
|
|
|
else if (config->recurse_submodules == RECURSE_SUBMODULES_ON_DEMAND)
|
2020-07-28 20:24:27 +00:00
|
|
|
strvec_push(argv, "--recurse-submodules=on-demand");
|
2012-09-05 21:22:19 +00:00
|
|
|
if (tags == TAGS_SET)
|
2020-07-28 20:24:27 +00:00
|
|
|
strvec_push(argv, "--tags");
|
2012-09-05 21:22:19 +00:00
|
|
|
else if (tags == TAGS_UNSET)
|
2020-07-28 20:24:27 +00:00
|
|
|
strvec_push(argv, "--no-tags");
|
2009-11-09 20:09:56 +00:00
|
|
|
if (verbosity >= 2)
|
2020-07-28 20:24:27 +00:00
|
|
|
strvec_push(argv, "-v");
|
2009-11-09 20:09:56 +00:00
|
|
|
if (verbosity >= 1)
|
2020-07-28 20:24:27 +00:00
|
|
|
strvec_push(argv, "-v");
|
2009-11-09 20:09:56 +00:00
|
|
|
else if (verbosity < 0)
|
2020-07-28 20:24:27 +00:00
|
|
|
strvec_push(argv, "-q");
|
2020-09-15 11:54:07 +00:00
|
|
|
if (family == TRANSPORT_FAMILY_IPV4)
|
2020-09-22 19:36:34 +00:00
|
|
|
strvec_push(argv, "--ipv4");
|
2020-09-15 11:54:07 +00:00
|
|
|
else if (family == TRANSPORT_FAMILY_IPV6)
|
2020-09-22 19:36:34 +00:00
|
|
|
strvec_push(argv, "--ipv6");
|
2023-03-08 22:22:05 +00:00
|
|
|
if (!write_fetch_head)
|
|
|
|
strvec_push(argv, "--no-write-fetch-head");
|
2023-05-17 11:48:51 +00:00
|
|
|
if (config->display_format == DISPLAY_FORMAT_PORCELAIN)
|
fetch: introduce machine-parseable "porcelain" output format
The output of git-fetch(1) is obviously designed for consumption by
users, only: we neatly columnize data, we abbreviate reference names, we
print neat arrows and we don't provide information about actual object
IDs that have changed. This makes the output format basically unusable
in the context of scripted invocations of git-fetch(1) that want to
learn about the exact changes that the command performs.
Introduce a new machine-parseable "porcelain" output format that is
supposed to fix this shortcoming. This output format is intended to
provide information about every reference that is about to be updated,
the old object ID that the reference has been pointing to and the new
object ID it will be updated to. Furthermore, the output format provides
the same flags as the human-readable format to indicate basic conditions
for each reference update like whether it was a fast-forward update, a
branch deletion, a rejected update or others.
The output format is quite simple:
```
<flag> <old-object-id> <new-object-id> <local-reference>\n
```
We assume two conditions which are generally true:
- The old and new object IDs have fixed known widths and cannot
contain spaces.
- References cannot contain newlines.
With these assumptions, the output format becomes unambiguously
parseable. Furthermore, given that this output is designed to be
consumed by scripts, the machine-readable data is printed to stdout
instead of stderr like the human-readable output is. This is mostly done
so that other data printed to stderr, like error messages or progress
meters, don't interfere with the parseable data.
A notable ommission here is that the output format does not include the
remote from which a reference was fetched, which might be important
information especially in the context of multi-remote fetches. But as
such a format would require us to print the remote for every single
reference update due to parallelizable fetches it feels wasteful for the
most likely usecase, which is when fetching from a single remote.
In a similar spirit, a second restriction is that this cannot be used
with `--recurse-submodules`. This is because any reference updates would
be ambiguous without also printing the repository in which the update
happens.
Considering that both multi-remote and submodule fetches are user-facing
features, using them in conjunction with `--porcelain` that is intended
for scripting purposes is likely not going to be useful in the majority
of cases. With that in mind these restrictions feel acceptable. If
usecases for either of these come up in the future though it is easy
enough to add a new "porcelain-v2" format that adds this information.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:36 +00:00
|
|
|
strvec_pushf(argv, "--porcelain");
|
2010-11-12 12:54:52 +00:00
|
|
|
}
|
|
|
|
|
2019-10-05 18:46:40 +00:00
|
|
|
/* Fetch multiple remotes in parallel */
|
|
|
|
|
|
|
|
struct parallel_fetch_state {
	/* Argument vector copied into every child before the remote name. */
	const char **argv;
	/* Remotes to fetch from; one child is started per entry. */
	struct string_list *remotes;
	/* Index into `remotes` of the next entry to hand out. */
	int next;
	/* Set to a non-zero value once any child fails. */
	int result;
	/* Fetch configuration consulted when deciding what to print. */
	const struct fetch_config *config;
};
|
|
|
|
|
2023-02-24 06:39:46 +00:00
|
|
|
static int fetch_next_remote(struct child_process *cp,
|
|
|
|
struct strbuf *out UNUSED,
|
2019-10-05 18:46:40 +00:00
|
|
|
void *cb, void **task_cb)
|
|
|
|
{
|
|
|
|
struct parallel_fetch_state *state = cb;
|
|
|
|
char *remote;
|
|
|
|
|
|
|
|
if (state->next < 0 || state->next >= state->remotes->nr)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
remote = state->remotes->items[state->next++].string;
|
|
|
|
*task_cb = remote;
|
|
|
|
|
2020-07-28 20:24:27 +00:00
|
|
|
strvec_pushv(&cp->args, state->argv);
|
|
|
|
strvec_push(&cp->args, remote);
|
2019-10-05 18:46:40 +00:00
|
|
|
cp->git_cmd = 1;
|
|
|
|
|
2023-05-17 11:48:51 +00:00
|
|
|
if (verbosity >= 0 && state->config->display_format != DISPLAY_FORMAT_PORCELAIN)
|
2019-10-05 18:46:40 +00:00
|
|
|
printf(_("Fetching %s\n"), remote);
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2023-02-24 06:39:46 +00:00
|
|
|
/*
 * Start-failure callback for the parallel runner: report that the fetch
 * for the remote named by `task_cb` could not be started, and store
 * error()'s return value as the overall result in the shared state `cb`.
 * Always returns 0.
 */
static int fetch_failed_to_start(struct strbuf *out UNUSED,
				 void *cb, void *task_cb)
{
	struct parallel_fetch_state *st = cb;
	const char *name = task_cb;
	int rc = error(_("could not fetch %s"), name);

	st->result = rc;
	return 0;
}
|
|
|
|
|
|
|
|
static int fetch_finished(int result, struct strbuf *out,
|
|
|
|
void *cb, void *task_cb)
|
|
|
|
{
|
|
|
|
struct parallel_fetch_state *state = cb;
|
|
|
|
const char *remote = task_cb;
|
|
|
|
|
|
|
|
if (result) {
|
|
|
|
strbuf_addf(out, _("could not fetch '%s' (exit code: %d)\n"),
|
|
|
|
remote, result);
|
|
|
|
state->result = -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
fetch: introduce machine-parseable "porcelain" output format
The output of git-fetch(1) is obviously designed for consumption by
users, only: we neatly columnize data, we abbreviate reference names, we
print neat arrows and we don't provide information about actual object
IDs that have changed. This makes the output format basically unusable
in the context of scripted invocations of git-fetch(1) that want to
learn about the exact changes that the command performs.
Introduce a new machine-parseable "porcelain" output format that is
supposed to fix this shortcoming. This output format is intended to
provide information about every reference that is about to be updated,
the old object ID that the reference has been pointing to and the new
object ID it will be updated to. Furthermore, the output format provides
the same flags as the human-readable format to indicate basic conditions
for each reference update like whether it was a fast-forward update, a
branch deletion, a rejected update or others.
The output format is quite simple:
```
<flag> <old-object-id> <new-object-id> <local-reference>\n
```
We assume two conditions which are generally true:
- The old and new object IDs have fixed known widths and cannot
contain spaces.
- References cannot contain newlines.
With these assumptions, the output format becomes unambiguously
parseable. Furthermore, given that this output is designed to be
consumed by scripts, the machine-readable data is printed to stdout
instead of stderr like the human-readable output is. This is mostly done
so that other data printed to stderr, like error messages or progress
meters, don't interfere with the parseable data.
A notable omission here is that the output format does not include the
remote from which a reference was fetched, which might be important
information especially in the context of multi-remote fetches. But as
such a format would require us to print the remote for every single
reference update due to parallelizable fetches it feels wasteful for the
most likely usecase, which is when fetching from a single remote.
In a similar spirit, a second restriction is that this cannot be used
with `--recurse-submodules`. This is because any reference updates would
be ambiguous without also printing the repository in which the update
happens.
Considering that both multi-remote and submodule fetches are user-facing
features, using them in conjunction with `--porcelain` that is intended
for scripting purposes is likely not going to be useful in the majority
of cases. With that in mind these restrictions feel acceptable. If
usecases for either of these come up in the future though it is easy
enough to add a new "porcelain-v2" format that adds this information.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:36 +00:00
|
|
|
static int fetch_multiple(struct string_list *list, int max_children,
|
2023-05-17 11:48:51 +00:00
|
|
|
const struct fetch_config *config)
|
2010-11-12 12:54:52 +00:00
|
|
|
{
|
|
|
|
int i, result = 0;
|
2020-07-28 20:24:27 +00:00
|
|
|
struct strvec argv = STRVEC_INIT;
|
2009-11-09 20:09:56 +00:00
|
|
|
|
2020-08-18 14:25:22 +00:00
|
|
|
if (!append && write_fetch_head) {
|
2010-02-24 19:02:05 +00:00
|
|
|
int errcode = truncate_fetch_head();
|
|
|
|
if (errcode)
|
|
|
|
return errcode;
|
|
|
|
}
|
|
|
|
|
2023-03-31 15:59:04 +00:00
|
|
|
/*
|
|
|
|
* Cancel out the fetch.bundleURI config when running subprocesses,
|
|
|
|
* to avoid fetching from the same bundle list multiple times.
|
|
|
|
*/
|
|
|
|
strvec_pushl(&argv, "-c", "fetch.bundleURI=",
|
|
|
|
"fetch", "--append", "--no-auto-gc",
|
strvec: fix indentation in renamed calls
Code which split an argv_array call across multiple lines, like:
argv_array_pushl(&args, "one argument",
"another argument", "and more",
NULL);
was recently mechanically renamed to use strvec, which results in
mis-matched indentation like:
strvec_pushl(&args, "one argument",
"another argument", "and more",
NULL);
Let's fix these up to align the arguments with the opening paren. I did
this manually by sifting through the results of:
git jump grep 'strvec_.*,$'
and liberally applying my editor's auto-format. Most of the changes are
of the form shown above, though I also normalized a few that had
originally used a single-tab indentation (rather than our usual style of
aligning with the open paren). I also rewrapped a couple of obvious
cases (e.g., where previously too-long lines became short enough to fit
on one), but I wasn't aggressive about it. In cases broken to three or
more lines, the grouping of arguments is sometimes meaningful, and it
wasn't worth my time or reviewer time to ponder each case individually.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-07-28 20:26:31 +00:00
|
|
|
"--no-write-commit-graph", NULL);
|
2023-05-17 11:48:51 +00:00
|
|
|
add_options_to_argv(&argv, config);
|
2012-09-01 11:27:35 +00:00
|
|
|
|
2019-10-05 18:46:40 +00:00
|
|
|
if (max_children != 1 && list->nr != 1) {
|
2023-05-17 11:48:51 +00:00
|
|
|
struct parallel_fetch_state state = { argv.v, list, 0, 0, config };
|
2022-10-12 21:02:27 +00:00
|
|
|
const struct run_process_parallel_opts opts = {
|
|
|
|
.tr2_category = "fetch",
|
|
|
|
.tr2_label = "parallel/fetch",
|
|
|
|
|
|
|
|
.processes = max_children,
|
|
|
|
|
|
|
|
.get_next_task = &fetch_next_remote,
|
|
|
|
.start_failure = &fetch_failed_to_start,
|
|
|
|
.task_finished = &fetch_finished,
|
|
|
|
.data = &state,
|
|
|
|
};
|
2019-10-05 18:46:40 +00:00
|
|
|
|
2020-07-28 20:24:27 +00:00
|
|
|
strvec_push(&argv, "--end-of-options");
|
run-command API: have "run_processes_parallel{,_tr2}()" return void
Change the "run_processes_parallel{,_tr2}()" functions to return void,
instead of int. Ever since c553c72eed6 (run-command: add an
asynchronous parallel child processor, 2015-12-15) they have
unconditionally returned 0.
To get a "real" return value out of this function the caller needs to
get it via the "task_finished_fn" callback, see the example in hook.c
added in 96e7225b310 (hook: add 'run' subcommand, 2021-12-22).
So the "result = " and "if (!result)" code added to "builtin/fetch.c"
d54dea77dba (fetch: let --jobs=<n> parallelize --multiple, too,
2019-10-05) has always been redundant, we always took that "if"
path. Likewise the "ret =" in "t/helper/test-run-command.c" added in
be5d88e1128 (test-tool run-command: learn to run (parts of) the
testsuite, 2019-10-04) wasn't used, instead we got the return value
from the "if (suite.failed.nr > 0)" block seen in the context.
Subsequent commits will alter this API interface, getting rid of this
always-zero return value makes it easier to understand those changes.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-10-12 21:02:21 +00:00
|
|
|
|
2022-10-12 21:02:27 +00:00
|
|
|
run_processes_parallel(&opts);
|
run-command API: have "run_processes_parallel{,_tr2}()" return void
Change the "run_processes_parallel{,_tr2}()" functions to return void,
instead of int. Ever since c553c72eed6 (run-command: add an
asynchronous parallel child processor, 2015-12-15) they have
unconditionally returned 0.
To get a "real" return value out of this function the caller needs to
get it via the "task_finished_fn" callback, see the example in hook.c
added in 96e7225b310 (hook: add 'run' subcommand, 2021-12-22).
So the "result = " and "if (!result)" code added to "builtin/fetch.c"
d54dea77dba (fetch: let --jobs=<n> parallelize --multiple, too,
2019-10-05) has always been redundant, we always took that "if"
path. Likewise the "ret =" in "t/helper/test-run-command.c" added in
be5d88e1128 (test-tool run-command: learn to run (parts of) the
testsuite, 2019-10-04) wasn't used, instead we got the return value
from the "if (suite.failed.nr > 0)" block seen in the context.
Subsequent commits will alter this API interface, getting rid of this
always-zero return value makes it easier to understand those changes.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-10-12 21:02:21 +00:00
|
|
|
result = state.result;
|
2019-10-05 18:46:40 +00:00
|
|
|
} else
|
|
|
|
for (i = 0; i < list->nr; i++) {
|
|
|
|
const char *name = list->items[i].string;
|
2022-10-30 11:55:06 +00:00
|
|
|
struct child_process cmd = CHILD_PROCESS_INIT;
|
|
|
|
|
|
|
|
strvec_pushv(&cmd.args, argv.v);
|
|
|
|
strvec_push(&cmd.args, name);
|
2023-05-17 11:48:51 +00:00
|
|
|
if (verbosity >= 0 && config->display_format != DISPLAY_FORMAT_PORCELAIN)
|
2019-10-05 18:46:40 +00:00
|
|
|
printf(_("Fetching %s\n"), name);
|
2022-10-30 11:55:06 +00:00
|
|
|
cmd.git_cmd = 1;
|
|
|
|
if (run_command(&cmd)) {
|
2021-12-01 22:15:40 +00:00
|
|
|
error(_("could not fetch %s"), name);
|
2019-10-05 18:46:40 +00:00
|
|
|
result = 1;
|
|
|
|
}
|
2009-11-09 20:09:56 +00:00
|
|
|
}
|
|
|
|
|
2020-07-28 20:24:27 +00:00
|
|
|
strvec_clear(&argv);
|
2019-10-05 18:46:40 +00:00
|
|
|
return !!result;
|
2009-11-09 20:09:56 +00:00
|
|
|
}
|
|
|
|
|
2017-12-08 15:58:50 +00:00
|
|
|
/*
|
|
|
|
* Fetching from the promisor remote should use the given filter-spec
|
|
|
|
* or inherit the default filter-spec from the config.
|
|
|
|
*/
|
|
|
|
static inline void fetch_one_setup_partial(struct remote *remote)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Explicit --no-filter argument overrides everything, regardless
|
|
|
|
* of any prior partial clones and fetches.
|
|
|
|
*/
|
|
|
|
if (filter_options.no_filter)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If no prior partial clone/fetch and the current fetch DID NOT
|
|
|
|
* request a partial-fetch, do a normal fetch.
|
|
|
|
*/
|
2023-03-28 13:58:53 +00:00
|
|
|
if (!repo_has_promisor_remote(the_repository) && !filter_options.choice)
|
2017-12-08 15:58:50 +00:00
|
|
|
return;
|
|
|
|
|
|
|
|
/*
|
2019-06-25 13:40:33 +00:00
|
|
|
* If this is a partial-fetch request, we enable partial on
|
|
|
|
* this repo if not already enabled and remember the given
|
|
|
|
* filter-spec as the default for subsequent fetches to this
|
2020-09-28 22:26:38 +00:00
|
|
|
* remote if there is currently no default filter-spec.
|
2017-12-08 15:58:50 +00:00
|
|
|
*/
|
2019-06-25 13:40:33 +00:00
|
|
|
if (filter_options.choice) {
|
2017-12-08 15:58:50 +00:00
|
|
|
partial_clone_register(remote->name, &filter_options);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Do a partial-fetch from the promisor remote using either the
|
|
|
|
* explicitly given filter-spec or inherit the filter-spec from
|
|
|
|
* the config.
|
|
|
|
*/
|
|
|
|
if (!filter_options.choice)
|
2019-06-25 13:40:32 +00:00
|
|
|
partial_clone_get_default_filter_spec(&filter_options, remote->name);
|
2017-12-08 15:58:50 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2020-08-18 04:01:32 +00:00
|
|
|
static int fetch_one(struct remote *remote, int argc, const char **argv,
|
2023-05-10 12:34:28 +00:00
|
|
|
int prune_tags_ok, int use_stdin_refspecs,
|
2023-05-17 11:48:51 +00:00
|
|
|
const struct fetch_config *config)
|
2007-09-11 03:03:25 +00:00
|
|
|
{
|
2018-05-16 22:58:04 +00:00
|
|
|
struct refspec rs = REFSPEC_INIT_FETCH;
|
|
|
|
int i;
|
2008-04-28 20:23:35 +00:00
|
|
|
int exit_code;
|
2018-02-09 20:32:16 +00:00
|
|
|
int maybe_prune_tags;
|
|
|
|
int remote_via_config = remote_is_configured(remote, 0);
|
2007-09-11 03:03:25 +00:00
|
|
|
|
Give error when no remote is configured
When there's no explicitly-named remote, we use the remote specified
for the current branch, which in turn defaults to "origin". But in
this case should require the remote to actually be configured, and not
fall back to the path "origin".
Possibly, the config file's "remote = something" should require the
something to be a configured remote instead of a bare repository URL,
but we actually test with a bare repository URL.
In fetch, we were giving the sensible error message when coming up
with a URL failed, but this wasn't actually reachable, so move that
error up and use it when appropriate.
In push, we need a new error message, because the old one (formerly
unreachable without a lot of help) used the repo name, which was NULL.
Signed-off-by: Daniel Barkalow <barkalow@iabervon.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-03-11 05:47:20 +00:00
|
|
|
if (!remote)
|
2021-12-01 22:15:40 +00:00
|
|
|
die(_("no remote repository specified; please specify either a URL or a\n"
|
|
|
|
"remote name from which new revisions should be fetched"));
|
Give error when no remote is configured
When there's no explicitly-named remote, we use the remote specified
for the current branch, which in turn defaults to "origin". But in
this case should require the remote to actually be configured, and not
fall back to the path "origin".
Possibly, the config file's "remote = something" should require the
something to be a configured remote instead of a bare repository URL,
but we actually test with a bare repository URL.
In fetch, we were giving the sensible error message when coming up
with a URL failed, but this wasn't actually reachable, so move that
error up and use it when appropriate.
In push, we need a new error message, because the old one (formerly
unreachable without a lot of help) used the repo name, which was NULL.
Signed-off-by: Daniel Barkalow <barkalow@iabervon.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-03-11 05:47:20 +00:00
|
|
|
|
2016-06-12 10:53:59 +00:00
|
|
|
gtransport = prepare_transport(remote, 1);
|
2013-07-13 09:36:24 +00:00
|
|
|
|
|
|
|
if (prune < 0) {
|
|
|
|
/* no command line request */
|
2018-02-09 20:32:02 +00:00
|
|
|
if (0 <= remote->prune)
|
|
|
|
prune = remote->prune;
|
2023-05-17 11:48:56 +00:00
|
|
|
else if (0 <= config->prune)
|
|
|
|
prune = config->prune;
|
2013-07-13 09:36:24 +00:00
|
|
|
else
|
|
|
|
prune = PRUNE_BY_DEFAULT;
|
|
|
|
}
|
|
|
|
|
fetch: add a --prune-tags option and fetch.pruneTags config
Add a --prune-tags option to git-fetch, along with fetch.pruneTags
config option and a -P shorthand (-p is --prune). This allows for
doing any of:
git fetch -p -P
git fetch --prune --prune-tags
git fetch -p -P origin
git fetch --prune --prune-tags origin
Or simply:
git config fetch.prune true &&
git config fetch.pruneTags true &&
git fetch
Instead of the much more verbose:
git fetch --prune origin 'refs/tags/*:refs/tags/*' '+refs/heads/*:refs/remotes/origin/*'
Before this feature it was painful to support the use-case of pulling
from a repo which is having both its branches *and* tags deleted
regularly, and have our local references to reflect upstream.
At work we create deployment tags in the repo for each rollout, and
there's *lots* of those, so they're archived within weeks for
performance reasons.
Without this change it's hard to centrally configure such repos in
/etc/gitconfig (on servers that are only used for working with
them). You need to set fetch.prune=true globally, and then for each
repo:
git -C {} config --replace-all remote.origin.fetch "refs/tags/*:refs/tags/*" "^\+*refs/tags/\*:refs/tags/\*$"
Now I can simply set fetch.pruneTags=true in /etc/gitconfig as well,
and users running "git pull" will automatically get the pruning
semantics I want.
Even though "git remote" has corresponding "prune" and "update
--prune" subcommands I'm intentionally not adding a corresponding
prune-tags or "update --prune --prune-tags" mode to that command.
It's advertised (as noted in my recent "git remote doc: correct
dangerous lies about what prune does") as only modifying remote
tracking references, whereas any --prune-tags option is always going
to modify what from the user's perspective is a local copy of the tag,
since there's no such thing as a remote tracking tag.
Ideally add_prune_tags_to_fetch_refspec() would be something that
would use ALLOC_GROW() to grow the 'fetch` member of the 'remote'
struct. Instead I'm realloc-ing remote->fetch and adding the
tag_refspec to the end.
The reason is that parse_{fetch,push}_refspec which allocate the
refspec (ultimately remote->fetch) struct are called many places that
don't have access to a 'remote' struct. It would be hard to change all
their callsites to be amenable to carry around the bookkeeping
variables required for dynamic allocation.
All the other callers of the API first incrementally construct the
string version of the refspec in remote->fetch_refspec via
add_fetch_refspec(), before finally calling parse_fetch_refspec() via
some variation of remote_get().
It's less of a pain to deal with the one special case that needs to
modify already constructed refspecs than to chase down and change all
the other callsites. The API I'm adding is intentionally not
generalized because if we add more of these we'd probably want to
re-visit how this is done.
See my "Re: [BUG] git remote prune removes local tags, depending on
fetch config" (87po6ahx87.fsf@evledraar.gmail.com;
https://public-inbox.org/git/87po6ahx87.fsf@evledraar.gmail.com/) for
more background info.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-02-09 20:32:15 +00:00
|
|
|
if (prune_tags < 0) {
|
|
|
|
/* no command line request */
|
|
|
|
if (0 <= remote->prune_tags)
|
|
|
|
prune_tags = remote->prune_tags;
|
2023-05-17 11:49:00 +00:00
|
|
|
else if (0 <= config->prune_tags)
|
|
|
|
prune_tags = config->prune_tags;
|
fetch: add a --prune-tags option and fetch.pruneTags config
Add a --prune-tags option to git-fetch, along with fetch.pruneTags
config option and a -P shorthand (-p is --prune). This allows for
doing any of:
git fetch -p -P
git fetch --prune --prune-tags
git fetch -p -P origin
git fetch --prune --prune-tags origin
Or simply:
git config fetch.prune true &&
git config fetch.pruneTags true &&
git fetch
Instead of the much more verbose:
git fetch --prune origin 'refs/tags/*:refs/tags/*' '+refs/heads/*:refs/remotes/origin/*'
Before this feature it was painful to support the use-case of pulling
from a repo which is having both its branches *and* tags deleted
regularly, and have our local references to reflect upstream.
At work we create deployment tags in the repo for each rollout, and
there's *lots* of those, so they're archived within weeks for
performance reasons.
Without this change it's hard to centrally configure such repos in
/etc/gitconfig (on servers that are only used for working with
them). You need to set fetch.prune=true globally, and then for each
repo:
git -C {} config --replace-all remote.origin.fetch "refs/tags/*:refs/tags/*" "^\+*refs/tags/\*:refs/tags/\*$"
Now I can simply set fetch.pruneTags=true in /etc/gitconfig as well,
and users running "git pull" will automatically get the pruning
semantics I want.
Even though "git remote" has corresponding "prune" and "update
--prune" subcommands I'm intentionally not adding a corresponding
prune-tags or "update --prune --prune-tags" mode to that command.
It's advertised (as noted in my recent "git remote doc: correct
dangerous lies about what prune does") as only modifying remote
tracking references, whereas any --prune-tags option is always going
to modify what from the user's perspective is a local copy of the tag,
since there's no such thing as a remote tracking tag.
Ideally add_prune_tags_to_fetch_refspec() would be something that
would use ALLOC_GROW() to grow the 'fetch` member of the 'remote'
struct. Instead I'm realloc-ing remote->fetch and adding the
tag_refspec to the end.
The reason is that parse_{fetch,push}_refspec which allocate the
refspec (ultimately remote->fetch) struct are called many places that
don't have access to a 'remote' struct. It would be hard to change all
their callsites to be amenable to carry around the bookkeeping
variables required for dynamic allocation.
All the other callers of the API first incrementally construct the
string version of the refspec in remote->fetch_refspec via
add_fetch_refspec(), before finally calling parse_fetch_refspec() via
some variation of remote_get().
It's less of a pain to deal with the one special case that needs to
modify already constructed refspecs than to chase down and change all
the other callsites. The API I'm adding is intentionally not
generalized because if we add more of these we'd probably want to
re-visit how this is done.
See my "Re: [BUG] git remote prune removes local tags, depending on
fetch config" (87po6ahx87.fsf@evledraar.gmail.com;
https://public-inbox.org/git/87po6ahx87.fsf@evledraar.gmail.com/) for
more background info.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-02-09 20:32:15 +00:00
|
|
|
else
|
|
|
|
prune_tags = PRUNE_TAGS_BY_DEFAULT;
|
|
|
|
}
|
|
|
|
|
2018-02-09 20:32:16 +00:00
|
|
|
maybe_prune_tags = prune_tags_ok && prune_tags;
|
|
|
|
if (maybe_prune_tags && remote_via_config)
|
2018-05-16 22:58:02 +00:00
|
|
|
refspec_append(&remote->fetch, TAG_REFSPEC);
|
fetch: add a --prune-tags option and fetch.pruneTags config
Add a --prune-tags option to git-fetch, along with fetch.pruneTags
config option and a -P shorthand (-p is --prune). This allows for
doing any of:
git fetch -p -P
git fetch --prune --prune-tags
git fetch -p -P origin
git fetch --prune --prune-tags origin
Or simply:
git config fetch.prune true &&
git config fetch.pruneTags true &&
git fetch
Instead of the much more verbose:
git fetch --prune origin 'refs/tags/*:refs/tags/*' '+refs/heads/*:refs/remotes/origin/*'
Before this feature it was painful to support the use-case of pulling
from a repo which is having both its branches *and* tags deleted
regularly, and have our local references to reflect upstream.
At work we create deployment tags in the repo for each rollout, and
there's *lots* of those, so they're archived within weeks for
performance reasons.
Without this change it's hard to centrally configure such repos in
/etc/gitconfig (on servers that are only used for working with
them). You need to set fetch.prune=true globally, and then for each
repo:
git -C {} config --replace-all remote.origin.fetch "refs/tags/*:refs/tags/*" "^\+*refs/tags/\*:refs/tags/\*$"
Now I can simply set fetch.pruneTags=true in /etc/gitconfig as well,
and users running "git pull" will automatically get the pruning
semantics I want.
Even though "git remote" has corresponding "prune" and "update
--prune" subcommands I'm intentionally not adding a corresponding
prune-tags or "update --prune --prune-tags" mode to that command.
It's advertised (as noted in my recent "git remote doc: correct
dangerous lies about what prune does") as only modifying remote
tracking references, whereas any --prune-tags option is always going
to modify what from the user's perspective is a local copy of the tag,
since there's no such thing as a remote tracking tag.
Ideally add_prune_tags_to_fetch_refspec() would be something that
would use ALLOC_GROW() to grow the 'fetch' member of the 'remote'
struct. Instead I'm realloc-ing remote->fetch and adding the
tag_refspec to the end.
The reason is that parse_{fetch,push}_refspec which allocate the
refspec (ultimately remote->fetch) struct are called from many places that
don't have access to a 'remote' struct. It would be hard to change all
their callsites to be amenable to carry around the bookkeeping
variables required for dynamic allocation.
All the other callers of the API first incrementally construct the
string version of the refspec in remote->fetch_refspec via
add_fetch_refspec(), before finally calling parse_fetch_refspec() via
some variation of remote_get().
It's less of a pain to deal with the one special case that needs to
modify already constructed refspecs than to chase down and change all
the other callsites. The API I'm adding is intentionally not
generalized because if we add more of these we'd probably want to
re-visit how this is done.
See my "Re: [BUG] git remote prune removes local tags, depending on
fetch config" (87po6ahx87.fsf@evledraar.gmail.com;
https://public-inbox.org/git/87po6ahx87.fsf@evledraar.gmail.com/) for
more background info.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-02-09 20:32:15 +00:00
|
|
|
|
2018-05-16 22:58:04 +00:00
|
|
|
if (maybe_prune_tags && (argc || !remote_via_config))
|
|
|
|
refspec_append(&rs, TAG_REFSPEC);
|
2018-02-09 20:32:16 +00:00
|
|
|
|
2018-05-16 22:58:04 +00:00
|
|
|
for (i = 0; i < argc; i++) {
|
|
|
|
if (!strcmp(argv[i], "tag")) {
|
|
|
|
i++;
|
|
|
|
if (i >= argc)
|
2021-12-01 22:15:40 +00:00
|
|
|
die(_("you need to specify a tag name"));
|
2018-05-16 22:58:04 +00:00
|
|
|
|
2020-09-05 14:49:30 +00:00
|
|
|
refspec_appendf(&rs, "refs/tags/%s:refs/tags/%s",
|
|
|
|
argv[i], argv[i]);
|
2018-05-16 22:58:04 +00:00
|
|
|
} else {
|
|
|
|
refspec_append(&rs, argv[i]);
|
2007-09-11 03:03:25 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-08-18 04:01:32 +00:00
|
|
|
if (use_stdin_refspecs) {
|
|
|
|
struct strbuf line = STRBUF_INIT;
|
|
|
|
while (strbuf_getline_lf(&line, stdin) != EOF)
|
|
|
|
refspec_append(&rs, line.buf);
|
|
|
|
strbuf_release(&line);
|
|
|
|
}
|
|
|
|
|
2018-04-23 22:46:24 +00:00
|
|
|
if (server_options.nr)
|
|
|
|
gtransport->server_options = &server_options;
|
|
|
|
|
2009-01-22 06:03:08 +00:00
|
|
|
sigchain_push_common(unlock_pack_on_signal);
|
fetch: fix deadlock when cleaning up lockfiles in async signals
When fetching packfiles, we write a bunch of lockfiles for the packfiles
we're writing into the repository. In order to not leave behind any
cruft in case we exit or receive a signal, we register both an exit
handler as well as signal handlers for common signals like SIGINT. These
handlers will then unlink the locks and free the data structure tracking
them. We have observed a deadlock in this logic though:
(gdb) bt
#0 __lll_lock_wait_private () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:95
#1 0x00007f4932bea2cd in _int_free (av=0x7f4932f2eb20 <main_arena>, p=0x3e3e4200, have_lock=0) at malloc.c:3969
#2 0x00007f4932bee58c in __GI___libc_free (mem=<optimized out>) at malloc.c:2975
#3 0x0000000000662ab1 in string_list_clear ()
#4 0x000000000044f5bc in unlock_pack_on_signal ()
#5 <signal handler called>
#6 _int_free (av=0x7f4932f2eb20 <main_arena>, p=<optimized out>, have_lock=0) at malloc.c:4024
#7 0x00007f4932bee58c in __GI___libc_free (mem=<optimized out>) at malloc.c:2975
#8 0x000000000065afd5 in strbuf_release ()
#9 0x000000000066ddb9 in delete_tempfile ()
#10 0x0000000000610d0b in files_transaction_cleanup.isra ()
#11 0x0000000000611718 in files_transaction_abort ()
#12 0x000000000060d2ef in ref_transaction_abort ()
#13 0x000000000060d441 in ref_transaction_prepare ()
#14 0x000000000060e0b5 in ref_transaction_commit ()
#15 0x00000000004511c2 in fetch_and_consume_refs ()
#16 0x000000000045279a in cmd_fetch ()
#17 0x0000000000407c48 in handle_builtin ()
#18 0x0000000000408df2 in cmd_main ()
#19 0x00000000004078b5 in main ()
The process was killed with a signal, which caused the signal handler to
kick in and try to free the data structures after we have unlinked the
locks. It then deadlocks while calling free(3P).
The root cause of this is that it is not allowed to call certain
functions in async-signal handlers, as specified by signal-safety(7).
Next to most I/O functions, this list of disallowed functions also
includes memory-handling functions like malloc(3P) and free(3P) because
they may not be reentrant. As a result, if we execute such functions in
the signal handler, then they may operate on inconsistent state and fail
in unexpected ways.
Fix this bug by not calling non-async-signal-safe functions when running
in the signal handler. We're about to re-raise the signal anyway and
will thus exit, so it's not much of a problem to keep the string list of
lockfiles untouched. Note that it's fine though to call unlink(2), so
we'll still clean up the lockfiles correctly.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Reviewed-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-01-07 10:55:47 +00:00
|
|
|
atexit(unlock_pack_atexit);
|
fetch: ignore SIGPIPE during network operation
The default SIGPIPE behavior can be useful for a command that generates
a lot of output: if the receiver of our output goes away, we'll be
notified asynchronously to stop generating it (typically by killing the
program).
But for a command like fetch, which is primarily concerned with
receiving data and writing it to disk, an unexpected SIGPIPE can be
awkward. We're already checking the return value of all of our write()
calls, and dying due to the signal takes away our chance to gracefully
handle the error.
On Linux, we wouldn't generally see SIGPIPE at all during fetch. If the
other side of the network connection hangs up, we'll see ECONNRESET. But
on OS X, we get a SIGPIPE, and the process is killed. This causes t5570
to racily fail, as we sometimes die by signal (instead of the expected
die() call) when the server side hangs up.
Let's ignore SIGPIPE during the network portion of the fetch, which will
cause our write() to return EPIPE, giving us consistent behavior across
platforms.
This fixes the test flakiness, but note that it stops short of fixing
the larger problem. The server side hit a fatal error, sent us an "ERR"
packet, and then hung up. We notice the failure because we're trying to
write to a closed socket. But by dying immediately, we never actually
read the ERR packet and report its content to the user. This is a (racy)
problem on all platforms. So this patch lays the groundwork from which
that problem might be fixed consistently, but it doesn't actually fix
it.
Note the placement of the SIGPIPE handling. The absolute minimal change
would be to ignore SIGPIPE only when we're writing. But twiddling the
signal handler for each write call is inefficient and a maintenance
burden. On the opposite end of the spectrum, we could simply declare
that fetch does not need SIGPIPE handling, since it doesn't generate a
lot of output, and we could just ignore it at the start of cmd_fetch().
This patch takes a middle ground. It ignores SIGPIPE during the network
operation (which is admittedly most of the program, since the actual
network operations are all done under the hood by the transport code).
So it's still pretty coarse.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-03 16:58:43 +00:00
|
|
|
sigchain_push(SIGPIPE, SIG_IGN);
|
2023-05-17 11:48:51 +00:00
|
|
|
exit_code = do_fetch(gtransport, &rs, config);
|
fetch: ignore SIGPIPE during network operation
The default SIGPIPE behavior can be useful for a command that generates
a lot of output: if the receiver of our output goes away, we'll be
notified asynchronously to stop generating it (typically by killing the
program).
But for a command like fetch, which is primarily concerned with
receiving data and writing it to disk, an unexpected SIGPIPE can be
awkward. We're already checking the return value of all of our write()
calls, and dying due to the signal takes away our chance to gracefully
handle the error.
On Linux, we wouldn't generally see SIGPIPE at all during fetch. If the
other side of the network connection hangs up, we'll see ECONNRESET. But
on OS X, we get a SIGPIPE, and the process is killed. This causes t5570
to racily fail, as we sometimes die by signal (instead of the expected
die() call) when the server side hangs up.
Let's ignore SIGPIPE during the network portion of the fetch, which will
cause our write() to return EPIPE, giving us consistent behavior across
platforms.
This fixes the test flakiness, but note that it stops short of fixing
the larger problem. The server side hit a fatal error, sent us an "ERR"
packet, and then hung up. We notice the failure because we're trying to
write to a closed socket. But by dying immediately, we never actually
read the ERR packet and report its content to the user. This is a (racy)
problem on all platforms. So this patch lays the groundwork from which
that problem might be fixed consistently, but it doesn't actually fix
it.
Note the placement of the SIGPIPE handling. The absolute minimal change
would be to ignore SIGPIPE only when we're writing. But twiddling the
signal handler for each write call is inefficient and a maintenance
burden. On the opposite end of the spectrum, we could simply declare
that fetch does not need SIGPIPE handling, since it doesn't generate a
lot of output, and we could just ignore it at the start of cmd_fetch().
This patch takes a middle ground. It ignores SIGPIPE during the network
operation (which is admittedly most of the program, since the actual
network operations are all done under the hood by the transport code).
So it's still pretty coarse.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-03 16:58:43 +00:00
|
|
|
sigchain_pop(SIGPIPE);
|
2018-05-16 22:58:04 +00:00
|
|
|
refspec_clear(&rs);
|
2013-08-07 22:38:45 +00:00
|
|
|
transport_disconnect(gtransport);
|
|
|
|
gtransport = NULL;
|
2008-04-28 20:23:35 +00:00
|
|
|
return exit_code;
|
2007-09-11 03:03:25 +00:00
|
|
|
}
|
2009-11-09 20:09:56 +00:00
|
|
|
|
|
|
|
int cmd_fetch(int argc, const char **argv, const char *prefix)
|
|
|
|
{
|
2023-05-10 12:34:28 +00:00
|
|
|
struct fetch_config config = {
|
|
|
|
.display_format = DISPLAY_FORMAT_FULL,
|
2023-05-17 11:48:56 +00:00
|
|
|
.prune = -1,
|
2023-05-17 11:49:00 +00:00
|
|
|
.prune_tags = -1,
|
2023-05-17 11:49:04 +00:00
|
|
|
.show_forced_updates = 1,
|
2023-05-17 11:49:08 +00:00
|
|
|
.recurse_submodules = RECURSE_SUBMODULES_DEFAULT,
|
2023-05-17 11:49:13 +00:00
|
|
|
.parallel = 1,
|
2023-05-17 11:49:17 +00:00
|
|
|
.submodule_fetch_jobs = -1,
|
2023-05-10 12:34:28 +00:00
|
|
|
};
|
2023-05-10 12:34:32 +00:00
|
|
|
const char *submodule_prefix = "";
|
2023-01-31 13:29:17 +00:00
|
|
|
const char *bundle_uri;
|
2016-06-14 18:28:56 +00:00
|
|
|
struct string_list list = STRING_LIST_INIT_DUP;
|
2017-12-08 15:58:43 +00:00
|
|
|
struct remote *remote = NULL;
|
2023-05-10 12:34:32 +00:00
|
|
|
int all = 0, multiple = 0;
|
2009-11-09 20:09:56 +00:00
|
|
|
int result = 0;
|
2018-03-06 22:54:01 +00:00
|
|
|
int prune_tags_ok = 1;
|
2023-05-10 12:34:32 +00:00
|
|
|
int enable_auto_gc = 1;
|
|
|
|
int unshallow = 0;
|
|
|
|
int max_jobs = -1;
|
|
|
|
int recurse_submodules_cli = RECURSE_SUBMODULES_DEFAULT;
|
|
|
|
int recurse_submodules_default = RECURSE_SUBMODULES_ON_DEMAND;
|
|
|
|
int fetch_write_commit_graph = -1;
|
|
|
|
int stdin_refspecs = 0;
|
|
|
|
int negotiate_only = 0;
|
fetch: introduce machine-parseable "porcelain" output format
The output of git-fetch(1) is obviously designed for consumption by
users, only: we neatly columnize data, we abbreviate reference names, we
print neat arrows and we don't provide information about actual object
IDs that have changed. This makes the output format basically unusable
in the context of scripted invocations of git-fetch(1) that want to
learn about the exact changes that the command performs.
Introduce a new machine-parseable "porcelain" output format that is
supposed to fix this shortcoming. This output format is intended to
provide information about every reference that is about to be updated,
the old object ID that the reference has been pointing to and the new
object ID it will be updated to. Furthermore, the output format provides
the same flags as the human-readable format to indicate basic conditions
for each reference update like whether it was a fast-forward update, a
branch deletion, a rejected update or others.
The output format is quite simple:
```
<flag> <old-object-id> <new-object-id> <local-reference>\n
```
We assume two conditions which are generally true:
- The old and new object IDs have fixed known widths and cannot
contain spaces.
- References cannot contain newlines.
With these assumptions, the output format becomes unambiguously
parseable. Furthermore, given that this output is designed to be
consumed by scripts, the machine-readable data is printed to stdout
instead of stderr like the human-readable output is. This is mostly done
so that other data printed to stderr, like error messages or progress
meters, don't interfere with the parseable data.
A notable omission here is that the output format does not include the
remote from which a reference was fetched, which might be important
information especially in the context of multi-remote fetches. But as
such a format would require us to print the remote for every single
reference update due to parallelizable fetches it feels wasteful for the
most likely usecase, which is when fetching from a single remote.
In a similar spirit, a second restriction is that this cannot be used
with `--recurse-submodules`. This is because any reference updates would
be ambiguous without also printing the repository in which the update
happens.
Considering that both multi-remote and submodule fetches are user-facing
features, using them in conjunction with `--porcelain` that is intended
for scripting purposes is likely not going to be useful in the majority
of cases. With that in mind these restrictions feel acceptable. If
usecases for either of these come up in the future though it is easy
enough to add a new "porcelain-v2" format that adds this information.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:36 +00:00
|
|
|
int porcelain = 0;
|
2023-05-10 12:34:32 +00:00
|
|
|
int i;
|
|
|
|
|
|
|
|
struct option builtin_fetch_options[] = {
|
|
|
|
OPT__VERBOSITY(&verbosity),
|
|
|
|
OPT_BOOL(0, "all", &all,
|
|
|
|
N_("fetch from all remotes")),
|
|
|
|
OPT_BOOL(0, "set-upstream", &set_upstream,
|
|
|
|
N_("set upstream for git pull/fetch")),
|
|
|
|
OPT_BOOL('a', "append", &append,
|
|
|
|
N_("append to .git/FETCH_HEAD instead of overwriting")),
|
|
|
|
OPT_BOOL(0, "atomic", &atomic_fetch,
|
|
|
|
N_("use atomic transaction to update references")),
|
|
|
|
OPT_STRING(0, "upload-pack", &upload_pack, N_("path"),
|
|
|
|
N_("path to upload pack on remote end")),
|
|
|
|
OPT__FORCE(&force, N_("force overwrite of local reference"), 0),
|
|
|
|
OPT_BOOL('m', "multiple", &multiple,
|
|
|
|
N_("fetch from multiple remotes")),
|
|
|
|
OPT_SET_INT('t', "tags", &tags,
|
|
|
|
N_("fetch all tags and associated objects"), TAGS_SET),
|
|
|
|
OPT_SET_INT('n', NULL, &tags,
|
|
|
|
N_("do not fetch all tags (--no-tags)"), TAGS_UNSET),
|
|
|
|
OPT_INTEGER('j', "jobs", &max_jobs,
|
|
|
|
N_("number of submodules fetched in parallel")),
|
|
|
|
OPT_BOOL(0, "prefetch", &prefetch,
|
|
|
|
N_("modify the refspec to place all refs within refs/prefetch/")),
|
|
|
|
OPT_BOOL('p', "prune", &prune,
|
|
|
|
N_("prune remote-tracking branches no longer on remote")),
|
|
|
|
OPT_BOOL('P', "prune-tags", &prune_tags,
|
|
|
|
N_("prune local tags no longer on remote and clobber changed tags")),
|
|
|
|
OPT_CALLBACK_F(0, "recurse-submodules", &recurse_submodules_cli, N_("on-demand"),
|
|
|
|
N_("control recursive fetching of submodules"),
|
|
|
|
PARSE_OPT_OPTARG, option_fetch_parse_recurse_submodules),
|
|
|
|
OPT_BOOL(0, "dry-run", &dry_run,
|
|
|
|
N_("dry run")),
|
fetch: introduce machine-parseable "porcelain" output format
The output of git-fetch(1) is obviously designed for consumption by
users, only: we neatly columnize data, we abbreviate reference names, we
print neat arrows and we don't provide information about actual object
IDs that have changed. This makes the output format basically unusable
in the context of scripted invocations of git-fetch(1) that want to
learn about the exact changes that the command performs.
Introduce a new machine-parseable "porcelain" output format that is
supposed to fix this shortcoming. This output format is intended to
provide information about every reference that is about to be updated,
the old object ID that the reference has been pointing to and the new
object ID it will be updated to. Furthermore, the output format provides
the same flags as the human-readable format to indicate basic conditions
for each reference update like whether it was a fast-forward update, a
branch deletion, a rejected update or others.
The output format is quite simple:
```
<flag> <old-object-id> <new-object-id> <local-reference>\n
```
We assume two conditions which are generally true:
- The old and new object IDs have fixed known widths and cannot
contain spaces.
- References cannot contain newlines.
With these assumptions, the output format becomes unambiguously
parseable. Furthermore, given that this output is designed to be
consumed by scripts, the machine-readable data is printed to stdout
instead of stderr like the human-readable output is. This is mostly done
so that other data printed to stderr, like error messages or progress
meters, don't interfere with the parseable data.
A notable omission here is that the output format does not include the
remote from which a reference was fetched, which might be important
information especially in the context of multi-remote fetches. But as
such a format would require us to print the remote for every single
reference update due to parallelizable fetches it feels wasteful for the
most likely usecase, which is when fetching from a single remote.
In a similar spirit, a second restriction is that this cannot be used
with `--recurse-submodules`. This is because any reference updates would
be ambiguous without also printing the repository in which the update
happens.
Considering that both multi-remote and submodule fetches are user-facing
features, using them in conjunction with `--porcelain` that is intended
for scripting purposes is likely not going to be useful in the majority
of cases. With that in mind these restrictions feel acceptable. If
usecases for either of these come up in the future though it is easy
enough to add a new "porcelain-v2" format that adds this information.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:36 +00:00
|
|
|
OPT_BOOL(0, "porcelain", &porcelain, N_("machine-readable output")),
|
2023-05-10 12:34:32 +00:00
|
|
|
OPT_BOOL(0, "write-fetch-head", &write_fetch_head,
|
|
|
|
N_("write fetched references to the FETCH_HEAD file")),
|
|
|
|
OPT_BOOL('k', "keep", &keep, N_("keep downloaded pack")),
|
|
|
|
OPT_BOOL('u', "update-head-ok", &update_head_ok,
|
|
|
|
N_("allow updating of HEAD ref")),
|
|
|
|
OPT_BOOL(0, "progress", &progress, N_("force progress reporting")),
|
|
|
|
OPT_STRING(0, "depth", &depth, N_("depth"),
|
|
|
|
N_("deepen history of shallow clone")),
|
|
|
|
OPT_STRING(0, "shallow-since", &deepen_since, N_("time"),
|
|
|
|
N_("deepen history of shallow repository based on time")),
|
|
|
|
OPT_STRING_LIST(0, "shallow-exclude", &deepen_not, N_("revision"),
|
|
|
|
N_("deepen history of shallow clone, excluding rev")),
|
|
|
|
OPT_INTEGER(0, "deepen", &deepen_relative,
|
|
|
|
N_("deepen history of shallow clone")),
|
|
|
|
OPT_SET_INT_F(0, "unshallow", &unshallow,
|
|
|
|
N_("convert to a complete repository"),
|
|
|
|
1, PARSE_OPT_NONEG),
|
|
|
|
OPT_SET_INT_F(0, "refetch", &refetch,
|
|
|
|
N_("re-fetch without negotiating common commits"),
|
|
|
|
1, PARSE_OPT_NONEG),
|
|
|
|
{ OPTION_STRING, 0, "submodule-prefix", &submodule_prefix, N_("dir"),
|
|
|
|
N_("prepend this to submodule path output"), PARSE_OPT_HIDDEN },
|
|
|
|
OPT_CALLBACK_F(0, "recurse-submodules-default",
|
|
|
|
&recurse_submodules_default, N_("on-demand"),
|
|
|
|
N_("default for recursive fetching of submodules "
|
|
|
|
"(lower priority than config files)"),
|
|
|
|
PARSE_OPT_HIDDEN, option_fetch_parse_recurse_submodules),
|
|
|
|
OPT_BOOL(0, "update-shallow", &update_shallow,
|
|
|
|
N_("accept refs that update .git/shallow")),
|
parse-options: prefer opt->value to globals in callbacks
We have several parse-options callbacks that ignore their "opt"
parameters entirely. This is a little unusual, as we'd normally put the
result of the parsing into opt->value. In the case of these callbacks,
though, they directly manipulate global variables instead (and in
most cases the caller sets opt->value to NULL in the OPT_CALLBACK
declaration).
The immediate symptom we'd like to deal with is that the unused "opt"
variables trigger -Wunused-parameter. But how to fix that is debatable.
One option is to annotate them with UNUSED. But another is to have the
caller pass in the appropriate variable via opt->value, and use it. That
has the benefit of making the callbacks reusable (in theory at least),
and makes it clear from the OPT_CALLBACK declaration which variables
will be affected (doubly so for the cases in builtin/fast-export.c,
where we do set opt->value, but it is completely ignored!).
The slight downside is that we lose type safety, since they're now
passing through void pointers.
I went with the "just use them" approach here. The loss of type safety
is unfortunate, but that is already an issue with most of the other
callbacks. If we want to try to address that, we should do so more
consistently (and this patch would prepare these callbacks for whatever
we choose to do there).
Note that in the cases in builtin/fast-export.c, we are passing
anonymous enums. We'll have to give them names so that we can declare
the appropriate pointer type within the callbacks.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-08-31 21:21:07 +00:00
|
|
|
OPT_CALLBACK_F(0, "refmap", &refmap, N_("refmap"),
|
2023-05-10 12:34:32 +00:00
|
|
|
N_("specify fetch refmap"), PARSE_OPT_NONEG, parse_refmap_arg),
|
|
|
|
OPT_STRING_LIST('o', "server-option", &server_options, N_("server-specific"), N_("option to transmit")),
|
2023-07-18 21:34:33 +00:00
|
|
|
OPT_IPVERSION(&family),
|
2023-05-10 12:34:32 +00:00
|
|
|
OPT_STRING_LIST(0, "negotiation-tip", &negotiation_tip, N_("revision"),
|
|
|
|
N_("report that we have only objects reachable from this object")),
|
|
|
|
OPT_BOOL(0, "negotiate-only", &negotiate_only,
|
|
|
|
N_("do not fetch a packfile; instead, print ancestors of negotiation tips")),
|
|
|
|
OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
|
|
|
|
OPT_BOOL(0, "auto-maintenance", &enable_auto_gc,
|
|
|
|
N_("run 'maintenance --auto' after fetching")),
|
|
|
|
OPT_BOOL(0, "auto-gc", &enable_auto_gc,
|
|
|
|
N_("run 'maintenance --auto' after fetching")),
|
2023-05-17 11:49:04 +00:00
|
|
|
OPT_BOOL(0, "show-forced-updates", &config.show_forced_updates,
|
2023-05-10 12:34:32 +00:00
|
|
|
N_("check for forced-updates on all updated branches")),
|
|
|
|
OPT_BOOL(0, "write-commit-graph", &fetch_write_commit_graph,
|
|
|
|
N_("write the commit-graph after fetching")),
|
|
|
|
OPT_BOOL(0, "stdin", &stdin_refspecs,
|
|
|
|
N_("accept refspecs from stdin")),
|
|
|
|
OPT_END()
|
|
|
|
};
|
2009-11-09 20:09:56 +00:00
|
|
|
|
2011-02-24 14:30:19 +00:00
|
|
|
packet_trace_identity("fetch");
|
|
|
|
|
2009-11-09 20:09:56 +00:00
|
|
|
/* Record the command line for the reflog */
|
|
|
|
strbuf_addstr(&default_rla, "fetch");
|
2020-06-04 20:08:29 +00:00
|
|
|
for (i = 1; i < argc; i++) {
|
|
|
|
/* This handles non-URLs gracefully */
|
|
|
|
char *anon = transport_anonymize_url(argv[i]);
|
|
|
|
|
|
|
|
strbuf_addf(&default_rla, " %s", anon);
|
|
|
|
free(anon);
|
|
|
|
}
|
2009-11-09 20:09:56 +00:00
|
|
|
|
2023-05-10 12:34:28 +00:00
|
|
|
git_config(git_fetch_config, &config);
|
checkout/fetch/pull/pack-objects: allow `-h` outside a repository
When we taught these commands about the sparse index, we did not account
for the fact that the `cmd_*()` functions _can_ be called without a
gitdir, namely when `-h` is passed to show the usage.
A plausible approach to address this is to move the
`prepare_repo_settings()` calls right after the `parse_options()` calls:
The latter will never return when it handles `-h`, and therefore it is
safe to assume that we have a `gitdir` at that point, as long as the
built-in is marked with the `RUN_SETUP` flag.
However, it is unfortunately not that simple. In `cmd_pack_objects()`,
for example, the repo settings need to be fully populated so that the
command-line options `--sparse`/`--no-sparse` can override them, not the
other way round.
Therefore, we choose to imitate the strategy taken in `cmd_diff()`,
where we simply do not bother to prepare and initialize the repo
settings unless we have a `gitdir`.
This fixes https://github.com/git-for-windows/git/issues/3688
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-02-08 11:21:53 +00:00
|
|
|
if (the_repository->gitdir) {
|
|
|
|
prepare_repo_settings(the_repository);
|
|
|
|
the_repository->settings.command_requires_full_index = 0;
|
|
|
|
}
|
2013-07-13 09:36:24 +00:00
|
|
|
|
2009-11-09 20:09:56 +00:00
|
|
|
argc = parse_options(argc, argv, prefix,
|
|
|
|
builtin_fetch_options, builtin_fetch_usage, 0);
|
2022-01-19 00:00:56 +00:00
|
|
|
|
|
|
|
if (recurse_submodules_cli != RECURSE_SUBMODULES_DEFAULT)
|
2023-05-17 11:49:08 +00:00
|
|
|
config.recurse_submodules = recurse_submodules_cli;
|
2022-01-19 00:00:56 +00:00
|
|
|
|
|
|
|
if (negotiate_only) {
|
|
|
|
switch (recurse_submodules_cli) {
|
|
|
|
case RECURSE_SUBMODULES_OFF:
|
|
|
|
case RECURSE_SUBMODULES_DEFAULT:
|
|
|
|
/*
|
|
|
|
* --negotiate-only should never recurse into
|
|
|
|
* submodules. Skip it by setting recurse_submodules to
|
|
|
|
* RECURSE_SUBMODULES_OFF.
|
|
|
|
*/
|
2023-05-17 11:49:08 +00:00
|
|
|
config.recurse_submodules = RECURSE_SUBMODULES_OFF;
|
2022-01-19 00:00:56 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
2022-01-20 21:58:43 +00:00
|
|
|
die(_("options '%s' and '%s' cannot be used together"),
|
|
|
|
"--negotiate-only", "--recurse-submodules");
|
2022-01-19 00:00:56 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-05-17 11:49:08 +00:00
|
|
|
if (config.recurse_submodules != RECURSE_SUBMODULES_OFF) {
|
2023-05-17 11:49:17 +00:00
|
|
|
int *sfjc = config.submodule_fetch_jobs == -1
|
|
|
|
? &config.submodule_fetch_jobs : NULL;
|
2023-05-17 11:49:08 +00:00
|
|
|
int *rs = config.recurse_submodules == RECURSE_SUBMODULES_DEFAULT
|
|
|
|
? &config.recurse_submodules : NULL;
|
fetch: avoid reading submodule config until needed
In "fetch", there are two parameters submodule_fetch_jobs_config and
recurse_submodules that can be set in a variety of ways: through
.gitmodules, through .git/config, and through the command line.
Currently "fetch" handles this by first reading .gitmodules, then
reading .git/config (allowing it to overwrite existing values), then
reading the command line (allowing it to overwrite existing values).
Notice that we can avoid reading .gitmodules if .git/config and/or the
command line already provides us with what we need. In addition, if
recurse_submodules is found to be "no", we do not need the value of
submodule_fetch_jobs_config.
Avoiding reading .gitmodules is especially important when we use "git
fetch" to perform lazy fetches in a partial clone because the
.gitmodules file itself might need to be lazy fetched (and otherwise
causing an infinite loop).
In light of all this, avoid reading .gitmodules until necessary. When
reading it, we may only need one of the two parameters it provides, so
teach fetch_config_from_gitmodules() to support NULL arguments. With
this patch, users (including Git itself when invoking "git fetch" to
lazy-fetch) will be able to guarantee avoiding reading .gitmodules by
passing --recurse-submodules=no.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-08-18 04:01:33 +00:00
|
|
|
|
|
|
|
fetch_config_from_gitmodules(sfjc, rs);
|
|
|
|
}
|
2009-11-09 20:09:56 +00:00
|
|
|
|
fetch: introduce machine-parseable "porcelain" output format
The output of git-fetch(1) is obviously designed for consumption by
users, only: we neatly columnize data, we abbreviate reference names, we
print neat arrows and we don't provide information about actual object
IDs that have changed. This makes the output format basically unusable
in the context of scripted invocations of git-fetch(1) that want to
learn about the exact changes that the command performs.
Introduce a new machine-parseable "porcelain" output format that is
supposed to fix this shortcoming. This output format is intended to
provide information about every reference that is about to be updated,
the old object ID that the reference has been pointing to and the new
object ID it will be updated to. Furthermore, the output format provides
the same flags as the human-readable format to indicate basic conditions
for each reference update like whether it was a fast-forward update, a
branch deletion, a rejected update or others.
The output format is quite simple:
```
<flag> <old-object-id> <new-object-id> <local-reference>\n
```
We assume two conditions which are generally true:
- The old and new object IDs have fixed known widths and cannot
contain spaces.
- References cannot contain newlines.
With these assumptions, the output format becomes unambiguously
parseable. Furthermore, given that this output is designed to be
consumed by scripts, the machine-readable data is printed to stdout
instead of stderr like the human-readable output is. This is mostly done
so that other data printed to stderr, like error messages or progress
meters, don't interfere with the parseable data.
A notable omission here is that the output format does not include the
remote from which a reference was fetched, which might be important
information especially in the context of multi-remote fetches. But as
such a format would require us to print the remote for every single
reference update due to parallelizable fetches it feels wasteful for the
most likely usecase, which is when fetching from a single remote.
In a similar spirit, a second restriction is that this cannot be used
with `--recurse-submodules`. This is because any reference updates would
be ambiguous without also printing the repository in which the update
happens.
Considering that both multi-remote and submodule fetches are user-facing
features, using them in conjunction with `--porcelain` that is intended
for scripting purposes is likely not going to be useful in the majority
of cases. With that in mind these restrictions feel acceptable. If
usecases for either of these come up in the future though it is easy
enough to add a new "porcelain-v2" format that adds this information.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:36 +00:00
|
|
|
|
|
|
|
if (porcelain) {
|
|
|
|
switch (recurse_submodules_cli) {
|
|
|
|
case RECURSE_SUBMODULES_OFF:
|
|
|
|
case RECURSE_SUBMODULES_DEFAULT:
|
|
|
|
/*
|
|
|
|
* Reference updates in submodules would be ambiguous
|
|
|
|
* in porcelain mode, so we reject this combination.
|
|
|
|
*/
|
2023-05-17 11:49:08 +00:00
|
|
|
config.recurse_submodules = RECURSE_SUBMODULES_OFF;
|
fetch: introduce machine-parseable "porcelain" output format
The output of git-fetch(1) is obviously designed for consumption by
users, only: we neatly columnize data, we abbreviate reference names, we
print neat arrows and we don't provide information about actual object
IDs that have changed. This makes the output format basically unusable
in the context of scripted invocations of git-fetch(1) that want to
learn about the exact changes that the command performs.
Introduce a new machine-parseable "porcelain" output format that is
supposed to fix this shortcoming. This output format is intended to
provide information about every reference that is about to be updated,
the old object ID that the reference has been pointing to and the new
object ID it will be updated to. Furthermore, the output format provides
the same flags as the human-readable format to indicate basic conditions
for each reference update like whether it was a fast-forward update, a
branch deletion, a rejected update or others.
The output format is quite simple:
```
<flag> <old-object-id> <new-object-id> <local-reference>\n
```
We assume two conditions which are generally true:
- The old and new object IDs have fixed known widths and cannot
contain spaces.
- References cannot contain newlines.
With these assumptions, the output format becomes unambiguously
parseable. Furthermore, given that this output is designed to be
consumed by scripts, the machine-readable data is printed to stdout
instead of stderr like the human-readable output is. This is mostly done
so that other data printed to stderr, like error messages or progress
meters, don't interfere with the parseable data.
A notable omission here is that the output format does not include the
remote from which a reference was fetched, which might be important
information especially in the context of multi-remote fetches. But as
such a format would require us to print the remote for every single
reference update due to parallelizable fetches it feels wasteful for the
most likely usecase, which is when fetching from a single remote.
In a similar spirit, a second restriction is that this cannot be used
with `--recurse-submodules`. This is because any reference updates would
be ambiguous without also printing the repository in which the update
happens.
Considering that both multi-remote and submodule fetches are user-facing
features, using them in conjunction with `--porcelain` that is intended
for scripting purposes is likely not going to be useful in the majority
of cases. With that in mind these restrictions feel acceptable. If
usecases for either of these come up in the future though it is easy
enough to add a new "porcelain-v2" format that adds this information.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-10 12:34:36 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
die(_("options '%s' and '%s' cannot be used together"),
|
|
|
|
"--porcelain", "--recurse-submodules");
|
|
|
|
}
|
|
|
|
|
|
|
|
config.display_format = DISPLAY_FORMAT_PORCELAIN;
|
|
|
|
}
|
|
|
|
|
2021-07-08 10:53:15 +00:00
|
|
|
if (negotiate_only && !negotiation_tip.nr)
|
2022-01-28 14:36:02 +00:00
|
|
|
die(_("--negotiate-only needs one or more --negotiation-tip=*"));
|
2021-07-08 10:53:15 +00:00
|
|
|
|
fetch, upload-pack: --deepen=N extends shallow boundary by N commits
In git-fetch, --depth argument is always relative with the latest
remote refs. This makes it a bit difficult to cover this use case,
where the user wants to make the shallow history, say 3 levels
deeper. It would work if remote refs have not moved yet, but nobody
can guarantee that, especially when that use case is performed a
couple months after the last clone or "git fetch --depth". Also,
modifying shallow boundary using --depth does not work well with
clones created by --since or --not.
This patch fixes that. A new argument --deepen=<N> will add <N> more (*)
parent commits to the current history regardless of where remote refs
are.
Have/Want negotiation is still respected. So if remote refs move, the
server will send two chunks: one between "have" and "want" and another
to extend shallow history. In theory, the client could send no "want"s
in order to get the second chunk only. But the protocol does not allow
that. Either you send no want lines, which means ls-remote; or you
have to send at least one want line that carries deep-relative to the
server.
The main work was done by Dongcan Jiang. I fixed it up here and there.
And of course all the bugs belong to me.
(*) We could even support --deepen=<N> where <N> is negative. In that
case we can cut some history from the shallow clone. This operation
(and --depth=<shorter depth>) does not require interaction with remote
side (and more complicated to implement as a result).
Helped-by: Duy Nguyen <pclouds@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-12 10:54:09 +00:00
|
|
|
if (deepen_relative) {
|
|
|
|
if (deepen_relative < 0)
|
2021-12-01 22:15:40 +00:00
|
|
|
die(_("negative depth in --deepen is not supported"));
|
fetch, upload-pack: --deepen=N extends shallow boundary by N commits
In git-fetch, --depth argument is always relative with the latest
remote refs. This makes it a bit difficult to cover this use case,
where the user wants to make the shallow history, say 3 levels
deeper. It would work if remote refs have not moved yet, but nobody
can guarantee that, especially when that use case is performed a
couple months after the last clone or "git fetch --depth". Also,
modifying shallow boundary using --depth does not work well with
clones created by --since or --not.
This patch fixes that. A new argument --deepen=<N> will add <N> more (*)
parent commits to the current history regardless of where remote refs
are.
Have/Want negotiation is still respected. So if remote refs move, the
server will send two chunks: one between "have" and "want" and another
to extend shallow history. In theory, the client could send no "want"s
in order to get the second chunk only. But the protocol does not allow
that. Either you send no want lines, which means ls-remote; or you
have to send at least one want line that carries deep-relative to the
server.
The main work was done by Dongcan Jiang. I fixed it up here and there.
And of course all the bugs belong to me.
(*) We could even support --deepen=<N> where <N> is negative. In that
case we can cut some history from the shallow clone. This operation
(and --depth=<shorter depth>) does not require interaction with remote
side (and more complicated to implement as a result).
Helped-by: Duy Nguyen <pclouds@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-12 10:54:09 +00:00
|
|
|
if (depth)
|
2022-01-05 20:02:14 +00:00
|
|
|
die(_("options '%s' and '%s' cannot be used together"), "--deepen", "--depth");
|
fetch, upload-pack: --deepen=N extends shallow boundary by N commits
In git-fetch, --depth argument is always relative with the latest
remote refs. This makes it a bit difficult to cover this use case,
where the user wants to make the shallow history, say 3 levels
deeper. It would work if remote refs have not moved yet, but nobody
can guarantee that, especially when that use case is performed a
couple months after the last clone or "git fetch --depth". Also,
modifying shallow boundary using --depth does not work well with
clones created by --since or --not.
This patch fixes that. A new argument --deepen=<N> will add <N> more (*)
parent commits to the current history regardless of where remote refs
are.
Have/Want negotiation is still respected. So if remote refs move, the
server will send two chunks: one between "have" and "want" and another
to extend shallow history. In theory, the client could send no "want"s
in order to get the second chunk only. But the protocol does not allow
that. Either you send no want lines, which means ls-remote; or you
have to send at least one want line that carries deep-relative to the
server.
The main work was done by Dongcan Jiang. I fixed it up here and there.
And of course all the bugs belong to me.
(*) We could even support --deepen=<N> where <N> is negative. In that
case we can cut some history from the shallow clone. This operation
(and --depth=<shorter depth>) does not require interaction with remote
side (and more complicated to implement as a result).
Helped-by: Duy Nguyen <pclouds@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-12 10:54:09 +00:00
|
|
|
depth = xstrfmt("%d", deepen_relative);
|
|
|
|
}
|
2013-01-11 09:05:46 +00:00
|
|
|
if (unshallow) {
|
|
|
|
if (depth)
|
2022-01-05 20:02:14 +00:00
|
|
|
die(_("options '%s' and '%s' cannot be used together"), "--depth", "--unshallow");
|
2018-05-17 22:51:46 +00:00
|
|
|
else if (!is_repository_shallow(the_repository))
|
2013-01-11 09:05:46 +00:00
|
|
|
die(_("--unshallow on a complete repository does not make sense"));
|
2015-09-24 21:07:07 +00:00
|
|
|
else
|
|
|
|
depth = xstrfmt("%d", INFINITE_DEPTH);
|
2013-01-11 09:05:46 +00:00
|
|
|
}
|
|
|
|
|
2013-12-05 03:31:11 +00:00
|
|
|
/* no need to be strict, transport_set_option() will validate it again */
|
|
|
|
if (depth && atoi(depth) < 1)
|
|
|
|
die(_("depth %s is not a positive number"), depth);
|
2016-06-12 10:54:04 +00:00
|
|
|
if (depth || deepen_since || deepen_not.nr)
|
2016-06-12 10:53:59 +00:00
|
|
|
deepen = 1;
|
2013-12-05 03:31:11 +00:00
|
|
|
|
2020-08-18 14:25:22 +00:00
|
|
|
/* FETCH_HEAD never gets updated in --dry-run mode */
|
|
|
|
if (dry_run)
|
|
|
|
write_fetch_head = 0;
|
|
|
|
|
2023-02-20 21:33:25 +00:00
|
|
|
if (!max_jobs)
|
|
|
|
max_jobs = online_cpus();
|
|
|
|
|
2023-01-31 13:29:17 +00:00
|
|
|
if (!git_config_get_string_tmp("fetch.bundleuri", &bundle_uri) &&
|
|
|
|
fetch_bundle_uri(the_repository, bundle_uri, NULL))
|
|
|
|
warning(_("failed to fetch bundles from '%s'"), bundle_uri);
|
|
|
|
|
2009-11-09 20:09:56 +00:00
|
|
|
if (all) {
|
|
|
|
if (argc == 1)
|
2011-02-22 23:41:51 +00:00
|
|
|
die(_("fetch --all does not take a repository argument"));
|
2009-11-09 20:09:56 +00:00
|
|
|
else if (argc > 1)
|
2011-02-22 23:41:51 +00:00
|
|
|
die(_("fetch --all does not make sense with refspecs"));
|
2009-11-09 20:09:56 +00:00
|
|
|
(void) for_each_remote(get_one_remote_for_fetch, &list);
|
fetch: do not run a redundant fetch from submodule
When 7dce19d3 (fetch/pull: Add the --recurse-submodules option,
2010-11-12) introduced the "--recurse-submodules" option, the
approach taken was to perform fetches in submodules only once, after
all the main fetching (it may usually be a fetch from a single
remote, but it could be fetching from a group of remotes using
fetch_multiple()) succeeded. Later we added "--all" to fetch from
all defined remotes, which complicated things even more.
If your project has a submodule, and you try to run "git fetch
--recurse-submodules --all", you'd see a fetch for the top-level,
which invokes another fetch for the submodule, followed by another
fetch for the same submodule. All but the last fetch for the
submodule come from a "git fetch --recurse-submodules" subprocess
that is spawned via the fetch_multiple() interface for the remotes,
and the last fetch comes from the code at the end.
Because recursive fetching from submodules is done in each fetch for
the top-level in fetch_multiple(), the last fetch in the submodule
is redundant. It only matters when fetch_one() interacts with a
single remote at the top-level.
While we are at it, there is one optimization that exists in dealing
with a group of remotes, but is missing when "--all" is used. In the
former, when the group turns out to be a group of one, instead of
spawning "git fetch" as a subprocess via the fetch_multiple()
interface, we use the normal fetch_one() code path. Do the same
when handling "--all", if it turns out that we have only one remote
defined.
Reviewed-by: Glen Choo <chooglen@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-05-16 23:53:40 +00:00
|
|
|
|
|
|
|
/* do not do fetch_multiple() of one */
|
|
|
|
if (list.nr == 1)
|
|
|
|
remote = remote_get(list.items[0].string);
|
2009-11-09 20:09:56 +00:00
|
|
|
} else if (argc == 0) {
|
|
|
|
/* No arguments -- use default remote */
|
|
|
|
remote = remote_get(NULL);
|
2009-11-09 20:10:32 +00:00
|
|
|
} else if (multiple) {
|
|
|
|
/* All arguments are assumed to be remotes or groups */
|
|
|
|
for (i = 0; i < argc; i++)
|
|
|
|
if (!add_remote_or_group(argv[i], &list))
|
2021-12-01 22:15:40 +00:00
|
|
|
die(_("no such remote or remote group: %s"),
|
|
|
|
argv[i]);
|
2009-11-09 20:09:56 +00:00
|
|
|
} else {
|
|
|
|
/* Single remote or group */
|
|
|
|
(void) add_remote_or_group(argv[0], &list);
|
|
|
|
if (list.nr > 1) {
|
|
|
|
/* More than one remote */
|
|
|
|
if (argc > 1)
|
2021-12-01 22:15:40 +00:00
|
|
|
die(_("fetching a group and specifying refspecs does not make sense"));
|
2009-11-09 20:09:56 +00:00
|
|
|
} else {
|
|
|
|
/* Zero or one remotes */
|
|
|
|
remote = remote_get(argv[0]);
|
2018-03-06 22:54:01 +00:00
|
|
|
prune_tags_ok = (argc == 1);
|
2017-12-08 15:58:43 +00:00
|
|
|
argc--;
|
|
|
|
argv++;
|
2009-11-09 20:09:56 +00:00
|
|
|
}
|
|
|
|
}
|
2023-01-19 22:05:38 +00:00
|
|
|
string_list_remove_duplicates(&list, 0);
|
2009-11-09 20:09:56 +00:00
|
|
|
|
fetch: teach independent negotiation (no packfile)
Currently, the packfile negotiation step within a Git fetch cannot be
done independent of sending the packfile, even though there is at least
one application wherein this is useful. Therefore, make it possible for
this negotiation step to be done independently. A subsequent commit will
use this for one such application - push negotiation.
This feature is for protocol v2 only. (An implementation for protocol v0
would require a separate implementation in the fetch, transport, and
transport helper code.)
In the protocol, the main hindrance towards independent negotiation is
that the server can unilaterally decide to send the packfile. This is
solved by a "wait-for-done" argument: the server will then wait for the
client to say "done". In practice, the client will never say it; instead
it will cease requests once it is satisfied.
In the client, the main change lies in the transport and transport
helper code. fetch_refs_via_pack() performs everything needed - protocol
version and capability checks, and the negotiation itself.
There are 2 code paths that do not go through fetch_refs_via_pack() that
needed to be individually excluded: the bundle transport (excluded
through requiring smart_options, which the bundle transport doesn't
support) and transport helpers that do not support takeover. If or when
we support independent negotiation for protocol v0, we will need to
modify these 2 code paths to support it. But for now, report failure if
independent negotiation is requested in these cases.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-05-04 21:16:01 +00:00
|
|
|
if (negotiate_only) {
|
|
|
|
struct oidset acked_commits = OIDSET_INIT;
|
|
|
|
struct oidset_iter iter;
|
|
|
|
const struct object_id *oid;
|
|
|
|
|
|
|
|
if (!remote)
|
|
|
|
die(_("must supply remote when using --negotiate-only"));
|
|
|
|
gtransport = prepare_transport(remote, 1);
|
|
|
|
if (gtransport->smart_options) {
|
|
|
|
gtransport->smart_options->acked_commits = &acked_commits;
|
|
|
|
} else {
|
2021-12-01 22:15:40 +00:00
|
|
|
warning(_("protocol does not support --negotiate-only, exiting"));
|
2022-01-19 00:00:54 +00:00
|
|
|
result = 1;
|
|
|
|
goto cleanup;
|
fetch: teach independent negotiation (no packfile)
Currently, the packfile negotiation step within a Git fetch cannot be
done independent of sending the packfile, even though there is at least
one application wherein this is useful. Therefore, make it possible for
this negotiation step to be done independently. A subsequent commit will
use this for one such application - push negotiation.
This feature is for protocol v2 only. (An implementation for protocol v0
would require a separate implementation in the fetch, transport, and
transport helper code.)
In the protocol, the main hindrance towards independent negotiation is
that the server can unilaterally decide to send the packfile. This is
solved by a "wait-for-done" argument: the server will then wait for the
client to say "done". In practice, the client will never say it; instead
it will cease requests once it is satisfied.
In the client, the main change lies in the transport and transport
helper code. fetch_refs_via_pack() performs everything needed - protocol
version and capability checks, and the negotiation itself.
There are 2 code paths that do not go through fetch_refs_via_pack() that
needed to be individually excluded: the bundle transport (excluded
through requiring smart_options, which the bundle transport doesn't
support) and transport helpers that do not support takeover. If or when
we support independent negotiation for protocol v0, we will need to
modify these 2 code paths to support it. But for now, report failure if
independent negotiation is requested in these cases.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-05-04 21:16:01 +00:00
|
|
|
}
|
|
|
|
if (server_options.nr)
|
|
|
|
gtransport->server_options = &server_options;
|
|
|
|
result = transport_fetch_refs(gtransport, NULL);
|
|
|
|
|
|
|
|
oidset_iter_init(&acked_commits, &iter);
|
|
|
|
while ((oid = oidset_iter_next(&iter)))
|
|
|
|
printf("%s\n", oid_to_hex(oid));
|
|
|
|
oidset_clear(&acked_commits);
|
|
|
|
} else if (remote) {
|
2023-03-28 13:58:53 +00:00
|
|
|
if (filter_options.choice || repo_has_promisor_remote(the_repository))
|
2017-12-08 15:58:50 +00:00
|
|
|
fetch_one_setup_partial(remote);
|
2023-05-10 12:34:28 +00:00
|
|
|
result = fetch_one(remote, argc, argv, prune_tags_ok, stdin_refspecs,
|
2023-05-17 11:48:51 +00:00
|
|
|
&config);
|
2017-12-08 15:58:44 +00:00
|
|
|
} else {
|
2019-10-05 18:46:40 +00:00
|
|
|
int max_children = max_jobs;
|
|
|
|
|
2017-12-08 15:58:44 +00:00
|
|
|
if (filter_options.choice)
|
2019-01-13 08:52:19 +00:00
|
|
|
die(_("--filter can only be used with the remote "
|
|
|
|
"configured in extensions.partialclone"));
|
2019-10-05 18:46:40 +00:00
|
|
|
|
fetch: implement support for atomic reference updates
When executing a fetch, then git will currently allocate one reference
transaction per reference update and directly commit it. This means that
fetches are non-atomic: even if some of the reference updates fail,
others may still succeed and modify local references.
This is fine in many scenarios, but this strategy has its downsides.
- The view of remote references may be inconsistent and may show a
bastardized state of the remote repository.
- Batching together updates may improve performance in certain
scenarios. While the impact probably isn't as pronounced with loose
references, the upcoming reftable backend may benefit as it needs to
write less files in case the update is batched.
- The reference-update hook is currently being executed twice per
updated reference. While this doesn't matter when there is no such
hook, we have seen severe performance regressions when doing a
git-fetch(1) with reference-transaction hook when the remote
repository has hundreds of thousands of references.
Similar to `git push --atomic`, this commit thus introduces atomic
fetches. Instead of allocating one reference transaction per updated
reference, it causes us to only allocate a single transaction and commit
it as soon as all updates were received. If locking of any reference
fails, then we abort the complete transaction and don't update any
reference, which gives us an all-or-nothing fetch.
Note that this may not completely fix the first of above downsides, as
the consistent view also depends on the server-side. If the server
doesn't have a consistent view of its own references during the
reference negotiation phase, then the client would get the same
inconsistent view the server has. This is a separate problem though and,
if it actually exists, can be fixed at a later point.
This commit also changes the way we write FETCH_HEAD in case `--atomic`
is passed. Instead of writing changes as we go, we need to accumulate
all changes first and only commit them at the end when we know that all
reference updates succeeded. Ideally, we'd just do so via a temporary
file so that we don't need to carry all updates in-memory. This isn't
trivially doable though considering the `--append` mode, where we do not
truncate the file but simply append to it. And given that we support
concurrent processes appending to FETCH_HEAD at the same time without
any loss of data, seeding the temporary file with current contents of
FETCH_HEAD initially and then doing a rename wouldn't work either. So
this commit implements the simple strategy of buffering all changes and
appending them to the file on commit.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-12 12:27:52 +00:00
|
|
|
if (atomic_fetch)
|
|
|
|
die(_("--atomic can only be used when fetching "
|
|
|
|
"from one remote"));
|
|
|
|
|
2020-08-18 04:01:32 +00:00
|
|
|
if (stdin_refspecs)
|
|
|
|
die(_("--stdin can only be used when fetching "
|
|
|
|
"from one remote"));
|
|
|
|
|
2019-10-05 18:46:40 +00:00
|
|
|
if (max_children < 0)
|
2023-05-17 11:49:13 +00:00
|
|
|
max_children = config.parallel;
|
2019-10-05 18:46:40 +00:00
|
|
|
|
2017-12-08 15:58:50 +00:00
|
|
|
/* TODO should this also die if we have a previous partial-clone? */
|
2023-05-17 11:48:51 +00:00
|
|
|
result = fetch_multiple(&list, max_children, &config);
|
2017-12-08 15:58:44 +00:00
|
|
|
}
|
2017-12-08 15:58:43 +00:00
|
|
|
|
fetch: do not run a redundant fetch from submodule
When 7dce19d3 (fetch/pull: Add the --recurse-submodules option,
2010-11-12) introduced the "--recurse-submodules" option, the
approach taken was to perform fetches in submodules only once, after
all the main fetching (it may usually be a fetch from a single
remote, but it could be fetching from a group of remotes using
fetch_multiple()) succeeded. Later we added "--all" to fetch from
all defined remotes, which complicated things even more.
If your project has a submodule, and you try to run "git fetch
--recurse-submodules --all", you'd see a fetch for the top-level,
which invokes another fetch for the submodule, followed by another
fetch for the same submodule. All but the last fetch for the
submodule come from a "git fetch --recurse-submodules" subprocess
that is spawned via the fetch_multiple() interface for the remotes,
and the last fetch comes from the code at the end.
Because recursive fetching from submodules is done in each fetch for
the top-level in fetch_multiple(), the last fetch in the submodule
is redundant. It only matters when fetch_one() interacts with a
single remote at the top-level.
While we are at it, there is one optimization that exists in dealing
with a group of remotes, but is missing when "--all" is used. In the
former, when the group turns out to be a group of one, instead of
spawning "git fetch" as a subprocess via the fetch_multiple()
interface, we use the normal fetch_one() code path. Do the same
when handing "--all", if it turns out that we have only one remote
defined.
Reviewed-by: Glen Choo <chooglen@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-05-16 23:53:40 +00:00
|
|
|
/*
|
|
|
|
* This is only needed after fetch_one(), which does not fetch
|
|
|
|
* submodules by itself.
|
|
|
|
*
|
|
|
|
* When we fetch from multiple remotes, fetch_multiple() has
|
|
|
|
* already updated submodules to grab commits necessary for
|
|
|
|
* the fetched history from each remote, so there is no need
|
|
|
|
* to fetch submodules from here.
|
|
|
|
*/
|
2023-05-17 11:49:08 +00:00
|
|
|
if (!result && remote && (config.recurse_submodules != RECURSE_SUBMODULES_OFF)) {
|
2020-07-28 20:24:27 +00:00
|
|
|
struct strvec options = STRVEC_INIT;
|
2019-10-05 18:46:40 +00:00
|
|
|
int max_children = max_jobs;
|
|
|
|
|
|
|
|
if (max_children < 0)
|
2023-05-17 11:49:17 +00:00
|
|
|
max_children = config.submodule_fetch_jobs;
|
2019-10-05 18:46:40 +00:00
|
|
|
if (max_children < 0)
|
2023-05-17 11:49:13 +00:00
|
|
|
max_children = config.parallel;
|
2012-09-01 11:27:35 +00:00
|
|
|
|
2023-05-17 11:48:51 +00:00
|
|
|
add_options_to_argv(&options, &config);
|
2022-03-08 00:14:32 +00:00
|
|
|
result = fetch_submodules(the_repository,
|
|
|
|
&options,
|
|
|
|
submodule_prefix,
|
2023-05-17 11:49:08 +00:00
|
|
|
config.recurse_submodules,
|
2022-03-08 00:14:32 +00:00
|
|
|
recurse_submodules_default,
|
|
|
|
verbosity < 0,
|
|
|
|
max_children);
|
2020-07-28 20:24:27 +00:00
|
|
|
strvec_clear(&options);
|
2010-11-12 12:54:52 +00:00
|
|
|
}
|
|
|
|
|
2022-01-19 00:00:55 +00:00
|
|
|
/*
|
|
|
|
* Skip irrelevant tasks because we know objects were not
|
|
|
|
* fetched.
|
|
|
|
*
|
|
|
|
* NEEDSWORK: as a future optimization, we can return early
|
|
|
|
* whenever objects were not fetched e.g. if we already have all
|
|
|
|
* of them.
|
|
|
|
*/
|
|
|
|
if (negotiate_only)
|
|
|
|
goto cleanup;
|
2009-11-09 20:09:56 +00:00
|
|
|
|
2019-09-03 02:22:02 +00:00
|
|
|
prepare_repo_settings(the_repository);
|
2019-11-03 00:21:56 +00:00
|
|
|
if (fetch_write_commit_graph > 0 ||
|
|
|
|
(fetch_write_commit_graph < 0 &&
|
|
|
|
the_repository->settings.fetch_write_commit_graph)) {
|
2019-09-30 04:19:32 +00:00
|
|
|
int commit_graph_flags = COMMIT_GRAPH_WRITE_SPLIT;
|
2019-09-03 02:22:02 +00:00
|
|
|
|
|
|
|
if (progress)
|
2019-09-30 04:19:32 +00:00
|
|
|
commit_graph_flags |= COMMIT_GRAPH_WRITE_PROGRESS;
|
2019-09-03 02:22:02 +00:00
|
|
|
|
2020-02-04 05:51:50 +00:00
|
|
|
write_commit_graph_reachable(the_repository->objects->odb,
|
2019-09-03 02:22:02 +00:00
|
|
|
commit_graph_flags,
|
commit-graph: prefer default size_mult when given zero
In 50f26bd ("fetch: add fetch.writeCommitGraph config setting",
2019-09-02), the fetch builtin added the capability to write a
commit-graph using the "--split" feature. This feature creates
multiple commit-graph files, and those can merge based on a set
of "split options" including a size multiple. The default size
multiple is 2, which intends to provide a log_2 N depth of the
commit-graph chain where N is the number of commits.
However, I noticed during dogfooding that my commit-graph chains
were becoming quite large when they were built only by 'git fetch'.
It turns out that in split_graph_merge_strategy(), we default the
size_mult variable to 2 except we override it with the context's
split_opts if they exist. In builtin/fetch.c, we create such a
split_opts, but do not populate it with values.
This problem is due to two failures:
1. It is unclear that we can add the flag COMMIT_GRAPH_WRITE_SPLIT
with a NULL split_opts.
2. If we have a non-NULL split_opts, then we override the default
values even if a zero value is given.
Correct both of these issues. First, do not override size_mult when
the options provide a zero value. Second, stop creating a split_opts
in the fetch builtin.
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-01-02 16:14:14 +00:00
|
|
|
NULL);
|
2019-09-03 02:22:02 +00:00
|
|
|
}
|
|
|
|
|
2022-03-28 14:02:10 +00:00
|
|
|
if (enable_auto_gc) {
|
|
|
|
if (refetch) {
|
|
|
|
/*
|
|
|
|
* Hint auto-maintenance strongly to encourage repacking,
|
|
|
|
* but respect config settings disabling it.
|
|
|
|
*/
|
|
|
|
int opt_val;
|
|
|
|
if (git_config_get_int("gc.autopacklimit", &opt_val))
|
|
|
|
opt_val = -1;
|
|
|
|
if (opt_val != 0)
|
|
|
|
git_config_push_parameter("gc.autoPackLimit=1");
|
|
|
|
|
|
|
|
if (git_config_get_int("maintenance.incremental-repack.auto", &opt_val))
|
|
|
|
opt_val = -1;
|
|
|
|
if (opt_val != 0)
|
|
|
|
git_config_push_parameter("maintenance.incremental-repack.auto=-1");
|
|
|
|
}
|
2020-09-17 18:11:44 +00:00
|
|
|
run_auto_maintenance(verbosity < 0);
|
2022-03-28 14:02:10 +00:00
|
|
|
}
|
2013-01-26 22:40:38 +00:00
|
|
|
|
2022-01-19 00:00:54 +00:00
|
|
|
cleanup:
|
|
|
|
string_list_clear(&list, 0);
|
2009-11-09 20:09:56 +00:00
|
|
|
return result;
|
|
|
|
}
|