From 4de656263aa195080495fc0a103351b9eaac8160 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 1 Mar 2022 10:33:37 +0100 Subject: [PATCH 1/5] upload-pack: look up "want" lines via commit-graph MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During packfile negotiation the client will send "want" and "want-ref" lines to the server to tell it which objects it is interested in. The server-side parses each of those and looks them up to see whether it actually has requested objects. This lookup is performed by calling `parse_object()` directly, which thus hits the object database. In the general case though most of the objects the client requests will be commits. We can thus try to look up the object via the commit-graph opportunistically, which is much faster than doing the same via the object database. Refactor parsing of both "want" and "want-ref" lines to do so. The following benchmark is executed in a repository with a huge number of references. It uses cached request from git-fetch(1) as input to git-upload-pack(1) that contains about 876,000 "want" lines: Benchmark 1: HEAD~ Time (mean ± σ): 7.113 s ± 0.028 s [User: 6.900 s, System: 0.662 s] Range (min … max): 7.072 s … 7.168 s 10 runs Benchmark 2: HEAD Time (mean ± σ): 6.622 s ± 0.061 s [User: 6.452 s, System: 0.650 s] Range (min … max): 6.535 s … 6.727 s 10 runs Summary 'HEAD' ran 1.07 ± 0.01 times faster than 'HEAD~' Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- upload-pack.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index 8acc98741b..3a851b3606 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -1400,13 +1400,19 @@ static int parse_want(struct packet_writer *writer, const char *line, const char *arg; if (skip_prefix(line, "want ", &arg)) { struct object_id oid; + struct commit *commit; struct object *o; if (get_oid_hex(arg, &oid)) die("git upload-pack: protocol error, " "expected to get oid, not '%s'", line); - o = parse_object(the_repository, &oid); + commit = lookup_commit_in_graph(the_repository, &oid); + if (commit) + o = &commit->object; + else + o = parse_object(the_repository, &oid); + if (!o) { packet_writer_error(writer, "upload-pack: not our ref %s", @@ -1434,7 +1440,7 @@ static int parse_want_ref(struct packet_writer *writer, const char *line, if (skip_prefix(line, "want-ref ", &refname_nons)) { struct object_id oid; struct string_list_item *item; - struct object *o; + struct object *o = NULL; struct strbuf refname = STRBUF_INIT; strbuf_addf(&refname, "%s%s", get_git_namespace(), refname_nons); @@ -1448,7 +1454,15 @@ static int parse_want_ref(struct packet_writer *writer, const char *line, item = string_list_append(wanted_refs, refname_nons); item->util = oiddup(&oid); - o = parse_object_or_die(&oid, refname_nons); + if (!starts_with(refname_nons, "refs/tags/")) { + struct commit *commit = lookup_commit_in_graph(the_repository, &oid); + if (commit) + o = &commit->object; + } + + if (!o) + o = parse_object_or_die(&oid, refname_nons); + if (!(o->flags & WANTED)) { o->flags |= WANTED; add_object_array(o, NULL, want_obj); From 8e55634b47858f036ca773f3e1d754e5dbd4bb59 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 1 Mar 2022 10:33:41 +0100 Subject: [PATCH 2/5] fetch: avoid lookup of commits when not appending to FETCH_HEAD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When fetching from a remote repository we will by default write what has been fetched into the special FETCH_HEAD reference. The order in which references are written depends on whether the reference is for merge or not, which, despite some other conditions, is also determined based on whether the old object ID the reference is being updated from actually exists in the repository. To write FETCH_HEAD we thus loop through all references thrice: once for the references that are about to be merged, once for the references that are not for merge, and finally for all references that are ignored. For every iteration, we then look up the old object ID to determine whether the referenced object exists so that we can label it as "not-for-merge" if it doesn't exist. It goes without saying that this can be expensive in case where we are fetching a lot of references. While this is hard to avoid in the case where we're writing FETCH_HEAD, users can in fact ask us to skip this work via `--no-write-fetch-head`. In that case, we do not care for the result of those lookups at all because we don't have to order writes to FETCH_HEAD in the first place. Skip this busywork in case we're not writing to FETCH_HEAD. The following benchmark performs a mirror-fetch in a repository with about two million references via `git fetch --prune --no-write-fetch-head +refs/*:refs/*`: Benchmark 1: HEAD~ Time (mean ± σ): 75.388 s ± 1.942 s [User: 71.103 s, System: 8.953 s] Range (min … max): 73.184 s … 76.845 s 3 runs Benchmark 2: HEAD Time (mean ± σ): 69.486 s ± 1.016 s [User: 65.941 s, System: 8.806 s] Range (min … max): 68.864 s … 70.659 s 3 runs Summary 'HEAD' ran 1.08 ± 0.03 times faster than 'HEAD~' Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fetch.c | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index ec1ec91da2..3d4581d3c6 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1143,7 +1143,6 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, want_status <= FETCH_HEAD_IGNORE; want_status++) { for (rm = ref_map; rm; rm = rm->next) { - struct commit *commit = NULL; struct ref *ref = NULL; if (rm->status == REF_STATUS_REJECT_SHALLOW) { @@ -1154,21 +1153,34 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, } /* - * References in "refs/tags/" are often going to point - * to annotated tags, which are not part of the - * commit-graph. We thus only try to look up refs in - * the graph which are not in that namespace to not - * regress performance in repositories with many - * annotated tags. + * When writing FETCH_HEAD we need to determine whether + * we already have the commit or not. If not, then the + * reference is not for merge and needs to be written + * to the reflog after other commits which we already + * have. We're not interested in this property though + * in case FETCH_HEAD is not to be updated, so we can + * skip the classification in that case. */ - if (!starts_with(rm->name, "refs/tags/")) - commit = lookup_commit_in_graph(the_repository, &rm->old_oid); - if (!commit) { - commit = lookup_commit_reference_gently(the_repository, - &rm->old_oid, - 1); - if (!commit) - rm->fetch_head_status = FETCH_HEAD_NOT_FOR_MERGE; + if (fetch_head->fp) { + struct commit *commit = NULL; + + /* + * References in "refs/tags/" are often going to point + * to annotated tags, which are not part of the + * commit-graph. We thus only try to look up refs in + * the graph which are not in that namespace to not + * regress performance in repositories with many + * annotated tags. + */ + if (!starts_with(rm->name, "refs/tags/")) + commit = lookup_commit_in_graph(the_repository, &rm->old_oid); + if (!commit) { + commit = lookup_commit_reference_gently(the_repository, + &rm->old_oid, + 1); + if (!commit) + rm->fetch_head_status = FETCH_HEAD_NOT_FOR_MERGE; + } } if (rm->fetch_head_status != want_status) From cd475b3b03809b1b1c664e0dca9f16f815456719 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 1 Mar 2022 10:33:46 +0100 Subject: [PATCH 3/5] refs: add ability for backends to special-case reading of symbolic refs Reading of symbolic and non-symbolic references is currently treated the same in reference backends: we always call `refs_read_raw_ref()` and then decide based on the returned flags what type it is. This has one downside though: symbolic references may be treated different from normal references in a backend from normal references. The packed-refs backend for example doesn't even know about symbolic references, and as a result it is pointless to even ask it for one. There are cases where we really only care about whether a reference is symbolic or not, but don't care about whether it exists at all or may be a non-symbolic reference. But it is not possible to optimize for this case right now, and as a consequence we will always first check for a loose reference to exist, and if it doesn't, we'll query the packed-refs backend for a known-to-not-be-symbolic reference. This is inefficient and requires us to search all packed references even though we know to not care for the result at all. Introduce a new function `refs_read_symbolic_ref()` which allows us to fix this case. This function will only ever return symbolic references and can thus optimize for the scenario layed out above. By default, if the backend doesn't provide an implementation for it, we just use the old code path and fall back to `read_raw_ref()`. But in case the backend provides its own, more efficient implementation, we will use that one instead. Note that this function is explicitly designed to not distinguish between missing references and non-symbolic references. If it did, we'd be forced to always search the packed-refs backend to see whether the symbolic reference the user asked for really doesn't exist, or if it exists as a non-symbolic reference. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.c | 17 +++++++++++++++++ refs.h | 3 +++ refs/debug.c | 1 + refs/files-backend.c | 1 + refs/packed-backend.c | 1 + refs/refs-internal.h | 16 ++++++++++++++++ 6 files changed, 39 insertions(+) diff --git a/refs.c b/refs.c index 35d4e69687..d75a5dd53d 100644 --- a/refs.c +++ b/refs.c @@ -1672,6 +1672,23 @@ int refs_read_raw_ref(struct ref_store *ref_store, const char *refname, type, failure_errno); } +int refs_read_symbolic_ref(struct ref_store *ref_store, const char *refname, + struct strbuf *referent) +{ + struct object_id oid; + int ret, failure_errno = 0; + unsigned int type = 0; + + if (ref_store->be->read_symbolic_ref) + return ref_store->be->read_symbolic_ref(ref_store, refname, referent); + + ret = refs_read_raw_ref(ref_store, refname, &oid, referent, &type, &failure_errno); + if (ret || !(type & REF_ISSYMREF)) + return -1; + + return 0; +} + const char *refs_resolve_ref_unsafe(struct ref_store *refs, const char *refname, int resolve_flags, diff --git a/refs.h b/refs.h index 1ae12c410a..23479c7ee0 100644 --- a/refs.h +++ b/refs.h @@ -82,6 +82,9 @@ int read_ref_full(const char *refname, int resolve_flags, struct object_id *oid, int *flags); int read_ref(const char *refname, struct object_id *oid); +int refs_read_symbolic_ref(struct ref_store *ref_store, const char *refname, + struct strbuf *referent); + /* * Return 0 if a reference named refname could be created without * conflicting with the name of an existing reference. Otherwise, diff --git a/refs/debug.c b/refs/debug.c index 2b0771ca53..c590d37720 100644 --- a/refs/debug.c +++ b/refs/debug.c @@ -435,6 +435,7 @@ struct ref_storage_be refs_be_debug = { debug_ref_iterator_begin, debug_read_raw_ref, + NULL, debug_reflog_iterator_begin, debug_for_each_reflog_ent, diff --git a/refs/files-backend.c b/refs/files-backend.c index f59589d6cc..f3428a9f12 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -3286,6 +3286,7 @@ struct ref_storage_be refs_be_files = { files_ref_iterator_begin, files_read_raw_ref, + NULL, files_reflog_iterator_begin, files_for_each_reflog_ent, diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 27dd8c3922..f56e2cc635 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -1684,6 +1684,7 @@ struct ref_storage_be refs_be_packed = { packed_ref_iterator_begin, packed_read_raw_ref, + NULL, packed_reflog_iterator_begin, packed_for_each_reflog_ent, diff --git a/refs/refs-internal.h b/refs/refs-internal.h index 6e15db3ca4..001ef15835 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -649,6 +649,21 @@ typedef int read_raw_ref_fn(struct ref_store *ref_store, const char *refname, struct object_id *oid, struct strbuf *referent, unsigned int *type, int *failure_errno); +/* + * Read a symbolic reference from the specified reference store. This function + * is optional: if not implemented by a backend, then `read_raw_ref_fn` is used + * to read the symbolcic reference instead. It is intended to be implemented + * only in case the backend can optimize the reading of symbolic references. + * + * Return 0 on success, or -1 on failure. `referent` will be set to the target + * of the symbolic reference on success. This function explicitly does not + * distinguish between error cases and the reference not being a symbolic + * reference to allow backends to optimize this operation in case symbolic and + * non-symbolic references are treated differently. + */ +typedef int read_symbolic_ref_fn(struct ref_store *ref_store, const char *refname, + struct strbuf *referent); + struct ref_storage_be { struct ref_storage_be *next; const char *name; @@ -668,6 +683,7 @@ struct ref_storage_be { ref_iterator_begin_fn *iterator_begin; read_raw_ref_fn *read_raw_ref; + read_symbolic_ref_fn *read_symbolic_ref; reflog_iterator_begin_fn *reflog_iterator_begin; for_each_reflog_ent_fn *for_each_reflog_ent; From 1553f5e76c72243f61b454412b651706a869388f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 1 Mar 2022 10:33:50 +0100 Subject: [PATCH 4/5] remote: read symbolic refs via `refs_read_symbolic_ref()` We have two cases in the remote code where we check whether a reference is symbolic or not, but don't mind in case it doesn't exist or in case it exists but is a non-symbolic reference. Convert these two callsites to use the new `refs_read_symbolic_ref()` function, whose intent is to implement exactly that usecase. No change in behaviour is expected from this change. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/remote.c | 8 +++++--- remote.c | 14 +++++++------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/builtin/remote.c b/builtin/remote.c index 299c466116..805a517192 100644 --- a/builtin/remote.c +++ b/builtin/remote.c @@ -766,13 +766,15 @@ static int mv(int argc, const char **argv) for_each_ref(read_remote_branches, &rename); for (i = 0; i < remote_branches.nr; i++) { struct string_list_item *item = remote_branches.items + i; - int flag = 0; + struct strbuf referent = STRBUF_INIT; - read_ref_full(item->string, RESOLVE_REF_READING, NULL, &flag); - if (!(flag & REF_ISSYMREF)) + if (refs_read_symbolic_ref(get_main_ref_store(the_repository), item->string, + &referent)) continue; if (delete_ref(NULL, item->string, NULL, REF_NO_DEREF)) die(_("deleting '%s' failed"), item->string); + + strbuf_release(&referent); } for (i = 0; i < remote_branches.nr; i++) { struct string_list_item *item = remote_branches.items + i; diff --git a/remote.c b/remote.c index c97c626eaa..42a4e7106e 100644 --- a/remote.c +++ b/remote.c @@ -1945,13 +1945,9 @@ const char *branch_get_push(struct branch *branch, struct strbuf *err) return branch->push_tracking_ref; } -static int ignore_symref_update(const char *refname) +static int ignore_symref_update(const char *refname, struct strbuf *scratch) { - int flag; - - if (!resolve_ref_unsafe(refname, 0, NULL, &flag)) - return 0; /* non-existing refs are OK */ - return (flag & REF_ISSYMREF); + return !refs_read_symbolic_ref(get_main_ref_store(the_repository), refname, scratch); } /* @@ -1964,6 +1960,7 @@ static int ignore_symref_update(const char *refname) static struct ref *get_expanded_map(const struct ref *remote_refs, const struct refspec_item *refspec) { + struct strbuf scratch = STRBUF_INIT; const struct ref *ref; struct ref *ret = NULL; struct ref **tail = &ret; @@ -1971,11 +1968,13 @@ static struct ref *get_expanded_map(const struct ref *remote_refs, for (ref = remote_refs; ref; ref = ref->next) { char *expn_name = NULL; + strbuf_reset(&scratch); + if (strchr(ref->name, '^')) continue; /* a dereference item */ if (match_name_with_pattern(refspec->src, ref->name, refspec->dst, &expn_name) && - !ignore_symref_update(expn_name)) { + !ignore_symref_update(expn_name, &scratch)) { struct ref *cpy = copy_ref(ref); cpy->peer_ref = alloc_ref(expn_name); @@ -1987,6 +1986,7 @@ static struct ref *get_expanded_map(const struct ref *remote_refs, free(expn_name); } + strbuf_release(&scratch); return ret; } From 0a7b38707d5d5c8a7baeb88a85d6259cee4f4e4d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 1 Mar 2022 10:33:54 +0100 Subject: [PATCH 5/5] refs/files-backend: optimize reading of symbolic refs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When reading references via `files_read_raw_ref()` we always consult both the loose reference, and if that wasn't found, we also consult the packed-refs file. While this makes sense to read a generic reference, it is wasteful in the case where we only care about symbolic references: the packed-refs backend does not support them, and thus it cannot ever return one for us. Special-case reading of symbolic references for the files backend such that we always skip asking the packed-refs backend. We use `refs_read_symbolic_ref()` extensively to determine whether we need to skip updating local symbolic references during a fetch, which is why the change results in a significant speedup when doing fetches in repositories with huge numbers of references. The following benchmark executes a mirror-fetch in a repository with about 2 million references via `git fetch --prune --no-write-fetch-head +refs/*:refs/*`: Benchmark 1: HEAD~ Time (mean ± σ): 68.372 s ± 2.344 s [User: 65.629 s, System: 8.786 s] Range (min … max): 65.745 s … 70.246 s 3 runs Benchmark 2: HEAD Time (mean ± σ): 60.259 s ± 0.343 s [User: 61.019 s, System: 7.245 s] Range (min … max): 60.003 s … 60.649 s 3 runs Summary 'HEAD' ran 1.13 ± 0.04 times faster than 'HEAD~' Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/files-backend.c | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/refs/files-backend.c b/refs/files-backend.c index f3428a9f12..0457ecdb42 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -338,9 +338,9 @@ static struct ref_cache *get_loose_ref_cache(struct files_ref_store *refs) return refs->loose; } -static int files_read_raw_ref(struct ref_store *ref_store, const char *refname, - struct object_id *oid, struct strbuf *referent, - unsigned int *type, int *failure_errno) +static int read_ref_internal(struct ref_store *ref_store, const char *refname, + struct object_id *oid, struct strbuf *referent, + unsigned int *type, int *failure_errno, int skip_packed_refs) { struct files_ref_store *refs = files_downcast(ref_store, REF_STORE_READ, "read_raw_ref"); @@ -381,7 +381,7 @@ static int files_read_raw_ref(struct ref_store *ref_store, const char *refname, if (lstat(path, &st) < 0) { int ignore_errno; myerr = errno; - if (myerr != ENOENT) + if (myerr != ENOENT || skip_packed_refs) goto out; if (refs_read_raw_ref(refs->packed_ref_store, refname, oid, referent, type, &ignore_errno)) { @@ -425,7 +425,8 @@ static int files_read_raw_ref(struct ref_store *ref_store, const char *refname, * ref is supposed to be, there could still be a * packed ref: */ - if (refs_read_raw_ref(refs->packed_ref_store, refname, oid, + if (skip_packed_refs || + refs_read_raw_ref(refs->packed_ref_store, refname, oid, referent, type, &ignore_errno)) { myerr = EISDIR; goto out; @@ -470,6 +471,27 @@ static int files_read_raw_ref(struct ref_store *ref_store, const char *refname, return ret; } +static int files_read_raw_ref(struct ref_store *ref_store, const char *refname, + struct object_id *oid, struct strbuf *referent, + unsigned int *type, int *failure_errno) +{ + return read_ref_internal(ref_store, refname, oid, referent, type, failure_errno, 0); +} + +static int files_read_symbolic_ref(struct ref_store *ref_store, const char *refname, + struct strbuf *referent) +{ + struct object_id oid; + int failure_errno, ret; + unsigned int type; + + ret = read_ref_internal(ref_store, refname, &oid, referent, &type, &failure_errno, 1); + if (ret) + return ret; + + return !(type & REF_ISSYMREF); +} + int parse_loose_ref_contents(const char *buf, struct object_id *oid, struct strbuf *referent, unsigned int *type, int *failure_errno) @@ -3286,7 +3308,7 @@ struct ref_storage_be refs_be_files = { files_ref_iterator_begin, files_read_raw_ref, - NULL, + files_read_symbolic_ref, files_reflog_iterator_begin, files_for_each_reflog_ent,