git/tree.c

223 lines
5.2 KiB
C
Raw Permalink Normal View History

global: introduce `USE_THE_REPOSITORY_VARIABLE` macro Use of the `the_repository` variable is deprecated nowadays, and we slowly but steadily convert the codebase to not use it anymore. Instead, callers should be passing down the repository to work on via parameters. It is hard though to prove that a given code unit does not use this variable anymore. The most trivial case, merely demonstrating that there is no direct use of `the_repository`, is already a bit of a pain during code reviews as the reviewer needs to manually verify claims made by the patch author. The bigger problem though is that we have many interfaces that implicitly rely on `the_repository`. Introduce a new `USE_THE_REPOSITORY_VARIABLE` macro that allows code units to opt into usage of `the_repository`. The intent of this macro is to demonstrate that a certain code unit does not use this variable anymore, and to keep it from new dependencies on it in future changes, be it explicit or implicit For now, the macro only guards `the_repository` itself as well as `the_hash_algo`. There are many more known interfaces where we have an implicit dependency on `the_repository`, but those are not guarded at the current point in time. Over time though, we should start to add guards as required (or even better, just remove them). Define the macro as required in our code units. As expected, most of our code still relies on the global variable. Nearly all of our builtins rely on the variable as there is no way yet to pass `the_repository` to their entry point. For now, declare the macro in "biultin.h" to keep the required changes at least a little bit more contained. Signed-off-by: Patrick Steinhardt <ps@pks.im> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-06-14 06:50:23 +00:00
#define USE_THE_REPOSITORY_VARIABLE
#include "git-compat-util.h"
#include "hex.h"
#include "tree.h"
#include "object-name.h"
#include "object-store-ll.h"
#include "commit.h"
#include "alloc.h"
#include "tree-walk.h"
#include "repository.h"
#include "environment.h"
const char *tree_type = "tree";
int read_tree_at(struct repository *r,
struct tree *tree, struct strbuf *base,
int depth,
const struct pathspec *pathspec,
read_tree_fn_t fn, void *context)
{
struct tree_desc desc;
tree_entry(): new tree-walking helper function This adds a "tree_entry()" function that combines the common operation of doing a "tree_entry_extract()" + "update_tree_entry()". It also has a simplified calling convention, designed for simple loops that traverse over a whole tree: the arguments are pointers to the tree descriptor and a name_entry structure to fill in, and it returns a boolean "true" if there was an entry left to be gotten in the tree. This allows tree traversal with struct tree_desc desc; struct name_entry entry; desc.buf = tree->buffer; desc.size = tree->size; while (tree_entry(&desc, &entry) { ... use "entry.{path, sha1, mode, pathlen}" ... } which is not only shorter than writing it out in full, it's hopefully less error prone too. [ It's actually a tad faster too - we don't need to recalculate the entry pathlength in both extract and update, but need to do it only once. Also, some callers can avoid doing a "strlen()" on the result, since it's returned as part of the name_entry structure. However, by now we're talking just 1% speedup on "git-rev-list --objects --all", and we're definitely at the point where tree walking is no longer the issue any more. ] NOTE! Not everybody wants to use this new helper function, since some of the tree walkers very much on purpose do the descriptor update separately from the entry extraction. So the "extract + update" sequence still remains as the core sequence, this is just a simplified interface. We should probably add a silly two-line inline helper function for initializing the descriptor from the "struct tree" too, just to cut down on the noise from that common "desc" initializer. Signed-off-by: Linus Torvalds <torvalds@osdl.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-05-30 16:45:45 +00:00
struct name_entry entry;
struct object_id oid;
int len, oldlen = base->len;
enum interesting retval = entry_not_interesting;
if (depth > max_allowed_tree_depth)
return error("exceeded maximum allowed tree depth");
if (parse_tree(tree))
return -1;
init_tree_desc(&desc, &tree->object.oid, tree->buffer, tree->size);
tree_entry(): new tree-walking helper function This adds a "tree_entry()" function that combines the common operation of doing a "tree_entry_extract()" + "update_tree_entry()". It also has a simplified calling convention, designed for simple loops that traverse over a whole tree: the arguments are pointers to the tree descriptor and a name_entry structure to fill in, and it returns a boolean "true" if there was an entry left to be gotten in the tree. This allows tree traversal with struct tree_desc desc; struct name_entry entry; desc.buf = tree->buffer; desc.size = tree->size; while (tree_entry(&desc, &entry) { ... use "entry.{path, sha1, mode, pathlen}" ... } which is not only shorter than writing it out in full, it's hopefully less error prone too. [ It's actually a tad faster too - we don't need to recalculate the entry pathlength in both extract and update, but need to do it only once. Also, some callers can avoid doing a "strlen()" on the result, since it's returned as part of the name_entry structure. However, by now we're talking just 1% speedup on "git-rev-list --objects --all", and we're definitely at the point where tree walking is no longer the issue any more. ] NOTE! Not everybody wants to use this new helper function, since some of the tree walkers very much on purpose do the descriptor update separately from the entry extraction. So the "extract + update" sequence still remains as the core sequence, this is just a simplified interface. We should probably add a silly two-line inline helper function for initializing the descriptor from the "struct tree" too, just to cut down on the noise from that common "desc" initializer. Signed-off-by: Linus Torvalds <torvalds@osdl.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-05-30 16:45:45 +00:00
while (tree_entry(&desc, &entry)) {
if (retval != all_entries_interesting) {
retval = tree_entry_interesting(r->index, &entry,
base, pathspec);
if (retval == all_entries_not_interesting)
break;
if (retval == entry_not_interesting)
continue;
}
switch (fn(&entry.oid, base,
entry.path, entry.mode, context)) {
case 0:
continue;
case READ_TREE_RECURSIVE:
break;
default:
return -1;
}
if (S_ISDIR(entry.mode))
oidcpy(&oid, &entry.oid);
else if (S_ISGITLINK(entry.mode)) {
struct commit *commit;
commit = lookup_commit(r, &entry.oid);
if (!commit)
die("Commit %s in submodule path %s%s not found",
oid_to_hex(&entry.oid),
base->buf, entry.path);
libs: use "struct repository *" argument, not "the_repository" As can easily be seen from grepping in our sources, we had these uses of "the_repository" in various library code in cases where the function in question was already getting a "struct repository *" argument. Let's use that argument instead. Out of these changes only the changes to "cache-tree.c", "commit-reach.c", "shallow.c" and "upload-pack.c" would have cleanly applied before the migration away from the "repo_*()" wrapper macros in the preceding commits. The rest aren't new, as we'd previously implicitly refer to "the_repository", but it's now more obvious that we were doing the wrong thing all along, and should have used the parameter instead. The change to change "get_index_format_default(the_repository)" in "read-cache.c" to use the "r" variable instead should arguably have been part of [1], or in the subsequent cleanup in [2]. Let's do it here, as can be seen from the initial code in [3] it's not important that we use "the_repository" there, but would prefer to always use the current repository. This change excludes the "the_repository" use in "upload-pack.c"'s upload_pack_advertise(), as the in-flight [4] makes that change. 1. ee1f0c242ef (read-cache: add index.skipHash config option, 2023-01-06) 2. 6269f8eaad0 (treewide: always have a valid "index_state.repo" member, 2023-01-17) 3. 7211b9e7534 (repo-settings: consolidate some config settings, 2019-08-13) 4. <Y/hbUsGPVNAxTdmS@coredump.intra.peff.net> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-28 13:58:58 +00:00
if (repo_parse_commit(r, commit))
die("Invalid commit %s in submodule path %s%s",
oid_to_hex(&entry.oid),
base->buf, entry.path);
oidcpy(&oid, get_commit_tree_oid(commit));
}
else
continue;
len = tree_entry_len(&entry);
strbuf_add(base, entry.path, len);
strbuf_addch(base, '/');
retval = read_tree_at(r, lookup_tree(r, &oid),
base, depth + 1, pathspec,
fn, context);
strbuf_setlen(base, oldlen);
if (retval)
return -1;
}
return 0;
}
int read_tree(struct repository *r,
struct tree *tree,
const struct pathspec *pathspec,
read_tree_fn_t fn, void *context)
{
struct strbuf sb = STRBUF_INIT;
int ret = read_tree_at(r, tree, &sb, 0, pathspec, fn, context);
strbuf_release(&sb);
return ret;
}
int base_name_compare(const char *name1, size_t len1, int mode1,
const char *name2, size_t len2, int mode2)
{
unsigned char c1, c2;
size_t len = len1 < len2 ? len1 : len2;
int cmp;
cmp = memcmp(name1, name2, len);
if (cmp)
return cmp;
c1 = name1[len];
c2 = name2[len];
if (!c1 && S_ISDIR(mode1))
c1 = '/';
if (!c2 && S_ISDIR(mode2))
c2 = '/';
return (c1 < c2) ? -1 : (c1 > c2) ? 1 : 0;
}
/*
* df_name_compare() is identical to base_name_compare(), except it
* compares conflicting directory/file entries as equal. Note that
* while a directory name compares as equal to a regular file, they
* then individually compare _differently_ to a filename that has
* a dot after the basename (because '\0' < '.' < '/').
*
* This is used by routines that want to traverse the git namespace
* but then handle conflicting entries together when possible.
*/
int df_name_compare(const char *name1, size_t len1, int mode1,
const char *name2, size_t len2, int mode2)
{
unsigned char c1, c2;
size_t len = len1 < len2 ? len1 : len2;
int cmp;
cmp = memcmp(name1, name2, len);
if (cmp)
return cmp;
/* Directories and files compare equal (same length, same name) */
if (len1 == len2)
return 0;
c1 = name1[len];
if (!c1 && S_ISDIR(mode1))
c1 = '/';
c2 = name2[len];
if (!c2 && S_ISDIR(mode2))
c2 = '/';
if (c1 == '/' && !c2)
return 0;
if (c2 == '/' && !c1)
return 0;
return c1 - c2;
}
int name_compare(const char *name1, size_t len1, const char *name2, size_t len2)
{
size_t min_len = (len1 < len2) ? len1 : len2;
int cmp = memcmp(name1, name2, min_len);
if (cmp)
return cmp;
if (len1 < len2)
return -1;
if (len1 > len2)
return 1;
return 0;
}
struct tree *lookup_tree(struct repository *r, const struct object_id *oid)
{
struct object *obj = lookup_object(r, oid);
if (!obj)
return create_object(r, oid, alloc_tree_node(r));
return object_as_type(obj, OBJ_TREE, 0);
}
int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size)
{
if (item->object.parsed)
return 0;
item->object.parsed = 1;
item->buffer = buffer;
item->size = size;
return 0;
}
int parse_tree_gently(struct tree *item, int quiet_on_missing)
{
enum object_type type;
void *buffer;
unsigned long size;
if (item->object.parsed)
return 0;
buffer = repo_read_object_file(the_repository, &item->object.oid,
&type, &size);
if (!buffer)
return quiet_on_missing ? -1 :
error("Could not read %s",
oid_to_hex(&item->object.oid));
if (type != OBJ_TREE) {
free(buffer);
return error("Object %s not a tree",
oid_to_hex(&item->object.oid));
}
return parse_tree_buffer(item, buffer, size);
}
void free_tree_buffer(struct tree *tree)
{
FREE_AND_NULL(tree->buffer);
tree->size = 0;
tree->object.parsed = 0;
}
struct tree *parse_tree_indirect(const struct object_id *oid)
{
struct repository *r = the_repository;
struct object *obj = parse_object(r, oid);
return (struct tree *)repo_peel_to_type(r, NULL, 0, obj, OBJ_TREE);
}