git/builtin/diff-tree.c
Shuqi Liang 48c5fbfb89 diff-tree: integrate with sparse index
The index is read in 'cmd_diff_tree' at two points:

1. The first index read was added in fd66bcc31f (diff-tree: read the
index so attribute checks work in bare repositories, 2017-12-06) to deal
with reading '.gitattributes' content. 77efbb366a (attr: be careful
about sparse directories, 2021-09-08) established that, in a sparse
index, we do _not_ try to load a '.gitattributes' file from within a
sparse directory.

2. The second index access point is involved in rename detection,
specifically when reading from stdin. This was initially added in
f0c6b2a2fd ([PATCH] Optimize diff-tree -[CM]--stdin, 2005-05-27), where
'setup' was set to 'DIFF_SETUP_USE_SIZE_CACHE | DIFF_SETUP_USE_CACHE'.
That assignment was later modified to drop the 'DIFF_SETUP_USE_CACHE' in
ff7fe37b05 (diff.c: move read_index() code back to the caller,
2018-08-13). However, 'DIFF_SETUP_USE_SIZE_CACHE' seems to be unused as
of 6e0b8ed6d3 (diff.c: do not use a separate "size cache"., 2007-05-07)
and nothing about 'detect_rename' otherwise indicates index usage.

Hence we can just set the requires-full-index to false for "diff-tree".

Add tests that verify that 'git diff-tree' behaves correctly when the
sparse index is enabled, and tests that ensure the index is not expanded.

The `p2000` tests demonstrate a ~98% execution time reduction for
'git diff-tree' using a sparse index:

Test                                                before  after
-----------------------------------------------------------------------
2000.94: git diff-tree HEAD (full-v3)                0.05   0.04 -20.0%
2000.95: git diff-tree HEAD (full-v4)                0.06   0.05 -16.7%
2000.96: git diff-tree HEAD (sparse-v3)              0.59   0.01 -98.3%
2000.97: git diff-tree HEAD (sparse-v4)              0.61   0.01 -98.4%
2000.98: git diff-tree HEAD -- f2/f4/a (full-v3)     0.05   0.05 +0.0%
2000.99: git diff-tree HEAD -- f2/f4/a (full-v4)     0.05   0.04 -20.0%
2000.100: git diff-tree HEAD -- f2/f4/a (sparse-v3)  0.58   0.01 -98.3%
2000.101: git diff-tree HEAD -- f2/f4/a (sparse-v4)  0.55   0.01 -98.2%

Helped-by: Victoria Dye <vdye@github.com>
Signed-off-by: Shuqi Liang <cheskaqiqi@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-18 10:40:33 -07:00

236 lines
6.3 KiB
C

#define USE_THE_INDEX_VARIABLE
#include "cache.h"
#include "config.h"
#include "diff.h"
#include "commit.h"
#include "gettext.h"
#include "hex.h"
#include "log-tree.h"
#include "builtin.h"
#include "submodule.h"
#include "repository.h"
#include "tree.h"
/*
 * Revision/diff options shared by every function in this file.
 * cmd_diff_tree() initializes it through its local "opt" pointer, and the
 * helpers hand it to log_tree_commit(), diff_tree_oid() and
 * log_tree_diff_flush().
 */
static struct rev_info log_tree_opt;
/*
 * Resolve "oid" to a commit and show it with log_tree_commit() using the
 * file-wide options.
 *
 * Returns -1 when lookup_commit_reference() yields no commit; otherwise
 * returns whatever log_tree_commit() returns.
 */
static int diff_tree_commit_oid(const struct object_id *oid)
{
	struct commit *c = lookup_commit_reference(the_repository, oid);

	return c ? log_tree_commit(&log_tree_opt, c) : -1;
}
/* Diff one or more commits. */
static int stdin_diff_commit(struct commit *commit, const char *p)
{
struct object_id oid;
struct commit_list **pptr = NULL;
/* Graft the fake parents locally to the commit */
while (isspace(*p++) && !parse_oid_hex(p, &oid, &p)) {
struct commit *parent = lookup_commit(the_repository, &oid);
if (!pptr) {
/* Free the real parent list */
free_commit_list(commit->parents);
commit->parents = NULL;
pptr = &(commit->parents);
}
if (parent) {
pptr = &commit_list_insert(parent, pptr)->next;
}
}
return log_tree_commit(&log_tree_opt, commit);
}
/*
 * Diff two trees read from stdin.
 *
 * "tree1" is the already-parsed first tree; "p" is the rest of the input
 * line, which must be exactly one whitespace character followed by the
 * second tree's object name and nothing else.
 *
 * Prints "<tree1> <tree2>" on its own line, then runs and flushes the
 * diff. Returns 0 on success, or a negative value if the line is
 * malformed or the second tree cannot be looked up and parsed.
 */
static int stdin_diff_trees(struct tree *tree1, const char *p)
{
	struct object_id oid;
	struct tree *tree2;
	const struct object_id *left, *right;

	if (!isspace(*p++) || parse_oid_hex(p, &oid, &p) || *p)
		return error("Need exactly two trees, separated by a space");

	tree2 = lookup_tree(the_repository, &oid);
	if (!tree2)
		return -1;
	if (parse_tree(tree2))
		return -1;

	left = &tree1->object.oid;
	right = &tree2->object.oid;

	printf("%s %s\n", oid_to_hex(left), oid_to_hex(right));
	diff_tree_oid(left, right, "", &log_tree_opt.diffopt);
	log_tree_diff_flush(&log_tree_opt);
	return 0;
}
/*
 * Handle one line of --stdin input that begins with an object name.
 *
 * "line" must end in '\n'; the newline is overwritten with NUL in place.
 * The leading object name is parsed and the object loaded, then the rest
 * of the line is handed to stdin_diff_commit() or stdin_diff_trees()
 * depending on the object's type.
 *
 * Returns the helper's result, or -1 when the line is empty or lacks a
 * trailing newline, the name does not parse, the object cannot be
 * loaded, or the object is neither a commit nor a tree (an error is
 * reported in that last case).
 */
static int diff_tree_stdin(char *line)
{
	/* strlen() returns size_t; keep the length in its native type. */
	size_t len = strlen(line);
	struct object_id oid;
	struct object *obj;
	const char *p;

	if (!len || line[len - 1] != '\n')
		return -1;
	line[len - 1] = '\0';

	if (parse_oid_hex(line, &oid, &p))
		return -1;

	obj = parse_object(the_repository, &oid);
	if (!obj)
		return -1;

	switch (obj->type) {
	case OBJ_COMMIT:
		return stdin_diff_commit((struct commit *)obj, p);
	case OBJ_TREE:
		return stdin_diff_trees((struct tree *)obj, p);
	default:
		error("Object %s is a %s, not a commit or tree",
		      oid_to_hex(&oid), type_name(obj->type));
		return -1;
	}
}
/*
 * Usage text that cmd_diff_tree() passes to usage(). The command-specific
 * options are listed here; COMMON_DIFF_OPTIONS_HELP is appended by
 * adjacent string literal concatenation.
 */
static const char diff_tree_usage[] =
"git diff-tree [--stdin] [-m] [-s] [-v] [--no-commit-id] [--pretty]\n"
" [-t] [-r] [-c | --cc] [--combined-all-paths] [--root] [--merge-base]\n"
" [<common-diff-options>] <tree-ish> [<tree-ish>] [<path>...]\n"
"\n"
" -r diff recursively\n"
" -c show combined diff for merge commits\n"
" --cc show combined diff for merge commits removing uninteresting hunks\n"
" --combined-all-paths\n"
" show name of file in all parents for combined diffs\n"
" --root include the initial commit as diff against /dev/null\n"
COMMON_DIFF_OPTIONS_HELP;
/*
 * "tweak" callback installed by cmd_diff_tree() in its
 * setup_revision_opt: choose a default output format when none has been
 * set. Patch output is used when dense combined merges are enabled, raw
 * output otherwise. "opt" is unused.
 */
static void diff_tree_tweak_rev(struct rev_info *rev, struct setup_revision_opt *opt)
{
	if (rev->diffopt.output_format)
		return;

	rev->diffopt.output_format = rev->dense_combined_merges ?
		DIFF_FORMAT_PATCH : DIFF_FORMAT_RAW;
}
int cmd_diff_tree(int argc, const char **argv, const char *prefix)
{
char line[1000];
struct object *tree1, *tree2;
static struct rev_info *opt = &log_tree_opt;
struct setup_revision_opt s_r_opt;
struct userformat_want w;
int read_stdin = 0;
int merge_base = 0;
if (argc == 2 && !strcmp(argv[1], "-h"))
usage(diff_tree_usage);
git_config(git_diff_basic_config, NULL); /* no "diff" UI options */
prepare_repo_settings(the_repository);
the_repository->settings.command_requires_full_index = 0;
repo_init_revisions(the_repository, opt, prefix);
if (repo_read_index(the_repository) < 0)
die(_("index file corrupt"));
opt->abbrev = 0;
opt->diff = 1;
opt->disable_stdin = 1;
memset(&s_r_opt, 0, sizeof(s_r_opt));
s_r_opt.tweak = diff_tree_tweak_rev;
prefix = precompose_argv_prefix(argc, argv, prefix);
argc = setup_revisions(argc, argv, opt, &s_r_opt);
memset(&w, 0, sizeof(w));
userformat_find_requirements(NULL, &w);
if (!opt->show_notes_given && w.notes)
opt->show_notes = 1;
if (opt->show_notes)
load_display_notes(&opt->notes_opt);
while (--argc > 0) {
const char *arg = *++argv;
if (!strcmp(arg, "--stdin")) {
read_stdin = 1;
continue;
}
if (!strcmp(arg, "--merge-base")) {
merge_base = 1;
continue;
}
usage(diff_tree_usage);
}
if (read_stdin && merge_base)
die(_("options '%s' and '%s' cannot be used together"), "--stdin", "--merge-base");
if (merge_base && opt->pending.nr != 2)
die(_("--merge-base only works with two commits"));
opt->diffopt.rotate_to_strict = 1;
/*
* NOTE! We expect "a..b" to expand to "^a b" but it is
* perfectly valid for revision range parser to yield "b ^a",
* which means the same thing. If we get the latter, i.e. the
* second one is marked UNINTERESTING, we recover the original
* order the user gave, i.e. "a..b", by swapping the trees.
*/
switch (opt->pending.nr) {
case 0:
if (!read_stdin)
usage(diff_tree_usage);
break;
case 1:
tree1 = opt->pending.objects[0].item;
diff_tree_commit_oid(&tree1->oid);
break;
case 2:
tree1 = opt->pending.objects[0].item;
tree2 = opt->pending.objects[1].item;
if (merge_base) {
struct object_id oid;
diff_get_merge_base(opt, &oid);
tree1 = lookup_object(the_repository, &oid);
} else if (tree2->flags & UNINTERESTING) {
SWAP(tree2, tree1);
}
diff_tree_oid(&tree1->oid, &tree2->oid, "", &opt->diffopt);
log_tree_diff_flush(opt);
break;
}
if (read_stdin) {
int saved_nrl = 0;
int saved_dcctc = 0;
opt->diffopt.rotate_to_strict = 0;
opt->diffopt.no_free = 1;
if (opt->diffopt.detect_rename) {
if (!the_index.cache)
repo_read_index(the_repository);
opt->diffopt.setup |= DIFF_SETUP_USE_SIZE_CACHE;
}
while (fgets(line, sizeof(line), stdin)) {
struct object_id oid;
if (get_oid_hex(line, &oid)) {
fputs(line, stdout);
fflush(stdout);
}
else {
diff_tree_stdin(line);
if (saved_nrl < opt->diffopt.needed_rename_limit)
saved_nrl = opt->diffopt.needed_rename_limit;
if (opt->diffopt.degraded_cc_to_c)
saved_dcctc = 1;
}
}
opt->diffopt.degraded_cc_to_c = saved_dcctc;
opt->diffopt.needed_rename_limit = saved_nrl;
opt->diffopt.no_free = 0;
diff_free(&opt->diffopt);
}
return diff_result_code(&opt->diffopt, 0);
}