git/builtin/diff.c
Torsten Bögershausen 5c327502db MacOS: precompose_argv_prefix()
The following sequence leads to a "BUG" assertion running under MacOS:

  DIR=git-test-restore-p
  Adiarnfd=$(printf 'A\314\210')
  DIRNAME=xx${Adiarnfd}yy
  mkdir $DIR &&
  cd $DIR &&
  git init &&
  mkdir $DIRNAME &&
  cd $DIRNAME &&
  echo "Initial" >file &&
  git add file &&
  echo "One more line" >>file &&
  echo y | git restore -p .

 Initialized empty Git repository in /tmp/git-test-restore-p/.git/
 BUG: pathspec.c:495: error initializing pathspec_item
 Cannot close git diff-index --cached --numstat
 [snip]

The command `git restore` is run from a directory inside a Git repo.
Git needs to split the $CWD into 2 parts:
The path to the repo and "the rest", if any.
"The rest" becomes a "prefix" later used inside the pathspec code.

As an example, "/path/to/repo/dir-inside-repå" would determine
"/path/to/repo" as the root of the repo, the place where the
configuration file .git/config is found.

The rest becomes the prefix ("dir-inside-repå"), from where the
pathspec machinery expands the ".", more about this later.
If there is a decomposed form, (making the decomposing visible like this),
"dir-inside-rep°a" doesn't match "dir-inside-repå".

Git commands need to:

 (a) read the configuration variable "core.precomposeunicode"
 (b) precompose argv[]
 (c) precompose the prefix, if there was any

The first commit,
76759c7dff "git on Mac OS and precomposed unicode"
addressed (a) and (b).

The call to precompose_argv() was added into parse-options.c,
because that seemed to be a good place when the patch was written.

Commands that don't use parse-options need to do (a) and (b) themselves.

The commands `diff-files`, `diff-index`, `diff-tree` and `diff`
learned (a) and (b) in
commit 90a78b83e0 "diff: run arguments through precompose_argv"

Branch names (or refs in general) using decomposed code points
resulting in decomposed file names had been fixed in
commit 8e712ef6fc "Honor core.precomposeUnicode in more places"

The bug report from above shows 2 things:
- more commands need to handle precomposed unicode
- (c) should be implemented for all commands using pathspecs

Solution:
precompose_argv() now handles the prefix (if needed), and is renamed into
precompose_argv_prefix().

Inside this function the config variable core.precomposeunicode is read
into the global variable precomposed_unicode, as before.
This reading is skipped if precomposed_unicode had been read before.

The original patch for precomposed unicode, 76759c7dff, placed
precompose_argv() into parse-options.c.

Now add it into git.c::run_builtin() as well.  Existing precompose
calls in diff-files.c and others may become redundant, and if we
audit the callflows that reach these places to make sure that they
can never be reached without going through the new call added to
run_builtin(), we might be able to remove these existing ones.

But in this commit, we do not bother to do so and leave these
precompose callsites as they are.  Because precompose() is
idempotent and can be called on an already precomposed string
safely, this is safer than removing existing calls without fully
vetting the callflows.

There is certainly room for cleanups - this change intends to be a bug fix.
Cleanups need more tests in e.g. t/t3910-mac-os-precompose.sh, and should
be done in future commits.

[1] git-bugreport-2021-01-06-1209.txt (git can't deal with special characters)
[2] https://lore.kernel.org/git/A102844A-9501-4A86-854D-E3B387D378AA@icloud.com/

Reported-by: Daniel Troger <random_n0body@icloud.com>
Helped-By: Philippe Blain <levraiphilippeblain@gmail.com>
Signed-off-by: Torsten Bögershausen <tboegi@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-02-03 14:09:37 -08:00

595 lines
16 KiB
C

/*
* Builtin "git diff"
*
* Copyright (c) 2006 Junio C Hamano
*/
#define USE_THE_INDEX_COMPATIBILITY_MACROS
#include "cache.h"
#include "config.h"
#include "ewah/ewok.h"
#include "lockfile.h"
#include "color.h"
#include "commit.h"
#include "blob.h"
#include "tag.h"
#include "diff.h"
#include "diffcore.h"
#include "revision.h"
#include "log-tree.h"
#include "builtin.h"
#include "submodule.h"
#include "oid-array.h"
#define DIFF_NO_INDEX_EXPLICIT 1
#define DIFF_NO_INDEX_IMPLICIT 2
/*
 * Usage text handed to usage() by the helpers in this file: one line per
 * accepted invocation form, followed by COMMON_DIFF_OPTIONS_HELP.
 *
 * Every "[" in a usage line must have a matching "]"; the symmetric
 * difference, two-blob and --no-index forms take no trailing bracket.
 */
static const char builtin_diff_usage[] =
"git diff [<options>] [<commit>] [--] [<path>...]\n"
" or: git diff [<options>] --cached [<commit>] [--] [<path>...]\n"
" or: git diff [<options>] <commit> [--merge-base] [<commit>...] <commit> [--] [<path>...]\n"
" or: git diff [<options>] <commit>...<commit> [--] [<path>...]\n"
" or: git diff [<options>] <blob> <blob>\n"
" or: git diff [<options>] --no-index [--] <path> <path>\n"
COMMON_DIFF_OPTIONS_HELP;
/*
 * Choose the label for a blob entry: the path stored on the entry when
 * one is present, otherwise the entry's name.
 */
static const char *blob_path(struct object_array_entry *entry)
{
	if (entry->path)
		return entry->path;
	return entry->name;
}
/*
 * Queue a single old/new file pair onto diff_queued_diff.
 *
 * Nothing is queued when both object ids are non-null, equal, and the
 * modes match, nor when opt->prefix is set and either path does not
 * start with it. With opt->flags.reverse_diff set, the modes, object
 * ids and paths of the two sides are exchanged before queuing.
 *
 * NOTE(review): old_oid_valid/new_oid_valid are not exchanged together
 * with the object ids in the reverse case - confirm this is intended.
 */
static void stuff_change(struct diff_options *opt,
			 unsigned old_mode, unsigned new_mode,
			 const struct object_id *old_oid,
			 const struct object_id *new_oid,
			 int old_oid_valid,
			 int new_oid_valid,
			 const char *old_path,
			 const char *new_path)
{
	struct diff_filespec *src, *dst;
	int same_object;

	same_object = !is_null_oid(old_oid) && !is_null_oid(new_oid) &&
		      oideq(old_oid, new_oid);
	if (same_object && old_mode == new_mode)
		return;

	if (opt->flags.reverse_diff) {
		SWAP(old_mode, new_mode);
		SWAP(old_oid, new_oid);
		SWAP(old_path, new_path);
	}

	/* Both sides must live under the prefix, when one is in effect. */
	if (opt->prefix) {
		if (strncmp(old_path, opt->prefix, opt->prefix_length))
			return;
		if (strncmp(new_path, opt->prefix, opt->prefix_length))
			return;
	}

	src = alloc_filespec(old_path);
	dst = alloc_filespec(new_path);
	fill_filespec(src, old_oid, old_oid_valid, old_mode);
	fill_filespec(dst, new_oid, new_oid_valid, new_mode);

	diff_queue(&diff_queued_diff, src, dst);
}
/*
 * Compare one blob (blob[0]) against a file in the working tree.
 *
 * The working tree file is the first pathspec item in revs->prune_data;
 * it must be a regular file or a symlink. Any leftover command line
 * argument (argc > 1) is a usage error. Returns 0.
 */
static int builtin_diff_b_f(struct rev_info *revs,
			    int argc, const char **argv,
			    struct object_array_entry **blob)
{
	struct object_array_entry *b = blob[0];
	const char *wt_path, *old_path;
	struct stat sb;

	if (argc > 1)
		usage(builtin_diff_usage);

	GUARD_PATHSPEC(&revs->prune_data, PATHSPEC_FROMTOP | PATHSPEC_LITERAL);
	wt_path = revs->prune_data.items[0].match;

	if (lstat(wt_path, &sb))
		die_errno(_("failed to stat '%s'"), wt_path);
	if (!S_ISREG(sb.st_mode) && !S_ISLNK(sb.st_mode))
		die(_("'%s': not a regular file or symlink"), wt_path);

	diff_set_mnemonic_prefix(&revs->diffopt, "o/", "w/");

	/* A blob without a known mode borrows the mode of the file. */
	if (b->mode == S_IFINVALID)
		b->mode = canon_mode(sb.st_mode);

	old_path = b->path ? b->path : wt_path;
	stuff_change(&revs->diffopt,
		     b->mode, canon_mode(sb.st_mode),
		     &b->item->oid, &null_oid,
		     1, 0,
		     old_path,
		     wt_path);

	diffcore_std(&revs->diffopt);
	diff_flush(&revs->diffopt);
	return 0;
}
/*
 * Compare two blobs (blob[0] against blob[1]).
 *
 * A blob whose mode is S_IFINVALID is given the canonical form of
 * S_IFREG | 0644. Any leftover command line argument (argc > 1) is a
 * usage error. Returns 0.
 */
static int builtin_diff_blobs(struct rev_info *revs,
			      int argc, const char **argv,
			      struct object_array_entry **blob)
{
	const unsigned fallback_mode = canon_mode(S_IFREG | 0644);
	int side;

	if (argc > 1)
		usage(builtin_diff_usage);

	for (side = 0; side < 2; side++) {
		if (blob[side]->mode == S_IFINVALID)
			blob[side]->mode = fallback_mode;
	}

	stuff_change(&revs->diffopt,
		     blob[0]->mode, blob[1]->mode,
		     &blob[0]->item->oid, &blob[1]->item->oid,
		     1, 1,
		     blob_path(blob[0]), blob_path(blob[1]));

	diffcore_std(&revs->diffopt);
	diff_flush(&revs->diffopt);
	return 0;
}
/*
 * Compare one tree against the index or the working tree.
 *
 * Remaining arguments may only be --cached/--staged or --merge-base;
 * anything else is a usage error. Exactly one pending revision is
 * required, with no revision filtering parameters set.
 *
 * Returns -1 if reading the index fails, otherwise the result of
 * run_diff_index().
 */
static int builtin_diff_index(struct rev_info *revs,
			      int argc, const char **argv)
{
	unsigned int option = 0;

	for (; argc > 1; argv++, argc--) {
		const char *arg = argv[1];

		if (!strcmp(arg, "--cached") || !strcmp(arg, "--staged"))
			option |= DIFF_INDEX_CACHED;
		else if (!strcmp(arg, "--merge-base"))
			option |= DIFF_INDEX_MERGE_BASE;
		else
			usage(builtin_diff_usage);
	}

	/* One pending object, and none of the revision limiting knobs. */
	if (revs->pending.nr != 1)
		usage(builtin_diff_usage);
	if (revs->max_count != -1 || revs->min_age != -1 ||
	    revs->max_age != -1)
		usage(builtin_diff_usage);

	if (option & DIFF_INDEX_CACHED) {
		if (read_cache() < 0) {
			perror("read_cache");
			return -1;
		}
	} else {
		/* Without --cached the working tree is involved. */
		setup_work_tree();
		if (read_cache_preload(&revs->diffopt.pathspec) < 0) {
			perror("read_cache_preload");
			return -1;
		}
	}

	return run_diff_index(revs, option);
}
/*
 * Compare two trees.
 *
 * The only remaining argument accepted is --merge-base; anything else
 * is a usage error. With --merge-base the old side is the object id
 * filled in by diff_get_merge_base() and the new side is the second
 * pending object. Otherwise ent0 is the old side and ent1 the new
 * side, except that they trade places when ent1 is flagged
 * UNINTERESTING. Returns 0.
 */
static int builtin_diff_tree(struct rev_info *revs,
			     int argc, const char **argv,
			     struct object_array_entry *ent0,
			     struct object_array_entry *ent1)
{
	const struct object_id *from, *to;
	struct object_id mb_oid;
	int use_merge_base = 0;

	for (; argc > 1; argv++, argc--) {
		if (strcmp(argv[1], "--merge-base"))
			usage(builtin_diff_usage);
		use_merge_base = 1;
	}

	if (use_merge_base) {
		diff_get_merge_base(revs, &mb_oid);
		from = &mb_oid;
		to = &revs->pending.objects[1].item->oid;
	} else if (ent1->item->flags & UNINTERESTING) {
		/* ent1 is the uninteresting one, so it becomes the old side. */
		from = &ent1->item->oid;
		to = &ent0->item->oid;
	} else {
		from = &ent0->item->oid;
		to = &ent1->item->oid;
	}

	diff_tree_oid(from, to, "", &revs->diffopt);
	log_tree_diff_flush(revs);
	return 0;
}
/*
 * Combined diff of ent[0] against ent[1..ents-1], which are passed to
 * diff_tree_combined() as the parents.
 *
 * When neither combine_merges nor dense_combined_merges is set on
 * revs, both get switched on. Any leftover command line argument
 * (argc > 1) is a usage error. Returns 0.
 */
static int builtin_diff_combined(struct rev_info *revs,
				 int argc, const char **argv,
				 struct object_array_entry *ent,
				 int ents)
{
	struct oid_array parents = OID_ARRAY_INIT;
	int n;

	if (argc > 1)
		usage(builtin_diff_usage);

	if (!(revs->dense_combined_merges || revs->combine_merges)) {
		revs->combine_merges = 1;
		revs->dense_combined_merges = 1;
	}

	n = 1;
	while (n < ents) {
		oid_array_append(&parents, &ent[n].item->oid);
		n++;
	}

	diff_tree_combined(&ent[0].item->oid, &parents, revs);
	oid_array_clear(&parents);
	return 0;
}
static void refresh_index_quietly(void)
{
struct lock_file lock_file = LOCK_INIT;
int fd;
fd = hold_locked_index(&lock_file, 0);
if (fd < 0)
return;
discard_cache();
read_cache();
refresh_cache(REFRESH_QUIET|REFRESH_UNMERGED);
repo_update_index_if_able(the_repository, &lock_file);
}
static int builtin_diff_files(struct rev_info *revs, int argc, const char **argv)
{
unsigned int options = 0;
while (1 < argc && argv[1][0] == '-') {
if (!strcmp(argv[1], "--base"))
revs->max_count = 1;
else if (!strcmp(argv[1], "--ours"))
revs->max_count = 2;
else if (!strcmp(argv[1], "--theirs"))
revs->max_count = 3;
else if (!strcmp(argv[1], "-q"))
options |= DIFF_SILENT_ON_REMOVED;
else if (!strcmp(argv[1], "-h"))
usage(builtin_diff_usage);
else
return error(_("invalid option: %s"), argv[1]);
argv++; argc--;
}
/*
* "diff --base" should not combine merges because it was not
* asked to. "diff -c" should not densify (if the user wants
* dense one, --cc can be explicitly asked for, or just rely
* on the default).
*/
if (revs->max_count == -1 && !revs->combine_merges &&
(revs->diffopt.output_format & DIFF_FORMAT_PATCH))
revs->combine_merges = revs->dense_combined_merges = 1;
setup_work_tree();
if (read_cache_preload(&revs->diffopt.pathspec) < 0) {
perror("read_cache_preload");
return -1;
}
return run_diff_files(revs, options);
}
/* What symdiff_prepare() learned about a symmetric difference argument. */
struct symdiff {
	/* pending-object indices to ignore; NULL when not a symmetric diff */
	struct bitmap *skip;
	/* non-zero when there is more than one merge base */
	int warn;
	/* names used when warning about multiple merge bases */
	const char *base;
	const char *left;
	const char *right;
};
/*
 * Detect a symmetric-difference argument ("A...B") and record what the
 * caller needs in order to handle it. Bogus range combinations such as
 * "git diff A..B C..D" or "git diff A..B C" are rejected with a usage
 * error along the way.
 *
 * When a symmetric difference is present, *sym is filled in like this:
 *
 *  - skip is a bitmap of the rev->pending.objects[] indices the caller
 *    must ignore: the A of A...B and every merge base except the chosen
 *    one. The chosen merge base and the right side are not in it.
 *  - warn is non-zero when there is more than one merge base.
 *  - base, left and right are the names to print in the warning about
 *    multiple merge bases.
 *
 * Without a symmetric difference, skip is NULL, warn is 0, and the
 * other fields are left untouched.
 */
static void symdiff_prepare(struct rev_info *rev, struct symdiff *sym)
{
	int idx, symmetric = 0, n_bases = 0, n_others = 0;
	int left_at = -1, right_at = -1, base_at = -1;
	struct bitmap *skip = NULL;

	/*
	 * Classification relies on the whence field of each command line
	 * revision rather than on parse order. Any revision that is not a
	 * merge base or one end of a range means extra arguments were
	 * given ("git diff C A...B", "git diff A...B C...E", ...).
	 *
	 * With several merge bases, the first one encountered is kept.
	 *
	 * REV_CMD_LEFT and REV_CMD_RIGHT are used for A..B as well, hence
	 * the additional SYMMETRIC_LEFT check. rev->pending.objects and
	 * rev->cmdline.rev are indexed in parallel.
	 */
	for (idx = 0; idx < rev->cmdline.nr; idx++) {
		struct object *obj = rev->pending.objects[idx].item;
		int mark = 1;	/* cleared by entries that must stay visible */

		switch (rev->cmdline.rev[idx].whence) {
		case REV_CMD_MERGE_BASE:
			/* every base is marked; the chosen one is unmarked later */
			if (base_at < 0)
				base_at = idx;
			n_bases++;
			break;
		case REV_CMD_LEFT:
			if (left_at >= 0)
				usage(builtin_diff_usage);
			left_at = idx;
			if (obj->flags & SYMMETRIC_LEFT)
				symmetric = 1;	/* the A of A...B gets marked */
			else
				mark = 0;
			break;
		case REV_CMD_RIGHT:
			if (right_at >= 0)
				usage(builtin_diff_usage);
			right_at = idx;
			mark = 0;	/* B is never marked */
			break;
		case REV_CMD_PARENTS_ONLY:
		case REV_CMD_REF:
		case REV_CMD_REV:
			n_others++;
			mark = 0;
			break;
		}

		if (!mark)
			continue;
		if (!skip)
			skip = bitmap_new();
		bitmap_set(skip, idx);
	}

	/* Neither A...B nor A..B may be combined with further revisions. */
	if (left_at >= 0 && n_others > 0)
		usage(builtin_diff_usage);

	if (!symmetric) {
		bitmap_free(skip);
		sym->warn = 0;
		sym->skip = NULL;
		return;
	}

	sym->left = rev->pending.objects[left_at].name;
	sym->right = rev->pending.objects[right_at].name;
	if (!n_bases)
		die(_("%s...%s: no merge base"), sym->left, sym->right);

	sym->base = rev->pending.objects[base_at].name;
	bitmap_unset(skip, base_at); /* the chosen base must not be skipped */
	sym->warn = n_bases > 1;
	sym->skip = skip;
}
/*
* Entry point for the "git diff" builtin.
*
* Classifies the command line by how many trees, blobs and pathspecs
* it names (see the table at the top of the body) and hands off to the
* matching builtin_diff_*() helper, or to diff_no_index() when running
* in --no-index mode.
*
* The incoming "prefix" argument is not used as given: it is
* overwritten by the result of setup_git_directory_gently() and then
* by the result of precompose_argv_prefix().
*
* Returns the value produced by diff_result_code(). The --no-index
* path does not return; it calls exit() with diff_no_index()'s result.
*/
int cmd_diff(int argc, const char **argv, const char *prefix)
{
int i;
struct rev_info rev;
struct object_array ent = OBJECT_ARRAY_INIT;
int blobs = 0, paths = 0;
/* Only the first "blobs" slots of blob[] are ever filled in. */
struct object_array_entry *blob[2];
int nongit = 0, no_index = 0;
int result = 0;
struct symdiff sdiff;
/*
* We could get N tree-ish in the rev.pending_objects list.
* Also there could be M blobs there, and P pathspecs. --cached may
* also be present.
*
* N=0, M=0:
* cache vs files (diff-files)
*
* N=0, M=0, --cached:
* HEAD vs cache (diff-index --cached)
*
* N=0, M=2:
* compare two random blobs. P must be zero.
*
* N=0, M=1, P=1:
* compare a blob with a working tree file.
*
* N=1, M=0:
* tree vs files (diff-index)
*
* N=1, M=0, --cached:
* tree vs cache (diff-index --cached)
*
* N=2, M=0:
* tree vs tree (diff-tree)
*
* N=0, M=0, P=2:
* compare two filesystem entities (aka --no-index).
*
* Other cases are errors.
*/
/*
* Were we asked to do --no-index explicitly?
*
* The scan stops at "--" (leaving i just past it), at the first
* argument that does not start with '-' (leaving i on it), or at
* the end of argv. The resulting i is used below to look at the
* two arguments that follow.
*/
for (i = 1; i < argc; i++) {
if (!strcmp(argv[i], "--")) {
i++;
break;
}
if (!strcmp(argv[i], "--no-index"))
no_index = DIFF_NO_INDEX_EXPLICIT;
if (argv[i][0] != '-')
break;
}
/* Replaces the caller-supplied prefix; nongit is checked further down. */
prefix = setup_git_directory_gently(&nongit);
if (!no_index) {
/*
* Treat git diff with at least one path outside of the
* repo the same as if the command would have been executed
* outside of a git repository. In this case it behaves
* the same way as "git diff --no-index <a> <b>", which acts
* as a colourful "diff" replacement.
*/
if (nongit || ((argc == i + 2) &&
(!path_inside_repo(prefix, argv[i]) ||
!path_inside_repo(prefix, argv[i + 1]))))
no_index = DIFF_NO_INDEX_IMPLICIT;
}
init_diff_ui_defaults();
git_config(git_diff_ui_config, NULL);
/* Runs after git_config() and before the prefix is given to the revision setup. */
prefix = precompose_argv_prefix(argc, argv, prefix);
repo_init_revisions(the_repository, &rev, prefix);
/* Set up defaults that will apply to both no-index and regular diffs. */
rev.diffopt.stat_width = -1;
rev.diffopt.stat_graph_width = -1;
rev.diffopt.flags.allow_external = 1;
rev.diffopt.flags.allow_textconv = 1;
/* If this is a no-index diff, just run it and exit there. */
if (no_index)
exit(diff_no_index(&rev, no_index == DIFF_NO_INDEX_IMPLICIT,
argc, argv));
/*
* Otherwise, we are doing the usual "git" diff; set up any
* further defaults that apply to regular diffs.
*/
rev.diffopt.skip_stat_unmatch = !!diff_auto_refresh_index;
/*
* Default to intent-to-add entries invisible in the
* index. This makes them show up as new files in diff-files
* and not at all in diff-cached.
*/
rev.diffopt.ita_invisible_in_index = 1;
if (nongit)
die(_("Not a git repository"));
/* From here on argc is whatever setup_revisions() returned. */
argc = setup_revisions(argc, argv, &rev, NULL);
/* No output format was requested: default to a patch. */
if (!rev.diffopt.output_format) {
rev.diffopt.output_format = DIFF_FORMAT_PATCH;
diff_setup_done(&rev.diffopt);
}
rev.diffopt.flags.recursive = 1;
setup_diff_pager(&rev.diffopt);
/*
* Do we have --cached and not have a pending object, then
* default to HEAD by hand. Eek.
*/
if (!rev.pending.nr) {
/* NOTE(review): this "i" shadows the function-level "i". */
int i;
for (i = 1; i < argc; i++) {
const char *arg = argv[i];
if (!strcmp(arg, "--"))
break;
else if (!strcmp(arg, "--cached") ||
!strcmp(arg, "--staged")) {
add_head_to_pending(&rev);
/*
* Still nothing pending after asking for HEAD:
* use the empty tree under the name "HEAD".
*/
if (!rev.pending.nr) {
struct tree *tree;
tree = lookup_tree(the_repository,
the_repository->hash_algo->empty_tree);
add_pending_object(&rev, &tree->object, "HEAD");
}
break;
}
}
}
symdiff_prepare(&rev, &sdiff);
/*
* Sort every pending object into "ent" (trees, with commits
* replaced by their tree) or "blob" (at most two). Trees marked
* in sdiff.skip are left out.
*/
for (i = 0; i < rev.pending.nr; i++) {
struct object_array_entry *entry = &rev.pending.objects[i];
struct object *obj = entry->item;
const char *name = entry->name;
/* Remembered here because obj is replaced below. */
int flags = (obj->flags & UNINTERESTING);
if (!obj->parsed)
obj = parse_object(the_repository, &obj->oid);
obj = deref_tag(the_repository, obj, NULL, 0);
if (!obj)
die(_("invalid object '%s' given."), name);
if (obj->type == OBJ_COMMIT)
obj = &get_commit_tree(((struct commit *)obj))->object;
if (obj->type == OBJ_TREE) {
if (sdiff.skip && bitmap_get(sdiff.skip, i))
continue;
obj->flags |= flags;
add_object_array(obj, name, &ent);
} else if (obj->type == OBJ_BLOB) {
if (2 <= blobs)
die(_("more than two blobs given: '%s'"), name);
blob[blobs] = entry;
blobs++;
} else {
die(_("unhandled object '%s' given."), name);
}
}
/* "paths" is the number of pathspec items in rev.prune_data. */
if (rev.prune_data.nr)
paths += rev.prune_data.nr;
/*
* Now, do the arguments look reasonable?
*/
if (!ent.nr) {
/* No trees: dispatch on the number of blobs. */
switch (blobs) {
case 0:
result = builtin_diff_files(&rev, argc, argv);
break;
case 1:
if (paths != 1)
usage(builtin_diff_usage);
result = builtin_diff_b_f(&rev, argc, argv, blob);
break;
case 2:
if (paths)
usage(builtin_diff_usage);
result = builtin_diff_blobs(&rev, argc, argv, blob);
break;
default:
usage(builtin_diff_usage);
}
}
/* Trees and blobs cannot be mixed. */
else if (blobs)
usage(builtin_diff_usage);
else if (ent.nr == 1)
result = builtin_diff_index(&rev, argc, argv);
else if (ent.nr == 2) {
if (sdiff.warn)
warning(_("%s...%s: multiple merge bases, using %s"),
sdiff.left, sdiff.right, sdiff.base);
result = builtin_diff_tree(&rev, argc, argv,
&ent.objects[0], &ent.objects[1]);
} else
/* Three or more trees. */
result = builtin_diff_combined(&rev, argc, argv,
ent.objects, ent.nr);
result = diff_result_code(&rev.diffopt, result);
/* A skip_stat_unmatch value above 1 triggers a quiet index refresh. */
if (1 < rev.diffopt.skip_stat_unmatch)
refresh_index_quietly();
UNLEAK(rev);
UNLEAK(ent);
UNLEAK(blob);
return result;
}