git/help.c
Matthieu Moy c41494f8c8 Reduce cost of deletion in levenstein distance (4 -> 3)
Before this patch, a character deletion has the same cost as 2 swaps, or
4 additions, so Git prefers suggesting a completely scrambled command
name to removing a character. For example, "git tags" suggests "stage",
but not "tag".

By setting the deletion cost to 3, we keep it higher than swaps or
additions, but prefer 1 deletion to 2 swaps. "git tags" now suggests
"tag" in addition to staged.

Signed-off-by: Matthieu Moy <Matthieu.Moy@imag.fr>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-05-29 11:12:59 -07:00

380 lines
9.2 KiB
C

#include "cache.h"
#include "builtin.h"
#include "exec_cmd.h"
#include "levenshtein.h"
#include "help.h"
#include "common-cmds.h"
#include "string-list.h"
#include "column.h"
void add_cmdname(struct cmdnames *cmds, const char *name, int len)
{
struct cmdname *ent = xmalloc(sizeof(*ent) + len + 1);
ent->len = len;
memcpy(ent->name, name, len);
ent->name[len] = 0;
ALLOC_GROW(cmds->names, cmds->cnt + 1, cmds->alloc);
cmds->names[cmds->cnt++] = ent;
}
static void clean_cmdnames(struct cmdnames *cmds)
{
int i;
for (i = 0; i < cmds->cnt; ++i)
free(cmds->names[i]);
free(cmds->names);
cmds->cnt = 0;
cmds->alloc = 0;
}
static int cmdname_compare(const void *a_, const void *b_)
{
struct cmdname *a = *(struct cmdname **)a_;
struct cmdname *b = *(struct cmdname **)b_;
return strcmp(a->name, b->name);
}
static void uniq(struct cmdnames *cmds)
{
int i, j;
if (!cmds->cnt)
return;
for (i = j = 1; i < cmds->cnt; i++)
if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name))
cmds->names[j++] = cmds->names[i];
cmds->cnt = j;
}
void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
{
int ci, cj, ei;
int cmp;
ci = cj = ei = 0;
while (ci < cmds->cnt && ei < excludes->cnt) {
cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name);
if (cmp < 0)
cmds->names[cj++] = cmds->names[ci++];
else if (cmp == 0)
ci++, ei++;
else if (cmp > 0)
ei++;
}
while (ci < cmds->cnt)
cmds->names[cj++] = cmds->names[ci++];
cmds->cnt = cj;
}
static void pretty_print_string_list(struct cmdnames *cmds,
unsigned int colopts)
{
struct string_list list = STRING_LIST_INIT_NODUP;
struct column_options copts;
int i;
for (i = 0; i < cmds->cnt; i++)
string_list_append(&list, cmds->names[i]->name);
/*
* always enable column display, we only consult column.*
* about layout strategy and stuff
*/
colopts = (colopts & ~COL_ENABLE_MASK) | COL_ENABLED;
memset(&copts, 0, sizeof(copts));
copts.indent = " ";
copts.padding = 2;
print_columns(&list, colopts, &copts);
string_list_clear(&list, 0);
}
static int is_executable(const char *name)
{
struct stat st;
if (stat(name, &st) || /* stat, not lstat */
!S_ISREG(st.st_mode))
return 0;
#if defined(WIN32) || defined(__CYGWIN__)
#if defined(__CYGWIN__)
if ((st.st_mode & S_IXUSR) == 0)
#endif
{ /* cannot trust the executable bit, peek into the file instead */
char buf[3] = { 0 };
int n;
int fd = open(name, O_RDONLY);
st.st_mode &= ~S_IXUSR;
if (fd >= 0) {
n = read(fd, buf, 2);
if (n == 2)
/* DOS executables start with "MZ" */
if (!strcmp(buf, "#!") || !strcmp(buf, "MZ"))
st.st_mode |= S_IXUSR;
close(fd);
}
}
#endif
return st.st_mode & S_IXUSR;
}
static void list_commands_in_dir(struct cmdnames *cmds,
const char *path,
const char *prefix)
{
int prefix_len;
DIR *dir = opendir(path);
struct dirent *de;
struct strbuf buf = STRBUF_INIT;
int len;
if (!dir)
return;
if (!prefix)
prefix = "git-";
prefix_len = strlen(prefix);
strbuf_addf(&buf, "%s/", path);
len = buf.len;
while ((de = readdir(dir)) != NULL) {
int entlen;
if (prefixcmp(de->d_name, prefix))
continue;
strbuf_setlen(&buf, len);
strbuf_addstr(&buf, de->d_name);
if (!is_executable(buf.buf))
continue;
entlen = strlen(de->d_name) - prefix_len;
if (has_extension(de->d_name, ".exe"))
entlen -= 4;
add_cmdname(cmds, de->d_name + prefix_len, entlen);
}
closedir(dir);
strbuf_release(&buf);
}
void load_command_list(const char *prefix,
struct cmdnames *main_cmds,
struct cmdnames *other_cmds)
{
const char *env_path = getenv("PATH");
const char *exec_path = git_exec_path();
if (exec_path) {
list_commands_in_dir(main_cmds, exec_path, prefix);
qsort(main_cmds->names, main_cmds->cnt,
sizeof(*main_cmds->names), cmdname_compare);
uniq(main_cmds);
}
if (env_path) {
char *paths, *path, *colon;
path = paths = xstrdup(env_path);
while (1) {
if ((colon = strchr(path, PATH_SEP)))
*colon = 0;
if (!exec_path || strcmp(path, exec_path))
list_commands_in_dir(other_cmds, path, prefix);
if (!colon)
break;
path = colon + 1;
}
free(paths);
qsort(other_cmds->names, other_cmds->cnt,
sizeof(*other_cmds->names), cmdname_compare);
uniq(other_cmds);
}
exclude_cmds(other_cmds, main_cmds);
}
void list_commands(unsigned int colopts,
struct cmdnames *main_cmds, struct cmdnames *other_cmds)
{
if (main_cmds->cnt) {
const char *exec_path = git_exec_path();
printf_ln(_("available git commands in '%s'"), exec_path);
putchar('\n');
pretty_print_string_list(main_cmds, colopts);
putchar('\n');
}
if (other_cmds->cnt) {
printf_ln(_("git commands available from elsewhere on your $PATH"));
putchar('\n');
pretty_print_string_list(other_cmds, colopts);
putchar('\n');
}
}
int is_in_cmdlist(struct cmdnames *c, const char *s)
{
int i;
for (i = 0; i < c->cnt; i++)
if (!strcmp(s, c->names[i]->name))
return 1;
return 0;
}
static int autocorrect;
static struct cmdnames aliases;
static int git_unknown_cmd_config(const char *var, const char *value, void *cb)
{
if (!strcmp(var, "help.autocorrect"))
autocorrect = git_config_int(var,value);
/* Also use aliases for command lookup */
if (!prefixcmp(var, "alias."))
add_cmdname(&aliases, var + 6, strlen(var + 6));
return git_default_config(var, value, cb);
}
static int levenshtein_compare(const void *p1, const void *p2)
{
const struct cmdname *const *c1 = p1, *const *c2 = p2;
const char *s1 = (*c1)->name, *s2 = (*c2)->name;
int l1 = (*c1)->len;
int l2 = (*c2)->len;
return l1 != l2 ? l1 - l2 : strcmp(s1, s2);
}
static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old)
{
int i;
ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc);
for (i = 0; i < old->cnt; i++)
cmds->names[cmds->cnt++] = old->names[i];
free(old->names);
old->cnt = 0;
old->names = NULL;
}
/* An empirically derived magic number */
#define SIMILARITY_FLOOR 7
#define SIMILAR_ENOUGH(x) ((x) < SIMILARITY_FLOOR)
static const char bad_interpreter_advice[] =
N_("'%s' appears to be a git command, but we were not\n"
"able to execute it. Maybe git-%s is broken?");
const char *help_unknown_cmd(const char *cmd)
{
int i, n, best_similarity = 0;
struct cmdnames main_cmds, other_cmds;
memset(&main_cmds, 0, sizeof(main_cmds));
memset(&other_cmds, 0, sizeof(other_cmds));
memset(&aliases, 0, sizeof(aliases));
git_config(git_unknown_cmd_config, NULL);
load_command_list("git-", &main_cmds, &other_cmds);
add_cmd_list(&main_cmds, &aliases);
add_cmd_list(&main_cmds, &other_cmds);
qsort(main_cmds.names, main_cmds.cnt,
sizeof(main_cmds.names), cmdname_compare);
uniq(&main_cmds);
/* This abuses cmdname->len for levenshtein distance */
for (i = 0, n = 0; i < main_cmds.cnt; i++) {
int cmp = 0; /* avoid compiler stupidity */
const char *candidate = main_cmds.names[i]->name;
/*
* An exact match means we have the command, but
* for some reason exec'ing it gave us ENOENT; probably
* it's a bad interpreter in the #! line.
*/
if (!strcmp(candidate, cmd))
die(_(bad_interpreter_advice), cmd, cmd);
/* Does the candidate appear in common_cmds list? */
while (n < ARRAY_SIZE(common_cmds) &&
(cmp = strcmp(common_cmds[n].name, candidate)) < 0)
n++;
if ((n < ARRAY_SIZE(common_cmds)) && !cmp) {
/* Yes, this is one of the common commands */
n++; /* use the entry from common_cmds[] */
if (!prefixcmp(candidate, cmd)) {
/* Give prefix match a very good score */
main_cmds.names[i]->len = 0;
continue;
}
}
main_cmds.names[i]->len =
levenshtein(cmd, candidate, 0, 2, 1, 3) + 1;
}
qsort(main_cmds.names, main_cmds.cnt,
sizeof(*main_cmds.names), levenshtein_compare);
if (!main_cmds.cnt)
die(_("Uh oh. Your system reports no Git commands at all."));
/* skip and count prefix matches */
for (n = 0; n < main_cmds.cnt && !main_cmds.names[n]->len; n++)
; /* still counting */
if (main_cmds.cnt <= n) {
/* prefix matches with everything? that is too ambiguous */
best_similarity = SIMILARITY_FLOOR + 1;
} else {
/* count all the most similar ones */
for (best_similarity = main_cmds.names[n++]->len;
(n < main_cmds.cnt &&
best_similarity == main_cmds.names[n]->len);
n++)
; /* still counting */
}
if (autocorrect && n == 1 && SIMILAR_ENOUGH(best_similarity)) {
const char *assumed = main_cmds.names[0]->name;
main_cmds.names[0] = NULL;
clean_cmdnames(&main_cmds);
fprintf_ln(stderr,
_("WARNING: You called a Git command named '%s', "
"which does not exist.\n"
"Continuing under the assumption that you meant '%s'"),
cmd, assumed);
if (autocorrect > 0) {
fprintf_ln(stderr, _("in %0.1f seconds automatically..."),
(float)autocorrect/10.0);
poll(NULL, 0, autocorrect * 100);
}
return assumed;
}
fprintf_ln(stderr, _("git: '%s' is not a git command. See 'git --help'."), cmd);
if (SIMILAR_ENOUGH(best_similarity)) {
fprintf_ln(stderr,
Q_("\nDid you mean this?",
"\nDid you mean one of these?",
n));
for (i = 0; i < n; i++)
fprintf(stderr, "\t%s\n", main_cmds.names[i]->name);
}
exit(1);
}
int cmd_version(int argc, const char **argv, const char *prefix)
{
printf("git version %s\n", git_version_string);
return 0;
}