Merge branch 'ag/blame-encoding'

* ag/blame-encoding:
  builtin-blame: Reencode commit messages according to git-log rules.
This commit is contained in:
Junio C Hamano 2008-11-02 16:36:30 -08:00
commit 0eb5ebc1d8
9 changed files with 136 additions and 14 deletions

View file

@ -49,6 +49,13 @@ of lines before or after the line given by <start>.
Show the result incrementally in a format designed for
machine consumption.
--encoding=<encoding>::
Specifies the encoding used to output author names
and commit summaries. Setting it to `none` makes blame
output unconverted data. For more information see the
discussion about encoding in the linkgit:git-log[1]
manual page.
--contents <file>::
When <rev> is not specified, the command annotates the
changes starting backwards from the working tree copy.

View file

@ -37,9 +37,9 @@ of `i18n.commitencoding` in its `encoding` header. This is to
help other people who look at them later. Lack of this header
implies that the commit log message is encoded in UTF-8.
. 'git-log', 'git-show' and friends looks at the `encoding`
header of a commit object, and tries to re-code the log
message into UTF-8 unless otherwise specified. You can
. 'git-log', 'git-show', 'git-blame' and friends look at the
`encoding` header of a commit object, and try to re-code the
log message into UTF-8 unless otherwise specified. You can
specify the desired output encoding with
`i18n.logoutputencoding` in `.git/config` file, like this:
+

View file

@ -1431,7 +1431,7 @@ static void get_commit_info(struct commit *commit,
int detailed)
{
int len;
char *tmp, *endp;
char *tmp, *endp, *reencoded, *message;
static char author_buf[1024];
static char committer_buf[1024];
static char summary_buf[1024];
@ -1449,24 +1449,29 @@ static void get_commit_info(struct commit *commit,
die("Cannot read commit %s",
sha1_to_hex(commit->object.sha1));
}
reencoded = reencode_commit_message(commit, NULL);
message = reencoded ? reencoded : commit->buffer;
ret->author = author_buf;
get_ac_line(commit->buffer, "\nauthor ",
get_ac_line(message, "\nauthor ",
sizeof(author_buf), author_buf, &ret->author_mail,
&ret->author_time, &ret->author_tz);
if (!detailed)
if (!detailed) {
free(reencoded);
return;
}
ret->committer = committer_buf;
get_ac_line(commit->buffer, "\ncommitter ",
get_ac_line(message, "\ncommitter ",
sizeof(committer_buf), committer_buf, &ret->committer_mail,
&ret->committer_time, &ret->committer_tz);
ret->summary = summary_buf;
tmp = strstr(commit->buffer, "\n\n");
tmp = strstr(message, "\n\n");
if (!tmp) {
error_out:
sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1));
free(reencoded);
return;
}
tmp += 2;
@ -1478,6 +1483,7 @@ static void get_commit_info(struct commit *commit,
goto error_out;
memcpy(summary_buf, tmp, len);
summary_buf[len] = 0;
free(reencoded);
}
/*

View file

@ -65,6 +65,8 @@ enum cmit_fmt {
extern int non_ascii(int);
struct rev_info; /* in revision.h, it circularly uses enum cmit_fmt */
extern char *reencode_commit_message(const struct commit *commit,
const char **encoding_p);
extern void get_commit_format(const char *arg, struct rev_info *);
extern void format_commit_message(const struct commit *commit,
const void *format, struct strbuf *sb,

View file

@ -783,6 +783,20 @@ void pp_remainder(enum cmit_fmt fmt,
}
}
/*
 * Re-encode the message of `commit` into the encoding used for log output.
 *
 * The target encoding is git_log_output_encoding when that is set,
 * otherwise git_commit_encoding, and "utf-8" when neither is set.
 * When `encoding_p` is non-NULL the name of the chosen encoding is
 * stored through it.
 *
 * Returns the result of logmsg_reencode() for the chosen encoding.
 * NOTE(review): callers appear to treat a NULL result as "use the
 * original commit buffer" and free() a non-NULL result -- confirm
 * against logmsg_reencode().
 */
char *reencode_commit_message(const struct commit *commit, const char **encoding_p)
{
	const char *target = git_log_output_encoding;

	if (!target)
		target = git_commit_encoding;
	if (!target)
		target = "utf-8";

	if (encoding_p)
		*encoding_p = target;

	return logmsg_reencode(commit, target);
}
void pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit,
struct strbuf *sb, int abbrev,
const char *subject, const char *after_subject,
@ -799,12 +813,7 @@ void pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit,
return;
}
encoding = (git_log_output_encoding
? git_log_output_encoding
: git_commit_encoding);
if (!encoding)
encoding = "utf-8";
reencoded = logmsg_reencode(commit, encoding);
reencoded = reencode_commit_message(commit, &encoding);
if (reencoded) {
msg = reencoded;
}

92
t/t8005-blame-i18n.sh Executable file
View file

@ -0,0 +1,92 @@
#!/bin/sh
# Tests that 'git blame' converts author names and commit summaries
# between encodings.
test_description='git blame encoding conversion'
. ./test-lib.sh
# Each fixture defines a <ENC>_NAME and <ENC>_MSG shell variable holding the
# same author name and commit message in the named encoding.
. "$TEST_DIRECTORY"/t8005/utf8.txt
. "$TEST_DIRECTORY"/t8005/cp1251.txt
. "$TEST_DIRECTORY"/t8005/sjis.txt
# Build a three-commit history on a single file, one appended line per commit.
# The first commit is made before i18n.commitencoding is configured; the
# second and third are made after setting it to cp1251 and shift-jis
# respectively. i18n.commitencoding is left at shift-jis for the later tests.
test_expect_success 'setup the repository' '
# Create the file
echo "UTF-8 LINE" > file &&
git add file &&
git commit --author "$UTF8_NAME <utf8@localhost>" -m "$UTF8_MSG" &&
echo "CP1251 LINE" >> file &&
git add file &&
git config i18n.commitencoding cp1251 &&
git commit --author "$CP1251_NAME <cp1251@localhost>" -m "$CP1251_MSG" &&
echo "SJIS LINE" >> file &&
git add file &&
git config i18n.commitencoding shift-jis &&
git commit --author "$SJIS_NAME <sjis@localhost>" -m "$SJIS_MSG"
'
# With i18n.logoutputencoding unset, blame output is expected in the
# configured i18n.commitencoding (shift-jis, left set by the setup test),
# so the author and summary of all three commits must match the SJIS strings.
cat >expected <<EOF
author $SJIS_NAME
summary $SJIS_MSG
author $SJIS_NAME
summary $SJIS_MSG
author $SJIS_NAME
summary $SJIS_MSG
EOF
# The blame output goes to a file first so that a failing 'git blame' breaks
# the && chain instead of being hidden behind a pipe. The filter uses sed
# because "\|" alternation is not part of POSIX basic regular expressions.
test_expect_success \
'blame respects i18n.commitencoding' '
	git blame --incremental file >output &&
	sed -n -e "/^author /p" -e "/^summary /p" output >actual &&
	test_cmp expected actual
'
# Once i18n.logoutputencoding is set to cp1251 it takes precedence, so the
# author and summary of all three commits must match the CP1251 strings.
cat >expected <<EOF
author $CP1251_NAME
summary $CP1251_MSG
author $CP1251_NAME
summary $CP1251_MSG
author $CP1251_NAME
summary $CP1251_MSG
EOF
# The blame output goes to a file first so that a failing 'git blame' breaks
# the && chain instead of being hidden behind a pipe. The filter uses sed
# because "\|" alternation is not part of POSIX basic regular expressions.
test_expect_success \
'blame respects i18n.logoutputencoding' '
	git config i18n.logoutputencoding cp1251 &&
	git blame --incremental file >output &&
	sed -n -e "/^author /p" -e "/^summary /p" output >actual &&
	test_cmp expected actual
'
# An explicit --encoding=utf-8 on the command line must win over the
# configuration, so the author and summary of all three commits must match
# the UTF-8 strings.
cat >expected <<EOF
author $UTF8_NAME
summary $UTF8_MSG
author $UTF8_NAME
summary $UTF8_MSG
author $UTF8_NAME
summary $UTF8_MSG
EOF
# The blame output goes to a file first so that a failing 'git blame' breaks
# the && chain instead of being hidden behind a pipe. The filter uses sed
# because "\|" alternation is not part of POSIX basic regular expressions.
test_expect_success \
'blame respects --encoding=utf-8' '
	git blame --incremental --encoding=utf-8 file >output &&
	sed -n -e "/^author /p" -e "/^summary /p" output >actual &&
	test_cmp expected actual
'
# --encoding=none asks blame to output unconverted data, so each commit is
# expected in the encoding it was recorded with: SJIS, CP1251 and UTF-8.
cat >expected <<EOF
author $SJIS_NAME
summary $SJIS_MSG
author $CP1251_NAME
summary $CP1251_MSG
author $UTF8_NAME
summary $UTF8_MSG
EOF
# The blame output goes to a file first so that a failing 'git blame' breaks
# the && chain instead of being hidden behind a pipe. The filter uses sed
# because "\|" alternation is not part of POSIX basic regular expressions.
test_expect_success \
'blame respects --encoding=none' '
	git blame --incremental --encoding=none file >output &&
	sed -n -e "/^author /p" -e "/^summary /p" output >actual &&
	test_cmp expected actual
'
test_done

2
t/t8005/cp1251.txt Normal file
View file

@ -0,0 +1,2 @@
CP1251_NAME="Иван Петрович Сидоров"
CP1251_MSG="Тестовое сообщение"

2
t/t8005/sjis.txt Normal file
View file

@ -0,0 +1,2 @@
SJIS_NAME="„I„r„p„~ „P„u„„„„€„r„y„‰ „R„y„t„€„„€„r"
SJIS_MSG="„S„u„ƒ„„„€„r„€„u „ƒ„€„€„q„„u„~„y„u"

2
t/t8005/utf8.txt Normal file
View file

@ -0,0 +1,2 @@
UTF8_NAME="Иван Петрович Сидоров"
UTF8_MSG="Тестовое сообщение"