git/pretty.c
René Scharfe cde75e59e1 --pretty=format: on-demand format expansion
Some of the --pretty=format placeholders expansions are expensive to
calculate.  This is made worse by the current code's use of
interpolate(), which requires _all_ placeholders are to be prepared
up front.

One way to speed this up is to check which placeholders are present
in the format string and to prepare only the expansions that are
needed.  That still leaves the allocation overhead of interpolate().

Another way is to use a callback based approach together with the
strbuf library to keep allocations to a minimum and avoid string
copies.  That's what this patch does.  It introduces a new strbuf
function, strbuf_expand().

The function takes a format string, list of placeholder strings,
a user supplied function 'fn', and an opaque pointer 'context'
to tell 'fn' what thingy to operate on.

The function 'fn' is expected to accept a strbuf, a parsed
placeholder string and the 'context' pointer, and append the
interpolated value for the 'context' thingy, according to the
format specified by the placeholder.

Thanks to Pierre Habouzit for his suggestion to use strchrnul() and
the code surrounding its callsite.  And thanks to Junio for most of
this commit message. :)

Here my measurements of most of Paul Mackerras' test cases that
highlighted the performance problem (best of three runs):

(master)
$ time git log --pretty=oneline >/dev/null

real    0m0.390s
user    0m0.340s
sys     0m0.040s

(master)
$ time git log --pretty=raw >/dev/null

real    0m0.434s
user    0m0.408s
sys     0m0.016s

(master)
$ time git log --pretty="format:%H {%P} %ct" >/dev/null

real    0m1.347s
user    0m0.080s
sys     0m1.256s

(interp_find_active -- Dscho)
$ time ./git log --pretty="format:%H {%P} %ct" >/dev/null

real    0m0.694s
user    0m0.020s
sys     0m0.672s

(strbuf_expand -- this patch)
$ time ./git log --pretty="format:%H {%P} %ct" >/dev/null

real    0m0.395s
user    0m0.352s
sys     0m0.028s

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-09 01:30:07 -08:00

754 lines
18 KiB
C

#include "cache.h"
#include "commit.h"
#include "utf8.h"
#include "diff.h"
#include "revision.h"
static struct cmt_fmt_map {
const char *n;
size_t cmp_len;
enum cmit_fmt v;
} cmt_fmts[] = {
{ "raw", 1, CMIT_FMT_RAW },
{ "medium", 1, CMIT_FMT_MEDIUM },
{ "short", 1, CMIT_FMT_SHORT },
{ "email", 1, CMIT_FMT_EMAIL },
{ "full", 5, CMIT_FMT_FULL },
{ "fuller", 5, CMIT_FMT_FULLER },
{ "oneline", 1, CMIT_FMT_ONELINE },
{ "format:", 7, CMIT_FMT_USERFORMAT},
};
static char *user_format;
enum cmit_fmt get_commit_format(const char *arg)
{
int i;
if (!arg || !*arg)
return CMIT_FMT_DEFAULT;
if (*arg == '=')
arg++;
if (!prefixcmp(arg, "format:")) {
if (user_format)
free(user_format);
user_format = xstrdup(arg + 7);
return CMIT_FMT_USERFORMAT;
}
for (i = 0; i < ARRAY_SIZE(cmt_fmts); i++) {
if (!strncmp(arg, cmt_fmts[i].n, cmt_fmts[i].cmp_len) &&
!strncmp(arg, cmt_fmts[i].n, strlen(arg)))
return cmt_fmts[i].v;
}
die("invalid --pretty format: %s", arg);
}
/*
* Generic support for pretty-printing the header
*/
static int get_one_line(const char *msg)
{
int ret = 0;
for (;;) {
char c = *msg++;
if (!c)
break;
ret++;
if (c == '\n')
break;
}
return ret;
}
/* High bit set, or ISO-2022-INT */
int non_ascii(int ch)
{
ch = (ch & 0xff);
return ((ch & 0x80) || (ch == 0x1b));
}
static int is_rfc2047_special(char ch)
{
return (non_ascii(ch) || (ch == '=') || (ch == '?') || (ch == '_'));
}
static void add_rfc2047(struct strbuf *sb, const char *line, int len,
const char *encoding)
{
int i, last;
for (i = 0; i < len; i++) {
int ch = line[i];
if (non_ascii(ch))
goto needquote;
if ((i + 1 < len) && (ch == '=' && line[i+1] == '?'))
goto needquote;
}
strbuf_add(sb, line, len);
return;
needquote:
strbuf_grow(sb, len * 3 + strlen(encoding) + 100);
strbuf_addf(sb, "=?%s?q?", encoding);
for (i = last = 0; i < len; i++) {
unsigned ch = line[i] & 0xFF;
/*
* We encode ' ' using '=20' even though rfc2047
* allows using '_' for readability. Unfortunately,
* many programs do not understand this and just
* leave the underscore in place.
*/
if (is_rfc2047_special(ch) || ch == ' ') {
strbuf_add(sb, line + last, i - last);
strbuf_addf(sb, "=%02X", ch);
last = i + 1;
}
}
strbuf_add(sb, line + last, len - last);
strbuf_addstr(sb, "?=");
}
static void add_user_info(const char *what, enum cmit_fmt fmt, struct strbuf *sb,
const char *line, enum date_mode dmode,
const char *encoding)
{
char *date;
int namelen;
unsigned long time;
int tz;
const char *filler = " ";
if (fmt == CMIT_FMT_ONELINE)
return;
date = strchr(line, '>');
if (!date)
return;
namelen = ++date - line;
time = strtoul(date, &date, 10);
tz = strtol(date, NULL, 10);
if (fmt == CMIT_FMT_EMAIL) {
char *name_tail = strchr(line, '<');
int display_name_length;
if (!name_tail)
return;
while (line < name_tail && isspace(name_tail[-1]))
name_tail--;
display_name_length = name_tail - line;
filler = "";
strbuf_addstr(sb, "From: ");
add_rfc2047(sb, line, display_name_length, encoding);
strbuf_add(sb, name_tail, namelen - display_name_length);
strbuf_addch(sb, '\n');
} else {
strbuf_addf(sb, "%s: %.*s%.*s\n", what,
(fmt == CMIT_FMT_FULLER) ? 4 : 0,
filler, namelen, line);
}
switch (fmt) {
case CMIT_FMT_MEDIUM:
strbuf_addf(sb, "Date: %s\n", show_date(time, tz, dmode));
break;
case CMIT_FMT_EMAIL:
strbuf_addf(sb, "Date: %s\n", show_date(time, tz, DATE_RFC2822));
break;
case CMIT_FMT_FULLER:
strbuf_addf(sb, "%sDate: %s\n", what, show_date(time, tz, dmode));
break;
default:
/* notin' */
break;
}
}
static int is_empty_line(const char *line, int *len_p)
{
int len = *len_p;
while (len && isspace(line[len-1]))
len--;
*len_p = len;
return !len;
}
static void add_merge_info(enum cmit_fmt fmt, struct strbuf *sb,
const struct commit *commit, int abbrev)
{
struct commit_list *parent = commit->parents;
if ((fmt == CMIT_FMT_ONELINE) || (fmt == CMIT_FMT_EMAIL) ||
!parent || !parent->next)
return;
strbuf_addstr(sb, "Merge:");
while (parent) {
struct commit *p = parent->item;
const char *hex = NULL;
const char *dots;
if (abbrev)
hex = find_unique_abbrev(p->object.sha1, abbrev);
if (!hex)
hex = sha1_to_hex(p->object.sha1);
dots = (abbrev && strlen(hex) != 40) ? "..." : "";
parent = parent->next;
strbuf_addf(sb, " %s%s", hex, dots);
}
strbuf_addch(sb, '\n');
}
static char *get_header(const struct commit *commit, const char *key)
{
int key_len = strlen(key);
const char *line = commit->buffer;
for (;;) {
const char *eol = strchr(line, '\n'), *next;
if (line == eol)
return NULL;
if (!eol) {
eol = line + strlen(line);
next = NULL;
} else
next = eol + 1;
if (eol - line > key_len &&
!strncmp(line, key, key_len) &&
line[key_len] == ' ') {
return xmemdupz(line + key_len + 1, eol - line - key_len - 1);
}
line = next;
}
}
static char *replace_encoding_header(char *buf, const char *encoding)
{
struct strbuf tmp;
size_t start, len;
char *cp = buf;
/* guess if there is an encoding header before a \n\n */
while (strncmp(cp, "encoding ", strlen("encoding "))) {
cp = strchr(cp, '\n');
if (!cp || *++cp == '\n')
return buf;
}
start = cp - buf;
cp = strchr(cp, '\n');
if (!cp)
return buf; /* should not happen but be defensive */
len = cp + 1 - (buf + start);
strbuf_init(&tmp, 0);
strbuf_attach(&tmp, buf, strlen(buf), strlen(buf) + 1);
if (is_encoding_utf8(encoding)) {
/* we have re-coded to UTF-8; drop the header */
strbuf_remove(&tmp, start, len);
} else {
/* just replaces XXXX in 'encoding XXXX\n' */
strbuf_splice(&tmp, start + strlen("encoding "),
len - strlen("encoding \n"),
encoding, strlen(encoding));
}
return strbuf_detach(&tmp, NULL);
}
static char *logmsg_reencode(const struct commit *commit,
const char *output_encoding)
{
static const char *utf8 = "utf-8";
const char *use_encoding;
char *encoding;
char *out;
if (!*output_encoding)
return NULL;
encoding = get_header(commit, "encoding");
use_encoding = encoding ? encoding : utf8;
if (!strcmp(use_encoding, output_encoding))
if (encoding) /* we'll strip encoding header later */
out = xstrdup(commit->buffer);
else
return NULL; /* nothing to do */
else
out = reencode_string(commit->buffer,
output_encoding, use_encoding);
if (out)
out = replace_encoding_header(out, output_encoding);
free(encoding);
return out;
}
static void format_person_part(struct strbuf *sb, char part,
const char *msg, int len)
{
int start, end, tz = 0;
unsigned long date;
char *ep;
/* parse name */
for (end = 0; end < len && msg[end] != '<'; end++)
; /* do nothing */
start = end + 1;
while (end > 0 && isspace(msg[end - 1]))
end--;
if (part == 'n') { /* name */
strbuf_add(sb, msg, end);
return;
}
if (start >= len)
return;
/* parse email */
for (end = start + 1; end < len && msg[end] != '>'; end++)
; /* do nothing */
if (end >= len)
return;
if (part == 'e') { /* email */
strbuf_add(sb, msg + start, end - start);
return;
}
/* parse date */
for (start = end + 1; start < len && isspace(msg[start]); start++)
; /* do nothing */
if (start >= len)
return;
date = strtoul(msg + start, &ep, 10);
if (msg + start == ep)
return;
if (part == 't') { /* date, UNIX timestamp */
strbuf_add(sb, msg + start, ep - (msg + start));
return;
}
/* parse tz */
for (start = ep - msg + 1; start < len && isspace(msg[start]); start++)
; /* do nothing */
if (start + 1 < len) {
tz = strtoul(msg + start + 1, NULL, 10);
if (msg[start] == '-')
tz = -tz;
}
switch (part) {
case 'd': /* date */
strbuf_addstr(sb, show_date(date, tz, DATE_NORMAL));
return;
case 'D': /* date, RFC2822 style */
strbuf_addstr(sb, show_date(date, tz, DATE_RFC2822));
return;
case 'r': /* date, relative */
strbuf_addstr(sb, show_date(date, tz, DATE_RELATIVE));
return;
case 'i': /* date, ISO 8601 */
strbuf_addstr(sb, show_date(date, tz, DATE_ISO8601));
return;
}
}
static void format_commit_item(struct strbuf *sb, const char *placeholder,
void *context)
{
const struct commit *commit = context;
struct commit_list *p;
int i;
enum { HEADER, SUBJECT, BODY } state;
const char *msg = commit->buffer;
/* these are independent of the commit */
switch (placeholder[0]) {
case 'C':
switch (placeholder[3]) {
case 'd': /* red */
strbuf_addstr(sb, "\033[31m");
return;
case 'e': /* green */
strbuf_addstr(sb, "\033[32m");
return;
case 'u': /* blue */
strbuf_addstr(sb, "\033[34m");
return;
case 's': /* reset color */
strbuf_addstr(sb, "\033[m");
return;
}
case 'n': /* newline */
strbuf_addch(sb, '\n');
return;
}
/* these depend on the commit */
if (!commit->object.parsed)
parse_object(commit->object.sha1);
switch (placeholder[0]) {
case 'H': /* commit hash */
strbuf_addstr(sb, sha1_to_hex(commit->object.sha1));
return;
case 'h': /* abbreviated commit hash */
strbuf_addstr(sb, find_unique_abbrev(commit->object.sha1,
DEFAULT_ABBREV));
return;
case 'T': /* tree hash */
strbuf_addstr(sb, sha1_to_hex(commit->tree->object.sha1));
return;
case 't': /* abbreviated tree hash */
strbuf_addstr(sb, find_unique_abbrev(commit->tree->object.sha1,
DEFAULT_ABBREV));
return;
case 'P': /* parent hashes */
for (p = commit->parents; p; p = p->next) {
if (p != commit->parents)
strbuf_addch(sb, ' ');
strbuf_addstr(sb, sha1_to_hex(p->item->object.sha1));
}
return;
case 'p': /* abbreviated parent hashes */
for (p = commit->parents; p; p = p->next) {
if (p != commit->parents)
strbuf_addch(sb, ' ');
strbuf_addstr(sb, find_unique_abbrev(
p->item->object.sha1, DEFAULT_ABBREV));
}
return;
case 'm': /* left/right/bottom */
strbuf_addch(sb, (commit->object.flags & BOUNDARY)
? '-'
: (commit->object.flags & SYMMETRIC_LEFT)
? '<'
: '>');
return;
}
/* For the rest we have to parse the commit header. */
for (i = 0, state = HEADER; msg[i] && state < BODY; i++) {
int eol;
for (eol = i; msg[eol] && msg[eol] != '\n'; eol++)
; /* do nothing */
if (state == SUBJECT) {
if (placeholder[0] == 's') {
strbuf_add(sb, msg + i, eol - i);
return;
}
i = eol;
}
if (i == eol) {
state++;
/* strip empty lines */
while (msg[eol + 1] == '\n')
eol++;
} else if (!prefixcmp(msg + i, "author ")) {
if (placeholder[0] == 'a') {
format_person_part(sb, placeholder[1],
msg + i + 7, eol - i - 7);
return;
}
} else if (!prefixcmp(msg + i, "committer ")) {
if (placeholder[0] == 'c') {
format_person_part(sb, placeholder[1],
msg + i + 10, eol - i - 10);
return;
}
} else if (!prefixcmp(msg + i, "encoding ")) {
if (placeholder[0] == 'e') {
strbuf_add(sb, msg + i + 9, eol - i - 9);
return;
}
}
i = eol;
}
if (msg[i] && placeholder[0] == 'b') /* body */
strbuf_addstr(sb, msg + i);
}
void format_commit_message(const struct commit *commit,
const void *format, struct strbuf *sb)
{
const char *placeholders[] = {
"H", /* commit hash */
"h", /* abbreviated commit hash */
"T", /* tree hash */
"t", /* abbreviated tree hash */
"P", /* parent hashes */
"p", /* abbreviated parent hashes */
"an", /* author name */
"ae", /* author email */
"ad", /* author date */
"aD", /* author date, RFC2822 style */
"ar", /* author date, relative */
"at", /* author date, UNIX timestamp */
"ai", /* author date, ISO 8601 */
"cn", /* committer name */
"ce", /* committer email */
"cd", /* committer date */
"cD", /* committer date, RFC2822 style */
"cr", /* committer date, relative */
"ct", /* committer date, UNIX timestamp */
"ci", /* committer date, ISO 8601 */
"e", /* encoding */
"s", /* subject */
"b", /* body */
"Cred", /* red */
"Cgreen", /* green */
"Cblue", /* blue */
"Creset", /* reset color */
"n", /* newline */
"m", /* left/right/bottom */
NULL
};
strbuf_expand(sb, format, placeholders, format_commit_item, (void *)commit);
}
static void pp_header(enum cmit_fmt fmt,
int abbrev,
enum date_mode dmode,
const char *encoding,
const struct commit *commit,
const char **msg_p,
struct strbuf *sb)
{
int parents_shown = 0;
for (;;) {
const char *line = *msg_p;
int linelen = get_one_line(*msg_p);
if (!linelen)
return;
*msg_p += linelen;
if (linelen == 1)
/* End of header */
return;
if (fmt == CMIT_FMT_RAW) {
strbuf_add(sb, line, linelen);
continue;
}
if (!memcmp(line, "parent ", 7)) {
if (linelen != 48)
die("bad parent line in commit");
continue;
}
if (!parents_shown) {
struct commit_list *parent;
int num;
for (parent = commit->parents, num = 0;
parent;
parent = parent->next, num++)
;
/* with enough slop */
strbuf_grow(sb, num * 50 + 20);
add_merge_info(fmt, sb, commit, abbrev);
parents_shown = 1;
}
/*
* MEDIUM == DEFAULT shows only author with dates.
* FULL shows both authors but not dates.
* FULLER shows both authors and dates.
*/
if (!memcmp(line, "author ", 7)) {
strbuf_grow(sb, linelen + 80);
add_user_info("Author", fmt, sb, line + 7, dmode, encoding);
}
if (!memcmp(line, "committer ", 10) &&
(fmt == CMIT_FMT_FULL || fmt == CMIT_FMT_FULLER)) {
strbuf_grow(sb, linelen + 80);
add_user_info("Commit", fmt, sb, line + 10, dmode, encoding);
}
}
}
static void pp_title_line(enum cmit_fmt fmt,
const char **msg_p,
struct strbuf *sb,
const char *subject,
const char *after_subject,
const char *encoding,
int plain_non_ascii)
{
struct strbuf title;
strbuf_init(&title, 80);
for (;;) {
const char *line = *msg_p;
int linelen = get_one_line(line);
*msg_p += linelen;
if (!linelen || is_empty_line(line, &linelen))
break;
strbuf_grow(&title, linelen + 2);
if (title.len) {
if (fmt == CMIT_FMT_EMAIL) {
strbuf_addch(&title, '\n');
}
strbuf_addch(&title, ' ');
}
strbuf_add(&title, line, linelen);
}
strbuf_grow(sb, title.len + 1024);
if (subject) {
strbuf_addstr(sb, subject);
add_rfc2047(sb, title.buf, title.len, encoding);
} else {
strbuf_addbuf(sb, &title);
}
strbuf_addch(sb, '\n');
if (plain_non_ascii) {
const char *header_fmt =
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=%s\n"
"Content-Transfer-Encoding: 8bit\n";
strbuf_addf(sb, header_fmt, encoding);
}
if (after_subject) {
strbuf_addstr(sb, after_subject);
}
if (fmt == CMIT_FMT_EMAIL) {
strbuf_addch(sb, '\n');
}
strbuf_release(&title);
}
static void pp_remainder(enum cmit_fmt fmt,
const char **msg_p,
struct strbuf *sb,
int indent)
{
int first = 1;
for (;;) {
const char *line = *msg_p;
int linelen = get_one_line(line);
*msg_p += linelen;
if (!linelen)
break;
if (is_empty_line(line, &linelen)) {
if (first)
continue;
if (fmt == CMIT_FMT_SHORT)
break;
}
first = 0;
strbuf_grow(sb, linelen + indent + 20);
if (indent) {
memset(sb->buf + sb->len, ' ', indent);
strbuf_setlen(sb, sb->len + indent);
}
strbuf_add(sb, line, linelen);
strbuf_addch(sb, '\n');
}
}
void pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit,
struct strbuf *sb, int abbrev,
const char *subject, const char *after_subject,
enum date_mode dmode, int plain_non_ascii)
{
unsigned long beginning_of_body;
int indent = 4;
const char *msg = commit->buffer;
char *reencoded;
const char *encoding;
if (fmt == CMIT_FMT_USERFORMAT) {
format_commit_message(commit, user_format, sb);
return;
}
encoding = (git_log_output_encoding
? git_log_output_encoding
: git_commit_encoding);
if (!encoding)
encoding = "utf-8";
reencoded = logmsg_reencode(commit, encoding);
if (reencoded) {
msg = reencoded;
}
if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL)
indent = 0;
/* After-subject is used to pass in Content-Type: multipart
* MIME header; in that case we do not have to do the
* plaintext content type even if the commit message has
* non 7-bit ASCII character. Otherwise, check if we need
* to say this is not a 7-bit ASCII.
*/
if (fmt == CMIT_FMT_EMAIL && !after_subject) {
int i, ch, in_body;
for (in_body = i = 0; (ch = msg[i]); i++) {
if (!in_body) {
/* author could be non 7-bit ASCII but
* the log may be so; skip over the
* header part first.
*/
if (ch == '\n' && msg[i+1] == '\n')
in_body = 1;
}
else if (non_ascii(ch)) {
plain_non_ascii = 1;
break;
}
}
}
pp_header(fmt, abbrev, dmode, encoding, commit, &msg, sb);
if (fmt != CMIT_FMT_ONELINE && !subject) {
strbuf_addch(sb, '\n');
}
/* Skip excess blank lines at the beginning of body, if any... */
for (;;) {
int linelen = get_one_line(msg);
int ll = linelen;
if (!linelen)
break;
if (!is_empty_line(msg, &ll))
break;
msg += linelen;
}
/* These formats treat the title line specially. */
if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL)
pp_title_line(fmt, &msg, sb, subject,
after_subject, encoding, plain_non_ascii);
beginning_of_body = sb->len;
if (fmt != CMIT_FMT_ONELINE)
pp_remainder(fmt, &msg, sb, indent);
strbuf_rtrim(sb);
/* Make sure there is an EOLN for the non-oneline case */
if (fmt != CMIT_FMT_ONELINE)
strbuf_addch(sb, '\n');
/*
* The caller may append additional body text in e-mail
* format. Make sure we did not strip the blank line
* between the header and the body.
*/
if (fmt == CMIT_FMT_EMAIL && sb->len <= beginning_of_body)
strbuf_addch(sb, '\n');
free(reencoded);
}