git/builtin/shortlog.c
Ævar Arnfjörð Bjarmason 99d60545f8 string-list API: change "nr" and "alloc" to "size_t"
Change the "nr" and "alloc" members of "struct string_list" to use
"size_t" instead of "nr". On some platforms the size of an "unsigned
int" will be smaller than a "size_t", e.g. a 32 bit unsigned v.s. 64
bit unsigned. As "struct string_list" is a generic API we use in a lot
of places this might cause overflows.

As one example: code in "refs.c" keeps track of the number of refs
with a "size_t", and auxiliary code in builtin/remote.c in
get_ref_states() appends those to a "struct string_list".

While we're at it split the "nr" and "alloc" in string-list.h across
two lines, which is the case for most such struct member
declarations (e.g. in "strbuf.h" and "strvec.h").

Changing e.g. "int i" to "size_t i" in run_and_feed_hook() isn't
strictly necessary, and there are a lot more cases where we'll use a
local "int", "unsigned int" etc. variable derived from the "nr" in the
"struct string_list". But in that case as well as
add_wrapped_shortlog_msg() in builtin/shortlog.c we need to adjust the
printf format referring to "nr" anyway, so let's also change the other
variables referring to it.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-03-07 12:02:04 -08:00

478 lines
12 KiB
C

#include "builtin.h"
#include "cache.h"
#include "config.h"
#include "commit.h"
#include "diff.h"
#include "string-list.h"
#include "revision.h"
#include "utf8.h"
#include "mailmap.h"
#include "shortlog.h"
#include "parse-options.h"
#include "trailer.h"
#include "strmap.h"
static char const * const shortlog_usage[] = {
N_("git shortlog [<options>] [<revision-range>] [[--] <path>...]"),
N_("git log --pretty=short | git shortlog [<options>]"),
NULL
};
/*
* The util field of our string_list_items will contain one of two things:
*
* - if --summary is not in use, it will point to a string list of the
* oneline subjects assigned to this author
*
* - if --summary is in use, we don't need that list; we only need to know
* its size. So we abuse the pointer slot to store our integer counter.
*
* This macro accesses the latter.
*/
#define UTIL_TO_INT(x) ((intptr_t)(x)->util)
static int compare_by_counter(const void *a1, const void *a2)
{
const struct string_list_item *i1 = a1, *i2 = a2;
return UTIL_TO_INT(i2) - UTIL_TO_INT(i1);
}
static int compare_by_list(const void *a1, const void *a2)
{
const struct string_list_item *i1 = a1, *i2 = a2;
const struct string_list *l1 = i1->util, *l2 = i2->util;
if (l1->nr < l2->nr)
return 1;
else if (l1->nr == l2->nr)
return 0;
else
return -1;
}
static void insert_one_record(struct shortlog *log,
const char *ident,
const char *oneline)
{
struct string_list_item *item;
item = string_list_insert(&log->list, ident);
if (log->summary)
item->util = (void *)(UTIL_TO_INT(item) + 1);
else {
char *buffer;
struct strbuf subject = STRBUF_INIT;
const char *eol;
/* Skip any leading whitespace, including any blank lines. */
while (*oneline && isspace(*oneline))
oneline++;
eol = strchr(oneline, '\n');
if (!eol)
eol = oneline + strlen(oneline);
if (starts_with(oneline, "[PATCH")) {
char *eob = strchr(oneline, ']');
if (eob && (!eol || eob < eol))
oneline = eob + 1;
}
while (*oneline && isspace(*oneline) && *oneline != '\n')
oneline++;
format_subject(&subject, oneline, " ");
buffer = strbuf_detach(&subject, NULL);
if (item->util == NULL)
item->util = xcalloc(1, sizeof(struct string_list));
string_list_append(item->util, buffer);
}
}
static int parse_ident(struct shortlog *log,
struct strbuf *out, const char *in)
{
const char *mailbuf, *namebuf;
size_t namelen, maillen;
struct ident_split ident;
if (split_ident_line(&ident, in, strlen(in)))
return -1;
namebuf = ident.name_begin;
mailbuf = ident.mail_begin;
namelen = ident.name_end - ident.name_begin;
maillen = ident.mail_end - ident.mail_begin;
map_user(&log->mailmap, &mailbuf, &maillen, &namebuf, &namelen);
strbuf_add(out, namebuf, namelen);
if (log->email)
strbuf_addf(out, " <%.*s>", (int)maillen, mailbuf);
return 0;
}
static void read_from_stdin(struct shortlog *log)
{
struct strbuf ident = STRBUF_INIT;
struct strbuf mapped_ident = STRBUF_INIT;
struct strbuf oneline = STRBUF_INIT;
static const char *author_match[2] = { "Author: ", "author " };
static const char *committer_match[2] = { "Commit: ", "committer " };
const char **match;
if (HAS_MULTI_BITS(log->groups))
die(_("using multiple --group options with stdin is not supported"));
switch (log->groups) {
case SHORTLOG_GROUP_AUTHOR:
match = author_match;
break;
case SHORTLOG_GROUP_COMMITTER:
match = committer_match;
break;
case SHORTLOG_GROUP_TRAILER:
die(_("using --group=trailer with stdin is not supported"));
default:
BUG("unhandled shortlog group");
}
while (strbuf_getline_lf(&ident, stdin) != EOF) {
const char *v;
if (!skip_prefix(ident.buf, match[0], &v) &&
!skip_prefix(ident.buf, match[1], &v))
continue;
while (strbuf_getline_lf(&oneline, stdin) != EOF &&
oneline.len)
; /* discard headers */
while (strbuf_getline_lf(&oneline, stdin) != EOF &&
!oneline.len)
; /* discard blanks */
strbuf_reset(&mapped_ident);
if (parse_ident(log, &mapped_ident, v) < 0)
continue;
insert_one_record(log, mapped_ident.buf, oneline.buf);
}
strbuf_release(&ident);
strbuf_release(&mapped_ident);
strbuf_release(&oneline);
}
static void insert_records_from_trailers(struct shortlog *log,
struct strset *dups,
struct commit *commit,
struct pretty_print_context *ctx,
const char *oneline)
{
struct trailer_iterator iter;
const char *commit_buffer, *body;
struct strbuf ident = STRBUF_INIT;
/*
* Using format_commit_message("%B") would be simpler here, but
* this saves us copying the message.
*/
commit_buffer = logmsg_reencode(commit, NULL, ctx->output_encoding);
body = strstr(commit_buffer, "\n\n");
if (!body)
return;
trailer_iterator_init(&iter, body);
while (trailer_iterator_advance(&iter)) {
const char *value = iter.val.buf;
if (!string_list_has_string(&log->trailers, iter.key.buf))
continue;
strbuf_reset(&ident);
if (!parse_ident(log, &ident, value))
value = ident.buf;
if (!strset_add(dups, value))
continue;
insert_one_record(log, value, oneline);
}
trailer_iterator_release(&iter);
strbuf_release(&ident);
unuse_commit_buffer(commit, commit_buffer);
}
void shortlog_add_commit(struct shortlog *log, struct commit *commit)
{
struct strbuf ident = STRBUF_INIT;
struct strbuf oneline = STRBUF_INIT;
struct strset dups = STRSET_INIT;
struct pretty_print_context ctx = {0};
const char *oneline_str;
ctx.fmt = CMIT_FMT_USERFORMAT;
ctx.abbrev = log->abbrev;
ctx.print_email_subject = 1;
ctx.date_mode.type = DATE_NORMAL;
ctx.output_encoding = get_log_output_encoding();
if (!log->summary) {
if (log->user_format)
pretty_print_commit(&ctx, commit, &oneline);
else
format_commit_message(commit, "%s", &oneline, &ctx);
}
oneline_str = oneline.len ? oneline.buf : "<none>";
if (log->groups & SHORTLOG_GROUP_AUTHOR) {
strbuf_reset(&ident);
format_commit_message(commit,
log->email ? "%aN <%aE>" : "%aN",
&ident, &ctx);
if (!HAS_MULTI_BITS(log->groups) ||
strset_add(&dups, ident.buf))
insert_one_record(log, ident.buf, oneline_str);
}
if (log->groups & SHORTLOG_GROUP_COMMITTER) {
strbuf_reset(&ident);
format_commit_message(commit,
log->email ? "%cN <%cE>" : "%cN",
&ident, &ctx);
if (!HAS_MULTI_BITS(log->groups) ||
strset_add(&dups, ident.buf))
insert_one_record(log, ident.buf, oneline_str);
}
if (log->groups & SHORTLOG_GROUP_TRAILER) {
insert_records_from_trailers(log, &dups, commit, &ctx, oneline_str);
}
strset_clear(&dups);
strbuf_release(&ident);
strbuf_release(&oneline);
}
static void get_from_rev(struct rev_info *rev, struct shortlog *log)
{
struct commit *commit;
if (prepare_revision_walk(rev))
die(_("revision walk setup failed"));
while ((commit = get_revision(rev)) != NULL)
shortlog_add_commit(log, commit);
}
static int parse_uint(char const **arg, int comma, int defval)
{
unsigned long ul;
int ret;
char *endp;
ul = strtoul(*arg, &endp, 10);
if (*endp && *endp != comma)
return -1;
if (ul > INT_MAX)
return -1;
ret = *arg == endp ? defval : (int)ul;
*arg = *endp ? endp + 1 : endp;
return ret;
}
static const char wrap_arg_usage[] = "-w[<width>[,<indent1>[,<indent2>]]]";
#define DEFAULT_WRAPLEN 76
#define DEFAULT_INDENT1 6
#define DEFAULT_INDENT2 9
static int parse_wrap_args(const struct option *opt, const char *arg, int unset)
{
struct shortlog *log = opt->value;
log->wrap_lines = !unset;
if (unset)
return 0;
if (!arg) {
log->wrap = DEFAULT_WRAPLEN;
log->in1 = DEFAULT_INDENT1;
log->in2 = DEFAULT_INDENT2;
return 0;
}
log->wrap = parse_uint(&arg, ',', DEFAULT_WRAPLEN);
log->in1 = parse_uint(&arg, ',', DEFAULT_INDENT1);
log->in2 = parse_uint(&arg, '\0', DEFAULT_INDENT2);
if (log->wrap < 0 || log->in1 < 0 || log->in2 < 0)
return error(wrap_arg_usage);
if (log->wrap &&
((log->in1 && log->wrap <= log->in1) ||
(log->in2 && log->wrap <= log->in2)))
return error(wrap_arg_usage);
return 0;
}
static int parse_group_option(const struct option *opt, const char *arg, int unset)
{
struct shortlog *log = opt->value;
const char *field;
if (unset) {
log->groups = 0;
string_list_clear(&log->trailers, 0);
} else if (!strcasecmp(arg, "author"))
log->groups |= SHORTLOG_GROUP_AUTHOR;
else if (!strcasecmp(arg, "committer"))
log->groups |= SHORTLOG_GROUP_COMMITTER;
else if (skip_prefix(arg, "trailer:", &field)) {
log->groups |= SHORTLOG_GROUP_TRAILER;
string_list_append(&log->trailers, field);
} else
return error(_("unknown group type: %s"), arg);
return 0;
}
void shortlog_init(struct shortlog *log)
{
memset(log, 0, sizeof(*log));
read_mailmap(&log->mailmap);
log->list.strdup_strings = 1;
log->wrap = DEFAULT_WRAPLEN;
log->in1 = DEFAULT_INDENT1;
log->in2 = DEFAULT_INDENT2;
log->trailers.strdup_strings = 1;
log->trailers.cmp = strcasecmp;
}
int cmd_shortlog(int argc, const char **argv, const char *prefix)
{
struct shortlog log = { STRING_LIST_INIT_NODUP };
struct rev_info rev;
int nongit = !startup_info->have_repository;
const struct option options[] = {
OPT_BIT('c', "committer", &log.groups,
N_("group by committer rather than author"),
SHORTLOG_GROUP_COMMITTER),
OPT_BOOL('n', "numbered", &log.sort_by_number,
N_("sort output according to the number of commits per author")),
OPT_BOOL('s', "summary", &log.summary,
N_("suppress commit descriptions, only provides commit count")),
OPT_BOOL('e', "email", &log.email,
N_("show the email address of each author")),
OPT_CALLBACK_F('w', NULL, &log, N_("<w>[,<i1>[,<i2>]]"),
N_("linewrap output"), PARSE_OPT_OPTARG,
&parse_wrap_args),
OPT_CALLBACK(0, "group", &log, N_("field"),
N_("group by field"), parse_group_option),
OPT_END(),
};
struct parse_opt_ctx_t ctx;
git_config(git_default_config, NULL);
shortlog_init(&log);
repo_init_revisions(the_repository, &rev, prefix);
parse_options_start(&ctx, argc, argv, prefix, options,
PARSE_OPT_KEEP_DASHDASH | PARSE_OPT_KEEP_ARGV0);
for (;;) {
switch (parse_options_step(&ctx, options, shortlog_usage)) {
case PARSE_OPT_NON_OPTION:
case PARSE_OPT_UNKNOWN:
break;
case PARSE_OPT_HELP:
case PARSE_OPT_ERROR:
exit(129);
case PARSE_OPT_COMPLETE:
exit(0);
case PARSE_OPT_DONE:
goto parse_done;
}
parse_revision_opt(&rev, &ctx, options, shortlog_usage);
}
parse_done:
revision_opts_finish(&rev);
argc = parse_options_end(&ctx);
if (nongit && argc > 1) {
error(_("too many arguments given outside repository"));
usage_with_options(shortlog_usage, options);
}
if (setup_revisions(argc, argv, &rev, NULL) != 1) {
error(_("unrecognized argument: %s"), argv[1]);
usage_with_options(shortlog_usage, options);
}
log.user_format = rev.commit_format == CMIT_FMT_USERFORMAT;
log.abbrev = rev.abbrev;
log.file = rev.diffopt.file;
if (!log.groups)
log.groups = SHORTLOG_GROUP_AUTHOR;
string_list_sort(&log.trailers);
/* assume HEAD if from a tty */
if (!nongit && !rev.pending.nr && isatty(0))
add_head_to_pending(&rev);
if (rev.pending.nr == 0) {
if (isatty(0))
fprintf(stderr, _("(reading log message from standard input)\n"));
read_from_stdin(&log);
}
else
get_from_rev(&rev, &log);
shortlog_output(&log);
if (log.file != stdout)
fclose(log.file);
return 0;
}
static void add_wrapped_shortlog_msg(struct strbuf *sb, const char *s,
const struct shortlog *log)
{
strbuf_add_wrapped_text(sb, s, log->in1, log->in2, log->wrap);
strbuf_addch(sb, '\n');
}
void shortlog_output(struct shortlog *log)
{
size_t i, j;
struct strbuf sb = STRBUF_INIT;
if (log->sort_by_number)
QSORT(log->list.items, log->list.nr,
log->summary ? compare_by_counter : compare_by_list);
for (i = 0; i < log->list.nr; i++) {
const struct string_list_item *item = &log->list.items[i];
if (log->summary) {
fprintf(log->file, "%6d\t%s\n",
(int)UTIL_TO_INT(item), item->string);
} else {
struct string_list *onelines = item->util;
fprintf(log->file, "%s (%"PRIuMAX"):\n",
item->string, (uintmax_t)onelines->nr);
for (j = onelines->nr; j >= 1; j--) {
const char *msg = onelines->items[j - 1].string;
if (log->wrap_lines) {
strbuf_reset(&sb);
add_wrapped_shortlog_msg(&sb, msg, log);
fwrite(sb.buf, sb.len, 1, log->file);
}
else
fprintf(log->file, " %s\n", msg);
}
putc('\n', log->file);
onelines->strdup_strings = 1;
string_list_clear(onelines, 0);
free(onelines);
}
log->list.items[i].util = NULL;
}
strbuf_release(&sb);
log->list.strdup_strings = 1;
string_list_clear(&log->list, 1);
clear_mailmap(&log->mailmap);
}