git/builtin/am.c
Paul Tan 3e20dcf367 builtin-am: extract patch and commit info with git-mailinfo
For the purpose of applying the patch and committing the results,
implement extracting the patch data, commit message and authorship from
an e-mail message using git-mailinfo.

git-mailinfo is run as a separate process, but ideally in the future,
we should be able to access its functionality directly without
spawning a new process.

Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Jeff King <peff@peff.net>
Helped-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Paul Tan <pyokagan@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-08-04 22:02:11 -07:00

734 lines
17 KiB
C

/*
* Builtin "git am"
*
* Based on git-am.sh by Junio C Hamano.
*/
#include "cache.h"
#include "builtin.h"
#include "exec_cmd.h"
#include "parse-options.h"
#include "dir.h"
#include "run-command.h"
#include "quote.h"
/**
 * Reports whether `filename` has no content.
 *
 * Returns 1 when the file is zero bytes long or when stat() fails with
 * ENOENT, and 0 when the file has a non-zero size. Any other stat() failure
 * is fatal.
 */
static int is_empty_file(const char *filename)
{
	struct stat info;

	if (stat(filename, &info) >= 0)
		return info.st_size == 0;

	/* A missing file counts as empty; everything else is an error. */
	if (errno != ENOENT)
		die_errno(_("could not stat %s"), filename);
	return 1;
}
/**
 * Reads one line from `fp` into `sb` and strips its terminator, accepting
 * both "\n" and "\r\n" as line endings.
 *
 * Returns 0 when a line was read, or EOF when strbuf_getwholeline() reports
 * failure.
 */
static int strbuf_getline_crlf(struct strbuf *sb, FILE *fp)
{
	if (strbuf_getwholeline(sb, fp, '\n'))
		return EOF;

	/* No trailing newline: there is nothing to strip. */
	if (sb->buf[sb->len - 1] != '\n')
		return 0;

	strbuf_setlen(sb, sb->len - 1);
	/* Also drop the '\r' of a "\r\n" pair, if present. */
	if (sb->len && sb->buf[sb->len - 1] == '\r')
		strbuf_setlen(sb, sb->len - 1);
	return 0;
}
/* Formats in which the input patches may be supplied. */
enum patch_format {
/* no format given / detection failed; deliberately zero so it tests false */
PATCH_FORMAT_UNKNOWN = 0,
/* mbox file or Maildir; split into patches by split_mail_mbox() */
PATCH_FORMAT_MBOX
};
/*
 * In-memory state of an am session. Set up by am_state_init() and freed by
 * am_state_release().
 */
struct am_state {
/* state directory path (a private copy made by am_state_init()) */
char *dir;
/* current and last patch numbers, 1-indexed */
int cur;
int last;
/*
 * commit metadata and message of the current patch; each is NULL until it
 * has been read or parsed, and is reset to NULL by am_next()
 */
char *author_name;
char *author_email;
char *author_date;
char *msg;
/* length of msg in bytes */
size_t msg_len;
/* number of digits in patch filename */
int prec;
};
/**
 * Prepares `state` for use: all fields are zeroed, the state directory is set
 * to a private copy of `dir`, and the patch filename width defaults to 4
 * digits.
 *
 * `dir` must not be NULL.
 */
static void am_state_init(struct am_state *state, const char *dir)
{
	assert(dir);

	memset(state, 0, sizeof(*state));
	state->prec = 4;
	state->dir = xstrdup(dir);
}
/**
 * Frees every string owned by `state`. The struct itself is not freed and its
 * pointers are left untouched, so `state` must not be used again without
 * calling am_state_init() first.
 */
static void am_state_release(struct am_state *state)
{
	/* commit message and authorship of the current patch */
	free(state->msg);
	free(state->author_date);
	free(state->author_email);
	free(state->author_name);

	/* state directory path */
	free(state->dir);
}
/**
 * Builds the location of `path` inside the state directory, i.e.
 * "<state->dir>/<path>", by way of mkpath().
 *
 * NOTE(review): the lifetime of the returned string is whatever mkpath()
 * provides -- presumably a shared temporary buffer; confirm before holding on
 * to the result across further path-building calls.
 */
static inline const char *am_path(const struct am_state *state, const char *path)
{
	const char *dir = state->dir;

	return mkpath("%s/%s", dir, path);
}
/**
 * Tells whether an am session is under way.
 *
 * A session is considered in progress only when the state directory exists
 * and is a directory, and both its "last" and "next" entries are regular
 * files. Returns 1 in that case, 0 otherwise.
 */
static int am_in_progress(const struct am_state *state)
{
	struct stat st;

	if (lstat(state->dir, &st) == 0 && S_ISDIR(st.st_mode) &&
	    lstat(am_path(state, "last"), &st) == 0 && S_ISREG(st.st_mode) &&
	    lstat(am_path(state, "next"), &st) == 0 && S_ISREG(st.st_mode))
		return 1;
	return 0;
}
/**
 * Loads the state-directory file `file` into `sb`, replacing whatever `sb`
 * held before. When `trim` is non-zero the content is additionally passed
 * through strbuf_trim().
 *
 * Returns the resulting length of `sb` on success, or -1 if the file does not
 * exist. Any other read failure is fatal.
 */
static int read_state_file(struct strbuf *sb, const struct am_state *state,
			const char *file, int trim)
{
	strbuf_reset(sb);

	if (strbuf_read_file(sb, am_path(state, file), 0) < 0) {
		/* Only a missing file is reported back to the caller. */
		if (errno != ENOENT)
			die_errno(_("could not read '%s'"), am_path(state, file));
		return -1;
	}

	if (trim)
		strbuf_trim(sb);
	return sb->len;
}
/**
* Reads a KEY=VALUE shell variable assignment from `fp`, returning the VALUE
* as a newly-allocated string. VALUE must be a quoted string, and the KEY must
* match `key`. Returns NULL on failure.
*
* This is used by read_author_script() to read the GIT_AUTHOR_* variables from
* the author-script.
*/
static char *read_shell_var(FILE *fp, const char *key)
{
struct strbuf sb = STRBUF_INIT;
const char *str;
if (strbuf_getline(&sb, fp, '\n'))
goto fail;
if (!skip_prefix(sb.buf, key, &str))
goto fail;
if (!skip_prefix(str, "=", &str))
goto fail;
strbuf_remove(&sb, 0, str - sb.buf);
str = sq_dequote(sb.buf);
if (!str)
goto fail;
return strbuf_detach(&sb, NULL);
fail:
strbuf_release(&sb);
return NULL;
}
/**
 * Parses the "author-script" file of the state directory and stores the
 * result in state->author_name, state->author_email and state->author_date.
 * All three fields must be NULL on entry.
 *
 * The file must consist of exactly these three lines, in this order, with
 * each value quoted:
 *
 *	GIT_AUTHOR_NAME='$author_name'
 *	GIT_AUTHOR_EMAIL='$author_email'
 *	GIT_AUTHOR_DATE='$author_date'
 *
 * Parsing is deliberately strict: the file was meant to be eval'd by the old
 * git-am.sh script, so if it deviates from the expected layout it is better
 * to bail out than to do something the user does not expect.
 *
 * Returns 0 on success or if the file does not exist, -1 if it could not be
 * parsed. On a parse failure, fields read before the offending line stay set.
 * Failing to open an existing file is fatal.
 */
static int read_author_script(struct am_state *state)
{
	const char *filename = am_path(state, "author-script");
	FILE *fp;
	int ret = -1;

	assert(!state->author_name);
	assert(!state->author_email);
	assert(!state->author_date);

	fp = fopen(filename, "r");
	if (!fp) {
		if (errno == ENOENT)
			return 0;
		die_errno(_("could not open '%s' for reading"), filename);
	}

	state->author_name = read_shell_var(fp, "GIT_AUTHOR_NAME");
	if (!state->author_name)
		goto done;

	state->author_email = read_shell_var(fp, "GIT_AUTHOR_EMAIL");
	if (!state->author_email)
		goto done;

	state->author_date = read_shell_var(fp, "GIT_AUTHOR_DATE");
	if (!state->author_date)
		goto done;

	/* Anything following the three variables makes the file invalid. */
	if (fgetc(fp) == EOF)
		ret = 0;

done:
	fclose(fp);
	return ret;
}
/**
* Saves state->author_name, state->author_email and state->author_date in the
* state directory's "author-script" file.
*/
static void write_author_script(const struct am_state *state)
{
struct strbuf sb = STRBUF_INIT;
strbuf_addstr(&sb, "GIT_AUTHOR_NAME=");
sq_quote_buf(&sb, state->author_name);
strbuf_addch(&sb, '\n');
strbuf_addstr(&sb, "GIT_AUTHOR_EMAIL=");
sq_quote_buf(&sb, state->author_email);
strbuf_addch(&sb, '\n');
strbuf_addstr(&sb, "GIT_AUTHOR_DATE=");
sq_quote_buf(&sb, state->author_date);
strbuf_addch(&sb, '\n');
write_file(am_path(state, "author-script"), 1, "%s", sb.buf);
strbuf_release(&sb);
}
/**
* Reads the commit message from the state directory's "final-commit" file,
* setting state->msg to its contents and state->msg_len to the length of its
* contents in bytes.
*
* Returns 0 on success, -1 if the file does not exist.
*/
static int read_commit_msg(struct am_state *state)
{
struct strbuf sb = STRBUF_INIT;
assert(!state->msg);
if (read_state_file(&sb, state, "final-commit", 0) < 0) {
strbuf_release(&sb);
return -1;
}
state->msg = strbuf_detach(&sb, &state->msg_len);
return 0;
}
/**
 * Saves state->msg (state->msg_len bytes) in the state directory's
 * "final-commit" file, replacing any previous contents.
 *
 * Failure to write the message is fatal.
 */
static void write_commit_msg(const struct am_state *state)
{
	int fd;
	const char *filename = am_path(state, "final-commit");

	/*
	 * O_TRUNC matters: without it, overwriting an existing, longer
	 * "final-commit" would leave the tail of the old message in place.
	 */
	fd = xopen(filename, O_WRONLY | O_CREAT | O_TRUNC, 0666);
	if (write_in_full(fd, state->msg, state->msg_len) < 0)
		die_errno(_("could not write to %s"), filename);
	close(fd);
}
/**
* Loads state from disk.
*/
static void am_load(struct am_state *state)
{
struct strbuf sb = STRBUF_INIT;
if (read_state_file(&sb, state, "next", 1) < 0)
die("BUG: state file 'next' does not exist");
state->cur = strtol(sb.buf, NULL, 10);
if (read_state_file(&sb, state, "last", 1) < 0)
die("BUG: state file 'last' does not exist");
state->last = strtol(sb.buf, NULL, 10);
if (read_author_script(state) < 0)
die(_("could not parse author script"));
read_commit_msg(state);
strbuf_release(&sb);
}
/**
* Removes the am_state directory, forcefully terminating the current am
* session.
*/
static void am_destroy(const struct am_state *state)
{
struct strbuf sb = STRBUF_INIT;
strbuf_addstr(&sb, state->dir);
remove_dir_recursively(&sb, 0);
strbuf_release(&sb);
}
/**
* Determines if the file looks like a piece of RFC2822 mail by grabbing all
* non-indented lines and checking if they look like they begin with valid
* header field names.
*
* Returns 1 if the file looks like a piece of mail, 0 otherwise.
*/
static int is_mail(FILE *fp)
{
const char *header_regex = "^[!-9;-~]+:";
struct strbuf sb = STRBUF_INIT;
regex_t regex;
int ret = 1;
if (fseek(fp, 0L, SEEK_SET))
die_errno(_("fseek failed"));
if (regcomp(&regex, header_regex, REG_NOSUB | REG_EXTENDED))
die("invalid pattern: %s", header_regex);
while (!strbuf_getline_crlf(&sb, fp)) {
if (!sb.len)
break; /* End of header */
/* Ignore indented folded lines */
if (*sb.buf == '\t' || *sb.buf == ' ')
continue;
/* It's a header if it matches header_regex */
if (regexec(&regex, sb.buf, 0, NULL, 0)) {
ret = 0;
goto done;
}
}
done:
regfree(&regex);
strbuf_release(&sb);
return ret;
}
/**
* Attempts to detect the patch_format of the patches contained in `paths`,
* returning the PATCH_FORMAT_* enum value. Returns PATCH_FORMAT_UNKNOWN if
* detection fails.
*/
static int detect_patch_format(const char **paths)
{
enum patch_format ret = PATCH_FORMAT_UNKNOWN;
struct strbuf l1 = STRBUF_INIT;
FILE *fp;
/*
* We default to mbox format if input is from stdin and for directories
*/
if (!*paths || !strcmp(*paths, "-") || is_directory(*paths))
return PATCH_FORMAT_MBOX;
/*
* Otherwise, check the first few lines of the first patch, starting
* from the first non-blank line, to try to detect its format.
*/
fp = xfopen(*paths, "r");
while (!strbuf_getline_crlf(&l1, fp)) {
if (l1.len)
break;
}
if (starts_with(l1.buf, "From ") || starts_with(l1.buf, "From: ")) {
ret = PATCH_FORMAT_MBOX;
goto done;
}
if (l1.len && is_mail(fp)) {
ret = PATCH_FORMAT_MBOX;
goto done;
}
done:
fclose(fp);
strbuf_release(&l1);
return ret;
}
/**
 * Splits out individual email patches from `paths`, where each path is either
 * a mbox file or a Maildir, by running git-mailsplit with the state directory
 * as its output directory.
 *
 * On success state->cur is set to 1 and state->last to the number parsed from
 * the command's captured output.
 *
 * Returns 0 on success, -1 on failure.
 */
static int split_mail_mbox(struct am_state *state, const char **paths)
{
	struct child_process cp = CHILD_PROCESS_INIT;
	struct strbuf last = STRBUF_INIT;
	int ret = -1;

	cp.git_cmd = 1;
	argv_array_push(&cp.args, "mailsplit");
	argv_array_pushf(&cp.args, "-d%d", state->prec);
	argv_array_pushf(&cp.args, "-o%s", state->dir);
	argv_array_push(&cp.args, "-b");
	argv_array_push(&cp.args, "--");
	argv_array_pushv(&cp.args, paths);

	if (capture_command(&cp, &last, 8))
		goto done;

	state->cur = 1;
	state->last = strtol(last.buf, NULL, 10);
	ret = 0;

done:
	/* Release the captured output on every path so it is not leaked. */
	strbuf_release(&last);
	return ret;
}
/**
 * Breaks a list of files/directories up into individual email patches. Every
 * entry of `paths` must be a file/directory in the format given by
 * `patch_format`.
 *
 * The resulting patches are stored in the state directory, each one named
 * after its index, padded to state->prec digits.
 *
 * state->cur is set to the index of the first mail and state->last to the
 * index of the last mail.
 *
 * Returns 0 on success, -1 on failure. An unsupported `patch_format` is a
 * programming error and is fatal.
 */
static int split_mail(struct am_state *state, enum patch_format patch_format,
			const char **paths)
{
	if (patch_format == PATCH_FORMAT_MBOX)
		return split_mail_mbox(state, paths);

	die("BUG: invalid patch_format");
	return -1;
}
/**
* Setup a new am session for applying patches
*/
static void am_setup(struct am_state *state, enum patch_format patch_format,
const char **paths)
{
if (!patch_format)
patch_format = detect_patch_format(paths);
if (!patch_format) {
fprintf_ln(stderr, _("Patch format detection failed."));
exit(128);
}
if (mkdir(state->dir, 0777) < 0 && errno != EEXIST)
die_errno(_("failed to create directory '%s'"), state->dir);
if (split_mail(state, patch_format, paths) < 0) {
am_destroy(state);
die(_("Failed to split patches."));
}
/*
* NOTE: Since the "next" and "last" files determine if an am_state
* session is in progress, they should be written last.
*/
write_file(am_path(state, "next"), 1, "%d", state->cur);
write_file(am_path(state, "last"), 1, "%d", state->last);
}
/**
 * Moves on to the next patch: the in-memory authorship and message of the
 * current patch are discarded, their "author-script" and "final-commit"
 * files are removed, and the incremented patch number is recorded in the
 * "next" file.
 */
static void am_next(struct am_state *state)
{
	/* Forget the commit message ... */
	free(state->msg);
	state->msg = NULL;
	state->msg_len = 0;

	/* ... and the authorship of the patch that was just handled. */
	free(state->author_date);
	state->author_date = NULL;
	free(state->author_email);
	state->author_email = NULL;
	free(state->author_name);
	state->author_name = NULL;

	/* Their on-disk counterparts go away as well. */
	unlink(am_path(state, "author-script"));
	unlink(am_path(state, "final-commit"));

	state->cur += 1;
	write_file(am_path(state, "next"), 1, "%d", state->cur);
}
/**
* Returns the filename of the current patch email.
*/
static const char *msgnum(const struct am_state *state)
{
static struct strbuf sb = STRBUF_INIT;
strbuf_reset(&sb);
strbuf_addf(&sb, "%0*d", state->prec, state->cur);
return sb.buf;
}
/**
* Parses `mail` using git-mailinfo, extracting its patch and authorship info.
* state->msg will be set to the patch message. state->author_name,
* state->author_email and state->author_date will be set to the patch author's
* name, email and date respectively. The patch body will be written to the
* state directory's "patch" file.
*
* Returns 1 if the patch should be skipped, 0 otherwise.
*/
static int parse_mail(struct am_state *state, const char *mail)
{
FILE *fp;
struct child_process cp = CHILD_PROCESS_INIT;
struct strbuf sb = STRBUF_INIT;
struct strbuf msg = STRBUF_INIT;
struct strbuf author_name = STRBUF_INIT;
struct strbuf author_date = STRBUF_INIT;
struct strbuf author_email = STRBUF_INIT;
int ret = 0;
cp.git_cmd = 1;
cp.in = xopen(mail, O_RDONLY, 0);
cp.out = xopen(am_path(state, "info"), O_WRONLY | O_CREAT, 0777);
argv_array_push(&cp.args, "mailinfo");
argv_array_push(&cp.args, am_path(state, "msg"));
argv_array_push(&cp.args, am_path(state, "patch"));
if (run_command(&cp) < 0)
die("could not parse patch");
close(cp.in);
close(cp.out);
/* Extract message and author information */
fp = xfopen(am_path(state, "info"), "r");
while (!strbuf_getline(&sb, fp, '\n')) {
const char *x;
if (skip_prefix(sb.buf, "Subject: ", &x)) {
if (msg.len)
strbuf_addch(&msg, '\n');
strbuf_addstr(&msg, x);
} else if (skip_prefix(sb.buf, "Author: ", &x))
strbuf_addstr(&author_name, x);
else if (skip_prefix(sb.buf, "Email: ", &x))
strbuf_addstr(&author_email, x);
else if (skip_prefix(sb.buf, "Date: ", &x))
strbuf_addstr(&author_date, x);
}
fclose(fp);
/* Skip pine's internal folder data */
if (!strcmp(author_name.buf, "Mail System Internal Data")) {
ret = 1;
goto finish;
}
if (is_empty_file(am_path(state, "patch"))) {
printf_ln(_("Patch is empty. Was it split wrong?"));
exit(128);
}
strbuf_addstr(&msg, "\n\n");
if (strbuf_read_file(&msg, am_path(state, "msg"), 0) < 0)
die_errno(_("could not read '%s'"), am_path(state, "msg"));
stripspace(&msg, 0);
assert(!state->author_name);
state->author_name = strbuf_detach(&author_name, NULL);
assert(!state->author_email);
state->author_email = strbuf_detach(&author_email, NULL);
assert(!state->author_date);
state->author_date = strbuf_detach(&author_date, NULL);
assert(!state->msg);
state->msg = strbuf_detach(&msg, &state->msg_len);
finish:
strbuf_release(&msg);
strbuf_release(&author_date);
strbuf_release(&author_email);
strbuf_release(&author_name);
strbuf_release(&sb);
return ret;
}
/**
 * Works through all queued mail, from state->cur up to and including
 * state->last, and removes the state directory once the queue is exhausted.
 *
 * For each mail that exists and is not to be skipped, the parsed authorship
 * and commit message are written to the state directory.
 */
static void am_run(struct am_state *state)
{
	while (state->cur <= state->last) {
		const char *mail = am_path(state, msgnum(state));

		/*
		 * A missing mail, or one that parse_mail() asks to skip,
		 * is simply passed over.
		 */
		if (file_exists(mail) && !parse_mail(state, mail)) {
			write_author_script(state);
			write_commit_msg(state);

			/* NEEDSWORK: Patch application not implemented yet */
		}

		am_next(state);
	}

	am_destroy(state);
}
/**
* parse_options() callback that validates and sets opt->value to the
* PATCH_FORMAT_* enum value corresponding to `arg`.
*/
static int parse_opt_patchformat(const struct option *opt, const char *arg, int unset)
{
int *opt_value = opt->value;
if (!strcmp(arg, "mbox"))
*opt_value = PATCH_FORMAT_MBOX;
else
return error(_("Invalid value for --patch-format: %s"), arg);
return 0;
}
/**
 * Entry point of "git am".
 *
 * Unless the _GIT_USE_BUILTIN_AM environment variable is set, control is
 * handed over to the "git-am" program in the exec path. Otherwise an
 * in-progress session is resumed, or a new one is set up from the paths given
 * on the command line, and the queued patches are then processed.
 */
int cmd_am(int argc, const char **argv, const char *prefix)
{
	struct am_state state;
	int patch_format = PATCH_FORMAT_UNKNOWN;

	const char * const usage[] = {
		N_("git am [options] [(<mbox>|<Maildir>)...]"),
		NULL
	};

	struct option options[] = {
		OPT_CALLBACK(0, "patch-format", &patch_format, N_("format"),
			N_("format the patch(es) are in"),
			parse_opt_patchformat),
		OPT_END()
	};

	/*
	 * NEEDSWORK: Once all the features of git-am.sh have been
	 * re-implemented in builtin/am.c, this preamble can be removed.
	 */
	if (getenv("_GIT_USE_BUILTIN_AM")) {
		prefix = setup_git_directory();
		trace_repo_setup(prefix);
		setup_work_tree();
	} else {
		const char *path = mkpath("%s/git-am", git_exec_path());

		if (sane_execvp(path, (char **)argv) < 0)
			die_errno("could not exec %s", path);
	}

	git_config(git_default_config, NULL);

	am_state_init(&state, git_path("rebase-apply"));

	argc = parse_options(argc, argv, prefix, options, usage, 0);

	if (!am_in_progress(&state)) {
		struct argv_array paths = ARGV_ARRAY_INIT;
		int i;

		/* Relative paths are given relative to `prefix`, if any. */
		for (i = 0; i < argc; i++) {
			const char *arg = argv[i];

			if (prefix && !is_absolute_path(arg))
				argv_array_push(&paths, mkpath("%s/%s", prefix, arg));
			else
				argv_array_push(&paths, arg);
		}

		am_setup(&state, patch_format, paths.argv);

		argv_array_clear(&paths);
	} else {
		am_load(&state);
	}

	am_run(&state);

	am_state_release(&state);

	return 0;
}