mirror of
https://github.com/git/git
synced 2024-10-01 06:05:20 +00:00
Merge branch 'jk/parse-commit-with-malformed-ident'
The commit object parser has been taught to be a bit more lenient when parsing timestamps on the author/committer line with a malformed author/committer ident.

* jk/parse-commit-with-malformed-ident:
  parse_commit(): describe more date-parsing failure modes
  parse_commit(): handle broken whitespace-only timestamp
  parse_commit(): parse timestamp from end of line
  t4212: avoid putting git on left-hand side of pipe
This commit is contained in:
commit
620e92b845
57
commit.c
57
commit.c
|
@ -96,6 +96,7 @@ struct commit *lookup_commit_reference_by_name(const char *name)
|
|||
static timestamp_t parse_commit_date(const char *buf, const char *tail)
|
||||
{
|
||||
const char *dateptr;
|
||||
const char *eol;
|
||||
|
||||
if (buf + 6 >= tail)
|
||||
return 0;
|
||||
|
@ -107,16 +108,56 @@ static timestamp_t parse_commit_date(const char *buf, const char *tail)
|
|||
return 0;
|
||||
if (memcmp(buf, "committer", 9))
|
||||
return 0;
|
||||
while (buf < tail && *buf++ != '>')
|
||||
/* nada */;
|
||||
if (buf >= tail)
|
||||
|
||||
/*
|
||||
* Jump to end-of-line so that we can walk backwards to find the
|
||||
* end-of-email ">". This is more forgiving of malformed cases
|
||||
* because unexpected characters tend to be in the name and email
|
||||
* fields.
|
||||
*/
|
||||
eol = memchr(buf, '\n', tail - buf);
|
||||
if (!eol)
|
||||
return 0;
|
||||
dateptr = buf;
|
||||
while (buf < tail && *buf++ != '\n')
|
||||
/* nada */;
|
||||
if (buf >= tail)
|
||||
dateptr = eol;
|
||||
while (dateptr > buf && dateptr[-1] != '>')
|
||||
dateptr--;
|
||||
if (dateptr == buf)
|
||||
return 0;
|
||||
/* dateptr < buf && buf[-1] == '\n', so parsing will stop at buf-1 */
|
||||
|
||||
/*
|
||||
* Trim leading whitespace, but make sure we have at least one
|
||||
* non-whitespace character, as parse_timestamp() will otherwise walk
|
||||
* right past the newline we found in "eol" when skipping whitespace
|
||||
* itself.
|
||||
*
|
||||
* In theory it would be sufficient to allow any character not matched
|
||||
* by isspace(), but there's a catch: our isspace() does not
|
||||
* necessarily match the behavior of parse_timestamp(), as the latter
|
||||
* is implemented by system routines which match more exotic control
|
||||
* codes, or even locale-dependent sequences.
|
||||
*
|
||||
* Since we expect the timestamp to be a number, we can check for that.
|
||||
* Anything else (e.g., a non-numeric token like "foo") would just
|
||||
* cause parse_timestamp() to return 0 anyway.
|
||||
*/
|
||||
while (dateptr < eol && isspace(*dateptr))
|
||||
dateptr++;
|
||||
if (!isdigit(*dateptr) && *dateptr != '-')
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* We know there is at least one digit (or dash), so we'll begin
|
||||
* parsing there and stop at worst case at eol.
|
||||
*
|
||||
* Note that we may feed parse_timestamp() extra characters here if the
|
||||
* commit is malformed, and it will parse as far as it can. For
|
||||
* example, "123foo456" would return "123". That might be questionable
|
||||
* (versus returning "0"), but it would help in a hypothetical case
|
||||
* like "123456+0100", where the whitespace from the timezone is
|
||||
* missing. Since such syntactic errors may be baked into history and
|
||||
* hard to correct now, let's err on the side of making our best guess
|
||||
* here, rather than insist on perfect syntax.
|
||||
*/
|
||||
return parse_timestamp(dateptr, NULL, 10);
|
||||
}
|
||||
|
||||
|
|
|
@ -8,8 +8,9 @@ TEST_PASSES_SANITIZE_LEAK=true
|
|||
test_expect_success 'setup' '
|
||||
test_commit foo &&
|
||||
|
||||
git cat-file commit HEAD |
|
||||
sed "/^author /s/>/>-<>/" >broken_email.commit &&
|
||||
git cat-file commit HEAD >ok.commit &&
|
||||
sed "s/>/>-<>/" <ok.commit >broken_email.commit &&
|
||||
|
||||
git hash-object --literally -w -t commit broken_email.commit >broken_email.hash &&
|
||||
git update-ref refs/heads/broken_email $(cat broken_email.hash)
|
||||
'
|
||||
|
@ -43,6 +44,11 @@ test_expect_success 'git log --format with broken author email' '
|
|||
test_must_be_empty actual.err
|
||||
'
|
||||
|
||||
test_expect_success '--until handles broken email' '
|
||||
git rev-list --until=1980-01-01 broken_email >actual &&
|
||||
test_must_be_empty actual
|
||||
'
|
||||
|
||||
munge_author_date () {
|
||||
git cat-file commit "$1" >commit.orig &&
|
||||
sed "s/^\(author .*>\) [0-9]*/\1 $2/" <commit.orig >commit.munge &&
|
||||
|
@ -86,4 +92,45 @@ test_expect_success 'absurdly far-in-future date' '
|
|||
git log -1 --format=%ad $commit
|
||||
'
|
||||
|
||||
test_expect_success 'create commits with whitespace committer dates' '
|
||||
# It is important that this subject line is numeric, since we want to
|
||||
# be sure we are not confused by skipping whitespace and accidentally
|
||||
# parsing the subject as a timestamp.
|
||||
#
|
||||
# Do not use munge_author_date here. Besides not hitting the committer
|
||||
# line, it leaves the timezone intact, and we want nothing but
|
||||
# whitespace.
|
||||
#
|
||||
# We will make two munged commits here. The first, ws_commit, will
|
||||
# be purely spaces. The second contains a vertical tab, which is
|
||||
# considered a space by strtoumax(), but not by our isspace().
|
||||
test_commit 1234567890 &&
|
||||
git cat-file commit HEAD >commit.orig &&
|
||||
sed "s/>.*/> /" <commit.orig >commit.munge &&
|
||||
ws_commit=$(git hash-object --literally -w -t commit commit.munge) &&
|
||||
sed "s/>.*/> $(printf "\013")/" <commit.orig >commit.munge &&
|
||||
vt_commit=$(git hash-object --literally -w -t commit commit.munge)
|
||||
'
|
||||
|
||||
test_expect_success '--until treats whitespace date as sentinel' '
|
||||
echo $ws_commit >expect &&
|
||||
git rev-list --until=1980-01-01 $ws_commit >actual &&
|
||||
test_cmp expect actual &&
|
||||
|
||||
echo $vt_commit >expect &&
|
||||
git rev-list --until=1980-01-01 $vt_commit >actual &&
|
||||
test_cmp expect actual
|
||||
'
|
||||
|
||||
test_expect_success 'pretty-printer handles whitespace date' '
|
||||
# as with the %ad test above, we will show these as the empty string,
|
||||
# not the 1970 epoch date. This is intentional; see 7d9a281941 (t4212:
|
||||
# test bogus timestamps with git-log, 2014-02-24) for more discussion.
|
||||
echo : >expect &&
|
||||
git log -1 --format="%at:%ct" $ws_commit >actual &&
|
||||
test_cmp expect actual &&
|
||||
git log -1 --format="%at:%ct" $vt_commit >actual &&
|
||||
test_cmp expect actual
|
||||
'
|
||||
|
||||
test_done
|
||||
|
|
Loading…
Reference in a new issue