mirror of
https://github.com/git/git
synced 2024-10-30 14:03:28 +00:00
089d9adff6
The comment in parse_commit_date() claims that parse_timestamp() will not walk past the end of the buffer we've been given, since it will hit the newline at "eol" and stop. This is usually true, when dateptr contains actual numbers to parse. But with a line like: committer name <email> \n with just whitespace, and no numbers, parse_timestamp() will consume that newline as part of the leading whitespace, and we may walk past our "tail" pointer (which itself is set from the "size" parameter passed in to parse_commit_buffer()). In practice this can't cause us to walk off the end of an array, because we always add an extra NUL byte to the end of objects we load from disk (as a defense against exactly this kind of bug). However, you can see the behavior in action when "committer" is the final header (which it usually is, unless there's an encoding) and the subject line can be parsed as an integer. We walk right past the newline on the committer line, as well as the "\n\n" separator, and mistake the subject for the timestamp. We can solve this by trimming the whitespace ourselves, making sure that it has some non-whitespace to parse. Note that we need to be a bit careful about the definition of "whitespace" here, as our isspace() doesn't match exotic characters like vertical tab or formfeed. We can work around that by checking for an actual number (see the in-code comment). This is slightly more restrictive than the current code, but in practice the results are either the same (we reject "foo" as "0", but so would parse_timestamp()) or extremely unlikely even for broken commits (parse_timestamp() would allow "\v123" as "123", but we'll now make it "0"). I did also allow "-" here, which may be controversial, as we don't currently support negative timestamps. My reasoning was two-fold. 
One, the design of parse_timestamp() is such that we should be able to easily switch it to handling signed values, and this otherwise creates a hard-to-find gotcha that anybody doing that work would get tripped up on. And two, the status quo is that we currently parse them, though the result of course ends up as a very large unsigned value (which is likely to just get clamped to "0" for display anyway, since our date routines can't handle it). The new test checks the commit parser (via "--until") for both vanilla spaces and the vertical-tab case. I also added a test to check these against the pretty-print formatter, which uses split_ident_line(). It's not subject to the same bug, because it already insists that there be one or more digits in the timestamp. Helped-by: Phillip Wood <phillip.wood123@gmail.com> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
136 lines
4.3 KiB
Bash
Executable file
136 lines
4.3 KiB
Bash
Executable file
#!/bin/sh

test_description='git log with invalid commit headers'

# Declare that this script is expected to pass when built with leak
# checking (see t/README), then pull in the test harness.
TEST_PASSES_SANITIZE_LEAK=true
. ./test-lib.sh

# Create one ordinary commit, then store a corrupted copy of it whose
# ident lines carry a malformed email, and point a branch at that copy.
test_expect_success 'setup' '
	test_commit foo &&

	git cat-file commit HEAD >ok.commit &&
	# Rewrite the first ">" on every line to ">-<>"; in a commit object
	# that hits the author and committer lines.
	sed "s/>/>-<>/" <ok.commit >broken_email.commit &&

	# --literally stores the object even though it would not pass the
	# usual commit sanity checks.
	git hash-object --literally -w -t commit broken_email.commit >broken_email.hash &&
	git update-ref refs/heads/broken_email $(cat broken_email.hash)
'

# fsck must exit non-zero and mention the author problem on stderr.
test_expect_success 'fsck notices broken commit' '
	test_must_fail git fsck 2>actual &&
	test_i18ngrep invalid.author actual
'

# The default log output must still be produced for the corrupted commit,
# with nothing written to stderr.
#
# The expected text follows the default ("medium") log layout exactly:
# "Date:" is followed by three spaces so that it lines up with "Author: ",
# and the subject line is indented by four spaces.
test_expect_success 'git log with broken author email' '
	{
		echo commit $(cat broken_email.hash) &&
		echo "Author: A U Thor <author@example.com>" &&
		echo "Date:   Thu Apr 7 15:13:13 2005 -0700" &&
		echo &&
		echo "    foo"
	} >expect.out &&

	git log broken_email >actual.out 2>actual.err &&

	test_cmp expect.out actual.out &&
	test_must_be_empty actual.err
'

# Same corrupted commit, but going through the --format placeholders for
# author name, author email and author date.
test_expect_success 'git log --format with broken author email' '
	echo "A U Thor+author@example.com+Thu Apr 7 15:13:13 2005 -0700" >expect.out &&

	git log --format="%an+%ae+%ad" broken_email >actual.out 2>actual.err &&

	test_cmp expect.out actual.out &&
	test_must_be_empty actual.err
'

# Limiting by date must still work on the corrupted commit: with a cutoff
# of 1980 the traversal is expected to list nothing.
test_expect_success '--until handles broken email' '
	git rev-list --until=1980-01-01 broken_email >actual &&
	test_must_be_empty actual
'

# Write a copy of a commit with its author timestamp replaced.
#
# Arguments:
#   $1 - commit to copy (anything "git cat-file commit" accepts)
#   $2 - text to put in place of the author timestamp; it is spliced
#        into a sed replacement, so it must not contain "/", "&" or
#        backslashes
#
# Output: whatever "git hash-object" prints for the rewritten object.
#
# Only the run of digits after the author ident is replaced, so the
# timezone and the committer line are left as they were. The files
# commit.orig and commit.munge are left behind in the current directory.
munge_author_date () {
	git cat-file commit "$1" >commit.orig &&
	sed "s/^\(author .*>\) [0-9]*/\1 $2/" <commit.orig >commit.munge &&
	git hash-object --literally -w -t commit commit.munge
}

# A non-numeric author timestamp is shown as the epoch in the default log
# output. The expected line keeps the three spaces that the default log
# layout puts after "Date:".
test_expect_success 'unparsable dates produce sentinel value' '
	commit=$(munge_author_date HEAD totally_bogus) &&
	echo "Date:   Thu Jan 1 00:00:00 1970 +0000" >expect &&
	git log -1 $commit >actual.full &&
	grep Date <actual.full >actual &&
	test_cmp expect actual
'

# Through the %ad placeholder the same bogus timestamp is expected to
# expand to nothing, so the output is a single empty line.
test_expect_success 'unparsable dates produce sentinel value (%ad)' '
	commit=$(munge_author_date HEAD totally_bogus) &&
	echo >expect &&
	git log -1 --format=%ad $commit >actual &&
	test_cmp expect actual
'

# date is 2^64 + 1
#
# A timestamp too large for 64 unsigned bits is expected to be shown as
# the epoch.
test_expect_success 'date parser recognizes integer overflow' '
	commit=$(munge_author_date HEAD 18446744073709551617) &&
	echo "Thu Jan 1 00:00:00 1970 +0000" >expect &&
	git log -1 --format=%ad $commit >actual &&
	test_cmp expect actual
'

# date is 2^64 - 2
#
# This value fits in 64 unsigned bits; it is still expected to be shown
# as the epoch.
test_expect_success 'date parser recognizes time_t overflow' '
	commit=$(munge_author_date HEAD 18446744073709551614) &&
	echo "Thu Jan 1 00:00:00 1970 +0000" >expect &&
	git log -1 --format=%ad $commit >actual &&
	test_cmp expect actual
'

# date is within 2^63-1, but enough to choke glibc's gmtime
#
# The output is not compared against anything; the test only requires
# that "git log" exits successfully.
test_expect_success 'absurdly far-in-future date' '
	commit=$(munge_author_date HEAD 999999999999999999) &&
	git log -1 --format=%ad $commit
'

# Build the two fixture commits (ws_commit and vt_commit) that the
# following tests inspect.
test_expect_success 'create commits with whitespace committer dates' '
	# It is important that this subject line is numeric, since we want to
	# be sure we are not confused by skipping whitespace and accidentally
	# parsing the subject as a timestamp.
	#
	# Do not use munge_author_date here. Besides not hitting the committer
	# line, it leaves the timezone intact, and we want nothing but
	# whitespace.
	#
	# We will make two munged commits here. The first, ws_commit, will
	# be purely spaces. The second contains a vertical tab, which is
	# considered a space by strtoumax(), but not by our isspace().
	test_commit 1234567890 &&
	git cat-file commit HEAD >commit.orig &&
	sed "s/>.*/> /" <commit.orig >commit.munge &&
	ws_commit=$(git hash-object --literally -w -t commit commit.munge) &&
	sed "s/>.*/> $(printf "\013")/" <commit.orig >commit.munge &&
	vt_commit=$(git hash-object --literally -w -t commit commit.munge)
'

# With a whitespace-only committer date, each commit must still be listed
# under a 1980 cutoff. Had the numeric subject been mistaken for the
# timestamp, the commit would be filtered out instead.
test_expect_success '--until treats whitespace date as sentinel' '
	echo $ws_commit >expect &&
	git rev-list --until=1980-01-01 $ws_commit >actual &&
	test_cmp expect actual &&

	echo $vt_commit >expect &&
	git rev-list --until=1980-01-01 $vt_commit >actual &&
	test_cmp expect actual
'

# Check the same two commits through the --format placeholders for the
# author and committer timestamps.
test_expect_success 'pretty-printer handles whitespace date' '
	# as with the %ad test above, we will show these as the empty string,
	# not the 1970 epoch date. This is intentional; see 7d9a281941 (t4212:
	# test bogus timestamps with git-log, 2014-02-24) for more discussion.
	echo : >expect &&
	git log -1 --format="%at:%ct" $ws_commit >actual &&
	test_cmp expect actual &&
	git log -1 --format="%at:%ct" $vt_commit >actual &&
	test_cmp expect actual
'

# Finish the script and let the harness report the results.
test_done