archive-tar: write pax extended headers for values that overflow ustar

The ustar size and mtime fields hold at most 11 octal digits. For regular
files larger than USTAR_MAX_SIZE, record the real size in a per-entry "size"
extended header and put 0 in the ustar header; for commit times beyond
USTAR_MAX_MTIME, record the real time in a global "mtime" extended header and
clamp args->time. Add t5000 tests for both cases, and move t9300's head_c
helper into test-lib-functions.sh as test_copy_bytes so t5000 can use it.

Note: the two new files under t/t5000/ are binary; this text diff only
records that they differ and does not carry their contents.

diff --git a/archive-tar.c b/archive-tar.c
index cb99df2814..7ea4e90814 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -18,6 +18,16 @@ static int tar_umask = 002;
 static int write_tar_filter_archive(const struct archiver *ar,
 				    struct archiver_args *args);
 
+/*
+ * This is the max value that a ustar size header can specify, as it is fixed
+ * at 11 octal digits. POSIX specifies that we switch to extended headers at
+ * this size.
+ *
+ * Likewise for the mtime (which happens to use a buffer of the same size).
+ */
+#define USTAR_MAX_SIZE 077777777777UL
+#define USTAR_MAX_MTIME 077777777777UL
+
 /* writes out the whole block, but only if it is full */
 static void write_if_needed(void)
 {
@@ -137,6 +147,20 @@ static void strbuf_append_ext_header(struct strbuf *sb, const char *keyword,
 	strbuf_addch(sb, '\n');
 }
 
+/*
+ * Like strbuf_append_ext_header, but for numeric values.
+ */
+static void strbuf_append_ext_header_uint(struct strbuf *sb,
+					  const char *keyword,
+					  uintmax_t value)
+{
+	char buf[40]; /* big enough for 2^128 in decimal, plus NUL */
+	int len;
+
+	len = xsnprintf(buf, sizeof(buf), "%"PRIuMAX, value);
+	strbuf_append_ext_header(sb, keyword, buf, len);
+}
+
 static unsigned int ustar_header_chksum(const struct ustar_header *header)
 {
 	const unsigned char *p = (const unsigned char *)header;
@@ -208,7 +232,7 @@ static int write_tar_entry(struct archiver_args *args,
 	struct ustar_header header;
 	struct strbuf ext_header = STRBUF_INIT;
 	unsigned int old_mode = mode;
-	unsigned long size;
+	unsigned long size, size_in_header;
 	void *buffer;
 	int err = 0;
 
@@ -267,7 +291,13 @@ static int write_tar_entry(struct archiver_args *args,
 			memcpy(header.linkname, buffer, size);
 	}
 
-	prepare_header(args, &header, mode, size);
+	size_in_header = size;
+	if (S_ISREG(mode) && size > USTAR_MAX_SIZE) {
+		size_in_header = 0;
+		strbuf_append_ext_header_uint(&ext_header, "size", size);
+	}
+
+	prepare_header(args, &header, mode, size_in_header);
 
 	if (ext_header.len > 0) {
 		err = write_extended_header(args, sha1, ext_header.buf,
@@ -289,15 +319,25 @@ static int write_tar_entry(struct archiver_args *args,
 	return err;
 }
 
-static int write_global_extended_header(struct archiver_args *args)
+static void write_global_extended_header(struct archiver_args *args)
 {
 	const unsigned char *sha1 = args->commit_sha1;
 	struct strbuf ext_header = STRBUF_INIT;
 	struct ustar_header header;
 	unsigned int mode;
-	int err = 0;
 
-	strbuf_append_ext_header(&ext_header, "comment", sha1_to_hex(sha1), 40);
+	if (sha1)
+		strbuf_append_ext_header(&ext_header, "comment",
+					 sha1_to_hex(sha1), 40);
+	if (args->time > USTAR_MAX_MTIME) {
+		strbuf_append_ext_header_uint(&ext_header, "mtime",
+					      args->time);
+		args->time = USTAR_MAX_MTIME;
+	}
+
+	if (!ext_header.len)
+		return;
+
 	memset(&header, 0, sizeof(header));
 	*header.typeflag = TYPEFLAG_GLOBAL_HEADER;
 	mode = 0100666;
@@ -306,7 +346,6 @@ static int write_global_extended_header(struct archiver_args *args)
 	write_blocked(&header, sizeof(header));
 	write_blocked(ext_header.buf, ext_header.len);
 	strbuf_release(&ext_header);
-	return err;
 }
 
 static struct archiver **tar_filters;
@@ -382,10 +421,8 @@ static int write_tar_archive(const struct archiver *ar,
 {
 	int err = 0;
 
-	if (args->commit_sha1)
-		err = write_global_extended_header(args);
-	if (!err)
-		err = write_archive_entries(args, write_tar_entry);
+	write_global_extended_header(args);
+	err = write_archive_entries(args, write_tar_entry);
 	if (!err)
 		write_trailer();
 	return err;
diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh
index 4b68bbafbe..96d208da25 100755
--- a/t/t5000-tar-tree.sh
+++ b/t/t5000-tar-tree.sh
@@ -319,4 +319,78 @@ test_expect_success 'catch non-matching pathspec' '
 	test_must_fail git archive -v HEAD -- "*.abc" >/dev/null
 '
 
+# Pull the size and date of each entry in a tarfile using the system tar.
+#
+# We'll pull out only the year from the date; that avoids any question of
+# timezones impacting the result (as long as we keep our test times away from a
+# year boundary; our reference times are all in August).
+#
+# The output of tar_info is expected to be "<size> <year>", both in decimal. It
+# ignores the return value of tar. We have to do this, because some of our test
+# input is only partial (the real data is 64GB in some cases).
+tar_info () {
+	"$TAR" tvf "$1" |
+	awk '{
+		split($4, date, "-")
+		print $3 " " date[1]
+	}'
+}
+
+# See if our system tar can handle a tar file with huge sizes and dates far in
+# the future, and that we can actually parse its output.
+#
+# The reference file was generated by GNU tar, and the magic time and size are
+# both octal 01000000000001, which overflows normal ustar fields.
+test_lazy_prereq TAR_HUGE '
+	echo "68719476737 4147" >expect &&
+	tar_info "$TEST_DIRECTORY"/t5000/huge-and-future.tar >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'set up repository with huge blob' '
+	obj_d=19 &&
+	obj_f=f9c8273ec45a8938e6999cb59b3ff66739902a &&
+	obj=${obj_d}${obj_f} &&
+	mkdir -p .git/objects/$obj_d &&
+	cp "$TEST_DIRECTORY"/t5000/$obj .git/objects/$obj_d/$obj_f &&
+	rm -f .git/index &&
+	git update-index --add --cacheinfo 100644,$obj,huge &&
+	git commit -m huge
+'
+
+# We expect git to die with SIGPIPE here (otherwise we
+# would generate the whole 64GB).
+test_expect_success 'generate tar with huge size' '
+	{
+		git archive HEAD
+		echo $? >exit-code
+	} | test_copy_bytes 4096 >huge.tar &&
+	echo 141 >expect &&
+	test_cmp expect exit-code
+'
+
+test_expect_success TAR_HUGE 'system tar can read our huge size' '
+	echo 68719476737 >expect &&
+	tar_info huge.tar | cut -d" " -f1 >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'set up repository with far-future commit' '
+	rm -f .git/index &&
+	echo content >file &&
+	git add file &&
+	GIT_COMMITTER_DATE="@68719476737 +0000" \
+		git commit -m "tempori parendum"
+'
+
+test_expect_success 'generate tar with future mtime' '
+	git archive HEAD >future.tar
+'
+
+test_expect_success TAR_HUGE 'system tar can read our future mtime' '
+	echo 4147 >expect &&
+	tar_info future.tar | cut -d" " -f2 >actual &&
+	test_cmp expect actual
+'
+
 test_done
diff --git a/t/t5000/19f9c8273ec45a8938e6999cb59b3ff66739902a b/t/t5000/19f9c8273ec45a8938e6999cb59b3ff66739902a
new file mode 100644
index 0000000000..5cbe9ec312
Binary files /dev/null and b/t/t5000/19f9c8273ec45a8938e6999cb59b3ff66739902a differ
diff --git a/t/t5000/huge-and-future.tar b/t/t5000/huge-and-future.tar
new file mode 100644
index 0000000000..63155e1855
Binary files /dev/null and b/t/t5000/huge-and-future.tar differ
diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh
index 74d740de41..2e0ba3ebd8 100755
--- a/t/t9300-fast-import.sh
+++ b/t/t9300-fast-import.sh
@@ -7,23 +7,6 @@ test_description='test git fast-import utility'
 . ./test-lib.sh
 . "$TEST_DIRECTORY"/diff-lib.sh ;# test-lib chdir's into trash
 
-# Print $1 bytes from stdin to stdout.
-#
-# This could be written as "head -c $1", but IRIX "head" does not
-# support the -c option.
-head_c () {
-	perl -e '
-		my $len = $ARGV[1];
-		while ($len > 0) {
-			my $s;
-			my $nread = sysread(STDIN, $s, $len);
-			die "cannot read: $!" unless defined($nread);
-			print $s;
-			$len -= $nread;
-		}
-	' - "$1"
-}
-
 verify_packs () {
 	for p in .git/objects/pack/*.pack
 	do
@@ -2481,7 +2464,7 @@ test_expect_success PIPE 'R: copy using cat-file' '
 
 		read blob_id type size <&3 &&
 		echo "$blob_id $type $size" >response &&
-		head_c $size >blob <&3 &&
+		test_copy_bytes $size >blob <&3 &&
 		read newline <&3 &&
 
 		cat <<-EOF &&
@@ -2524,7 +2507,7 @@ test_expect_success PIPE 'R: print blob mid-commit' '
 		EOF
 
 		read blob_id type size <&3 &&
-		head_c $size >actual <&3 &&
+		test_copy_bytes $size >actual <&3 &&
 		read newline <&3 &&
 
 		echo
@@ -2559,7 +2542,7 @@ test_expect_success PIPE 'R: print staged blob within commit' '
 		echo "cat-blob $to_get" &&
 
 		read blob_id type size <&3 &&
-		head_c $size >actual <&3 &&
+		test_copy_bytes $size >actual <&3 &&
 		read newline <&3 &&
 
 		echo deleteall
diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index 48884d5208..90856d67e5 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -961,3 +961,17 @@ test_env () {
 		done
 	)
 }
+
+# Read up to "$1" bytes (or to EOF) from stdin and write them to stdout.
+test_copy_bytes () {
+	perl -e '
+		my $len = $ARGV[1];
+		while ($len > 0) {
+			my $s;
+			my $nread = sysread(STDIN, $s, $len);
+			die "cannot read: $!" unless defined($nread);
+			print $s;
+			$len -= $nread;
+		}
+	' - "$1"
+}