From 0e36456bf9aa7b3041987634e478449215cdbd82 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Fri, 2 Sep 2016 16:17:37 -0700 Subject: [PATCH] archive/tar: fix and cleanup readOldGNUSparseMap * Assert that the format is GNU. Both GNU and STAR have some form of sparse file support with incompatible header structures. Worse yet, both formats use the 'S' type flag to indicate the presence of a sparse file. As such, we should check the format (based on magic numbers) and fail early. * Move realsize parsing logic into readOldGNUSparseMap. This is related to the sparse parsing logic and belongs here. * Fix the termination condition for parsing sparse fields. The termination condition for reading the sparse fields is to simply check if the first byte of the offset field is NULL. This does not seem to be documented in the GNU manual, but this is the check done by the both the GNU and BSD implementations: http://git.savannah.gnu.org/cgit/tar.git/tree/src/sparse.c?id=9a33077a7b7ad7d32815a21dee54eba63b38a81c#n731 https://github.com/libarchive/libarchive/blob/1fa9c7bf90f0862036a99896b0501c381584451a/libarchive/archive_read_support_format_tar.c#L2207 * Fix the parsing of sparse fields to use parseNumeric. This is what GNU and BSD do. The previous two links show that GNU and BSD both handle base-256 and base-8. * Detect truncated streams. The call to io.ReadFull does not check if the error is io.EOF. Getting io.EOF in this method is never okay and should always be converted to io.ErrUnexpectedEOF. * Simplify the function. The logic is essentially a do-while loop so we can remove some redundant code. Change-Id: Ib2f601b1a283eaec1e41b1d3396d649c80749c4e Reviewed-on: https://go-review.googlesource.com/28471 Reviewed-by: Russ Cox Run-TryBot: Russ Cox --- src/archive/tar/reader.go | 80 ++++++++++++++++++---------------- src/archive/tar/reader_test.go | 61 +++++++++++++++++++++++++- 2 files changed, 101 insertions(+), 40 deletions(-) diff --git a/src/archive/tar/reader.go b/src/archive/tar/reader.go index 4eff314c76..33e64687d8 100644 --- a/src/archive/tar/reader.go +++ b/src/archive/tar/reader.go @@ -208,13 +208,7 @@ func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block, extHdrs map[strin var sp []sparseEntry var err error if hdr.Typeflag == TypeGNUSparse { - var p parser - hdr.Size = p.parseNumeric(rawHdr.GNU().RealSize()) - if p.err != nil { - return p.err - } - - sp, err = tr.readOldGNUSparseMap(rawHdr) + sp, err = tr.readOldGNUSparseMap(hdr, rawHdr) if err != nil { return err } @@ -493,46 +487,56 @@ func (tr *Reader) readHeader() (*Header, *block, error) { return hdr, &tr.blk, p.err } -// readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format. -// The sparse map is stored in the tar header if it's small enough. If it's larger than four entries, -// then one or more extension headers are used to store the rest of the sparse map. -func (tr *Reader) readOldGNUSparseMap(blk *block) ([]sparseEntry, error) { - var p parser - var s sparseArray = blk.GNU().Sparse() - var sp = make([]sparseEntry, 0, s.MaxEntries()) - for i := 0; i < s.MaxEntries(); i++ { - offset := p.parseOctal(s.Entry(i).Offset()) - numBytes := p.parseOctal(s.Entry(i).NumBytes()) - if p.err != nil { - return nil, p.err - } - if offset == 0 && numBytes == 0 { - break - } - sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) +// readOldGNUSparseMap reads the sparse map from the old GNU sparse format. +// The sparse map is stored in the tar header if it's small enough. +// If it's larger than four entries, then one or more extension headers are used +// to store the rest of the sparse map. +// +// The Header.Size does not reflect the size of any extended headers used. +// Thus, this function will read from the raw io.Reader to fetch extra headers. +// This method mutates blk in the process. +func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, error) { + // Make sure that the input format is GNU. + // Unfortunately, the STAR format also has a sparse header format that uses + // the same type flag but has a completely different layout. + if blk.GetFormat() != formatGNU { + return nil, ErrHeader } - for s.IsExtended()[0] > 0 { - // There are more entries. Read an extension header and parse its entries. - var blk block - if _, err := io.ReadFull(tr.r, blk[:]); err != nil { - return nil, err - } - s = blk.Sparse() - + var p parser + hdr.Size = p.parseNumeric(blk.GNU().RealSize()) + if p.err != nil { + return nil, p.err + } + var s sparseArray = blk.GNU().Sparse() + var sp = make([]sparseEntry, 0, s.MaxEntries()) + for { for i := 0; i < s.MaxEntries(); i++ { - offset := p.parseOctal(s.Entry(i).Offset()) - numBytes := p.parseOctal(s.Entry(i).NumBytes()) + // This termination condition is identical to GNU and BSD tar. + if s.Entry(i).Offset()[0] == 0x00 { + break // Don't return, need to process extended headers (even if empty) + } + offset := p.parseNumeric(s.Entry(i).Offset()) + numBytes := p.parseNumeric(s.Entry(i).NumBytes()) if p.err != nil { return nil, p.err } - if offset == 0 && numBytes == 0 { - break - } sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) } + + if s.IsExtended()[0] > 0 { + // There are more entries. Read an extension header and parse its entries. + if _, err := io.ReadFull(tr.r, blk[:]); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return nil, err + } + s = blk.Sparse() + continue + } + return sp, nil // Done } - return sp, nil } // readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format diff --git a/src/archive/tar/reader_test.go b/src/archive/tar/reader_test.go index 15b942fafe..fa374d223a 100644 --- a/src/archive/tar/reader_test.go +++ b/src/archive/tar/reader_test.go @@ -618,6 +618,64 @@ func TestSparseFileReader(t *testing.T) { } } +func TestReadOldGNUSparseMap(t *testing.T) { + const ( + t00 = "00000000000\x0000000000000\x00" + t11 = "00000000001\x0000000000001\x00" + t12 = "00000000001\x0000000000002\x00" + t21 = "00000000002\x0000000000001\x00" + ) + + mkBlk := func(size, sp0, sp1, sp2, sp3, ext string, format int) *block { + var blk block + copy(blk.GNU().RealSize(), size) + copy(blk.GNU().Sparse().Entry(0), sp0) + copy(blk.GNU().Sparse().Entry(1), sp1) + copy(blk.GNU().Sparse().Entry(2), sp2) + copy(blk.GNU().Sparse().Entry(3), sp3) + copy(blk.GNU().Sparse().IsExtended(), ext) + if format != formatUnknown { + blk.SetFormat(format) + } + return &blk + } + + vectors := []struct { + data string // Input data + rawHdr *block // Input raw header + want []sparseEntry // Expected sparse entries to be outputted + err error // Expected error to be returned + }{ + {"", mkBlk("", "", "", "", "", "", formatUnknown), nil, ErrHeader}, + {"", mkBlk("1234", "fewa", "", "", "", "", formatGNU), nil, ErrHeader}, + {"", mkBlk("0031", "", "", "", "", "", formatGNU), nil, nil}, + {"", mkBlk("1234", t00, t11, "", "", "", formatGNU), + []sparseEntry{{0, 0}, {1, 1}}, nil}, + {"", mkBlk("1234", t11, t12, t21, t11, "", formatGNU), + []sparseEntry{{1, 1}, {1, 2}, {2, 1}, {1, 1}}, nil}, + {"", mkBlk("1234", t11, t12, t21, t11, "\x80", formatGNU), + []sparseEntry{}, io.ErrUnexpectedEOF}, + {t11 + t11, + mkBlk("1234", t11, t12, t21, t11, "\x80", formatGNU), + []sparseEntry{}, io.ErrUnexpectedEOF}, + {t11 + t21 + strings.Repeat("\x00", 512), + mkBlk("1234", t11, t12, t21, t11, "\x80", formatGNU), + []sparseEntry{{1, 1}, {1, 2}, {2, 1}, {1, 1}, {1, 1}, {2, 1}}, nil}, + } + + for i, v := range vectors { + tr := Reader{r: strings.NewReader(v.data)} + hdr := new(Header) + got, err := tr.readOldGNUSparseMap(hdr, v.rawHdr) + if !reflect.DeepEqual(got, v.want) && !(len(got) == 0 && len(v.want) == 0) { + t.Errorf("test %d, readOldGNUSparseMap(...): got %v, want %v", i, got, v.want) + } + if err != v.err { + t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err) + } + } +} + func TestReadGNUSparseMap0x1(t *testing.T) { const ( maxUint = ^uint(0) @@ -854,8 +912,7 @@ func TestReadTruncation(t *testing.T) { {pax + trash[:1], 0, io.ErrUnexpectedEOF}, {pax + trash[:511], 0, io.ErrUnexpectedEOF}, {sparse[:511], 0, io.ErrUnexpectedEOF}, - // TODO(dsnet): This should pass, but currently fails. - // {sparse[:512], 0, io.ErrUnexpectedEOF}, + {sparse[:512], 0, io.ErrUnexpectedEOF}, {sparse[:3584], 1, io.EOF}, {sparse[:9200], 1, io.EOF}, // Terminate in padding of sparse header {sparse[:9216], 1, io.EOF},