archive/tar: fix and cleanup readOldGNUSparseMap

* Assert that the format is GNU.
Both GNU and STAR have some form of sparse file support with
incompatible header structures. Worse yet, both formats use the
'S' type flag to indicate the presence of a sparse file.
As such, we should check the format (based on magic numbers)
and fail early.

* Move realsize parsing logic into readOldGNUSparseMap.
This is related to the sparse parsing logic and belongs here.

* Fix the termination condition for parsing sparse fields.
The termination condition for reading the sparse fields
is to simply check if the first byte of the offset field is NULL.
This does not seem to be documented in the GNU manual, but this is
the check done by the both the GNU and BSD implementations:
	http://git.savannah.gnu.org/cgit/tar.git/tree/src/sparse.c?id=9a33077a7b7ad7d32815a21dee54eba63b38a81c#n731
	1fa9c7bf90/libarchive/archive_read_support_format_tar.c (L2207)

* Fix the parsing of sparse fields to use parseNumeric.
This is what GNU and BSD do. The previous two links show that
GNU and BSD both handle base-256 and base-8.

* Detect truncated streams.
The call to io.ReadFull does not check if the error is io.EOF.
Getting io.EOF in this method is never okay and should always be
converted to io.ErrUnexpectedEOF.

* Simplify the function.
The logic is essentially a do-while loop so we can remove
some redundant code.

Change-Id: Ib2f601b1a283eaec1e41b1d3396d649c80749c4e
Reviewed-on: https://go-review.googlesource.com/28471
Reviewed-by: Russ Cox <rsc@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
This commit is contained in:
Joe Tsai 2016-09-02 16:17:37 -07:00 committed by Joe Tsai
parent 14204662c8
commit 0e36456bf9
2 changed files with 101 additions and 40 deletions

View file

@ -208,13 +208,7 @@ func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block, extHdrs map[strin
var sp []sparseEntry
var err error
if hdr.Typeflag == TypeGNUSparse {
var p parser
hdr.Size = p.parseNumeric(rawHdr.GNU().RealSize())
if p.err != nil {
return p.err
}
sp, err = tr.readOldGNUSparseMap(rawHdr)
sp, err = tr.readOldGNUSparseMap(hdr, rawHdr)
if err != nil {
return err
}
@ -493,46 +487,56 @@ func (tr *Reader) readHeader() (*Header, *block, error) {
return hdr, &tr.blk, p.err
}
// readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format.
// The sparse map is stored in the tar header if it's small enough. If it's larger than four entries,
// then one or more extension headers are used to store the rest of the sparse map.
func (tr *Reader) readOldGNUSparseMap(blk *block) ([]sparseEntry, error) {
var p parser
var s sparseArray = blk.GNU().Sparse()
var sp = make([]sparseEntry, 0, s.MaxEntries())
for i := 0; i < s.MaxEntries(); i++ {
offset := p.parseOctal(s.Entry(i).Offset())
numBytes := p.parseOctal(s.Entry(i).NumBytes())
if p.err != nil {
return nil, p.err
}
if offset == 0 && numBytes == 0 {
break
}
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
// readOldGNUSparseMap reads the sparse map from the old GNU sparse format.
// The sparse map is stored in the tar header if it's small enough.
// If it's larger than four entries, then one or more extension headers are used
// to store the rest of the sparse map.
//
// The Header.Size does not reflect the size of any extended headers used.
// Thus, this function will read from the raw io.Reader to fetch extra headers.
// This method mutates blk in the process.
func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, error) {
// Make sure that the input format is GNU.
// Unfortunately, the STAR format also has a sparse header format that uses
// the same type flag but has a completely different layout.
if blk.GetFormat() != formatGNU {
return nil, ErrHeader
}
for s.IsExtended()[0] > 0 {
// There are more entries. Read an extension header and parse its entries.
var blk block
if _, err := io.ReadFull(tr.r, blk[:]); err != nil {
return nil, err
}
s = blk.Sparse()
var p parser
hdr.Size = p.parseNumeric(blk.GNU().RealSize())
if p.err != nil {
return nil, p.err
}
var s sparseArray = blk.GNU().Sparse()
var sp = make([]sparseEntry, 0, s.MaxEntries())
for {
for i := 0; i < s.MaxEntries(); i++ {
offset := p.parseOctal(s.Entry(i).Offset())
numBytes := p.parseOctal(s.Entry(i).NumBytes())
// This termination condition is identical to GNU and BSD tar.
if s.Entry(i).Offset()[0] == 0x00 {
break // Don't return, need to process extended headers (even if empty)
}
offset := p.parseNumeric(s.Entry(i).Offset())
numBytes := p.parseNumeric(s.Entry(i).NumBytes())
if p.err != nil {
return nil, p.err
}
if offset == 0 && numBytes == 0 {
break
}
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
}
if s.IsExtended()[0] > 0 {
// There are more entries. Read an extension header and parse its entries.
if _, err := io.ReadFull(tr.r, blk[:]); err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
return nil, err
}
s = blk.Sparse()
continue
}
return sp, nil // Done
}
return sp, nil
}
// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format

View file

@ -618,6 +618,64 @@ func TestSparseFileReader(t *testing.T) {
}
}
func TestReadOldGNUSparseMap(t *testing.T) {
const (
t00 = "00000000000\x0000000000000\x00"
t11 = "00000000001\x0000000000001\x00"
t12 = "00000000001\x0000000000002\x00"
t21 = "00000000002\x0000000000001\x00"
)
mkBlk := func(size, sp0, sp1, sp2, sp3, ext string, format int) *block {
var blk block
copy(blk.GNU().RealSize(), size)
copy(blk.GNU().Sparse().Entry(0), sp0)
copy(blk.GNU().Sparse().Entry(1), sp1)
copy(blk.GNU().Sparse().Entry(2), sp2)
copy(blk.GNU().Sparse().Entry(3), sp3)
copy(blk.GNU().Sparse().IsExtended(), ext)
if format != formatUnknown {
blk.SetFormat(format)
}
return &blk
}
vectors := []struct {
data string // Input data
rawHdr *block // Input raw header
want []sparseEntry // Expected sparse entries to be outputted
err error // Expected error to be returned
}{
{"", mkBlk("", "", "", "", "", "", formatUnknown), nil, ErrHeader},
{"", mkBlk("1234", "fewa", "", "", "", "", formatGNU), nil, ErrHeader},
{"", mkBlk("0031", "", "", "", "", "", formatGNU), nil, nil},
{"", mkBlk("1234", t00, t11, "", "", "", formatGNU),
[]sparseEntry{{0, 0}, {1, 1}}, nil},
{"", mkBlk("1234", t11, t12, t21, t11, "", formatGNU),
[]sparseEntry{{1, 1}, {1, 2}, {2, 1}, {1, 1}}, nil},
{"", mkBlk("1234", t11, t12, t21, t11, "\x80", formatGNU),
[]sparseEntry{}, io.ErrUnexpectedEOF},
{t11 + t11,
mkBlk("1234", t11, t12, t21, t11, "\x80", formatGNU),
[]sparseEntry{}, io.ErrUnexpectedEOF},
{t11 + t21 + strings.Repeat("\x00", 512),
mkBlk("1234", t11, t12, t21, t11, "\x80", formatGNU),
[]sparseEntry{{1, 1}, {1, 2}, {2, 1}, {1, 1}, {1, 1}, {2, 1}}, nil},
}
for i, v := range vectors {
tr := Reader{r: strings.NewReader(v.data)}
hdr := new(Header)
got, err := tr.readOldGNUSparseMap(hdr, v.rawHdr)
if !reflect.DeepEqual(got, v.want) && !(len(got) == 0 && len(v.want) == 0) {
t.Errorf("test %d, readOldGNUSparseMap(...): got %v, want %v", i, got, v.want)
}
if err != v.err {
t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err)
}
}
}
func TestReadGNUSparseMap0x1(t *testing.T) {
const (
maxUint = ^uint(0)
@ -854,8 +912,7 @@ func TestReadTruncation(t *testing.T) {
{pax + trash[:1], 0, io.ErrUnexpectedEOF},
{pax + trash[:511], 0, io.ErrUnexpectedEOF},
{sparse[:511], 0, io.ErrUnexpectedEOF},
// TODO(dsnet): This should pass, but currently fails.
// {sparse[:512], 0, io.ErrUnexpectedEOF},
{sparse[:512], 0, io.ErrUnexpectedEOF},
{sparse[:3584], 1, io.EOF},
{sparse[:9200], 1, io.EOF}, // Terminate in padding of sparse header
{sparse[:9216], 1, io.EOF},