github.com/ader1990/go@v0.0.0-20140630135419-8c24447fa791/src/pkg/archive/tar/reader.go

github.com/ader1990/go@v0.0.0-20140630135419-8c24447fa791/src/pkg/archive/tar/reader.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package tar
     6  
     7  // TODO(dsymonds):
     8  //   - pax extensions
     9  
    10  import (
    11  	"bytes"
    12  	"errors"
    13  	"io"
    14  	"io/ioutil"
    15  	"os"
    16  	"strconv"
    17  	"strings"
    18  	"time"
    19  )
    20  
    21  var (
    22  	ErrHeader = errors.New("archive/tar: invalid tar header")
    23  )
    24  
    25  const maxNanoSecondIntSize = 9
    26  
// A Reader provides sequential access to the contents of a tar archive.
// A tar archive consists of a sequence of files.
// The Next method advances to the next file in the archive (including the first),
// and then it can be treated as an io.Reader to access the file's data.
type Reader struct {
	r    io.Reader      // underlying archive stream
	err  error          // last error recorded; while non-nil, Next returns it without reading
	pad  int64          // amount of padding (ignored) after current file entry
	curr numBytesReader // reader for current file entry
}
    37  
// A numBytesReader is an io.Reader with a numBytes method, returning the number
// of bytes remaining in the underlying encoded data.
// Reader.skipUnread uses that count to skip whatever part of the current
// entry has not been consumed.
type numBytesReader interface {
	io.Reader
	numBytes() int64
}
    44  
// A regFileReader is a numBytesReader for reading file data from a tar archive.
// It limits reads from the underlying reader to the nb bytes that belong to
// the current entry.
type regFileReader struct {
	r  io.Reader // underlying reader
	nb int64     // number of unread bytes for current file entry
}
    50  
// A sparseFileReader is a numBytesReader for reading sparse file data from a tar archive.
// Reads return the expanded file: the regions listed in sp are taken from rfr,
// and everything in between (up to tot) is produced as zero bytes.
type sparseFileReader struct {
	rfr *regFileReader // reads the sparse-encoded file data
	sp  []sparseEntry  // the sparse map for the file; fully read fragments are dropped from the front
	pos int64          // keeps track of file position
	tot int64          // total size of the file
}
    58  
// Keywords for GNU sparse files in a PAX extended header.
//
// The major/minor pair selects the sparse format version. The name and
// size/realsize keywords override the corresponding Header fields. The
// numblocks and map keywords carry the sparse map for formats 0.0 and 0.1;
// parsePAX folds the per-entry offset/numbytes records of format 0.0 into a
// single comma-separated map value.
const (
	paxGNUSparseNumBlocks = "GNU.sparse.numblocks"
	paxGNUSparseOffset    = "GNU.sparse.offset"
	paxGNUSparseNumBytes  = "GNU.sparse.numbytes"
	paxGNUSparseMap       = "GNU.sparse.map"
	paxGNUSparseName      = "GNU.sparse.name"
	paxGNUSparseMajor     = "GNU.sparse.major"
	paxGNUSparseMinor     = "GNU.sparse.minor"
	paxGNUSparseSize      = "GNU.sparse.size"
	paxGNUSparseRealSize  = "GNU.sparse.realsize"
)
    71  
// Layout of old GNU sparse headers.
//
// The *Offset values are byte offsets into a header block: where the sparse
// entries start in the main header, and where the "is extended" flag byte
// lives in the main and extension headers. The *NumEntries values are the
// number of sparse entries stored in each kind of header, and the *Size
// values are the widths in bytes of the two fields that make up one entry.
const (
	oldGNUSparseMainHeaderOffset               = 386
	oldGNUSparseMainHeaderIsExtendedOffset     = 482
	oldGNUSparseMainHeaderNumEntries           = 4
	oldGNUSparseExtendedHeaderIsExtendedOffset = 504
	oldGNUSparseExtendedHeaderNumEntries       = 21
	oldGNUSparseOffsetSize                     = 12
	oldGNUSparseNumBytesSize                   = 12
)
    82  
    83  // NewReader creates a new Reader reading from r.
    84  func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
    85  
    86  // Next advances to the next entry in the tar archive.
    87  func (tr *Reader) Next() (*Header, error) {
    88  	var hdr *Header
    89  	if tr.err == nil {
    90  		tr.skipUnread()
    91  	}
    92  	if tr.err != nil {
    93  		return hdr, tr.err
    94  	}
    95  	hdr = tr.readHeader()
    96  	if hdr == nil {
    97  		return hdr, tr.err
    98  	}
    99  	// Check for PAX/GNU header.
   100  	switch hdr.Typeflag {
   101  	case TypeXHeader:
   102  		//  PAX extended header
   103  		headers, err := parsePAX(tr)
   104  		if err != nil {
   105  			return nil, err
   106  		}
   107  		// We actually read the whole file,
   108  		// but this skips alignment padding
   109  		tr.skipUnread()
   110  		hdr = tr.readHeader()
   111  		mergePAX(hdr, headers)
   112  
   113  		// Check for a PAX format sparse file
   114  		sp, err := tr.checkForGNUSparsePAXHeaders(hdr, headers)
   115  		if err != nil {
   116  			tr.err = err
   117  			return nil, err
   118  		}
   119  		if sp != nil {
   120  			// Current file is a PAX format GNU sparse file.
   121  			// Set the current file reader to a sparse file reader.
   122  			tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size}
   123  		}
   124  		return hdr, nil
   125  	case TypeGNULongName:
   126  		// We have a GNU long name header. Its contents are the real file name.
   127  		realname, err := ioutil.ReadAll(tr)
   128  		if err != nil {
   129  			return nil, err
   130  		}
   131  		hdr, err := tr.Next()
   132  		hdr.Name = cString(realname)
   133  		return hdr, err
   134  	case TypeGNULongLink:
   135  		// We have a GNU long link header.
   136  		realname, err := ioutil.ReadAll(tr)
   137  		if err != nil {
   138  			return nil, err
   139  		}
   140  		hdr, err := tr.Next()
   141  		hdr.Linkname = cString(realname)
   142  		return hdr, err
   143  	}
   144  	return hdr, tr.err
   145  }
   146  
   147  // checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then
   148  // this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to
   149  // be treated as a regular file.
   150  func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) {
   151  	var sparseFormat string
   152  
   153  	// Check for sparse format indicators
   154  	major, majorOk := headers[paxGNUSparseMajor]
   155  	minor, minorOk := headers[paxGNUSparseMinor]
   156  	sparseName, sparseNameOk := headers[paxGNUSparseName]
   157  	_, sparseMapOk := headers[paxGNUSparseMap]
   158  	sparseSize, sparseSizeOk := headers[paxGNUSparseSize]
   159  	sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize]
   160  
   161  	// Identify which, if any, sparse format applies from which PAX headers are set
   162  	if majorOk && minorOk {
   163  		sparseFormat = major + "." + minor
   164  	} else if sparseNameOk && sparseMapOk {
   165  		sparseFormat = "0.1"
   166  	} else if sparseSizeOk {
   167  		sparseFormat = "0.0"
   168  	} else {
   169  		// Not a PAX format GNU sparse file.
   170  		return nil, nil
   171  	}
   172  
   173  	// Check for unknown sparse format
   174  	if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" {
   175  		return nil, nil
   176  	}
   177  
   178  	// Update hdr from GNU sparse PAX headers
   179  	if sparseNameOk {
   180  		hdr.Name = sparseName
   181  	}
   182  	if sparseSizeOk {
   183  		realSize, err := strconv.ParseInt(sparseSize, 10, 0)
   184  		if err != nil {
   185  			return nil, ErrHeader
   186  		}
   187  		hdr.Size = realSize
   188  	} else if sparseRealSizeOk {
   189  		realSize, err := strconv.ParseInt(sparseRealSize, 10, 0)
   190  		if err != nil {
   191  			return nil, ErrHeader
   192  		}
   193  		hdr.Size = realSize
   194  	}
   195  
   196  	// Set up the sparse map, according to the particular sparse format in use
   197  	var sp []sparseEntry
   198  	var err error
   199  	switch sparseFormat {
   200  	case "0.0", "0.1":
   201  		sp, err = readGNUSparseMap0x1(headers)
   202  	case "1.0":
   203  		sp, err = readGNUSparseMap1x0(tr.curr)
   204  	}
   205  	return sp, err
   206  }
   207  
   208  // mergePAX merges well known headers according to PAX standard.
   209  // In general headers with the same name as those found
   210  // in the header struct overwrite those found in the header
   211  // struct with higher precision or longer values. Esp. useful
   212  // for name and linkname fields.
   213  func mergePAX(hdr *Header, headers map[string]string) error {
   214  	for k, v := range headers {
   215  		switch k {
   216  		case paxPath:
   217  			hdr.Name = v
   218  		case paxLinkpath:
   219  			hdr.Linkname = v
   220  		case paxGname:
   221  			hdr.Gname = v
   222  		case paxUname:
   223  			hdr.Uname = v
   224  		case paxUid:
   225  			uid, err := strconv.ParseInt(v, 10, 0)
   226  			if err != nil {
   227  				return err
   228  			}
   229  			hdr.Uid = int(uid)
   230  		case paxGid:
   231  			gid, err := strconv.ParseInt(v, 10, 0)
   232  			if err != nil {
   233  				return err
   234  			}
   235  			hdr.Gid = int(gid)
   236  		case paxAtime:
   237  			t, err := parsePAXTime(v)
   238  			if err != nil {
   239  				return err
   240  			}
   241  			hdr.AccessTime = t
   242  		case paxMtime:
   243  			t, err := parsePAXTime(v)
   244  			if err != nil {
   245  				return err
   246  			}
   247  			hdr.ModTime = t
   248  		case paxCtime:
   249  			t, err := parsePAXTime(v)
   250  			if err != nil {
   251  				return err
   252  			}
   253  			hdr.ChangeTime = t
   254  		case paxSize:
   255  			size, err := strconv.ParseInt(v, 10, 0)
   256  			if err != nil {
   257  				return err
   258  			}
   259  			hdr.Size = int64(size)
   260  		default:
   261  			if strings.HasPrefix(k, paxXattr) {
   262  				if hdr.Xattrs == nil {
   263  					hdr.Xattrs = make(map[string]string)
   264  				}
   265  				hdr.Xattrs[k[len(paxXattr):]] = v
   266  			}
   267  		}
   268  	}
   269  	return nil
   270  }
   271  
   272  // parsePAXTime takes a string of the form %d.%d as described in
   273  // the PAX specification.
   274  func parsePAXTime(t string) (time.Time, error) {
   275  	buf := []byte(t)
   276  	pos := bytes.IndexByte(buf, '.')
   277  	var seconds, nanoseconds int64
   278  	var err error
   279  	if pos == -1 {
   280  		seconds, err = strconv.ParseInt(t, 10, 0)
   281  		if err != nil {
   282  			return time.Time{}, err
   283  		}
   284  	} else {
   285  		seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0)
   286  		if err != nil {
   287  			return time.Time{}, err
   288  		}
   289  		nano_buf := string(buf[pos+1:])
   290  		// Pad as needed before converting to a decimal.
   291  		// For example .030 -> .030000000 -> 30000000 nanoseconds
   292  		if len(nano_buf) < maxNanoSecondIntSize {
   293  			// Right pad
   294  			nano_buf += strings.Repeat("0", maxNanoSecondIntSize-len(nano_buf))
   295  		} else if len(nano_buf) > maxNanoSecondIntSize {
   296  			// Right truncate
   297  			nano_buf = nano_buf[:maxNanoSecondIntSize]
   298  		}
   299  		nanoseconds, err = strconv.ParseInt(string(nano_buf), 10, 0)
   300  		if err != nil {
   301  			return time.Time{}, err
   302  		}
   303  	}
   304  	ts := time.Unix(seconds, nanoseconds)
   305  	return ts, nil
   306  }
   307  
   308  // parsePAX parses PAX headers.
   309  // If an extended header (type 'x') is invalid, ErrHeader is returned
   310  func parsePAX(r io.Reader) (map[string]string, error) {
   311  	buf, err := ioutil.ReadAll(r)
   312  	if err != nil {
   313  		return nil, err
   314  	}
   315  
   316  	// For GNU PAX sparse format 0.0 support.
   317  	// This function transforms the sparse format 0.0 headers into sparse format 0.1 headers.
   318  	var sparseMap bytes.Buffer
   319  
   320  	headers := make(map[string]string)
   321  	// Each record is constructed as
   322  	//     "%d %s=%s\n", length, keyword, value
   323  	for len(buf) > 0 {
   324  		// or the header was empty to start with.
   325  		var sp int
   326  		// The size field ends at the first space.
   327  		sp = bytes.IndexByte(buf, ' ')
   328  		if sp == -1 {
   329  			return nil, ErrHeader
   330  		}
   331  		// Parse the first token as a decimal integer.
   332  		n, err := strconv.ParseInt(string(buf[:sp]), 10, 0)
   333  		if err != nil {
   334  			return nil, ErrHeader
   335  		}
   336  		// Extract everything between the decimal and the n -1 on the
   337  		// beginning to eat the ' ', -1 on the end to skip the newline.
   338  		var record []byte
   339  		record, buf = buf[sp+1:n-1], buf[n:]
   340  		// The first equals is guaranteed to mark the end of the key.
   341  		// Everything else is value.
   342  		eq := bytes.IndexByte(record, '=')
   343  		if eq == -1 {
   344  			return nil, ErrHeader
   345  		}
   346  		key, value := record[:eq], record[eq+1:]
   347  
   348  		keyStr := string(key)
   349  		if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes {
   350  			// GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map.
   351  			sparseMap.Write(value)
   352  			sparseMap.Write([]byte{','})
   353  		} else {
   354  			// Normal key. Set the value in the headers map.
   355  			headers[keyStr] = string(value)
   356  		}
   357  	}
   358  	if sparseMap.Len() != 0 {
   359  		// Add sparse info to headers, chopping off the extra comma
   360  		sparseMap.Truncate(sparseMap.Len() - 1)
   361  		headers[paxGNUSparseMap] = sparseMap.String()
   362  	}
   363  	return headers, nil
   364  }
   365  
   366  // cString parses bytes as a NUL-terminated C-style string.
   367  // If a NUL byte is not found then the whole slice is returned as a string.
   368  func cString(b []byte) string {
   369  	n := 0
   370  	for n < len(b) && b[n] != 0 {
   371  		n++
   372  	}
   373  	return string(b[0:n])
   374  }
   375  
   376  func (tr *Reader) octal(b []byte) int64 {
   377  	// Check for binary format first.
   378  	if len(b) > 0 && b[0]&0x80 != 0 {
   379  		var x int64
   380  		for i, c := range b {
   381  			if i == 0 {
   382  				c &= 0x7f // ignore signal bit in first byte
   383  			}
   384  			x = x<<8 | int64(c)
   385  		}
   386  		return x
   387  	}
   388  
   389  	// Because unused fields are filled with NULs, we need
   390  	// to skip leading NULs. Fields may also be padded with
   391  	// spaces or NULs.
   392  	// So we remove leading and trailing NULs and spaces to
   393  	// be sure.
   394  	b = bytes.Trim(b, " \x00")
   395  
   396  	if len(b) == 0 {
   397  		return 0
   398  	}
   399  	x, err := strconv.ParseUint(cString(b), 8, 64)
   400  	if err != nil {
   401  		tr.err = err
   402  	}
   403  	return int64(x)
   404  }
   405  
   406  // skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding.
   407  func (tr *Reader) skipUnread() {
   408  	nr := tr.numBytes() + tr.pad // number of bytes to skip
   409  	tr.curr, tr.pad = nil, 0
   410  	if sr, ok := tr.r.(io.Seeker); ok {
   411  		if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil {
   412  			return
   413  		}
   414  	}
   415  	_, tr.err = io.CopyN(ioutil.Discard, tr.r, nr)
   416  }
   417  
   418  func (tr *Reader) verifyChecksum(header []byte) bool {
   419  	if tr.err != nil {
   420  		return false
   421  	}
   422  
   423  	given := tr.octal(header[148:156])
   424  	unsigned, signed := checksum(header)
   425  	return given == unsigned || given == signed
   426  }
   427  
   428  func (tr *Reader) readHeader() *Header {
   429  	header := make([]byte, blockSize)
   430  	if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
   431  		return nil
   432  	}
   433  
   434  	// Two blocks of zero bytes marks the end of the archive.
   435  	if bytes.Equal(header, zeroBlock[0:blockSize]) {
   436  		if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
   437  			return nil
   438  		}
   439  		if bytes.Equal(header, zeroBlock[0:blockSize]) {
   440  			tr.err = io.EOF
   441  		} else {
   442  			tr.err = ErrHeader // zero block and then non-zero block
   443  		}
   444  		return nil
   445  	}
   446  
   447  	if !tr.verifyChecksum(header) {
   448  		tr.err = ErrHeader
   449  		return nil
   450  	}
   451  
   452  	// Unpack
   453  	hdr := new(Header)
   454  	s := slicer(header)
   455  
   456  	hdr.Name = cString(s.next(100))
   457  	hdr.Mode = tr.octal(s.next(8))
   458  	hdr.Uid = int(tr.octal(s.next(8)))
   459  	hdr.Gid = int(tr.octal(s.next(8)))
   460  	hdr.Size = tr.octal(s.next(12))
   461  	hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0)
   462  	s.next(8) // chksum
   463  	hdr.Typeflag = s.next(1)[0]
   464  	hdr.Linkname = cString(s.next(100))
   465  
   466  	// The remainder of the header depends on the value of magic.
   467  	// The original (v7) version of tar had no explicit magic field,
   468  	// so its magic bytes, like the rest of the block, are NULs.
   469  	magic := string(s.next(8)) // contains version field as well.
   470  	var format string
   471  	switch {
   472  	case magic[:6] == "ustar\x00": // POSIX tar (1003.1-1988)
   473  		if string(header[508:512]) == "tar\x00" {
   474  			format = "star"
   475  		} else {
   476  			format = "posix"
   477  		}
   478  	case magic == "ustar  \x00": // old GNU tar
   479  		format = "gnu"
   480  	}
   481  
   482  	switch format {
   483  	case "posix", "gnu", "star":
   484  		hdr.Uname = cString(s.next(32))
   485  		hdr.Gname = cString(s.next(32))
   486  		devmajor := s.next(8)
   487  		devminor := s.next(8)
   488  		if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
   489  			hdr.Devmajor = tr.octal(devmajor)
   490  			hdr.Devminor = tr.octal(devminor)
   491  		}
   492  		var prefix string
   493  		switch format {
   494  		case "posix", "gnu":
   495  			prefix = cString(s.next(155))
   496  		case "star":
   497  			prefix = cString(s.next(131))
   498  			hdr.AccessTime = time.Unix(tr.octal(s.next(12)), 0)
   499  			hdr.ChangeTime = time.Unix(tr.octal(s.next(12)), 0)
   500  		}
   501  		if len(prefix) > 0 {
   502  			hdr.Name = prefix + "/" + hdr.Name
   503  		}
   504  	}
   505  
   506  	if tr.err != nil {
   507  		tr.err = ErrHeader
   508  		return nil
   509  	}
   510  
   511  	// Maximum value of hdr.Size is 64 GB (12 octal digits),
   512  	// so there's no risk of int64 overflowing.
   513  	nb := int64(hdr.Size)
   514  	tr.pad = -nb & (blockSize - 1) // blockSize is a power of two
   515  
   516  	// Set the current file reader.
   517  	tr.curr = &regFileReader{r: tr.r, nb: nb}
   518  
   519  	// Check for old GNU sparse format entry.
   520  	if hdr.Typeflag == TypeGNUSparse {
   521  		// Get the real size of the file.
   522  		hdr.Size = tr.octal(header[483:495])
   523  
   524  		// Read the sparse map.
   525  		sp := tr.readOldGNUSparseMap(header)
   526  		if tr.err != nil {
   527  			return nil
   528  		}
   529  		// Current file is a GNU sparse file. Update the current file reader.
   530  		tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size}
   531  	}
   532  
   533  	return hdr
   534  }
   535  
// A sparseEntry holds a single entry in a sparse file's sparse map.
// A sparse entry indicates the offset and size in a sparse file of a
// block of data.
type sparseEntry struct {
	offset   int64 // position of the data block within the expanded file
	numBytes int64 // length of the data block in bytes
}
   543  
   544  // readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format.
   545  // The sparse map is stored in the tar header if it's small enough. If it's larger than four entries,
   546  // then one or more extension headers are used to store the rest of the sparse map.
   547  func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
   548  	isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0
   549  	spCap := oldGNUSparseMainHeaderNumEntries
   550  	if isExtended {
   551  		spCap += oldGNUSparseExtendedHeaderNumEntries
   552  	}
   553  	sp := make([]sparseEntry, 0, spCap)
   554  	s := slicer(header[oldGNUSparseMainHeaderOffset:])
   555  
   556  	// Read the four entries from the main tar header
   557  	for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ {
   558  		offset := tr.octal(s.next(oldGNUSparseOffsetSize))
   559  		numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize))
   560  		if tr.err != nil {
   561  			tr.err = ErrHeader
   562  			return nil
   563  		}
   564  		if offset == 0 && numBytes == 0 {
   565  			break
   566  		}
   567  		sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
   568  	}
   569  
   570  	for isExtended {
   571  		// There are more entries. Read an extension header and parse its entries.
   572  		sparseHeader := make([]byte, blockSize)
   573  		if _, tr.err = io.ReadFull(tr.r, sparseHeader); tr.err != nil {
   574  			return nil
   575  		}
   576  		isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0
   577  		s = slicer(sparseHeader)
   578  		for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ {
   579  			offset := tr.octal(s.next(oldGNUSparseOffsetSize))
   580  			numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize))
   581  			if tr.err != nil {
   582  				tr.err = ErrHeader
   583  				return nil
   584  			}
   585  			if offset == 0 && numBytes == 0 {
   586  				break
   587  			}
   588  			sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
   589  		}
   590  	}
   591  	return sp
   592  }
   593  
   594  // readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format version 1.0.
   595  // The sparse map is stored just before the file data and padded out to the nearest block boundary.
   596  func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) {
   597  	buf := make([]byte, 2*blockSize)
   598  	sparseHeader := buf[:blockSize]
   599  
   600  	// readDecimal is a helper function to read a decimal integer from the sparse map
   601  	// while making sure to read from the file in blocks of size blockSize
   602  	readDecimal := func() (int64, error) {
   603  		// Look for newline
   604  		nl := bytes.IndexByte(sparseHeader, '\n')
   605  		if nl == -1 {
   606  			if len(sparseHeader) >= blockSize {
   607  				// This is an error
   608  				return 0, ErrHeader
   609  			}
   610  			oldLen := len(sparseHeader)
   611  			newLen := oldLen + blockSize
   612  			if cap(sparseHeader) < newLen {
   613  				// There's more header, but we need to make room for the next block
   614  				copy(buf, sparseHeader)
   615  				sparseHeader = buf[:newLen]
   616  			} else {
   617  				// There's more header, and we can just reslice
   618  				sparseHeader = sparseHeader[:newLen]
   619  			}
   620  
   621  			// Now that sparseHeader is large enough, read next block
   622  			if _, err := io.ReadFull(r, sparseHeader[oldLen:newLen]); err != nil {
   623  				return 0, err
   624  			}
   625  
   626  			// Look for a newline in the new data
   627  			nl = bytes.IndexByte(sparseHeader[oldLen:newLen], '\n')
   628  			if nl == -1 {
   629  				// This is an error
   630  				return 0, ErrHeader
   631  			}
   632  			nl += oldLen // We want the position from the beginning
   633  		}
   634  		// Now that we've found a newline, read a number
   635  		n, err := strconv.ParseInt(string(sparseHeader[:nl]), 10, 0)
   636  		if err != nil {
   637  			return 0, ErrHeader
   638  		}
   639  
   640  		// Update sparseHeader to consume this number
   641  		sparseHeader = sparseHeader[nl+1:]
   642  		return n, nil
   643  	}
   644  
   645  	// Read the first block
   646  	if _, err := io.ReadFull(r, sparseHeader); err != nil {
   647  		return nil, err
   648  	}
   649  
   650  	// The first line contains the number of entries
   651  	numEntries, err := readDecimal()
   652  	if err != nil {
   653  		return nil, err
   654  	}
   655  
   656  	// Read all the entries
   657  	sp := make([]sparseEntry, 0, numEntries)
   658  	for i := int64(0); i < numEntries; i++ {
   659  		// Read the offset
   660  		offset, err := readDecimal()
   661  		if err != nil {
   662  			return nil, err
   663  		}
   664  		// Read numBytes
   665  		numBytes, err := readDecimal()
   666  		if err != nil {
   667  			return nil, err
   668  		}
   669  
   670  		sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
   671  	}
   672  
   673  	return sp, nil
   674  }
   675  
   676  // readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format version 0.1.
   677  // The sparse map is stored in the PAX headers.
   678  func readGNUSparseMap0x1(headers map[string]string) ([]sparseEntry, error) {
   679  	// Get number of entries
   680  	numEntriesStr, ok := headers[paxGNUSparseNumBlocks]
   681  	if !ok {
   682  		return nil, ErrHeader
   683  	}
   684  	numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0)
   685  	if err != nil {
   686  		return nil, ErrHeader
   687  	}
   688  
   689  	sparseMap := strings.Split(headers[paxGNUSparseMap], ",")
   690  
   691  	// There should be two numbers in sparseMap for each entry
   692  	if int64(len(sparseMap)) != 2*numEntries {
   693  		return nil, ErrHeader
   694  	}
   695  
   696  	// Loop through the entries in the sparse map
   697  	sp := make([]sparseEntry, 0, numEntries)
   698  	for i := int64(0); i < numEntries; i++ {
   699  		offset, err := strconv.ParseInt(sparseMap[2*i], 10, 0)
   700  		if err != nil {
   701  			return nil, ErrHeader
   702  		}
   703  		numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 0)
   704  		if err != nil {
   705  			return nil, ErrHeader
   706  		}
   707  		sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
   708  	}
   709  
   710  	return sp, nil
   711  }
   712  
   713  // numBytes returns the number of bytes left to read in the current file's entry
   714  // in the tar archive, or 0 if there is no current file.
   715  func (tr *Reader) numBytes() int64 {
   716  	if tr.curr == nil {
   717  		// No current file, so no bytes
   718  		return 0
   719  	}
   720  	return tr.curr.numBytes()
   721  }
   722  
   723  // Read reads from the current entry in the tar archive.
   724  // It returns 0, io.EOF when it reaches the end of that entry,
   725  // until Next is called to advance to the next entry.
   726  func (tr *Reader) Read(b []byte) (n int, err error) {
   727  	if tr.curr == nil {
   728  		return 0, io.EOF
   729  	}
   730  	n, err = tr.curr.Read(b)
   731  	if err != nil && err != io.EOF {
   732  		tr.err = err
   733  	}
   734  	return
   735  }
   736  
   737  func (rfr *regFileReader) Read(b []byte) (n int, err error) {
   738  	if rfr.nb == 0 {
   739  		// file consumed
   740  		return 0, io.EOF
   741  	}
   742  	if int64(len(b)) > rfr.nb {
   743  		b = b[0:rfr.nb]
   744  	}
   745  	n, err = rfr.r.Read(b)
   746  	rfr.nb -= int64(n)
   747  
   748  	if err == io.EOF && rfr.nb > 0 {
   749  		err = io.ErrUnexpectedEOF
   750  	}
   751  	return
   752  }
   753  
   754  // numBytes returns the number of bytes left to read in the file's data in the tar archive.
   755  func (rfr *regFileReader) numBytes() int64 {
   756  	return rfr.nb
   757  }
   758  
   759  // readHole reads a sparse file hole ending at offset toOffset
   760  func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int {
   761  	n64 := toOffset - sfr.pos
   762  	if n64 > int64(len(b)) {
   763  		n64 = int64(len(b))
   764  	}
   765  	n := int(n64)
   766  	for i := 0; i < n; i++ {
   767  		b[i] = 0
   768  	}
   769  	sfr.pos += n64
   770  	return n
   771  }
   772  
   773  // Read reads the sparse file data in expanded form.
   774  func (sfr *sparseFileReader) Read(b []byte) (n int, err error) {
   775  	if len(sfr.sp) == 0 {
   776  		// No more data fragments to read from.
   777  		if sfr.pos < sfr.tot {
   778  			// We're in the last hole
   779  			n = sfr.readHole(b, sfr.tot)
   780  			return
   781  		}
   782  		// Otherwise, we're at the end of the file
   783  		return 0, io.EOF
   784  	}
   785  	if sfr.pos < sfr.sp[0].offset {
   786  		// We're in a hole
   787  		n = sfr.readHole(b, sfr.sp[0].offset)
   788  		return
   789  	}
   790  
   791  	// We're not in a hole, so we'll read from the next data fragment
   792  	posInFragment := sfr.pos - sfr.sp[0].offset
   793  	bytesLeft := sfr.sp[0].numBytes - posInFragment
   794  	if int64(len(b)) > bytesLeft {
   795  		b = b[0:bytesLeft]
   796  	}
   797  
   798  	n, err = sfr.rfr.Read(b)
   799  	sfr.pos += int64(n)
   800  
   801  	if int64(n) == bytesLeft {
   802  		// We're done with this fragment
   803  		sfr.sp = sfr.sp[1:]
   804  	}
   805  
   806  	if err == io.EOF && sfr.pos < sfr.tot {
   807  		// We reached the end of the last fragment's data, but there's a final hole
   808  		err = nil
   809  	}
   810  	return
   811  }
   812  
   813  // numBytes returns the number of bytes left to read in the sparse file's
   814  // sparse-encoded data in the tar archive.
   815  func (sfr *sparseFileReader) numBytes() int64 {
   816  	return sfr.rfr.nb
   817  }