github.com/sean-/go@v0.0.0-20151219100004-97f854cd7bb6/src/archive/tar/reader.go

github.com/sean-/go@v0.0.0-20151219100004-97f854cd7bb6/src/archive/tar/reader.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package tar
     6  
     7  // TODO(dsymonds):
     8  //   - pax extensions
     9  
    10  import (
    11  	"bytes"
    12  	"errors"
    13  	"io"
    14  	"io/ioutil"
    15  	"math"
    16  	"os"
    17  	"strconv"
    18  	"strings"
    19  	"time"
    20  )
    21  
var (
	// ErrHeader is the error reported when a tar header, or a record inside
	// an extended header, is malformed.
	ErrHeader = errors.New("archive/tar: invalid tar header")
)
    25  
// maxNanoSecondIntSize is the number of decimal digits kept from the
// fractional part of a PAX timestamp; shorter fractions are zero-padded to
// this width and longer ones are truncated to it.
const maxNanoSecondIntSize = 9
    27  
// A Reader provides sequential access to the contents of a tar archive.
// A tar archive consists of a sequence of files.
// The Next method advances to the next file in the archive (including the first),
// and then it can be treated as an io.Reader to access the file's data.
type Reader struct {
	r       io.Reader       // underlying archive stream
	err     error           // sticky error; once set, Next and Read keep returning it
	pad     int64           // amount of padding (ignored) after current file entry
	curr    numBytesReader  // reader for current file entry
	hdrBuff [blockSize]byte // buffer to use in readHeader
}
    39  
// A parser carries error state across a run of field-parsing calls, so a
// caller can parse several header fields and check err once afterwards.
type parser struct {
	err error // Last error seen
}
    43  
// A numBytesReader is an io.Reader with a numBytes method, returning the number
// of bytes remaining in the underlying encoded data.
type numBytesReader interface {
	io.Reader
	// numBytes reports how many bytes of the entry remain in the archive
	// stream; skipUnread uses it to find the start of the next header.
	numBytes() int64
}
    50  
// A regFileReader is a numBytesReader for reading file data from a tar archive.
// It hands out at most nb bytes from r and then reports io.EOF.
type regFileReader struct {
	r  io.Reader // underlying reader
	nb int64     // number of unread bytes for current file entry
}
    56  
// A sparseFileReader is a numBytesReader for reading sparse file data from a
// tar archive. It pairs the compacted data stream with the sparse map that
// says where each stored fragment belongs in the logical file.
type sparseFileReader struct {
	rfr   numBytesReader // Reads the sparse-encoded file data
	sp    []sparseEntry  // The sparse map for the file
	pos   int64          // Keeps track of file position
	total int64          // Total size of the file
}
    65  
// A sparseEntry holds a single entry in a sparse file's sparse map.
//
// Sparse files are represented using a series of sparseEntrys.
// Despite the name, a sparseEntry represents an actual data fragment that
// references data found in the underlying archive stream. All regions not
// covered by a sparseEntry are logically filled with zeros.
//
// For example, if the underlying raw file contains the 8-byte data:
//	var compactData = "abcdefgh"
//
// And the sparse map has the following entries:
//	var sp = []sparseEntry{
//		{offset: 2,  numBytes: 5}, // Data fragment for [2..6]
//		{offset: 18, numBytes: 3}, // Data fragment for [18..20]
//	}
//
// Then the content of the resulting sparse file with a "real" size of 25 is:
//	var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
type sparseEntry struct {
	offset   int64 // Starting position of the fragment
	numBytes int64 // Length of the fragment
}
    88  
// Keywords for GNU sparse files in a PAX extended header
const (
	paxGNUSparseNumBlocks = "GNU.sparse.numblocks" // number of entries in the sparse map (formats 0.0 and 0.1)
	paxGNUSparseOffset    = "GNU.sparse.offset"    // format 0.0: fragment offset; folded into the map by parsePAX
	paxGNUSparseNumBytes  = "GNU.sparse.numbytes"  // format 0.0: fragment length; folded into the map by parsePAX
	paxGNUSparseMap       = "GNU.sparse.map"       // comma-separated offset,length pairs
	paxGNUSparseName      = "GNU.sparse.name"      // replaces Header.Name when present
	paxGNUSparseMajor     = "GNU.sparse.major"     // major part of the sparse format version
	paxGNUSparseMinor     = "GNU.sparse.minor"     // minor part of the sparse format version
	paxGNUSparseSize      = "GNU.sparse.size"      // replaces Header.Size when present
	paxGNUSparseRealSize  = "GNU.sparse.realsize"  // replaces Header.Size when GNU.sparse.size is absent
)
   101  
// Layout of old GNU sparse headers: byte offsets inside a header block,
// entry counts per block, and field widths in bytes.
const (
	oldGNUSparseMainHeaderOffset               = 386 // start of the sparse entries in the main header
	oldGNUSparseMainHeaderIsExtendedOffset     = 482 // non-zero byte here means extension headers follow
	oldGNUSparseMainHeaderNumEntries           = 4   // entries stored in the main header
	oldGNUSparseExtendedHeaderIsExtendedOffset = 504 // non-zero byte here means another extension header follows
	oldGNUSparseExtendedHeaderNumEntries       = 21  // entries stored in each extension header
	oldGNUSparseOffsetSize                     = 12  // width of an entry's offset field
	oldGNUSparseNumBytesSize                   = 12  // width of an entry's length field
)
   112  
   113  // NewReader creates a new Reader reading from r.
   114  func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
   115  
   116  // Next advances to the next entry in the tar archive.
   117  //
   118  // io.EOF is returned at the end of the input.
   119  func (tr *Reader) Next() (*Header, error) {
   120  	if tr.err != nil {
   121  		return nil, tr.err
   122  	}
   123  
   124  	var hdr *Header
   125  	var extHdrs map[string]string
   126  
   127  	// Externally, Next iterates through the tar archive as if it is a series of
   128  	// files. Internally, the tar format often uses fake "files" to add meta
   129  	// data that describes the next file. These meta data "files" should not
   130  	// normally be visible to the outside. As such, this loop iterates through
   131  	// one or more "header files" until it finds a "normal file".
   132  loop:
   133  	for {
   134  		tr.err = tr.skipUnread()
   135  		if tr.err != nil {
   136  			return nil, tr.err
   137  		}
   138  
   139  		hdr = tr.readHeader()
   140  		if tr.err != nil {
   141  			return nil, tr.err
   142  		}
   143  
   144  		// Check for PAX/GNU special headers and files.
   145  		switch hdr.Typeflag {
   146  		case TypeXHeader:
   147  			extHdrs, tr.err = parsePAX(tr)
   148  			if tr.err != nil {
   149  				return nil, tr.err
   150  			}
   151  			continue loop // This is a meta header affecting the next header
   152  		case TypeGNULongName, TypeGNULongLink:
   153  			var realname []byte
   154  			realname, tr.err = ioutil.ReadAll(tr)
   155  			if tr.err != nil {
   156  				return nil, tr.err
   157  			}
   158  
   159  			// Convert GNU extensions to use PAX headers.
   160  			if extHdrs == nil {
   161  				extHdrs = make(map[string]string)
   162  			}
   163  			var p parser
   164  			switch hdr.Typeflag {
   165  			case TypeGNULongName:
   166  				extHdrs[paxPath] = p.parseString(realname)
   167  			case TypeGNULongLink:
   168  				extHdrs[paxLinkpath] = p.parseString(realname)
   169  			}
   170  			if p.err != nil {
   171  				tr.err = p.err
   172  				return nil, tr.err
   173  			}
   174  			continue loop // This is a meta header affecting the next header
   175  		default:
   176  			mergePAX(hdr, extHdrs)
   177  
   178  			// Check for a PAX format sparse file
   179  			sp, err := tr.checkForGNUSparsePAXHeaders(hdr, extHdrs)
   180  			if err != nil {
   181  				tr.err = err
   182  				return nil, err
   183  			}
   184  			if sp != nil {
   185  				// Current file is a PAX format GNU sparse file.
   186  				// Set the current file reader to a sparse file reader.
   187  				tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
   188  				if tr.err != nil {
   189  					return nil, tr.err
   190  				}
   191  			}
   192  			break loop // This is a file, so stop
   193  		}
   194  	}
   195  	return hdr, nil
   196  }
   197  
   198  // checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then
   199  // this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to
   200  // be treated as a regular file.
   201  func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) {
   202  	var sparseFormat string
   203  
   204  	// Check for sparse format indicators
   205  	major, majorOk := headers[paxGNUSparseMajor]
   206  	minor, minorOk := headers[paxGNUSparseMinor]
   207  	sparseName, sparseNameOk := headers[paxGNUSparseName]
   208  	_, sparseMapOk := headers[paxGNUSparseMap]
   209  	sparseSize, sparseSizeOk := headers[paxGNUSparseSize]
   210  	sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize]
   211  
   212  	// Identify which, if any, sparse format applies from which PAX headers are set
   213  	if majorOk && minorOk {
   214  		sparseFormat = major + "." + minor
   215  	} else if sparseNameOk && sparseMapOk {
   216  		sparseFormat = "0.1"
   217  	} else if sparseSizeOk {
   218  		sparseFormat = "0.0"
   219  	} else {
   220  		// Not a PAX format GNU sparse file.
   221  		return nil, nil
   222  	}
   223  
   224  	// Check for unknown sparse format
   225  	if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" {
   226  		return nil, nil
   227  	}
   228  
   229  	// Update hdr from GNU sparse PAX headers
   230  	if sparseNameOk {
   231  		hdr.Name = sparseName
   232  	}
   233  	if sparseSizeOk {
   234  		realSize, err := strconv.ParseInt(sparseSize, 10, 0)
   235  		if err != nil {
   236  			return nil, ErrHeader
   237  		}
   238  		hdr.Size = realSize
   239  	} else if sparseRealSizeOk {
   240  		realSize, err := strconv.ParseInt(sparseRealSize, 10, 0)
   241  		if err != nil {
   242  			return nil, ErrHeader
   243  		}
   244  		hdr.Size = realSize
   245  	}
   246  
   247  	// Set up the sparse map, according to the particular sparse format in use
   248  	var sp []sparseEntry
   249  	var err error
   250  	switch sparseFormat {
   251  	case "0.0", "0.1":
   252  		sp, err = readGNUSparseMap0x1(headers)
   253  	case "1.0":
   254  		sp, err = readGNUSparseMap1x0(tr.curr)
   255  	}
   256  	return sp, err
   257  }
   258  
   259  // mergePAX merges well known headers according to PAX standard.
   260  // In general headers with the same name as those found
   261  // in the header struct overwrite those found in the header
   262  // struct with higher precision or longer values. Esp. useful
   263  // for name and linkname fields.
   264  func mergePAX(hdr *Header, headers map[string]string) error {
   265  	for k, v := range headers {
   266  		switch k {
   267  		case paxPath:
   268  			hdr.Name = v
   269  		case paxLinkpath:
   270  			hdr.Linkname = v
   271  		case paxGname:
   272  			hdr.Gname = v
   273  		case paxUname:
   274  			hdr.Uname = v
   275  		case paxUid:
   276  			uid, err := strconv.ParseInt(v, 10, 0)
   277  			if err != nil {
   278  				return err
   279  			}
   280  			hdr.Uid = int(uid)
   281  		case paxGid:
   282  			gid, err := strconv.ParseInt(v, 10, 0)
   283  			if err != nil {
   284  				return err
   285  			}
   286  			hdr.Gid = int(gid)
   287  		case paxAtime:
   288  			t, err := parsePAXTime(v)
   289  			if err != nil {
   290  				return err
   291  			}
   292  			hdr.AccessTime = t
   293  		case paxMtime:
   294  			t, err := parsePAXTime(v)
   295  			if err != nil {
   296  				return err
   297  			}
   298  			hdr.ModTime = t
   299  		case paxCtime:
   300  			t, err := parsePAXTime(v)
   301  			if err != nil {
   302  				return err
   303  			}
   304  			hdr.ChangeTime = t
   305  		case paxSize:
   306  			size, err := strconv.ParseInt(v, 10, 0)
   307  			if err != nil {
   308  				return err
   309  			}
   310  			hdr.Size = int64(size)
   311  		default:
   312  			if strings.HasPrefix(k, paxXattr) {
   313  				if hdr.Xattrs == nil {
   314  					hdr.Xattrs = make(map[string]string)
   315  				}
   316  				hdr.Xattrs[k[len(paxXattr):]] = v
   317  			}
   318  		}
   319  	}
   320  	return nil
   321  }
   322  
   323  // parsePAXTime takes a string of the form %d.%d as described in
   324  // the PAX specification.
   325  func parsePAXTime(t string) (time.Time, error) {
   326  	buf := []byte(t)
   327  	pos := bytes.IndexByte(buf, '.')
   328  	var seconds, nanoseconds int64
   329  	var err error
   330  	if pos == -1 {
   331  		seconds, err = strconv.ParseInt(t, 10, 0)
   332  		if err != nil {
   333  			return time.Time{}, err
   334  		}
   335  	} else {
   336  		seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0)
   337  		if err != nil {
   338  			return time.Time{}, err
   339  		}
   340  		nano_buf := string(buf[pos+1:])
   341  		// Pad as needed before converting to a decimal.
   342  		// For example .030 -> .030000000 -> 30000000 nanoseconds
   343  		if len(nano_buf) < maxNanoSecondIntSize {
   344  			// Right pad
   345  			nano_buf += strings.Repeat("0", maxNanoSecondIntSize-len(nano_buf))
   346  		} else if len(nano_buf) > maxNanoSecondIntSize {
   347  			// Right truncate
   348  			nano_buf = nano_buf[:maxNanoSecondIntSize]
   349  		}
   350  		nanoseconds, err = strconv.ParseInt(string(nano_buf), 10, 0)
   351  		if err != nil {
   352  			return time.Time{}, err
   353  		}
   354  	}
   355  	ts := time.Unix(seconds, nanoseconds)
   356  	return ts, nil
   357  }
   358  
   359  // parsePAX parses PAX headers.
   360  // If an extended header (type 'x') is invalid, ErrHeader is returned
   361  func parsePAX(r io.Reader) (map[string]string, error) {
   362  	buf, err := ioutil.ReadAll(r)
   363  	if err != nil {
   364  		return nil, err
   365  	}
   366  	sbuf := string(buf)
   367  
   368  	// For GNU PAX sparse format 0.0 support.
   369  	// This function transforms the sparse format 0.0 headers into sparse format 0.1 headers.
   370  	var sparseMap bytes.Buffer
   371  
   372  	headers := make(map[string]string)
   373  	// Each record is constructed as
   374  	//     "%d %s=%s\n", length, keyword, value
   375  	for len(sbuf) > 0 {
   376  		key, value, residual, err := parsePAXRecord(sbuf)
   377  		if err != nil {
   378  			return nil, ErrHeader
   379  		}
   380  		sbuf = residual
   381  
   382  		keyStr := string(key)
   383  		if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes {
   384  			// GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map.
   385  			sparseMap.WriteString(value)
   386  			sparseMap.Write([]byte{','})
   387  		} else {
   388  			// Normal key. Set the value in the headers map.
   389  			headers[keyStr] = string(value)
   390  		}
   391  	}
   392  	if sparseMap.Len() != 0 {
   393  		// Add sparse info to headers, chopping off the extra comma
   394  		sparseMap.Truncate(sparseMap.Len() - 1)
   395  		headers[paxGNUSparseMap] = sparseMap.String()
   396  	}
   397  	return headers, nil
   398  }
   399  
   400  // parsePAXRecord parses the input PAX record string into a key-value pair.
   401  // If parsing is successful, it will slice off the currently read record and
   402  // return the remainder as r.
   403  //
   404  // A PAX record is of the following form:
   405  //	"%d %s=%s\n" % (size, key, value)
   406  func parsePAXRecord(s string) (k, v, r string, err error) {
   407  	// The size field ends at the first space.
   408  	sp := strings.IndexByte(s, ' ')
   409  	if sp == -1 {
   410  		return "", "", s, ErrHeader
   411  	}
   412  
   413  	// Parse the first token as a decimal integer.
   414  	n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int
   415  	if perr != nil || n < 5 || int64(len(s)) < n {
   416  		return "", "", s, ErrHeader
   417  	}
   418  
   419  	// Extract everything between the space and the final newline.
   420  	rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:]
   421  	if nl != "\n" {
   422  		return "", "", s, ErrHeader
   423  	}
   424  
   425  	// The first equals separates the key from the value.
   426  	eq := strings.IndexByte(rec, '=')
   427  	if eq == -1 {
   428  		return "", "", s, ErrHeader
   429  	}
   430  	return rec[:eq], rec[eq+1:], rem, nil
   431  }
   432  
   433  // parseString parses bytes as a NUL-terminated C-style string.
   434  // If a NUL byte is not found then the whole slice is returned as a string.
   435  func (*parser) parseString(b []byte) string {
   436  	n := 0
   437  	for n < len(b) && b[n] != 0 {
   438  		n++
   439  	}
   440  	return string(b[0:n])
   441  }
   442  
   443  // parseNumeric parses the input as being encoded in either base-256 or octal.
   444  // This function may return negative numbers.
   445  // If parsing fails or an integer overflow occurs, err will be set.
   446  func (p *parser) parseNumeric(b []byte) int64 {
   447  	// Check for base-256 (binary) format first.
   448  	// If the first bit is set, then all following bits constitute a two's
   449  	// complement encoded number in big-endian byte order.
   450  	if len(b) > 0 && b[0]&0x80 != 0 {
   451  		// Handling negative numbers relies on the following identity:
   452  		//	-a-1 == ^a
   453  		//
   454  		// If the number is negative, we use an inversion mask to invert the
   455  		// data bytes and treat the value as an unsigned number.
   456  		var inv byte // 0x00 if positive or zero, 0xff if negative
   457  		if b[0]&0x40 != 0 {
   458  			inv = 0xff
   459  		}
   460  
   461  		var x uint64
   462  		for i, c := range b {
   463  			c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing
   464  			if i == 0 {
   465  				c &= 0x7f // Ignore signal bit in first byte
   466  			}
   467  			if (x >> 56) > 0 {
   468  				p.err = ErrHeader // Integer overflow
   469  				return 0
   470  			}
   471  			x = x<<8 | uint64(c)
   472  		}
   473  		if (x >> 63) > 0 {
   474  			p.err = ErrHeader // Integer overflow
   475  			return 0
   476  		}
   477  		if inv == 0xff {
   478  			return ^int64(x)
   479  		}
   480  		return int64(x)
   481  	}
   482  
   483  	// Normal case is base-8 (octal) format.
   484  	return p.parseOctal(b)
   485  }
   486  
   487  func (p *parser) parseOctal(b []byte) int64 {
   488  	// Because unused fields are filled with NULs, we need
   489  	// to skip leading NULs. Fields may also be padded with
   490  	// spaces or NULs.
   491  	// So we remove leading and trailing NULs and spaces to
   492  	// be sure.
   493  	b = bytes.Trim(b, " \x00")
   494  
   495  	if len(b) == 0 {
   496  		return 0
   497  	}
   498  	x, perr := strconv.ParseUint(p.parseString(b), 8, 64)
   499  	if perr != nil {
   500  		p.err = ErrHeader
   501  	}
   502  	return int64(x)
   503  }
   504  
   505  // skipUnread skips any unread bytes in the existing file entry, as well as any
   506  // alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is
   507  // encountered in the data portion; it is okay to hit io.EOF in the padding.
   508  //
   509  // Note that this function still works properly even when sparse files are being
   510  // used since numBytes returns the bytes remaining in the underlying io.Reader.
   511  func (tr *Reader) skipUnread() error {
   512  	dataSkip := tr.numBytes()      // Number of data bytes to skip
   513  	totalSkip := dataSkip + tr.pad // Total number of bytes to skip
   514  	tr.curr, tr.pad = nil, 0
   515  
   516  	// If possible, Seek to the last byte before the end of the data section.
   517  	// Do this because Seek is often lazy about reporting errors; this will mask
   518  	// the fact that the tar stream may be truncated. We can rely on the
   519  	// io.CopyN done shortly afterwards to trigger any IO errors.
   520  	var seekSkipped int64 // Number of bytes skipped via Seek
   521  	if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 {
   522  		// Not all io.Seeker can actually Seek. For example, os.Stdin implements
   523  		// io.Seeker, but calling Seek always returns an error and performs
   524  		// no action. Thus, we try an innocent seek to the current position
   525  		// to see if Seek is really supported.
   526  		pos1, err := sr.Seek(0, os.SEEK_CUR)
   527  		if err == nil {
   528  			// Seek seems supported, so perform the real Seek.
   529  			pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR)
   530  			if err != nil {
   531  				tr.err = err
   532  				return tr.err
   533  			}
   534  			seekSkipped = pos2 - pos1
   535  		}
   536  	}
   537  
   538  	var copySkipped int64 // Number of bytes skipped via CopyN
   539  	copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped)
   540  	if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip {
   541  		tr.err = io.ErrUnexpectedEOF
   542  	}
   543  	return tr.err
   544  }
   545  
   546  func (tr *Reader) verifyChecksum(header []byte) bool {
   547  	if tr.err != nil {
   548  		return false
   549  	}
   550  
   551  	var p parser
   552  	given := p.parseOctal(header[148:156])
   553  	unsigned, signed := checksum(header)
   554  	return p.err == nil && (given == unsigned || given == signed)
   555  }
   556  
// readHeader reads the next block header and assumes that the underlying reader
// is already aligned to a block boundary.
//
// On success it returns the decoded Header and sets tr.curr and tr.pad so
// that the entry's data can be read and later skipped. On failure it returns
// nil and records the cause in tr.err.
//
// The err will be set to io.EOF only when one of the following occurs:
//	* Exactly 0 bytes are read and EOF is hit.
//	* Exactly 1 block of zeros is read and EOF is hit.
//	* At least 2 blocks of zeros are read.
func (tr *Reader) readHeader() *Header {
	// Reuse the Reader's scratch block, cleared so that a short read cannot
	// leave bytes of a previous header behind.
	header := tr.hdrBuff[:]
	copy(header, zeroBlock)

	if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
		return nil // io.EOF is okay here
	}

	// Two blocks of zero bytes marks the end of the archive.
	if bytes.Equal(header, zeroBlock[0:blockSize]) {
		if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
			return nil // io.EOF is okay here
		}
		if bytes.Equal(header, zeroBlock[0:blockSize]) {
			tr.err = io.EOF
		} else {
			tr.err = ErrHeader // zero block and then non-zero block
		}
		return nil
	}

	if !tr.verifyChecksum(header) {
		tr.err = ErrHeader
		return nil
	}

	// Unpack
	// The slicer hands out consecutive fields, so the calls below must stay
	// in on-disk field order. Parse errors accumulate in p and are checked
	// once after all fields have been read.
	var p parser
	hdr := new(Header)
	s := slicer(header)

	hdr.Name = p.parseString(s.next(100))
	hdr.Mode = p.parseNumeric(s.next(8))
	hdr.Uid = int(p.parseNumeric(s.next(8)))
	hdr.Gid = int(p.parseNumeric(s.next(8)))
	hdr.Size = p.parseNumeric(s.next(12))
	hdr.ModTime = time.Unix(p.parseNumeric(s.next(12)), 0)
	s.next(8) // chksum
	hdr.Typeflag = s.next(1)[0]
	hdr.Linkname = p.parseString(s.next(100))

	// The remainder of the header depends on the value of magic.
	// The original (v7) version of tar had no explicit magic field,
	// so its magic bytes, like the rest of the block, are NULs.
	magic := string(s.next(8)) // contains version field as well.
	var format string
	switch {
	case magic[:6] == "ustar\x00": // POSIX tar (1003.1-1988)
		// A "tar\x00" trailer in the last four bytes selects the star layout.
		if string(header[508:512]) == "tar\x00" {
			format = "star"
		} else {
			format = "posix"
		}
	case magic == "ustar  \x00": // old GNU tar
		format = "gnu"
	}

	// format stays empty for any other magic; the extended fields below are
	// then left unparsed.
	switch format {
	case "posix", "gnu", "star":
		hdr.Uname = p.parseString(s.next(32))
		hdr.Gname = p.parseString(s.next(32))
		devmajor := s.next(8)
		devminor := s.next(8)
		// Device numbers are only decoded for character and block devices.
		if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
			hdr.Devmajor = p.parseNumeric(devmajor)
			hdr.Devminor = p.parseNumeric(devminor)
		}
		var prefix string
		switch format {
		case "posix", "gnu":
			prefix = p.parseString(s.next(155))
		case "star":
			// star uses a shorter prefix, followed by access and change times.
			prefix = p.parseString(s.next(131))
			hdr.AccessTime = time.Unix(p.parseNumeric(s.next(12)), 0)
			hdr.ChangeTime = time.Unix(p.parseNumeric(s.next(12)), 0)
		}
		if len(prefix) > 0 {
			hdr.Name = prefix + "/" + hdr.Name
		}
	}

	if p.err != nil {
		tr.err = p.err
		return nil
	}

	// Header-only types carry no data in the archive, whatever Size says.
	nb := hdr.Size
	if isHeaderOnlyType(hdr.Typeflag) {
		nb = 0
	}
	if nb < 0 {
		tr.err = ErrHeader
		return nil
	}

	// Set the current file reader.
	tr.pad = -nb & (blockSize - 1) // blockSize is a power of two
	tr.curr = &regFileReader{r: tr.r, nb: nb}

	// Check for old GNU sparse format entry.
	if hdr.Typeflag == TypeGNUSparse {
		// Get the real size of the file.
		hdr.Size = p.parseNumeric(header[483:495])
		if p.err != nil {
			tr.err = p.err
			return nil
		}

		// Read the sparse map.
		sp := tr.readOldGNUSparseMap(header)
		if tr.err != nil {
			return nil
		}

		// Current file is a GNU sparse file. Update the current file reader.
		tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
		if tr.err != nil {
			return nil
		}
	}

	return hdr
}
   687  
   688  // readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format.
   689  // The sparse map is stored in the tar header if it's small enough. If it's larger than four entries,
   690  // then one or more extension headers are used to store the rest of the sparse map.
   691  func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
   692  	var p parser
   693  	isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0
   694  	spCap := oldGNUSparseMainHeaderNumEntries
   695  	if isExtended {
   696  		spCap += oldGNUSparseExtendedHeaderNumEntries
   697  	}
   698  	sp := make([]sparseEntry, 0, spCap)
   699  	s := slicer(header[oldGNUSparseMainHeaderOffset:])
   700  
   701  	// Read the four entries from the main tar header
   702  	for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ {
   703  		offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize))
   704  		numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize))
   705  		if p.err != nil {
   706  			tr.err = p.err
   707  			return nil
   708  		}
   709  		if offset == 0 && numBytes == 0 {
   710  			break
   711  		}
   712  		sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
   713  	}
   714  
   715  	for isExtended {
   716  		// There are more entries. Read an extension header and parse its entries.
   717  		sparseHeader := make([]byte, blockSize)
   718  		if _, tr.err = io.ReadFull(tr.r, sparseHeader); tr.err != nil {
   719  			return nil
   720  		}
   721  		isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0
   722  		s = slicer(sparseHeader)
   723  		for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ {
   724  			offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize))
   725  			numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize))
   726  			if p.err != nil {
   727  				tr.err = p.err
   728  				return nil
   729  			}
   730  			if offset == 0 && numBytes == 0 {
   731  				break
   732  			}
   733  			sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
   734  		}
   735  	}
   736  	return sp
   737  }
   738  
   739  // readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format
   740  // version 1.0. The format of the sparse map consists of a series of
   741  // newline-terminated numeric fields. The first field is the number of entries
   742  // and is always present. Following this are the entries, consisting of two
   743  // fields (offset, numBytes). This function must stop reading at the end
   744  // boundary of the block containing the last newline.
   745  //
   746  // Note that the GNU manual says that numeric values should be encoded in octal
   747  // format. However, the GNU tar utility itself outputs these values in decimal.
   748  // As such, this library treats values as being encoded in decimal.
   749  func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) {
   750  	var cntNewline int64
   751  	var buf bytes.Buffer
   752  	var blk = make([]byte, blockSize)
   753  
   754  	// feedTokens copies data in numBlock chunks from r into buf until there are
   755  	// at least cnt newlines in buf. It will not read more blocks than needed.
   756  	var feedTokens = func(cnt int64) error {
   757  		for cntNewline < cnt {
   758  			if _, err := io.ReadFull(r, blk); err != nil {
   759  				if err == io.EOF {
   760  					err = io.ErrUnexpectedEOF
   761  				}
   762  				return err
   763  			}
   764  			buf.Write(blk)
   765  			for _, c := range blk {
   766  				if c == '\n' {
   767  					cntNewline++
   768  				}
   769  			}
   770  		}
   771  		return nil
   772  	}
   773  
   774  	// nextToken gets the next token delimited by a newline. This assumes that
   775  	// at least one newline exists in the buffer.
   776  	var nextToken = func() string {
   777  		cntNewline--
   778  		tok, _ := buf.ReadString('\n')
   779  		return tok[:len(tok)-1] // Cut off newline
   780  	}
   781  
   782  	// Parse for the number of entries.
   783  	// Use integer overflow resistant math to check this.
   784  	if err := feedTokens(1); err != nil {
   785  		return nil, err
   786  	}
   787  	numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int
   788  	if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
   789  		return nil, ErrHeader
   790  	}
   791  
   792  	// Parse for all member entries.
   793  	// numEntries is trusted after this since a potential attacker must have
   794  	// committed resources proportional to what this library used.
   795  	if err := feedTokens(2 * numEntries); err != nil {
   796  		return nil, err
   797  	}
   798  	sp := make([]sparseEntry, 0, numEntries)
   799  	for i := int64(0); i < numEntries; i++ {
   800  		offset, err := strconv.ParseInt(nextToken(), 10, 64)
   801  		if err != nil {
   802  			return nil, ErrHeader
   803  		}
   804  		numBytes, err := strconv.ParseInt(nextToken(), 10, 64)
   805  		if err != nil {
   806  			return nil, ErrHeader
   807  		}
   808  		sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
   809  	}
   810  	return sp, nil
   811  }
   812  
   813  // readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format
   814  // version 0.1. The sparse map is stored in the PAX headers.
   815  func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) {
   816  	// Get number of entries.
   817  	// Use integer overflow resistant math to check this.
   818  	numEntriesStr := extHdrs[paxGNUSparseNumBlocks]
   819  	numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int
   820  	if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
   821  		return nil, ErrHeader
   822  	}
   823  
   824  	// There should be two numbers in sparseMap for each entry.
   825  	sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",")
   826  	if int64(len(sparseMap)) != 2*numEntries {
   827  		return nil, ErrHeader
   828  	}
   829  
   830  	// Loop through the entries in the sparse map.
   831  	// numEntries is trusted now.
   832  	sp := make([]sparseEntry, 0, numEntries)
   833  	for i := int64(0); i < numEntries; i++ {
   834  		offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64)
   835  		if err != nil {
   836  			return nil, ErrHeader
   837  		}
   838  		numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64)
   839  		if err != nil {
   840  			return nil, ErrHeader
   841  		}
   842  		sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
   843  	}
   844  	return sp, nil
   845  }
   846  
   847  // numBytes returns the number of bytes left to read in the current file's entry
   848  // in the tar archive, or 0 if there is no current file.
   849  func (tr *Reader) numBytes() int64 {
   850  	if tr.curr == nil {
   851  		// No current file, so no bytes
   852  		return 0
   853  	}
   854  	return tr.curr.numBytes()
   855  }
   856  
   857  // Read reads from the current entry in the tar archive.
   858  // It returns 0, io.EOF when it reaches the end of that entry,
   859  // until Next is called to advance to the next entry.
   860  //
   861  // Calling Read on special types like TypeLink, TypeSymLink, TypeChar,
   862  // TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what
   863  // the Header.Size claims.
   864  func (tr *Reader) Read(b []byte) (n int, err error) {
   865  	if tr.err != nil {
   866  		return 0, tr.err
   867  	}
   868  	if tr.curr == nil {
   869  		return 0, io.EOF
   870  	}
   871  
   872  	n, err = tr.curr.Read(b)
   873  	if err != nil && err != io.EOF {
   874  		tr.err = err
   875  	}
   876  	return
   877  }
   878  
   879  func (rfr *regFileReader) Read(b []byte) (n int, err error) {
   880  	if rfr.nb == 0 {
   881  		// file consumed
   882  		return 0, io.EOF
   883  	}
   884  	if int64(len(b)) > rfr.nb {
   885  		b = b[0:rfr.nb]
   886  	}
   887  	n, err = rfr.r.Read(b)
   888  	rfr.nb -= int64(n)
   889  
   890  	if err == io.EOF && rfr.nb > 0 {
   891  		err = io.ErrUnexpectedEOF
   892  	}
   893  	return
   894  }
   895  
   896  // numBytes returns the number of bytes left to read in the file's data in the tar archive.
   897  func (rfr *regFileReader) numBytes() int64 {
   898  	return rfr.nb
   899  }
   900  
   901  // newSparseFileReader creates a new sparseFileReader, but validates all of the
   902  // sparse entries before doing so.
   903  func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) {
   904  	if total < 0 {
   905  		return nil, ErrHeader // Total size cannot be negative
   906  	}
   907  
   908  	// Validate all sparse entries. These are the same checks as performed by
   909  	// the BSD tar utility.
   910  	for i, s := range sp {
   911  		switch {
   912  		case s.offset < 0 || s.numBytes < 0:
   913  			return nil, ErrHeader // Negative values are never okay
   914  		case s.offset > math.MaxInt64-s.numBytes:
   915  			return nil, ErrHeader // Integer overflow with large length
   916  		case s.offset+s.numBytes > total:
   917  			return nil, ErrHeader // Region extends beyond the "real" size
   918  		case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset:
   919  			return nil, ErrHeader // Regions can't overlap and must be in order
   920  		}
   921  	}
   922  	return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil
   923  }
   924  
   925  // readHole reads a sparse hole ending at endOffset.
   926  func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int {
   927  	n64 := endOffset - sfr.pos
   928  	if n64 > int64(len(b)) {
   929  		n64 = int64(len(b))
   930  	}
   931  	n := int(n64)
   932  	for i := 0; i < n; i++ {
   933  		b[i] = 0
   934  	}
   935  	sfr.pos += n64
   936  	return n
   937  }
   938  
   939  // Read reads the sparse file data in expanded form.
   940  func (sfr *sparseFileReader) Read(b []byte) (n int, err error) {
   941  	// Skip past all empty fragments.
   942  	for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 {
   943  		sfr.sp = sfr.sp[1:]
   944  	}
   945  
   946  	// If there are no more fragments, then it is possible that there
   947  	// is one last sparse hole.
   948  	if len(sfr.sp) == 0 {
   949  		// This behavior matches the BSD tar utility.
   950  		// However, GNU tar stops returning data even if sfr.total is unmet.
   951  		if sfr.pos < sfr.total {
   952  			return sfr.readHole(b, sfr.total), nil
   953  		}
   954  		return 0, io.EOF
   955  	}
   956  
   957  	// In front of a data fragment, so read a hole.
   958  	if sfr.pos < sfr.sp[0].offset {
   959  		return sfr.readHole(b, sfr.sp[0].offset), nil
   960  	}
   961  
   962  	// In a data fragment, so read from it.
   963  	// This math is overflow free since we verify that offset and numBytes can
   964  	// be safely added when creating the sparseFileReader.
   965  	endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment
   966  	bytesLeft := endPos - sfr.pos                   // Bytes left in fragment
   967  	if int64(len(b)) > bytesLeft {
   968  		b = b[:bytesLeft]
   969  	}
   970  
   971  	n, err = sfr.rfr.Read(b)
   972  	sfr.pos += int64(n)
   973  	if err == io.EOF {
   974  		if sfr.pos < endPos {
   975  			err = io.ErrUnexpectedEOF // There was supposed to be more data
   976  		} else if sfr.pos < sfr.total {
   977  			err = nil // There is still an implicit sparse hole at the end
   978  		}
   979  	}
   980  
   981  	if sfr.pos == endPos {
   982  		sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it
   983  	}
   984  	return n, err
   985  }
   986  
   987  // numBytes returns the number of bytes left to read in the sparse file's
   988  // sparse-encoded data in the tar archive.
   989  func (sfr *sparseFileReader) numBytes() int64 {
   990  	return sfr.rfr.numBytes()
   991  }