github.com/spotify/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/archive/tar/reader.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package tar
     6  
     7  // TODO(dsymonds):
     8  //   - pax extensions
     9  
    10  import (
    11  	"bytes"
    12  	"errors"
    13  	"io"
    14  	"io/ioutil"
    15  	"os"
    16  	"strconv"
    17  	"strings"
    18  	"time"
    19  )
    20  
    21  var (
    22  	ErrHeader = errors.New("archive/tar: invalid tar header")
    23  )
    24  
    25  const maxNanoSecondIntSize = 9
    26  
    27  // A Reader provides sequential access to the contents of a tar archive.
    28  // A tar archive consists of a sequence of files.
    29  // The Next method advances to the next file in the archive (including the first),
    30  // and then it can be treated as an io.Reader to access the file's data.
    31  type Reader struct {
    32  	r   io.Reader
    33  	err error
    34  	nb  int64 // number of unread bytes for current file entry
    35  	pad int64 // amount of padding (ignored) after current file entry
    36  }
    37  
    38  // NewReader creates a new Reader reading from r.
    39  func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
    40  
    41  // Next advances to the next entry in the tar archive.
    42  func (tr *Reader) Next() (*Header, error) {
    43  	var hdr *Header
    44  	if tr.err == nil {
    45  		tr.skipUnread()
    46  	}
    47  	if tr.err != nil {
    48  		return hdr, tr.err
    49  	}
    50  	hdr = tr.readHeader()
    51  	if hdr == nil {
    52  		return hdr, tr.err
    53  	}
    54  	// Check for PAX/GNU header.
    55  	switch hdr.Typeflag {
    56  	case TypeXHeader:
    57  		//  PAX extended header
    58  		headers, err := parsePAX(tr)
    59  		if err != nil {
    60  			return nil, err
    61  		}
    62  		// We actually read the whole file,
    63  		// but this skips alignment padding
    64  		tr.skipUnread()
    65  		hdr = tr.readHeader()
    66  		mergePAX(hdr, headers)
    67  		return hdr, nil
    68  	case TypeGNULongName:
    69  		// We have a GNU long name header. Its contents are the real file name.
    70  		realname, err := ioutil.ReadAll(tr)
    71  		if err != nil {
    72  			return nil, err
    73  		}
    74  		hdr, err := tr.Next()
    75  		hdr.Name = cString(realname)
    76  		return hdr, err
    77  	case TypeGNULongLink:
    78  		// We have a GNU long link header.
    79  		realname, err := ioutil.ReadAll(tr)
    80  		if err != nil {
    81  			return nil, err
    82  		}
    83  		hdr, err := tr.Next()
    84  		hdr.Linkname = cString(realname)
    85  		return hdr, err
    86  	}
    87  	return hdr, tr.err
    88  }
    89  
    90  // mergePAX merges well known headers according to PAX standard.
    91  // In general headers with the same name as those found
    92  // in the header struct overwrite those found in the header
    93  // struct with higher precision or longer values. Esp. useful
    94  // for name and linkname fields.
    95  func mergePAX(hdr *Header, headers map[string]string) error {
    96  	for k, v := range headers {
    97  		switch k {
    98  		case paxPath:
    99  			hdr.Name = v
   100  		case paxLinkpath:
   101  			hdr.Linkname = v
   102  		case paxGname:
   103  			hdr.Gname = v
   104  		case paxUname:
   105  			hdr.Uname = v
   106  		case paxUid:
   107  			uid, err := strconv.ParseInt(v, 10, 0)
   108  			if err != nil {
   109  				return err
   110  			}
   111  			hdr.Uid = int(uid)
   112  		case paxGid:
   113  			gid, err := strconv.ParseInt(v, 10, 0)
   114  			if err != nil {
   115  				return err
   116  			}
   117  			hdr.Gid = int(gid)
   118  		case paxAtime:
   119  			t, err := parsePAXTime(v)
   120  			if err != nil {
   121  				return err
   122  			}
   123  			hdr.AccessTime = t
   124  		case paxMtime:
   125  			t, err := parsePAXTime(v)
   126  			if err != nil {
   127  				return err
   128  			}
   129  			hdr.ModTime = t
   130  		case paxCtime:
   131  			t, err := parsePAXTime(v)
   132  			if err != nil {
   133  				return err
   134  			}
   135  			hdr.ChangeTime = t
   136  		case paxSize:
   137  			size, err := strconv.ParseInt(v, 10, 0)
   138  			if err != nil {
   139  				return err
   140  			}
   141  			hdr.Size = int64(size)
   142  		}
   143  
   144  	}
   145  	return nil
   146  }
   147  
   148  // parsePAXTime takes a string of the form %d.%d as described in
   149  // the PAX specification.
   150  func parsePAXTime(t string) (time.Time, error) {
   151  	buf := []byte(t)
   152  	pos := bytes.IndexByte(buf, '.')
   153  	var seconds, nanoseconds int64
   154  	var err error
   155  	if pos == -1 {
   156  		seconds, err = strconv.ParseInt(t, 10, 0)
   157  		if err != nil {
   158  			return time.Time{}, err
   159  		}
   160  	} else {
   161  		seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0)
   162  		if err != nil {
   163  			return time.Time{}, err
   164  		}
   165  		nano_buf := string(buf[pos+1:])
   166  		// Pad as needed before converting to a decimal.
   167  		// For example .030 -> .030000000 -> 30000000 nanoseconds
   168  		if len(nano_buf) < maxNanoSecondIntSize {
   169  			// Right pad
   170  			nano_buf += strings.Repeat("0", maxNanoSecondIntSize-len(nano_buf))
   171  		} else if len(nano_buf) > maxNanoSecondIntSize {
   172  			// Right truncate
   173  			nano_buf = nano_buf[:maxNanoSecondIntSize]
   174  		}
   175  		nanoseconds, err = strconv.ParseInt(string(nano_buf), 10, 0)
   176  		if err != nil {
   177  			return time.Time{}, err
   178  		}
   179  	}
   180  	ts := time.Unix(seconds, nanoseconds)
   181  	return ts, nil
   182  }
   183  
   184  // parsePAX parses PAX headers.
   185  // If an extended header (type 'x') is invalid, ErrHeader is returned
   186  func parsePAX(r io.Reader) (map[string]string, error) {
   187  	buf, err := ioutil.ReadAll(r)
   188  	if err != nil {
   189  		return nil, err
   190  	}
   191  	headers := make(map[string]string)
   192  	// Each record is constructed as
   193  	//     "%d %s=%s\n", length, keyword, value
   194  	for len(buf) > 0 {
   195  		// or the header was empty to start with.
   196  		var sp int
   197  		// The size field ends at the first space.
   198  		sp = bytes.IndexByte(buf, ' ')
   199  		if sp == -1 {
   200  			return nil, ErrHeader
   201  		}
   202  		// Parse the first token as a decimal integer.
   203  		n, err := strconv.ParseInt(string(buf[:sp]), 10, 0)
   204  		if err != nil {
   205  			return nil, ErrHeader
   206  		}
   207  		// Extract everything between the decimal and the n -1 on the
   208  		// beginning to to eat the ' ', -1 on the end to skip the newline.
   209  		var record []byte
   210  		record, buf = buf[sp+1:n-1], buf[n:]
   211  		// The first equals is guaranteed to mark the end of the key.
   212  		// Everything else is value.
   213  		eq := bytes.IndexByte(record, '=')
   214  		if eq == -1 {
   215  			return nil, ErrHeader
   216  		}
   217  		key, value := record[:eq], record[eq+1:]
   218  		headers[string(key)] = string(value)
   219  	}
   220  	return headers, nil
   221  }
   222  
   223  // cString parses bytes as a NUL-terminated C-style string.
   224  // If a NUL byte is not found then the whole slice is returned as a string.
   225  func cString(b []byte) string {
   226  	n := 0
   227  	for n < len(b) && b[n] != 0 {
   228  		n++
   229  	}
   230  	return string(b[0:n])
   231  }
   232  
   233  func (tr *Reader) octal(b []byte) int64 {
   234  	// Check for binary format first.
   235  	if len(b) > 0 && b[0]&0x80 != 0 {
   236  		var x int64
   237  		for i, c := range b {
   238  			if i == 0 {
   239  				c &= 0x7f // ignore signal bit in first byte
   240  			}
   241  			x = x<<8 | int64(c)
   242  		}
   243  		return x
   244  	}
   245  
   246  	// Because unused fields are filled with NULs, we need
   247  	// to skip leading NULs. Fields may also be padded with
   248  	// spaces or NULs.
   249  	// So we remove leading and trailing NULs and spaces to
   250  	// be sure.
   251  	b = bytes.Trim(b, " \x00")
   252  
   253  	if len(b) == 0 {
   254  		return 0
   255  	}
   256  	x, err := strconv.ParseUint(cString(b), 8, 64)
   257  	if err != nil {
   258  		tr.err = err
   259  	}
   260  	return int64(x)
   261  }
   262  
   263  // skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding.
   264  func (tr *Reader) skipUnread() {
   265  	nr := tr.nb + tr.pad // number of bytes to skip
   266  	tr.nb, tr.pad = 0, 0
   267  	if sr, ok := tr.r.(io.Seeker); ok {
   268  		if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil {
   269  			return
   270  		}
   271  	}
   272  	_, tr.err = io.CopyN(ioutil.Discard, tr.r, nr)
   273  }
   274  
   275  func (tr *Reader) verifyChecksum(header []byte) bool {
   276  	if tr.err != nil {
   277  		return false
   278  	}
   279  
   280  	given := tr.octal(header[148:156])
   281  	unsigned, signed := checksum(header)
   282  	return given == unsigned || given == signed
   283  }
   284  
   285  func (tr *Reader) readHeader() *Header {
   286  	header := make([]byte, blockSize)
   287  	if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
   288  		return nil
   289  	}
   290  
   291  	// Two blocks of zero bytes marks the end of the archive.
   292  	if bytes.Equal(header, zeroBlock[0:blockSize]) {
   293  		if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
   294  			return nil
   295  		}
   296  		if bytes.Equal(header, zeroBlock[0:blockSize]) {
   297  			tr.err = io.EOF
   298  		} else {
   299  			tr.err = ErrHeader // zero block and then non-zero block
   300  		}
   301  		return nil
   302  	}
   303  
   304  	if !tr.verifyChecksum(header) {
   305  		tr.err = ErrHeader
   306  		return nil
   307  	}
   308  
   309  	// Unpack
   310  	hdr := new(Header)
   311  	s := slicer(header)
   312  
   313  	hdr.Name = cString(s.next(100))
   314  	hdr.Mode = tr.octal(s.next(8))
   315  	hdr.Uid = int(tr.octal(s.next(8)))
   316  	hdr.Gid = int(tr.octal(s.next(8)))
   317  	hdr.Size = tr.octal(s.next(12))
   318  	hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0)
   319  	s.next(8) // chksum
   320  	hdr.Typeflag = s.next(1)[0]
   321  	hdr.Linkname = cString(s.next(100))
   322  
   323  	// The remainder of the header depends on the value of magic.
   324  	// The original (v7) version of tar had no explicit magic field,
   325  	// so its magic bytes, like the rest of the block, are NULs.
   326  	magic := string(s.next(8)) // contains version field as well.
   327  	var format string
   328  	switch magic {
   329  	case "ustar\x0000": // POSIX tar (1003.1-1988)
   330  		if string(header[508:512]) == "tar\x00" {
   331  			format = "star"
   332  		} else {
   333  			format = "posix"
   334  		}
   335  	case "ustar  \x00": // old GNU tar
   336  		format = "gnu"
   337  	}
   338  
   339  	switch format {
   340  	case "posix", "gnu", "star":
   341  		hdr.Uname = cString(s.next(32))
   342  		hdr.Gname = cString(s.next(32))
   343  		devmajor := s.next(8)
   344  		devminor := s.next(8)
   345  		if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
   346  			hdr.Devmajor = tr.octal(devmajor)
   347  			hdr.Devminor = tr.octal(devminor)
   348  		}
   349  		var prefix string
   350  		switch format {
   351  		case "posix", "gnu":
   352  			prefix = cString(s.next(155))
   353  		case "star":
   354  			prefix = cString(s.next(131))
   355  			hdr.AccessTime = time.Unix(tr.octal(s.next(12)), 0)
   356  			hdr.ChangeTime = time.Unix(tr.octal(s.next(12)), 0)
   357  		}
   358  		if len(prefix) > 0 {
   359  			hdr.Name = prefix + "/" + hdr.Name
   360  		}
   361  	}
   362  
   363  	if tr.err != nil {
   364  		tr.err = ErrHeader
   365  		return nil
   366  	}
   367  
   368  	// Maximum value of hdr.Size is 64 GB (12 octal digits),
   369  	// so there's no risk of int64 overflowing.
   370  	tr.nb = int64(hdr.Size)
   371  	tr.pad = -tr.nb & (blockSize - 1) // blockSize is a power of two
   372  
   373  	return hdr
   374  }
   375  
   376  // Read reads from the current entry in the tar archive.
   377  // It returns 0, io.EOF when it reaches the end of that entry,
   378  // until Next is called to advance to the next entry.
   379  func (tr *Reader) Read(b []byte) (n int, err error) {
   380  	if tr.nb == 0 {
   381  		// file consumed
   382  		return 0, io.EOF
   383  	}
   384  
   385  	if int64(len(b)) > tr.nb {
   386  		b = b[0:tr.nb]
   387  	}
   388  	n, err = tr.r.Read(b)
   389  	tr.nb -= int64(n)
   390  
   391  	if err == io.EOF && tr.nb > 0 {
   392  		err = io.ErrUnexpectedEOF
   393  	}
   394  	tr.err = err
   395  	return
   396  }