github.com/mdempsky/go@v0.0.0-20151201204031-5dd372bd1e70/src/archive/tar/writer.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package tar
     6  
     7  // TODO(dsymonds):
     8  // - catch more errors (no first header, etc.)
     9  
    10  import (
    11  	"bytes"
    12  	"errors"
    13  	"fmt"
    14  	"io"
    15  	"path"
    16  	"sort"
    17  	"strconv"
    18  	"strings"
    19  	"time"
    20  )
    21  
// Errors reported by Writer.
var (
	// ErrWriteTooLong is returned by Write when the caller supplies more
	// bytes than remain in the current entry.
	ErrWriteTooLong    = errors.New("archive/tar: write too long")
	// ErrFieldTooLong is recorded when a string does not fit in its header
	// field and cannot be moved to a pax record.
	ErrFieldTooLong    = errors.New("archive/tar: header field too long")
	// ErrWriteAfterClose is returned by Write and WriteHeader once Close
	// has been called.
	ErrWriteAfterClose = errors.New("archive/tar: write after close")
	// errInvalidHeader is returned when a header would need pax records
	// but is being written with pax disabled (the extended header itself).
	errInvalidHeader   = errors.New("archive/tar: header field too long or contains invalid values")
)
    28  
// A Writer provides sequential writing of a tar archive in POSIX.1 format.
// A tar archive consists of a sequence of files.
// Call WriteHeader to begin a new file, and then call Write to supply that file's data,
// writing at most hdr.Size bytes in total.
type Writer struct {
	// w is the destination that receives the archive bytes.
	w          io.Writer
	// err holds the most recently recorded failure; Flush, WriteHeader and
	// Close return it instead of doing further work.
	err        error
	nb         int64 // number of unwritten bytes for current file entry
	pad        int64 // amount of padding to write after current file entry
	// closed is set by Close; later Write/WriteHeader calls are rejected.
	closed     bool
	// NOTE(review): usedBinary is set by numeric and never cleared in the
	// code visible here, so it also selects the GNU magic for later headers.
	usedBinary bool            // whether the binary numeric field extension was used
	// NOTE(review): nothing visible in this file assigns preferPax; presumably
	// it is toggled elsewhere in the package (e.g. by tests) - confirm.
	preferPax  bool            // use pax header instead of binary numeric header
	hdrBuff    [blockSize]byte // buffer to use in writeHeader when writing a regular header
	paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header
}
    44  
    45  // NewWriter creates a new Writer writing to w.
    46  func NewWriter(w io.Writer) *Writer { return &Writer{w: w} }
    47  
    48  // Flush finishes writing the current file (optional).
    49  func (tw *Writer) Flush() error {
    50  	if tw.nb > 0 {
    51  		tw.err = fmt.Errorf("archive/tar: missed writing %d bytes", tw.nb)
    52  		return tw.err
    53  	}
    54  
    55  	n := tw.nb + tw.pad
    56  	for n > 0 && tw.err == nil {
    57  		nr := n
    58  		if nr > blockSize {
    59  			nr = blockSize
    60  		}
    61  		var nw int
    62  		nw, tw.err = tw.w.Write(zeroBlock[0:nr])
    63  		n -= int64(nw)
    64  	}
    65  	tw.nb = 0
    66  	tw.pad = 0
    67  	return tw.err
    68  }
    69  
    70  // Write s into b, terminating it with a NUL if there is room.
    71  // If the value is too long for the field and allowPax is true add a paxheader record instead
    72  func (tw *Writer) cString(b []byte, s string, allowPax bool, paxKeyword string, paxHeaders map[string]string) {
    73  	needsPaxHeader := allowPax && len(s) > len(b) || !isASCII(s)
    74  	if needsPaxHeader {
    75  		paxHeaders[paxKeyword] = s
    76  		return
    77  	}
    78  	if len(s) > len(b) {
    79  		if tw.err == nil {
    80  			tw.err = ErrFieldTooLong
    81  		}
    82  		return
    83  	}
    84  	ascii := toASCII(s)
    85  	copy(b, ascii)
    86  	if len(ascii) < len(b) {
    87  		b[len(ascii)] = 0
    88  	}
    89  }
    90  
    91  // Encode x as an octal ASCII string and write it into b with leading zeros.
    92  func (tw *Writer) octal(b []byte, x int64) {
    93  	s := strconv.FormatInt(x, 8)
    94  	// leading zeros, but leave room for a NUL.
    95  	for len(s)+1 < len(b) {
    96  		s = "0" + s
    97  	}
    98  	tw.cString(b, s, false, paxNone, nil)
    99  }
   100  
   101  // Write x into b, either as octal or as binary (GNUtar/star extension).
   102  // If the value is too long for the field and writingPax is enabled both for the field and the add a paxheader record instead
   103  func (tw *Writer) numeric(b []byte, x int64, allowPax bool, paxKeyword string, paxHeaders map[string]string) {
   104  	// Try octal first.
   105  	s := strconv.FormatInt(x, 8)
   106  	if len(s) < len(b) {
   107  		tw.octal(b, x)
   108  		return
   109  	}
   110  
   111  	// If it is too long for octal, and pax is preferred, use a pax header
   112  	if allowPax && tw.preferPax {
   113  		tw.octal(b, 0)
   114  		s := strconv.FormatInt(x, 10)
   115  		paxHeaders[paxKeyword] = s
   116  		return
   117  	}
   118  
   119  	// Too big: use binary (big-endian).
   120  	tw.usedBinary = true
   121  	for i := len(b) - 1; x > 0 && i >= 0; i-- {
   122  		b[i] = byte(x)
   123  		x >>= 8
   124  	}
   125  	b[0] |= 0x80 // highest bit indicates binary format
   126  }
   127  
// minTime and maxTime bound the modification times that writeHeader stores
// verbatim; a ModTime outside this range is written as zero.
var (
	// minTime is the Unix epoch.
	minTime = time.Unix(0, 0)
	// There is room for 11 octal digits (33 bits) of mtime.
	maxTime = minTime.Add((1<<33 - 1) * time.Second)
)
   133  
   134  // WriteHeader writes hdr and prepares to accept the file's contents.
   135  // WriteHeader calls Flush if it is not the first header.
   136  // Calling after a Close will return ErrWriteAfterClose.
   137  func (tw *Writer) WriteHeader(hdr *Header) error {
   138  	return tw.writeHeader(hdr, true)
   139  }
   140  
// writeHeader is the implementation behind WriteHeader. It flushes the
// previous entry, encodes hdr into one header block, writes a pax extended
// header first if any field required one, and finally writes the block and
// sets up the byte/padding counters for the entry's data.
//
// allowPax is true for the normal, top-level call. writePAXHeader calls back
// into this method with allowPax set to false to write the extended header's
// own header block: in that mode the second scratch buffer is used, Xattrs
// are not added, and needing any pax record is reported as errInvalidHeader.
//
// Calling after a Close will return ErrWriteAfterClose. Any error already
// recorded on the Writer, or recorded while encoding fields, is returned.
func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
	if tw.closed {
		return ErrWriteAfterClose
	}
	// Flush stores its outcome in tw.err, which is checked right below, so
	// its return value is not needed here.
	if tw.err == nil {
		tw.Flush()
	}
	if tw.err != nil {
		return tw.err
	}

	// a map to hold pax header records, if any are needed
	paxHeaders := make(map[string]string)

	// TODO(shanemhansen): we might want to use PAX headers for
	// subsecond time resolution, but for now let's just capture
	// too long fields or non ascii characters

	var header []byte

	// We need to select which scratch buffer to use carefully,
	// since this method is called recursively to write PAX headers.
	// If allowPax is true, this is the non-recursive call, and we will use hdrBuff.
	// If allowPax is false, we are being called by writePAXHeader, and hdrBuff is
	// already being used by the non-recursive call, so we must use paxHdrBuff.
	header = tw.hdrBuff[:]
	if !allowPax {
		header = tw.paxHdrBuff[:]
	}
	// Start from a clean block, then carve it into consecutive fields.
	// (slicer is defined elsewhere; presumably next(n) returns the next n
	// bytes of the block - the offsets in the comments below rely on that.)
	copy(header, zeroBlock)
	s := slicer(header)

	// Keep a reference to the name field so it can be overwritten later if
	// the name turns out to be expressible as a ustar prefix/suffix pair
	// instead of a pax record.
	pathHeaderBytes := s.next(fileNameSize)

	tw.cString(pathHeaderBytes, hdr.Name, true, paxPath, paxHeaders)

	// Handle out of range ModTime carefully: anything outside
	// [minTime, maxTime] is written as zero.
	var modTime int64
	if !hdr.ModTime.Before(minTime) && !hdr.ModTime.After(maxTime) {
		modTime = hdr.ModTime.Unix()
	}

	tw.octal(s.next(8), hdr.Mode)                                   // 100:108
	tw.numeric(s.next(8), int64(hdr.Uid), true, paxUid, paxHeaders) // 108:116
	tw.numeric(s.next(8), int64(hdr.Gid), true, paxGid, paxHeaders) // 116:124
	tw.numeric(s.next(12), hdr.Size, true, paxSize, paxHeaders)     // 124:136
	tw.numeric(s.next(12), modTime, false, paxNone, nil)            // 136:148 --- consider using pax for finer granularity
	s.next(8)                                                       // chksum (148:156)
	s.next(1)[0] = hdr.Typeflag                                     // 156:157

	tw.cString(s.next(100), hdr.Linkname, true, paxLinkpath, paxHeaders)

	copy(s.next(8), []byte("ustar\x0000"))                        // 257:265
	tw.cString(s.next(32), hdr.Uname, true, paxUname, paxHeaders) // 265:297
	tw.cString(s.next(32), hdr.Gname, true, paxGname, paxHeaders) // 297:329
	tw.numeric(s.next(8), hdr.Devmajor, false, paxNone, nil)      // 329:337
	tw.numeric(s.next(8), hdr.Devminor, false, paxNone, nil)      // 337:345

	// Keep a reference to the prefix field so it can be overwritten later if
	// the name turns out to be expressible as a ustar prefix/suffix pair
	// instead of a pax record.
	prefixHeaderBytes := s.next(155)
	tw.cString(prefixHeaderBytes, "", false, paxNone, nil) // 345:500  prefix

	// Use the GNU magic instead of POSIX magic if we used any GNU extensions.
	// NOTE(review): usedBinary is not reset anywhere in the visible code, so
	// a binary field in an earlier header also switches later headers.
	if tw.usedBinary {
		copy(header[257:265], []byte("ustar  \x00"))
	}

	_, paxPathUsed := paxHeaders[paxPath]
	// try to use a ustar header when only the name is too long
	if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed {
		prefix, suffix, ok := splitUSTARPath(hdr.Name)
		if ok {
			// Since we can encode in USTAR format, disable PAX header.
			delete(paxHeaders, paxPath)

			// Update the path fields
			tw.cString(pathHeaderBytes, suffix, false, paxNone, nil)
			tw.cString(prefixHeaderBytes, prefix, false, paxNone, nil)
		}
	}

	// The chksum field is terminated by a NUL and a space.
	// This is different from the other octal fields.
	chksum, _ := checksum(header)
	tw.octal(header[148:155], chksum)
	header[155] = ' '

	if tw.err != nil {
		// problem with header; probably integer too big for a field.
		return tw.err
	}

	// Extended attributes always travel as pax records, but only on the
	// top-level call.
	if allowPax {
		for k, v := range hdr.Xattrs {
			paxHeaders[paxXattr+k] = v
		}
	}

	if len(paxHeaders) > 0 {
		// The extended header's own header block must not itself need pax
		// records.
		if !allowPax {
			return errInvalidHeader
		}
		// Emits the extended header entry (header block, records, padding)
		// ahead of the real header; it re-enters writeHeader with
		// allowPax == false.
		if err := tw.writePAXHeader(hdr, paxHeaders); err != nil {
			return err
		}
	}
	// Expect hdr.Size bytes of data, followed by enough padding to reach a
	// multiple of blockSize.
	tw.nb = int64(hdr.Size)
	tw.pad = (blockSize - (tw.nb % blockSize)) % blockSize

	_, tw.err = tw.w.Write(header)
	return tw.err
}
   259  
   260  // splitUSTARPath splits a path according to USTAR prefix and suffix rules.
   261  // If the path is not splittable, then it will return ("", "", false).
   262  func splitUSTARPath(name string) (prefix, suffix string, ok bool) {
   263  	length := len(name)
   264  	if length <= fileNameSize || !isASCII(name) {
   265  		return "", "", false
   266  	} else if length > fileNamePrefixSize+1 {
   267  		length = fileNamePrefixSize + 1
   268  	} else if name[length-1] == '/' {
   269  		length--
   270  	}
   271  
   272  	i := strings.LastIndex(name[:length], "/")
   273  	nlen := len(name) - i - 1 // nlen is length of suffix
   274  	plen := i                 // plen is length of prefix
   275  	if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize {
   276  		return "", "", false
   277  	}
   278  	return name[:i], name[i+1:], true
   279  }
   280  
   281  // writePaxHeader writes an extended pax header to the
   282  // archive.
   283  func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) error {
   284  	// Prepare extended header
   285  	ext := new(Header)
   286  	ext.Typeflag = TypeXHeader
   287  	// Setting ModTime is required for reader parsing to
   288  	// succeed, and seems harmless enough.
   289  	ext.ModTime = hdr.ModTime
   290  	// The spec asks that we namespace our pseudo files
   291  	// with the current pid.  However, this results in differing outputs
   292  	// for identical inputs.  As such, the constant 0 is now used instead.
   293  	// golang.org/issue/12358
   294  	dir, file := path.Split(hdr.Name)
   295  	fullName := path.Join(dir, "PaxHeaders.0", file)
   296  
   297  	ascii := toASCII(fullName)
   298  	if len(ascii) > 100 {
   299  		ascii = ascii[:100]
   300  	}
   301  	ext.Name = ascii
   302  	// Construct the body
   303  	var buf bytes.Buffer
   304  
   305  	// Keys are sorted before writing to body to allow deterministic output.
   306  	var keys []string
   307  	for k := range paxHeaders {
   308  		keys = append(keys, k)
   309  	}
   310  	sort.Strings(keys)
   311  
   312  	for _, k := range keys {
   313  		fmt.Fprint(&buf, paxHeader(k+"="+paxHeaders[k]))
   314  	}
   315  
   316  	ext.Size = int64(len(buf.Bytes()))
   317  	if err := tw.writeHeader(ext, false); err != nil {
   318  		return err
   319  	}
   320  	if _, err := tw.Write(buf.Bytes()); err != nil {
   321  		return err
   322  	}
   323  	if err := tw.Flush(); err != nil {
   324  		return err
   325  	}
   326  	return nil
   327  }
   328  
   329  // paxHeader formats a single pax record, prefixing it with the appropriate length
   330  func paxHeader(msg string) string {
   331  	const padding = 2 // Extra padding for space and newline
   332  	size := len(msg) + padding
   333  	size += len(strconv.Itoa(size))
   334  	record := fmt.Sprintf("%d %s\n", size, msg)
   335  	if len(record) != size {
   336  		// Final adjustment if adding size increased
   337  		// the number of digits in size
   338  		size = len(record)
   339  		record = fmt.Sprintf("%d %s\n", size, msg)
   340  	}
   341  	return record
   342  }
   343  
   344  // Write writes to the current entry in the tar archive.
   345  // Write returns the error ErrWriteTooLong if more than
   346  // hdr.Size bytes are written after WriteHeader.
   347  func (tw *Writer) Write(b []byte) (n int, err error) {
   348  	if tw.closed {
   349  		err = ErrWriteAfterClose
   350  		return
   351  	}
   352  	overwrite := false
   353  	if int64(len(b)) > tw.nb {
   354  		b = b[0:tw.nb]
   355  		overwrite = true
   356  	}
   357  	n, err = tw.w.Write(b)
   358  	tw.nb -= int64(n)
   359  	if err == nil && overwrite {
   360  		err = ErrWriteTooLong
   361  		return
   362  	}
   363  	tw.err = err
   364  	return
   365  }
   366  
   367  // Close closes the tar archive, flushing any unwritten
   368  // data to the underlying writer.
   369  func (tw *Writer) Close() error {
   370  	if tw.err != nil || tw.closed {
   371  		return tw.err
   372  	}
   373  	tw.Flush()
   374  	tw.closed = true
   375  	if tw.err != nil {
   376  		return tw.err
   377  	}
   378  
   379  	// trailer: two zero blocks
   380  	for i := 0; i < 2; i++ {
   381  		_, tw.err = tw.w.Write(zeroBlock)
   382  		if tw.err != nil {
   383  			break
   384  		}
   385  	}
   386  	return tw.err
   387  }