github.com/mattn/go@v0.0.0-20171011075504-07f7db3ea99f/src/archive/tar/common.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package tar implements access to tar archives.
     6  //
     7  // Tape archives (tar) are a file format for storing a sequence of files that
     8  // can be read and written in a streaming manner.
     9  // This package aims to cover most variations of the format,
    10  // including those produced by GNU and BSD tar tools.
    11  package tar
    12  
    13  import (
    14  	"errors"
    15  	"fmt"
    16  	"io"
    17  	"math"
    18  	"os"
    19  	"path"
    20  	"reflect"
    21  	"strconv"
    22  	"strings"
    23  	"time"
    24  )
    25  
    26  // BUG: Use of the Uid and Gid fields in Header could overflow on 32-bit
    27  // architectures. If a large value is encountered when decoding, the result
    28  // stored in Header will be the truncated version.
    29  
    30  var (
    31  	ErrHeader          = errors.New("tar: invalid tar header")
    32  	ErrWriteTooLong    = errors.New("tar: write too long")
    33  	ErrFieldTooLong    = errors.New("tar: header field too long")
    34  	ErrWriteAfterClose = errors.New("tar: write after close")
    35  	errMissData        = errors.New("tar: sparse file references non-existent data")
    36  	errUnrefData       = errors.New("tar: sparse file contains unreferenced data")
    37  	errWriteHole       = errors.New("tar: write non-NUL byte in sparse hole")
    38  )
    39  
    40  type headerError []string
    41  
    42  func (he headerError) Error() string {
    43  	const prefix = "tar: cannot encode header"
    44  	var ss []string
    45  	for _, s := range he {
    46  		if s != "" {
    47  			ss = append(ss, s)
    48  		}
    49  	}
    50  	if len(ss) == 0 {
    51  		return prefix
    52  	}
    53  	return fmt.Sprintf("%s: %v", prefix, strings.Join(ss, "; and "))
    54  }
    55  
    56  // Type flags for Header.Typeflag. Each value is an untyped character constant that fits the Typeflag byte.
    57  const (
    58  	// Type '0' indicates a regular file.
    59  	TypeReg  = '0' // Regular file
    60  	TypeRegA = '\x00' // For legacy support; use TypeReg instead
    61  
    62  	// Type '1' to '6' are header-only flags and may not have a data body (see isHeaderOnlyType).
    63  	TypeLink    = '1' // Hard link
    64  	TypeSymlink = '2' // Symbolic link
    65  	TypeChar    = '3' // Character device node
    66  	TypeBlock   = '4' // Block device node
    67  	TypeDir     = '5' // Directory
    68  	TypeFifo    = '6' // FIFO node
    69  
    70  	// Type '7' is reserved.
    71  	TypeCont = '7' // Reserved
    72  
    73  	// Type 'x' is used by the PAX format to store key-value records that
    74  	// are only relevant to the next file.
    75  	// This package transparently handles these types.
    76  	TypeXHeader = 'x'
    77  
    78  	// Type 'g' is used by the PAX format to store key-value records that
    79  	// are relevant to all subsequent files.
    80  	// This package only supports parsing and composing such headers,
    81  	// but does not currently support persisting the global state across files.
    82  	TypeXGlobalHeader = 'g'
    83  
    84  	// Type 'S' indicates a sparse file in the GNU format.
    85  	// Header.SparseHoles should be populated when using this type.
    86  	TypeGNUSparse = 'S'
    87  
    88  	// Types 'L' and 'K' are used by the GNU format for a meta file
    89  	// used to store the path or link name for the next file.
    90  	// This package transparently handles these types.
    91  	TypeGNULongName = 'L' // Meta file carrying a path
    92  	TypeGNULongLink = 'K' // Meta file carrying a link name
    93  )
    94  
    95  // Keywords for PAX extended header records.
    96  const (
    97  	paxNone     = "" // Indicates that no PAX key is suitable
    98  	paxPath     = "path" // Record used for Header.Name
    99  	paxLinkpath = "linkpath" // Record used for Header.Linkname
   100  	paxSize     = "size" // Record used for Header.Size
   101  	paxUid      = "uid" // Record used for Header.Uid
   102  	paxGid      = "gid" // Record used for Header.Gid
   103  	paxUname    = "uname" // Record used for Header.Uname
   104  	paxGname    = "gname" // Record used for Header.Gname
   105  	paxMtime    = "mtime" // Record used for Header.ModTime
   106  	paxAtime    = "atime" // Record used for Header.AccessTime
   107  	paxCtime    = "ctime"   // Record used for Header.ChangeTime; removed from later revision of PAX spec, but was valid
   108  	paxCharset  = "charset" // Currently unused
   109  	paxComment  = "comment" // Currently unused
   110  
   111  	paxSchilyXattr = "SCHILY.xattr." // Key prefix under which Header.Xattrs entries are stored
   112  
   113  	// Keywords for GNU sparse files in a PAX extended header.
   114  	paxGNUSparse          = "GNU.sparse." // Common prefix of all the keys below
   115  	paxGNUSparseNumBlocks = "GNU.sparse.numblocks"
   116  	paxGNUSparseOffset    = "GNU.sparse.offset"
   117  	paxGNUSparseNumBytes  = "GNU.sparse.numbytes"
   118  	paxGNUSparseMap       = "GNU.sparse.map"
   119  	paxGNUSparseName      = "GNU.sparse.name"
   120  	paxGNUSparseMajor     = "GNU.sparse.major"
   121  	paxGNUSparseMinor     = "GNU.sparse.minor"
   122  	paxGNUSparseSize      = "GNU.sparse.size"
   123  	paxGNUSparseRealSize  = "GNU.sparse.realsize"
   124  )
   125  
   126  // basicKeys is the set of PAX keys for which we have built-in support.
   127  // This does not contain "charset" or "comment", which are both PAX-specific,
   128  // so adding them as first-class features of Header is unlikely.
   129  // Users can use the PAXRecords field to set them themselves.
   130  var basicKeys = map[string]bool{
   131  	paxPath: true, paxLinkpath: true, paxSize: true, paxUid: true, paxGid: true,
   132  	paxUname: true, paxGname: true, paxMtime: true, paxAtime: true, paxCtime: true,
   133  }
   134  
   135  // A Header represents a single header in a tar archive.
   136  // Some fields may not be populated.
   137  //
   138  // For forward compatibility, users that retrieve a Header from Reader.Next,
   139  // mutate it in some ways, and then pass it back to Writer.WriteHeader
   140  // should do so by creating a new Header and copying the fields
   141  // that they are interested in preserving.
   142  type Header struct {
   143  	Typeflag byte // Type of header entry (should be TypeReg for most files)
   144  
   145  	Name     string // Name of file entry
   146  	Linkname string // Target name of link (valid for TypeLink or TypeSymlink)
   147  
   148  	Size  int64  // Logical file size in bytes
   149  	Mode  int64  // Permission and mode bits
   150  	Uid   int    // User ID of owner
   151  	Gid   int    // Group ID of owner
   152  	Uname string // User name of owner
   153  	Gname string // Group name of owner
   154  
   155  	// If the Format is unspecified, then Writer.WriteHeader rounds ModTime
   156  	// to the nearest second and ignores the AccessTime and ChangeTime fields.
   157  	//
   158  	// To use AccessTime or ChangeTime, specify the Format as PAX or GNU.
   159  	// To use sub-second resolution, specify the Format as PAX.
   160  	ModTime    time.Time // Modification time
   161  	AccessTime time.Time // Access time (requires either PAX or GNU support)
   162  	ChangeTime time.Time // Change time (requires either PAX or GNU support)
   163  
   164  	Devmajor int64 // Major device number (valid for TypeChar or TypeBlock)
   165  	Devminor int64 // Minor device number (valid for TypeChar or TypeBlock)
   166  
   167  	// SparseHoles represents a sequence of holes in a sparse file.
   168  	//
   169  	// A file is sparse if len(SparseHoles) > 0 or Typeflag is TypeGNUSparse.
   170  	// If TypeGNUSparse is set, then the format is GNU, otherwise
   171  	// the format is PAX (by using GNU-specific PAX records).
   172  	//
   173  	// A sparse file consists of fragments of data, intermixed with holes
   174  	// (described by this field). A hole is semantically a block of NUL-bytes,
   175  	// but does not actually exist within the tar file.
   176  	// The holes must be sorted in ascending order,
   177  	// not overlap with each other, and not extend past the specified Size.
   178  	SparseHoles []SparseEntry
   179  
   180  	// Xattrs stores extended attributes as PAX records under the
   181  	// "SCHILY.xattr." namespace.
   182  	//
   183  	// The following are semantically equivalent:
   184  	//  h.Xattrs[key] = value
   185  	//  h.PAXRecords["SCHILY.xattr."+key] = value
   186  	//
   187  	// When Writer.WriteHeader is called, the contents of Xattrs will take
   188  	// precedence over those in PAXRecords.
   189  	//
   190  	// Deprecated: Use PAXRecords instead.
   191  	Xattrs map[string]string
   192  
   193  	// PAXRecords is a map of PAX extended header records.
   194  	//
   195  	// User-defined records should have keys of the following form:
   196  	//	VENDOR.keyword
   197  	// Where VENDOR is some namespace in all uppercase, and keyword may
   198  	// not contain the '=' character (e.g., "GOLANG.pkg.version").
   199  	// The key and value should be non-empty UTF-8 strings.
   200  	//
   201  	// When Writer.WriteHeader is called, PAX records derived from the
   202  	// other fields in Header take precedence over PAXRecords.
   203  	PAXRecords map[string]string
   204  
   205  	// Format specifies the format of the tar header.
   206  	//
   207  	// This is set by Reader.Next as a best-effort guess at the format.
   208  	// Since the Reader liberally reads some non-compliant files,
   209  	// it is possible for this to be FormatUnknown.
   210  	//
   211  	// If the format is unspecified when Writer.WriteHeader is called,
   212  	// then it uses the first format (in the order of USTAR, PAX, GNU)
   213  	// capable of encoding this Header (see Format).
   214  	Format Format
   215  }
   216  
   217  // SparseEntry represents a Length-sized fragment at Offset in the file.
   218  type SparseEntry struct{ Offset, Length int64 }
   219  
   220  func (s SparseEntry) endOffset() int64 { return s.Offset + s.Length }
   221  
   222  // A sparse file can be represented as either a sparseDatas or a sparseHoles.
   223  // As long as the total size is known, they are equivalent and one can be
   224  // converted to the other form and back. The various tar formats with sparse
   225  // file support represent sparse files in the sparseDatas form. That is, they
   226  // specify the fragments in the file that have data, and treat everything else as
   227  // having zero bytes. As such, the encoding and decoding logic in this package
   228  // deals with sparseDatas.
   229  //
   230  // However, the external API uses sparseHoles instead of sparseDatas because the
   231  // zero value of sparseHoles logically represents a normal file (i.e., there are
   232  // no holes in it). On the other hand, the zero value of sparseDatas implies
   233  // that the file has no data in it, which is rather odd.
   234  //
   235  // As an example, if the underlying raw file contains the 8-byte data:
   236  //	var compactFile = "abcdefgh"
   237  //
   238  // And the sparse map has the following entries:
   239  //	var spd sparseDatas = []SparseEntry{
   240  //		{Offset: 2,  Length: 5},  // Data fragment for 2..6
   241  //		{Offset: 18, Length: 3},  // Data fragment for 18..20
   242  //	}
   243  //	var sph sparseHoles = []SparseEntry{
   244  //		{Offset: 0,  Length: 2},  // Hole fragment for 0..1
   245  //		{Offset: 7,  Length: 11}, // Hole fragment for 7..17
   246  //		{Offset: 21, Length: 4},  // Hole fragment for 21..24
   247  //	}
   248  //
   249  // Then the content of the resulting sparse file with a Header.Size of 25 is:
   250  //	var sparseFile = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
   251  type (
   252  	sparseDatas []SparseEntry // Fragments that contain data
   253  	sparseHoles []SparseEntry // Fragments that are holes
   254  )
   255  
   256  // validateSparseEntries reports whether sp is a valid sparse map.
   257  // It does not matter whether sp represents data fragments or hole fragments.
   258  func validateSparseEntries(sp []SparseEntry, size int64) bool {
   259  	// Validate all sparse entries. These are the same checks as performed by
   260  	// the BSD tar utility.
   261  	if size < 0 {
   262  		return false
   263  	}
   264  	var pre SparseEntry
   265  	for _, cur := range sp {
   266  		switch {
   267  		case cur.Offset < 0 || cur.Length < 0:
   268  			return false // Negative values are never okay
   269  		case cur.Offset > math.MaxInt64-cur.Length:
   270  			return false // Integer overflow with large length
   271  		case cur.endOffset() > size:
   272  			return false // Region extends beyond the actual size
   273  		case pre.endOffset() > cur.Offset:
   274  			return false // Regions cannot overlap and must be in order
   275  		}
   276  		pre = cur
   277  	}
   278  	return true
   279  }
   280  
   281  // alignSparseEntries mutates src and returns dst where each fragment's
   282  // starting offset is aligned up to the nearest block edge, and each
   283  // ending offset is aligned down to the nearest block edge.
   284  //
   285  // Even though the Go tar Reader and the BSD tar utility can handle entries
   286  // with arbitrary offsets and lengths, the GNU tar utility can only handle
   287  // offsets and lengths that are multiples of blockSize.
   288  func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry {
   289  	dst := src[:0]
   290  	for _, s := range src {
   291  		pos, end := s.Offset, s.endOffset()
   292  		pos += blockPadding(+pos) // Round-up to nearest blockSize
   293  		if end != size {
   294  			end -= blockPadding(-end) // Round-down to nearest blockSize
   295  		}
   296  		if pos < end {
   297  			dst = append(dst, SparseEntry{Offset: pos, Length: end - pos})
   298  		}
   299  	}
   300  	return dst
   301  }
   302  
   303  // invertSparseEntries converts a sparse map from one form to the other.
   304  // If the input is sparseHoles, then it will output sparseDatas and vice-versa.
   305  // The input must have been already validated.
   306  //
   307  // This function mutates src and returns a normalized map where:
   308  //	* adjacent fragments are coalesced together
   309  //	* only the last fragment may be empty
   310  //	* the endOffset of the last fragment is the total size
   311  func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry {
   312  	dst := src[:0]
   313  	var pre SparseEntry
   314  	for _, cur := range src {
   315  		if cur.Length == 0 {
   316  			continue // Skip empty fragments
   317  		}
   318  		pre.Length = cur.Offset - pre.Offset
   319  		if pre.Length > 0 {
   320  			dst = append(dst, pre) // Only add non-empty fragments
   321  		}
   322  		pre.Offset = cur.endOffset()
   323  	}
   324  	pre.Length = size - pre.Offset // Possibly the only empty fragment
   325  	return append(dst, pre)
   326  }
   327  
   328  // fileState tracks the number of logical (includes sparse holes) and physical
   329  // (actual in tar archive) bytes remaining for the current file.
   330  //
   331  // Invariant: LogicalRemaining >= PhysicalRemaining
   332  type fileState interface {
   333  	LogicalRemaining() int64 // Bytes remaining in the file, counting sparse holes
   334  	PhysicalRemaining() int64 // Bytes remaining that are actually stored in the tar archive
   335  }
   336  
   337  // allowedFormats determines which formats can be used.
   338  // The value returned is the logical OR of multiple possible formats.
   339  // If the value is FormatUnknown, then the input Header cannot be encoded
   340  // and an error is returned explaining why.
   341  //
   342  // As a by-product of checking the fields, this function returns paxHdrs, which
   343  // contain all fields that could not be directly encoded.
   344  // A value receiver ensures that this method does not mutate the source Header.
   345  func (h Header) allowedFormats() (format Format, paxHdrs map[string]string, err error) {
   346  	format = FormatUSTAR | FormatPAX | FormatGNU // Start with every format allowed, then rule formats out
   347  	paxHdrs = make(map[string]string)
   348  
   349  	var whyNoUSTAR, whyNoPAX, whyNoGNU string // Most recently recorded reason for ruling each format out
   350  	var preferPAX bool // Prefer PAX over USTAR
   351  	verifyString := func(s string, size int, name, paxKey string) { // Checks a string field against a fixed field width of size bytes
   352  		// NUL-terminator is optional for path and linkpath.
   353  		// Technically, it is required for uname and gname,
   354  		// but neither GNU nor BSD tar checks for it.
   355  		tooLong := len(s) > size
   356  		allowLongGNU := paxKey == paxPath || paxKey == paxLinkpath
   357  		if hasNUL(s) || (tooLong && !allowLongGNU) {
   358  			whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%q", name, s)
   359  			format.mustNotBe(FormatGNU)
   360  		}
   361  		if !isASCII(s) || tooLong {
   362  			canSplitUSTAR := paxKey == paxPath // Only the path field is tried with splitUSTARPath
   363  			if _, _, ok := splitUSTARPath(s); !canSplitUSTAR || !ok {
   364  				whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%q", name, s)
   365  				format.mustNotBe(FormatUSTAR)
   366  			}
   367  			if paxKey == paxNone {
   368  				whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%q", name, s)
   369  				format.mustNotBe(FormatPAX)
   370  			} else {
   371  				paxHdrs[paxKey] = s
   372  			}
   373  		}
   374  		if v, ok := h.PAXRecords[paxKey]; ok && v == s { // Carry over a user-supplied record that equals the field value
   375  			paxHdrs[paxKey] = v
   376  		}
   377  	}
   378  	verifyNumeric := func(n int64, size int, name, paxKey string) { // Checks a numeric field against a fixed field width of size bytes
   379  		if !fitsInBase256(size, n) {
   380  			whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%d", name, n)
   381  			format.mustNotBe(FormatGNU)
   382  		}
   383  		if !fitsInOctal(size, n) {
   384  			whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%d", name, n)
   385  			format.mustNotBe(FormatUSTAR)
   386  			if paxKey == paxNone {
   387  				whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%d", name, n)
   388  				format.mustNotBe(FormatPAX)
   389  			} else {
   390  				paxHdrs[paxKey] = strconv.FormatInt(n, 10)
   391  			}
   392  		}
   393  		if v, ok := h.PAXRecords[paxKey]; ok && v == strconv.FormatInt(n, 10) { // Carry over a user-supplied record that equals the field value
   394  			paxHdrs[paxKey] = v
   395  		}
   396  	}
   397  	verifyTime := func(ts time.Time, size int, name, paxKey string) { // Checks a timestamp field against a fixed field width of size bytes
   398  		if ts.IsZero() {
   399  			return // Always okay
   400  		}
   401  		if !fitsInBase256(size, ts.Unix()) {
   402  			whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%v", name, ts)
   403  			format.mustNotBe(FormatGNU)
   404  		}
   405  		isMtime := paxKey == paxMtime
   406  		fitsOctal := fitsInOctal(size, ts.Unix())
   407  		if (isMtime && !fitsOctal) || !isMtime { // USTAR is ruled out for every timestamp other than an octal-encodable mtime
   408  			whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%v", name, ts)
   409  			format.mustNotBe(FormatUSTAR)
   410  		}
   411  		needsNano := ts.Nanosecond() != 0
   412  		if !isMtime || !fitsOctal || needsNano {
   413  			preferPAX = true // USTAR may truncate sub-second measurements
   414  			if paxKey == paxNone {
   415  				whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%v", name, ts)
   416  				format.mustNotBe(FormatPAX)
   417  			} else {
   418  				paxHdrs[paxKey] = formatPAXTime(ts)
   419  			}
   420  		}
   421  		if v, ok := h.PAXRecords[paxKey]; ok && v == formatPAXTime(ts) { // Carry over a user-supplied record that equals the field value
   422  			paxHdrs[paxKey] = v
   423  		}
   424  	}
   425  
   426  	// Check basic fields. The zero-valued block is used only to obtain field widths via len.
   427  	var blk block
   428  	v7 := blk.V7()
   429  	ustar := blk.USTAR()
   430  	gnu := blk.GNU()
   431  	verifyString(h.Name, len(v7.Name()), "Name", paxPath)
   432  	verifyString(h.Linkname, len(v7.LinkName()), "Linkname", paxLinkpath)
   433  	verifyString(h.Uname, len(ustar.UserName()), "Uname", paxUname)
   434  	verifyString(h.Gname, len(ustar.GroupName()), "Gname", paxGname)
   435  	verifyNumeric(h.Mode, len(v7.Mode()), "Mode", paxNone)
   436  	verifyNumeric(int64(h.Uid), len(v7.UID()), "Uid", paxUid)
   437  	verifyNumeric(int64(h.Gid), len(v7.GID()), "Gid", paxGid)
   438  	verifyNumeric(h.Size, len(v7.Size()), "Size", paxSize)
   439  	verifyNumeric(h.Devmajor, len(ustar.DevMajor()), "Devmajor", paxNone)
   440  	verifyNumeric(h.Devminor, len(ustar.DevMinor()), "Devminor", paxNone)
   441  	verifyTime(h.ModTime, len(v7.ModTime()), "ModTime", paxMtime)
   442  	verifyTime(h.AccessTime, len(gnu.AccessTime()), "AccessTime", paxAtime)
   443  	verifyTime(h.ChangeTime, len(gnu.ChangeTime()), "ChangeTime", paxCtime)
   444  
   445  	// Check for header-only types.
   446  	var whyOnlyPAX, whyOnlyGNU string // Most recently recorded reason for restricting to a single format
   447  	switch h.Typeflag {
   448  	case TypeReg, TypeChar, TypeBlock, TypeFifo, TypeGNUSparse:
   449  		// Exclude TypeLink and TypeSymlink, since they may reference directories.
   450  		if strings.HasSuffix(h.Name, "/") {
   451  			return FormatUnknown, nil, headerError{"filename may not have trailing slash"}
   452  		}
   453  	case TypeXHeader, TypeGNULongName, TypeGNULongLink:
   454  		return FormatUnknown, nil, headerError{"cannot manually encode TypeXHeader, TypeGNULongName, or TypeGNULongLink headers"}
   455  	case TypeXGlobalHeader:
   456  		if !reflect.DeepEqual(h, Header{Typeflag: h.Typeflag, Xattrs: h.Xattrs, PAXRecords: h.PAXRecords, Format: h.Format}) { // Every field other than these four must hold its zero value
   457  			return FormatUnknown, nil, headerError{"only PAXRecords may be set for TypeXGlobalHeader"}
   458  		}
   459  		whyOnlyPAX = "only PAX supports TypeXGlobalHeader"
   460  		format.mayOnlyBe(FormatPAX)
   461  	}
   462  	if !isHeaderOnlyType(h.Typeflag) && h.Size < 0 {
   463  		return FormatUnknown, nil, headerError{"negative size on header-only type"} // NOTE(review): the condition fires for types that are NOT header-only, so the message wording looks inverted
   464  	}
   465  
   466  	// Check PAX records.
   467  	if len(h.Xattrs) > 0 {
   468  		for k, v := range h.Xattrs {
   469  			paxHdrs[paxSchilyXattr+k] = v
   470  		}
   471  		whyOnlyPAX = "only PAX supports Xattrs"
   472  		format.mayOnlyBe(FormatPAX)
   473  	}
   474  	if len(h.PAXRecords) > 0 {
   475  		for k, v := range h.PAXRecords {
   476  			switch _, exists := paxHdrs[k]; {
   477  			case exists:
   478  				continue // Do not overwrite existing records
   479  			case h.Typeflag == TypeXGlobalHeader:
   480  				paxHdrs[k] = v // Copy all records
   481  			case !basicKeys[k] && !strings.HasPrefix(k, paxGNUSparse):
   482  				paxHdrs[k] = v // Ignore local records that may conflict
   483  			}
   484  		}
   485  		whyOnlyPAX = "only PAX supports PAXRecords"
   486  		format.mayOnlyBe(FormatPAX)
   487  	}
   488  	for k, v := range paxHdrs {
   489  		if !validPAXRecord(k, v) {
   490  			return FormatUnknown, nil, headerError{fmt.Sprintf("invalid PAX record: %q", k+" = "+v)}
   491  		}
   492  	}
   493  
   494  	// Check sparse files.
   495  	if len(h.SparseHoles) > 0 || h.Typeflag == TypeGNUSparse {
   496  		if isHeaderOnlyType(h.Typeflag) {
   497  			return FormatUnknown, nil, headerError{"header-only type cannot be sparse"}
   498  		}
   499  		if !validateSparseEntries(h.SparseHoles, h.Size) {
   500  			return FormatUnknown, nil, headerError{"invalid sparse holes"}
   501  		}
   502  		if h.Typeflag == TypeGNUSparse {
   503  			whyOnlyGNU = "only GNU supports TypeGNUSparse"
   504  			format.mayOnlyBe(FormatGNU)
   505  		} else {
   506  			whyNoGNU = "GNU supports sparse files only with TypeGNUSparse"
   507  			format.mustNotBe(FormatGNU)
   508  		}
   509  		whyNoUSTAR = "USTAR does not support sparse files"
   510  		format.mustNotBe(FormatUSTAR)
   511  	}
   512  
   513  	// Check desired format.
   514  	if wantFormat := h.Format; wantFormat != FormatUnknown {
   515  		if wantFormat.has(FormatPAX) && !preferPAX {
   516  			wantFormat.mayBe(FormatUSTAR) // PAX implies USTAR allowed too
   517  		}
   518  		format.mayOnlyBe(wantFormat) // Restrict the allowed formats to those wanted (NOTE(review): presumably an intersection rather than a union; confirm against Format.mayOnlyBe)
   519  	}
   520  	if format == FormatUnknown { // No format survived; build an error from the reasons collected above
   521  		switch h.Format {
   522  		case FormatUSTAR:
   523  			err = headerError{"Format specifies USTAR", whyNoUSTAR, whyOnlyPAX, whyOnlyGNU}
   524  		case FormatPAX:
   525  			err = headerError{"Format specifies PAX", whyNoPAX, whyOnlyGNU}
   526  		case FormatGNU:
   527  			err = headerError{"Format specifies GNU", whyNoGNU, whyOnlyPAX}
   528  		default:
   529  			err = headerError{whyNoUSTAR, whyNoPAX, whyNoGNU, whyOnlyPAX, whyOnlyGNU}
   530  		}
   531  	}
   532  	return format, paxHdrs, err
   533  }
   534  
   535  var sysSparseDetect func(f *os.File) (sparseHoles, error) // If non-nil, called by Header.DetectSparseHoles to find the holes in f
   536  var sysSparsePunch func(f *os.File, sph sparseHoles) error // If non-nil, called by Header.PunchSparseHoles to punch the holes sph into f
   537  
   538  // DetectSparseHoles searches for holes within f to populate SparseHoles
   539  // on supported operating systems and filesystems.
   540  // The file offset is cleared to zero.
   541  //
   542  // When packing a sparse file, DetectSparseHoles should be called prior to
   543  // serializing the header to the archive with Writer.WriteHeader.
   544  func (h *Header) DetectSparseHoles(f *os.File) (err error) {
   545  	defer func() {
   546  		if _, serr := f.Seek(0, io.SeekStart); err == nil {
   547  			err = serr
   548  		}
   549  	}()
   550  
   551  	h.SparseHoles = nil
   552  	if sysSparseDetect != nil {
   553  		sph, err := sysSparseDetect(f)
   554  		h.SparseHoles = sph
   555  		return err
   556  	}
   557  	return nil
   558  }
   559  
   560  // PunchSparseHoles destroys the contents of f, and prepares a sparse file
   561  // (on supported operating systems and filesystems)
   562  // with holes punched according to SparseHoles.
   563  // The file offset is cleared to zero.
   564  //
   565  // When extracting a sparse file, PunchSparseHoles should be called prior to
   566  // populating the content of a file with Reader.WriteTo.
   567  func (h *Header) PunchSparseHoles(f *os.File) (err error) {
   568  	defer func() {
   569  		if _, serr := f.Seek(0, io.SeekStart); err == nil {
   570  			err = serr
   571  		}
   572  	}()
   573  
   574  	if err := f.Truncate(0); err != nil {
   575  		return err
   576  	}
   577  
   578  	var size int64
   579  	if len(h.SparseHoles) > 0 {
   580  		size = h.SparseHoles[len(h.SparseHoles)-1].endOffset()
   581  	}
   582  	if !validateSparseEntries(h.SparseHoles, size) {
   583  		return errors.New("tar: invalid sparse holes")
   584  	}
   585  
   586  	if size == 0 {
   587  		return nil // For non-sparse files, do nothing (other than Truncate)
   588  	}
   589  	if sysSparsePunch != nil {
   590  		return sysSparsePunch(f, h.SparseHoles)
   591  	}
   592  	return f.Truncate(size)
   593  }
   594  
   595  // FileInfo returns an os.FileInfo for the Header.
   596  func (h *Header) FileInfo() os.FileInfo {
   597  	return headerFileInfo{h}
   598  }
   599  
   600  // headerFileInfo implements os.FileInfo.
   601  type headerFileInfo struct {
   602  	h *Header
   603  }
   604  
   605  func (fi headerFileInfo) Size() int64        { return fi.h.Size }
   606  func (fi headerFileInfo) IsDir() bool        { return fi.Mode().IsDir() }
   607  func (fi headerFileInfo) ModTime() time.Time { return fi.h.ModTime }
   608  func (fi headerFileInfo) Sys() interface{}   { return fi.h }
   609  
   610  // Name returns the base name of the file.
   611  func (fi headerFileInfo) Name() string {
   612  	if fi.IsDir() {
   613  		return path.Base(path.Clean(fi.h.Name))
   614  	}
   615  	return path.Base(fi.h.Name)
   616  }
   617  
   618  // Mode returns the permission and mode bits for the headerFileInfo.
   619  func (fi headerFileInfo) Mode() (mode os.FileMode) {
   620  	// Set file permission bits.
   621  	mode = os.FileMode(fi.h.Mode).Perm()
   622  
   623  	// Set setuid, setgid and sticky bits.
   624  	if fi.h.Mode&c_ISUID != 0 {
   625  		mode |= os.ModeSetuid
   626  	}
   627  	if fi.h.Mode&c_ISGID != 0 {
   628  		mode |= os.ModeSetgid
   629  	}
   630  	if fi.h.Mode&c_ISVTX != 0 {
   631  		mode |= os.ModeSticky
   632  	}
   633  
   634  	// Set file mode bits; clear perm, setuid, setgid, and sticky bits.
   635  	switch m := os.FileMode(fi.h.Mode) &^ 07777; m {
   636  	case c_ISDIR:
   637  		mode |= os.ModeDir
   638  	case c_ISFIFO:
   639  		mode |= os.ModeNamedPipe
   640  	case c_ISLNK:
   641  		mode |= os.ModeSymlink
   642  	case c_ISBLK:
   643  		mode |= os.ModeDevice
   644  	case c_ISCHR:
   645  		mode |= os.ModeDevice
   646  		mode |= os.ModeCharDevice
   647  	case c_ISSOCK:
   648  		mode |= os.ModeSocket
   649  	}
   650  
   651  	switch fi.h.Typeflag {
   652  	case TypeSymlink:
   653  		mode |= os.ModeSymlink
   654  	case TypeChar:
   655  		mode |= os.ModeDevice
   656  		mode |= os.ModeCharDevice
   657  	case TypeBlock:
   658  		mode |= os.ModeDevice
   659  	case TypeDir:
   660  		mode |= os.ModeDir
   661  	case TypeFifo:
   662  		mode |= os.ModeNamedPipe
   663  	}
   664  
   665  	return mode
   666  }
   667  
   668  // sysStat, if non-nil, populates h from system-dependent fields of fi. FileInfoHeader calls it last and returns its error.
   669  var sysStat func(fi os.FileInfo, h *Header) error
   670  
   671  const (
   672  	// Mode constants from the USTAR spec (octal values):
   673  	// See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
   674  	c_ISUID = 04000 // Set uid
   675  	c_ISGID = 02000 // Set gid
   676  	c_ISVTX = 01000 // Save text (sticky bit)
   677  
   678  	// Common Unix mode constants; these are not defined in any common tar standard.
   679  	// Header.FileInfo understands these, but FileInfoHeader will never produce these.
   680  	c_ISDIR  = 040000  // Directory
   681  	c_ISFIFO = 010000  // FIFO
   682  	c_ISREG  = 0100000 // Regular file (has no case of its own in headerFileInfo.Mode)
   683  	c_ISLNK  = 0120000 // Symbolic link
   684  	c_ISBLK  = 060000  // Block special file
   685  	c_ISCHR  = 020000  // Character special file
   686  	c_ISSOCK = 0140000 // Socket
   687  )
   688  
   689  // FileInfoHeader creates a partially-populated Header from fi.
   690  // If fi describes a symlink, FileInfoHeader records link as the link target.
   691  // If fi describes a directory, a slash is appended to the name.
   692  //
   693  // Since os.FileInfo's Name method only returns the base name of
   694  // the file it describes, it may be necessary to modify Header.Name
   695  // to provide the full path name of the file.
   696  //
   697  // This function does not populate Header.SparseHoles;
   698  // for sparse file support, additionally call Header.DetectSparseHoles.
   699  func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) {
   700  	if fi == nil {
   701  		return nil, errors.New("tar: FileInfo is nil")
   702  	}
   703  	fm := fi.Mode()
   704  	h := &Header{
   705  		Name:    fi.Name(),
   706  		ModTime: fi.ModTime(),
   707  		Mode:    int64(fm.Perm()), // or'd with c_IS* constants later
   708  	}
   709  	switch {
   710  	case fm.IsRegular():
   711  		h.Typeflag = TypeReg
   712  		h.Size = fi.Size()
   713  	case fi.IsDir():
   714  		h.Typeflag = TypeDir
   715  		h.Name += "/"
   716  	case fm&os.ModeSymlink != 0:
   717  		h.Typeflag = TypeSymlink
   718  		h.Linkname = link
   719  	case fm&os.ModeDevice != 0:
   720  		if fm&os.ModeCharDevice != 0 {
   721  			h.Typeflag = TypeChar
   722  		} else {
   723  			h.Typeflag = TypeBlock
   724  		}
   725  	case fm&os.ModeNamedPipe != 0:
   726  		h.Typeflag = TypeFifo
   727  	case fm&os.ModeSocket != 0:
   728  		return nil, fmt.Errorf("tar: sockets not supported")
   729  	default:
   730  		return nil, fmt.Errorf("tar: unknown file mode %v", fm)
   731  	}
   732  	if fm&os.ModeSetuid != 0 {
   733  		h.Mode |= c_ISUID
   734  	}
   735  	if fm&os.ModeSetgid != 0 {
   736  		h.Mode |= c_ISGID
   737  	}
   738  	if fm&os.ModeSticky != 0 {
   739  		h.Mode |= c_ISVTX
   740  	}
   741  	// If possible, populate additional fields from OS-specific
   742  	// FileInfo fields.
   743  	if sys, ok := fi.Sys().(*Header); ok {
   744  		// This FileInfo came from a Header (not the OS). Use the
   745  		// original Header to populate all remaining fields.
   746  		h.Uid = sys.Uid
   747  		h.Gid = sys.Gid
   748  		h.Uname = sys.Uname
   749  		h.Gname = sys.Gname
   750  		h.AccessTime = sys.AccessTime
   751  		h.ChangeTime = sys.ChangeTime
   752  		if sys.Xattrs != nil {
   753  			h.Xattrs = make(map[string]string)
   754  			for k, v := range sys.Xattrs {
   755  				h.Xattrs[k] = v
   756  			}
   757  		}
   758  		if sys.Typeflag == TypeLink {
   759  			// hard link
   760  			h.Typeflag = TypeLink
   761  			h.Size = 0
   762  			h.Linkname = sys.Linkname
   763  		}
   764  		if sys.SparseHoles != nil {
   765  			h.SparseHoles = append([]SparseEntry{}, sys.SparseHoles...)
   766  		}
   767  		if sys.PAXRecords != nil {
   768  			h.PAXRecords = make(map[string]string)
   769  			for k, v := range sys.PAXRecords {
   770  				h.PAXRecords[k] = v
   771  			}
   772  		}
   773  	}
   774  	if sysStat != nil {
   775  		return h, sysStat(fi, h)
   776  	}
   777  	return h, nil
   778  }
   779  
   780  // isHeaderOnlyType checks if the given type flag is of the type that has no
   781  // data section even if a size is specified.
   782  func isHeaderOnlyType(flag byte) bool {
   783  	switch flag {
   784  	case TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, TypeFifo:
   785  		return true
   786  	default:
   787  		return false
   788  	}
   789  }
   790  
   791  func min(a, b int64) int64 {
   792  	if a < b {
   793  		return a
   794  	}
   795  	return b
   796  }