github.com/hlts2/go@v0.0.0-20170904000733-812b34efaed8/src/archive/tar/format.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package tar
     6  
     7  import "strings"
     8  
     9  // Format represents the tar archive format.
    10  //
    11  // The original tar format was introduced in Unix V7.
    12  // Since then, there have been multiple competing formats attempting to
    13  // standardize or extend the V7 format to overcome its limitations.
    14  // The most common formats are the USTAR, PAX, and GNU formats,
    15  // each with their own advantages and limitations.
    16  //
    17  // The following table captures the capabilities of each format:
    18  //
    19  //	                  |  USTAR |       PAX |       GNU
    20  //	------------------+--------+-----------+----------
    21  //	Name              |   256B | unlimited | unlimited
    22  //	Linkname          |   100B | unlimited | unlimited
    23  //	Size              | uint33 | unlimited |    uint89
    24  //	Mode              | uint21 |    uint21 |    uint57
    25  //	Uid/Gid           | uint21 | unlimited |    uint57
    26  //	Uname/Gname       |    32B | unlimited |       32B
    27  //	ModTime           | uint33 | unlimited |     int89
    28  //	AccessTime        |    n/a | unlimited |     int89
    29  //	ChangeTime        |    n/a | unlimited |     int89
    30  //	Devmajor/Devminor | uint21 |    uint21 |    uint57
    31  //	------------------+--------+-----------+----------
    32  //	string encoding   |  ASCII |     UTF-8 |    binary
    33  //	sub-second times  |     no |       yes |        no
    34  //	sparse files      |     no |       yes |       yes
    35  //
    36  // The table's upper portion shows the Header fields, where each format reports
    37  // the maximum number of bytes allowed for each string field and
    38  // the integer type used to store each numeric field
    39  // (where timestamps are stored as the number of seconds since the Unix epoch).
    40  //
    41  // The table's lower portion shows specialized features of each format,
    42  // such as supported string encodings, support for sub-second timestamps,
    43  // or support for sparse files.
    44  type Format int
    45  
    46  // Constants to identify various tar formats.
    47  const (
    48  	// Deliberately hide the meaning of constants from public API.
    49  	_ Format = (1 << iota) / 4 // Sequence of 0, 0, 1, 2, 4, 8, etc...
    50  
    51  	// FormatUnknown indicates that the format is unknown.
    52  	FormatUnknown
    53  
    54  	// The format of the original Unix V7 tar tool prior to standardization.
    55  	formatV7
    56  
    57  	// FormatUSTAR represents the USTAR header format defined in POSIX.1-1988.
    58  	//
    59  	// While this format is compatible with most tar readers,
    60  	// the format has several limitations making it unsuitable for some usages.
    61  	// Most notably, it cannot support sparse files, files larger than 8GiB,
    62  	// filenames larger than 256 characters, and non-ASCII filenames.
    63  	//
    64  	// Reference:
    65  	//	http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
    66  	FormatUSTAR
    67  
    68  	// FormatPAX represents the PAX header format defined in POSIX.1-2001.
    69  	//
    70  	// PAX extends USTAR by writing a special file with Typeflag TypeXHeader
    71  	// preceding the original header. This file contains a set of key-value
    72  	// records, which are used to overcome USTAR's shortcomings, in addition to
    73  	// providing the ability to have sub-second resolution for timestamps.
    74  	//
    75  	// Some newer formats add their own extensions to PAX by defining their
    76  	// own keys and assigning certain semantic meaning to the associated values.
    77  	// For example, sparse file support in PAX is implemented using keys
    78  	// defined by the GNU manual (e.g., "GNU.sparse.map").
    79  	//
    80  	// Reference:
    81  	//	http://pubs.opengroup.org/onlinepubs/009695399/utilities/pax.html
    82  	FormatPAX
    83  
    84  	// FormatGNU represents the GNU header format.
    85  	//
    86  	// The GNU header format is older than the USTAR and PAX standards and
    87  	// is not compatible with them. The GNU format supports
    88  	// arbitrary file sizes, filenames of arbitrary encoding and length,
    89  	// sparse files, and other features.
    90  	//
    91  	// It is recommended that PAX be chosen over GNU unless the target
    92  	// application can only parse GNU formatted archives.
    93  	//
    94  	// Reference:
    95  	//	http://www.gnu.org/software/tar/manual/html_node/Standard.html
    96  	FormatGNU
    97  
    98  	// Schily's tar format, which is incompatible with USTAR.
    99  	// This does not cover STAR extensions to the PAX format; these fall under
   100  	// the PAX format.
   101  	formatSTAR
   102  
   103  	formatMax
   104  )
   105  
   106  func (f Format) has(f2 Format) bool   { return f&f2 != 0 }
   107  func (f *Format) mayBe(f2 Format)     { *f |= f2 }
   108  func (f *Format) mayOnlyBe(f2 Format) { *f &= f2 }
   109  func (f *Format) mustNotBe(f2 Format) { *f &^= f2 }
   110  
   111  var formatNames = map[Format]string{
   112  	formatV7: "V7", FormatUSTAR: "USTAR", FormatPAX: "PAX", FormatGNU: "GNU", formatSTAR: "STAR",
   113  }
   114  
   115  func (f Format) String() string {
   116  	var ss []string
   117  	for f2 := Format(1); f2 < formatMax; f2 <<= 1 {
   118  		if f.has(f2) {
   119  			ss = append(ss, formatNames[f2])
   120  		}
   121  	}
   122  	switch len(ss) {
   123  	case 0:
   124  		return "<unknown>"
   125  	case 1:
   126  		return ss[0]
   127  	default:
   128  		return "(" + strings.Join(ss, " | ") + ")"
   129  	}
   130  }
   131  
   132  // Magics used to identify various formats.
   133  const (
   134  	magicGNU, versionGNU     = "ustar ", " \x00"
   135  	magicUSTAR, versionUSTAR = "ustar\x00", "00"
   136  	trailerSTAR              = "tar\x00"
   137  )
   138  
   139  // Size constants from various tar specifications.
   140  const (
   141  	blockSize  = 512 // Size of each block in a tar stream
   142  	nameSize   = 100 // Max length of the name field in USTAR format
   143  	prefixSize = 155 // Max length of the prefix field in USTAR format
   144  )
   145  
   146  // blockPadding computes the number of bytes needed to pad offset up to the
   147  // nearest block edge where 0 <= n < blockSize.
   148  func blockPadding(offset int64) (n int64) {
   149  	return -offset & (blockSize - 1)
   150  }
   151  
   152  var zeroBlock block
   153  
   154  type block [blockSize]byte
   155  
   156  // Convert block to any number of formats.
   157  func (b *block) V7() *headerV7       { return (*headerV7)(b) }
   158  func (b *block) GNU() *headerGNU     { return (*headerGNU)(b) }
   159  func (b *block) STAR() *headerSTAR   { return (*headerSTAR)(b) }
   160  func (b *block) USTAR() *headerUSTAR { return (*headerUSTAR)(b) }
   161  func (b *block) Sparse() sparseArray { return (sparseArray)(b[:]) }
   162  
   163  // GetFormat checks that the block is a valid tar header based on the checksum.
   164  // It then attempts to guess the specific format based on magic values.
   165  // If the checksum fails, then FormatUnknown is returned.
   166  func (b *block) GetFormat() Format {
   167  	// Verify checksum.
   168  	var p parser
   169  	value := p.parseOctal(b.V7().Chksum())
   170  	chksum1, chksum2 := b.ComputeChecksum()
   171  	if p.err != nil || (value != chksum1 && value != chksum2) {
   172  		return FormatUnknown
   173  	}
   174  
   175  	// Guess the magic values.
   176  	magic := string(b.USTAR().Magic())
   177  	version := string(b.USTAR().Version())
   178  	trailer := string(b.STAR().Trailer())
   179  	switch {
   180  	case magic == magicUSTAR && trailer == trailerSTAR:
   181  		return formatSTAR
   182  	case magic == magicUSTAR:
   183  		return FormatUSTAR | FormatPAX
   184  	case magic == magicGNU && version == versionGNU:
   185  		return FormatGNU
   186  	default:
   187  		return formatV7
   188  	}
   189  }
   190  
   191  // SetFormat writes the magic values necessary for specified format
   192  // and then updates the checksum accordingly.
   193  func (b *block) SetFormat(format Format) {
   194  	// Set the magic values.
   195  	switch {
   196  	case format.has(formatV7):
   197  		// Do nothing.
   198  	case format.has(FormatGNU):
   199  		copy(b.GNU().Magic(), magicGNU)
   200  		copy(b.GNU().Version(), versionGNU)
   201  	case format.has(formatSTAR):
   202  		copy(b.STAR().Magic(), magicUSTAR)
   203  		copy(b.STAR().Version(), versionUSTAR)
   204  		copy(b.STAR().Trailer(), trailerSTAR)
   205  	case format.has(FormatUSTAR | FormatPAX):
   206  		copy(b.USTAR().Magic(), magicUSTAR)
   207  		copy(b.USTAR().Version(), versionUSTAR)
   208  	default:
   209  		panic("invalid format")
   210  	}
   211  
   212  	// Update checksum.
   213  	// This field is special in that it is terminated by a NULL then space.
   214  	var f formatter
   215  	field := b.V7().Chksum()
   216  	chksum, _ := b.ComputeChecksum() // Possible values are 256..128776
   217  	f.formatOctal(field[:7], chksum) // Never fails since 128776 < 262143
   218  	field[7] = ' '
   219  }
   220  
   221  // ComputeChecksum computes the checksum for the header block.
   222  // POSIX specifies a sum of the unsigned byte values, but the Sun tar used
   223  // signed byte values.
   224  // We compute and return both.
   225  func (b *block) ComputeChecksum() (unsigned, signed int64) {
   226  	for i, c := range b {
   227  		if 148 <= i && i < 156 {
   228  			c = ' ' // Treat the checksum field itself as all spaces.
   229  		}
   230  		unsigned += int64(uint8(c))
   231  		signed += int64(int8(c))
   232  	}
   233  	return unsigned, signed
   234  }
   235  
   236  // Reset clears the block with all zeros.
   237  func (b *block) Reset() {
   238  	*b = block{}
   239  }
   240  
   241  type headerV7 [blockSize]byte
   242  
   243  func (h *headerV7) Name() []byte     { return h[000:][:100] }
   244  func (h *headerV7) Mode() []byte     { return h[100:][:8] }
   245  func (h *headerV7) UID() []byte      { return h[108:][:8] }
   246  func (h *headerV7) GID() []byte      { return h[116:][:8] }
   247  func (h *headerV7) Size() []byte     { return h[124:][:12] }
   248  func (h *headerV7) ModTime() []byte  { return h[136:][:12] }
   249  func (h *headerV7) Chksum() []byte   { return h[148:][:8] }
   250  func (h *headerV7) TypeFlag() []byte { return h[156:][:1] }
   251  func (h *headerV7) LinkName() []byte { return h[157:][:100] }
   252  
   253  type headerGNU [blockSize]byte
   254  
   255  func (h *headerGNU) V7() *headerV7       { return (*headerV7)(h) }
   256  func (h *headerGNU) Magic() []byte       { return h[257:][:6] }
   257  func (h *headerGNU) Version() []byte     { return h[263:][:2] }
   258  func (h *headerGNU) UserName() []byte    { return h[265:][:32] }
   259  func (h *headerGNU) GroupName() []byte   { return h[297:][:32] }
   260  func (h *headerGNU) DevMajor() []byte    { return h[329:][:8] }
   261  func (h *headerGNU) DevMinor() []byte    { return h[337:][:8] }
   262  func (h *headerGNU) AccessTime() []byte  { return h[345:][:12] }
   263  func (h *headerGNU) ChangeTime() []byte  { return h[357:][:12] }
   264  func (h *headerGNU) Sparse() sparseArray { return (sparseArray)(h[386:][:24*4+1]) }
   265  func (h *headerGNU) RealSize() []byte    { return h[483:][:12] }
   266  
   267  type headerSTAR [blockSize]byte
   268  
   269  func (h *headerSTAR) V7() *headerV7      { return (*headerV7)(h) }
   270  func (h *headerSTAR) Magic() []byte      { return h[257:][:6] }
   271  func (h *headerSTAR) Version() []byte    { return h[263:][:2] }
   272  func (h *headerSTAR) UserName() []byte   { return h[265:][:32] }
   273  func (h *headerSTAR) GroupName() []byte  { return h[297:][:32] }
   274  func (h *headerSTAR) DevMajor() []byte   { return h[329:][:8] }
   275  func (h *headerSTAR) DevMinor() []byte   { return h[337:][:8] }
   276  func (h *headerSTAR) Prefix() []byte     { return h[345:][:131] }
   277  func (h *headerSTAR) AccessTime() []byte { return h[476:][:12] }
   278  func (h *headerSTAR) ChangeTime() []byte { return h[488:][:12] }
   279  func (h *headerSTAR) Trailer() []byte    { return h[508:][:4] }
   280  
   281  type headerUSTAR [blockSize]byte
   282  
   283  func (h *headerUSTAR) V7() *headerV7     { return (*headerV7)(h) }
   284  func (h *headerUSTAR) Magic() []byte     { return h[257:][:6] }
   285  func (h *headerUSTAR) Version() []byte   { return h[263:][:2] }
   286  func (h *headerUSTAR) UserName() []byte  { return h[265:][:32] }
   287  func (h *headerUSTAR) GroupName() []byte { return h[297:][:32] }
   288  func (h *headerUSTAR) DevMajor() []byte  { return h[329:][:8] }
   289  func (h *headerUSTAR) DevMinor() []byte  { return h[337:][:8] }
   290  func (h *headerUSTAR) Prefix() []byte    { return h[345:][:155] }
   291  
   292  type sparseArray []byte
   293  
   294  func (s sparseArray) Entry(i int) sparseElem { return (sparseElem)(s[i*24:]) }
   295  func (s sparseArray) IsExtended() []byte     { return s[24*s.MaxEntries():][:1] }
   296  func (s sparseArray) MaxEntries() int        { return len(s) / 24 }
   297  
   298  type sparseElem []byte
   299  
   300  func (s sparseElem) Offset() []byte { return s[00:][:12] }
   301  func (s sparseElem) Length() []byte { return s[12:][:12] }