github.com/mattn/go@v0.0.0-20171011075504-07f7db3ea99f/src/archive/tar/format.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package tar 6 7 import "strings" 8 9 // Format represents the tar archive format. 10 // 11 // The original tar format was introduced in Unix V7. 12 // Since then, there have been multiple competing formats attempting to 13 // standardize or extend the V7 format to overcome its limitations. 14 // The most common formats are the USTAR, PAX, and GNU formats, 15 // each with their own advantages and limitations. 16 // 17 // The following table captures the capabilities of each format: 18 // 19 // | USTAR | PAX | GNU 20 // ------------------+--------+-----------+---------- 21 // Name | 256B | unlimited | unlimited 22 // Linkname | 100B | unlimited | unlimited 23 // Size | uint33 | unlimited | uint89 24 // Mode | uint21 | uint21 | uint57 25 // Uid/Gid | uint21 | unlimited | uint57 26 // Uname/Gname | 32B | unlimited | 32B 27 // ModTime | uint33 | unlimited | int89 28 // AccessTime | n/a | unlimited | int89 29 // ChangeTime | n/a | unlimited | int89 30 // Devmajor/Devminor | uint21 | uint21 | uint57 31 // ------------------+--------+-----------+---------- 32 // string encoding | ASCII | UTF-8 | binary 33 // sub-second times | no | yes | no 34 // sparse files | no | yes | yes 35 // 36 // The table's upper portion shows the Header fields, where each format reports 37 // the maximum number of bytes allowed for each string field and 38 // the integer type used to store each numeric field 39 // (where timestamps are stored as the number of seconds since the Unix epoch). 40 // 41 // The table's lower portion shows specialized features of each format, 42 // such as supported string encodings, support for sub-second timestamps, 43 // or support for sparse files. 
type Format int

// Constants to identify various tar formats.
const (
	// The numeric values are deliberately hidden from the public API.
	_ Format = (1 << iota) / 4 // Yields the sequence 0, 0, 1, 2, 4, 8, etc...

	// FormatUnknown indicates that the format is unknown.
	FormatUnknown

	// formatV7 is the format of the original Unix V7 tar tool
	// prior to standardization.
	formatV7

	// FormatUSTAR represents the USTAR header format defined in POSIX.1-1988.
	//
	// While this format is compatible with most tar readers,
	// the format has several limitations making it unsuitable for some usages.
	// Most notably, it cannot support sparse files, files larger than 8GiB,
	// filenames larger than 256 characters, and non-ASCII filenames.
	//
	// Reference:
	//	http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
	FormatUSTAR

	// FormatPAX represents the PAX header format defined in POSIX.1-2001.
	//
	// PAX extends USTAR by writing a special file with Typeflag TypeXHeader
	// preceding the original header. This file contains a set of key-value
	// records, which are used to overcome USTAR's shortcomings, in addition to
	// providing the ability to have sub-second resolution for timestamps.
	//
	// Some newer formats add their own extensions to PAX by defining their
	// own keys and assigning certain semantic meaning to the associated values.
	// For example, sparse file support in PAX is implemented using keys
	// defined by the GNU manual (e.g., "GNU.sparse.map").
	//
	// Reference:
	//	http://pubs.opengroup.org/onlinepubs/009695399/utilities/pax.html
	FormatPAX

	// FormatGNU represents the GNU header format.
	//
	// The GNU header format is older than the USTAR and PAX standards and
	// is not compatible with them. The GNU format supports
	// arbitrary file sizes, filenames of arbitrary encoding and length,
	// sparse files, and other features.
	//
	// It is recommended that PAX be chosen over GNU unless the target
	// application can only parse GNU formatted archives.
	//
	// Reference:
	//	http://www.gnu.org/software/tar/manual/html_node/Standard.html
	FormatGNU

	// formatSTAR is Schily's tar format, which is incompatible with USTAR.
	// This does not cover STAR extensions to the PAX format; these fall under
	// the PAX format.
	formatSTAR

	// formatMax is the first bit past the last known format; String uses it
	// as the exclusive upper bound when walking the format bits.
	formatMax
)

// has reports whether f and f2 share at least one format bit.
func (f Format) has(f2 Format) bool {
	return f&f2 != 0
}

// mayBe adds the bits of f2 to the set of candidate formats in f.
func (f *Format) mayBe(f2 Format) {
	*f = *f | f2
}

// mayOnlyBe restricts the candidate formats in f to those also in f2.
func (f *Format) mayOnlyBe(f2 Format) {
	*f = *f & f2
}

// mustNotBe removes the bits of f2 from the candidate formats in f.
func (f *Format) mustNotBe(f2 Format) {
	*f = *f &^ f2
}

// formatNames maps each single-bit format to its display name.
var formatNames = map[Format]string{
	formatV7:    "V7",
	FormatUSTAR: "USTAR",
	FormatPAX:   "PAX",
	FormatGNU:   "GNU",
	formatSTAR:  "STAR",
}

// String returns the name of the format. A value with no known bits set is
// rendered as "<unknown>", a single format as its bare name, and several
// formats as a parenthesized list separated by " | ".
func (f Format) String() string {
	var names []string
	for bit := Format(1); bit < formatMax; bit <<= 1 {
		if !f.has(bit) {
			continue
		}
		names = append(names, formatNames[bit])
	}
	if len(names) == 0 {
		return "<unknown>"
	}
	if len(names) == 1 {
		return names[0]
	}
	return "(" + strings.Join(names, " | ") + ")"
}

// Magics used to identify various formats.
const (
	magicGNU     = "ustar "
	versionGNU   = " \x00"
	magicUSTAR   = "ustar\x00"
	versionUSTAR = "00"
	trailerSTAR  = "tar\x00"
)

// Size constants from various tar specifications.
const (
	blockSize  = 512 // Size of each block in a tar stream
	nameSize   = 100 // Max length of the name field in USTAR format
	prefixSize = 155 // Max length of the prefix field in USTAR format
)

// blockPadding computes the number of bytes needed to pad offset up to the
// nearest block edge where 0 <= n < blockSize.
func blockPadding(offset int64) (n int64) {
	// blockSize is a power of two, so masking the negated offset with
	// blockSize-1 gives the distance to the next block boundary.
	const mask = blockSize - 1
	n = -offset & mask
	return n
}

// block is one fixed-size unit of a tar stream.
type block [blockSize]byte

// zeroBlock is a block whose bytes are all zero.
var zeroBlock block

// Convert block to any number of formats.
157 func (b *block) V7() *headerV7 { return (*headerV7)(b) } 158 func (b *block) GNU() *headerGNU { return (*headerGNU)(b) } 159 func (b *block) STAR() *headerSTAR { return (*headerSTAR)(b) } 160 func (b *block) USTAR() *headerUSTAR { return (*headerUSTAR)(b) } 161 func (b *block) Sparse() sparseArray { return (sparseArray)(b[:]) } 162 163 // GetFormat checks that the block is a valid tar header based on the checksum. 164 // It then attempts to guess the specific format based on magic values. 165 // If the checksum fails, then FormatUnknown is returned. 166 func (b *block) GetFormat() Format { 167 // Verify checksum. 168 var p parser 169 value := p.parseOctal(b.V7().Chksum()) 170 chksum1, chksum2 := b.ComputeChecksum() 171 if p.err != nil || (value != chksum1 && value != chksum2) { 172 return FormatUnknown 173 } 174 175 // Guess the magic values. 176 magic := string(b.USTAR().Magic()) 177 version := string(b.USTAR().Version()) 178 trailer := string(b.STAR().Trailer()) 179 switch { 180 case magic == magicUSTAR && trailer == trailerSTAR: 181 return formatSTAR 182 case magic == magicUSTAR: 183 return FormatUSTAR | FormatPAX 184 case magic == magicGNU && version == versionGNU: 185 return FormatGNU 186 default: 187 return formatV7 188 } 189 } 190 191 // SetFormat writes the magic values necessary for specified format 192 // and then updates the checksum accordingly. 193 func (b *block) SetFormat(format Format) { 194 // Set the magic values. 195 switch { 196 case format.has(formatV7): 197 // Do nothing. 198 case format.has(FormatGNU): 199 copy(b.GNU().Magic(), magicGNU) 200 copy(b.GNU().Version(), versionGNU) 201 case format.has(formatSTAR): 202 copy(b.STAR().Magic(), magicUSTAR) 203 copy(b.STAR().Version(), versionUSTAR) 204 copy(b.STAR().Trailer(), trailerSTAR) 205 case format.has(FormatUSTAR | FormatPAX): 206 copy(b.USTAR().Magic(), magicUSTAR) 207 copy(b.USTAR().Version(), versionUSTAR) 208 default: 209 panic("invalid format") 210 } 211 212 // Update checksum. 
213 // This field is special in that it is terminated by a NULL then space. 214 var f formatter 215 field := b.V7().Chksum() 216 chksum, _ := b.ComputeChecksum() // Possible values are 256..128776 217 f.formatOctal(field[:7], chksum) // Never fails since 128776 < 262143 218 field[7] = ' ' 219 } 220 221 // ComputeChecksum computes the checksum for the header block. 222 // POSIX specifies a sum of the unsigned byte values, but the Sun tar used 223 // signed byte values. 224 // We compute and return both. 225 func (b *block) ComputeChecksum() (unsigned, signed int64) { 226 for i, c := range b { 227 if 148 <= i && i < 156 { 228 c = ' ' // Treat the checksum field itself as all spaces. 229 } 230 unsigned += int64(uint8(c)) 231 signed += int64(int8(c)) 232 } 233 return unsigned, signed 234 } 235 236 // Reset clears the block with all zeros. 237 func (b *block) Reset() { 238 *b = block{} 239 } 240 241 type headerV7 [blockSize]byte 242 243 func (h *headerV7) Name() []byte { return h[000:][:100] } 244 func (h *headerV7) Mode() []byte { return h[100:][:8] } 245 func (h *headerV7) UID() []byte { return h[108:][:8] } 246 func (h *headerV7) GID() []byte { return h[116:][:8] } 247 func (h *headerV7) Size() []byte { return h[124:][:12] } 248 func (h *headerV7) ModTime() []byte { return h[136:][:12] } 249 func (h *headerV7) Chksum() []byte { return h[148:][:8] } 250 func (h *headerV7) TypeFlag() []byte { return h[156:][:1] } 251 func (h *headerV7) LinkName() []byte { return h[157:][:100] } 252 253 type headerGNU [blockSize]byte 254 255 func (h *headerGNU) V7() *headerV7 { return (*headerV7)(h) } 256 func (h *headerGNU) Magic() []byte { return h[257:][:6] } 257 func (h *headerGNU) Version() []byte { return h[263:][:2] } 258 func (h *headerGNU) UserName() []byte { return h[265:][:32] } 259 func (h *headerGNU) GroupName() []byte { return h[297:][:32] } 260 func (h *headerGNU) DevMajor() []byte { return h[329:][:8] } 261 func (h *headerGNU) DevMinor() []byte { return h[337:][:8] } 262 
func (h *headerGNU) AccessTime() []byte { return h[345:][:12] } 263 func (h *headerGNU) ChangeTime() []byte { return h[357:][:12] } 264 func (h *headerGNU) Sparse() sparseArray { return (sparseArray)(h[386:][:24*4+1]) } 265 func (h *headerGNU) RealSize() []byte { return h[483:][:12] } 266 267 type headerSTAR [blockSize]byte 268 269 func (h *headerSTAR) V7() *headerV7 { return (*headerV7)(h) } 270 func (h *headerSTAR) Magic() []byte { return h[257:][:6] } 271 func (h *headerSTAR) Version() []byte { return h[263:][:2] } 272 func (h *headerSTAR) UserName() []byte { return h[265:][:32] } 273 func (h *headerSTAR) GroupName() []byte { return h[297:][:32] } 274 func (h *headerSTAR) DevMajor() []byte { return h[329:][:8] } 275 func (h *headerSTAR) DevMinor() []byte { return h[337:][:8] } 276 func (h *headerSTAR) Prefix() []byte { return h[345:][:131] } 277 func (h *headerSTAR) AccessTime() []byte { return h[476:][:12] } 278 func (h *headerSTAR) ChangeTime() []byte { return h[488:][:12] } 279 func (h *headerSTAR) Trailer() []byte { return h[508:][:4] } 280 281 type headerUSTAR [blockSize]byte 282 283 func (h *headerUSTAR) V7() *headerV7 { return (*headerV7)(h) } 284 func (h *headerUSTAR) Magic() []byte { return h[257:][:6] } 285 func (h *headerUSTAR) Version() []byte { return h[263:][:2] } 286 func (h *headerUSTAR) UserName() []byte { return h[265:][:32] } 287 func (h *headerUSTAR) GroupName() []byte { return h[297:][:32] } 288 func (h *headerUSTAR) DevMajor() []byte { return h[329:][:8] } 289 func (h *headerUSTAR) DevMinor() []byte { return h[337:][:8] } 290 func (h *headerUSTAR) Prefix() []byte { return h[345:][:155] } 291 292 type sparseArray []byte 293 294 func (s sparseArray) Entry(i int) sparseElem { return (sparseElem)(s[i*24:]) } 295 func (s sparseArray) IsExtended() []byte { return s[24*s.MaxEntries():][:1] } 296 func (s sparseArray) MaxEntries() int { return len(s) / 24 } 297 298 type sparseElem []byte 299 300 func (s sparseElem) Offset() []byte { return 
s[00:][:12] } 301 func (s sparseElem) Length() []byte { return s[12:][:12] }