// github.com/ice-blockchain/go/src@v0.0.0-20240403114104-1564d284e521/archive/tar/format.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package tar

import "strings"

// Format represents the tar archive format.
//
// The original tar format was introduced in Unix V7.
// Since then, there have been multiple competing formats attempting to
// standardize or extend the V7 format to overcome its limitations.
// The most common formats are the USTAR, PAX, and GNU formats,
// each with their own advantages and limitations.
//
// The following table captures the capabilities of each format:
//
//	                  |  USTAR |       PAX |       GNU
//	------------------+--------+-----------+----------
//	Name              |   256B | unlimited | unlimited
//	Linkname          |   100B | unlimited | unlimited
//	Size              | uint33 | unlimited |    uint89
//	Mode              | uint21 |    uint21 |    uint57
//	Uid/Gid           | uint21 | unlimited |    uint57
//	Uname/Gname       |    32B | unlimited |       32B
//	ModTime           | uint33 | unlimited |     int89
//	AccessTime        |    n/a | unlimited |     int89
//	ChangeTime        |    n/a | unlimited |     int89
//	Devmajor/Devminor | uint21 |    uint21 |    uint57
//	------------------+--------+-----------+----------
//	string encoding   |  ASCII |     UTF-8 |    binary
//	sub-second times  |     no |       yes |        no
//	sparse files      |     no |       yes |       yes
//
// The table's upper portion shows the [Header] fields, where each format reports
// the maximum number of bytes allowed for each string field and
// the integer type used to store each numeric field
// (where timestamps are stored as the number of seconds since the Unix epoch).
//
// The table's lower portion shows specialized features of each format,
// such as supported string encodings, support for sub-second timestamps,
// or support for sparse files.
44 // 45 // The Writer currently provides no support for sparse files. 46 type Format int 47 48 // Constants to identify various tar formats. 49 const ( 50 // Deliberately hide the meaning of constants from public API. 51 _ Format = (1 << iota) / 4 // Sequence of 0, 0, 1, 2, 4, 8, etc... 52 53 // FormatUnknown indicates that the format is unknown. 54 FormatUnknown 55 56 // The format of the original Unix V7 tar tool prior to standardization. 57 formatV7 58 59 // FormatUSTAR represents the USTAR header format defined in POSIX.1-1988. 60 // 61 // While this format is compatible with most tar readers, 62 // the format has several limitations making it unsuitable for some usages. 63 // Most notably, it cannot support sparse files, files larger than 8GiB, 64 // filenames larger than 256 characters, and non-ASCII filenames. 65 // 66 // Reference: 67 // http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06 68 FormatUSTAR 69 70 // FormatPAX represents the PAX header format defined in POSIX.1-2001. 71 // 72 // PAX extends USTAR by writing a special file with Typeflag TypeXHeader 73 // preceding the original header. This file contains a set of key-value 74 // records, which are used to overcome USTAR's shortcomings, in addition to 75 // providing the ability to have sub-second resolution for timestamps. 76 // 77 // Some newer formats add their own extensions to PAX by defining their 78 // own keys and assigning certain semantic meaning to the associated values. 79 // For example, sparse file support in PAX is implemented using keys 80 // defined by the GNU manual (e.g., "GNU.sparse.map"). 81 // 82 // Reference: 83 // http://pubs.opengroup.org/onlinepubs/009695399/utilities/pax.html 84 FormatPAX 85 86 // FormatGNU represents the GNU header format. 87 // 88 // The GNU header format is older than the USTAR and PAX standards and 89 // is not compatible with them. 
The GNU format supports 90 // arbitrary file sizes, filenames of arbitrary encoding and length, 91 // sparse files, and other features. 92 // 93 // It is recommended that PAX be chosen over GNU unless the target 94 // application can only parse GNU formatted archives. 95 // 96 // Reference: 97 // https://www.gnu.org/software/tar/manual/html_node/Standard.html 98 FormatGNU 99 100 // Schily's tar format, which is incompatible with USTAR. 101 // This does not cover STAR extensions to the PAX format; these fall under 102 // the PAX format. 103 formatSTAR 104 105 formatMax 106 ) 107 108 func (f Format) has(f2 Format) bool { return f&f2 != 0 } 109 func (f *Format) mayBe(f2 Format) { *f |= f2 } 110 func (f *Format) mayOnlyBe(f2 Format) { *f &= f2 } 111 func (f *Format) mustNotBe(f2 Format) { *f &^= f2 } 112 113 var formatNames = map[Format]string{ 114 formatV7: "V7", FormatUSTAR: "USTAR", FormatPAX: "PAX", FormatGNU: "GNU", formatSTAR: "STAR", 115 } 116 117 func (f Format) String() string { 118 var ss []string 119 for f2 := Format(1); f2 < formatMax; f2 <<= 1 { 120 if f.has(f2) { 121 ss = append(ss, formatNames[f2]) 122 } 123 } 124 switch len(ss) { 125 case 0: 126 return "<unknown>" 127 case 1: 128 return ss[0] 129 default: 130 return "(" + strings.Join(ss, " | ") + ")" 131 } 132 } 133 134 // Magics used to identify various formats. 135 const ( 136 magicGNU, versionGNU = "ustar ", " \x00" 137 magicUSTAR, versionUSTAR = "ustar\x00", "00" 138 trailerSTAR = "tar\x00" 139 ) 140 141 // Size constants from various tar specifications. 142 const ( 143 blockSize = 512 // Size of each block in a tar stream 144 nameSize = 100 // Max length of the name field in USTAR format 145 prefixSize = 155 // Max length of the prefix field in USTAR format 146 147 // Max length of a special file (PAX header, GNU long name or link). 148 // This matches the limit used by libarchive. 
149 maxSpecialFileSize = 1 << 20 150 ) 151 152 // blockPadding computes the number of bytes needed to pad offset up to the 153 // nearest block edge where 0 <= n < blockSize. 154 func blockPadding(offset int64) (n int64) { 155 return -offset & (blockSize - 1) 156 } 157 158 var zeroBlock block 159 160 type block [blockSize]byte 161 162 // Convert block to any number of formats. 163 func (b *block) toV7() *headerV7 { return (*headerV7)(b) } 164 func (b *block) toGNU() *headerGNU { return (*headerGNU)(b) } 165 func (b *block) toSTAR() *headerSTAR { return (*headerSTAR)(b) } 166 func (b *block) toUSTAR() *headerUSTAR { return (*headerUSTAR)(b) } 167 func (b *block) toSparse() sparseArray { return sparseArray(b[:]) } 168 169 // getFormat checks that the block is a valid tar header based on the checksum. 170 // It then attempts to guess the specific format based on magic values. 171 // If the checksum fails, then FormatUnknown is returned. 172 func (b *block) getFormat() Format { 173 // Verify checksum. 174 var p parser 175 value := p.parseOctal(b.toV7().chksum()) 176 chksum1, chksum2 := b.computeChecksum() 177 if p.err != nil || (value != chksum1 && value != chksum2) { 178 return FormatUnknown 179 } 180 181 // Guess the magic values. 182 magic := string(b.toUSTAR().magic()) 183 version := string(b.toUSTAR().version()) 184 trailer := string(b.toSTAR().trailer()) 185 switch { 186 case magic == magicUSTAR && trailer == trailerSTAR: 187 return formatSTAR 188 case magic == magicUSTAR: 189 return FormatUSTAR | FormatPAX 190 case magic == magicGNU && version == versionGNU: 191 return FormatGNU 192 default: 193 return formatV7 194 } 195 } 196 197 // setFormat writes the magic values necessary for specified format 198 // and then updates the checksum accordingly. 199 func (b *block) setFormat(format Format) { 200 // Set the magic values. 201 switch { 202 case format.has(formatV7): 203 // Do nothing. 
204 case format.has(FormatGNU): 205 copy(b.toGNU().magic(), magicGNU) 206 copy(b.toGNU().version(), versionGNU) 207 case format.has(formatSTAR): 208 copy(b.toSTAR().magic(), magicUSTAR) 209 copy(b.toSTAR().version(), versionUSTAR) 210 copy(b.toSTAR().trailer(), trailerSTAR) 211 case format.has(FormatUSTAR | FormatPAX): 212 copy(b.toUSTAR().magic(), magicUSTAR) 213 copy(b.toUSTAR().version(), versionUSTAR) 214 default: 215 panic("invalid format") 216 } 217 218 // Update checksum. 219 // This field is special in that it is terminated by a NULL then space. 220 var f formatter 221 field := b.toV7().chksum() 222 chksum, _ := b.computeChecksum() // Possible values are 256..128776 223 f.formatOctal(field[:7], chksum) // Never fails since 128776 < 262143 224 field[7] = ' ' 225 } 226 227 // computeChecksum computes the checksum for the header block. 228 // POSIX specifies a sum of the unsigned byte values, but the Sun tar used 229 // signed byte values. 230 // We compute and return both. 231 func (b *block) computeChecksum() (unsigned, signed int64) { 232 for i, c := range b { 233 if 148 <= i && i < 156 { 234 c = ' ' // Treat the checksum field itself as all spaces. 235 } 236 unsigned += int64(c) 237 signed += int64(int8(c)) 238 } 239 return unsigned, signed 240 } 241 242 // reset clears the block with all zeros. 
243 func (b *block) reset() { 244 *b = block{} 245 } 246 247 type headerV7 [blockSize]byte 248 249 func (h *headerV7) name() []byte { return h[000:][:100] } 250 func (h *headerV7) mode() []byte { return h[100:][:8] } 251 func (h *headerV7) uid() []byte { return h[108:][:8] } 252 func (h *headerV7) gid() []byte { return h[116:][:8] } 253 func (h *headerV7) size() []byte { return h[124:][:12] } 254 func (h *headerV7) modTime() []byte { return h[136:][:12] } 255 func (h *headerV7) chksum() []byte { return h[148:][:8] } 256 func (h *headerV7) typeFlag() []byte { return h[156:][:1] } 257 func (h *headerV7) linkName() []byte { return h[157:][:100] } 258 259 type headerGNU [blockSize]byte 260 261 func (h *headerGNU) v7() *headerV7 { return (*headerV7)(h) } 262 func (h *headerGNU) magic() []byte { return h[257:][:6] } 263 func (h *headerGNU) version() []byte { return h[263:][:2] } 264 func (h *headerGNU) userName() []byte { return h[265:][:32] } 265 func (h *headerGNU) groupName() []byte { return h[297:][:32] } 266 func (h *headerGNU) devMajor() []byte { return h[329:][:8] } 267 func (h *headerGNU) devMinor() []byte { return h[337:][:8] } 268 func (h *headerGNU) accessTime() []byte { return h[345:][:12] } 269 func (h *headerGNU) changeTime() []byte { return h[357:][:12] } 270 func (h *headerGNU) sparse() sparseArray { return sparseArray(h[386:][:24*4+1]) } 271 func (h *headerGNU) realSize() []byte { return h[483:][:12] } 272 273 type headerSTAR [blockSize]byte 274 275 func (h *headerSTAR) v7() *headerV7 { return (*headerV7)(h) } 276 func (h *headerSTAR) magic() []byte { return h[257:][:6] } 277 func (h *headerSTAR) version() []byte { return h[263:][:2] } 278 func (h *headerSTAR) userName() []byte { return h[265:][:32] } 279 func (h *headerSTAR) groupName() []byte { return h[297:][:32] } 280 func (h *headerSTAR) devMajor() []byte { return h[329:][:8] } 281 func (h *headerSTAR) devMinor() []byte { return h[337:][:8] } 282 func (h *headerSTAR) prefix() []byte { return 
h[345:][:131] } 283 func (h *headerSTAR) accessTime() []byte { return h[476:][:12] } 284 func (h *headerSTAR) changeTime() []byte { return h[488:][:12] } 285 func (h *headerSTAR) trailer() []byte { return h[508:][:4] } 286 287 type headerUSTAR [blockSize]byte 288 289 func (h *headerUSTAR) v7() *headerV7 { return (*headerV7)(h) } 290 func (h *headerUSTAR) magic() []byte { return h[257:][:6] } 291 func (h *headerUSTAR) version() []byte { return h[263:][:2] } 292 func (h *headerUSTAR) userName() []byte { return h[265:][:32] } 293 func (h *headerUSTAR) groupName() []byte { return h[297:][:32] } 294 func (h *headerUSTAR) devMajor() []byte { return h[329:][:8] } 295 func (h *headerUSTAR) devMinor() []byte { return h[337:][:8] } 296 func (h *headerUSTAR) prefix() []byte { return h[345:][:155] } 297 298 type sparseArray []byte 299 300 func (s sparseArray) entry(i int) sparseElem { return sparseElem(s[i*24:]) } 301 func (s sparseArray) isExtended() []byte { return s[24*s.maxEntries():][:1] } 302 func (s sparseArray) maxEntries() int { return len(s) / 24 } 303 304 type sparseElem []byte 305 306 func (s sparseElem) offset() []byte { return s[00:][:12] } 307 func (s sparseElem) length() []byte { return s[12:][:12] }