github.com/karrick/go@v0.0.0-20170817181416-d5b0ec858b37/src/archive/tar/reader.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package tar 6 7 // TODO(dsymonds): 8 // - pax extensions 9 10 import ( 11 "bytes" 12 "io" 13 "io/ioutil" 14 "math" 15 "strconv" 16 "strings" 17 "time" 18 ) 19 20 // A Reader provides sequential access to the contents of a tar archive. 21 // A tar archive consists of a sequence of files. 22 // The Next method advances to the next file in the archive (including the first), 23 // and then it can be treated as an io.Reader to access the file's data. 24 type Reader struct { 25 r io.Reader 26 pad int64 // amount of padding (ignored) after current file entry 27 curr numBytesReader // reader for current file entry 28 blk block // buffer to use as temporary local storage 29 30 // err is a persistent error. 31 // It is only the responsibility of every exported method of Reader to 32 // ensure that this error is sticky. 33 err error 34 } 35 36 // A numBytesReader is an io.Reader with a numBytes method, returning the number 37 // of bytes remaining in the underlying encoded data. 38 type numBytesReader interface { 39 io.Reader 40 numBytes() int64 41 } 42 43 // A regFileReader is a numBytesReader for reading file data from a tar archive. 44 type regFileReader struct { 45 r io.Reader // underlying reader 46 nb int64 // number of unread bytes for current file entry 47 } 48 49 // A sparseFileReader is a numBytesReader for reading sparse file data from a 50 // tar archive. 51 type sparseFileReader struct { 52 rfr numBytesReader // Reads the sparse-encoded file data 53 sp []sparseEntry // The sparse map for the file 54 pos int64 // Keeps track of file position 55 total int64 // Total size of the file 56 } 57 58 // A sparseEntry holds a single entry in a sparse file's sparse map. 59 // 60 // Sparse files are represented using a series of sparseEntrys. 61 // Despite the name, a sparseEntry represents an actual data fragment that 62 // references data found in the underlying archive stream. All regions not 63 // covered by a sparseEntry are logically filled with zeros. 64 // 65 // For example, if the underlying raw file contains the 10-byte data: 66 // var compactData = "abcdefgh" 67 // 68 // And the sparse map has the following entries: 69 // var sp = []sparseEntry{ 70 // {offset: 2, numBytes: 5} // Data fragment for [2..7] 71 // {offset: 18, numBytes: 3} // Data fragment for [18..21] 72 // } 73 // 74 // Then the content of the resulting sparse file with a "real" size of 25 is: 75 // var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4 76 type sparseEntry struct { 77 offset int64 // Starting position of the fragment 78 numBytes int64 // Length of the fragment 79 } 80 81 // Keywords for GNU sparse files in a PAX extended header 82 const ( 83 paxGNUSparseNumBlocks = "GNU.sparse.numblocks" 84 paxGNUSparseOffset = "GNU.sparse.offset" 85 paxGNUSparseNumBytes = "GNU.sparse.numbytes" 86 paxGNUSparseMap = "GNU.sparse.map" 87 paxGNUSparseName = "GNU.sparse.name" 88 paxGNUSparseMajor = "GNU.sparse.major" 89 paxGNUSparseMinor = "GNU.sparse.minor" 90 paxGNUSparseSize = "GNU.sparse.size" 91 paxGNUSparseRealSize = "GNU.sparse.realsize" 92 ) 93 94 // NewReader creates a new Reader reading from r. 95 func NewReader(r io.Reader) *Reader { return &Reader{r: r} } 96 97 // Next advances to the next entry in the tar archive. 98 // 99 // io.EOF is returned at the end of the input. 100 func (tr *Reader) Next() (*Header, error) { 101 if tr.err != nil { 102 return nil, tr.err 103 } 104 hdr, err := tr.next() 105 tr.err = err 106 return hdr, err 107 } 108 109 func (tr *Reader) next() (*Header, error) { 110 var extHdrs map[string]string 111 112 // Externally, Next iterates through the tar archive as if it is a series of 113 // files. Internally, the tar format often uses fake "files" to add meta 114 // data that describes the next file. These meta data "files" should not 115 // normally be visible to the outside. As such, this loop iterates through 116 // one or more "header files" until it finds a "normal file". 117 loop: 118 for { 119 if err := tr.skipUnread(); err != nil { 120 return nil, err 121 } 122 hdr, rawHdr, err := tr.readHeader() 123 if err != nil { 124 return nil, err 125 } 126 if err := tr.handleRegularFile(hdr); err != nil { 127 return nil, err 128 } 129 130 // Check for PAX/GNU special headers and files. 131 switch hdr.Typeflag { 132 case TypeXHeader: 133 extHdrs, err = parsePAX(tr) 134 if err != nil { 135 return nil, err 136 } 137 continue loop // This is a meta header affecting the next header 138 case TypeGNULongName, TypeGNULongLink: 139 realname, err := ioutil.ReadAll(tr) 140 if err != nil { 141 return nil, err 142 } 143 144 // Convert GNU extensions to use PAX headers. 145 if extHdrs == nil { 146 extHdrs = make(map[string]string) 147 } 148 var p parser 149 switch hdr.Typeflag { 150 case TypeGNULongName: 151 extHdrs[paxPath] = p.parseString(realname) 152 case TypeGNULongLink: 153 extHdrs[paxLinkpath] = p.parseString(realname) 154 } 155 if p.err != nil { 156 return nil, p.err 157 } 158 continue loop // This is a meta header affecting the next header 159 default: 160 // The old GNU sparse format is handled here since it is technically 161 // just a regular file with additional attributes. 162 163 if err := mergePAX(hdr, extHdrs); err != nil { 164 return nil, err 165 } 166 167 // The extended headers may have updated the size. 168 // Thus, setup the regFileReader again after merging PAX headers. 169 if err := tr.handleRegularFile(hdr); err != nil { 170 return nil, err 171 } 172 173 // Sparse formats rely on being able to read from the logical data 174 // section; there must be a preceding call to handleRegularFile. 175 if err := tr.handleSparseFile(hdr, rawHdr, extHdrs); err != nil { 176 return nil, err 177 } 178 return hdr, nil // This is a file, so stop 179 } 180 } 181 } 182 183 // handleRegularFile sets up the current file reader and padding such that it 184 // can only read the following logical data section. It will properly handle 185 // special headers that contain no data section. 186 func (tr *Reader) handleRegularFile(hdr *Header) error { 187 nb := hdr.Size 188 if isHeaderOnlyType(hdr.Typeflag) { 189 nb = 0 190 } 191 if nb < 0 { 192 return ErrHeader 193 } 194 195 tr.pad = -nb & (blockSize - 1) // blockSize is a power of two 196 tr.curr = ®FileReader{r: tr.r, nb: nb} 197 return nil 198 } 199 200 // handleSparseFile checks if the current file is a sparse format of any type 201 // and sets the curr reader appropriately. 202 func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block, extHdrs map[string]string) error { 203 var sp []sparseEntry 204 var err error 205 if hdr.Typeflag == TypeGNUSparse { 206 sp, err = tr.readOldGNUSparseMap(hdr, rawHdr) 207 if err != nil { 208 return err 209 } 210 } else { 211 sp, err = tr.checkForGNUSparsePAXHeaders(hdr, extHdrs) 212 if err != nil { 213 return err 214 } 215 } 216 217 // If sp is non-nil, then this is a sparse file. 218 // Note that it is possible for len(sp) to be zero. 219 if sp != nil { 220 tr.curr, err = newSparseFileReader(tr.curr, sp, hdr.Size) 221 } 222 return err 223 } 224 225 // checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then 226 // this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to 227 // be treated as a regular file. 228 func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) { 229 var sparseFormat string 230 231 // Check for sparse format indicators 232 major, majorOk := headers[paxGNUSparseMajor] 233 minor, minorOk := headers[paxGNUSparseMinor] 234 sparseName, sparseNameOk := headers[paxGNUSparseName] 235 _, sparseMapOk := headers[paxGNUSparseMap] 236 sparseSize, sparseSizeOk := headers[paxGNUSparseSize] 237 sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize] 238 239 // Identify which, if any, sparse format applies from which PAX headers are set 240 if majorOk && minorOk { 241 sparseFormat = major + "." + minor 242 } else if sparseNameOk && sparseMapOk { 243 sparseFormat = "0.1" 244 } else if sparseSizeOk { 245 sparseFormat = "0.0" 246 } else { 247 // Not a PAX format GNU sparse file. 248 return nil, nil 249 } 250 251 // Check for unknown sparse format 252 if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" { 253 return nil, nil 254 } 255 256 // Update hdr from GNU sparse PAX headers 257 if sparseNameOk { 258 hdr.Name = sparseName 259 } 260 if sparseSizeOk { 261 realSize, err := strconv.ParseInt(sparseSize, 10, 64) 262 if err != nil { 263 return nil, ErrHeader 264 } 265 hdr.Size = realSize 266 } else if sparseRealSizeOk { 267 realSize, err := strconv.ParseInt(sparseRealSize, 10, 64) 268 if err != nil { 269 return nil, ErrHeader 270 } 271 hdr.Size = realSize 272 } 273 274 // Set up the sparse map, according to the particular sparse format in use 275 var sp []sparseEntry 276 var err error 277 switch sparseFormat { 278 case "0.0", "0.1": 279 sp, err = readGNUSparseMap0x1(headers) 280 case "1.0": 281 sp, err = readGNUSparseMap1x0(tr.curr) 282 } 283 return sp, err 284 } 285 286 // mergePAX merges well known headers according to PAX standard. 287 // In general headers with the same name as those found 288 // in the header struct overwrite those found in the header 289 // struct with higher precision or longer values. Esp. useful 290 // for name and linkname fields. 291 func mergePAX(hdr *Header, headers map[string]string) (err error) { 292 var id64 int64 293 for k, v := range headers { 294 switch k { 295 case paxPath: 296 hdr.Name = v 297 case paxLinkpath: 298 hdr.Linkname = v 299 case paxUname: 300 hdr.Uname = v 301 case paxGname: 302 hdr.Gname = v 303 case paxUid: 304 id64, err = strconv.ParseInt(v, 10, 64) 305 hdr.Uid = int(id64) // Integer overflow possible 306 case paxGid: 307 id64, err = strconv.ParseInt(v, 10, 64) 308 hdr.Gid = int(id64) // Integer overflow possible 309 case paxAtime: 310 hdr.AccessTime, err = parsePAXTime(v) 311 case paxMtime: 312 hdr.ModTime, err = parsePAXTime(v) 313 case paxCtime: 314 hdr.ChangeTime, err = parsePAXTime(v) 315 case paxSize: 316 hdr.Size, err = strconv.ParseInt(v, 10, 64) 317 default: 318 if strings.HasPrefix(k, paxXattr) { 319 if hdr.Xattrs == nil { 320 hdr.Xattrs = make(map[string]string) 321 } 322 hdr.Xattrs[k[len(paxXattr):]] = v 323 } 324 } 325 if err != nil { 326 return ErrHeader 327 } 328 } 329 return nil 330 } 331 332 // parsePAX parses PAX headers. 333 // If an extended header (type 'x') is invalid, ErrHeader is returned 334 func parsePAX(r io.Reader) (map[string]string, error) { 335 buf, err := ioutil.ReadAll(r) 336 if err != nil { 337 return nil, err 338 } 339 sbuf := string(buf) 340 341 // For GNU PAX sparse format 0.0 support. 342 // This function transforms the sparse format 0.0 headers into format 0.1 343 // headers since 0.0 headers were not PAX compliant. 344 var sparseMap []string 345 346 extHdrs := make(map[string]string) 347 for len(sbuf) > 0 { 348 key, value, residual, err := parsePAXRecord(sbuf) 349 if err != nil { 350 return nil, ErrHeader 351 } 352 sbuf = residual 353 354 switch key { 355 case paxGNUSparseOffset, paxGNUSparseNumBytes: 356 // Validate sparse header order and value. 357 if (len(sparseMap)%2 == 0 && key != paxGNUSparseOffset) || 358 (len(sparseMap)%2 == 1 && key != paxGNUSparseNumBytes) || 359 strings.Contains(value, ",") { 360 return nil, ErrHeader 361 } 362 sparseMap = append(sparseMap, value) 363 default: 364 // According to PAX specification, a value is stored only if it is 365 // non-empty. Otherwise, the key is deleted. 366 if len(value) > 0 { 367 extHdrs[key] = value 368 } else { 369 delete(extHdrs, key) 370 } 371 } 372 } 373 if len(sparseMap) > 0 { 374 extHdrs[paxGNUSparseMap] = strings.Join(sparseMap, ",") 375 } 376 return extHdrs, nil 377 } 378 379 // skipUnread skips any unread bytes in the existing file entry, as well as any 380 // alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is 381 // encountered in the data portion; it is okay to hit io.EOF in the padding. 382 // 383 // Note that this function still works properly even when sparse files are being 384 // used since numBytes returns the bytes remaining in the underlying io.Reader. 385 func (tr *Reader) skipUnread() error { 386 dataSkip := tr.numBytes() // Number of data bytes to skip 387 totalSkip := dataSkip + tr.pad // Total number of bytes to skip 388 tr.curr, tr.pad = nil, 0 389 390 // If possible, Seek to the last byte before the end of the data section. 391 // Do this because Seek is often lazy about reporting errors; this will mask 392 // the fact that the tar stream may be truncated. We can rely on the 393 // io.CopyN done shortly afterwards to trigger any IO errors. 394 var seekSkipped int64 // Number of bytes skipped via Seek 395 if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 { 396 // Not all io.Seeker can actually Seek. For example, os.Stdin implements 397 // io.Seeker, but calling Seek always returns an error and performs 398 // no action. Thus, we try an innocent seek to the current position 399 // to see if Seek is really supported. 400 pos1, err := sr.Seek(0, io.SeekCurrent) 401 if err == nil { 402 // Seek seems supported, so perform the real Seek. 403 pos2, err := sr.Seek(dataSkip-1, io.SeekCurrent) 404 if err != nil { 405 return err 406 } 407 seekSkipped = pos2 - pos1 408 } 409 } 410 411 copySkipped, err := io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped) 412 if err == io.EOF && seekSkipped+copySkipped < dataSkip { 413 err = io.ErrUnexpectedEOF 414 } 415 return err 416 } 417 418 // readHeader reads the next block header and assumes that the underlying reader 419 // is already aligned to a block boundary. It returns the raw block of the 420 // header in case further processing is required. 421 // 422 // The err will be set to io.EOF only when one of the following occurs: 423 // * Exactly 0 bytes are read and EOF is hit. 424 // * Exactly 1 block of zeros is read and EOF is hit. 425 // * At least 2 blocks of zeros are read. 426 func (tr *Reader) readHeader() (*Header, *block, error) { 427 // Two blocks of zero bytes marks the end of the archive. 428 if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil { 429 return nil, nil, err // EOF is okay here; exactly 0 bytes read 430 } 431 if bytes.Equal(tr.blk[:], zeroBlock[:]) { 432 if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil { 433 return nil, nil, err // EOF is okay here; exactly 1 block of zeros read 434 } 435 if bytes.Equal(tr.blk[:], zeroBlock[:]) { 436 return nil, nil, io.EOF // normal EOF; exactly 2 block of zeros read 437 } 438 return nil, nil, ErrHeader // Zero block and then non-zero block 439 } 440 441 // Verify the header matches a known format. 442 format := tr.blk.GetFormat() 443 if format == formatUnknown { 444 return nil, nil, ErrHeader 445 } 446 447 var p parser 448 hdr := new(Header) 449 450 // Unpack the V7 header. 451 v7 := tr.blk.V7() 452 hdr.Name = p.parseString(v7.Name()) 453 hdr.Mode = p.parseNumeric(v7.Mode()) 454 hdr.Uid = int(p.parseNumeric(v7.UID())) 455 hdr.Gid = int(p.parseNumeric(v7.GID())) 456 hdr.Size = p.parseNumeric(v7.Size()) 457 hdr.ModTime = time.Unix(p.parseNumeric(v7.ModTime()), 0) 458 hdr.Typeflag = v7.TypeFlag()[0] 459 hdr.Linkname = p.parseString(v7.LinkName()) 460 461 // Unpack format specific fields. 462 if format > formatV7 { 463 ustar := tr.blk.USTAR() 464 hdr.Uname = p.parseString(ustar.UserName()) 465 hdr.Gname = p.parseString(ustar.GroupName()) 466 hdr.Devmajor = p.parseNumeric(ustar.DevMajor()) 467 hdr.Devminor = p.parseNumeric(ustar.DevMinor()) 468 469 var prefix string 470 switch format { 471 case formatUSTAR: 472 ustar := tr.blk.USTAR() 473 prefix = p.parseString(ustar.Prefix()) 474 case formatSTAR: 475 star := tr.blk.STAR() 476 prefix = p.parseString(star.Prefix()) 477 hdr.AccessTime = time.Unix(p.parseNumeric(star.AccessTime()), 0) 478 hdr.ChangeTime = time.Unix(p.parseNumeric(star.ChangeTime()), 0) 479 case formatGNU: 480 var p2 parser 481 gnu := tr.blk.GNU() 482 if b := gnu.AccessTime(); b[0] != 0 { 483 hdr.AccessTime = time.Unix(p2.parseNumeric(b), 0) 484 } 485 if b := gnu.ChangeTime(); b[0] != 0 { 486 hdr.ChangeTime = time.Unix(p2.parseNumeric(b), 0) 487 } 488 489 // Prior to Go1.8, the Writer had a bug where it would output 490 // an invalid tar file in certain rare situations because the logic 491 // incorrectly believed that the old GNU format had a prefix field. 492 // This is wrong and leads to an output file that mangles the 493 // atime and ctime fields, which are often left unused. 494 // 495 // In order to continue reading tar files created by former, buggy 496 // versions of Go, we skeptically parse the atime and ctime fields. 497 // If we are unable to parse them and the prefix field looks like 498 // an ASCII string, then we fallback on the pre-Go1.8 behavior 499 // of treating these fields as the USTAR prefix field. 500 // 501 // Note that this will not use the fallback logic for all possible 502 // files generated by a pre-Go1.8 toolchain. If the generated file 503 // happened to have a prefix field that parses as valid 504 // atime and ctime fields (e.g., when they are valid octal strings), 505 // then it is impossible to distinguish between an valid GNU file 506 // and an invalid pre-Go1.8 file. 507 // 508 // See https://golang.org/issues/12594 509 // See https://golang.org/issues/21005 510 if p2.err != nil { 511 hdr.AccessTime, hdr.ChangeTime = time.Time{}, time.Time{} 512 ustar := tr.blk.USTAR() 513 if s := p.parseString(ustar.Prefix()); isASCII(s) { 514 prefix = s 515 } 516 } 517 } 518 if len(prefix) > 0 { 519 hdr.Name = prefix + "/" + hdr.Name 520 } 521 } 522 return hdr, &tr.blk, p.err 523 } 524 525 // readOldGNUSparseMap reads the sparse map from the old GNU sparse format. 526 // The sparse map is stored in the tar header if it's small enough. 527 // If it's larger than four entries, then one or more extension headers are used 528 // to store the rest of the sparse map. 529 // 530 // The Header.Size does not reflect the size of any extended headers used. 531 // Thus, this function will read from the raw io.Reader to fetch extra headers. 532 // This method mutates blk in the process. 533 func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, error) { 534 // Make sure that the input format is GNU. 535 // Unfortunately, the STAR format also has a sparse header format that uses 536 // the same type flag but has a completely different layout. 537 if blk.GetFormat() != formatGNU { 538 return nil, ErrHeader 539 } 540 541 var p parser 542 hdr.Size = p.parseNumeric(blk.GNU().RealSize()) 543 if p.err != nil { 544 return nil, p.err 545 } 546 var s sparseArray = blk.GNU().Sparse() 547 var sp = make([]sparseEntry, 0, s.MaxEntries()) 548 for { 549 for i := 0; i < s.MaxEntries(); i++ { 550 // This termination condition is identical to GNU and BSD tar. 551 if s.Entry(i).Offset()[0] == 0x00 { 552 break // Don't return, need to process extended headers (even if empty) 553 } 554 offset := p.parseNumeric(s.Entry(i).Offset()) 555 numBytes := p.parseNumeric(s.Entry(i).NumBytes()) 556 if p.err != nil { 557 return nil, p.err 558 } 559 sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) 560 } 561 562 if s.IsExtended()[0] > 0 { 563 // There are more entries. Read an extension header and parse its entries. 564 if _, err := io.ReadFull(tr.r, blk[:]); err != nil { 565 if err == io.EOF { 566 err = io.ErrUnexpectedEOF 567 } 568 return nil, err 569 } 570 s = blk.Sparse() 571 continue 572 } 573 return sp, nil // Done 574 } 575 } 576 577 // readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format 578 // version 1.0. The format of the sparse map consists of a series of 579 // newline-terminated numeric fields. The first field is the number of entries 580 // and is always present. Following this are the entries, consisting of two 581 // fields (offset, numBytes). This function must stop reading at the end 582 // boundary of the block containing the last newline. 583 // 584 // Note that the GNU manual says that numeric values should be encoded in octal 585 // format. However, the GNU tar utility itself outputs these values in decimal. 586 // As such, this library treats values as being encoded in decimal. 587 func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { 588 var cntNewline int64 589 var buf bytes.Buffer 590 var blk = make([]byte, blockSize) 591 592 // feedTokens copies data in numBlock chunks from r into buf until there are 593 // at least cnt newlines in buf. It will not read more blocks than needed. 594 var feedTokens = func(cnt int64) error { 595 for cntNewline < cnt { 596 if _, err := io.ReadFull(r, blk); err != nil { 597 if err == io.EOF { 598 err = io.ErrUnexpectedEOF 599 } 600 return err 601 } 602 buf.Write(blk) 603 for _, c := range blk { 604 if c == '\n' { 605 cntNewline++ 606 } 607 } 608 } 609 return nil 610 } 611 612 // nextToken gets the next token delimited by a newline. This assumes that 613 // at least one newline exists in the buffer. 614 var nextToken = func() string { 615 cntNewline-- 616 tok, _ := buf.ReadString('\n') 617 return tok[:len(tok)-1] // Cut off newline 618 } 619 620 // Parse for the number of entries. 621 // Use integer overflow resistant math to check this. 622 if err := feedTokens(1); err != nil { 623 return nil, err 624 } 625 numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int 626 if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { 627 return nil, ErrHeader 628 } 629 630 // Parse for all member entries. 631 // numEntries is trusted after this since a potential attacker must have 632 // committed resources proportional to what this library used. 633 if err := feedTokens(2 * numEntries); err != nil { 634 return nil, err 635 } 636 sp := make([]sparseEntry, 0, numEntries) 637 for i := int64(0); i < numEntries; i++ { 638 offset, err := strconv.ParseInt(nextToken(), 10, 64) 639 if err != nil { 640 return nil, ErrHeader 641 } 642 numBytes, err := strconv.ParseInt(nextToken(), 10, 64) 643 if err != nil { 644 return nil, ErrHeader 645 } 646 sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) 647 } 648 return sp, nil 649 } 650 651 // readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format 652 // version 0.1. The sparse map is stored in the PAX headers. 653 func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) { 654 // Get number of entries. 655 // Use integer overflow resistant math to check this. 656 numEntriesStr := extHdrs[paxGNUSparseNumBlocks] 657 numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int 658 if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { 659 return nil, ErrHeader 660 } 661 662 // There should be two numbers in sparseMap for each entry. 663 sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",") 664 if int64(len(sparseMap)) != 2*numEntries { 665 return nil, ErrHeader 666 } 667 668 // Loop through the entries in the sparse map. 669 // numEntries is trusted now. 670 sp := make([]sparseEntry, 0, numEntries) 671 for i := int64(0); i < numEntries; i++ { 672 offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64) 673 if err != nil { 674 return nil, ErrHeader 675 } 676 numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64) 677 if err != nil { 678 return nil, ErrHeader 679 } 680 sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) 681 } 682 return sp, nil 683 } 684 685 // numBytes returns the number of bytes left to read in the current file's entry 686 // in the tar archive, or 0 if there is no current file. 687 func (tr *Reader) numBytes() int64 { 688 if tr.curr == nil { 689 // No current file, so no bytes 690 return 0 691 } 692 return tr.curr.numBytes() 693 } 694 695 // Read reads from the current entry in the tar archive. 696 // It returns 0, io.EOF when it reaches the end of that entry, 697 // until Next is called to advance to the next entry. 698 // 699 // Calling Read on special types like TypeLink, TypeSymLink, TypeChar, 700 // TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what 701 // the Header.Size claims. 702 func (tr *Reader) Read(b []byte) (int, error) { 703 if tr.err != nil { 704 return 0, tr.err 705 } 706 if tr.curr == nil { 707 return 0, io.EOF 708 } 709 710 n, err := tr.curr.Read(b) 711 if err != nil && err != io.EOF { 712 tr.err = err 713 } 714 return n, err 715 } 716 717 func (rfr *regFileReader) Read(b []byte) (n int, err error) { 718 if rfr.nb == 0 { 719 // file consumed 720 return 0, io.EOF 721 } 722 if int64(len(b)) > rfr.nb { 723 b = b[0:rfr.nb] 724 } 725 n, err = rfr.r.Read(b) 726 rfr.nb -= int64(n) 727 728 if err == io.EOF && rfr.nb > 0 { 729 err = io.ErrUnexpectedEOF 730 } 731 return 732 } 733 734 // numBytes returns the number of bytes left to read in the file's data in the tar archive. 735 func (rfr *regFileReader) numBytes() int64 { 736 return rfr.nb 737 } 738 739 // newSparseFileReader creates a new sparseFileReader, but validates all of the 740 // sparse entries before doing so. 741 func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) { 742 if total < 0 { 743 return nil, ErrHeader // Total size cannot be negative 744 } 745 746 // Validate all sparse entries. These are the same checks as performed by 747 // the BSD tar utility. 748 for i, s := range sp { 749 switch { 750 case s.offset < 0 || s.numBytes < 0: 751 return nil, ErrHeader // Negative values are never okay 752 case s.offset > math.MaxInt64-s.numBytes: 753 return nil, ErrHeader // Integer overflow with large length 754 case s.offset+s.numBytes > total: 755 return nil, ErrHeader // Region extends beyond the "real" size 756 case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset: 757 return nil, ErrHeader // Regions can't overlap and must be in order 758 } 759 } 760 return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil 761 } 762 763 // readHole reads a sparse hole ending at endOffset. 764 func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int { 765 n64 := endOffset - sfr.pos 766 if n64 > int64(len(b)) { 767 n64 = int64(len(b)) 768 } 769 n := int(n64) 770 for i := 0; i < n; i++ { 771 b[i] = 0 772 } 773 sfr.pos += n64 774 return n 775 } 776 777 // Read reads the sparse file data in expanded form. 778 func (sfr *sparseFileReader) Read(b []byte) (n int, err error) { 779 // Skip past all empty fragments. 780 for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 { 781 sfr.sp = sfr.sp[1:] 782 } 783 784 // If there are no more fragments, then it is possible that there 785 // is one last sparse hole. 786 if len(sfr.sp) == 0 { 787 // This behavior matches the BSD tar utility. 788 // However, GNU tar stops returning data even if sfr.total is unmet. 789 if sfr.pos < sfr.total { 790 return sfr.readHole(b, sfr.total), nil 791 } 792 return 0, io.EOF 793 } 794 795 // In front of a data fragment, so read a hole. 796 if sfr.pos < sfr.sp[0].offset { 797 return sfr.readHole(b, sfr.sp[0].offset), nil 798 } 799 800 // In a data fragment, so read from it. 801 // This math is overflow free since we verify that offset and numBytes can 802 // be safely added when creating the sparseFileReader. 803 endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment 804 bytesLeft := endPos - sfr.pos // Bytes left in fragment 805 if int64(len(b)) > bytesLeft { 806 b = b[:bytesLeft] 807 } 808 809 n, err = sfr.rfr.Read(b) 810 sfr.pos += int64(n) 811 if err == io.EOF { 812 if sfr.pos < endPos { 813 err = io.ErrUnexpectedEOF // There was supposed to be more data 814 } else if sfr.pos < sfr.total { 815 err = nil // There is still an implicit sparse hole at the end 816 } 817 } 818 819 if sfr.pos == endPos { 820 sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it 821 } 822 return n, err 823 } 824 825 // numBytes returns the number of bytes left to read in the sparse file's 826 // sparse-encoded data in the tar archive. 827 func (sfr *sparseFileReader) numBytes() int64 { 828 return sfr.rfr.numBytes() 829 }