github.com/d4l3k/go@v0.0.0-20151015000803-65fc379daeda/src/archive/tar/reader.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package tar 6 7 // TODO(dsymonds): 8 // - pax extensions 9 10 import ( 11 "bytes" 12 "errors" 13 "io" 14 "io/ioutil" 15 "math" 16 "os" 17 "strconv" 18 "strings" 19 "time" 20 ) 21 22 var ( 23 ErrHeader = errors.New("archive/tar: invalid tar header") 24 ) 25 26 const maxNanoSecondIntSize = 9 27 28 // A Reader provides sequential access to the contents of a tar archive. 29 // A tar archive consists of a sequence of files. 30 // The Next method advances to the next file in the archive (including the first), 31 // and then it can be treated as an io.Reader to access the file's data. 32 type Reader struct { 33 r io.Reader 34 err error 35 pad int64 // amount of padding (ignored) after current file entry 36 curr numBytesReader // reader for current file entry 37 hdrBuff [blockSize]byte // buffer to use in readHeader 38 } 39 40 // A numBytesReader is an io.Reader with a numBytes method, returning the number 41 // of bytes remaining in the underlying encoded data. 42 type numBytesReader interface { 43 io.Reader 44 numBytes() int64 45 } 46 47 // A regFileReader is a numBytesReader for reading file data from a tar archive. 48 type regFileReader struct { 49 r io.Reader // underlying reader 50 nb int64 // number of unread bytes for current file entry 51 } 52 53 // A sparseFileReader is a numBytesReader for reading sparse file data from a 54 // tar archive. 55 type sparseFileReader struct { 56 rfr numBytesReader // Reads the sparse-encoded file data 57 sp []sparseEntry // The sparse map for the file 58 pos int64 // Keeps track of file position 59 total int64 // Total size of the file 60 } 61 62 // A sparseEntry holds a single entry in a sparse file's sparse map. 63 // 64 // Sparse files are represented using a series of sparseEntrys. 65 // Despite the name, a sparseEntry represents an actual data fragment that 66 // references data found in the underlying archive stream. All regions not 67 // covered by a sparseEntry are logically filled with zeros. 68 // 69 // For example, if the underlying raw file contains the 10-byte data: 70 // var compactData = "abcdefgh" 71 // 72 // And the sparse map has the following entries: 73 // var sp = []sparseEntry{ 74 // {offset: 2, numBytes: 5} // Data fragment for [2..7] 75 // {offset: 18, numBytes: 3} // Data fragment for [18..21] 76 // } 77 // 78 // Then the content of the resulting sparse file with a "real" size of 25 is: 79 // var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4 80 type sparseEntry struct { 81 offset int64 // Starting position of the fragment 82 numBytes int64 // Length of the fragment 83 } 84 85 // Keywords for GNU sparse files in a PAX extended header 86 const ( 87 paxGNUSparseNumBlocks = "GNU.sparse.numblocks" 88 paxGNUSparseOffset = "GNU.sparse.offset" 89 paxGNUSparseNumBytes = "GNU.sparse.numbytes" 90 paxGNUSparseMap = "GNU.sparse.map" 91 paxGNUSparseName = "GNU.sparse.name" 92 paxGNUSparseMajor = "GNU.sparse.major" 93 paxGNUSparseMinor = "GNU.sparse.minor" 94 paxGNUSparseSize = "GNU.sparse.size" 95 paxGNUSparseRealSize = "GNU.sparse.realsize" 96 ) 97 98 // Keywords for old GNU sparse headers 99 const ( 100 oldGNUSparseMainHeaderOffset = 386 101 oldGNUSparseMainHeaderIsExtendedOffset = 482 102 oldGNUSparseMainHeaderNumEntries = 4 103 oldGNUSparseExtendedHeaderIsExtendedOffset = 504 104 oldGNUSparseExtendedHeaderNumEntries = 21 105 oldGNUSparseOffsetSize = 12 106 oldGNUSparseNumBytesSize = 12 107 ) 108 109 // NewReader creates a new Reader reading from r. 110 func NewReader(r io.Reader) *Reader { return &Reader{r: r} } 111 112 // Next advances to the next entry in the tar archive. 113 // 114 // io.EOF is returned at the end of the input. 115 func (tr *Reader) Next() (*Header, error) { 116 var hdr *Header 117 if tr.err == nil { 118 tr.skipUnread() 119 } 120 if tr.err != nil { 121 return hdr, tr.err 122 } 123 hdr = tr.readHeader() 124 if hdr == nil { 125 return hdr, tr.err 126 } 127 // Check for PAX/GNU header. 128 switch hdr.Typeflag { 129 case TypeXHeader: 130 // PAX extended header 131 headers, err := parsePAX(tr) 132 if err != nil { 133 return nil, err 134 } 135 // We actually read the whole file, 136 // but this skips alignment padding 137 tr.skipUnread() 138 if tr.err != nil { 139 return nil, tr.err 140 } 141 hdr = tr.readHeader() 142 if hdr == nil { 143 return nil, tr.err 144 } 145 mergePAX(hdr, headers) 146 147 // Check for a PAX format sparse file 148 sp, err := tr.checkForGNUSparsePAXHeaders(hdr, headers) 149 if err != nil { 150 tr.err = err 151 return nil, err 152 } 153 if sp != nil { 154 // Current file is a PAX format GNU sparse file. 155 // Set the current file reader to a sparse file reader. 156 tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) 157 if tr.err != nil { 158 return nil, tr.err 159 } 160 } 161 return hdr, nil 162 case TypeGNULongName: 163 // We have a GNU long name header. Its contents are the real file name. 164 realname, err := ioutil.ReadAll(tr) 165 if err != nil { 166 return nil, err 167 } 168 hdr, tr.err = tr.Next() 169 if tr.err != nil { 170 return nil, tr.err 171 } 172 hdr.Name = cString(realname) 173 return hdr, nil 174 case TypeGNULongLink: 175 // We have a GNU long link header. 176 realname, err := ioutil.ReadAll(tr) 177 if err != nil { 178 return nil, err 179 } 180 hdr, tr.err = tr.Next() 181 if tr.err != nil { 182 return nil, tr.err 183 } 184 hdr.Linkname = cString(realname) 185 return hdr, nil 186 } 187 return hdr, tr.err 188 } 189 190 // checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then 191 // this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to 192 // be treated as a regular file. 193 func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) { 194 var sparseFormat string 195 196 // Check for sparse format indicators 197 major, majorOk := headers[paxGNUSparseMajor] 198 minor, minorOk := headers[paxGNUSparseMinor] 199 sparseName, sparseNameOk := headers[paxGNUSparseName] 200 _, sparseMapOk := headers[paxGNUSparseMap] 201 sparseSize, sparseSizeOk := headers[paxGNUSparseSize] 202 sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize] 203 204 // Identify which, if any, sparse format applies from which PAX headers are set 205 if majorOk && minorOk { 206 sparseFormat = major + "." + minor 207 } else if sparseNameOk && sparseMapOk { 208 sparseFormat = "0.1" 209 } else if sparseSizeOk { 210 sparseFormat = "0.0" 211 } else { 212 // Not a PAX format GNU sparse file. 213 return nil, nil 214 } 215 216 // Check for unknown sparse format 217 if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" { 218 return nil, nil 219 } 220 221 // Update hdr from GNU sparse PAX headers 222 if sparseNameOk { 223 hdr.Name = sparseName 224 } 225 if sparseSizeOk { 226 realSize, err := strconv.ParseInt(sparseSize, 10, 0) 227 if err != nil { 228 return nil, ErrHeader 229 } 230 hdr.Size = realSize 231 } else if sparseRealSizeOk { 232 realSize, err := strconv.ParseInt(sparseRealSize, 10, 0) 233 if err != nil { 234 return nil, ErrHeader 235 } 236 hdr.Size = realSize 237 } 238 239 // Set up the sparse map, according to the particular sparse format in use 240 var sp []sparseEntry 241 var err error 242 switch sparseFormat { 243 case "0.0", "0.1": 244 sp, err = readGNUSparseMap0x1(headers) 245 case "1.0": 246 sp, err = readGNUSparseMap1x0(tr.curr) 247 } 248 return sp, err 249 } 250 251 // mergePAX merges well known headers according to PAX standard. 252 // In general headers with the same name as those found 253 // in the header struct overwrite those found in the header 254 // struct with higher precision or longer values. Esp. useful 255 // for name and linkname fields. 256 func mergePAX(hdr *Header, headers map[string]string) error { 257 for k, v := range headers { 258 switch k { 259 case paxPath: 260 hdr.Name = v 261 case paxLinkpath: 262 hdr.Linkname = v 263 case paxGname: 264 hdr.Gname = v 265 case paxUname: 266 hdr.Uname = v 267 case paxUid: 268 uid, err := strconv.ParseInt(v, 10, 0) 269 if err != nil { 270 return err 271 } 272 hdr.Uid = int(uid) 273 case paxGid: 274 gid, err := strconv.ParseInt(v, 10, 0) 275 if err != nil { 276 return err 277 } 278 hdr.Gid = int(gid) 279 case paxAtime: 280 t, err := parsePAXTime(v) 281 if err != nil { 282 return err 283 } 284 hdr.AccessTime = t 285 case paxMtime: 286 t, err := parsePAXTime(v) 287 if err != nil { 288 return err 289 } 290 hdr.ModTime = t 291 case paxCtime: 292 t, err := parsePAXTime(v) 293 if err != nil { 294 return err 295 } 296 hdr.ChangeTime = t 297 case paxSize: 298 size, err := strconv.ParseInt(v, 10, 0) 299 if err != nil { 300 return err 301 } 302 hdr.Size = int64(size) 303 default: 304 if strings.HasPrefix(k, paxXattr) { 305 if hdr.Xattrs == nil { 306 hdr.Xattrs = make(map[string]string) 307 } 308 hdr.Xattrs[k[len(paxXattr):]] = v 309 } 310 } 311 } 312 return nil 313 } 314 315 // parsePAXTime takes a string of the form %d.%d as described in 316 // the PAX specification. 317 func parsePAXTime(t string) (time.Time, error) { 318 buf := []byte(t) 319 pos := bytes.IndexByte(buf, '.') 320 var seconds, nanoseconds int64 321 var err error 322 if pos == -1 { 323 seconds, err = strconv.ParseInt(t, 10, 0) 324 if err != nil { 325 return time.Time{}, err 326 } 327 } else { 328 seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0) 329 if err != nil { 330 return time.Time{}, err 331 } 332 nano_buf := string(buf[pos+1:]) 333 // Pad as needed before converting to a decimal. 334 // For example .030 -> .030000000 -> 30000000 nanoseconds 335 if len(nano_buf) < maxNanoSecondIntSize { 336 // Right pad 337 nano_buf += strings.Repeat("0", maxNanoSecondIntSize-len(nano_buf)) 338 } else if len(nano_buf) > maxNanoSecondIntSize { 339 // Right truncate 340 nano_buf = nano_buf[:maxNanoSecondIntSize] 341 } 342 nanoseconds, err = strconv.ParseInt(string(nano_buf), 10, 0) 343 if err != nil { 344 return time.Time{}, err 345 } 346 } 347 ts := time.Unix(seconds, nanoseconds) 348 return ts, nil 349 } 350 351 // parsePAX parses PAX headers. 352 // If an extended header (type 'x') is invalid, ErrHeader is returned 353 func parsePAX(r io.Reader) (map[string]string, error) { 354 buf, err := ioutil.ReadAll(r) 355 if err != nil { 356 return nil, err 357 } 358 359 // For GNU PAX sparse format 0.0 support. 360 // This function transforms the sparse format 0.0 headers into sparse format 0.1 headers. 361 var sparseMap bytes.Buffer 362 363 headers := make(map[string]string) 364 // Each record is constructed as 365 // "%d %s=%s\n", length, keyword, value 366 for len(buf) > 0 { 367 // or the header was empty to start with. 368 var sp int 369 // The size field ends at the first space. 370 sp = bytes.IndexByte(buf, ' ') 371 if sp == -1 { 372 return nil, ErrHeader 373 } 374 // Parse the first token as a decimal integer. 375 n, err := strconv.ParseInt(string(buf[:sp]), 10, 0) 376 if err != nil || n < 5 || int64(len(buf)) < n { 377 return nil, ErrHeader 378 } 379 // Extract everything between the decimal and the n -1 on the 380 // beginning to eat the ' ', -1 on the end to skip the newline. 381 var record []byte 382 record, buf = buf[sp+1:n-1], buf[n:] 383 // The first equals is guaranteed to mark the end of the key. 384 // Everything else is value. 385 eq := bytes.IndexByte(record, '=') 386 if eq == -1 { 387 return nil, ErrHeader 388 } 389 key, value := record[:eq], record[eq+1:] 390 391 keyStr := string(key) 392 if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes { 393 // GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map. 394 sparseMap.Write(value) 395 sparseMap.Write([]byte{','}) 396 } else { 397 // Normal key. Set the value in the headers map. 398 headers[keyStr] = string(value) 399 } 400 } 401 if sparseMap.Len() != 0 { 402 // Add sparse info to headers, chopping off the extra comma 403 sparseMap.Truncate(sparseMap.Len() - 1) 404 headers[paxGNUSparseMap] = sparseMap.String() 405 } 406 return headers, nil 407 } 408 409 // cString parses bytes as a NUL-terminated C-style string. 410 // If a NUL byte is not found then the whole slice is returned as a string. 411 func cString(b []byte) string { 412 n := 0 413 for n < len(b) && b[n] != 0 { 414 n++ 415 } 416 return string(b[0:n]) 417 } 418 419 func (tr *Reader) octal(b []byte) int64 { 420 // Check for binary format first. 421 if len(b) > 0 && b[0]&0x80 != 0 { 422 var x int64 423 for i, c := range b { 424 if i == 0 { 425 c &= 0x7f // ignore signal bit in first byte 426 } 427 x = x<<8 | int64(c) 428 } 429 return x 430 } 431 432 // Because unused fields are filled with NULs, we need 433 // to skip leading NULs. Fields may also be padded with 434 // spaces or NULs. 435 // So we remove leading and trailing NULs and spaces to 436 // be sure. 437 b = bytes.Trim(b, " \x00") 438 439 if len(b) == 0 { 440 return 0 441 } 442 x, err := strconv.ParseUint(cString(b), 8, 64) 443 if err != nil { 444 tr.err = err 445 } 446 return int64(x) 447 } 448 449 // skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding. 450 func (tr *Reader) skipUnread() { 451 nr := tr.numBytes() + tr.pad // number of bytes to skip 452 tr.curr, tr.pad = nil, 0 453 if sr, ok := tr.r.(io.Seeker); ok { 454 if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil { 455 return 456 } 457 } 458 _, tr.err = io.CopyN(ioutil.Discard, tr.r, nr) 459 } 460 461 func (tr *Reader) verifyChecksum(header []byte) bool { 462 if tr.err != nil { 463 return false 464 } 465 466 given := tr.octal(header[148:156]) 467 unsigned, signed := checksum(header) 468 return given == unsigned || given == signed 469 } 470 471 func (tr *Reader) readHeader() *Header { 472 header := tr.hdrBuff[:] 473 copy(header, zeroBlock) 474 475 if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { 476 return nil 477 } 478 479 // Two blocks of zero bytes marks the end of the archive. 480 if bytes.Equal(header, zeroBlock[0:blockSize]) { 481 if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { 482 return nil 483 } 484 if bytes.Equal(header, zeroBlock[0:blockSize]) { 485 tr.err = io.EOF 486 } else { 487 tr.err = ErrHeader // zero block and then non-zero block 488 } 489 return nil 490 } 491 492 if !tr.verifyChecksum(header) { 493 tr.err = ErrHeader 494 return nil 495 } 496 497 // Unpack 498 hdr := new(Header) 499 s := slicer(header) 500 501 hdr.Name = cString(s.next(100)) 502 hdr.Mode = tr.octal(s.next(8)) 503 hdr.Uid = int(tr.octal(s.next(8))) 504 hdr.Gid = int(tr.octal(s.next(8))) 505 hdr.Size = tr.octal(s.next(12)) 506 if hdr.Size < 0 { 507 tr.err = ErrHeader 508 return nil 509 } 510 hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0) 511 s.next(8) // chksum 512 hdr.Typeflag = s.next(1)[0] 513 hdr.Linkname = cString(s.next(100)) 514 515 // The remainder of the header depends on the value of magic. 516 // The original (v7) version of tar had no explicit magic field, 517 // so its magic bytes, like the rest of the block, are NULs. 518 magic := string(s.next(8)) // contains version field as well. 519 var format string 520 switch { 521 case magic[:6] == "ustar\x00": // POSIX tar (1003.1-1988) 522 if string(header[508:512]) == "tar\x00" { 523 format = "star" 524 } else { 525 format = "posix" 526 } 527 case magic == "ustar \x00": // old GNU tar 528 format = "gnu" 529 } 530 531 switch format { 532 case "posix", "gnu", "star": 533 hdr.Uname = cString(s.next(32)) 534 hdr.Gname = cString(s.next(32)) 535 devmajor := s.next(8) 536 devminor := s.next(8) 537 if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock { 538 hdr.Devmajor = tr.octal(devmajor) 539 hdr.Devminor = tr.octal(devminor) 540 } 541 var prefix string 542 switch format { 543 case "posix", "gnu": 544 prefix = cString(s.next(155)) 545 case "star": 546 prefix = cString(s.next(131)) 547 hdr.AccessTime = time.Unix(tr.octal(s.next(12)), 0) 548 hdr.ChangeTime = time.Unix(tr.octal(s.next(12)), 0) 549 } 550 if len(prefix) > 0 { 551 hdr.Name = prefix + "/" + hdr.Name 552 } 553 } 554 555 if tr.err != nil { 556 tr.err = ErrHeader 557 return nil 558 } 559 560 // Maximum value of hdr.Size is 64 GB (12 octal digits), 561 // so there's no risk of int64 overflowing. 562 nb := int64(hdr.Size) 563 tr.pad = -nb & (blockSize - 1) // blockSize is a power of two 564 565 // Set the current file reader. 566 tr.curr = ®FileReader{r: tr.r, nb: nb} 567 568 // Check for old GNU sparse format entry. 569 if hdr.Typeflag == TypeGNUSparse { 570 // Get the real size of the file. 571 hdr.Size = tr.octal(header[483:495]) 572 573 // Read the sparse map. 574 sp := tr.readOldGNUSparseMap(header) 575 if tr.err != nil { 576 return nil 577 } 578 579 // Current file is a GNU sparse file. Update the current file reader. 580 tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) 581 if tr.err != nil { 582 return nil 583 } 584 } 585 586 return hdr 587 } 588 589 // readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format. 590 // The sparse map is stored in the tar header if it's small enough. If it's larger than four entries, 591 // then one or more extension headers are used to store the rest of the sparse map. 592 func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { 593 isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0 594 spCap := oldGNUSparseMainHeaderNumEntries 595 if isExtended { 596 spCap += oldGNUSparseExtendedHeaderNumEntries 597 } 598 sp := make([]sparseEntry, 0, spCap) 599 s := slicer(header[oldGNUSparseMainHeaderOffset:]) 600 601 // Read the four entries from the main tar header 602 for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ { 603 offset := tr.octal(s.next(oldGNUSparseOffsetSize)) 604 numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize)) 605 if tr.err != nil { 606 tr.err = ErrHeader 607 return nil 608 } 609 if offset == 0 && numBytes == 0 { 610 break 611 } 612 sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) 613 } 614 615 for isExtended { 616 // There are more entries. Read an extension header and parse its entries. 617 sparseHeader := make([]byte, blockSize) 618 if _, tr.err = io.ReadFull(tr.r, sparseHeader); tr.err != nil { 619 return nil 620 } 621 isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0 622 s = slicer(sparseHeader) 623 for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ { 624 offset := tr.octal(s.next(oldGNUSparseOffsetSize)) 625 numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize)) 626 if tr.err != nil { 627 tr.err = ErrHeader 628 return nil 629 } 630 if offset == 0 && numBytes == 0 { 631 break 632 } 633 sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) 634 } 635 } 636 return sp 637 } 638 639 // readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format version 1.0. 640 // The sparse map is stored just before the file data and padded out to the nearest block boundary. 641 func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { 642 buf := make([]byte, 2*blockSize) 643 sparseHeader := buf[:blockSize] 644 645 // readDecimal is a helper function to read a decimal integer from the sparse map 646 // while making sure to read from the file in blocks of size blockSize 647 readDecimal := func() (int64, error) { 648 // Look for newline 649 nl := bytes.IndexByte(sparseHeader, '\n') 650 if nl == -1 { 651 if len(sparseHeader) >= blockSize { 652 // This is an error 653 return 0, ErrHeader 654 } 655 oldLen := len(sparseHeader) 656 newLen := oldLen + blockSize 657 if cap(sparseHeader) < newLen { 658 // There's more header, but we need to make room for the next block 659 copy(buf, sparseHeader) 660 sparseHeader = buf[:newLen] 661 } else { 662 // There's more header, and we can just reslice 663 sparseHeader = sparseHeader[:newLen] 664 } 665 666 // Now that sparseHeader is large enough, read next block 667 if _, err := io.ReadFull(r, sparseHeader[oldLen:newLen]); err != nil { 668 return 0, err 669 } 670 671 // Look for a newline in the new data 672 nl = bytes.IndexByte(sparseHeader[oldLen:newLen], '\n') 673 if nl == -1 { 674 // This is an error 675 return 0, ErrHeader 676 } 677 nl += oldLen // We want the position from the beginning 678 } 679 // Now that we've found a newline, read a number 680 n, err := strconv.ParseInt(string(sparseHeader[:nl]), 10, 0) 681 if err != nil { 682 return 0, ErrHeader 683 } 684 685 // Update sparseHeader to consume this number 686 sparseHeader = sparseHeader[nl+1:] 687 return n, nil 688 } 689 690 // Read the first block 691 if _, err := io.ReadFull(r, sparseHeader); err != nil { 692 return nil, err 693 } 694 695 // The first line contains the number of entries 696 numEntries, err := readDecimal() 697 if err != nil { 698 return nil, err 699 } 700 701 // Read all the entries 702 sp := make([]sparseEntry, 0, numEntries) 703 for i := int64(0); i < numEntries; i++ { 704 // Read the offset 705 offset, err := readDecimal() 706 if err != nil { 707 return nil, err 708 } 709 // Read numBytes 710 numBytes, err := readDecimal() 711 if err != nil { 712 return nil, err 713 } 714 715 sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) 716 } 717 718 return sp, nil 719 } 720 721 // readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format 722 // version 0.1. The sparse map is stored in the PAX headers. 723 func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) { 724 // Get number of entries. 725 // Use integer overflow resistant math to check this. 726 numEntriesStr := extHdrs[paxGNUSparseNumBlocks] 727 numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int 728 if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { 729 return nil, ErrHeader 730 } 731 732 // There should be two numbers in sparseMap for each entry. 733 sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",") 734 if int64(len(sparseMap)) != 2*numEntries { 735 return nil, ErrHeader 736 } 737 738 // Loop through the entries in the sparse map. 739 // numEntries is trusted now. 740 sp := make([]sparseEntry, 0, numEntries) 741 for i := int64(0); i < numEntries; i++ { 742 offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64) 743 if err != nil { 744 return nil, ErrHeader 745 } 746 numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64) 747 if err != nil { 748 return nil, ErrHeader 749 } 750 sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) 751 } 752 return sp, nil 753 } 754 755 // numBytes returns the number of bytes left to read in the current file's entry 756 // in the tar archive, or 0 if there is no current file. 757 func (tr *Reader) numBytes() int64 { 758 if tr.curr == nil { 759 // No current file, so no bytes 760 return 0 761 } 762 return tr.curr.numBytes() 763 } 764 765 // Read reads from the current entry in the tar archive. 766 // It returns 0, io.EOF when it reaches the end of that entry, 767 // until Next is called to advance to the next entry. 768 func (tr *Reader) Read(b []byte) (n int, err error) { 769 if tr.err != nil { 770 return 0, tr.err 771 } 772 if tr.curr == nil { 773 return 0, io.EOF 774 } 775 776 n, err = tr.curr.Read(b) 777 if err != nil && err != io.EOF { 778 tr.err = err 779 } 780 return 781 } 782 783 func (rfr *regFileReader) Read(b []byte) (n int, err error) { 784 if rfr.nb == 0 { 785 // file consumed 786 return 0, io.EOF 787 } 788 if int64(len(b)) > rfr.nb { 789 b = b[0:rfr.nb] 790 } 791 n, err = rfr.r.Read(b) 792 rfr.nb -= int64(n) 793 794 if err == io.EOF && rfr.nb > 0 { 795 err = io.ErrUnexpectedEOF 796 } 797 return 798 } 799 800 // numBytes returns the number of bytes left to read in the file's data in the tar archive. 801 func (rfr *regFileReader) numBytes() int64 { 802 return rfr.nb 803 } 804 805 // newSparseFileReader creates a new sparseFileReader, but validates all of the 806 // sparse entries before doing so. 807 func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) { 808 if total < 0 { 809 return nil, ErrHeader // Total size cannot be negative 810 } 811 812 // Validate all sparse entries. These are the same checks as performed by 813 // the BSD tar utility. 814 for i, s := range sp { 815 switch { 816 case s.offset < 0 || s.numBytes < 0: 817 return nil, ErrHeader // Negative values are never okay 818 case s.offset > math.MaxInt64-s.numBytes: 819 return nil, ErrHeader // Integer overflow with large length 820 case s.offset+s.numBytes > total: 821 return nil, ErrHeader // Region extends beyond the "real" size 822 case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset: 823 return nil, ErrHeader // Regions can't overlap and must be in order 824 } 825 } 826 return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil 827 } 828 829 // readHole reads a sparse hole ending at endOffset. 830 func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int { 831 n64 := endOffset - sfr.pos 832 if n64 > int64(len(b)) { 833 n64 = int64(len(b)) 834 } 835 n := int(n64) 836 for i := 0; i < n; i++ { 837 b[i] = 0 838 } 839 sfr.pos += n64 840 return n 841 } 842 843 // Read reads the sparse file data in expanded form. 844 func (sfr *sparseFileReader) Read(b []byte) (n int, err error) { 845 // Skip past all empty fragments. 846 for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 { 847 sfr.sp = sfr.sp[1:] 848 } 849 850 // If there are no more fragments, then it is possible that there 851 // is one last sparse hole. 852 if len(sfr.sp) == 0 { 853 // This behavior matches the BSD tar utility. 854 // However, GNU tar stops returning data even if sfr.total is unmet. 855 if sfr.pos < sfr.total { 856 return sfr.readHole(b, sfr.total), nil 857 } 858 return 0, io.EOF 859 } 860 861 // In front of a data fragment, so read a hole. 862 if sfr.pos < sfr.sp[0].offset { 863 return sfr.readHole(b, sfr.sp[0].offset), nil 864 } 865 866 // In a data fragment, so read from it. 867 // This math is overflow free since we verify that offset and numBytes can 868 // be safely added when creating the sparseFileReader. 869 endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment 870 bytesLeft := endPos - sfr.pos // Bytes left in fragment 871 if int64(len(b)) > bytesLeft { 872 b = b[:bytesLeft] 873 } 874 875 n, err = sfr.rfr.Read(b) 876 sfr.pos += int64(n) 877 if err == io.EOF { 878 if sfr.pos < endPos { 879 err = io.ErrUnexpectedEOF // There was supposed to be more data 880 } else if sfr.pos < sfr.total { 881 err = nil // There is still an implicit sparse hole at the end 882 } 883 } 884 885 if sfr.pos == endPos { 886 sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it 887 } 888 return n, err 889 } 890 891 // numBytes returns the number of bytes left to read in the sparse file's 892 // sparse-encoded data in the tar archive. 893 func (sfr *sparseFileReader) numBytes() int64 { 894 return sfr.rfr.numBytes() 895 }