github.com/sean-/go@v0.0.0-20151219100004-97f854cd7bb6/src/archive/tar/reader.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package tar 6 7 // TODO(dsymonds): 8 // - pax extensions 9 10 import ( 11 "bytes" 12 "errors" 13 "io" 14 "io/ioutil" 15 "math" 16 "os" 17 "strconv" 18 "strings" 19 "time" 20 ) 21 22 var ( 23 ErrHeader = errors.New("archive/tar: invalid tar header") 24 ) 25 26 const maxNanoSecondIntSize = 9 27 28 // A Reader provides sequential access to the contents of a tar archive. 29 // A tar archive consists of a sequence of files. 30 // The Next method advances to the next file in the archive (including the first), 31 // and then it can be treated as an io.Reader to access the file's data. 32 type Reader struct { 33 r io.Reader 34 err error 35 pad int64 // amount of padding (ignored) after current file entry 36 curr numBytesReader // reader for current file entry 37 hdrBuff [blockSize]byte // buffer to use in readHeader 38 } 39 40 type parser struct { 41 err error // Last error seen 42 } 43 44 // A numBytesReader is an io.Reader with a numBytes method, returning the number 45 // of bytes remaining in the underlying encoded data. 46 type numBytesReader interface { 47 io.Reader 48 numBytes() int64 49 } 50 51 // A regFileReader is a numBytesReader for reading file data from a tar archive. 52 type regFileReader struct { 53 r io.Reader // underlying reader 54 nb int64 // number of unread bytes for current file entry 55 } 56 57 // A sparseFileReader is a numBytesReader for reading sparse file data from a 58 // tar archive. 59 type sparseFileReader struct { 60 rfr numBytesReader // Reads the sparse-encoded file data 61 sp []sparseEntry // The sparse map for the file 62 pos int64 // Keeps track of file position 63 total int64 // Total size of the file 64 } 65 66 // A sparseEntry holds a single entry in a sparse file's sparse map. 67 // 68 // Sparse files are represented using a series of sparseEntrys. 69 // Despite the name, a sparseEntry represents an actual data fragment that 70 // references data found in the underlying archive stream. All regions not 71 // covered by a sparseEntry are logically filled with zeros. 72 // 73 // For example, if the underlying raw file contains the 10-byte data: 74 // var compactData = "abcdefgh" 75 // 76 // And the sparse map has the following entries: 77 // var sp = []sparseEntry{ 78 // {offset: 2, numBytes: 5} // Data fragment for [2..7] 79 // {offset: 18, numBytes: 3} // Data fragment for [18..21] 80 // } 81 // 82 // Then the content of the resulting sparse file with a "real" size of 25 is: 83 // var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4 84 type sparseEntry struct { 85 offset int64 // Starting position of the fragment 86 numBytes int64 // Length of the fragment 87 } 88 89 // Keywords for GNU sparse files in a PAX extended header 90 const ( 91 paxGNUSparseNumBlocks = "GNU.sparse.numblocks" 92 paxGNUSparseOffset = "GNU.sparse.offset" 93 paxGNUSparseNumBytes = "GNU.sparse.numbytes" 94 paxGNUSparseMap = "GNU.sparse.map" 95 paxGNUSparseName = "GNU.sparse.name" 96 paxGNUSparseMajor = "GNU.sparse.major" 97 paxGNUSparseMinor = "GNU.sparse.minor" 98 paxGNUSparseSize = "GNU.sparse.size" 99 paxGNUSparseRealSize = "GNU.sparse.realsize" 100 ) 101 102 // Keywords for old GNU sparse headers 103 const ( 104 oldGNUSparseMainHeaderOffset = 386 105 oldGNUSparseMainHeaderIsExtendedOffset = 482 106 oldGNUSparseMainHeaderNumEntries = 4 107 oldGNUSparseExtendedHeaderIsExtendedOffset = 504 108 oldGNUSparseExtendedHeaderNumEntries = 21 109 oldGNUSparseOffsetSize = 12 110 oldGNUSparseNumBytesSize = 12 111 ) 112 113 // NewReader creates a new Reader reading from r. 114 func NewReader(r io.Reader) *Reader { return &Reader{r: r} } 115 116 // Next advances to the next entry in the tar archive. 117 // 118 // io.EOF is returned at the end of the input. 119 func (tr *Reader) Next() (*Header, error) { 120 if tr.err != nil { 121 return nil, tr.err 122 } 123 124 var hdr *Header 125 var extHdrs map[string]string 126 127 // Externally, Next iterates through the tar archive as if it is a series of 128 // files. Internally, the tar format often uses fake "files" to add meta 129 // data that describes the next file. These meta data "files" should not 130 // normally be visible to the outside. As such, this loop iterates through 131 // one or more "header files" until it finds a "normal file". 132 loop: 133 for { 134 tr.err = tr.skipUnread() 135 if tr.err != nil { 136 return nil, tr.err 137 } 138 139 hdr = tr.readHeader() 140 if tr.err != nil { 141 return nil, tr.err 142 } 143 144 // Check for PAX/GNU special headers and files. 145 switch hdr.Typeflag { 146 case TypeXHeader: 147 extHdrs, tr.err = parsePAX(tr) 148 if tr.err != nil { 149 return nil, tr.err 150 } 151 continue loop // This is a meta header affecting the next header 152 case TypeGNULongName, TypeGNULongLink: 153 var realname []byte 154 realname, tr.err = ioutil.ReadAll(tr) 155 if tr.err != nil { 156 return nil, tr.err 157 } 158 159 // Convert GNU extensions to use PAX headers. 160 if extHdrs == nil { 161 extHdrs = make(map[string]string) 162 } 163 var p parser 164 switch hdr.Typeflag { 165 case TypeGNULongName: 166 extHdrs[paxPath] = p.parseString(realname) 167 case TypeGNULongLink: 168 extHdrs[paxLinkpath] = p.parseString(realname) 169 } 170 if p.err != nil { 171 tr.err = p.err 172 return nil, tr.err 173 } 174 continue loop // This is a meta header affecting the next header 175 default: 176 mergePAX(hdr, extHdrs) 177 178 // Check for a PAX format sparse file 179 sp, err := tr.checkForGNUSparsePAXHeaders(hdr, extHdrs) 180 if err != nil { 181 tr.err = err 182 return nil, err 183 } 184 if sp != nil { 185 // Current file is a PAX format GNU sparse file. 186 // Set the current file reader to a sparse file reader. 187 tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) 188 if tr.err != nil { 189 return nil, tr.err 190 } 191 } 192 break loop // This is a file, so stop 193 } 194 } 195 return hdr, nil 196 } 197 198 // checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then 199 // this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to 200 // be treated as a regular file. 201 func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) { 202 var sparseFormat string 203 204 // Check for sparse format indicators 205 major, majorOk := headers[paxGNUSparseMajor] 206 minor, minorOk := headers[paxGNUSparseMinor] 207 sparseName, sparseNameOk := headers[paxGNUSparseName] 208 _, sparseMapOk := headers[paxGNUSparseMap] 209 sparseSize, sparseSizeOk := headers[paxGNUSparseSize] 210 sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize] 211 212 // Identify which, if any, sparse format applies from which PAX headers are set 213 if majorOk && minorOk { 214 sparseFormat = major + "." + minor 215 } else if sparseNameOk && sparseMapOk { 216 sparseFormat = "0.1" 217 } else if sparseSizeOk { 218 sparseFormat = "0.0" 219 } else { 220 // Not a PAX format GNU sparse file. 221 return nil, nil 222 } 223 224 // Check for unknown sparse format 225 if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" { 226 return nil, nil 227 } 228 229 // Update hdr from GNU sparse PAX headers 230 if sparseNameOk { 231 hdr.Name = sparseName 232 } 233 if sparseSizeOk { 234 realSize, err := strconv.ParseInt(sparseSize, 10, 0) 235 if err != nil { 236 return nil, ErrHeader 237 } 238 hdr.Size = realSize 239 } else if sparseRealSizeOk { 240 realSize, err := strconv.ParseInt(sparseRealSize, 10, 0) 241 if err != nil { 242 return nil, ErrHeader 243 } 244 hdr.Size = realSize 245 } 246 247 // Set up the sparse map, according to the particular sparse format in use 248 var sp []sparseEntry 249 var err error 250 switch sparseFormat { 251 case "0.0", "0.1": 252 sp, err = readGNUSparseMap0x1(headers) 253 case "1.0": 254 sp, err = readGNUSparseMap1x0(tr.curr) 255 } 256 return sp, err 257 } 258 259 // mergePAX merges well known headers according to PAX standard. 260 // In general headers with the same name as those found 261 // in the header struct overwrite those found in the header 262 // struct with higher precision or longer values. Esp. useful 263 // for name and linkname fields. 264 func mergePAX(hdr *Header, headers map[string]string) error { 265 for k, v := range headers { 266 switch k { 267 case paxPath: 268 hdr.Name = v 269 case paxLinkpath: 270 hdr.Linkname = v 271 case paxGname: 272 hdr.Gname = v 273 case paxUname: 274 hdr.Uname = v 275 case paxUid: 276 uid, err := strconv.ParseInt(v, 10, 0) 277 if err != nil { 278 return err 279 } 280 hdr.Uid = int(uid) 281 case paxGid: 282 gid, err := strconv.ParseInt(v, 10, 0) 283 if err != nil { 284 return err 285 } 286 hdr.Gid = int(gid) 287 case paxAtime: 288 t, err := parsePAXTime(v) 289 if err != nil { 290 return err 291 } 292 hdr.AccessTime = t 293 case paxMtime: 294 t, err := parsePAXTime(v) 295 if err != nil { 296 return err 297 } 298 hdr.ModTime = t 299 case paxCtime: 300 t, err := parsePAXTime(v) 301 if err != nil { 302 return err 303 } 304 hdr.ChangeTime = t 305 case paxSize: 306 size, err := strconv.ParseInt(v, 10, 0) 307 if err != nil { 308 return err 309 } 310 hdr.Size = int64(size) 311 default: 312 if strings.HasPrefix(k, paxXattr) { 313 if hdr.Xattrs == nil { 314 hdr.Xattrs = make(map[string]string) 315 } 316 hdr.Xattrs[k[len(paxXattr):]] = v 317 } 318 } 319 } 320 return nil 321 } 322 323 // parsePAXTime takes a string of the form %d.%d as described in 324 // the PAX specification. 325 func parsePAXTime(t string) (time.Time, error) { 326 buf := []byte(t) 327 pos := bytes.IndexByte(buf, '.') 328 var seconds, nanoseconds int64 329 var err error 330 if pos == -1 { 331 seconds, err = strconv.ParseInt(t, 10, 0) 332 if err != nil { 333 return time.Time{}, err 334 } 335 } else { 336 seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0) 337 if err != nil { 338 return time.Time{}, err 339 } 340 nano_buf := string(buf[pos+1:]) 341 // Pad as needed before converting to a decimal. 342 // For example .030 -> .030000000 -> 30000000 nanoseconds 343 if len(nano_buf) < maxNanoSecondIntSize { 344 // Right pad 345 nano_buf += strings.Repeat("0", maxNanoSecondIntSize-len(nano_buf)) 346 } else if len(nano_buf) > maxNanoSecondIntSize { 347 // Right truncate 348 nano_buf = nano_buf[:maxNanoSecondIntSize] 349 } 350 nanoseconds, err = strconv.ParseInt(string(nano_buf), 10, 0) 351 if err != nil { 352 return time.Time{}, err 353 } 354 } 355 ts := time.Unix(seconds, nanoseconds) 356 return ts, nil 357 } 358 359 // parsePAX parses PAX headers. 360 // If an extended header (type 'x') is invalid, ErrHeader is returned 361 func parsePAX(r io.Reader) (map[string]string, error) { 362 buf, err := ioutil.ReadAll(r) 363 if err != nil { 364 return nil, err 365 } 366 sbuf := string(buf) 367 368 // For GNU PAX sparse format 0.0 support. 369 // This function transforms the sparse format 0.0 headers into sparse format 0.1 headers. 370 var sparseMap bytes.Buffer 371 372 headers := make(map[string]string) 373 // Each record is constructed as 374 // "%d %s=%s\n", length, keyword, value 375 for len(sbuf) > 0 { 376 key, value, residual, err := parsePAXRecord(sbuf) 377 if err != nil { 378 return nil, ErrHeader 379 } 380 sbuf = residual 381 382 keyStr := string(key) 383 if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes { 384 // GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map. 385 sparseMap.WriteString(value) 386 sparseMap.Write([]byte{','}) 387 } else { 388 // Normal key. Set the value in the headers map. 389 headers[keyStr] = string(value) 390 } 391 } 392 if sparseMap.Len() != 0 { 393 // Add sparse info to headers, chopping off the extra comma 394 sparseMap.Truncate(sparseMap.Len() - 1) 395 headers[paxGNUSparseMap] = sparseMap.String() 396 } 397 return headers, nil 398 } 399 400 // parsePAXRecord parses the input PAX record string into a key-value pair. 401 // If parsing is successful, it will slice off the currently read record and 402 // return the remainder as r. 403 // 404 // A PAX record is of the following form: 405 // "%d %s=%s\n" % (size, key, value) 406 func parsePAXRecord(s string) (k, v, r string, err error) { 407 // The size field ends at the first space. 408 sp := strings.IndexByte(s, ' ') 409 if sp == -1 { 410 return "", "", s, ErrHeader 411 } 412 413 // Parse the first token as a decimal integer. 414 n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int 415 if perr != nil || n < 5 || int64(len(s)) < n { 416 return "", "", s, ErrHeader 417 } 418 419 // Extract everything between the space and the final newline. 420 rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:] 421 if nl != "\n" { 422 return "", "", s, ErrHeader 423 } 424 425 // The first equals separates the key from the value. 426 eq := strings.IndexByte(rec, '=') 427 if eq == -1 { 428 return "", "", s, ErrHeader 429 } 430 return rec[:eq], rec[eq+1:], rem, nil 431 } 432 433 // parseString parses bytes as a NUL-terminated C-style string. 434 // If a NUL byte is not found then the whole slice is returned as a string. 435 func (*parser) parseString(b []byte) string { 436 n := 0 437 for n < len(b) && b[n] != 0 { 438 n++ 439 } 440 return string(b[0:n]) 441 } 442 443 // parseNumeric parses the input as being encoded in either base-256 or octal. 444 // This function may return negative numbers. 445 // If parsing fails or an integer overflow occurs, err will be set. 446 func (p *parser) parseNumeric(b []byte) int64 { 447 // Check for base-256 (binary) format first. 448 // If the first bit is set, then all following bits constitute a two's 449 // complement encoded number in big-endian byte order. 450 if len(b) > 0 && b[0]&0x80 != 0 { 451 // Handling negative numbers relies on the following identity: 452 // -a-1 == ^a 453 // 454 // If the number is negative, we use an inversion mask to invert the 455 // data bytes and treat the value as an unsigned number. 456 var inv byte // 0x00 if positive or zero, 0xff if negative 457 if b[0]&0x40 != 0 { 458 inv = 0xff 459 } 460 461 var x uint64 462 for i, c := range b { 463 c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing 464 if i == 0 { 465 c &= 0x7f // Ignore signal bit in first byte 466 } 467 if (x >> 56) > 0 { 468 p.err = ErrHeader // Integer overflow 469 return 0 470 } 471 x = x<<8 | uint64(c) 472 } 473 if (x >> 63) > 0 { 474 p.err = ErrHeader // Integer overflow 475 return 0 476 } 477 if inv == 0xff { 478 return ^int64(x) 479 } 480 return int64(x) 481 } 482 483 // Normal case is base-8 (octal) format. 484 return p.parseOctal(b) 485 } 486 487 func (p *parser) parseOctal(b []byte) int64 { 488 // Because unused fields are filled with NULs, we need 489 // to skip leading NULs. Fields may also be padded with 490 // spaces or NULs. 491 // So we remove leading and trailing NULs and spaces to 492 // be sure. 493 b = bytes.Trim(b, " \x00") 494 495 if len(b) == 0 { 496 return 0 497 } 498 x, perr := strconv.ParseUint(p.parseString(b), 8, 64) 499 if perr != nil { 500 p.err = ErrHeader 501 } 502 return int64(x) 503 } 504 505 // skipUnread skips any unread bytes in the existing file entry, as well as any 506 // alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is 507 // encountered in the data portion; it is okay to hit io.EOF in the padding. 508 // 509 // Note that this function still works properly even when sparse files are being 510 // used since numBytes returns the bytes remaining in the underlying io.Reader. 511 func (tr *Reader) skipUnread() error { 512 dataSkip := tr.numBytes() // Number of data bytes to skip 513 totalSkip := dataSkip + tr.pad // Total number of bytes to skip 514 tr.curr, tr.pad = nil, 0 515 516 // If possible, Seek to the last byte before the end of the data section. 517 // Do this because Seek is often lazy about reporting errors; this will mask 518 // the fact that the tar stream may be truncated. We can rely on the 519 // io.CopyN done shortly afterwards to trigger any IO errors. 520 var seekSkipped int64 // Number of bytes skipped via Seek 521 if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 { 522 // Not all io.Seeker can actually Seek. For example, os.Stdin implements 523 // io.Seeker, but calling Seek always returns an error and performs 524 // no action. Thus, we try an innocent seek to the current position 525 // to see if Seek is really supported. 526 pos1, err := sr.Seek(0, os.SEEK_CUR) 527 if err == nil { 528 // Seek seems supported, so perform the real Seek. 529 pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR) 530 if err != nil { 531 tr.err = err 532 return tr.err 533 } 534 seekSkipped = pos2 - pos1 535 } 536 } 537 538 var copySkipped int64 // Number of bytes skipped via CopyN 539 copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped) 540 if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip { 541 tr.err = io.ErrUnexpectedEOF 542 } 543 return tr.err 544 } 545 546 func (tr *Reader) verifyChecksum(header []byte) bool { 547 if tr.err != nil { 548 return false 549 } 550 551 var p parser 552 given := p.parseOctal(header[148:156]) 553 unsigned, signed := checksum(header) 554 return p.err == nil && (given == unsigned || given == signed) 555 } 556 557 // readHeader reads the next block header and assumes that the underlying reader 558 // is already aligned to a block boundary. 559 // 560 // The err will be set to io.EOF only when one of the following occurs: 561 // * Exactly 0 bytes are read and EOF is hit. 562 // * Exactly 1 block of zeros is read and EOF is hit. 563 // * At least 2 blocks of zeros are read. 564 func (tr *Reader) readHeader() *Header { 565 header := tr.hdrBuff[:] 566 copy(header, zeroBlock) 567 568 if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { 569 return nil // io.EOF is okay here 570 } 571 572 // Two blocks of zero bytes marks the end of the archive. 573 if bytes.Equal(header, zeroBlock[0:blockSize]) { 574 if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { 575 return nil // io.EOF is okay here 576 } 577 if bytes.Equal(header, zeroBlock[0:blockSize]) { 578 tr.err = io.EOF 579 } else { 580 tr.err = ErrHeader // zero block and then non-zero block 581 } 582 return nil 583 } 584 585 if !tr.verifyChecksum(header) { 586 tr.err = ErrHeader 587 return nil 588 } 589 590 // Unpack 591 var p parser 592 hdr := new(Header) 593 s := slicer(header) 594 595 hdr.Name = p.parseString(s.next(100)) 596 hdr.Mode = p.parseNumeric(s.next(8)) 597 hdr.Uid = int(p.parseNumeric(s.next(8))) 598 hdr.Gid = int(p.parseNumeric(s.next(8))) 599 hdr.Size = p.parseNumeric(s.next(12)) 600 hdr.ModTime = time.Unix(p.parseNumeric(s.next(12)), 0) 601 s.next(8) // chksum 602 hdr.Typeflag = s.next(1)[0] 603 hdr.Linkname = p.parseString(s.next(100)) 604 605 // The remainder of the header depends on the value of magic. 606 // The original (v7) version of tar had no explicit magic field, 607 // so its magic bytes, like the rest of the block, are NULs. 608 magic := string(s.next(8)) // contains version field as well. 609 var format string 610 switch { 611 case magic[:6] == "ustar\x00": // POSIX tar (1003.1-1988) 612 if string(header[508:512]) == "tar\x00" { 613 format = "star" 614 } else { 615 format = "posix" 616 } 617 case magic == "ustar \x00": // old GNU tar 618 format = "gnu" 619 } 620 621 switch format { 622 case "posix", "gnu", "star": 623 hdr.Uname = p.parseString(s.next(32)) 624 hdr.Gname = p.parseString(s.next(32)) 625 devmajor := s.next(8) 626 devminor := s.next(8) 627 if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock { 628 hdr.Devmajor = p.parseNumeric(devmajor) 629 hdr.Devminor = p.parseNumeric(devminor) 630 } 631 var prefix string 632 switch format { 633 case "posix", "gnu": 634 prefix = p.parseString(s.next(155)) 635 case "star": 636 prefix = p.parseString(s.next(131)) 637 hdr.AccessTime = time.Unix(p.parseNumeric(s.next(12)), 0) 638 hdr.ChangeTime = time.Unix(p.parseNumeric(s.next(12)), 0) 639 } 640 if len(prefix) > 0 { 641 hdr.Name = prefix + "/" + hdr.Name 642 } 643 } 644 645 if p.err != nil { 646 tr.err = p.err 647 return nil 648 } 649 650 nb := hdr.Size 651 if isHeaderOnlyType(hdr.Typeflag) { 652 nb = 0 653 } 654 if nb < 0 { 655 tr.err = ErrHeader 656 return nil 657 } 658 659 // Set the current file reader. 660 tr.pad = -nb & (blockSize - 1) // blockSize is a power of two 661 tr.curr = ®FileReader{r: tr.r, nb: nb} 662 663 // Check for old GNU sparse format entry. 664 if hdr.Typeflag == TypeGNUSparse { 665 // Get the real size of the file. 666 hdr.Size = p.parseNumeric(header[483:495]) 667 if p.err != nil { 668 tr.err = p.err 669 return nil 670 } 671 672 // Read the sparse map. 673 sp := tr.readOldGNUSparseMap(header) 674 if tr.err != nil { 675 return nil 676 } 677 678 // Current file is a GNU sparse file. Update the current file reader. 679 tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) 680 if tr.err != nil { 681 return nil 682 } 683 } 684 685 return hdr 686 } 687 688 // readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format. 689 // The sparse map is stored in the tar header if it's small enough. If it's larger than four entries, 690 // then one or more extension headers are used to store the rest of the sparse map. 691 func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { 692 var p parser 693 isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0 694 spCap := oldGNUSparseMainHeaderNumEntries 695 if isExtended { 696 spCap += oldGNUSparseExtendedHeaderNumEntries 697 } 698 sp := make([]sparseEntry, 0, spCap) 699 s := slicer(header[oldGNUSparseMainHeaderOffset:]) 700 701 // Read the four entries from the main tar header 702 for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ { 703 offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize)) 704 numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize)) 705 if p.err != nil { 706 tr.err = p.err 707 return nil 708 } 709 if offset == 0 && numBytes == 0 { 710 break 711 } 712 sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) 713 } 714 715 for isExtended { 716 // There are more entries. Read an extension header and parse its entries. 717 sparseHeader := make([]byte, blockSize) 718 if _, tr.err = io.ReadFull(tr.r, sparseHeader); tr.err != nil { 719 return nil 720 } 721 isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0 722 s = slicer(sparseHeader) 723 for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ { 724 offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize)) 725 numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize)) 726 if p.err != nil { 727 tr.err = p.err 728 return nil 729 } 730 if offset == 0 && numBytes == 0 { 731 break 732 } 733 sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) 734 } 735 } 736 return sp 737 } 738 739 // readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format 740 // version 1.0. The format of the sparse map consists of a series of 741 // newline-terminated numeric fields. The first field is the number of entries 742 // and is always present. Following this are the entries, consisting of two 743 // fields (offset, numBytes). This function must stop reading at the end 744 // boundary of the block containing the last newline. 745 // 746 // Note that the GNU manual says that numeric values should be encoded in octal 747 // format. However, the GNU tar utility itself outputs these values in decimal. 748 // As such, this library treats values as being encoded in decimal. 749 func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { 750 var cntNewline int64 751 var buf bytes.Buffer 752 var blk = make([]byte, blockSize) 753 754 // feedTokens copies data in numBlock chunks from r into buf until there are 755 // at least cnt newlines in buf. It will not read more blocks than needed. 756 var feedTokens = func(cnt int64) error { 757 for cntNewline < cnt { 758 if _, err := io.ReadFull(r, blk); err != nil { 759 if err == io.EOF { 760 err = io.ErrUnexpectedEOF 761 } 762 return err 763 } 764 buf.Write(blk) 765 for _, c := range blk { 766 if c == '\n' { 767 cntNewline++ 768 } 769 } 770 } 771 return nil 772 } 773 774 // nextToken gets the next token delimited by a newline. This assumes that 775 // at least one newline exists in the buffer. 776 var nextToken = func() string { 777 cntNewline-- 778 tok, _ := buf.ReadString('\n') 779 return tok[:len(tok)-1] // Cut off newline 780 } 781 782 // Parse for the number of entries. 783 // Use integer overflow resistant math to check this. 784 if err := feedTokens(1); err != nil { 785 return nil, err 786 } 787 numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int 788 if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { 789 return nil, ErrHeader 790 } 791 792 // Parse for all member entries. 793 // numEntries is trusted after this since a potential attacker must have 794 // committed resources proportional to what this library used. 795 if err := feedTokens(2 * numEntries); err != nil { 796 return nil, err 797 } 798 sp := make([]sparseEntry, 0, numEntries) 799 for i := int64(0); i < numEntries; i++ { 800 offset, err := strconv.ParseInt(nextToken(), 10, 64) 801 if err != nil { 802 return nil, ErrHeader 803 } 804 numBytes, err := strconv.ParseInt(nextToken(), 10, 64) 805 if err != nil { 806 return nil, ErrHeader 807 } 808 sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) 809 } 810 return sp, nil 811 } 812 813 // readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format 814 // version 0.1. The sparse map is stored in the PAX headers. 815 func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) { 816 // Get number of entries. 817 // Use integer overflow resistant math to check this. 818 numEntriesStr := extHdrs[paxGNUSparseNumBlocks] 819 numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int 820 if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { 821 return nil, ErrHeader 822 } 823 824 // There should be two numbers in sparseMap for each entry. 825 sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",") 826 if int64(len(sparseMap)) != 2*numEntries { 827 return nil, ErrHeader 828 } 829 830 // Loop through the entries in the sparse map. 831 // numEntries is trusted now. 832 sp := make([]sparseEntry, 0, numEntries) 833 for i := int64(0); i < numEntries; i++ { 834 offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64) 835 if err != nil { 836 return nil, ErrHeader 837 } 838 numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64) 839 if err != nil { 840 return nil, ErrHeader 841 } 842 sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) 843 } 844 return sp, nil 845 } 846 847 // numBytes returns the number of bytes left to read in the current file's entry 848 // in the tar archive, or 0 if there is no current file. 849 func (tr *Reader) numBytes() int64 { 850 if tr.curr == nil { 851 // No current file, so no bytes 852 return 0 853 } 854 return tr.curr.numBytes() 855 } 856 857 // Read reads from the current entry in the tar archive. 858 // It returns 0, io.EOF when it reaches the end of that entry, 859 // until Next is called to advance to the next entry. 860 // 861 // Calling Read on special types like TypeLink, TypeSymLink, TypeChar, 862 // TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what 863 // the Header.Size claims. 864 func (tr *Reader) Read(b []byte) (n int, err error) { 865 if tr.err != nil { 866 return 0, tr.err 867 } 868 if tr.curr == nil { 869 return 0, io.EOF 870 } 871 872 n, err = tr.curr.Read(b) 873 if err != nil && err != io.EOF { 874 tr.err = err 875 } 876 return 877 } 878 879 func (rfr *regFileReader) Read(b []byte) (n int, err error) { 880 if rfr.nb == 0 { 881 // file consumed 882 return 0, io.EOF 883 } 884 if int64(len(b)) > rfr.nb { 885 b = b[0:rfr.nb] 886 } 887 n, err = rfr.r.Read(b) 888 rfr.nb -= int64(n) 889 890 if err == io.EOF && rfr.nb > 0 { 891 err = io.ErrUnexpectedEOF 892 } 893 return 894 } 895 896 // numBytes returns the number of bytes left to read in the file's data in the tar archive. 897 func (rfr *regFileReader) numBytes() int64 { 898 return rfr.nb 899 } 900 901 // newSparseFileReader creates a new sparseFileReader, but validates all of the 902 // sparse entries before doing so. 903 func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) { 904 if total < 0 { 905 return nil, ErrHeader // Total size cannot be negative 906 } 907 908 // Validate all sparse entries. These are the same checks as performed by 909 // the BSD tar utility. 910 for i, s := range sp { 911 switch { 912 case s.offset < 0 || s.numBytes < 0: 913 return nil, ErrHeader // Negative values are never okay 914 case s.offset > math.MaxInt64-s.numBytes: 915 return nil, ErrHeader // Integer overflow with large length 916 case s.offset+s.numBytes > total: 917 return nil, ErrHeader // Region extends beyond the "real" size 918 case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset: 919 return nil, ErrHeader // Regions can't overlap and must be in order 920 } 921 } 922 return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil 923 } 924 925 // readHole reads a sparse hole ending at endOffset. 926 func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int { 927 n64 := endOffset - sfr.pos 928 if n64 > int64(len(b)) { 929 n64 = int64(len(b)) 930 } 931 n := int(n64) 932 for i := 0; i < n; i++ { 933 b[i] = 0 934 } 935 sfr.pos += n64 936 return n 937 } 938 939 // Read reads the sparse file data in expanded form. 940 func (sfr *sparseFileReader) Read(b []byte) (n int, err error) { 941 // Skip past all empty fragments. 942 for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 { 943 sfr.sp = sfr.sp[1:] 944 } 945 946 // If there are no more fragments, then it is possible that there 947 // is one last sparse hole. 948 if len(sfr.sp) == 0 { 949 // This behavior matches the BSD tar utility. 950 // However, GNU tar stops returning data even if sfr.total is unmet. 951 if sfr.pos < sfr.total { 952 return sfr.readHole(b, sfr.total), nil 953 } 954 return 0, io.EOF 955 } 956 957 // In front of a data fragment, so read a hole. 958 if sfr.pos < sfr.sp[0].offset { 959 return sfr.readHole(b, sfr.sp[0].offset), nil 960 } 961 962 // In a data fragment, so read from it. 963 // This math is overflow free since we verify that offset and numBytes can 964 // be safely added when creating the sparseFileReader. 965 endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment 966 bytesLeft := endPos - sfr.pos // Bytes left in fragment 967 if int64(len(b)) > bytesLeft { 968 b = b[:bytesLeft] 969 } 970 971 n, err = sfr.rfr.Read(b) 972 sfr.pos += int64(n) 973 if err == io.EOF { 974 if sfr.pos < endPos { 975 err = io.ErrUnexpectedEOF // There was supposed to be more data 976 } else if sfr.pos < sfr.total { 977 err = nil // There is still an implicit sparse hole at the end 978 } 979 } 980 981 if sfr.pos == endPos { 982 sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it 983 } 984 return n, err 985 } 986 987 // numBytes returns the number of bytes left to read in the sparse file's 988 // sparse-encoded data in the tar archive. 989 func (sfr *sparseFileReader) numBytes() int64 { 990 return sfr.rfr.numBytes() 991 }