github.com/sbinet/go@v0.0.0-20160827155028-54d7de7dd62b/src/archive/tar/reader.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package tar

// TODO(dsymonds):
//   - pax extensions

import (
	"bytes"
	"errors"
	"io"
	"io/ioutil"
	"math"
	"strconv"
	"strings"
	"time"
)

var (
	ErrHeader = errors.New("archive/tar: invalid tar header")
)

const maxNanoSecondIntSize = 9

// A Reader provides sequential access to the contents of a tar archive.
// A tar archive consists of a sequence of files.
// The Next method advances to the next file in the archive (including the first),
// and then it can be treated as an io.Reader to access the file's data.
type Reader struct {
	r    io.Reader
	err  error
	pad  int64          // amount of padding (ignored) after current file entry
	curr numBytesReader // reader for current file entry
	blk  block          // buffer to use as temporary local storage
}

type parser struct {
	err error // Last error seen
}

// A numBytesReader is an io.Reader with a numBytes method, returning the number
// of bytes remaining in the underlying encoded data.
type numBytesReader interface {
	io.Reader
	numBytes() int64
}

// A regFileReader is a numBytesReader for reading file data from a tar archive.
type regFileReader struct {
	r  io.Reader // underlying reader
	nb int64     // number of unread bytes for current file entry
}

// A sparseFileReader is a numBytesReader for reading sparse file data from a
// tar archive.
type sparseFileReader struct {
	rfr   numBytesReader // Reads the sparse-encoded file data
	sp    []sparseEntry  // The sparse map for the file
	pos   int64          // Keeps track of file position
	total int64          // Total size of the file
}

// A sparseEntry holds a single entry in a sparse file's sparse map.
//
// Sparse files are represented using a series of sparseEntrys.
// Despite the name, a sparseEntry represents an actual data fragment that
// references data found in the underlying archive stream. All regions not
// covered by a sparseEntry are logically filled with zeros.
//
// For example, if the underlying raw file contains the 8-byte data:
//	var compactData = "abcdefgh"
//
// And the sparse map has the following entries:
//	var sp = []sparseEntry{
//		{offset: 2, numBytes: 5},  // Data fragment for [2..7)
//		{offset: 18, numBytes: 3}, // Data fragment for [18..21)
//	}
//
// Then the content of the resulting sparse file with a "real" size of 25 is:
//	var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
type sparseEntry struct {
	offset   int64 // Starting position of the fragment
	numBytes int64 // Length of the fragment
}

// Keywords for GNU sparse files in a PAX extended header
const (
	paxGNUSparseNumBlocks = "GNU.sparse.numblocks"
	paxGNUSparseOffset    = "GNU.sparse.offset"
	paxGNUSparseNumBytes  = "GNU.sparse.numbytes"
	paxGNUSparseMap       = "GNU.sparse.map"
	paxGNUSparseName      = "GNU.sparse.name"
	paxGNUSparseMajor     = "GNU.sparse.major"
	paxGNUSparseMinor     = "GNU.sparse.minor"
	paxGNUSparseSize      = "GNU.sparse.size"
	paxGNUSparseRealSize  = "GNU.sparse.realsize"
)

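// A minimal sketch of typical use of this Reader, with error handling
// abbreviated (tr is the *Reader returned by NewReader below; the exact
// handling of hdr and the data is up to the caller):
//
//	for {
//		hdr, err := tr.Next()
//		if err == io.EOF {
//			break // end of archive
//		}
//		if err != nil {
//			return err
//		}
//		// hdr describes the current entry; tr now reads its contents.
//		if _, err := io.Copy(ioutil.Discard, tr); err != nil {
//			return err
//		}
//	}
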
// NewReader creates a new Reader reading from r.
func NewReader(r io.Reader) *Reader { return &Reader{r: r} }

// Next advances to the next entry in the tar archive.
//
// io.EOF is returned at the end of the input.
func (tr *Reader) Next() (*Header, error) {
	if tr.err != nil {
		return nil, tr.err
	}

	var hdr *Header
	var rawHdr *block
	var extHdrs map[string]string

	// Externally, Next iterates through the tar archive as if it is a series of
	// files. Internally, the tar format often uses fake "files" to add meta
	// data that describes the next file. These meta data "files" should not
	// normally be visible to the outside. As such, this loop iterates through
	// one or more "header files" until it finds a "normal file".
loop:
	for {
		tr.err = tr.skipUnread()
		if tr.err != nil {
			return nil, tr.err
		}

		hdr, rawHdr = tr.readHeader()
		if tr.err != nil {
			return nil, tr.err
		}

		tr.err = tr.handleRegularFile(hdr)
		if tr.err != nil {
			return nil, tr.err
		}

		// Check for PAX/GNU special headers and files.
		switch hdr.Typeflag {
		case TypeXHeader:
			extHdrs, tr.err = parsePAX(tr)
			if tr.err != nil {
				return nil, tr.err
			}
			continue loop // This is a meta header affecting the next header
		case TypeGNULongName, TypeGNULongLink:
			var realname []byte
			realname, tr.err = ioutil.ReadAll(tr)
			if tr.err != nil {
				return nil, tr.err
			}

			// Convert GNU extensions to use PAX headers.
			if extHdrs == nil {
				extHdrs = make(map[string]string)
			}
			var p parser
			switch hdr.Typeflag {
			case TypeGNULongName:
				extHdrs[paxPath] = p.parseString(realname)
			case TypeGNULongLink:
				extHdrs[paxLinkpath] = p.parseString(realname)
			}
			if p.err != nil {
				tr.err = p.err
				return nil, tr.err
			}
			continue loop // This is a meta header affecting the next header
		default:
			// The old GNU sparse format is handled here since it is technically
			// just a regular file with additional attributes.

			// TODO(dsnet): We should handle errors reported by mergePAX.
			mergePAX(hdr, extHdrs)

			// TODO(dsnet): The extended headers may have updated the size.
			// Thus, we must setup the regFileReader again here.
			//
			// See golang.org/issue/15573

			tr.err = tr.handleSparseFile(hdr, rawHdr, extHdrs)
			if tr.err != nil {
				return nil, tr.err
			}
			break loop // This is a file, so stop
		}
	}
	return hdr, nil
}

// handleRegularFile sets up the current file reader and padding such that it
// can only read the following logical data section. It will properly handle
// special headers that contain no data section.
func (tr *Reader) handleRegularFile(hdr *Header) error {
	nb := hdr.Size
	if isHeaderOnlyType(hdr.Typeflag) {
		nb = 0
	}
	if nb < 0 {
		return ErrHeader
	}

	tr.pad = -nb & (blockSize - 1) // blockSize is a power of two
	tr.curr = &regFileReader{r: tr.r, nb: nb}
	return nil
}

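// To illustrate the padding computation above with example numbers: with
// blockSize = 512, an entry of nb = 1025 bytes occupies three blocks, so
//
//	tr.pad = -1025 & 511 // == 511, since 1025+511 == 3*512
//
// rounds the data section up to the next block boundary.
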
// handleSparseFile checks if the current file is a sparse format of any type
// and sets the curr reader appropriately.
func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block, extHdrs map[string]string) error {
	var sp []sparseEntry
	var err error
	if hdr.Typeflag == TypeGNUSparse {
		var p parser
		hdr.Size = p.parseNumeric(rawHdr.GNU().RealSize())
		if p.err != nil {
			return p.err
		}

		sp = tr.readOldGNUSparseMap(rawHdr)
		if tr.err != nil {
			return tr.err
		}
	} else {
		sp, err = tr.checkForGNUSparsePAXHeaders(hdr, extHdrs)
		if err != nil {
			return err
		}
	}

	// If sp is non-nil, then this is a sparse file.
	// Note that it is possible for len(sp) to be zero.
	if sp != nil {
		tr.curr, err = newSparseFileReader(tr.curr, sp, hdr.Size)
	}
	return err
}

// checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers.
// If they are found, then this function reads the sparse map and returns it.
// Unknown sparse formats are ignored, causing the file to be treated as a
// regular file.
func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) {
	var sparseFormat string

	// Check for sparse format indicators.
	major, majorOk := headers[paxGNUSparseMajor]
	minor, minorOk := headers[paxGNUSparseMinor]
	sparseName, sparseNameOk := headers[paxGNUSparseName]
	_, sparseMapOk := headers[paxGNUSparseMap]
	sparseSize, sparseSizeOk := headers[paxGNUSparseSize]
	sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize]

	// Identify which, if any, sparse format applies from which PAX headers are set.
	if majorOk && minorOk {
		sparseFormat = major + "." + minor
	} else if sparseNameOk && sparseMapOk {
		sparseFormat = "0.1"
	} else if sparseSizeOk {
		sparseFormat = "0.0"
	} else {
		// Not a PAX format GNU sparse file.
		return nil, nil
	}

	// Check for unknown sparse format.
	if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" {
		return nil, nil
	}

	// Update hdr from GNU sparse PAX headers.
	if sparseNameOk {
		hdr.Name = sparseName
	}
	if sparseSizeOk {
		realSize, err := strconv.ParseInt(sparseSize, 10, 0)
		if err != nil {
			return nil, ErrHeader
		}
		hdr.Size = realSize
	} else if sparseRealSizeOk {
		realSize, err := strconv.ParseInt(sparseRealSize, 10, 0)
		if err != nil {
			return nil, ErrHeader
		}
		hdr.Size = realSize
	}

	// Set up the sparse map, according to the particular sparse format in use.
	var sp []sparseEntry
	var err error
	switch sparseFormat {
	case "0.0", "0.1":
		sp, err = readGNUSparseMap0x1(headers)
	case "1.0":
		sp, err = readGNUSparseMap1x0(tr.curr)
	}
	return sp, err
}

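// To summarize the detection logic above, the PAX keys map to sparse format
// versions roughly as follows (illustrative; "..." stands for whatever value
// the archive carries):
//
//	GNU.sparse.major=1, GNU.sparse.minor=0       -> format "1.0" (map stored in the data section)
//	GNU.sparse.name=..., GNU.sparse.map=...      -> format "0.1" (map stored in the PAX headers)
//	GNU.sparse.size=...                          -> format "0.0" (offset/numbytes pairs in PAX headers)
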
// mergePAX merges well known headers according to the PAX standard.
// In general, headers with the same name as those found in the Header
// struct overwrite the struct's values, since the PAX records carry
// higher precision or longer values. This is especially useful for the
// name and linkname fields.
func mergePAX(hdr *Header, headers map[string]string) error {
	for k, v := range headers {
		switch k {
		case paxPath:
			hdr.Name = v
		case paxLinkpath:
			hdr.Linkname = v
		case paxGname:
			hdr.Gname = v
		case paxUname:
			hdr.Uname = v
		case paxUid:
			uid, err := strconv.ParseInt(v, 10, 0)
			if err != nil {
				return err
			}
			hdr.Uid = int(uid)
		case paxGid:
			gid, err := strconv.ParseInt(v, 10, 0)
			if err != nil {
				return err
			}
			hdr.Gid = int(gid)
		case paxAtime:
			t, err := parsePAXTime(v)
			if err != nil {
				return err
			}
			hdr.AccessTime = t
		case paxMtime:
			t, err := parsePAXTime(v)
			if err != nil {
				return err
			}
			hdr.ModTime = t
		case paxCtime:
			t, err := parsePAXTime(v)
			if err != nil {
				return err
			}
			hdr.ChangeTime = t
		case paxSize:
			size, err := strconv.ParseInt(v, 10, 0)
			if err != nil {
				return err
			}
			hdr.Size = size
		default:
			if strings.HasPrefix(k, paxXattr) {
				if hdr.Xattrs == nil {
					hdr.Xattrs = make(map[string]string)
				}
				hdr.Xattrs[k[len(paxXattr):]] = v
			}
		}
	}
	return nil
}

// parsePAXTime takes a string of the form %d.%d as described in
// the PAX specification.
func parsePAXTime(t string) (time.Time, error) {
	buf := []byte(t)
	pos := bytes.IndexByte(buf, '.')
	var seconds, nanoseconds int64
	var err error
	if pos == -1 {
		seconds, err = strconv.ParseInt(t, 10, 0)
		if err != nil {
			return time.Time{}, err
		}
	} else {
		seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0)
		if err != nil {
			return time.Time{}, err
		}
		nanoBuf := string(buf[pos+1:])
		// Pad as needed before converting to a decimal.
		// For example .030 -> .030000000 -> 30000000 nanoseconds
		if len(nanoBuf) < maxNanoSecondIntSize {
			// Right pad
			nanoBuf += strings.Repeat("0", maxNanoSecondIntSize-len(nanoBuf))
		} else if len(nanoBuf) > maxNanoSecondIntSize {
			// Right truncate
			nanoBuf = nanoBuf[:maxNanoSecondIntSize]
		}
		nanoseconds, err = strconv.ParseInt(nanoBuf, 10, 0)
		if err != nil {
			return time.Time{}, err
		}
	}
	ts := time.Unix(seconds, nanoseconds)
	return ts, nil
}

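// For example (illustrative value), a PAX time record of "1350244992.023960108"
// splits at the '.' into a seconds part and a fractional part; the fraction is
// padded or truncated to 9 digits, yielding time.Unix(1350244992, 23960108).
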
// parsePAX parses PAX headers.
// If an extended header (type 'x') is invalid, ErrHeader is returned.
func parsePAX(r io.Reader) (map[string]string, error) {
	buf, err := ioutil.ReadAll(r)
	if err != nil {
		return nil, err
	}
	sbuf := string(buf)

	// For GNU PAX sparse format 0.0 support.
	// This function transforms the sparse format 0.0 headers into sparse format 0.1 headers.
	var sparseMap bytes.Buffer

	headers := make(map[string]string)
	// Each record is constructed as
	//	"%d %s=%s\n", length, keyword, value
	for len(sbuf) > 0 {
		key, value, residual, err := parsePAXRecord(sbuf)
		if err != nil {
			return nil, ErrHeader
		}
		sbuf = residual

		keyStr := key
		if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes {
			// GNU sparse format 0.0 special key. Write to sparseMap instead of
			// using the headers map.
			sparseMap.WriteString(value)
			sparseMap.Write([]byte{','})
		} else {
			// Normal key. Set the value in the headers map.
			headers[keyStr] = value
		}
	}
	if sparseMap.Len() != 0 {
		// Add sparse info to headers, chopping off the extra comma.
		sparseMap.Truncate(sparseMap.Len() - 1)
		headers[paxGNUSparseMap] = sparseMap.String()
	}
	return headers, nil
}

// parsePAXRecord parses the input PAX record string into a key-value pair.
// If parsing is successful, it will slice off the currently read record and
// return the remainder as r.
//
// A PAX record is of the following form:
//	"%d %s=%s\n" % (size, key, value)
func parsePAXRecord(s string) (k, v, r string, err error) {
	// The size field ends at the first space.
	sp := strings.IndexByte(s, ' ')
	if sp == -1 {
		return "", "", s, ErrHeader
	}

	// Parse the first token as a decimal integer.
	n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int
	if perr != nil || n < 5 || int64(len(s)) < n {
		return "", "", s, ErrHeader
	}

	// Extract everything between the space and the final newline.
	rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:]
	if nl != "\n" {
		return "", "", s, ErrHeader
	}

	// The first equals separates the key from the value.
	eq := strings.IndexByte(rec, '=')
	if eq == -1 {
		return "", "", s, ErrHeader
	}
	return rec[:eq], rec[eq+1:], rem, nil
}

// parseString parses bytes as a NUL-terminated C-style string.
// If a NUL byte is not found then the whole slice is returned as a string.
func (*parser) parseString(b []byte) string {
	n := 0
	for n < len(b) && b[n] != 0 {
		n++
	}
	return string(b[0:n])
}

// parseNumeric parses the input as being encoded in either base-256 or octal.
// This function may return negative numbers.
// If parsing fails or an integer overflow occurs, err will be set.
func (p *parser) parseNumeric(b []byte) int64 {
	// Check for base-256 (binary) format first.
	// If the first bit is set, then all following bits constitute a two's
	// complement encoded number in big-endian byte order.
	if len(b) > 0 && b[0]&0x80 != 0 {
		// Handling negative numbers relies on the following identity:
		//	-a-1 == ^a
		//
		// If the number is negative, we use an inversion mask to invert the
		// data bytes and treat the value as an unsigned number.
		var inv byte // 0x00 if positive or zero, 0xff if negative
		if b[0]&0x40 != 0 {
			inv = 0xff
		}

		var x uint64
		for i, c := range b {
			c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing
			if i == 0 {
				c &= 0x7f // Ignore signal bit in first byte
			}
			if (x >> 56) > 0 {
				p.err = ErrHeader // Integer overflow
				return 0
			}
			x = x<<8 | uint64(c)
		}
		if (x >> 63) > 0 {
			p.err = ErrHeader // Integer overflow
			return 0
		}
		if inv == 0xff {
			return ^int64(x)
		}
		return int64(x)
	}

	// Normal case is base-8 (octal) format.
	return p.parseOctal(b)
}

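// An illustrative example of the base-256 form handled above: a 12-byte size
// field of
//
//	[]byte{0x80, 0, 0, 0, 0, 0, 0, 0x02, 0, 0, 0, 0}
//
// has the high bit of the first byte set, so the remaining bits are read as a
// big-endian two's complement value: 0x0200000000 == 8589934592 (8 GiB), a
// size that does not fit in the 11-digit octal representation.
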
func (p *parser) parseOctal(b []byte) int64 {
	// Because unused fields are filled with NULs, we need
	// to skip leading NULs. Fields may also be padded with
	// spaces or NULs.
	// So we remove leading and trailing NULs and spaces to
	// be sure.
	b = bytes.Trim(b, " \x00")

	if len(b) == 0 {
		return 0
	}
	x, perr := strconv.ParseUint(p.parseString(b), 8, 64)
	if perr != nil {
		p.err = ErrHeader
	}
	return int64(x)
}

// skipUnread skips any unread bytes in the existing file entry, as well as any
// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is
// encountered in the data portion; it is okay to hit io.EOF in the padding.
//
// Note that this function still works properly even when sparse files are being
// used since numBytes returns the bytes remaining in the underlying io.Reader.
func (tr *Reader) skipUnread() error {
	dataSkip := tr.numBytes()      // Number of data bytes to skip
	totalSkip := dataSkip + tr.pad // Total number of bytes to skip
	tr.curr, tr.pad = nil, 0

	// If possible, Seek to the last byte before the end of the data section.
	// Do this because Seek is often lazy about reporting errors; this will mask
	// the fact that the tar stream may be truncated. We can rely on the
	// io.CopyN done shortly afterwards to trigger any IO errors.
	var seekSkipped int64 // Number of bytes skipped via Seek
	if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 {
		// Not all io.Seeker can actually Seek. For example, os.Stdin implements
		// io.Seeker, but calling Seek always returns an error and performs
		// no action. Thus, we try an innocent seek to the current position
		// to see if Seek is really supported.
		pos1, err := sr.Seek(0, io.SeekCurrent)
		if err == nil {
			// Seek seems supported, so perform the real Seek.
			pos2, err := sr.Seek(dataSkip-1, io.SeekCurrent)
			if err != nil {
				tr.err = err
				return tr.err
			}
			seekSkipped = pos2 - pos1
		}
	}

	var copySkipped int64 // Number of bytes skipped via CopyN
	copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped)
	if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip {
		tr.err = io.ErrUnexpectedEOF
	}
	return tr.err
}

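// A small worked example for skipUnread (illustrative numbers): for a 10-byte
// entry of which 4 bytes were read, numBytes() reports dataSkip = 6, the
// alignment padding is pad = -10 & 511 = 502, and the total skipped is
// 6 + 502 = 508 bytes, leaving the reader on the next 512-byte block boundary.
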
// readHeader reads the next block header and assumes that the underlying reader
// is already aligned to a block boundary. It returns the raw block of the
// header in case further processing is required.
//
// The err will be set to io.EOF only when one of the following occurs:
//	* Exactly 0 bytes are read and EOF is hit.
//	* Exactly 1 block of zeros is read and EOF is hit.
//	* At least 2 blocks of zeros are read.
func (tr *Reader) readHeader() (*Header, *block) {
	if _, tr.err = io.ReadFull(tr.r, tr.blk[:]); tr.err != nil {
		return nil, nil // io.EOF is okay here
	}

	// Two blocks of zero bytes marks the end of the archive.
	if bytes.Equal(tr.blk[:], zeroBlock[:]) {
		if _, tr.err = io.ReadFull(tr.r, tr.blk[:]); tr.err != nil {
			return nil, nil // io.EOF is okay here
		}
		if bytes.Equal(tr.blk[:], zeroBlock[:]) {
			tr.err = io.EOF
		} else {
			tr.err = ErrHeader // zero block and then non-zero block
		}
		return nil, nil
	}

	// Verify the header matches a known format.
	format := tr.blk.GetFormat()
	if format == formatUnknown {
		tr.err = ErrHeader
		return nil, nil
	}

	var p parser
	hdr := new(Header)

	// Unpack the V7 header.
	v7 := tr.blk.V7()
	hdr.Name = p.parseString(v7.Name())
	hdr.Mode = p.parseNumeric(v7.Mode())
	hdr.Uid = int(p.parseNumeric(v7.UID()))
	hdr.Gid = int(p.parseNumeric(v7.GID()))
	hdr.Size = p.parseNumeric(v7.Size())
	hdr.ModTime = time.Unix(p.parseNumeric(v7.ModTime()), 0)
	hdr.Typeflag = v7.TypeFlag()[0]
	hdr.Linkname = p.parseString(v7.LinkName())

	// Unpack format specific fields.
	if format > formatV7 {
		ustar := tr.blk.USTAR()
		hdr.Uname = p.parseString(ustar.UserName())
		hdr.Gname = p.parseString(ustar.GroupName())
		if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
			hdr.Devmajor = p.parseNumeric(ustar.DevMajor())
			hdr.Devminor = p.parseNumeric(ustar.DevMinor())
		}

		var prefix string
		switch format {
		case formatUSTAR, formatGNU:
			// TODO(dsnet): Do not use the prefix field for the GNU format!
			// See golang.org/issues/12594
			ustar := tr.blk.USTAR()
			prefix = p.parseString(ustar.Prefix())
		case formatSTAR:
			star := tr.blk.STAR()
			prefix = p.parseString(star.Prefix())
			hdr.AccessTime = time.Unix(p.parseNumeric(star.AccessTime()), 0)
			hdr.ChangeTime = time.Unix(p.parseNumeric(star.ChangeTime()), 0)
		}
		if len(prefix) > 0 {
			hdr.Name = prefix + "/" + hdr.Name
		}
	}

	// Check for parsing errors.
	if p.err != nil {
		tr.err = p.err
		return nil, nil
	}
	return hdr, &tr.blk
}

// readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format.
// The sparse map is stored in the tar header if it's small enough. If it's
// larger than four entries, then one or more extension headers are used to
// store the rest of the sparse map.
func (tr *Reader) readOldGNUSparseMap(blk *block) []sparseEntry {
	var p parser
	var s sparseArray = blk.GNU().Sparse()
	var sp = make([]sparseEntry, 0, s.MaxEntries())
	for i := 0; i < s.MaxEntries(); i++ {
		offset := p.parseOctal(s.Entry(i).Offset())
		numBytes := p.parseOctal(s.Entry(i).NumBytes())
		if p.err != nil {
			tr.err = p.err
			return nil
		}
		if offset == 0 && numBytes == 0 {
			break
		}
		sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
	}

	for s.IsExtended()[0] > 0 {
		// There are more entries. Read an extension header and parse its entries.
		var blk block
		if _, tr.err = io.ReadFull(tr.r, blk[:]); tr.err != nil {
			return nil
		}
		s = blk.Sparse()

		for i := 0; i < s.MaxEntries(); i++ {
			offset := p.parseOctal(s.Entry(i).Offset())
			numBytes := p.parseOctal(s.Entry(i).NumBytes())
			if p.err != nil {
				tr.err = p.err
				return nil
			}
			if offset == 0 && numBytes == 0 {
				break
			}
			sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
		}
	}
	return sp
}

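// Layout note for the old GNU format read above (a rough summary of what the
// loop assumes): the header block itself carries a small fixed number of
// sparse entries (four) plus an "is extended" flag; while that flag is set,
// additional 512-byte extension blocks follow, each packed with further
// offset/numBytes pairs, until a block with the flag cleared is reached.
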
// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format
// version 1.0. The format of the sparse map consists of a series of
// newline-terminated numeric fields. The first field is the number of entries
// and is always present. Following this are the entries, consisting of two
// fields (offset, numBytes). This function must stop reading at the end
// boundary of the block containing the last newline.
//
// Note that the GNU manual says that numeric values should be encoded in octal
// format. However, the GNU tar utility itself outputs these values in decimal.
// As such, this library treats values as being encoded in decimal.
func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) {
	var cntNewline int64
	var buf bytes.Buffer
	var blk = make([]byte, blockSize)

	// feedTokens copies data in blockSize chunks from r into buf until there are
	// at least cnt newlines in buf. It will not read more blocks than needed.
	var feedTokens = func(cnt int64) error {
		for cntNewline < cnt {
			if _, err := io.ReadFull(r, blk); err != nil {
				if err == io.EOF {
					err = io.ErrUnexpectedEOF
				}
				return err
			}
			buf.Write(blk)
			for _, c := range blk {
				if c == '\n' {
					cntNewline++
				}
			}
		}
		return nil
	}

	// nextToken gets the next token delimited by a newline. This assumes that
	// at least one newline exists in the buffer.
	var nextToken = func() string {
		cntNewline--
		tok, _ := buf.ReadString('\n')
		return tok[:len(tok)-1] // Cut off newline
	}

	// Parse for the number of entries.
	// Use integer overflow resistant math to check this.
	if err := feedTokens(1); err != nil {
		return nil, err
	}
	numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int
	if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
		return nil, ErrHeader
	}

	// Parse for all member entries.
	// numEntries is trusted after this since a potential attacker must have
	// committed resources proportional to what this library used.
	if err := feedTokens(2 * numEntries); err != nil {
		return nil, err
	}
	sp := make([]sparseEntry, 0, numEntries)
	for i := int64(0); i < numEntries; i++ {
		offset, err := strconv.ParseInt(nextToken(), 10, 64)
		if err != nil {
			return nil, ErrHeader
		}
		numBytes, err := strconv.ParseInt(nextToken(), 10, 64)
		if err != nil {
			return nil, ErrHeader
		}
		sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
	}
	return sp, nil
}

// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format
// version 0.1. The sparse map is stored in the PAX headers.
func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) {
	// Get number of entries.
	// Use integer overflow resistant math to check this.
	numEntriesStr := extHdrs[paxGNUSparseNumBlocks]
	numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int
	if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
		return nil, ErrHeader
	}

	// There should be two numbers in sparseMap for each entry.
	sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",")
	if int64(len(sparseMap)) != 2*numEntries {
		return nil, ErrHeader
	}

	// Loop through the entries in the sparse map.
	// numEntries is trusted now.
	sp := make([]sparseEntry, 0, numEntries)
	for i := int64(0); i < numEntries; i++ {
		offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64)
		if err != nil {
			return nil, ErrHeader
		}
		numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64)
		if err != nil {
			return nil, ErrHeader
		}
		sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
	}
	return sp, nil
}

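// An illustrative example for the 0.1 format parsed above, reusing the sparse
// map from the sparseEntry documentation: the PAX headers
//
//	GNU.sparse.numblocks=2
//	GNU.sparse.map=2,5,18,3
//
// decode to the entries {offset: 2, numBytes: 5} and {offset: 18, numBytes: 3}.
// In the 1.0 format, the same map is stored at the start of the data section
// instead, as the newline-terminated decimal fields "2\n2\n5\n18\n3\n" padded
// out to a full block.
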
// numBytes returns the number of bytes left to read in the current file's entry
// in the tar archive, or 0 if there is no current file.
func (tr *Reader) numBytes() int64 {
	if tr.curr == nil {
		// No current file, so no bytes
		return 0
	}
	return tr.curr.numBytes()
}

// Read reads from the current entry in the tar archive.
// It returns 0, io.EOF when it reaches the end of that entry,
// until Next is called to advance to the next entry.
//
// Calling Read on special types like TypeLink, TypeSymlink, TypeChar,
// TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what
// the Header.Size claims.
func (tr *Reader) Read(b []byte) (n int, err error) {
	if tr.err != nil {
		return 0, tr.err
	}
	if tr.curr == nil {
		return 0, io.EOF
	}

	n, err = tr.curr.Read(b)
	if err != nil && err != io.EOF {
		tr.err = err
	}
	return
}

func (rfr *regFileReader) Read(b []byte) (n int, err error) {
	if rfr.nb == 0 {
		// file consumed
		return 0, io.EOF
	}
	if int64(len(b)) > rfr.nb {
		b = b[0:rfr.nb]
	}
	n, err = rfr.r.Read(b)
	rfr.nb -= int64(n)

	if err == io.EOF && rfr.nb > 0 {
		err = io.ErrUnexpectedEOF
	}
	return
}

// numBytes returns the number of bytes left to read in the file's data in the tar archive.
func (rfr *regFileReader) numBytes() int64 {
	return rfr.nb
}

// newSparseFileReader creates a new sparseFileReader, but validates all of the
// sparse entries before doing so.
func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) {
	if total < 0 {
		return nil, ErrHeader // Total size cannot be negative
	}

	// Validate all sparse entries. These are the same checks as performed by
	// the BSD tar utility.
	for i, s := range sp {
		switch {
		case s.offset < 0 || s.numBytes < 0:
			return nil, ErrHeader // Negative values are never okay
		case s.offset > math.MaxInt64-s.numBytes:
			return nil, ErrHeader // Integer overflow with large length
		case s.offset+s.numBytes > total:
			return nil, ErrHeader // Region extends beyond the "real" size
		case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset:
			return nil, ErrHeader // Regions can't overlap and must be in order
		}
	}
	return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil
}

// readHole reads a sparse hole ending at endOffset.
func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int {
	n64 := endOffset - sfr.pos
	if n64 > int64(len(b)) {
		n64 = int64(len(b))
	}
	n := int(n64)
	for i := 0; i < n; i++ {
		b[i] = 0
	}
	sfr.pos += n64
	return n
}

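// To illustrate how Read below alternates holes and fragments, consider the
// sparse map from the sparseEntry documentation ({2, 5} and {18, 3} with a
// total size of 25) and a sufficiently large destination buffer: successive
// calls return 2 zero bytes (a hole), 5 bytes of archive data, 11 zero bytes,
// 3 bytes of archive data, 4 zero bytes, and then io.EOF.
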
// Read reads the sparse file data in expanded form.
func (sfr *sparseFileReader) Read(b []byte) (n int, err error) {
	// Skip past all empty fragments.
	for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 {
		sfr.sp = sfr.sp[1:]
	}

	// If there are no more fragments, then it is possible that there
	// is one last sparse hole.
	if len(sfr.sp) == 0 {
		// This behavior matches the BSD tar utility.
		// However, GNU tar stops returning data even if sfr.total is unmet.
		if sfr.pos < sfr.total {
			return sfr.readHole(b, sfr.total), nil
		}
		return 0, io.EOF
	}

	// In front of a data fragment, so read a hole.
	if sfr.pos < sfr.sp[0].offset {
		return sfr.readHole(b, sfr.sp[0].offset), nil
	}

	// In a data fragment, so read from it.
	// This math is overflow free since we verify that offset and numBytes can
	// be safely added when creating the sparseFileReader.
	endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment
	bytesLeft := endPos - sfr.pos                   // Bytes left in fragment
	if int64(len(b)) > bytesLeft {
		b = b[:bytesLeft]
	}

	n, err = sfr.rfr.Read(b)
	sfr.pos += int64(n)
	if err == io.EOF {
		if sfr.pos < endPos {
			err = io.ErrUnexpectedEOF // There was supposed to be more data
		} else if sfr.pos < sfr.total {
			err = nil // There is still an implicit sparse hole at the end
		}
	}

	if sfr.pos == endPos {
		sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it
	}
	return n, err
}

// numBytes returns the number of bytes left to read in the sparse file's
// sparse-encoded data in the tar archive.
func (sfr *sparseFileReader) numBytes() int64 {
	return sfr.rfr.numBytes()
}