github.com/4ad/go@v0.0.0-20161219182952-69a12818b605/src/archive/tar/reader.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package tar 6 7 // TODO(dsymonds): 8 // - pax extensions 9 10 import ( 11 "bytes" 12 "errors" 13 "io" 14 "io/ioutil" 15 "math" 16 "strconv" 17 "strings" 18 "time" 19 ) 20 21 var ( 22 ErrHeader = errors.New("archive/tar: invalid tar header") 23 ) 24 25 const maxNanoSecondIntSize = 9 26 27 // A Reader provides sequential access to the contents of a tar archive. 28 // A tar archive consists of a sequence of files. 29 // The Next method advances to the next file in the archive (including the first), 30 // and then it can be treated as an io.Reader to access the file's data. 31 type Reader struct { 32 r io.Reader 33 err error 34 pad int64 // amount of padding (ignored) after current file entry 35 curr numBytesReader // reader for current file entry 36 blk block // buffer to use as temporary local storage 37 } 38 39 type parser struct { 40 err error // Last error seen 41 } 42 43 // A numBytesReader is an io.Reader with a numBytes method, returning the number 44 // of bytes remaining in the underlying encoded data. 45 type numBytesReader interface { 46 io.Reader 47 numBytes() int64 48 } 49 50 // A regFileReader is a numBytesReader for reading file data from a tar archive. 51 type regFileReader struct { 52 r io.Reader // underlying reader 53 nb int64 // number of unread bytes for current file entry 54 } 55 56 // A sparseFileReader is a numBytesReader for reading sparse file data from a 57 // tar archive. 58 type sparseFileReader struct { 59 rfr numBytesReader // Reads the sparse-encoded file data 60 sp []sparseEntry // The sparse map for the file 61 pos int64 // Keeps track of file position 62 total int64 // Total size of the file 63 } 64 65 // A sparseEntry holds a single entry in a sparse file's sparse map. 66 // 67 // Sparse files are represented using a series of sparseEntrys. 68 // Despite the name, a sparseEntry represents an actual data fragment that 69 // references data found in the underlying archive stream. All regions not 70 // covered by a sparseEntry are logically filled with zeros. 71 // 72 // For example, if the underlying raw file contains the 10-byte data: 73 // var compactData = "abcdefgh" 74 // 75 // And the sparse map has the following entries: 76 // var sp = []sparseEntry{ 77 // {offset: 2, numBytes: 5} // Data fragment for [2..7] 78 // {offset: 18, numBytes: 3} // Data fragment for [18..21] 79 // } 80 // 81 // Then the content of the resulting sparse file with a "real" size of 25 is: 82 // var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4 83 type sparseEntry struct { 84 offset int64 // Starting position of the fragment 85 numBytes int64 // Length of the fragment 86 } 87 88 // Keywords for GNU sparse files in a PAX extended header 89 const ( 90 paxGNUSparseNumBlocks = "GNU.sparse.numblocks" 91 paxGNUSparseOffset = "GNU.sparse.offset" 92 paxGNUSparseNumBytes = "GNU.sparse.numbytes" 93 paxGNUSparseMap = "GNU.sparse.map" 94 paxGNUSparseName = "GNU.sparse.name" 95 paxGNUSparseMajor = "GNU.sparse.major" 96 paxGNUSparseMinor = "GNU.sparse.minor" 97 paxGNUSparseSize = "GNU.sparse.size" 98 paxGNUSparseRealSize = "GNU.sparse.realsize" 99 ) 100 101 // NewReader creates a new Reader reading from r. 102 func NewReader(r io.Reader) *Reader { return &Reader{r: r} } 103 104 // Next advances to the next entry in the tar archive. 105 // 106 // io.EOF is returned at the end of the input. 107 func (tr *Reader) Next() (*Header, error) { 108 if tr.err != nil { 109 return nil, tr.err 110 } 111 112 var hdr *Header 113 var extHdrs map[string]string 114 115 // Externally, Next iterates through the tar archive as if it is a series of 116 // files. Internally, the tar format often uses fake "files" to add meta 117 // data that describes the next file. These meta data "files" should not 118 // normally be visible to the outside. As such, this loop iterates through 119 // one or more "header files" until it finds a "normal file". 120 loop: 121 for { 122 tr.err = tr.skipUnread() 123 if tr.err != nil { 124 return nil, tr.err 125 } 126 127 hdr = tr.readHeader() 128 if tr.err != nil { 129 return nil, tr.err 130 } 131 132 // Check for PAX/GNU special headers and files. 133 switch hdr.Typeflag { 134 case TypeXHeader: 135 extHdrs, tr.err = parsePAX(tr) 136 if tr.err != nil { 137 return nil, tr.err 138 } 139 continue loop // This is a meta header affecting the next header 140 case TypeGNULongName, TypeGNULongLink: 141 var realname []byte 142 realname, tr.err = ioutil.ReadAll(tr) 143 if tr.err != nil { 144 return nil, tr.err 145 } 146 147 // Convert GNU extensions to use PAX headers. 148 if extHdrs == nil { 149 extHdrs = make(map[string]string) 150 } 151 var p parser 152 switch hdr.Typeflag { 153 case TypeGNULongName: 154 extHdrs[paxPath] = p.parseString(realname) 155 case TypeGNULongLink: 156 extHdrs[paxLinkpath] = p.parseString(realname) 157 } 158 if p.err != nil { 159 tr.err = p.err 160 return nil, tr.err 161 } 162 continue loop // This is a meta header affecting the next header 163 default: 164 mergePAX(hdr, extHdrs) 165 166 // Check for a PAX format sparse file 167 sp, err := tr.checkForGNUSparsePAXHeaders(hdr, extHdrs) 168 if err != nil { 169 tr.err = err 170 return nil, err 171 } 172 if sp != nil { 173 // Current file is a PAX format GNU sparse file. 174 // Set the current file reader to a sparse file reader. 175 tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) 176 if tr.err != nil { 177 return nil, tr.err 178 } 179 } 180 break loop // This is a file, so stop 181 } 182 } 183 return hdr, nil 184 } 185 186 // checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then 187 // this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to 188 // be treated as a regular file. 189 func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) { 190 var sparseFormat string 191 192 // Check for sparse format indicators 193 major, majorOk := headers[paxGNUSparseMajor] 194 minor, minorOk := headers[paxGNUSparseMinor] 195 sparseName, sparseNameOk := headers[paxGNUSparseName] 196 _, sparseMapOk := headers[paxGNUSparseMap] 197 sparseSize, sparseSizeOk := headers[paxGNUSparseSize] 198 sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize] 199 200 // Identify which, if any, sparse format applies from which PAX headers are set 201 if majorOk && minorOk { 202 sparseFormat = major + "." + minor 203 } else if sparseNameOk && sparseMapOk { 204 sparseFormat = "0.1" 205 } else if sparseSizeOk { 206 sparseFormat = "0.0" 207 } else { 208 // Not a PAX format GNU sparse file. 209 return nil, nil 210 } 211 212 // Check for unknown sparse format 213 if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" { 214 return nil, nil 215 } 216 217 // Update hdr from GNU sparse PAX headers 218 if sparseNameOk { 219 hdr.Name = sparseName 220 } 221 if sparseSizeOk { 222 realSize, err := strconv.ParseInt(sparseSize, 10, 0) 223 if err != nil { 224 return nil, ErrHeader 225 } 226 hdr.Size = realSize 227 } else if sparseRealSizeOk { 228 realSize, err := strconv.ParseInt(sparseRealSize, 10, 0) 229 if err != nil { 230 return nil, ErrHeader 231 } 232 hdr.Size = realSize 233 } 234 235 // Set up the sparse map, according to the particular sparse format in use 236 var sp []sparseEntry 237 var err error 238 switch sparseFormat { 239 case "0.0", "0.1": 240 sp, err = readGNUSparseMap0x1(headers) 241 case "1.0": 242 sp, err = readGNUSparseMap1x0(tr.curr) 243 } 244 return sp, err 245 } 246 247 // mergePAX merges well known headers according to PAX standard. 248 // In general headers with the same name as those found 249 // in the header struct overwrite those found in the header 250 // struct with higher precision or longer values. Esp. useful 251 // for name and linkname fields. 252 func mergePAX(hdr *Header, headers map[string]string) error { 253 for k, v := range headers { 254 switch k { 255 case paxPath: 256 hdr.Name = v 257 case paxLinkpath: 258 hdr.Linkname = v 259 case paxGname: 260 hdr.Gname = v 261 case paxUname: 262 hdr.Uname = v 263 case paxUid: 264 uid, err := strconv.ParseInt(v, 10, 0) 265 if err != nil { 266 return err 267 } 268 hdr.Uid = int(uid) 269 case paxGid: 270 gid, err := strconv.ParseInt(v, 10, 0) 271 if err != nil { 272 return err 273 } 274 hdr.Gid = int(gid) 275 case paxAtime: 276 t, err := parsePAXTime(v) 277 if err != nil { 278 return err 279 } 280 hdr.AccessTime = t 281 case paxMtime: 282 t, err := parsePAXTime(v) 283 if err != nil { 284 return err 285 } 286 hdr.ModTime = t 287 case paxCtime: 288 t, err := parsePAXTime(v) 289 if err != nil { 290 return err 291 } 292 hdr.ChangeTime = t 293 case paxSize: 294 size, err := strconv.ParseInt(v, 10, 0) 295 if err != nil { 296 return err 297 } 298 hdr.Size = size 299 default: 300 if strings.HasPrefix(k, paxXattr) { 301 if hdr.Xattrs == nil { 302 hdr.Xattrs = make(map[string]string) 303 } 304 hdr.Xattrs[k[len(paxXattr):]] = v 305 } 306 } 307 } 308 return nil 309 } 310 311 // parsePAXTime takes a string of the form %d.%d as described in 312 // the PAX specification. 313 func parsePAXTime(t string) (time.Time, error) { 314 buf := []byte(t) 315 pos := bytes.IndexByte(buf, '.') 316 var seconds, nanoseconds int64 317 var err error 318 if pos == -1 { 319 seconds, err = strconv.ParseInt(t, 10, 0) 320 if err != nil { 321 return time.Time{}, err 322 } 323 } else { 324 seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0) 325 if err != nil { 326 return time.Time{}, err 327 } 328 nanoBuf := string(buf[pos+1:]) 329 // Pad as needed before converting to a decimal. 330 // For example .030 -> .030000000 -> 30000000 nanoseconds 331 if len(nanoBuf) < maxNanoSecondIntSize { 332 // Right pad 333 nanoBuf += strings.Repeat("0", maxNanoSecondIntSize-len(nanoBuf)) 334 } else if len(nanoBuf) > maxNanoSecondIntSize { 335 // Right truncate 336 nanoBuf = nanoBuf[:maxNanoSecondIntSize] 337 } 338 nanoseconds, err = strconv.ParseInt(nanoBuf, 10, 0) 339 if err != nil { 340 return time.Time{}, err 341 } 342 } 343 ts := time.Unix(seconds, nanoseconds) 344 return ts, nil 345 } 346 347 // parsePAX parses PAX headers. 348 // If an extended header (type 'x') is invalid, ErrHeader is returned 349 func parsePAX(r io.Reader) (map[string]string, error) { 350 buf, err := ioutil.ReadAll(r) 351 if err != nil { 352 return nil, err 353 } 354 sbuf := string(buf) 355 356 // For GNU PAX sparse format 0.0 support. 357 // This function transforms the sparse format 0.0 headers into sparse format 0.1 headers. 358 var sparseMap bytes.Buffer 359 360 headers := make(map[string]string) 361 // Each record is constructed as 362 // "%d %s=%s\n", length, keyword, value 363 for len(sbuf) > 0 { 364 key, value, residual, err := parsePAXRecord(sbuf) 365 if err != nil { 366 return nil, ErrHeader 367 } 368 sbuf = residual 369 370 keyStr := key 371 if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes { 372 // GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map. 373 sparseMap.WriteString(value) 374 sparseMap.Write([]byte{','}) 375 } else { 376 // Normal key. Set the value in the headers map. 377 headers[keyStr] = value 378 } 379 } 380 if sparseMap.Len() != 0 { 381 // Add sparse info to headers, chopping off the extra comma 382 sparseMap.Truncate(sparseMap.Len() - 1) 383 headers[paxGNUSparseMap] = sparseMap.String() 384 } 385 return headers, nil 386 } 387 388 // parsePAXRecord parses the input PAX record string into a key-value pair. 389 // If parsing is successful, it will slice off the currently read record and 390 // return the remainder as r. 391 // 392 // A PAX record is of the following form: 393 // "%d %s=%s\n" % (size, key, value) 394 func parsePAXRecord(s string) (k, v, r string, err error) { 395 // The size field ends at the first space. 396 sp := strings.IndexByte(s, ' ') 397 if sp == -1 { 398 return "", "", s, ErrHeader 399 } 400 401 // Parse the first token as a decimal integer. 402 n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int 403 if perr != nil || n < 5 || int64(len(s)) < n { 404 return "", "", s, ErrHeader 405 } 406 407 // Extract everything between the space and the final newline. 408 rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:] 409 if nl != "\n" { 410 return "", "", s, ErrHeader 411 } 412 413 // The first equals separates the key from the value. 414 eq := strings.IndexByte(rec, '=') 415 if eq == -1 { 416 return "", "", s, ErrHeader 417 } 418 return rec[:eq], rec[eq+1:], rem, nil 419 } 420 421 // parseString parses bytes as a NUL-terminated C-style string. 422 // If a NUL byte is not found then the whole slice is returned as a string. 423 func (*parser) parseString(b []byte) string { 424 n := 0 425 for n < len(b) && b[n] != 0 { 426 n++ 427 } 428 return string(b[0:n]) 429 } 430 431 // parseNumeric parses the input as being encoded in either base-256 or octal. 432 // This function may return negative numbers. 433 // If parsing fails or an integer overflow occurs, err will be set. 434 func (p *parser) parseNumeric(b []byte) int64 { 435 // Check for base-256 (binary) format first. 436 // If the first bit is set, then all following bits constitute a two's 437 // complement encoded number in big-endian byte order. 438 if len(b) > 0 && b[0]&0x80 != 0 { 439 // Handling negative numbers relies on the following identity: 440 // -a-1 == ^a 441 // 442 // If the number is negative, we use an inversion mask to invert the 443 // data bytes and treat the value as an unsigned number. 444 var inv byte // 0x00 if positive or zero, 0xff if negative 445 if b[0]&0x40 != 0 { 446 inv = 0xff 447 } 448 449 var x uint64 450 for i, c := range b { 451 c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing 452 if i == 0 { 453 c &= 0x7f // Ignore signal bit in first byte 454 } 455 if (x >> 56) > 0 { 456 p.err = ErrHeader // Integer overflow 457 return 0 458 } 459 x = x<<8 | uint64(c) 460 } 461 if (x >> 63) > 0 { 462 p.err = ErrHeader // Integer overflow 463 return 0 464 } 465 if inv == 0xff { 466 return ^int64(x) 467 } 468 return int64(x) 469 } 470 471 // Normal case is base-8 (octal) format. 472 return p.parseOctal(b) 473 } 474 475 func (p *parser) parseOctal(b []byte) int64 { 476 // Because unused fields are filled with NULs, we need 477 // to skip leading NULs. Fields may also be padded with 478 // spaces or NULs. 479 // So we remove leading and trailing NULs and spaces to 480 // be sure. 481 b = bytes.Trim(b, " \x00") 482 483 if len(b) == 0 { 484 return 0 485 } 486 x, perr := strconv.ParseUint(p.parseString(b), 8, 64) 487 if perr != nil { 488 p.err = ErrHeader 489 } 490 return int64(x) 491 } 492 493 // skipUnread skips any unread bytes in the existing file entry, as well as any 494 // alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is 495 // encountered in the data portion; it is okay to hit io.EOF in the padding. 496 // 497 // Note that this function still works properly even when sparse files are being 498 // used since numBytes returns the bytes remaining in the underlying io.Reader. 499 func (tr *Reader) skipUnread() error { 500 dataSkip := tr.numBytes() // Number of data bytes to skip 501 totalSkip := dataSkip + tr.pad // Total number of bytes to skip 502 tr.curr, tr.pad = nil, 0 503 504 // If possible, Seek to the last byte before the end of the data section. 505 // Do this because Seek is often lazy about reporting errors; this will mask 506 // the fact that the tar stream may be truncated. We can rely on the 507 // io.CopyN done shortly afterwards to trigger any IO errors. 508 var seekSkipped int64 // Number of bytes skipped via Seek 509 if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 { 510 // Not all io.Seeker can actually Seek. For example, os.Stdin implements 511 // io.Seeker, but calling Seek always returns an error and performs 512 // no action. Thus, we try an innocent seek to the current position 513 // to see if Seek is really supported. 514 pos1, err := sr.Seek(0, io.SeekCurrent) 515 if err == nil { 516 // Seek seems supported, so perform the real Seek. 517 pos2, err := sr.Seek(dataSkip-1, io.SeekCurrent) 518 if err != nil { 519 tr.err = err 520 return tr.err 521 } 522 seekSkipped = pos2 - pos1 523 } 524 } 525 526 var copySkipped int64 // Number of bytes skipped via CopyN 527 copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped) 528 if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip { 529 tr.err = io.ErrUnexpectedEOF 530 } 531 return tr.err 532 } 533 534 // readHeader reads the next block header and assumes that the underlying reader 535 // is already aligned to a block boundary. 536 // 537 // The err will be set to io.EOF only when one of the following occurs: 538 // * Exactly 0 bytes are read and EOF is hit. 539 // * Exactly 1 block of zeros is read and EOF is hit. 540 // * At least 2 blocks of zeros are read. 541 func (tr *Reader) readHeader() *Header { 542 if _, tr.err = io.ReadFull(tr.r, tr.blk[:]); tr.err != nil { 543 return nil // io.EOF is okay here 544 } 545 546 // Two blocks of zero bytes marks the end of the archive. 547 if bytes.Equal(tr.blk[:], zeroBlock[:]) { 548 if _, tr.err = io.ReadFull(tr.r, tr.blk[:]); tr.err != nil { 549 return nil // io.EOF is okay here 550 } 551 if bytes.Equal(tr.blk[:], zeroBlock[:]) { 552 tr.err = io.EOF 553 } else { 554 tr.err = ErrHeader // zero block and then non-zero block 555 } 556 return nil 557 } 558 559 // Verify the header matches a known format. 560 format := tr.blk.GetFormat() 561 if format == formatUnknown { 562 tr.err = ErrHeader 563 return nil 564 } 565 566 var p parser 567 hdr := new(Header) 568 569 // Unpack the V7 header. 570 v7 := tr.blk.V7() 571 hdr.Name = p.parseString(v7.Name()) 572 hdr.Mode = p.parseNumeric(v7.Mode()) 573 hdr.Uid = int(p.parseNumeric(v7.UID())) 574 hdr.Gid = int(p.parseNumeric(v7.GID())) 575 hdr.Size = p.parseNumeric(v7.Size()) 576 hdr.ModTime = time.Unix(p.parseNumeric(v7.ModTime()), 0) 577 hdr.Typeflag = v7.TypeFlag()[0] 578 hdr.Linkname = p.parseString(v7.LinkName()) 579 580 // Unpack format specific fields. 581 if format > formatV7 { 582 ustar := tr.blk.USTAR() 583 hdr.Uname = p.parseString(ustar.UserName()) 584 hdr.Gname = p.parseString(ustar.GroupName()) 585 if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock { 586 hdr.Devmajor = p.parseNumeric(ustar.DevMajor()) 587 hdr.Devminor = p.parseNumeric(ustar.DevMinor()) 588 } 589 590 var prefix string 591 switch format { 592 case formatUSTAR, formatGNU: 593 // TODO(dsnet): Do not use the prefix field for the GNU format! 594 // See golang.org/issues/12594 595 ustar := tr.blk.USTAR() 596 prefix = p.parseString(ustar.Prefix()) 597 case formatSTAR: 598 star := tr.blk.STAR() 599 prefix = p.parseString(star.Prefix()) 600 hdr.AccessTime = time.Unix(p.parseNumeric(star.AccessTime()), 0) 601 hdr.ChangeTime = time.Unix(p.parseNumeric(star.ChangeTime()), 0) 602 } 603 if len(prefix) > 0 { 604 hdr.Name = prefix + "/" + hdr.Name 605 } 606 } 607 608 nb := hdr.Size 609 if isHeaderOnlyType(hdr.Typeflag) { 610 nb = 0 611 } 612 if nb < 0 { 613 tr.err = ErrHeader 614 return nil 615 } 616 617 // Set the current file reader. 618 tr.pad = -nb & (blockSize - 1) // blockSize is a power of two 619 tr.curr = ®FileReader{r: tr.r, nb: nb} 620 621 // Check for old GNU sparse format entry. 622 if hdr.Typeflag == TypeGNUSparse { 623 // Get the real size of the file. 624 hdr.Size = p.parseNumeric(tr.blk.GNU().RealSize()) 625 if p.err != nil { 626 tr.err = p.err 627 return nil 628 } 629 630 // Read the sparse map. 631 sp := tr.readOldGNUSparseMap(&tr.blk) 632 if tr.err != nil { 633 return nil 634 } 635 636 // Current file is a GNU sparse file. Update the current file reader. 637 tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) 638 if tr.err != nil { 639 return nil 640 } 641 } 642 643 if p.err != nil { 644 tr.err = p.err 645 return nil 646 } 647 648 return hdr 649 } 650 651 // readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format. 652 // The sparse map is stored in the tar header if it's small enough. If it's larger than four entries, 653 // then one or more extension headers are used to store the rest of the sparse map. 654 func (tr *Reader) readOldGNUSparseMap(blk *block) []sparseEntry { 655 var p parser 656 var s sparseArray = blk.GNU().Sparse() 657 var sp = make([]sparseEntry, 0, s.MaxEntries()) 658 for i := 0; i < s.MaxEntries(); i++ { 659 offset := p.parseOctal(s.Entry(i).Offset()) 660 numBytes := p.parseOctal(s.Entry(i).NumBytes()) 661 if p.err != nil { 662 tr.err = p.err 663 return nil 664 } 665 if offset == 0 && numBytes == 0 { 666 break 667 } 668 sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) 669 } 670 671 for s.IsExtended()[0] > 0 { 672 // There are more entries. Read an extension header and parse its entries. 673 var blk block 674 if _, tr.err = io.ReadFull(tr.r, blk[:]); tr.err != nil { 675 return nil 676 } 677 s = blk.Sparse() 678 679 for i := 0; i < s.MaxEntries(); i++ { 680 offset := p.parseOctal(s.Entry(i).Offset()) 681 numBytes := p.parseOctal(s.Entry(i).NumBytes()) 682 if p.err != nil { 683 tr.err = p.err 684 return nil 685 } 686 if offset == 0 && numBytes == 0 { 687 break 688 } 689 sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) 690 } 691 } 692 return sp 693 } 694 695 // readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format 696 // version 1.0. The format of the sparse map consists of a series of 697 // newline-terminated numeric fields. The first field is the number of entries 698 // and is always present. Following this are the entries, consisting of two 699 // fields (offset, numBytes). This function must stop reading at the end 700 // boundary of the block containing the last newline. 701 // 702 // Note that the GNU manual says that numeric values should be encoded in octal 703 // format. However, the GNU tar utility itself outputs these values in decimal. 704 // As such, this library treats values as being encoded in decimal. 705 func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { 706 var cntNewline int64 707 var buf bytes.Buffer 708 var blk = make([]byte, blockSize) 709 710 // feedTokens copies data in numBlock chunks from r into buf until there are 711 // at least cnt newlines in buf. It will not read more blocks than needed. 712 var feedTokens = func(cnt int64) error { 713 for cntNewline < cnt { 714 if _, err := io.ReadFull(r, blk); err != nil { 715 if err == io.EOF { 716 err = io.ErrUnexpectedEOF 717 } 718 return err 719 } 720 buf.Write(blk) 721 for _, c := range blk { 722 if c == '\n' { 723 cntNewline++ 724 } 725 } 726 } 727 return nil 728 } 729 730 // nextToken gets the next token delimited by a newline. This assumes that 731 // at least one newline exists in the buffer. 732 var nextToken = func() string { 733 cntNewline-- 734 tok, _ := buf.ReadString('\n') 735 return tok[:len(tok)-1] // Cut off newline 736 } 737 738 // Parse for the number of entries. 739 // Use integer overflow resistant math to check this. 740 if err := feedTokens(1); err != nil { 741 return nil, err 742 } 743 numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int 744 if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { 745 return nil, ErrHeader 746 } 747 748 // Parse for all member entries. 749 // numEntries is trusted after this since a potential attacker must have 750 // committed resources proportional to what this library used. 751 if err := feedTokens(2 * numEntries); err != nil { 752 return nil, err 753 } 754 sp := make([]sparseEntry, 0, numEntries) 755 for i := int64(0); i < numEntries; i++ { 756 offset, err := strconv.ParseInt(nextToken(), 10, 64) 757 if err != nil { 758 return nil, ErrHeader 759 } 760 numBytes, err := strconv.ParseInt(nextToken(), 10, 64) 761 if err != nil { 762 return nil, ErrHeader 763 } 764 sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) 765 } 766 return sp, nil 767 } 768 769 // readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format 770 // version 0.1. The sparse map is stored in the PAX headers. 771 func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) { 772 // Get number of entries. 773 // Use integer overflow resistant math to check this. 774 numEntriesStr := extHdrs[paxGNUSparseNumBlocks] 775 numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int 776 if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { 777 return nil, ErrHeader 778 } 779 780 // There should be two numbers in sparseMap for each entry. 781 sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",") 782 if int64(len(sparseMap)) != 2*numEntries { 783 return nil, ErrHeader 784 } 785 786 // Loop through the entries in the sparse map. 787 // numEntries is trusted now. 788 sp := make([]sparseEntry, 0, numEntries) 789 for i := int64(0); i < numEntries; i++ { 790 offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64) 791 if err != nil { 792 return nil, ErrHeader 793 } 794 numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64) 795 if err != nil { 796 return nil, ErrHeader 797 } 798 sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) 799 } 800 return sp, nil 801 } 802 803 // numBytes returns the number of bytes left to read in the current file's entry 804 // in the tar archive, or 0 if there is no current file. 805 func (tr *Reader) numBytes() int64 { 806 if tr.curr == nil { 807 // No current file, so no bytes 808 return 0 809 } 810 return tr.curr.numBytes() 811 } 812 813 // Read reads from the current entry in the tar archive. 814 // It returns 0, io.EOF when it reaches the end of that entry, 815 // until Next is called to advance to the next entry. 816 // 817 // Calling Read on special types like TypeLink, TypeSymLink, TypeChar, 818 // TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what 819 // the Header.Size claims. 820 func (tr *Reader) Read(b []byte) (n int, err error) { 821 if tr.err != nil { 822 return 0, tr.err 823 } 824 if tr.curr == nil { 825 return 0, io.EOF 826 } 827 828 n, err = tr.curr.Read(b) 829 if err != nil && err != io.EOF { 830 tr.err = err 831 } 832 return 833 } 834 835 func (rfr *regFileReader) Read(b []byte) (n int, err error) { 836 if rfr.nb == 0 { 837 // file consumed 838 return 0, io.EOF 839 } 840 if int64(len(b)) > rfr.nb { 841 b = b[0:rfr.nb] 842 } 843 n, err = rfr.r.Read(b) 844 rfr.nb -= int64(n) 845 846 if err == io.EOF && rfr.nb > 0 { 847 err = io.ErrUnexpectedEOF 848 } 849 return 850 } 851 852 // numBytes returns the number of bytes left to read in the file's data in the tar archive. 853 func (rfr *regFileReader) numBytes() int64 { 854 return rfr.nb 855 } 856 857 // newSparseFileReader creates a new sparseFileReader, but validates all of the 858 // sparse entries before doing so. 859 func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) { 860 if total < 0 { 861 return nil, ErrHeader // Total size cannot be negative 862 } 863 864 // Validate all sparse entries. These are the same checks as performed by 865 // the BSD tar utility. 866 for i, s := range sp { 867 switch { 868 case s.offset < 0 || s.numBytes < 0: 869 return nil, ErrHeader // Negative values are never okay 870 case s.offset > math.MaxInt64-s.numBytes: 871 return nil, ErrHeader // Integer overflow with large length 872 case s.offset+s.numBytes > total: 873 return nil, ErrHeader // Region extends beyond the "real" size 874 case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset: 875 return nil, ErrHeader // Regions can't overlap and must be in order 876 } 877 } 878 return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil 879 } 880 881 // readHole reads a sparse hole ending at endOffset. 882 func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int { 883 n64 := endOffset - sfr.pos 884 if n64 > int64(len(b)) { 885 n64 = int64(len(b)) 886 } 887 n := int(n64) 888 for i := 0; i < n; i++ { 889 b[i] = 0 890 } 891 sfr.pos += n64 892 return n 893 } 894 895 // Read reads the sparse file data in expanded form. 896 func (sfr *sparseFileReader) Read(b []byte) (n int, err error) { 897 // Skip past all empty fragments. 898 for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 { 899 sfr.sp = sfr.sp[1:] 900 } 901 902 // If there are no more fragments, then it is possible that there 903 // is one last sparse hole. 904 if len(sfr.sp) == 0 { 905 // This behavior matches the BSD tar utility. 906 // However, GNU tar stops returning data even if sfr.total is unmet. 907 if sfr.pos < sfr.total { 908 return sfr.readHole(b, sfr.total), nil 909 } 910 return 0, io.EOF 911 } 912 913 // In front of a data fragment, so read a hole. 914 if sfr.pos < sfr.sp[0].offset { 915 return sfr.readHole(b, sfr.sp[0].offset), nil 916 } 917 918 // In a data fragment, so read from it. 919 // This math is overflow free since we verify that offset and numBytes can 920 // be safely added when creating the sparseFileReader. 921 endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment 922 bytesLeft := endPos - sfr.pos // Bytes left in fragment 923 if int64(len(b)) > bytesLeft { 924 b = b[:bytesLeft] 925 } 926 927 n, err = sfr.rfr.Read(b) 928 sfr.pos += int64(n) 929 if err == io.EOF { 930 if sfr.pos < endPos { 931 err = io.ErrUnexpectedEOF // There was supposed to be more data 932 } else if sfr.pos < sfr.total { 933 err = nil // There is still an implicit sparse hole at the end 934 } 935 } 936 937 if sfr.pos == endPos { 938 sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it 939 } 940 return n, err 941 } 942 943 // numBytes returns the number of bytes left to read in the sparse file's 944 // sparse-encoded data in the tar archive. 945 func (sfr *sparseFileReader) numBytes() int64 { 946 return sfr.rfr.numBytes() 947 }