// Source: github.com/panjjo/go@v0.0.0-20161104043856-d62b31386338/src/archive/tar/reader.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package tar

// TODO(dsymonds):
//   - pax extensions

import (
	"bytes"
	"errors"
	"io"
	"io/ioutil"
	"math"
	"strconv"
	"strings"
	"time"
)

var (
	// ErrHeader is returned by the reader when a header, an extended header
	// record, or a sparse map in the archive fails to parse or validate.
	ErrHeader = errors.New("archive/tar: invalid tar header")
)

// A Reader provides sequential access to the contents of a tar archive.
// A tar archive consists of a sequence of files.
// The Next method advances to the next file in the archive (including the first),
// and then it can be treated as an io.Reader to access the file's data.
type Reader struct {
	r    io.Reader      // underlying archive stream
	pad  int64          // amount of padding (ignored) after current file entry
	curr numBytesReader // reader for current file entry; nil when no entry is open
	blk  block          // buffer to use as temporary local storage

	// err is a persistent error.
	// It is only the responsibility of every exported method of Reader to
	// ensure that this error is sticky.
	err error
}

// A numBytesReader is an io.Reader with a numBytes method, returning the number
// of bytes remaining in the underlying encoded data.
type numBytesReader interface {
	io.Reader
	numBytes() int64
}

// A regFileReader is a numBytesReader for reading file data from a tar archive.
type regFileReader struct {
	r  io.Reader // underlying reader
	nb int64     // number of unread bytes for current file entry
}

// A sparseFileReader is a numBytesReader for reading sparse file data from a
// tar archive.
type sparseFileReader struct {
	rfr   numBytesReader // Reads the sparse-encoded file data
	sp    []sparseEntry  // The sparse map for the file; consumed from the front as fragments are read
	pos   int64          // Current position within the expanded (logical) file
	total int64          // Total size of the expanded (logical) file
}

// A sparseEntry holds a single entry in a sparse file's sparse map.
//
// Sparse files are represented using a series of sparseEntrys.
// Despite the name, a sparseEntry represents an actual data fragment that
// references data found in the underlying archive stream. All regions not
// covered by a sparseEntry are logically filled with zeros.
//
// For example, if the underlying raw file contains the 8-byte data:
//	var compactData = "abcdefgh"
//
// And the sparse map has the following entries:
//	var sp = []sparseEntry{
//		{offset: 2, numBytes: 5},  // Data fragment for bytes 2..6
//		{offset: 18, numBytes: 3}, // Data fragment for bytes 18..20
//	}
//
// Then the content of the resulting sparse file with a "real" size of 25 is:
//	var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
type sparseEntry struct {
	offset   int64 // Starting position of the fragment
	numBytes int64 // Length of the fragment
}

// Keywords for GNU sparse files in a PAX extended header
const (
	paxGNUSparseNumBlocks = "GNU.sparse.numblocks"
	paxGNUSparseOffset    = "GNU.sparse.offset"
	paxGNUSparseNumBytes  = "GNU.sparse.numbytes"
	paxGNUSparseMap       = "GNU.sparse.map"
	paxGNUSparseName      = "GNU.sparse.name"
	paxGNUSparseMajor     = "GNU.sparse.major"
	paxGNUSparseMinor     = "GNU.sparse.minor"
	paxGNUSparseSize      = "GNU.sparse.size"
	paxGNUSparseRealSize  = "GNU.sparse.realsize"
)

// NewReader creates a new Reader reading from r.
func NewReader(r io.Reader) *Reader { return &Reader{r: r} }

// Next advances to the next entry in the tar archive.
//
// io.EOF is returned at the end of the input.
105 func (tr *Reader) Next() (*Header, error) { 106 if tr.err != nil { 107 return nil, tr.err 108 } 109 hdr, err := tr.next() 110 tr.err = err 111 return hdr, err 112 } 113 114 func (tr *Reader) next() (*Header, error) { 115 var extHdrs map[string]string 116 117 // Externally, Next iterates through the tar archive as if it is a series of 118 // files. Internally, the tar format often uses fake "files" to add meta 119 // data that describes the next file. These meta data "files" should not 120 // normally be visible to the outside. As such, this loop iterates through 121 // one or more "header files" until it finds a "normal file". 122 loop: 123 for { 124 if err := tr.skipUnread(); err != nil { 125 return nil, err 126 } 127 hdr, rawHdr, err := tr.readHeader() 128 if err != nil { 129 return nil, err 130 } 131 if err := tr.handleRegularFile(hdr); err != nil { 132 return nil, err 133 } 134 135 // Check for PAX/GNU special headers and files. 136 switch hdr.Typeflag { 137 case TypeXHeader: 138 extHdrs, err = parsePAX(tr) 139 if err != nil { 140 return nil, err 141 } 142 continue loop // This is a meta header affecting the next header 143 case TypeGNULongName, TypeGNULongLink: 144 realname, err := ioutil.ReadAll(tr) 145 if err != nil { 146 return nil, err 147 } 148 149 // Convert GNU extensions to use PAX headers. 150 if extHdrs == nil { 151 extHdrs = make(map[string]string) 152 } 153 var p parser 154 switch hdr.Typeflag { 155 case TypeGNULongName: 156 extHdrs[paxPath] = p.parseString(realname) 157 case TypeGNULongLink: 158 extHdrs[paxLinkpath] = p.parseString(realname) 159 } 160 if p.err != nil { 161 return nil, p.err 162 } 163 continue loop // This is a meta header affecting the next header 164 default: 165 // The old GNU sparse format is handled here since it is technically 166 // just a regular file with additional attributes. 167 168 if err := mergePAX(hdr, extHdrs); err != nil { 169 return nil, err 170 } 171 172 // The extended headers may have updated the size. 
173 // Thus, setup the regFileReader again after merging PAX headers. 174 if err := tr.handleRegularFile(hdr); err != nil { 175 return nil, err 176 } 177 178 // Sparse formats rely on being able to read from the logical data 179 // section; there must be a preceding call to handleRegularFile. 180 if err := tr.handleSparseFile(hdr, rawHdr, extHdrs); err != nil { 181 return nil, err 182 } 183 return hdr, nil // This is a file, so stop 184 } 185 } 186 } 187 188 // handleRegularFile sets up the current file reader and padding such that it 189 // can only read the following logical data section. It will properly handle 190 // special headers that contain no data section. 191 func (tr *Reader) handleRegularFile(hdr *Header) error { 192 nb := hdr.Size 193 if isHeaderOnlyType(hdr.Typeflag) { 194 nb = 0 195 } 196 if nb < 0 { 197 return ErrHeader 198 } 199 200 tr.pad = -nb & (blockSize - 1) // blockSize is a power of two 201 tr.curr = ®FileReader{r: tr.r, nb: nb} 202 return nil 203 } 204 205 // handleSparseFile checks if the current file is a sparse format of any type 206 // and sets the curr reader appropriately. 207 func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block, extHdrs map[string]string) error { 208 var sp []sparseEntry 209 var err error 210 if hdr.Typeflag == TypeGNUSparse { 211 sp, err = tr.readOldGNUSparseMap(hdr, rawHdr) 212 if err != nil { 213 return err 214 } 215 } else { 216 sp, err = tr.checkForGNUSparsePAXHeaders(hdr, extHdrs) 217 if err != nil { 218 return err 219 } 220 } 221 222 // If sp is non-nil, then this is a sparse file. 223 // Note that it is possible for len(sp) to be zero. 224 if sp != nil { 225 tr.curr, err = newSparseFileReader(tr.curr, sp, hdr.Size) 226 } 227 return err 228 } 229 230 // checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then 231 // this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to 232 // be treated as a regular file. 
233 func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) { 234 var sparseFormat string 235 236 // Check for sparse format indicators 237 major, majorOk := headers[paxGNUSparseMajor] 238 minor, minorOk := headers[paxGNUSparseMinor] 239 sparseName, sparseNameOk := headers[paxGNUSparseName] 240 _, sparseMapOk := headers[paxGNUSparseMap] 241 sparseSize, sparseSizeOk := headers[paxGNUSparseSize] 242 sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize] 243 244 // Identify which, if any, sparse format applies from which PAX headers are set 245 if majorOk && minorOk { 246 sparseFormat = major + "." + minor 247 } else if sparseNameOk && sparseMapOk { 248 sparseFormat = "0.1" 249 } else if sparseSizeOk { 250 sparseFormat = "0.0" 251 } else { 252 // Not a PAX format GNU sparse file. 253 return nil, nil 254 } 255 256 // Check for unknown sparse format 257 if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" { 258 return nil, nil 259 } 260 261 // Update hdr from GNU sparse PAX headers 262 if sparseNameOk { 263 hdr.Name = sparseName 264 } 265 if sparseSizeOk { 266 realSize, err := strconv.ParseInt(sparseSize, 10, 64) 267 if err != nil { 268 return nil, ErrHeader 269 } 270 hdr.Size = realSize 271 } else if sparseRealSizeOk { 272 realSize, err := strconv.ParseInt(sparseRealSize, 10, 64) 273 if err != nil { 274 return nil, ErrHeader 275 } 276 hdr.Size = realSize 277 } 278 279 // Set up the sparse map, according to the particular sparse format in use 280 var sp []sparseEntry 281 var err error 282 switch sparseFormat { 283 case "0.0", "0.1": 284 sp, err = readGNUSparseMap0x1(headers) 285 case "1.0": 286 sp, err = readGNUSparseMap1x0(tr.curr) 287 } 288 return sp, err 289 } 290 291 // mergePAX merges well known headers according to PAX standard. 
// In general headers with the same name as those found
// in the header struct overwrite those found in the header
// struct with higher precision or longer values. Esp. useful
// for name and linkname fields.
//
// Any value that fails to parse causes ErrHeader to be returned.
func mergePAX(hdr *Header, headers map[string]string) (err error) {
	var id64 int64
	for k, v := range headers {
		switch k {
		case paxPath:
			hdr.Name = v
		case paxLinkpath:
			hdr.Linkname = v
		case paxUname:
			hdr.Uname = v
		case paxGname:
			hdr.Gname = v
		case paxUid:
			id64, err = strconv.ParseInt(v, 10, 64)
			hdr.Uid = int(id64) // Integer overflow possible
		case paxGid:
			id64, err = strconv.ParseInt(v, 10, 64)
			hdr.Gid = int(id64) // Integer overflow possible
		case paxAtime:
			hdr.AccessTime, err = parsePAXTime(v)
		case paxMtime:
			hdr.ModTime, err = parsePAXTime(v)
		case paxCtime:
			hdr.ChangeTime, err = parsePAXTime(v)
		case paxSize:
			hdr.Size, err = strconv.ParseInt(v, 10, 64)
		default:
			// Keys carrying the xattr prefix are stored in hdr.Xattrs with
			// the prefix stripped; all other unknown keys are ignored.
			if strings.HasPrefix(k, paxXattr) {
				if hdr.Xattrs == nil {
					hdr.Xattrs = make(map[string]string)
				}
				hdr.Xattrs[k[len(paxXattr):]] = v
			}
		}
		// err is shared by every case above; the specific parse error is
		// replaced by the generic ErrHeader.
		if err != nil {
			return ErrHeader
		}
	}
	return nil
}

// parsePAX parses PAX headers.
// If an extended header (type 'x') is invalid, ErrHeader is returned.
//
// The whole of r is read into memory before parsing. Errors from reading r
// are returned unchanged.
func parsePAX(r io.Reader) (map[string]string, error) {
	buf, err := ioutil.ReadAll(r)
	if err != nil {
		return nil, err
	}
	sbuf := string(buf)

	// For GNU PAX sparse format 0.0 support.
	// This function transforms the sparse format 0.0 headers into format 0.1
	// headers since 0.0 headers were not PAX compliant.
	var sparseMap []string

	extHdrs := make(map[string]string)
	for len(sbuf) > 0 {
		key, value, residual, err := parsePAXRecord(sbuf)
		if err != nil {
			return nil, ErrHeader
		}
		sbuf = residual

		switch key {
		case paxGNUSparseOffset, paxGNUSparseNumBytes:
			// Validate sparse header order and value.
			// Records must strictly alternate offset, numbytes, offset, ...
			// and a value may not contain the ',' used to join the map below.
			if (len(sparseMap)%2 == 0 && key != paxGNUSparseOffset) ||
				(len(sparseMap)%2 == 1 && key != paxGNUSparseNumBytes) ||
				strings.Contains(value, ",") {
				return nil, ErrHeader
			}
			sparseMap = append(sparseMap, value)
		default:
			// According to PAX specification, a value is stored only if it is
			// non-empty. Otherwise, the key is deleted.
			if len(value) > 0 {
				extHdrs[key] = value
			} else {
				delete(extHdrs, key)
			}
		}
	}
	if len(sparseMap) > 0 {
		extHdrs[paxGNUSparseMap] = strings.Join(sparseMap, ",")
	}
	return extHdrs, nil
}

// skipUnread skips any unread bytes in the existing file entry, as well as any
// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is
// encountered in the data portion; it is okay to hit io.EOF in the padding.
//
// Note that this function still works properly even when sparse files are being
// used since numBytes returns the bytes remaining in the underlying io.Reader.
func (tr *Reader) skipUnread() error {
	dataSkip := tr.numBytes()      // Number of data bytes to skip
	totalSkip := dataSkip + tr.pad // Total number of bytes to skip
	tr.curr, tr.pad = nil, 0

	// If possible, Seek to the last byte before the end of the data section.
	// Do this because Seek is often lazy about reporting errors; this will mask
	// the fact that the tar stream may be truncated. We can rely on the
	// io.CopyN done shortly afterwards to trigger any IO errors.
	var seekSkipped int64 // Number of bytes skipped via Seek
	if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 {
		// Not all io.Seeker can actually Seek. For example, os.Stdin implements
		// io.Seeker, but calling Seek always returns an error and performs
		// no action. Thus, we try an innocent seek to the current position
		// to see if Seek is really supported.
		pos1, err := sr.Seek(0, io.SeekCurrent)
		if err == nil {
			// Seek seems supported, so perform the real Seek.
			pos2, err := sr.Seek(dataSkip-1, io.SeekCurrent)
			if err != nil {
				return err
			}
			seekSkipped = pos2 - pos1
		}
	}

	// Read and discard whatever was not skipped by seeking: at least the final
	// data byte (when a seek happened) plus the padding.
	copySkipped, err := io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped)
	if err == io.EOF && seekSkipped+copySkipped < dataSkip {
		err = io.ErrUnexpectedEOF
	}
	return err
}

// readHeader reads the next block header and assumes that the underlying reader
// is already aligned to a block boundary. It returns the raw block of the
// header in case further processing is required.
//
// The returned *block points at the Reader's own scratch buffer, so its
// contents are only valid until that buffer is next overwritten.
//
// The err will be set to io.EOF only when one of the following occurs:
//	* Exactly 0 bytes are read and EOF is hit.
//	* Exactly 1 block of zeros is read and EOF is hit.
//	* At least 2 blocks of zeros are read.
func (tr *Reader) readHeader() (*Header, *block, error) {
	// Two blocks of zero bytes marks the end of the archive.
	if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {
		return nil, nil, err // EOF is okay here; exactly 0 bytes read
	}
	if bytes.Equal(tr.blk[:], zeroBlock[:]) {
		if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {
			return nil, nil, err // EOF is okay here; exactly 1 block of zeros read
		}
		if bytes.Equal(tr.blk[:], zeroBlock[:]) {
			return nil, nil, io.EOF // normal EOF; exactly 2 blocks of zeros read
		}
		return nil, nil, ErrHeader // Zero block and then non-zero block
	}

	// Verify the header matches a known format.
	format := tr.blk.GetFormat()
	if format == formatUnknown {
		return nil, nil, ErrHeader
	}

	// p accumulates the first parse error; it is reported via the final return.
	var p parser
	hdr := new(Header)

	// Unpack the V7 header.
	v7 := tr.blk.V7()
	hdr.Name = p.parseString(v7.Name())
	hdr.Mode = p.parseNumeric(v7.Mode())
	hdr.Uid = int(p.parseNumeric(v7.UID()))
	hdr.Gid = int(p.parseNumeric(v7.GID()))
	hdr.Size = p.parseNumeric(v7.Size())
	hdr.ModTime = time.Unix(p.parseNumeric(v7.ModTime()), 0)
	hdr.Typeflag = v7.TypeFlag()[0]
	hdr.Linkname = p.parseString(v7.LinkName())

	// The atime and ctime fields are often left unused. Some versions of Go
	// had a bug in the tar.Writer where it would output an invalid tar file
	// in certain rare situations because the logic incorrectly believed that
	// the old GNU format had a prefix field. This is wrong and leads to
	// an outputted file that actually mangles the atime and ctime fields.
	//
	// In order to continue reading tar files created by a buggy writer, we
	// try to parse the atime and ctime fields, but just return the zero value
	// of time.Time when we cannot parse them.
	//
	// See https://golang.org/issues/12594
	tryParseTime := func(b []byte) time.Time {
		// A private parser is used so that a failure here does not poison
		// the outer parser's error.
		var p parser
		n := p.parseNumeric(b)
		if b[0] != 0x00 && p.err == nil {
			return time.Unix(n, 0)
		}
		return time.Time{}
	}

	// Unpack format specific fields.
	if format > formatV7 {
		ustar := tr.blk.USTAR()
		hdr.Uname = p.parseString(ustar.UserName())
		hdr.Gname = p.parseString(ustar.GroupName())
		if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
			hdr.Devmajor = p.parseNumeric(ustar.DevMajor())
			hdr.Devminor = p.parseNumeric(ustar.DevMinor())
		}

		var prefix string
		switch format {
		case formatUSTAR:
			ustar := tr.blk.USTAR()
			prefix = p.parseString(ustar.Prefix())
		case formatSTAR:
			star := tr.blk.STAR()
			prefix = p.parseString(star.Prefix())
			hdr.AccessTime = time.Unix(p.parseNumeric(star.AccessTime()), 0)
			hdr.ChangeTime = time.Unix(p.parseNumeric(star.ChangeTime()), 0)
		case formatGNU:
			// No prefix is read for the GNU format; see the note above.
			gnu := tr.blk.GNU()
			hdr.AccessTime = tryParseTime(gnu.AccessTime())
			hdr.ChangeTime = tryParseTime(gnu.ChangeTime())
		}
		if len(prefix) > 0 {
			hdr.Name = prefix + "/" + hdr.Name
		}
	}
	return hdr, &tr.blk, p.err
}

// readOldGNUSparseMap reads the sparse map from the old GNU sparse format.
// The sparse map is stored in the tar header if it's small enough.
// If it's larger than four entries, then one or more extension headers are used
// to store the rest of the sparse map.
//
// The Header.Size does not reflect the size of any extended headers used.
// Thus, this function will read from the raw io.Reader to fetch extra headers.
// This method mutates blk in the process.
//
// On success hdr.Size has been replaced by the "real" size stored in the GNU
// header.
func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, error) {
	// Make sure that the input format is GNU.
	// Unfortunately, the STAR format also has a sparse header format that uses
	// the same type flag but has a completely different layout.
	if blk.GetFormat() != formatGNU {
		return nil, ErrHeader
	}

	var p parser
	hdr.Size = p.parseNumeric(blk.GNU().RealSize())
	if p.err != nil {
		return nil, p.err
	}
	var s sparseArray = blk.GNU().Sparse()
	var sp = make([]sparseEntry, 0, s.MaxEntries())
	for {
		for i := 0; i < s.MaxEntries(); i++ {
			// This termination condition is identical to GNU and BSD tar.
			if s.Entry(i).Offset()[0] == 0x00 {
				break // Don't return, need to process extended headers (even if empty)
			}
			offset := p.parseNumeric(s.Entry(i).Offset())
			numBytes := p.parseNumeric(s.Entry(i).NumBytes())
			if p.err != nil {
				return nil, p.err
			}
			sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
		}

		if s.IsExtended()[0] > 0 {
			// There are more entries. Read an extension header and parse its entries.
			if _, err := io.ReadFull(tr.r, blk[:]); err != nil {
				// An extension block was promised, so running out of input
				// here is a truncated archive rather than a clean end.
				if err == io.EOF {
					err = io.ErrUnexpectedEOF
				}
				return nil, err
			}
			s = blk.Sparse()
			continue
		}
		return sp, nil // Done
	}
}

// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format
// version 1.0. The format of the sparse map consists of a series of
// newline-terminated numeric fields. The first field is the number of entries
// and is always present. Following this are the entries, consisting of two
// fields (offset, numBytes). This function must stop reading at the end
// boundary of the block containing the last newline.
//
// Note that the GNU manual says that numeric values should be encoded in octal
// format. However, the GNU tar utility itself outputs these values in decimal.
// As such, this library treats values as being encoded in decimal.
580 func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { 581 var cntNewline int64 582 var buf bytes.Buffer 583 var blk = make([]byte, blockSize) 584 585 // feedTokens copies data in numBlock chunks from r into buf until there are 586 // at least cnt newlines in buf. It will not read more blocks than needed. 587 var feedTokens = func(cnt int64) error { 588 for cntNewline < cnt { 589 if _, err := io.ReadFull(r, blk); err != nil { 590 if err == io.EOF { 591 err = io.ErrUnexpectedEOF 592 } 593 return err 594 } 595 buf.Write(blk) 596 for _, c := range blk { 597 if c == '\n' { 598 cntNewline++ 599 } 600 } 601 } 602 return nil 603 } 604 605 // nextToken gets the next token delimited by a newline. This assumes that 606 // at least one newline exists in the buffer. 607 var nextToken = func() string { 608 cntNewline-- 609 tok, _ := buf.ReadString('\n') 610 return tok[:len(tok)-1] // Cut off newline 611 } 612 613 // Parse for the number of entries. 614 // Use integer overflow resistant math to check this. 615 if err := feedTokens(1); err != nil { 616 return nil, err 617 } 618 numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int 619 if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { 620 return nil, ErrHeader 621 } 622 623 // Parse for all member entries. 624 // numEntries is trusted after this since a potential attacker must have 625 // committed resources proportional to what this library used. 
626 if err := feedTokens(2 * numEntries); err != nil { 627 return nil, err 628 } 629 sp := make([]sparseEntry, 0, numEntries) 630 for i := int64(0); i < numEntries; i++ { 631 offset, err := strconv.ParseInt(nextToken(), 10, 64) 632 if err != nil { 633 return nil, ErrHeader 634 } 635 numBytes, err := strconv.ParseInt(nextToken(), 10, 64) 636 if err != nil { 637 return nil, ErrHeader 638 } 639 sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) 640 } 641 return sp, nil 642 } 643 644 // readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format 645 // version 0.1. The sparse map is stored in the PAX headers. 646 func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) { 647 // Get number of entries. 648 // Use integer overflow resistant math to check this. 649 numEntriesStr := extHdrs[paxGNUSparseNumBlocks] 650 numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int 651 if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { 652 return nil, ErrHeader 653 } 654 655 // There should be two numbers in sparseMap for each entry. 656 sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",") 657 if int64(len(sparseMap)) != 2*numEntries { 658 return nil, ErrHeader 659 } 660 661 // Loop through the entries in the sparse map. 662 // numEntries is trusted now. 663 sp := make([]sparseEntry, 0, numEntries) 664 for i := int64(0); i < numEntries; i++ { 665 offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64) 666 if err != nil { 667 return nil, ErrHeader 668 } 669 numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64) 670 if err != nil { 671 return nil, ErrHeader 672 } 673 sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) 674 } 675 return sp, nil 676 } 677 678 // numBytes returns the number of bytes left to read in the current file's entry 679 // in the tar archive, or 0 if there is no current file. 
680 func (tr *Reader) numBytes() int64 { 681 if tr.curr == nil { 682 // No current file, so no bytes 683 return 0 684 } 685 return tr.curr.numBytes() 686 } 687 688 // Read reads from the current entry in the tar archive. 689 // It returns 0, io.EOF when it reaches the end of that entry, 690 // until Next is called to advance to the next entry. 691 // 692 // Calling Read on special types like TypeLink, TypeSymLink, TypeChar, 693 // TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what 694 // the Header.Size claims. 695 func (tr *Reader) Read(b []byte) (int, error) { 696 if tr.err != nil { 697 return 0, tr.err 698 } 699 if tr.curr == nil { 700 return 0, io.EOF 701 } 702 703 n, err := tr.curr.Read(b) 704 if err != nil && err != io.EOF { 705 tr.err = err 706 } 707 return n, err 708 } 709 710 func (rfr *regFileReader) Read(b []byte) (n int, err error) { 711 if rfr.nb == 0 { 712 // file consumed 713 return 0, io.EOF 714 } 715 if int64(len(b)) > rfr.nb { 716 b = b[0:rfr.nb] 717 } 718 n, err = rfr.r.Read(b) 719 rfr.nb -= int64(n) 720 721 if err == io.EOF && rfr.nb > 0 { 722 err = io.ErrUnexpectedEOF 723 } 724 return 725 } 726 727 // numBytes returns the number of bytes left to read in the file's data in the tar archive. 728 func (rfr *regFileReader) numBytes() int64 { 729 return rfr.nb 730 } 731 732 // newSparseFileReader creates a new sparseFileReader, but validates all of the 733 // sparse entries before doing so. 734 func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) { 735 if total < 0 { 736 return nil, ErrHeader // Total size cannot be negative 737 } 738 739 // Validate all sparse entries. These are the same checks as performed by 740 // the BSD tar utility. 
741 for i, s := range sp { 742 switch { 743 case s.offset < 0 || s.numBytes < 0: 744 return nil, ErrHeader // Negative values are never okay 745 case s.offset > math.MaxInt64-s.numBytes: 746 return nil, ErrHeader // Integer overflow with large length 747 case s.offset+s.numBytes > total: 748 return nil, ErrHeader // Region extends beyond the "real" size 749 case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset: 750 return nil, ErrHeader // Regions can't overlap and must be in order 751 } 752 } 753 return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil 754 } 755 756 // readHole reads a sparse hole ending at endOffset. 757 func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int { 758 n64 := endOffset - sfr.pos 759 if n64 > int64(len(b)) { 760 n64 = int64(len(b)) 761 } 762 n := int(n64) 763 for i := 0; i < n; i++ { 764 b[i] = 0 765 } 766 sfr.pos += n64 767 return n 768 } 769 770 // Read reads the sparse file data in expanded form. 771 func (sfr *sparseFileReader) Read(b []byte) (n int, err error) { 772 // Skip past all empty fragments. 773 for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 { 774 sfr.sp = sfr.sp[1:] 775 } 776 777 // If there are no more fragments, then it is possible that there 778 // is one last sparse hole. 779 if len(sfr.sp) == 0 { 780 // This behavior matches the BSD tar utility. 781 // However, GNU tar stops returning data even if sfr.total is unmet. 782 if sfr.pos < sfr.total { 783 return sfr.readHole(b, sfr.total), nil 784 } 785 return 0, io.EOF 786 } 787 788 // In front of a data fragment, so read a hole. 789 if sfr.pos < sfr.sp[0].offset { 790 return sfr.readHole(b, sfr.sp[0].offset), nil 791 } 792 793 // In a data fragment, so read from it. 794 // This math is overflow free since we verify that offset and numBytes can 795 // be safely added when creating the sparseFileReader. 
796 endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment 797 bytesLeft := endPos - sfr.pos // Bytes left in fragment 798 if int64(len(b)) > bytesLeft { 799 b = b[:bytesLeft] 800 } 801 802 n, err = sfr.rfr.Read(b) 803 sfr.pos += int64(n) 804 if err == io.EOF { 805 if sfr.pos < endPos { 806 err = io.ErrUnexpectedEOF // There was supposed to be more data 807 } else if sfr.pos < sfr.total { 808 err = nil // There is still an implicit sparse hole at the end 809 } 810 } 811 812 if sfr.pos == endPos { 813 sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it 814 } 815 return n, err 816 } 817 818 // numBytes returns the number of bytes left to read in the sparse file's 819 // sparse-encoded data in the tar archive. 820 func (sfr *sparseFileReader) numBytes() int64 { 821 return sfr.rfr.numBytes() 822 }