github.com/hlts2/go@v0.0.0-20170904000733-812b34efaed8/src/archive/tar/reader.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package tar 6 7 import ( 8 "bytes" 9 "io" 10 "io/ioutil" 11 "strconv" 12 "strings" 13 "time" 14 ) 15 16 // Reader provides sequential access to the contents of a tar archive. 17 // Reader.Next advances to the next file in the archive (including the first), 18 // and then Reader can be treated as an io.Reader to access the file's data. 19 type Reader struct { 20 r io.Reader 21 pad int64 // Amount of padding (ignored) after current file entry 22 curr fileReader // Reader for current file entry 23 blk block // Buffer to use as temporary local storage 24 25 // err is a persistent error. 26 // It is only the responsibility of every exported method of Reader to 27 // ensure that this error is sticky. 28 err error 29 } 30 31 type fileReader interface { 32 io.Reader 33 fileState 34 35 Discard(n int64) (int64, error) 36 } 37 38 // NewReader creates a new Reader reading from r. 39 func NewReader(r io.Reader) *Reader { 40 return &Reader{r: r, curr: ®FileReader{r, 0}} 41 } 42 43 // Next advances to the next entry in the tar archive. 44 // The Header.Size determines how many bytes can be read for the next file. 45 // Any remaining data in the current file is automatically discarded. 46 // 47 // io.EOF is returned at the end of the input. 48 func (tr *Reader) Next() (*Header, error) { 49 if tr.err != nil { 50 return nil, tr.err 51 } 52 hdr, err := tr.next() 53 tr.err = err 54 return hdr, err 55 } 56 57 func (tr *Reader) next() (*Header, error) { 58 var paxHdrs map[string]string 59 var gnuLongName, gnuLongLink string 60 61 // Externally, Next iterates through the tar archive as if it is a series of 62 // files. Internally, the tar format often uses fake "files" to add meta 63 // data that describes the next file. 
These meta data "files" should not 64 // normally be visible to the outside. As such, this loop iterates through 65 // one or more "header files" until it finds a "normal file". 66 format := FormatUSTAR | FormatPAX | FormatGNU 67 loop: 68 for { 69 // Discard the remainder of the file and any padding. 70 if _, err := tr.curr.Discard(tr.curr.Remaining()); err != nil { 71 return nil, err 72 } 73 if _, err := tryReadFull(tr.r, tr.blk[:tr.pad]); err != nil { 74 return nil, err 75 } 76 tr.pad = 0 77 78 hdr, rawHdr, err := tr.readHeader() 79 if err != nil { 80 return nil, err 81 } 82 if err := tr.handleRegularFile(hdr); err != nil { 83 return nil, err 84 } 85 format.mayOnlyBe(hdr.Format) 86 87 // Check for PAX/GNU special headers and files. 88 switch hdr.Typeflag { 89 case TypeXHeader, TypeXGlobalHeader: 90 format.mayOnlyBe(FormatPAX) 91 paxHdrs, err = parsePAX(tr) 92 if err != nil { 93 return nil, err 94 } 95 if hdr.Typeflag == TypeXGlobalHeader { 96 mergePAX(hdr, paxHdrs) 97 return &Header{ 98 Typeflag: hdr.Typeflag, 99 Xattrs: hdr.Xattrs, 100 PAXRecords: hdr.PAXRecords, 101 Format: format, 102 }, nil 103 } 104 continue loop // This is a meta header affecting the next header 105 case TypeGNULongName, TypeGNULongLink: 106 format.mayOnlyBe(FormatGNU) 107 realname, err := ioutil.ReadAll(tr) 108 if err != nil { 109 return nil, err 110 } 111 112 var p parser 113 switch hdr.Typeflag { 114 case TypeGNULongName: 115 gnuLongName = p.parseString(realname) 116 case TypeGNULongLink: 117 gnuLongLink = p.parseString(realname) 118 } 119 continue loop // This is a meta header affecting the next header 120 default: 121 // The old GNU sparse format is handled here since it is technically 122 // just a regular file with additional attributes. 
123 124 if err := mergePAX(hdr, paxHdrs); err != nil { 125 return nil, err 126 } 127 if gnuLongName != "" { 128 hdr.Name = gnuLongName 129 } 130 if gnuLongLink != "" { 131 hdr.Linkname = gnuLongLink 132 } 133 134 // The extended headers may have updated the size. 135 // Thus, setup the regFileReader again after merging PAX headers. 136 if err := tr.handleRegularFile(hdr); err != nil { 137 return nil, err 138 } 139 140 // Sparse formats rely on being able to read from the logical data 141 // section; there must be a preceding call to handleRegularFile. 142 if err := tr.handleSparseFile(hdr, rawHdr); err != nil { 143 return nil, err 144 } 145 146 // Set the final guess at the format. 147 if format.has(FormatUSTAR) && format.has(FormatPAX) { 148 format.mayOnlyBe(FormatUSTAR) 149 } 150 hdr.Format = format 151 return hdr, nil // This is a file, so stop 152 } 153 } 154 } 155 156 // handleRegularFile sets up the current file reader and padding such that it 157 // can only read the following logical data section. It will properly handle 158 // special headers that contain no data section. 159 func (tr *Reader) handleRegularFile(hdr *Header) error { 160 nb := hdr.Size 161 if isHeaderOnlyType(hdr.Typeflag) { 162 nb = 0 163 } 164 if nb < 0 { 165 return ErrHeader 166 } 167 168 tr.pad = blockPadding(nb) 169 tr.curr = ®FileReader{r: tr.r, nb: nb} 170 return nil 171 } 172 173 // handleSparseFile checks if the current file is a sparse format of any type 174 // and sets the curr reader appropriately. 175 func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block) error { 176 var spd sparseDatas 177 var err error 178 if hdr.Typeflag == TypeGNUSparse { 179 spd, err = tr.readOldGNUSparseMap(hdr, rawHdr) 180 } else { 181 spd, err = tr.readGNUSparsePAXHeaders(hdr) 182 } 183 184 // If sp is non-nil, then this is a sparse file. 185 // Note that it is possible for len(sp) == 0. 
186 if err == nil && spd != nil { 187 if isHeaderOnlyType(hdr.Typeflag) || !validateSparseEntries(spd, hdr.Size) { 188 return ErrHeader 189 } 190 sph := invertSparseEntries(spd, hdr.Size) 191 tr.curr = &sparseFileReader{tr.curr, sph, 0} 192 hdr.SparseHoles = append([]SparseEntry{}, sph...) 193 } 194 return err 195 } 196 197 // readGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. 198 // If they are found, then this function reads the sparse map and returns it. 199 // This assumes that 0.0 headers have already been converted to 0.1 headers 200 // by the the PAX header parsing logic. 201 func (tr *Reader) readGNUSparsePAXHeaders(hdr *Header) (sparseDatas, error) { 202 // Identify the version of GNU headers. 203 var is1x0 bool 204 major, minor := hdr.PAXRecords[paxGNUSparseMajor], hdr.PAXRecords[paxGNUSparseMinor] 205 switch { 206 case major == "0" && (minor == "0" || minor == "1"): 207 is1x0 = false 208 case major == "1" && minor == "0": 209 is1x0 = true 210 case major != "" || minor != "": 211 return nil, nil // Unknown GNU sparse PAX version 212 case hdr.PAXRecords[paxGNUSparseMap] != "": 213 is1x0 = false // 0.0 and 0.1 did not have explicit version records, so guess 214 default: 215 return nil, nil // Not a PAX format GNU sparse file. 216 } 217 hdr.Format.mayOnlyBe(FormatPAX) 218 219 // Update hdr from GNU sparse PAX headers. 220 if name := hdr.PAXRecords[paxGNUSparseName]; name != "" { 221 hdr.Name = name 222 } 223 size := hdr.PAXRecords[paxGNUSparseSize] 224 if size == "" { 225 size = hdr.PAXRecords[paxGNUSparseRealSize] 226 } 227 if size != "" { 228 n, err := strconv.ParseInt(size, 10, 64) 229 if err != nil { 230 return nil, ErrHeader 231 } 232 hdr.Size = n 233 } 234 235 // Read the sparse map according to the appropriate format. 236 if is1x0 { 237 return readGNUSparseMap1x0(tr.curr) 238 } else { 239 return readGNUSparseMap0x1(hdr.PAXRecords) 240 } 241 } 242 243 // mergePAX merges paxHdrs into hdr for all relevant fields of Header. 
244 func mergePAX(hdr *Header, paxHdrs map[string]string) (err error) { 245 for k, v := range paxHdrs { 246 if v == "" { 247 continue // Keep the original USTAR value 248 } 249 var id64 int64 250 switch k { 251 case paxPath: 252 hdr.Name = v 253 case paxLinkpath: 254 hdr.Linkname = v 255 case paxUname: 256 hdr.Uname = v 257 case paxGname: 258 hdr.Gname = v 259 case paxUid: 260 id64, err = strconv.ParseInt(v, 10, 64) 261 hdr.Uid = int(id64) // Integer overflow possible 262 case paxGid: 263 id64, err = strconv.ParseInt(v, 10, 64) 264 hdr.Gid = int(id64) // Integer overflow possible 265 case paxAtime: 266 hdr.AccessTime, err = parsePAXTime(v) 267 case paxMtime: 268 hdr.ModTime, err = parsePAXTime(v) 269 case paxCtime: 270 hdr.ChangeTime, err = parsePAXTime(v) 271 case paxSize: 272 hdr.Size, err = strconv.ParseInt(v, 10, 64) 273 default: 274 if strings.HasPrefix(k, paxSchilyXattr) { 275 if hdr.Xattrs == nil { 276 hdr.Xattrs = make(map[string]string) 277 } 278 hdr.Xattrs[k[len(paxSchilyXattr):]] = v 279 } 280 } 281 if err != nil { 282 return ErrHeader 283 } 284 } 285 hdr.PAXRecords = paxHdrs 286 return nil 287 } 288 289 // parsePAX parses PAX headers. 290 // If an extended header (type 'x') is invalid, ErrHeader is returned 291 func parsePAX(r io.Reader) (map[string]string, error) { 292 buf, err := ioutil.ReadAll(r) 293 if err != nil { 294 return nil, err 295 } 296 sbuf := string(buf) 297 298 // For GNU PAX sparse format 0.0 support. 299 // This function transforms the sparse format 0.0 headers into format 0.1 300 // headers since 0.0 headers were not PAX compliant. 301 var sparseMap []string 302 303 paxHdrs := make(map[string]string) 304 for len(sbuf) > 0 { 305 key, value, residual, err := parsePAXRecord(sbuf) 306 if err != nil { 307 return nil, ErrHeader 308 } 309 sbuf = residual 310 311 switch key { 312 case paxGNUSparseOffset, paxGNUSparseNumBytes: 313 // Validate sparse header order and value. 
314 if (len(sparseMap)%2 == 0 && key != paxGNUSparseOffset) || 315 (len(sparseMap)%2 == 1 && key != paxGNUSparseNumBytes) || 316 strings.Contains(value, ",") { 317 return nil, ErrHeader 318 } 319 sparseMap = append(sparseMap, value) 320 default: 321 paxHdrs[key] = value 322 } 323 } 324 if len(sparseMap) > 0 { 325 paxHdrs[paxGNUSparseMap] = strings.Join(sparseMap, ",") 326 } 327 return paxHdrs, nil 328 } 329 330 // readHeader reads the next block header and assumes that the underlying reader 331 // is already aligned to a block boundary. It returns the raw block of the 332 // header in case further processing is required. 333 // 334 // The err will be set to io.EOF only when one of the following occurs: 335 // * Exactly 0 bytes are read and EOF is hit. 336 // * Exactly 1 block of zeros is read and EOF is hit. 337 // * At least 2 blocks of zeros are read. 338 func (tr *Reader) readHeader() (*Header, *block, error) { 339 // Two blocks of zero bytes marks the end of the archive. 340 if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil { 341 return nil, nil, err // EOF is okay here; exactly 0 bytes read 342 } 343 if bytes.Equal(tr.blk[:], zeroBlock[:]) { 344 if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil { 345 return nil, nil, err // EOF is okay here; exactly 1 block of zeros read 346 } 347 if bytes.Equal(tr.blk[:], zeroBlock[:]) { 348 return nil, nil, io.EOF // normal EOF; exactly 2 block of zeros read 349 } 350 return nil, nil, ErrHeader // Zero block and then non-zero block 351 } 352 353 // Verify the header matches a known format. 354 format := tr.blk.GetFormat() 355 if format == FormatUnknown { 356 return nil, nil, ErrHeader 357 } 358 359 var p parser 360 hdr := new(Header) 361 362 // Unpack the V7 header. 
363 v7 := tr.blk.V7() 364 hdr.Typeflag = v7.TypeFlag()[0] 365 hdr.Name = p.parseString(v7.Name()) 366 hdr.Linkname = p.parseString(v7.LinkName()) 367 hdr.Size = p.parseNumeric(v7.Size()) 368 hdr.Mode = p.parseNumeric(v7.Mode()) 369 hdr.Uid = int(p.parseNumeric(v7.UID())) 370 hdr.Gid = int(p.parseNumeric(v7.GID())) 371 hdr.ModTime = time.Unix(p.parseNumeric(v7.ModTime()), 0) 372 373 // Unpack format specific fields. 374 if format > formatV7 { 375 ustar := tr.blk.USTAR() 376 hdr.Uname = p.parseString(ustar.UserName()) 377 hdr.Gname = p.parseString(ustar.GroupName()) 378 hdr.Devmajor = p.parseNumeric(ustar.DevMajor()) 379 hdr.Devminor = p.parseNumeric(ustar.DevMinor()) 380 381 var prefix string 382 switch { 383 case format.has(FormatUSTAR | FormatPAX): 384 hdr.Format = format 385 ustar := tr.blk.USTAR() 386 prefix = p.parseString(ustar.Prefix()) 387 388 // For Format detection, check if block is properly formatted since 389 // the parser is more liberal than what USTAR actually permits. 390 notASCII := func(r rune) bool { return r >= 0x80 } 391 if bytes.IndexFunc(tr.blk[:], notASCII) >= 0 { 392 hdr.Format = FormatUnknown // Non-ASCII characters in block. 
393 } 394 nul := func(b []byte) bool { return int(b[len(b)-1]) == 0 } 395 if !(nul(v7.Size()) && nul(v7.Mode()) && nul(v7.UID()) && nul(v7.GID()) && 396 nul(v7.ModTime()) && nul(ustar.DevMajor()) && nul(ustar.DevMinor())) { 397 hdr.Format = FormatUnknown // Numeric fields must end in NUL 398 } 399 case format.has(formatSTAR): 400 star := tr.blk.STAR() 401 prefix = p.parseString(star.Prefix()) 402 hdr.AccessTime = time.Unix(p.parseNumeric(star.AccessTime()), 0) 403 hdr.ChangeTime = time.Unix(p.parseNumeric(star.ChangeTime()), 0) 404 case format.has(FormatGNU): 405 hdr.Format = format 406 var p2 parser 407 gnu := tr.blk.GNU() 408 if b := gnu.AccessTime(); b[0] != 0 { 409 hdr.AccessTime = time.Unix(p2.parseNumeric(b), 0) 410 } 411 if b := gnu.ChangeTime(); b[0] != 0 { 412 hdr.ChangeTime = time.Unix(p2.parseNumeric(b), 0) 413 } 414 415 // Prior to Go1.8, the Writer had a bug where it would output 416 // an invalid tar file in certain rare situations because the logic 417 // incorrectly believed that the old GNU format had a prefix field. 418 // This is wrong and leads to an output file that mangles the 419 // atime and ctime fields, which are often left unused. 420 // 421 // In order to continue reading tar files created by former, buggy 422 // versions of Go, we skeptically parse the atime and ctime fields. 423 // If we are unable to parse them and the prefix field looks like 424 // an ASCII string, then we fallback on the pre-Go1.8 behavior 425 // of treating these fields as the USTAR prefix field. 426 // 427 // Note that this will not use the fallback logic for all possible 428 // files generated by a pre-Go1.8 toolchain. If the generated file 429 // happened to have a prefix field that parses as valid 430 // atime and ctime fields (e.g., when they are valid octal strings), 431 // then it is impossible to distinguish between an valid GNU file 432 // and an invalid pre-Go1.8 file. 
433 // 434 // See https://golang.org/issues/12594 435 // See https://golang.org/issues/21005 436 if p2.err != nil { 437 hdr.AccessTime, hdr.ChangeTime = time.Time{}, time.Time{} 438 ustar := tr.blk.USTAR() 439 if s := p.parseString(ustar.Prefix()); isASCII(s) { 440 prefix = s 441 } 442 hdr.Format = FormatUnknown // Buggy file is not GNU 443 } 444 } 445 if len(prefix) > 0 { 446 hdr.Name = prefix + "/" + hdr.Name 447 } 448 } 449 return hdr, &tr.blk, p.err 450 } 451 452 // readOldGNUSparseMap reads the sparse map from the old GNU sparse format. 453 // The sparse map is stored in the tar header if it's small enough. 454 // If it's larger than four entries, then one or more extension headers are used 455 // to store the rest of the sparse map. 456 // 457 // The Header.Size does not reflect the size of any extended headers used. 458 // Thus, this function will read from the raw io.Reader to fetch extra headers. 459 // This method mutates blk in the process. 460 func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, error) { 461 // Make sure that the input format is GNU. 462 // Unfortunately, the STAR format also has a sparse header format that uses 463 // the same type flag but has a completely different layout. 464 if blk.GetFormat() != FormatGNU { 465 return nil, ErrHeader 466 } 467 hdr.Format.mayOnlyBe(FormatGNU) 468 469 var p parser 470 hdr.Size = p.parseNumeric(blk.GNU().RealSize()) 471 if p.err != nil { 472 return nil, p.err 473 } 474 s := blk.GNU().Sparse() 475 spd := make(sparseDatas, 0, s.MaxEntries()) 476 for { 477 for i := 0; i < s.MaxEntries(); i++ { 478 // This termination condition is identical to GNU and BSD tar. 
479 if s.Entry(i).Offset()[0] == 0x00 { 480 break // Don't return, need to process extended headers (even if empty) 481 } 482 offset := p.parseNumeric(s.Entry(i).Offset()) 483 length := p.parseNumeric(s.Entry(i).Length()) 484 if p.err != nil { 485 return nil, p.err 486 } 487 spd = append(spd, SparseEntry{Offset: offset, Length: length}) 488 } 489 490 if s.IsExtended()[0] > 0 { 491 // There are more entries. Read an extension header and parse its entries. 492 if _, err := mustReadFull(tr.r, blk[:]); err != nil { 493 return nil, err 494 } 495 s = blk.Sparse() 496 continue 497 } 498 return spd, nil // Done 499 } 500 } 501 502 // readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format 503 // version 1.0. The format of the sparse map consists of a series of 504 // newline-terminated numeric fields. The first field is the number of entries 505 // and is always present. Following this are the entries, consisting of two 506 // fields (offset, length). This function must stop reading at the end 507 // boundary of the block containing the last newline. 508 // 509 // Note that the GNU manual says that numeric values should be encoded in octal 510 // format. However, the GNU tar utility itself outputs these values in decimal. 511 // As such, this library treats values as being encoded in decimal. 512 func readGNUSparseMap1x0(r io.Reader) (sparseDatas, error) { 513 var ( 514 cntNewline int64 515 buf bytes.Buffer 516 blk block 517 ) 518 519 // feedTokens copies data in blocks from r into buf until there are 520 // at least cnt newlines in buf. It will not read more blocks than needed. 521 feedTokens := func(n int64) error { 522 for cntNewline < n { 523 if _, err := mustReadFull(r, blk[:]); err != nil { 524 return err 525 } 526 buf.Write(blk[:]) 527 for _, c := range blk { 528 if c == '\n' { 529 cntNewline++ 530 } 531 } 532 } 533 return nil 534 } 535 536 // nextToken gets the next token delimited by a newline. 
This assumes that 537 // at least one newline exists in the buffer. 538 nextToken := func() string { 539 cntNewline-- 540 tok, _ := buf.ReadString('\n') 541 return strings.TrimRight(tok, "\n") 542 } 543 544 // Parse for the number of entries. 545 // Use integer overflow resistant math to check this. 546 if err := feedTokens(1); err != nil { 547 return nil, err 548 } 549 numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int 550 if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { 551 return nil, ErrHeader 552 } 553 554 // Parse for all member entries. 555 // numEntries is trusted after this since a potential attacker must have 556 // committed resources proportional to what this library used. 557 if err := feedTokens(2 * numEntries); err != nil { 558 return nil, err 559 } 560 spd := make(sparseDatas, 0, numEntries) 561 for i := int64(0); i < numEntries; i++ { 562 offset, err1 := strconv.ParseInt(nextToken(), 10, 64) 563 length, err2 := strconv.ParseInt(nextToken(), 10, 64) 564 if err1 != nil || err2 != nil { 565 return nil, ErrHeader 566 } 567 spd = append(spd, SparseEntry{Offset: offset, Length: length}) 568 } 569 return spd, nil 570 } 571 572 // readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format 573 // version 0.1. The sparse map is stored in the PAX headers. 574 func readGNUSparseMap0x1(paxHdrs map[string]string) (sparseDatas, error) { 575 // Get number of entries. 576 // Use integer overflow resistant math to check this. 577 numEntriesStr := paxHdrs[paxGNUSparseNumBlocks] 578 numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int 579 if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { 580 return nil, ErrHeader 581 } 582 583 // There should be two numbers in sparseMap for each entry. 
584 sparseMap := strings.Split(paxHdrs[paxGNUSparseMap], ",") 585 if len(sparseMap) == 1 && sparseMap[0] == "" { 586 sparseMap = sparseMap[:0] 587 } 588 if int64(len(sparseMap)) != 2*numEntries { 589 return nil, ErrHeader 590 } 591 592 // Loop through the entries in the sparse map. 593 // numEntries is trusted now. 594 spd := make(sparseDatas, 0, numEntries) 595 for len(sparseMap) >= 2 { 596 offset, err1 := strconv.ParseInt(sparseMap[0], 10, 64) 597 length, err2 := strconv.ParseInt(sparseMap[1], 10, 64) 598 if err1 != nil || err2 != nil { 599 return nil, ErrHeader 600 } 601 spd = append(spd, SparseEntry{Offset: offset, Length: length}) 602 sparseMap = sparseMap[2:] 603 } 604 return spd, nil 605 } 606 607 // Read reads from the current file in the tar archive. 608 // It returns (0, io.EOF) when it reaches the end of that file, 609 // until Next is called to advance to the next file. 610 // 611 // If the current file is sparse, then the regions marked as a hole 612 // are read back as NUL-bytes. 613 // 614 // Calling Read on special types like TypeLink, TypeSymLink, TypeChar, 615 // TypeBlock, TypeDir, and TypeFifo returns (0, io.EOF) regardless of what 616 // the Header.Size claims. 617 func (tr *Reader) Read(b []byte) (int, error) { 618 if tr.err != nil { 619 return 0, tr.err 620 } 621 n, err := tr.curr.Read(b) 622 if err != nil && err != io.EOF { 623 tr.err = err 624 } 625 return n, err 626 } 627 628 // TODO(dsnet): Export the Reader.Discard method to assist in quickly 629 // skipping over sections of a file. This is especially useful: 630 // * when skipping through an underlying io.Reader that is also an io.Seeker. 631 // * when skipping over large holes in a sparse file. 632 633 // discard skips the next n bytes in the current file, 634 // returning the number of bytes discarded. 635 // If fewer than n bytes are discarded, it returns an non-nil error, 636 // which may be io.EOF if there are no more remaining bytes in the current file. 
637 func (tr *Reader) discard(n int64) (int64, error) { 638 if tr.err != nil { 639 return 0, tr.err 640 } 641 n, err := tr.curr.Discard(n) 642 if err != nil && err != io.EOF { 643 tr.err = err 644 } 645 return n, err 646 } 647 648 // regFileReader is a fileReader for reading data from a regular file entry. 649 type regFileReader struct { 650 r io.Reader // Underlying Reader 651 nb int64 // Number of remaining bytes to read 652 } 653 654 func (fr *regFileReader) Read(b []byte) (int, error) { 655 if int64(len(b)) > fr.nb { 656 b = b[:fr.nb] 657 } 658 n, err := fr.r.Read(b) 659 fr.nb -= int64(n) 660 switch { 661 case err == io.EOF && fr.nb > 0: 662 return n, io.ErrUnexpectedEOF 663 case err == nil && fr.nb == 0: 664 return n, io.EOF 665 default: 666 return n, err 667 } 668 } 669 670 func (fr *regFileReader) Discard(n int64) (int64, error) { 671 overread := n > fr.Remaining() 672 if overread { 673 n = fr.Remaining() 674 } 675 676 // If possible, Seek to the last byte before the end of the data section. 677 // Do this because Seek is often lazy about reporting errors; this will mask 678 // the fact that the stream may be truncated. We can rely on the 679 // io.CopyN done shortly afterwards to trigger any IO errors. 680 var seekSkipped int64 // Number of bytes skipped via Seek 681 if sr, ok := fr.r.(io.Seeker); ok && n > 1 { 682 // Not all io.Seeker can actually Seek. For example, os.Stdin implements 683 // io.Seeker, but calling Seek always returns an error and performs 684 // no action. Thus, we try an innocent seek to the current position 685 // to see if Seek is really supported. 686 pos1, err := sr.Seek(0, io.SeekCurrent) 687 if pos1 >= 0 && err == nil { 688 // Seek seems supported, so perform the real Seek. 
689 pos2, err := sr.Seek(n-1, io.SeekCurrent) 690 if pos2 < 0 || err != nil { 691 return 0, err 692 } 693 seekSkipped = pos2 - pos1 694 } 695 } 696 697 copySkipped, err := io.CopyN(ioutil.Discard, fr.r, n-seekSkipped) 698 discarded := seekSkipped + copySkipped 699 fr.nb -= discarded 700 switch { 701 case err == io.EOF && discarded < n: 702 return discarded, io.ErrUnexpectedEOF 703 case err == nil && overread: 704 return discarded, io.EOF 705 default: 706 return discarded, err 707 } 708 } 709 710 func (rf regFileReader) Remaining() int64 { 711 return rf.nb 712 } 713 714 // sparseFileReader is a fileReader for reading data from a sparse file entry. 715 type sparseFileReader struct { 716 fr fileReader // Underlying fileReader 717 sp sparseHoles // Normalized list of sparse holes 718 pos int64 // Current position in sparse file 719 } 720 721 func (sr *sparseFileReader) Read(b []byte) (n int, err error) { 722 finished := int64(len(b)) >= sr.Remaining() 723 if finished { 724 b = b[:sr.Remaining()] 725 } 726 727 b0 := b 728 endPos := sr.pos + int64(len(b)) 729 for endPos > sr.pos && err == nil { 730 var nf int // Bytes read in fragment 731 holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset() 732 if sr.pos < holeStart { // In a data fragment 733 bf := b[:min(int64(len(b)), holeStart-sr.pos)] 734 nf, err = tryReadFull(sr.fr, bf) 735 } else { // In a hole fragment 736 bf := b[:min(int64(len(b)), holeEnd-sr.pos)] 737 nf, err = tryReadFull(zeroReader{}, bf) 738 } 739 b = b[nf:] 740 sr.pos += int64(nf) 741 if sr.pos >= holeEnd && len(sr.sp) > 1 { 742 sr.sp = sr.sp[1:] // Ensure last fragment always remains 743 } 744 } 745 746 n = len(b0) - len(b) 747 switch { 748 case err == io.EOF: 749 return n, errMissData // Less data in dense file than sparse file 750 case err != nil: 751 return n, err 752 case sr.Remaining() == 0 && sr.fr.Remaining() > 0: 753 return n, errUnrefData // More data in dense file than sparse file 754 case finished: 755 return n, io.EOF 756 default: 757 
return n, nil 758 } 759 } 760 761 func (sr *sparseFileReader) Discard(n int64) (int64, error) { 762 overread := n > sr.Remaining() 763 if overread { 764 n = sr.Remaining() 765 } 766 767 var realDiscard int64 // Number of real data bytes to discard 768 endPos := sr.pos + n 769 for endPos > sr.pos { 770 var nf int64 // Size of fragment 771 holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset() 772 if sr.pos < holeStart { // In a data fragment 773 nf = min(endPos-sr.pos, holeStart-sr.pos) 774 realDiscard += nf 775 } else { // In a hole fragment 776 nf = min(endPos-sr.pos, holeEnd-sr.pos) 777 } 778 sr.pos += nf 779 if sr.pos >= holeEnd && len(sr.sp) > 1 { 780 sr.sp = sr.sp[1:] // Ensure last fragment always remains 781 } 782 } 783 784 _, err := sr.fr.Discard(realDiscard) 785 switch { 786 case err == io.EOF: 787 return n, errMissData // Less data in dense file than sparse file 788 case err != nil: 789 return n, err 790 case sr.Remaining() == 0 && sr.fr.Remaining() > 0: 791 return n, errUnrefData // More data in dense file than sparse file 792 case overread: 793 return n, io.EOF 794 default: 795 return n, nil 796 } 797 } 798 799 func (sr sparseFileReader) Remaining() int64 { 800 return sr.sp[len(sr.sp)-1].endOffset() - sr.pos 801 } 802 803 type zeroReader struct{} 804 805 func (zeroReader) Read(b []byte) (int, error) { 806 for i := range b { 807 b[i] = 0 808 } 809 return len(b), nil 810 } 811 812 // mustReadFull is like io.ReadFull except it returns 813 // io.ErrUnexpectedEOF when io.EOF is hit before len(b) bytes are read. 814 func mustReadFull(r io.Reader, b []byte) (int, error) { 815 n, err := tryReadFull(r, b) 816 if err == io.EOF { 817 err = io.ErrUnexpectedEOF 818 } 819 return n, err 820 } 821 822 // tryReadFull is like io.ReadFull except it returns 823 // io.EOF when it is hit before len(b) bytes are read. 
func tryReadFull(r io.Reader, b []byte) (n int, err error) {
	// Keep reading until b is full or the reader reports any error.
	// An empty b performs no reads at all.
	for len(b) > n && err == nil {
		var nn int
		nn, err = r.Read(b[n:])
		n += nn
	}
	// A reader may return the final bytes together with io.EOF;
	// that still counts as a complete read.
	if len(b) == n && err == io.EOF {
		err = nil
	}
	return n, err
}