github.com/m10x/go/src@v0.0.0-20220112094212-ba61592315da/archive/zip/reader.go (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package zip 6 7 import ( 8 "bufio" 9 "encoding/binary" 10 "errors" 11 "hash" 12 "hash/crc32" 13 "io" 14 "io/fs" 15 "os" 16 "path" 17 "sort" 18 "strings" 19 "sync" 20 "time" 21 ) 22 23 var ( 24 ErrFormat = errors.New("zip: not a valid zip file") 25 ErrAlgorithm = errors.New("zip: unsupported compression algorithm") 26 ErrChecksum = errors.New("zip: checksum error") 27 ) 28 29 // A Reader serves content from a ZIP archive. 30 type Reader struct { 31 r io.ReaderAt 32 File []*File 33 Comment string 34 decompressors map[uint16]Decompressor 35 36 // fileList is a list of files sorted by ename, 37 // for use by the Open method. 38 fileListOnce sync.Once 39 fileList []fileListEntry 40 } 41 42 // A ReadCloser is a Reader that must be closed when no longer needed. 43 type ReadCloser struct { 44 f *os.File 45 Reader 46 } 47 48 // A File is a single file in a ZIP archive. 49 // The file information is in the embedded FileHeader. 50 // The file content can be accessed by calling Open. 51 type File struct { 52 FileHeader 53 zip *Reader 54 zipr io.ReaderAt 55 headerOffset int64 56 zip64 bool // zip64 extended information extra field presence 57 descErr error // error reading the data descriptor during init 58 } 59 60 // OpenReader will open the Zip file specified by name and return a ReadCloser. 61 func OpenReader(name string) (*ReadCloser, error) { 62 f, err := os.Open(name) 63 if err != nil { 64 return nil, err 65 } 66 fi, err := f.Stat() 67 if err != nil { 68 f.Close() 69 return nil, err 70 } 71 r := new(ReadCloser) 72 if err := r.init(f, fi.Size()); err != nil { 73 f.Close() 74 return nil, err 75 } 76 r.f = f 77 return r, nil 78 } 79 80 // NewReader returns a new Reader reading from r, which is assumed to 81 // have the given size in bytes. 82 func NewReader(r io.ReaderAt, size int64) (*Reader, error) { 83 if size < 0 { 84 return nil, errors.New("zip: size cannot be negative") 85 } 86 zr := new(Reader) 87 if err := zr.init(r, size); err != nil { 88 return nil, err 89 } 90 return zr, nil 91 } 92 93 func (z *Reader) init(r io.ReaderAt, size int64) error { 94 end, err := readDirectoryEnd(r, size) 95 if err != nil { 96 return err 97 } 98 z.r = r 99 // Since the number of directory records is not validated, it is not 100 // safe to preallocate z.File without first checking that the specified 101 // number of files is reasonable, since a malformed archive may 102 // indicate it contains up to 1 << 128 - 1 files. Since each file has a 103 // header which will be _at least_ 30 bytes we can safely preallocate 104 // if (data size / 30) >= end.directoryRecords. 105 if end.directorySize < uint64(size) && (uint64(size)-end.directorySize)/30 >= end.directoryRecords { 106 z.File = make([]*File, 0, end.directoryRecords) 107 } 108 z.Comment = end.comment 109 rs := io.NewSectionReader(r, 0, size) 110 if _, err = rs.Seek(int64(end.directoryOffset), io.SeekStart); err != nil { 111 return err 112 } 113 buf := bufio.NewReader(rs) 114 115 // The count of files inside a zip is truncated to fit in a uint16. 116 // Gloss over this by reading headers until we encounter 117 // a bad one, and then only report an ErrFormat or UnexpectedEOF if 118 // the file count modulo 65536 is incorrect. 119 for { 120 f := &File{zip: z, zipr: r} 121 err = readDirectoryHeader(f, buf) 122 if err == ErrFormat || err == io.ErrUnexpectedEOF { 123 break 124 } 125 if err != nil { 126 return err 127 } 128 z.File = append(z.File, f) 129 } 130 if uint16(len(z.File)) != uint16(end.directoryRecords) { // only compare 16 bits here 131 // Return the readDirectoryHeader error if we read 132 // the wrong number of directory entries. 133 return err 134 } 135 return nil 136 } 137 138 // RegisterDecompressor registers or overrides a custom decompressor for a 139 // specific method ID. If a decompressor for a given method is not found, 140 // Reader will default to looking up the decompressor at the package level. 141 func (z *Reader) RegisterDecompressor(method uint16, dcomp Decompressor) { 142 if z.decompressors == nil { 143 z.decompressors = make(map[uint16]Decompressor) 144 } 145 z.decompressors[method] = dcomp 146 } 147 148 func (z *Reader) decompressor(method uint16) Decompressor { 149 dcomp := z.decompressors[method] 150 if dcomp == nil { 151 dcomp = decompressor(method) 152 } 153 return dcomp 154 } 155 156 // Close closes the Zip file, rendering it unusable for I/O. 157 func (rc *ReadCloser) Close() error { 158 return rc.f.Close() 159 } 160 161 // DataOffset returns the offset of the file's possibly-compressed 162 // data, relative to the beginning of the zip file. 163 // 164 // Most callers should instead use Open, which transparently 165 // decompresses data and verifies checksums. 166 func (f *File) DataOffset() (offset int64, err error) { 167 bodyOffset, err := f.findBodyOffset() 168 if err != nil { 169 return 170 } 171 return f.headerOffset + bodyOffset, nil 172 } 173 174 // Open returns a ReadCloser that provides access to the File's contents. 175 // Multiple files may be read concurrently. 176 func (f *File) Open() (io.ReadCloser, error) { 177 bodyOffset, err := f.findBodyOffset() 178 if err != nil { 179 return nil, err 180 } 181 size := int64(f.CompressedSize64) 182 r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size) 183 dcomp := f.zip.decompressor(f.Method) 184 if dcomp == nil { 185 return nil, ErrAlgorithm 186 } 187 var rc io.ReadCloser = dcomp(r) 188 var desr io.Reader 189 if f.hasDataDescriptor() { 190 desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen) 191 } 192 rc = &checksumReader{ 193 rc: rc, 194 hash: crc32.NewIEEE(), 195 f: f, 196 desr: desr, 197 } 198 return rc, nil 199 } 200 201 // OpenRaw returns a Reader that provides access to the File's contents without 202 // decompression. 203 func (f *File) OpenRaw() (io.Reader, error) { 204 bodyOffset, err := f.findBodyOffset() 205 if err != nil { 206 return nil, err 207 } 208 r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, int64(f.CompressedSize64)) 209 return r, nil 210 } 211 212 type checksumReader struct { 213 rc io.ReadCloser 214 hash hash.Hash32 215 nread uint64 // number of bytes read so far 216 f *File 217 desr io.Reader // if non-nil, where to read the data descriptor 218 err error // sticky error 219 } 220 221 func (r *checksumReader) Stat() (fs.FileInfo, error) { 222 return headerFileInfo{&r.f.FileHeader}, nil 223 } 224 225 func (r *checksumReader) Read(b []byte) (n int, err error) { 226 if r.err != nil { 227 return 0, r.err 228 } 229 n, err = r.rc.Read(b) 230 r.hash.Write(b[:n]) 231 r.nread += uint64(n) 232 if err == nil { 233 return 234 } 235 if err == io.EOF { 236 if r.nread != r.f.UncompressedSize64 { 237 return 0, io.ErrUnexpectedEOF 238 } 239 if r.desr != nil { 240 if err1 := readDataDescriptor(r.desr, r.f); err1 != nil { 241 if err1 == io.EOF { 242 err = io.ErrUnexpectedEOF 243 } else { 244 err = err1 245 } 246 } else if r.hash.Sum32() != r.f.CRC32 { 247 err = ErrChecksum 248 } 249 } else { 250 // If there's not a data descriptor, we still compare 251 // the CRC32 of what we've read against the file header 252 // or TOC's CRC32, if it seems like it was set. 253 if r.f.CRC32 != 0 && r.hash.Sum32() != r.f.CRC32 { 254 err = ErrChecksum 255 } 256 } 257 } 258 r.err = err 259 return 260 } 261 262 func (r *checksumReader) Close() error { return r.rc.Close() } 263 264 // findBodyOffset does the minimum work to verify the file has a header 265 // and returns the file body offset. 266 func (f *File) findBodyOffset() (int64, error) { 267 var buf [fileHeaderLen]byte 268 if _, err := f.zipr.ReadAt(buf[:], f.headerOffset); err != nil { 269 return 0, err 270 } 271 b := readBuf(buf[:]) 272 if sig := b.uint32(); sig != fileHeaderSignature { 273 return 0, ErrFormat 274 } 275 b = b[22:] // skip over most of the header 276 filenameLen := int(b.uint16()) 277 extraLen := int(b.uint16()) 278 return int64(fileHeaderLen + filenameLen + extraLen), nil 279 } 280 281 // readDirectoryHeader attempts to read a directory header from r. 282 // It returns io.ErrUnexpectedEOF if it cannot read a complete header, 283 // and ErrFormat if it doesn't find a valid header signature. 284 func readDirectoryHeader(f *File, r io.Reader) error { 285 var buf [directoryHeaderLen]byte 286 if _, err := io.ReadFull(r, buf[:]); err != nil { 287 return err 288 } 289 b := readBuf(buf[:]) 290 if sig := b.uint32(); sig != directoryHeaderSignature { 291 return ErrFormat 292 } 293 f.CreatorVersion = b.uint16() 294 f.ReaderVersion = b.uint16() 295 f.Flags = b.uint16() 296 f.Method = b.uint16() 297 f.ModifiedTime = b.uint16() 298 f.ModifiedDate = b.uint16() 299 f.CRC32 = b.uint32() 300 f.CompressedSize = b.uint32() 301 f.UncompressedSize = b.uint32() 302 f.CompressedSize64 = uint64(f.CompressedSize) 303 f.UncompressedSize64 = uint64(f.UncompressedSize) 304 filenameLen := int(b.uint16()) 305 extraLen := int(b.uint16()) 306 commentLen := int(b.uint16()) 307 b = b[4:] // skipped start disk number and internal attributes (2x uint16) 308 f.ExternalAttrs = b.uint32() 309 f.headerOffset = int64(b.uint32()) 310 d := make([]byte, filenameLen+extraLen+commentLen) 311 if _, err := io.ReadFull(r, d); err != nil { 312 return err 313 } 314 f.Name = string(d[:filenameLen]) 315 f.Extra = d[filenameLen : filenameLen+extraLen] 316 f.Comment = string(d[filenameLen+extraLen:]) 317 318 // Determine the character encoding. 319 utf8Valid1, utf8Require1 := detectUTF8(f.Name) 320 utf8Valid2, utf8Require2 := detectUTF8(f.Comment) 321 switch { 322 case !utf8Valid1 || !utf8Valid2: 323 // Name and Comment definitely not UTF-8. 324 f.NonUTF8 = true 325 case !utf8Require1 && !utf8Require2: 326 // Name and Comment use only single-byte runes that overlap with UTF-8. 327 f.NonUTF8 = false 328 default: 329 // Might be UTF-8, might be some other encoding; preserve existing flag. 330 // Some ZIP writers use UTF-8 encoding without setting the UTF-8 flag. 331 // Since it is impossible to always distinguish valid UTF-8 from some 332 // other encoding (e.g., GBK or Shift-JIS), we trust the flag. 333 f.NonUTF8 = f.Flags&0x800 == 0 334 } 335 336 needUSize := f.UncompressedSize == ^uint32(0) 337 needCSize := f.CompressedSize == ^uint32(0) 338 needHeaderOffset := f.headerOffset == int64(^uint32(0)) 339 340 // Best effort to find what we need. 341 // Other zip authors might not even follow the basic format, 342 // and we'll just ignore the Extra content in that case. 343 var modified time.Time 344 parseExtras: 345 for extra := readBuf(f.Extra); len(extra) >= 4; { // need at least tag and size 346 fieldTag := extra.uint16() 347 fieldSize := int(extra.uint16()) 348 if len(extra) < fieldSize { 349 break 350 } 351 fieldBuf := extra.sub(fieldSize) 352 353 switch fieldTag { 354 case zip64ExtraID: 355 f.zip64 = true 356 357 // update directory values from the zip64 extra block. 358 // They should only be consulted if the sizes read earlier 359 // are maxed out. 360 // See golang.org/issue/13367. 361 if needUSize { 362 needUSize = false 363 if len(fieldBuf) < 8 { 364 return ErrFormat 365 } 366 f.UncompressedSize64 = fieldBuf.uint64() 367 } 368 if needCSize { 369 needCSize = false 370 if len(fieldBuf) < 8 { 371 return ErrFormat 372 } 373 f.CompressedSize64 = fieldBuf.uint64() 374 } 375 if needHeaderOffset { 376 needHeaderOffset = false 377 if len(fieldBuf) < 8 { 378 return ErrFormat 379 } 380 f.headerOffset = int64(fieldBuf.uint64()) 381 } 382 case ntfsExtraID: 383 if len(fieldBuf) < 4 { 384 continue parseExtras 385 } 386 fieldBuf.uint32() // reserved (ignored) 387 for len(fieldBuf) >= 4 { // need at least tag and size 388 attrTag := fieldBuf.uint16() 389 attrSize := int(fieldBuf.uint16()) 390 if len(fieldBuf) < attrSize { 391 continue parseExtras 392 } 393 attrBuf := fieldBuf.sub(attrSize) 394 if attrTag != 1 || attrSize != 24 { 395 continue // Ignore irrelevant attributes 396 } 397 398 const ticksPerSecond = 1e7 // Windows timestamp resolution 399 ts := int64(attrBuf.uint64()) // ModTime since Windows epoch 400 secs := int64(ts / ticksPerSecond) 401 nsecs := (1e9 / ticksPerSecond) * int64(ts%ticksPerSecond) 402 epoch := time.Date(1601, time.January, 1, 0, 0, 0, 0, time.UTC) 403 modified = time.Unix(epoch.Unix()+secs, nsecs) 404 } 405 case unixExtraID, infoZipUnixExtraID: 406 if len(fieldBuf) < 8 { 407 continue parseExtras 408 } 409 fieldBuf.uint32() // AcTime (ignored) 410 ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch 411 modified = time.Unix(ts, 0) 412 case extTimeExtraID: 413 if len(fieldBuf) < 5 || fieldBuf.uint8()&1 == 0 { 414 continue parseExtras 415 } 416 ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch 417 modified = time.Unix(ts, 0) 418 } 419 } 420 421 msdosModified := msDosTimeToTime(f.ModifiedDate, f.ModifiedTime) 422 f.Modified = msdosModified 423 if !modified.IsZero() { 424 f.Modified = modified.UTC() 425 426 // If legacy MS-DOS timestamps are set, we can use the delta between 427 // the legacy and extended versions to estimate timezone offset. 428 // 429 // A non-UTC timezone is always used (even if offset is zero). 430 // Thus, FileHeader.Modified.Location() == time.UTC is useful for 431 // determining whether extended timestamps are present. 432 // This is necessary for users that need to do additional time 433 // calculations when dealing with legacy ZIP formats. 434 if f.ModifiedTime != 0 || f.ModifiedDate != 0 { 435 f.Modified = modified.In(timeZone(msdosModified.Sub(modified))) 436 } 437 } 438 439 // Assume that uncompressed size 2³²-1 could plausibly happen in 440 // an old zip32 file that was sharding inputs into the largest chunks 441 // possible (or is just malicious; search the web for 42.zip). 442 // If needUSize is true still, it means we didn't see a zip64 extension. 443 // As long as the compressed size is not also 2³²-1 (implausible) 444 // and the header is not also 2³²-1 (equally implausible), 445 // accept the uncompressed size 2³²-1 as valid. 446 // If nothing else, this keeps archive/zip working with 42.zip. 447 _ = needUSize 448 449 if needCSize || needHeaderOffset { 450 return ErrFormat 451 } 452 453 return nil 454 } 455 456 func readDataDescriptor(r io.Reader, f *File) error { 457 var buf [dataDescriptorLen]byte 458 // The spec says: "Although not originally assigned a 459 // signature, the value 0x08074b50 has commonly been adopted 460 // as a signature value for the data descriptor record. 461 // Implementers should be aware that ZIP files may be 462 // encountered with or without this signature marking data 463 // descriptors and should account for either case when reading 464 // ZIP files to ensure compatibility." 465 // 466 // dataDescriptorLen includes the size of the signature but 467 // first read just those 4 bytes to see if it exists. 468 if _, err := io.ReadFull(r, buf[:4]); err != nil { 469 return err 470 } 471 off := 0 472 maybeSig := readBuf(buf[:4]) 473 if maybeSig.uint32() != dataDescriptorSignature { 474 // No data descriptor signature. Keep these four 475 // bytes. 476 off += 4 477 } 478 if _, err := io.ReadFull(r, buf[off:12]); err != nil { 479 return err 480 } 481 b := readBuf(buf[:12]) 482 if b.uint32() != f.CRC32 { 483 return ErrChecksum 484 } 485 486 // The two sizes that follow here can be either 32 bits or 64 bits 487 // but the spec is not very clear on this and different 488 // interpretations has been made causing incompatibilities. We 489 // already have the sizes from the central directory so we can 490 // just ignore these. 491 492 return nil 493 } 494 495 func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) { 496 // look for directoryEndSignature in the last 1k, then in the last 65k 497 var buf []byte 498 var directoryEndOffset int64 499 for i, bLen := range []int64{1024, 65 * 1024} { 500 if bLen > size { 501 bLen = size 502 } 503 buf = make([]byte, int(bLen)) 504 if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF { 505 return nil, err 506 } 507 if p := findSignatureInBlock(buf); p >= 0 { 508 buf = buf[p:] 509 directoryEndOffset = size - bLen + int64(p) 510 break 511 } 512 if i == 1 || bLen == size { 513 return nil, ErrFormat 514 } 515 } 516 517 // read header into struct 518 b := readBuf(buf[4:]) // skip signature 519 d := &directoryEnd{ 520 diskNbr: uint32(b.uint16()), 521 dirDiskNbr: uint32(b.uint16()), 522 dirRecordsThisDisk: uint64(b.uint16()), 523 directoryRecords: uint64(b.uint16()), 524 directorySize: uint64(b.uint32()), 525 directoryOffset: uint64(b.uint32()), 526 commentLen: b.uint16(), 527 } 528 l := int(d.commentLen) 529 if l > len(b) { 530 return nil, errors.New("zip: invalid comment length") 531 } 532 d.comment = string(b[:l]) 533 534 // These values mean that the file can be a zip64 file 535 if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff { 536 p, err := findDirectory64End(r, directoryEndOffset) 537 if err == nil && p >= 0 { 538 err = readDirectory64End(r, p, d) 539 } 540 if err != nil { 541 return nil, err 542 } 543 } 544 // Make sure directoryOffset points to somewhere in our file. 545 if o := int64(d.directoryOffset); o < 0 || o >= size { 546 return nil, ErrFormat 547 } 548 return d, nil 549 } 550 551 // findDirectory64End tries to read the zip64 locator just before the 552 // directory end and returns the offset of the zip64 directory end if 553 // found. 554 func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) { 555 locOffset := directoryEndOffset - directory64LocLen 556 if locOffset < 0 { 557 return -1, nil // no need to look for a header outside the file 558 } 559 buf := make([]byte, directory64LocLen) 560 if _, err := r.ReadAt(buf, locOffset); err != nil { 561 return -1, err 562 } 563 b := readBuf(buf) 564 if sig := b.uint32(); sig != directory64LocSignature { 565 return -1, nil 566 } 567 if b.uint32() != 0 { // number of the disk with the start of the zip64 end of central directory 568 return -1, nil // the file is not a valid zip64-file 569 } 570 p := b.uint64() // relative offset of the zip64 end of central directory record 571 if b.uint32() != 1 { // total number of disks 572 return -1, nil // the file is not a valid zip64-file 573 } 574 return int64(p), nil 575 } 576 577 // readDirectory64End reads the zip64 directory end and updates the 578 // directory end with the zip64 directory end values. 579 func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) { 580 buf := make([]byte, directory64EndLen) 581 if _, err := r.ReadAt(buf, offset); err != nil { 582 return err 583 } 584 585 b := readBuf(buf) 586 if sig := b.uint32(); sig != directory64EndSignature { 587 return ErrFormat 588 } 589 590 b = b[12:] // skip dir size, version and version needed (uint64 + 2x uint16) 591 d.diskNbr = b.uint32() // number of this disk 592 d.dirDiskNbr = b.uint32() // number of the disk with the start of the central directory 593 d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk 594 d.directoryRecords = b.uint64() // total number of entries in the central directory 595 d.directorySize = b.uint64() // size of the central directory 596 d.directoryOffset = b.uint64() // offset of start of central directory with respect to the starting disk number 597 598 return nil 599 } 600 601 func findSignatureInBlock(b []byte) int { 602 for i := len(b) - directoryEndLen; i >= 0; i-- { 603 // defined from directoryEndSignature in struct.go 604 if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 { 605 // n is length of comment 606 n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8 607 if n+directoryEndLen+i <= len(b) { 608 return i 609 } 610 } 611 } 612 return -1 613 } 614 615 type readBuf []byte 616 617 func (b *readBuf) uint8() uint8 { 618 v := (*b)[0] 619 *b = (*b)[1:] 620 return v 621 } 622 623 func (b *readBuf) uint16() uint16 { 624 v := binary.LittleEndian.Uint16(*b) 625 *b = (*b)[2:] 626 return v 627 } 628 629 func (b *readBuf) uint32() uint32 { 630 v := binary.LittleEndian.Uint32(*b) 631 *b = (*b)[4:] 632 return v 633 } 634 635 func (b *readBuf) uint64() uint64 { 636 v := binary.LittleEndian.Uint64(*b) 637 *b = (*b)[8:] 638 return v 639 } 640 641 func (b *readBuf) sub(n int) readBuf { 642 b2 := (*b)[:n] 643 *b = (*b)[n:] 644 return b2 645 } 646 647 // A fileListEntry is a File and its ename. 648 // If file == nil, the fileListEntry describes a directory without metadata. 649 type fileListEntry struct { 650 name string 651 file *File 652 isDir bool 653 } 654 655 type fileInfoDirEntry interface { 656 fs.FileInfo 657 fs.DirEntry 658 } 659 660 func (e *fileListEntry) stat() fileInfoDirEntry { 661 if !e.isDir { 662 return headerFileInfo{&e.file.FileHeader} 663 } 664 return e 665 } 666 667 // Only used for directories. 668 func (f *fileListEntry) Name() string { _, elem, _ := split(f.name); return elem } 669 func (f *fileListEntry) Size() int64 { return 0 } 670 func (f *fileListEntry) Mode() fs.FileMode { return fs.ModeDir | 0555 } 671 func (f *fileListEntry) Type() fs.FileMode { return fs.ModeDir } 672 func (f *fileListEntry) IsDir() bool { return true } 673 func (f *fileListEntry) Sys() any { return nil } 674 675 func (f *fileListEntry) ModTime() time.Time { 676 if f.file == nil { 677 return time.Time{} 678 } 679 return f.file.FileHeader.Modified.UTC() 680 } 681 682 func (f *fileListEntry) Info() (fs.FileInfo, error) { return f, nil } 683 684 // toValidName coerces name to be a valid name for fs.FS.Open. 685 func toValidName(name string) string { 686 name = strings.ReplaceAll(name, `\`, `/`) 687 p := path.Clean(name) 688 if strings.HasPrefix(p, "/") { 689 p = p[len("/"):] 690 } 691 for strings.HasPrefix(p, "../") { 692 p = p[len("../"):] 693 } 694 return p 695 } 696 697 func (r *Reader) initFileList() { 698 r.fileListOnce.Do(func() { 699 dirs := make(map[string]bool) 700 knownDirs := make(map[string]bool) 701 for _, file := range r.File { 702 isDir := len(file.Name) > 0 && file.Name[len(file.Name)-1] == '/' 703 name := toValidName(file.Name) 704 if name == "" { 705 continue 706 } 707 for dir := path.Dir(name); dir != "."; dir = path.Dir(dir) { 708 dirs[dir] = true 709 } 710 entry := fileListEntry{ 711 name: name, 712 file: file, 713 isDir: isDir, 714 } 715 r.fileList = append(r.fileList, entry) 716 if isDir { 717 knownDirs[name] = true 718 } 719 } 720 for dir := range dirs { 721 if !knownDirs[dir] { 722 entry := fileListEntry{ 723 name: dir, 724 file: nil, 725 isDir: true, 726 } 727 r.fileList = append(r.fileList, entry) 728 } 729 } 730 731 sort.Slice(r.fileList, func(i, j int) bool { return fileEntryLess(r.fileList[i].name, r.fileList[j].name) }) 732 }) 733 } 734 735 func fileEntryLess(x, y string) bool { 736 xdir, xelem, _ := split(x) 737 ydir, yelem, _ := split(y) 738 return xdir < ydir || xdir == ydir && xelem < yelem 739 } 740 741 // Open opens the named file in the ZIP archive, 742 // using the semantics of fs.FS.Open: 743 // paths are always slash separated, with no 744 // leading / or ../ elements. 745 func (r *Reader) Open(name string) (fs.File, error) { 746 r.initFileList() 747 748 if !fs.ValidPath(name) { 749 return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid} 750 } 751 e := r.openLookup(name) 752 if e == nil { 753 return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrNotExist} 754 } 755 if e.isDir { 756 return &openDir{e, r.openReadDir(name), 0}, nil 757 } 758 rc, err := e.file.Open() 759 if err != nil { 760 return nil, err 761 } 762 return rc.(fs.File), nil 763 } 764 765 func split(name string) (dir, elem string, isDir bool) { 766 if len(name) > 0 && name[len(name)-1] == '/' { 767 isDir = true 768 name = name[:len(name)-1] 769 } 770 i := len(name) - 1 771 for i >= 0 && name[i] != '/' { 772 i-- 773 } 774 if i < 0 { 775 return ".", name, isDir 776 } 777 return name[:i], name[i+1:], isDir 778 } 779 780 var dotFile = &fileListEntry{name: "./", isDir: true} 781 782 func (r *Reader) openLookup(name string) *fileListEntry { 783 if name == "." { 784 return dotFile 785 } 786 787 dir, elem, _ := split(name) 788 files := r.fileList 789 i := sort.Search(len(files), func(i int) bool { 790 idir, ielem, _ := split(files[i].name) 791 return idir > dir || idir == dir && ielem >= elem 792 }) 793 if i < len(files) { 794 fname := files[i].name 795 if fname == name || len(fname) == len(name)+1 && fname[len(name)] == '/' && fname[:len(name)] == name { 796 return &files[i] 797 } 798 } 799 return nil 800 } 801 802 func (r *Reader) openReadDir(dir string) []fileListEntry { 803 files := r.fileList 804 i := sort.Search(len(files), func(i int) bool { 805 idir, _, _ := split(files[i].name) 806 return idir >= dir 807 }) 808 j := sort.Search(len(files), func(j int) bool { 809 jdir, _, _ := split(files[j].name) 810 return jdir > dir 811 }) 812 return files[i:j] 813 } 814 815 type openDir struct { 816 e *fileListEntry 817 files []fileListEntry 818 offset int 819 } 820 821 func (d *openDir) Close() error { return nil } 822 func (d *openDir) Stat() (fs.FileInfo, error) { return d.e.stat(), nil } 823 824 func (d *openDir) Read([]byte) (int, error) { 825 return 0, &fs.PathError{Op: "read", Path: d.e.name, Err: errors.New("is a directory")} 826 } 827 828 func (d *openDir) ReadDir(count int) ([]fs.DirEntry, error) { 829 n := len(d.files) - d.offset 830 if count > 0 && n > count { 831 n = count 832 } 833 if n == 0 { 834 if count <= 0 { 835 return nil, nil 836 } 837 return nil, io.EOF 838 } 839 list := make([]fs.DirEntry, n) 840 for i := range list { 841 list[i] = d.files[d.offset+i].stat() 842 } 843 d.offset += n 844 return list, nil 845 }