// Source: github.com/mtsmfm/go/src@v0.0.0-20221020090648-44bdcb9f8fde/archive/zip/reader.go

// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package zip

import (
	"bufio"
	"encoding/binary"
	"errors"
	"hash"
	"hash/crc32"
	"io"
	"io/fs"
	"os"
	"path"
	"sort"
	"strings"
	"sync"
	"time"
)

// Sentinel errors reported while reading an archive.
var (
	// ErrFormat is returned when a record does not carry the expected
	// signature or its fields are inconsistent.
	ErrFormat = errors.New("zip: not a valid zip file")
	// ErrAlgorithm is returned by File.Open when no decompressor is
	// available for the entry's compression method.
	ErrAlgorithm = errors.New("zip: unsupported compression algorithm")
	// ErrChecksum is returned when the CRC-32 of the data read does not
	// match the recorded CRC-32.
	ErrChecksum = errors.New("zip: checksum error")
)

// A Reader serves content from a ZIP archive.
type Reader struct {
	// r is the underlying archive data.
	r io.ReaderAt
	// File lists the entries found in the central directory,
	// in the order they were read.
	File []*File
	// Comment is the archive comment taken from the end-of-directory record.
	Comment string
	// decompressors holds per-Reader decompressors registered through
	// RegisterDecompressor; it is nil until the first registration.
	decompressors map[uint16]Decompressor

	// Some JAR files are zip files with a prefix that is a bash script.
	// The baseOffset field is the start of the zip file proper.
	baseOffset int64

	// fileList is a list of files sorted by ename,
	// for use by the Open method.
	// fileListOnce guards its one-time construction.
	fileListOnce sync.Once
	fileList     []fileListEntry
}

// A ReadCloser is a Reader that must be closed when no longer needed.
type ReadCloser struct {
	// f is the file opened by OpenReader; Close closes it.
	f *os.File
	Reader
}

// A File is a single file in a ZIP archive.
// The file information is in the embedded FileHeader.
// The file content can be accessed by calling Open.
type File struct {
	FileHeader
	zip          *Reader     // Reader this entry belongs to; used to look up decompressors
	zipr         io.ReaderAt // archive data the entry's header and body are read from
	headerOffset int64       // includes overall ZIP archive baseOffset
	zip64        bool        // zip64 extended information extra field presence
}

// OpenReader will open the Zip file specified by name and return a ReadCloser.
64 func OpenReader(name string) (*ReadCloser, error) { 65 f, err := os.Open(name) 66 if err != nil { 67 return nil, err 68 } 69 fi, err := f.Stat() 70 if err != nil { 71 f.Close() 72 return nil, err 73 } 74 r := new(ReadCloser) 75 if err := r.init(f, fi.Size()); err != nil { 76 f.Close() 77 return nil, err 78 } 79 r.f = f 80 return r, nil 81 } 82 83 // NewReader returns a new Reader reading from r, which is assumed to 84 // have the given size in bytes. 85 func NewReader(r io.ReaderAt, size int64) (*Reader, error) { 86 if size < 0 { 87 return nil, errors.New("zip: size cannot be negative") 88 } 89 zr := new(Reader) 90 if err := zr.init(r, size); err != nil { 91 return nil, err 92 } 93 return zr, nil 94 } 95 96 func (z *Reader) init(r io.ReaderAt, size int64) error { 97 end, baseOffset, err := readDirectoryEnd(r, size) 98 if err != nil { 99 return err 100 } 101 z.r = r 102 z.baseOffset = baseOffset 103 // Since the number of directory records is not validated, it is not 104 // safe to preallocate z.File without first checking that the specified 105 // number of files is reasonable, since a malformed archive may 106 // indicate it contains up to 1 << 128 - 1 files. Since each file has a 107 // header which will be _at least_ 30 bytes we can safely preallocate 108 // if (data size / 30) >= end.directoryRecords. 109 if end.directorySize < uint64(size) && (uint64(size)-end.directorySize)/30 >= end.directoryRecords { 110 z.File = make([]*File, 0, end.directoryRecords) 111 } 112 z.Comment = end.comment 113 rs := io.NewSectionReader(r, 0, size) 114 if _, err = rs.Seek(z.baseOffset+int64(end.directoryOffset), io.SeekStart); err != nil { 115 return err 116 } 117 buf := bufio.NewReader(rs) 118 119 // The count of files inside a zip is truncated to fit in a uint16. 120 // Gloss over this by reading headers until we encounter 121 // a bad one, and then only report an ErrFormat or UnexpectedEOF if 122 // the file count modulo 65536 is incorrect. 
123 for { 124 f := &File{zip: z, zipr: r} 125 err = readDirectoryHeader(f, buf) 126 127 // For compatibility with other zip programs, 128 // if we have a non-zero base offset and can't read 129 // the first directory header, try again with a zero 130 // base offset. 131 if err == ErrFormat && z.baseOffset != 0 && len(z.File) == 0 { 132 z.baseOffset = 0 133 if _, err = rs.Seek(int64(end.directoryOffset), io.SeekStart); err != nil { 134 return err 135 } 136 buf.Reset(rs) 137 continue 138 } 139 140 if err == ErrFormat || err == io.ErrUnexpectedEOF { 141 break 142 } 143 if err != nil { 144 return err 145 } 146 f.headerOffset += z.baseOffset 147 z.File = append(z.File, f) 148 } 149 if uint16(len(z.File)) != uint16(end.directoryRecords) { // only compare 16 bits here 150 // Return the readDirectoryHeader error if we read 151 // the wrong number of directory entries. 152 return err 153 } 154 return nil 155 } 156 157 // RegisterDecompressor registers or overrides a custom decompressor for a 158 // specific method ID. If a decompressor for a given method is not found, 159 // Reader will default to looking up the decompressor at the package level. 160 func (z *Reader) RegisterDecompressor(method uint16, dcomp Decompressor) { 161 if z.decompressors == nil { 162 z.decompressors = make(map[uint16]Decompressor) 163 } 164 z.decompressors[method] = dcomp 165 } 166 167 func (z *Reader) decompressor(method uint16) Decompressor { 168 dcomp := z.decompressors[method] 169 if dcomp == nil { 170 dcomp = decompressor(method) 171 } 172 return dcomp 173 } 174 175 // Close closes the Zip file, rendering it unusable for I/O. 176 func (rc *ReadCloser) Close() error { 177 return rc.f.Close() 178 } 179 180 // DataOffset returns the offset of the file's possibly-compressed 181 // data, relative to the beginning of the zip file. 182 // 183 // Most callers should instead use Open, which transparently 184 // decompresses data and verifies checksums. 
185 func (f *File) DataOffset() (offset int64, err error) { 186 bodyOffset, err := f.findBodyOffset() 187 if err != nil { 188 return 189 } 190 return f.headerOffset + bodyOffset, nil 191 } 192 193 // Open returns a ReadCloser that provides access to the File's contents. 194 // Multiple files may be read concurrently. 195 func (f *File) Open() (io.ReadCloser, error) { 196 bodyOffset, err := f.findBodyOffset() 197 if err != nil { 198 return nil, err 199 } 200 size := int64(f.CompressedSize64) 201 r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size) 202 dcomp := f.zip.decompressor(f.Method) 203 if dcomp == nil { 204 return nil, ErrAlgorithm 205 } 206 var rc io.ReadCloser = dcomp(r) 207 var desr io.Reader 208 if f.hasDataDescriptor() { 209 desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen) 210 } 211 rc = &checksumReader{ 212 rc: rc, 213 hash: crc32.NewIEEE(), 214 f: f, 215 desr: desr, 216 } 217 return rc, nil 218 } 219 220 // OpenRaw returns a Reader that provides access to the File's contents without 221 // decompression. 
222 func (f *File) OpenRaw() (io.Reader, error) { 223 bodyOffset, err := f.findBodyOffset() 224 if err != nil { 225 return nil, err 226 } 227 r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, int64(f.CompressedSize64)) 228 return r, nil 229 } 230 231 type checksumReader struct { 232 rc io.ReadCloser 233 hash hash.Hash32 234 nread uint64 // number of bytes read so far 235 f *File 236 desr io.Reader // if non-nil, where to read the data descriptor 237 err error // sticky error 238 } 239 240 func (r *checksumReader) Stat() (fs.FileInfo, error) { 241 return headerFileInfo{&r.f.FileHeader}, nil 242 } 243 244 func (r *checksumReader) Read(b []byte) (n int, err error) { 245 if r.err != nil { 246 return 0, r.err 247 } 248 n, err = r.rc.Read(b) 249 r.hash.Write(b[:n]) 250 r.nread += uint64(n) 251 if r.nread > r.f.UncompressedSize64 { 252 return 0, ErrFormat 253 } 254 if err == nil { 255 return 256 } 257 if err == io.EOF { 258 if r.nread != r.f.UncompressedSize64 { 259 return 0, io.ErrUnexpectedEOF 260 } 261 if r.desr != nil { 262 if err1 := readDataDescriptor(r.desr, r.f); err1 != nil { 263 if err1 == io.EOF { 264 err = io.ErrUnexpectedEOF 265 } else { 266 err = err1 267 } 268 } else if r.hash.Sum32() != r.f.CRC32 { 269 err = ErrChecksum 270 } 271 } else { 272 // If there's not a data descriptor, we still compare 273 // the CRC32 of what we've read against the file header 274 // or TOC's CRC32, if it seems like it was set. 275 if r.f.CRC32 != 0 && r.hash.Sum32() != r.f.CRC32 { 276 err = ErrChecksum 277 } 278 } 279 } 280 r.err = err 281 return 282 } 283 284 func (r *checksumReader) Close() error { return r.rc.Close() } 285 286 // findBodyOffset does the minimum work to verify the file has a header 287 // and returns the file body offset. 
288 func (f *File) findBodyOffset() (int64, error) { 289 var buf [fileHeaderLen]byte 290 if _, err := f.zipr.ReadAt(buf[:], f.headerOffset); err != nil { 291 return 0, err 292 } 293 b := readBuf(buf[:]) 294 if sig := b.uint32(); sig != fileHeaderSignature { 295 return 0, ErrFormat 296 } 297 b = b[22:] // skip over most of the header 298 filenameLen := int(b.uint16()) 299 extraLen := int(b.uint16()) 300 return int64(fileHeaderLen + filenameLen + extraLen), nil 301 } 302 303 // readDirectoryHeader attempts to read a directory header from r. 304 // It returns io.ErrUnexpectedEOF if it cannot read a complete header, 305 // and ErrFormat if it doesn't find a valid header signature. 306 func readDirectoryHeader(f *File, r io.Reader) error { 307 var buf [directoryHeaderLen]byte 308 if _, err := io.ReadFull(r, buf[:]); err != nil { 309 return err 310 } 311 b := readBuf(buf[:]) 312 if sig := b.uint32(); sig != directoryHeaderSignature { 313 return ErrFormat 314 } 315 f.CreatorVersion = b.uint16() 316 f.ReaderVersion = b.uint16() 317 f.Flags = b.uint16() 318 f.Method = b.uint16() 319 f.ModifiedTime = b.uint16() 320 f.ModifiedDate = b.uint16() 321 f.CRC32 = b.uint32() 322 f.CompressedSize = b.uint32() 323 f.UncompressedSize = b.uint32() 324 f.CompressedSize64 = uint64(f.CompressedSize) 325 f.UncompressedSize64 = uint64(f.UncompressedSize) 326 filenameLen := int(b.uint16()) 327 extraLen := int(b.uint16()) 328 commentLen := int(b.uint16()) 329 b = b[4:] // skipped start disk number and internal attributes (2x uint16) 330 f.ExternalAttrs = b.uint32() 331 f.headerOffset = int64(b.uint32()) 332 d := make([]byte, filenameLen+extraLen+commentLen) 333 if _, err := io.ReadFull(r, d); err != nil { 334 return err 335 } 336 f.Name = string(d[:filenameLen]) 337 f.Extra = d[filenameLen : filenameLen+extraLen] 338 f.Comment = string(d[filenameLen+extraLen:]) 339 340 // Determine the character encoding. 
341 utf8Valid1, utf8Require1 := detectUTF8(f.Name) 342 utf8Valid2, utf8Require2 := detectUTF8(f.Comment) 343 switch { 344 case !utf8Valid1 || !utf8Valid2: 345 // Name and Comment definitely not UTF-8. 346 f.NonUTF8 = true 347 case !utf8Require1 && !utf8Require2: 348 // Name and Comment use only single-byte runes that overlap with UTF-8. 349 f.NonUTF8 = false 350 default: 351 // Might be UTF-8, might be some other encoding; preserve existing flag. 352 // Some ZIP writers use UTF-8 encoding without setting the UTF-8 flag. 353 // Since it is impossible to always distinguish valid UTF-8 from some 354 // other encoding (e.g., GBK or Shift-JIS), we trust the flag. 355 f.NonUTF8 = f.Flags&0x800 == 0 356 } 357 358 needUSize := f.UncompressedSize == ^uint32(0) 359 needCSize := f.CompressedSize == ^uint32(0) 360 needHeaderOffset := f.headerOffset == int64(^uint32(0)) 361 362 // Best effort to find what we need. 363 // Other zip authors might not even follow the basic format, 364 // and we'll just ignore the Extra content in that case. 365 var modified time.Time 366 parseExtras: 367 for extra := readBuf(f.Extra); len(extra) >= 4; { // need at least tag and size 368 fieldTag := extra.uint16() 369 fieldSize := int(extra.uint16()) 370 if len(extra) < fieldSize { 371 break 372 } 373 fieldBuf := extra.sub(fieldSize) 374 375 switch fieldTag { 376 case zip64ExtraID: 377 f.zip64 = true 378 379 // update directory values from the zip64 extra block. 380 // They should only be consulted if the sizes read earlier 381 // are maxed out. 382 // See golang.org/issue/13367. 
383 if needUSize { 384 needUSize = false 385 if len(fieldBuf) < 8 { 386 return ErrFormat 387 } 388 f.UncompressedSize64 = fieldBuf.uint64() 389 } 390 if needCSize { 391 needCSize = false 392 if len(fieldBuf) < 8 { 393 return ErrFormat 394 } 395 f.CompressedSize64 = fieldBuf.uint64() 396 } 397 if needHeaderOffset { 398 needHeaderOffset = false 399 if len(fieldBuf) < 8 { 400 return ErrFormat 401 } 402 f.headerOffset = int64(fieldBuf.uint64()) 403 } 404 case ntfsExtraID: 405 if len(fieldBuf) < 4 { 406 continue parseExtras 407 } 408 fieldBuf.uint32() // reserved (ignored) 409 for len(fieldBuf) >= 4 { // need at least tag and size 410 attrTag := fieldBuf.uint16() 411 attrSize := int(fieldBuf.uint16()) 412 if len(fieldBuf) < attrSize { 413 continue parseExtras 414 } 415 attrBuf := fieldBuf.sub(attrSize) 416 if attrTag != 1 || attrSize != 24 { 417 continue // Ignore irrelevant attributes 418 } 419 420 const ticksPerSecond = 1e7 // Windows timestamp resolution 421 ts := int64(attrBuf.uint64()) // ModTime since Windows epoch 422 secs := int64(ts / ticksPerSecond) 423 nsecs := (1e9 / ticksPerSecond) * int64(ts%ticksPerSecond) 424 epoch := time.Date(1601, time.January, 1, 0, 0, 0, 0, time.UTC) 425 modified = time.Unix(epoch.Unix()+secs, nsecs) 426 } 427 case unixExtraID, infoZipUnixExtraID: 428 if len(fieldBuf) < 8 { 429 continue parseExtras 430 } 431 fieldBuf.uint32() // AcTime (ignored) 432 ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch 433 modified = time.Unix(ts, 0) 434 case extTimeExtraID: 435 if len(fieldBuf) < 5 || fieldBuf.uint8()&1 == 0 { 436 continue parseExtras 437 } 438 ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch 439 modified = time.Unix(ts, 0) 440 } 441 } 442 443 msdosModified := msDosTimeToTime(f.ModifiedDate, f.ModifiedTime) 444 f.Modified = msdosModified 445 if !modified.IsZero() { 446 f.Modified = modified.UTC() 447 448 // If legacy MS-DOS timestamps are set, we can use the delta between 449 // the legacy and extended versions to 
estimate timezone offset. 450 // 451 // A non-UTC timezone is always used (even if offset is zero). 452 // Thus, FileHeader.Modified.Location() == time.UTC is useful for 453 // determining whether extended timestamps are present. 454 // This is necessary for users that need to do additional time 455 // calculations when dealing with legacy ZIP formats. 456 if f.ModifiedTime != 0 || f.ModifiedDate != 0 { 457 f.Modified = modified.In(timeZone(msdosModified.Sub(modified))) 458 } 459 } 460 461 // Assume that uncompressed size 2³²-1 could plausibly happen in 462 // an old zip32 file that was sharding inputs into the largest chunks 463 // possible (or is just malicious; search the web for 42.zip). 464 // If needUSize is true still, it means we didn't see a zip64 extension. 465 // As long as the compressed size is not also 2³²-1 (implausible) 466 // and the header is not also 2³²-1 (equally implausible), 467 // accept the uncompressed size 2³²-1 as valid. 468 // If nothing else, this keeps archive/zip working with 42.zip. 469 _ = needUSize 470 471 if needCSize || needHeaderOffset { 472 return ErrFormat 473 } 474 475 return nil 476 } 477 478 func readDataDescriptor(r io.Reader, f *File) error { 479 var buf [dataDescriptorLen]byte 480 // The spec says: "Although not originally assigned a 481 // signature, the value 0x08074b50 has commonly been adopted 482 // as a signature value for the data descriptor record. 483 // Implementers should be aware that ZIP files may be 484 // encountered with or without this signature marking data 485 // descriptors and should account for either case when reading 486 // ZIP files to ensure compatibility." 487 // 488 // dataDescriptorLen includes the size of the signature but 489 // first read just those 4 bytes to see if it exists. 490 if _, err := io.ReadFull(r, buf[:4]); err != nil { 491 return err 492 } 493 off := 0 494 maybeSig := readBuf(buf[:4]) 495 if maybeSig.uint32() != dataDescriptorSignature { 496 // No data descriptor signature. 
Keep these four 497 // bytes. 498 off += 4 499 } 500 if _, err := io.ReadFull(r, buf[off:12]); err != nil { 501 return err 502 } 503 b := readBuf(buf[:12]) 504 if b.uint32() != f.CRC32 { 505 return ErrChecksum 506 } 507 508 // The two sizes that follow here can be either 32 bits or 64 bits 509 // but the spec is not very clear on this and different 510 // interpretations has been made causing incompatibilities. We 511 // already have the sizes from the central directory so we can 512 // just ignore these. 513 514 return nil 515 } 516 517 func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, baseOffset int64, err error) { 518 // look for directoryEndSignature in the last 1k, then in the last 65k 519 var buf []byte 520 var directoryEndOffset int64 521 for i, bLen := range []int64{1024, 65 * 1024} { 522 if bLen > size { 523 bLen = size 524 } 525 buf = make([]byte, int(bLen)) 526 if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF { 527 return nil, 0, err 528 } 529 if p := findSignatureInBlock(buf); p >= 0 { 530 buf = buf[p:] 531 directoryEndOffset = size - bLen + int64(p) 532 break 533 } 534 if i == 1 || bLen == size { 535 return nil, 0, ErrFormat 536 } 537 } 538 539 // read header into struct 540 b := readBuf(buf[4:]) // skip signature 541 d := &directoryEnd{ 542 diskNbr: uint32(b.uint16()), 543 dirDiskNbr: uint32(b.uint16()), 544 dirRecordsThisDisk: uint64(b.uint16()), 545 directoryRecords: uint64(b.uint16()), 546 directorySize: uint64(b.uint32()), 547 directoryOffset: uint64(b.uint32()), 548 commentLen: b.uint16(), 549 } 550 l := int(d.commentLen) 551 if l > len(b) { 552 return nil, 0, errors.New("zip: invalid comment length") 553 } 554 d.comment = string(b[:l]) 555 556 // These values mean that the file can be a zip64 file 557 if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff { 558 p, err := findDirectory64End(r, directoryEndOffset) 559 if err == nil && p >= 0 { 560 directoryEndOffset = p 
561 err = readDirectory64End(r, p, d) 562 } 563 if err != nil { 564 return nil, 0, err 565 } 566 } 567 568 baseOffset = directoryEndOffset - int64(d.directorySize) - int64(d.directoryOffset) 569 570 // Make sure directoryOffset points to somewhere in our file. 571 if o := baseOffset + int64(d.directoryOffset); o < 0 || o >= size { 572 return nil, 0, ErrFormat 573 } 574 return d, baseOffset, nil 575 } 576 577 // findDirectory64End tries to read the zip64 locator just before the 578 // directory end and returns the offset of the zip64 directory end if 579 // found. 580 func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) { 581 locOffset := directoryEndOffset - directory64LocLen 582 if locOffset < 0 { 583 return -1, nil // no need to look for a header outside the file 584 } 585 buf := make([]byte, directory64LocLen) 586 if _, err := r.ReadAt(buf, locOffset); err != nil { 587 return -1, err 588 } 589 b := readBuf(buf) 590 if sig := b.uint32(); sig != directory64LocSignature { 591 return -1, nil 592 } 593 if b.uint32() != 0 { // number of the disk with the start of the zip64 end of central directory 594 return -1, nil // the file is not a valid zip64-file 595 } 596 p := b.uint64() // relative offset of the zip64 end of central directory record 597 if b.uint32() != 1 { // total number of disks 598 return -1, nil // the file is not a valid zip64-file 599 } 600 return int64(p), nil 601 } 602 603 // readDirectory64End reads the zip64 directory end and updates the 604 // directory end with the zip64 directory end values. 
605 func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) { 606 buf := make([]byte, directory64EndLen) 607 if _, err := r.ReadAt(buf, offset); err != nil { 608 return err 609 } 610 611 b := readBuf(buf) 612 if sig := b.uint32(); sig != directory64EndSignature { 613 return ErrFormat 614 } 615 616 b = b[12:] // skip dir size, version and version needed (uint64 + 2x uint16) 617 d.diskNbr = b.uint32() // number of this disk 618 d.dirDiskNbr = b.uint32() // number of the disk with the start of the central directory 619 d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk 620 d.directoryRecords = b.uint64() // total number of entries in the central directory 621 d.directorySize = b.uint64() // size of the central directory 622 d.directoryOffset = b.uint64() // offset of start of central directory with respect to the starting disk number 623 624 return nil 625 } 626 627 func findSignatureInBlock(b []byte) int { 628 for i := len(b) - directoryEndLen; i >= 0; i-- { 629 // defined from directoryEndSignature in struct.go 630 if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 { 631 // n is length of comment 632 n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8 633 if n+directoryEndLen+i <= len(b) { 634 return i 635 } 636 } 637 } 638 return -1 639 } 640 641 type readBuf []byte 642 643 func (b *readBuf) uint8() uint8 { 644 v := (*b)[0] 645 *b = (*b)[1:] 646 return v 647 } 648 649 func (b *readBuf) uint16() uint16 { 650 v := binary.LittleEndian.Uint16(*b) 651 *b = (*b)[2:] 652 return v 653 } 654 655 func (b *readBuf) uint32() uint32 { 656 v := binary.LittleEndian.Uint32(*b) 657 *b = (*b)[4:] 658 return v 659 } 660 661 func (b *readBuf) uint64() uint64 { 662 v := binary.LittleEndian.Uint64(*b) 663 *b = (*b)[8:] 664 return v 665 } 666 667 func (b *readBuf) sub(n int) readBuf { 668 b2 := (*b)[:n] 669 *b = (*b)[n:] 670 return b2 671 } 672 673 // A fileListEntry is a File and its 
ename. 674 // If file == nil, the fileListEntry describes a directory without metadata. 675 type fileListEntry struct { 676 name string 677 file *File 678 isDir bool 679 isDup bool 680 } 681 682 type fileInfoDirEntry interface { 683 fs.FileInfo 684 fs.DirEntry 685 } 686 687 func (e *fileListEntry) stat() (fileInfoDirEntry, error) { 688 if e.isDup { 689 return nil, errors.New(e.name + ": duplicate entries in zip file") 690 } 691 if !e.isDir { 692 return headerFileInfo{&e.file.FileHeader}, nil 693 } 694 return e, nil 695 } 696 697 // Only used for directories. 698 func (f *fileListEntry) Name() string { _, elem, _ := split(f.name); return elem } 699 func (f *fileListEntry) Size() int64 { return 0 } 700 func (f *fileListEntry) Mode() fs.FileMode { return fs.ModeDir | 0555 } 701 func (f *fileListEntry) Type() fs.FileMode { return fs.ModeDir } 702 func (f *fileListEntry) IsDir() bool { return true } 703 func (f *fileListEntry) Sys() any { return nil } 704 705 func (f *fileListEntry) ModTime() time.Time { 706 if f.file == nil { 707 return time.Time{} 708 } 709 return f.file.FileHeader.Modified.UTC() 710 } 711 712 func (f *fileListEntry) Info() (fs.FileInfo, error) { return f, nil } 713 714 // toValidName coerces name to be a valid name for fs.FS.Open. 715 func toValidName(name string) string { 716 name = strings.ReplaceAll(name, `\`, `/`) 717 p := path.Clean(name) 718 719 p = strings.TrimPrefix(p, "/") 720 721 for strings.HasPrefix(p, "../") { 722 p = p[len("../"):] 723 } 724 725 return p 726 } 727 728 func (r *Reader) initFileList() { 729 r.fileListOnce.Do(func() { 730 // files and knownDirs map from a file/directory name 731 // to an index into the r.fileList entry that we are 732 // building. They are used to mark duplicate entries. 733 files := make(map[string]int) 734 knownDirs := make(map[string]int) 735 736 // dirs[name] is true if name is known to be a directory, 737 // because it appears as a prefix in a path. 
738 dirs := make(map[string]bool) 739 740 for _, file := range r.File { 741 isDir := len(file.Name) > 0 && file.Name[len(file.Name)-1] == '/' 742 name := toValidName(file.Name) 743 if name == "" { 744 continue 745 } 746 747 if idx, ok := files[name]; ok { 748 r.fileList[idx].isDup = true 749 continue 750 } 751 if idx, ok := knownDirs[name]; ok { 752 r.fileList[idx].isDup = true 753 continue 754 } 755 756 for dir := path.Dir(name); dir != "."; dir = path.Dir(dir) { 757 dirs[dir] = true 758 } 759 760 idx := len(r.fileList) 761 entry := fileListEntry{ 762 name: name, 763 file: file, 764 isDir: isDir, 765 } 766 r.fileList = append(r.fileList, entry) 767 if isDir { 768 knownDirs[name] = idx 769 } else { 770 files[name] = idx 771 } 772 } 773 for dir := range dirs { 774 if _, ok := knownDirs[dir]; !ok { 775 if idx, ok := files[dir]; ok { 776 r.fileList[idx].isDup = true 777 } else { 778 entry := fileListEntry{ 779 name: dir, 780 file: nil, 781 isDir: true, 782 } 783 r.fileList = append(r.fileList, entry) 784 } 785 } 786 } 787 788 sort.Slice(r.fileList, func(i, j int) bool { return fileEntryLess(r.fileList[i].name, r.fileList[j].name) }) 789 }) 790 } 791 792 func fileEntryLess(x, y string) bool { 793 xdir, xelem, _ := split(x) 794 ydir, yelem, _ := split(y) 795 return xdir < ydir || xdir == ydir && xelem < yelem 796 } 797 798 // Open opens the named file in the ZIP archive, 799 // using the semantics of fs.FS.Open: 800 // paths are always slash separated, with no 801 // leading / or ../ elements. 
802 func (r *Reader) Open(name string) (fs.File, error) { 803 r.initFileList() 804 805 if !fs.ValidPath(name) { 806 return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid} 807 } 808 e := r.openLookup(name) 809 if e == nil { 810 return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrNotExist} 811 } 812 if e.isDir { 813 return &openDir{e, r.openReadDir(name), 0}, nil 814 } 815 rc, err := e.file.Open() 816 if err != nil { 817 return nil, err 818 } 819 return rc.(fs.File), nil 820 } 821 822 func split(name string) (dir, elem string, isDir bool) { 823 if len(name) > 0 && name[len(name)-1] == '/' { 824 isDir = true 825 name = name[:len(name)-1] 826 } 827 i := len(name) - 1 828 for i >= 0 && name[i] != '/' { 829 i-- 830 } 831 if i < 0 { 832 return ".", name, isDir 833 } 834 return name[:i], name[i+1:], isDir 835 } 836 837 var dotFile = &fileListEntry{name: "./", isDir: true} 838 839 func (r *Reader) openLookup(name string) *fileListEntry { 840 if name == "." { 841 return dotFile 842 } 843 844 dir, elem, _ := split(name) 845 files := r.fileList 846 i := sort.Search(len(files), func(i int) bool { 847 idir, ielem, _ := split(files[i].name) 848 return idir > dir || idir == dir && ielem >= elem 849 }) 850 if i < len(files) { 851 fname := files[i].name 852 if fname == name || len(fname) == len(name)+1 && fname[len(name)] == '/' && fname[:len(name)] == name { 853 return &files[i] 854 } 855 } 856 return nil 857 } 858 859 func (r *Reader) openReadDir(dir string) []fileListEntry { 860 files := r.fileList 861 i := sort.Search(len(files), func(i int) bool { 862 idir, _, _ := split(files[i].name) 863 return idir >= dir 864 }) 865 j := sort.Search(len(files), func(j int) bool { 866 jdir, _, _ := split(files[j].name) 867 return jdir > dir 868 }) 869 return files[i:j] 870 } 871 872 type openDir struct { 873 e *fileListEntry 874 files []fileListEntry 875 offset int 876 } 877 878 func (d *openDir) Close() error { return nil } 879 func (d *openDir) Stat() (fs.FileInfo, 
error) { return d.e.stat() } 880 881 func (d *openDir) Read([]byte) (int, error) { 882 return 0, &fs.PathError{Op: "read", Path: d.e.name, Err: errors.New("is a directory")} 883 } 884 885 func (d *openDir) ReadDir(count int) ([]fs.DirEntry, error) { 886 n := len(d.files) - d.offset 887 if count > 0 && n > count { 888 n = count 889 } 890 if n == 0 { 891 if count <= 0 { 892 return nil, nil 893 } 894 return nil, io.EOF 895 } 896 list := make([]fs.DirEntry, n) 897 for i := range list { 898 s, err := d.files[d.offset+i].stat() 899 if err != nil { 900 return nil, err 901 } 902 list[i] = s 903 } 904 d.offset += n 905 return list, nil 906 }