// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package zip

import (
	"bufio"
	"encoding/binary"
	"errors"
	"hash"
	"hash/crc32"
	"internal/godebug"
	"io"
	"io/fs"
	"os"
	"path"
	"path/filepath"
	"sort"
	"strings"
	"sync"
	"time"
)

// zipinsecurepath is the GODEBUG setting read by Reader.init: when its value
// is "0", archives containing non-local or backslash-containing names are
// reported with ErrInsecurePath.
var zipinsecurepath = godebug.New("zipinsecurepath")

// Sentinel errors returned by this package.
var (
	ErrFormat       = errors.New("zip: not a valid zip file")
	ErrAlgorithm    = errors.New("zip: unsupported compression algorithm")
	ErrChecksum     = errors.New("zip: checksum error")
	ErrInsecurePath = errors.New("zip: insecure file path")
)

// A Reader serves content from a ZIP archive.
type Reader struct {
	r             io.ReaderAt
	File          []*File
	Comment       string
	decompressors map[uint16]Decompressor // per-Reader overrides; see RegisterDecompressor

	// Some JAR files are zip files with a prefix that is a bash script.
	// The baseOffset field is the start of the zip file proper.
	baseOffset int64

	// fileList is a list of files sorted by ename,
	// for use by the Open method.
	fileListOnce sync.Once
	fileList     []fileListEntry
}

// A ReadCloser is a Reader that must be closed when no longer needed.
type ReadCloser struct {
	f *os.File // underlying file; closed by Close
	Reader
}

// A File is a single file in a ZIP archive.
// The file information is in the embedded FileHeader.
// The file content can be accessed by calling Open.
type File struct {
	FileHeader
	zip          *Reader     // owning Reader; used to look up decompressors
	zipr         io.ReaderAt // source of the raw archive bytes
	headerOffset int64       // includes overall ZIP archive baseOffset
	zip64        bool        // zip64 extended information extra field presence
}

// OpenReader will open the Zip file specified by name and return a ReadCloser.
69 // 70 // If any file inside the archive uses a non-local name 71 // (as defined by [filepath.IsLocal]) or a name containing backslashes 72 // and the GODEBUG environment variable contains `zipinsecurepath=0`, 73 // OpenReader returns the reader with an ErrInsecurePath error. 74 // A future version of Go may introduce this behavior by default. 75 // Programs that want to accept non-local names can ignore 76 // the ErrInsecurePath error and use the returned reader. 77 func OpenReader(name string) (*ReadCloser, error) { 78 f, err := os.Open(name) 79 if err != nil { 80 return nil, err 81 } 82 fi, err := f.Stat() 83 if err != nil { 84 f.Close() 85 return nil, err 86 } 87 r := new(ReadCloser) 88 if err = r.init(f, fi.Size()); err != nil && err != ErrInsecurePath { 89 f.Close() 90 return nil, err 91 } 92 r.f = f 93 return r, err 94 } 95 96 // NewReader returns a new Reader reading from r, which is assumed to 97 // have the given size in bytes. 98 // 99 // If any file inside the archive uses a non-local name 100 // (as defined by [filepath.IsLocal]) or a name containing backslashes 101 // and the GODEBUG environment variable contains `zipinsecurepath=0`, 102 // NewReader returns the reader with an ErrInsecurePath error. 103 // A future version of Go may introduce this behavior by default. 104 // Programs that want to accept non-local names can ignore 105 // the ErrInsecurePath error and use the returned reader. 
106 func NewReader(r io.ReaderAt, size int64) (*Reader, error) { 107 if size < 0 { 108 return nil, errors.New("zip: size cannot be negative") 109 } 110 zr := new(Reader) 111 var err error 112 if err = zr.init(r, size); err != nil && err != ErrInsecurePath { 113 return nil, err 114 } 115 return zr, err 116 } 117 118 func (r *Reader) init(rdr io.ReaderAt, size int64) error { 119 end, baseOffset, err := readDirectoryEnd(rdr, size) 120 if err != nil { 121 return err 122 } 123 r.r = rdr 124 r.baseOffset = baseOffset 125 // Since the number of directory records is not validated, it is not 126 // safe to preallocate r.File without first checking that the specified 127 // number of files is reasonable, since a malformed archive may 128 // indicate it contains up to 1 << 128 - 1 files. Since each file has a 129 // header which will be _at least_ 30 bytes we can safely preallocate 130 // if (data size / 30) >= end.directoryRecords. 131 if end.directorySize < uint64(size) && (uint64(size)-end.directorySize)/30 >= end.directoryRecords { 132 r.File = make([]*File, 0, end.directoryRecords) 133 } 134 r.Comment = end.comment 135 rs := io.NewSectionReader(rdr, 0, size) 136 if _, err = rs.Seek(r.baseOffset+int64(end.directoryOffset), io.SeekStart); err != nil { 137 return err 138 } 139 buf := bufio.NewReader(rs) 140 141 // The count of files inside a zip is truncated to fit in a uint16. 142 // Gloss over this by reading headers until we encounter 143 // a bad one, and then only report an ErrFormat or UnexpectedEOF if 144 // the file count modulo 65536 is incorrect. 
145 for { 146 f := &File{zip: r, zipr: rdr} 147 err = readDirectoryHeader(f, buf) 148 if err == ErrFormat || err == io.ErrUnexpectedEOF { 149 break 150 } 151 if err != nil { 152 return err 153 } 154 f.headerOffset += r.baseOffset 155 r.File = append(r.File, f) 156 } 157 if uint16(len(r.File)) != uint16(end.directoryRecords) { // only compare 16 bits here 158 // Return the readDirectoryHeader error if we read 159 // the wrong number of directory entries. 160 return err 161 } 162 if zipinsecurepath.Value() == "0" { 163 for _, f := range r.File { 164 if f.Name == "" { 165 // Zip permits an empty file name field. 166 continue 167 } 168 // The zip specification states that names must use forward slashes, 169 // so consider any backslashes in the name insecure. 170 if !filepath.IsLocal(f.Name) || strings.Contains(f.Name, `\`) { 171 zipinsecurepath.IncNonDefault() 172 return ErrInsecurePath 173 } 174 } 175 } 176 return nil 177 } 178 179 // RegisterDecompressor registers or overrides a custom decompressor for a 180 // specific method ID. If a decompressor for a given method is not found, 181 // Reader will default to looking up the decompressor at the package level. 182 func (r *Reader) RegisterDecompressor(method uint16, dcomp Decompressor) { 183 if r.decompressors == nil { 184 r.decompressors = make(map[uint16]Decompressor) 185 } 186 r.decompressors[method] = dcomp 187 } 188 189 func (r *Reader) decompressor(method uint16) Decompressor { 190 dcomp := r.decompressors[method] 191 if dcomp == nil { 192 dcomp = decompressor(method) 193 } 194 return dcomp 195 } 196 197 // Close closes the Zip file, rendering it unusable for I/O. 198 func (rc *ReadCloser) Close() error { 199 return rc.f.Close() 200 } 201 202 // DataOffset returns the offset of the file's possibly-compressed 203 // data, relative to the beginning of the zip file. 204 // 205 // Most callers should instead use Open, which transparently 206 // decompresses data and verifies checksums. 
207 func (f *File) DataOffset() (offset int64, err error) { 208 bodyOffset, err := f.findBodyOffset() 209 if err != nil { 210 return 211 } 212 return f.headerOffset + bodyOffset, nil 213 } 214 215 // Open returns a ReadCloser that provides access to the File's contents. 216 // Multiple files may be read concurrently. 217 func (f *File) Open() (io.ReadCloser, error) { 218 bodyOffset, err := f.findBodyOffset() 219 if err != nil { 220 return nil, err 221 } 222 if strings.HasSuffix(f.Name, "/") { 223 // The ZIP specification (APPNOTE.TXT) specifies that directories, which 224 // are technically zero-byte files, must not have any associated file 225 // data. We previously tried failing here if f.CompressedSize64 != 0, 226 // but it turns out that a number of implementations (namely, the Java 227 // jar tool) don't properly set the storage method on directories 228 // resulting in a file with compressed size > 0 but uncompressed size == 229 // 0. We still want to fail when a directory has associated uncompressed 230 // data, but we are tolerant of cases where the uncompressed size is 231 // zero but compressed size is not. 232 if f.UncompressedSize64 != 0 { 233 return &dirReader{ErrFormat}, nil 234 } else { 235 return &dirReader{io.EOF}, nil 236 } 237 } 238 size := int64(f.CompressedSize64) 239 r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size) 240 dcomp := f.zip.decompressor(f.Method) 241 if dcomp == nil { 242 return nil, ErrAlgorithm 243 } 244 var rc io.ReadCloser = dcomp(r) 245 var desr io.Reader 246 if f.hasDataDescriptor() { 247 desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen) 248 } 249 rc = &checksumReader{ 250 rc: rc, 251 hash: crc32.NewIEEE(), 252 f: f, 253 desr: desr, 254 } 255 return rc, nil 256 } 257 258 // OpenRaw returns a Reader that provides access to the File's contents without 259 // decompression. 
260 func (f *File) OpenRaw() (io.Reader, error) { 261 bodyOffset, err := f.findBodyOffset() 262 if err != nil { 263 return nil, err 264 } 265 r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, int64(f.CompressedSize64)) 266 return r, nil 267 } 268 269 type dirReader struct { 270 err error 271 } 272 273 func (r *dirReader) Read([]byte) (int, error) { 274 return 0, r.err 275 } 276 277 func (r *dirReader) Close() error { 278 return nil 279 } 280 281 type checksumReader struct { 282 rc io.ReadCloser 283 hash hash.Hash32 284 nread uint64 // number of bytes read so far 285 f *File 286 desr io.Reader // if non-nil, where to read the data descriptor 287 err error // sticky error 288 } 289 290 func (r *checksumReader) Stat() (fs.FileInfo, error) { 291 return headerFileInfo{&r.f.FileHeader}, nil 292 } 293 294 func (r *checksumReader) Read(b []byte) (n int, err error) { 295 if r.err != nil { 296 return 0, r.err 297 } 298 n, err = r.rc.Read(b) 299 r.hash.Write(b[:n]) 300 r.nread += uint64(n) 301 if r.nread > r.f.UncompressedSize64 { 302 return 0, ErrFormat 303 } 304 if err == nil { 305 return 306 } 307 if err == io.EOF { 308 if r.nread != r.f.UncompressedSize64 { 309 return 0, io.ErrUnexpectedEOF 310 } 311 if r.desr != nil { 312 if err1 := readDataDescriptor(r.desr, r.f); err1 != nil { 313 if err1 == io.EOF { 314 err = io.ErrUnexpectedEOF 315 } else { 316 err = err1 317 } 318 } else if r.hash.Sum32() != r.f.CRC32 { 319 err = ErrChecksum 320 } 321 } else { 322 // If there's not a data descriptor, we still compare 323 // the CRC32 of what we've read against the file header 324 // or TOC's CRC32, if it seems like it was set. 325 if r.f.CRC32 != 0 && r.hash.Sum32() != r.f.CRC32 { 326 err = ErrChecksum 327 } 328 } 329 } 330 r.err = err 331 return 332 } 333 334 func (r *checksumReader) Close() error { return r.rc.Close() } 335 336 // findBodyOffset does the minimum work to verify the file has a header 337 // and returns the file body offset. 
338 func (f *File) findBodyOffset() (int64, error) { 339 var buf [fileHeaderLen]byte 340 if _, err := f.zipr.ReadAt(buf[:], f.headerOffset); err != nil { 341 return 0, err 342 } 343 b := readBuf(buf[:]) 344 if sig := b.uint32(); sig != fileHeaderSignature { 345 return 0, ErrFormat 346 } 347 b = b[22:] // skip over most of the header 348 filenameLen := int(b.uint16()) 349 extraLen := int(b.uint16()) 350 return int64(fileHeaderLen + filenameLen + extraLen), nil 351 } 352 353 // readDirectoryHeader attempts to read a directory header from r. 354 // It returns io.ErrUnexpectedEOF if it cannot read a complete header, 355 // and ErrFormat if it doesn't find a valid header signature. 356 func readDirectoryHeader(f *File, r io.Reader) error { 357 var buf [directoryHeaderLen]byte 358 if _, err := io.ReadFull(r, buf[:]); err != nil { 359 return err 360 } 361 b := readBuf(buf[:]) 362 if sig := b.uint32(); sig != directoryHeaderSignature { 363 return ErrFormat 364 } 365 f.CreatorVersion = b.uint16() 366 f.ReaderVersion = b.uint16() 367 f.Flags = b.uint16() 368 f.Method = b.uint16() 369 f.ModifiedTime = b.uint16() 370 f.ModifiedDate = b.uint16() 371 f.CRC32 = b.uint32() 372 f.CompressedSize = b.uint32() 373 f.UncompressedSize = b.uint32() 374 f.CompressedSize64 = uint64(f.CompressedSize) 375 f.UncompressedSize64 = uint64(f.UncompressedSize) 376 filenameLen := int(b.uint16()) 377 extraLen := int(b.uint16()) 378 commentLen := int(b.uint16()) 379 b = b[4:] // skipped start disk number and internal attributes (2x uint16) 380 f.ExternalAttrs = b.uint32() 381 f.headerOffset = int64(b.uint32()) 382 d := make([]byte, filenameLen+extraLen+commentLen) 383 if _, err := io.ReadFull(r, d); err != nil { 384 return err 385 } 386 f.Name = string(d[:filenameLen]) 387 f.Extra = d[filenameLen : filenameLen+extraLen] 388 f.Comment = string(d[filenameLen+extraLen:]) 389 390 // Determine the character encoding. 
391 utf8Valid1, utf8Require1 := detectUTF8(f.Name) 392 utf8Valid2, utf8Require2 := detectUTF8(f.Comment) 393 switch { 394 case !utf8Valid1 || !utf8Valid2: 395 // Name and Comment definitely not UTF-8. 396 f.NonUTF8 = true 397 case !utf8Require1 && !utf8Require2: 398 // Name and Comment use only single-byte runes that overlap with UTF-8. 399 f.NonUTF8 = false 400 default: 401 // Might be UTF-8, might be some other encoding; preserve existing flag. 402 // Some ZIP writers use UTF-8 encoding without setting the UTF-8 flag. 403 // Since it is impossible to always distinguish valid UTF-8 from some 404 // other encoding (e.g., GBK or Shift-JIS), we trust the flag. 405 f.NonUTF8 = f.Flags&0x800 == 0 406 } 407 408 needUSize := f.UncompressedSize == ^uint32(0) 409 needCSize := f.CompressedSize == ^uint32(0) 410 needHeaderOffset := f.headerOffset == int64(^uint32(0)) 411 412 // Best effort to find what we need. 413 // Other zip authors might not even follow the basic format, 414 // and we'll just ignore the Extra content in that case. 415 var modified time.Time 416 parseExtras: 417 for extra := readBuf(f.Extra); len(extra) >= 4; { // need at least tag and size 418 fieldTag := extra.uint16() 419 fieldSize := int(extra.uint16()) 420 if len(extra) < fieldSize { 421 break 422 } 423 fieldBuf := extra.sub(fieldSize) 424 425 switch fieldTag { 426 case zip64ExtraID: 427 f.zip64 = true 428 429 // update directory values from the zip64 extra block. 430 // They should only be consulted if the sizes read earlier 431 // are maxed out. 432 // See golang.org/issue/13367. 
433 if needUSize { 434 needUSize = false 435 if len(fieldBuf) < 8 { 436 return ErrFormat 437 } 438 f.UncompressedSize64 = fieldBuf.uint64() 439 } 440 if needCSize { 441 needCSize = false 442 if len(fieldBuf) < 8 { 443 return ErrFormat 444 } 445 f.CompressedSize64 = fieldBuf.uint64() 446 } 447 if needHeaderOffset { 448 needHeaderOffset = false 449 if len(fieldBuf) < 8 { 450 return ErrFormat 451 } 452 f.headerOffset = int64(fieldBuf.uint64()) 453 } 454 case ntfsExtraID: 455 if len(fieldBuf) < 4 { 456 continue parseExtras 457 } 458 fieldBuf.uint32() // reserved (ignored) 459 for len(fieldBuf) >= 4 { // need at least tag and size 460 attrTag := fieldBuf.uint16() 461 attrSize := int(fieldBuf.uint16()) 462 if len(fieldBuf) < attrSize { 463 continue parseExtras 464 } 465 attrBuf := fieldBuf.sub(attrSize) 466 if attrTag != 1 || attrSize != 24 { 467 continue // Ignore irrelevant attributes 468 } 469 470 const ticksPerSecond = 1e7 // Windows timestamp resolution 471 ts := int64(attrBuf.uint64()) // ModTime since Windows epoch 472 secs := int64(ts / ticksPerSecond) 473 nsecs := (1e9 / ticksPerSecond) * int64(ts%ticksPerSecond) 474 epoch := time.Date(1601, time.January, 1, 0, 0, 0, 0, time.UTC) 475 modified = time.Unix(epoch.Unix()+secs, nsecs) 476 } 477 case unixExtraID, infoZipUnixExtraID: 478 if len(fieldBuf) < 8 { 479 continue parseExtras 480 } 481 fieldBuf.uint32() // AcTime (ignored) 482 ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch 483 modified = time.Unix(ts, 0) 484 case extTimeExtraID: 485 if len(fieldBuf) < 5 || fieldBuf.uint8()&1 == 0 { 486 continue parseExtras 487 } 488 ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch 489 modified = time.Unix(ts, 0) 490 } 491 } 492 493 msdosModified := msDosTimeToTime(f.ModifiedDate, f.ModifiedTime) 494 f.Modified = msdosModified 495 if !modified.IsZero() { 496 f.Modified = modified.UTC() 497 498 // If legacy MS-DOS timestamps are set, we can use the delta between 499 // the legacy and extended versions to 
estimate timezone offset. 500 // 501 // A non-UTC timezone is always used (even if offset is zero). 502 // Thus, FileHeader.Modified.Location() == time.UTC is useful for 503 // determining whether extended timestamps are present. 504 // This is necessary for users that need to do additional time 505 // calculations when dealing with legacy ZIP formats. 506 if f.ModifiedTime != 0 || f.ModifiedDate != 0 { 507 f.Modified = modified.In(timeZone(msdosModified.Sub(modified))) 508 } 509 } 510 511 // Assume that uncompressed size 2³²-1 could plausibly happen in 512 // an old zip32 file that was sharding inputs into the largest chunks 513 // possible (or is just malicious; search the web for 42.zip). 514 // If needUSize is true still, it means we didn't see a zip64 extension. 515 // As long as the compressed size is not also 2³²-1 (implausible) 516 // and the header is not also 2³²-1 (equally implausible), 517 // accept the uncompressed size 2³²-1 as valid. 518 // If nothing else, this keeps archive/zip working with 42.zip. 519 _ = needUSize 520 521 if needCSize || needHeaderOffset { 522 return ErrFormat 523 } 524 525 return nil 526 } 527 528 func readDataDescriptor(r io.Reader, f *File) error { 529 var buf [dataDescriptorLen]byte 530 // The spec says: "Although not originally assigned a 531 // signature, the value 0x08074b50 has commonly been adopted 532 // as a signature value for the data descriptor record. 533 // Implementers should be aware that ZIP files may be 534 // encountered with or without this signature marking data 535 // descriptors and should account for either case when reading 536 // ZIP files to ensure compatibility." 537 // 538 // dataDescriptorLen includes the size of the signature but 539 // first read just those 4 bytes to see if it exists. 540 if _, err := io.ReadFull(r, buf[:4]); err != nil { 541 return err 542 } 543 off := 0 544 maybeSig := readBuf(buf[:4]) 545 if maybeSig.uint32() != dataDescriptorSignature { 546 // No data descriptor signature. 
Keep these four 547 // bytes. 548 off += 4 549 } 550 if _, err := io.ReadFull(r, buf[off:12]); err != nil { 551 return err 552 } 553 b := readBuf(buf[:12]) 554 if b.uint32() != f.CRC32 { 555 return ErrChecksum 556 } 557 558 // The two sizes that follow here can be either 32 bits or 64 bits 559 // but the spec is not very clear on this and different 560 // interpretations has been made causing incompatibilities. We 561 // already have the sizes from the central directory so we can 562 // just ignore these. 563 564 return nil 565 } 566 567 func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, baseOffset int64, err error) { 568 // look for directoryEndSignature in the last 1k, then in the last 65k 569 var buf []byte 570 var directoryEndOffset int64 571 for i, bLen := range []int64{1024, 65 * 1024} { 572 if bLen > size { 573 bLen = size 574 } 575 buf = make([]byte, int(bLen)) 576 if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF { 577 return nil, 0, err 578 } 579 if p := findSignatureInBlock(buf); p >= 0 { 580 buf = buf[p:] 581 directoryEndOffset = size - bLen + int64(p) 582 break 583 } 584 if i == 1 || bLen == size { 585 return nil, 0, ErrFormat 586 } 587 } 588 589 // read header into struct 590 b := readBuf(buf[4:]) // skip signature 591 d := &directoryEnd{ 592 diskNbr: uint32(b.uint16()), 593 dirDiskNbr: uint32(b.uint16()), 594 dirRecordsThisDisk: uint64(b.uint16()), 595 directoryRecords: uint64(b.uint16()), 596 directorySize: uint64(b.uint32()), 597 directoryOffset: uint64(b.uint32()), 598 commentLen: b.uint16(), 599 } 600 l := int(d.commentLen) 601 if l > len(b) { 602 return nil, 0, errors.New("zip: invalid comment length") 603 } 604 d.comment = string(b[:l]) 605 606 // These values mean that the file can be a zip64 file 607 if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff { 608 p, err := findDirectory64End(r, directoryEndOffset) 609 if err == nil && p >= 0 { 610 directoryEndOffset = p 
611 err = readDirectory64End(r, p, d) 612 } 613 if err != nil { 614 return nil, 0, err 615 } 616 } 617 618 maxInt64 := uint64(1<<63 - 1) 619 if d.directorySize > maxInt64 || d.directoryOffset > maxInt64 { 620 return nil, 0, ErrFormat 621 } 622 623 baseOffset = directoryEndOffset - int64(d.directorySize) - int64(d.directoryOffset) 624 625 // Make sure directoryOffset points to somewhere in our file. 626 if o := baseOffset + int64(d.directoryOffset); o < 0 || o >= size { 627 return nil, 0, ErrFormat 628 } 629 630 // If the directory end data tells us to use a non-zero baseOffset, 631 // but we would find a valid directory entry if we assume that the 632 // baseOffset is 0, then just use a baseOffset of 0. 633 // We've seen files in which the directory end data gives us 634 // an incorrect baseOffset. 635 if baseOffset > 0 { 636 off := int64(d.directoryOffset) 637 rs := io.NewSectionReader(r, off, size-off) 638 if readDirectoryHeader(&File{}, rs) == nil { 639 baseOffset = 0 640 } 641 } 642 643 return d, baseOffset, nil 644 } 645 646 // findDirectory64End tries to read the zip64 locator just before the 647 // directory end and returns the offset of the zip64 directory end if 648 // found. 
649 func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) { 650 locOffset := directoryEndOffset - directory64LocLen 651 if locOffset < 0 { 652 return -1, nil // no need to look for a header outside the file 653 } 654 buf := make([]byte, directory64LocLen) 655 if _, err := r.ReadAt(buf, locOffset); err != nil { 656 return -1, err 657 } 658 b := readBuf(buf) 659 if sig := b.uint32(); sig != directory64LocSignature { 660 return -1, nil 661 } 662 if b.uint32() != 0 { // number of the disk with the start of the zip64 end of central directory 663 return -1, nil // the file is not a valid zip64-file 664 } 665 p := b.uint64() // relative offset of the zip64 end of central directory record 666 if b.uint32() != 1 { // total number of disks 667 return -1, nil // the file is not a valid zip64-file 668 } 669 return int64(p), nil 670 } 671 672 // readDirectory64End reads the zip64 directory end and updates the 673 // directory end with the zip64 directory end values. 674 func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) { 675 buf := make([]byte, directory64EndLen) 676 if _, err := r.ReadAt(buf, offset); err != nil { 677 return err 678 } 679 680 b := readBuf(buf) 681 if sig := b.uint32(); sig != directory64EndSignature { 682 return ErrFormat 683 } 684 685 b = b[12:] // skip dir size, version and version needed (uint64 + 2x uint16) 686 d.diskNbr = b.uint32() // number of this disk 687 d.dirDiskNbr = b.uint32() // number of the disk with the start of the central directory 688 d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk 689 d.directoryRecords = b.uint64() // total number of entries in the central directory 690 d.directorySize = b.uint64() // size of the central directory 691 d.directoryOffset = b.uint64() // offset of start of central directory with respect to the starting disk number 692 693 return nil 694 } 695 696 func findSignatureInBlock(b []byte) int { 697 for i := 
len(b) - directoryEndLen; i >= 0; i-- { 698 // defined from directoryEndSignature in struct.go 699 if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 { 700 // n is length of comment 701 n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8 702 if n+directoryEndLen+i <= len(b) { 703 return i 704 } 705 } 706 } 707 return -1 708 } 709 710 type readBuf []byte 711 712 func (b *readBuf) uint8() uint8 { 713 v := (*b)[0] 714 *b = (*b)[1:] 715 return v 716 } 717 718 func (b *readBuf) uint16() uint16 { 719 v := binary.LittleEndian.Uint16(*b) 720 *b = (*b)[2:] 721 return v 722 } 723 724 func (b *readBuf) uint32() uint32 { 725 v := binary.LittleEndian.Uint32(*b) 726 *b = (*b)[4:] 727 return v 728 } 729 730 func (b *readBuf) uint64() uint64 { 731 v := binary.LittleEndian.Uint64(*b) 732 *b = (*b)[8:] 733 return v 734 } 735 736 func (b *readBuf) sub(n int) readBuf { 737 b2 := (*b)[:n] 738 *b = (*b)[n:] 739 return b2 740 } 741 742 // A fileListEntry is a File and its ename. 743 // If file == nil, the fileListEntry describes a directory without metadata. 744 type fileListEntry struct { 745 name string 746 file *File 747 isDir bool 748 isDup bool 749 } 750 751 type fileInfoDirEntry interface { 752 fs.FileInfo 753 fs.DirEntry 754 } 755 756 func (f *fileListEntry) stat() (fileInfoDirEntry, error) { 757 if f.isDup { 758 return nil, errors.New(f.name + ": duplicate entries in zip file") 759 } 760 if !f.isDir { 761 return headerFileInfo{&f.file.FileHeader}, nil 762 } 763 return f, nil 764 } 765 766 // Only used for directories. 
767 func (f *fileListEntry) Name() string { _, elem, _ := split(f.name); return elem } 768 func (f *fileListEntry) Size() int64 { return 0 } 769 func (f *fileListEntry) Mode() fs.FileMode { return fs.ModeDir | 0555 } 770 func (f *fileListEntry) Type() fs.FileMode { return fs.ModeDir } 771 func (f *fileListEntry) IsDir() bool { return true } 772 func (f *fileListEntry) Sys() any { return nil } 773 774 func (f *fileListEntry) ModTime() time.Time { 775 if f.file == nil { 776 return time.Time{} 777 } 778 return f.file.FileHeader.Modified.UTC() 779 } 780 781 func (f *fileListEntry) Info() (fs.FileInfo, error) { return f, nil } 782 783 func (f *fileListEntry) String() string { 784 return fs.FormatDirEntry(f) 785 } 786 787 // toValidName coerces name to be a valid name for fs.FS.Open. 788 func toValidName(name string) string { 789 name = strings.ReplaceAll(name, `\`, `/`) 790 p := path.Clean(name) 791 792 p = strings.TrimPrefix(p, "/") 793 794 for strings.HasPrefix(p, "../") { 795 p = p[len("../"):] 796 } 797 798 return p 799 } 800 801 func (r *Reader) initFileList() { 802 r.fileListOnce.Do(func() { 803 // files and knownDirs map from a file/directory name 804 // to an index into the r.fileList entry that we are 805 // building. They are used to mark duplicate entries. 806 files := make(map[string]int) 807 knownDirs := make(map[string]int) 808 809 // dirs[name] is true if name is known to be a directory, 810 // because it appears as a prefix in a path. 
811 dirs := make(map[string]bool) 812 813 for _, file := range r.File { 814 isDir := len(file.Name) > 0 && file.Name[len(file.Name)-1] == '/' 815 name := toValidName(file.Name) 816 if name == "" { 817 continue 818 } 819 820 if idx, ok := files[name]; ok { 821 r.fileList[idx].isDup = true 822 continue 823 } 824 if idx, ok := knownDirs[name]; ok { 825 r.fileList[idx].isDup = true 826 continue 827 } 828 829 for dir := path.Dir(name); dir != "."; dir = path.Dir(dir) { 830 dirs[dir] = true 831 } 832 833 idx := len(r.fileList) 834 entry := fileListEntry{ 835 name: name, 836 file: file, 837 isDir: isDir, 838 } 839 r.fileList = append(r.fileList, entry) 840 if isDir { 841 knownDirs[name] = idx 842 } else { 843 files[name] = idx 844 } 845 } 846 for dir := range dirs { 847 if _, ok := knownDirs[dir]; !ok { 848 if idx, ok := files[dir]; ok { 849 r.fileList[idx].isDup = true 850 } else { 851 entry := fileListEntry{ 852 name: dir, 853 file: nil, 854 isDir: true, 855 } 856 r.fileList = append(r.fileList, entry) 857 } 858 } 859 } 860 861 sort.Slice(r.fileList, func(i, j int) bool { return fileEntryLess(r.fileList[i].name, r.fileList[j].name) }) 862 }) 863 } 864 865 func fileEntryLess(x, y string) bool { 866 xdir, xelem, _ := split(x) 867 ydir, yelem, _ := split(y) 868 return xdir < ydir || xdir == ydir && xelem < yelem 869 } 870 871 // Open opens the named file in the ZIP archive, 872 // using the semantics of fs.FS.Open: 873 // paths are always slash separated, with no 874 // leading / or ../ elements. 
875 func (r *Reader) Open(name string) (fs.File, error) { 876 r.initFileList() 877 878 if !fs.ValidPath(name) { 879 return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid} 880 } 881 e := r.openLookup(name) 882 if e == nil { 883 return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrNotExist} 884 } 885 if e.isDir { 886 return &openDir{e, r.openReadDir(name), 0}, nil 887 } 888 rc, err := e.file.Open() 889 if err != nil { 890 return nil, err 891 } 892 return rc.(fs.File), nil 893 } 894 895 func split(name string) (dir, elem string, isDir bool) { 896 if len(name) > 0 && name[len(name)-1] == '/' { 897 isDir = true 898 name = name[:len(name)-1] 899 } 900 i := len(name) - 1 901 for i >= 0 && name[i] != '/' { 902 i-- 903 } 904 if i < 0 { 905 return ".", name, isDir 906 } 907 return name[:i], name[i+1:], isDir 908 } 909 910 var dotFile = &fileListEntry{name: "./", isDir: true} 911 912 func (r *Reader) openLookup(name string) *fileListEntry { 913 if name == "." { 914 return dotFile 915 } 916 917 dir, elem, _ := split(name) 918 files := r.fileList 919 i := sort.Search(len(files), func(i int) bool { 920 idir, ielem, _ := split(files[i].name) 921 return idir > dir || idir == dir && ielem >= elem 922 }) 923 if i < len(files) { 924 fname := files[i].name 925 if fname == name || len(fname) == len(name)+1 && fname[len(name)] == '/' && fname[:len(name)] == name { 926 return &files[i] 927 } 928 } 929 return nil 930 } 931 932 func (r *Reader) openReadDir(dir string) []fileListEntry { 933 files := r.fileList 934 i := sort.Search(len(files), func(i int) bool { 935 idir, _, _ := split(files[i].name) 936 return idir >= dir 937 }) 938 j := sort.Search(len(files), func(j int) bool { 939 jdir, _, _ := split(files[j].name) 940 return jdir > dir 941 }) 942 return files[i:j] 943 } 944 945 type openDir struct { 946 e *fileListEntry 947 files []fileListEntry 948 offset int 949 } 950 951 func (d *openDir) Close() error { return nil } 952 func (d *openDir) Stat() (fs.FileInfo, 
error) { return d.e.stat() } 953 954 func (d *openDir) Read([]byte) (int, error) { 955 return 0, &fs.PathError{Op: "read", Path: d.e.name, Err: errors.New("is a directory")} 956 } 957 958 func (d *openDir) ReadDir(count int) ([]fs.DirEntry, error) { 959 n := len(d.files) - d.offset 960 if count > 0 && n > count { 961 n = count 962 } 963 if n == 0 { 964 if count <= 0 { 965 return nil, nil 966 } 967 return nil, io.EOF 968 } 969 list := make([]fs.DirEntry, n) 970 for i := range list { 971 s, err := d.files[d.offset+i].stat() 972 if err != nil { 973 return nil, err 974 } 975 list[i] = s 976 } 977 d.offset += n 978 return list, nil 979 }