github.com/guyezi/gofrontend@v0.0.0-20200228202240-7a62a49e62c0/libgo/go/archive/zip/reader.go

// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package zip

import (
	"bufio"
	"encoding/binary"
	"errors"
	"hash"
	"hash/crc32"
	"io"
	"os"
	"time"
)

var (
	ErrFormat    = errors.New("zip: not a valid zip file")
	ErrAlgorithm = errors.New("zip: unsupported compression algorithm")
	ErrChecksum  = errors.New("zip: checksum error")
)

type Reader struct {
	r             io.ReaderAt
	File          []*File
	Comment       string
	decompressors map[uint16]Decompressor
}

type ReadCloser struct {
	f *os.File
	Reader
}

type File struct {
	FileHeader
	zip          *Reader
	zipr         io.ReaderAt
	zipsize      int64
	headerOffset int64
}

func (f *File) hasDataDescriptor() bool {
	return f.Flags&0x8 != 0
}

// OpenReader will open the Zip file specified by name and return a ReadCloser.
func OpenReader(name string) (*ReadCloser, error) {
	f, err := os.Open(name)
	if err != nil {
		return nil, err
	}
	fi, err := f.Stat()
	if err != nil {
		f.Close()
		return nil, err
	}
	r := new(ReadCloser)
	if err := r.init(f, fi.Size()); err != nil {
		f.Close()
		return nil, err
	}
	r.f = f
	return r, nil
}

// NewReader returns a new Reader reading from r, which is assumed to
// have the given size in bytes.
func NewReader(r io.ReaderAt, size int64) (*Reader, error) {
	if size < 0 {
		return nil, errors.New("zip: size cannot be negative")
	}
	zr := new(Reader)
	if err := zr.init(r, size); err != nil {
		return nil, err
	}
	return zr, nil
}

func (z *Reader) init(r io.ReaderAt, size int64) error {
	end, err := readDirectoryEnd(r, size)
	if err != nil {
		return err
	}
	z.r = r
	z.File = make([]*File, 0, end.directoryRecords)
	z.Comment = end.comment
	rs := io.NewSectionReader(r, 0, size)
	if _, err = rs.Seek(int64(end.directoryOffset), io.SeekStart); err != nil {
		return err
	}
	buf := bufio.NewReader(rs)

	// The count of files inside a zip is truncated to fit in a uint16.
	// Gloss over this by reading headers until we encounter
	// a bad one, and then only report an ErrFormat or UnexpectedEOF if
	// the file count modulo 65536 is incorrect.
	for {
		f := &File{zip: z, zipr: r, zipsize: size}
		err = readDirectoryHeader(f, buf)
		if err == ErrFormat || err == io.ErrUnexpectedEOF {
			break
		}
		if err != nil {
			return err
		}
		z.File = append(z.File, f)
	}
	if uint16(len(z.File)) != uint16(end.directoryRecords) { // only compare 16 bits here
		// Return the readDirectoryHeader error if we read
		// the wrong number of directory entries.
		return err
	}
	return nil
}
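
// exampleReadAll is an illustrative sketch, not part of the original file: it
// opens the hypothetical archive "testdata/sample.zip", walks every entry, and
// drains each one through a CRC-32 hash, mirroring the verification that
// checksumReader performs while Open's ReadCloser is consumed.
func exampleReadAll() error {
	rc, err := OpenReader("testdata/sample.zip") // hypothetical path
	if err != nil {
		return err
	}
	defer rc.Close()
	for _, f := range rc.File {
		fr, err := f.Open()
		if err != nil {
			return err
		}
		h := crc32.NewIEEE()
		_, err = io.Copy(h, fr) // a checksum mismatch surfaces as ErrChecksum here
		fr.Close()
		if err != nil {
			return err
		}
	}
	return nil
}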

// RegisterDecompressor registers or overrides a custom decompressor for a
// specific method ID. If a decompressor for a given method is not found,
// Reader will default to looking up the decompressor at the package level.
func (z *Reader) RegisterDecompressor(method uint16, dcomp Decompressor) {
	if z.decompressors == nil {
		z.decompressors = make(map[uint16]Decompressor)
	}
	z.decompressors[method] = dcomp
}

func (z *Reader) decompressor(method uint16) Decompressor {
	dcomp := z.decompressors[method]
	if dcomp == nil {
		dcomp = decompressor(method)
	}
	return dcomp
}

// Close closes the Zip file, rendering it unusable for I/O.
func (rc *ReadCloser) Close() error {
	return rc.f.Close()
}

// DataOffset returns the offset of the file's possibly-compressed
// data, relative to the beginning of the zip file.
//
// Most callers should instead use Open, which transparently
// decompresses data and verifies checksums.
func (f *File) DataOffset() (offset int64, err error) {
	bodyOffset, err := f.findBodyOffset()
	if err != nil {
		return
	}
	return f.headerOffset + bodyOffset, nil
}

// Open returns a ReadCloser that provides access to the File's contents.
// Multiple files may be read concurrently.
func (f *File) Open() (io.ReadCloser, error) {
	bodyOffset, err := f.findBodyOffset()
	if err != nil {
		return nil, err
	}
	size := int64(f.CompressedSize64)
	r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size)
	dcomp := f.zip.decompressor(f.Method)
	if dcomp == nil {
		return nil, ErrAlgorithm
	}
	var rc io.ReadCloser = dcomp(r)
	var desr io.Reader
	if f.hasDataDescriptor() {
		desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen)
	}
	rc = &checksumReader{
		rc:   rc,
		hash: crc32.NewIEEE(),
		f:    f,
		desr: desr,
	}
	return rc, nil
}

type checksumReader struct {
	rc    io.ReadCloser
	hash  hash.Hash32
	nread uint64 // number of bytes read so far
	f     *File
	desr  io.Reader // if non-nil, where to read the data descriptor
	err   error     // sticky error
}

func (r *checksumReader) Read(b []byte) (n int, err error) {
	if r.err != nil {
		return 0, r.err
	}
	n, err = r.rc.Read(b)
	r.hash.Write(b[:n])
	r.nread += uint64(n)
	if err == nil {
		return
	}
	if err == io.EOF {
		if r.nread != r.f.UncompressedSize64 {
			return 0, io.ErrUnexpectedEOF
		}
		if r.desr != nil {
			if err1 := readDataDescriptor(r.desr, r.f); err1 != nil {
				if err1 == io.EOF {
					err = io.ErrUnexpectedEOF
				} else {
					err = err1
				}
			} else if r.hash.Sum32() != r.f.CRC32 {
				err = ErrChecksum
			}
		} else {
			// If there's not a data descriptor, we still compare
			// the CRC32 of what we've read against the file header
			// or TOC's CRC32, if it seems like it was set.
			if r.f.CRC32 != 0 && r.hash.Sum32() != r.f.CRC32 {
				err = ErrChecksum
			}
		}
	}
	r.err = err
	return
}

func (r *checksumReader) Close() error { return r.rc.Close() }
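
// The next two declarations are an illustrative sketch, not part of the
// original file. Decompressor (declared in register.go) is a
// func(io.Reader) io.ReadCloser, so a pass-through "stored"-style handler
// only needs to wrap the raw section reader with a no-op Close. The method
// ID 99 is an arbitrary, hypothetical value.
type exampleNopCloser struct{ io.Reader }

func (exampleNopCloser) Close() error { return nil }

func exampleRegisterStored(z *Reader) {
	z.RegisterDecompressor(99, func(r io.Reader) io.ReadCloser {
		// Return the compressed bytes unchanged; Open still wraps the
		// result in a checksumReader, so the CRC-32 is verified as usual.
		return exampleNopCloser{r}
	})
}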

// findBodyOffset does the minimum work to verify the file has a header
// and returns the file body offset.
func (f *File) findBodyOffset() (int64, error) {
	var buf [fileHeaderLen]byte
	if _, err := f.zipr.ReadAt(buf[:], f.headerOffset); err != nil {
		return 0, err
	}
	b := readBuf(buf[:])
	if sig := b.uint32(); sig != fileHeaderSignature {
		return 0, ErrFormat
	}
	b = b[22:] // skip over most of the header
	filenameLen := int(b.uint16())
	extraLen := int(b.uint16())
	return int64(fileHeaderLen + filenameLen + extraLen), nil
}

// readDirectoryHeader attempts to read a directory header from r.
// It returns io.ErrUnexpectedEOF if it cannot read a complete header,
// and ErrFormat if it doesn't find a valid header signature.
func readDirectoryHeader(f *File, r io.Reader) error {
	var buf [directoryHeaderLen]byte
	if _, err := io.ReadFull(r, buf[:]); err != nil {
		return err
	}
	b := readBuf(buf[:])
	if sig := b.uint32(); sig != directoryHeaderSignature {
		return ErrFormat
	}
	f.CreatorVersion = b.uint16()
	f.ReaderVersion = b.uint16()
	f.Flags = b.uint16()
	f.Method = b.uint16()
	f.ModifiedTime = b.uint16()
	f.ModifiedDate = b.uint16()
	f.CRC32 = b.uint32()
	f.CompressedSize = b.uint32()
	f.UncompressedSize = b.uint32()
	f.CompressedSize64 = uint64(f.CompressedSize)
	f.UncompressedSize64 = uint64(f.UncompressedSize)
	filenameLen := int(b.uint16())
	extraLen := int(b.uint16())
	commentLen := int(b.uint16())
	b = b[4:] // skipped start disk number and internal attributes (2x uint16)
	f.ExternalAttrs = b.uint32()
	f.headerOffset = int64(b.uint32())
	d := make([]byte, filenameLen+extraLen+commentLen)
	if _, err := io.ReadFull(r, d); err != nil {
		return err
	}
	f.Name = string(d[:filenameLen])
	f.Extra = d[filenameLen : filenameLen+extraLen]
	f.Comment = string(d[filenameLen+extraLen:])

	// Determine the character encoding.
	utf8Valid1, utf8Require1 := detectUTF8(f.Name)
	utf8Valid2, utf8Require2 := detectUTF8(f.Comment)
	switch {
	case !utf8Valid1 || !utf8Valid2:
		// Name and Comment definitely not UTF-8.
		f.NonUTF8 = true
	case !utf8Require1 && !utf8Require2:
		// Name and Comment use only single-byte runes that overlap with UTF-8.
		f.NonUTF8 = false
	default:
		// Might be UTF-8, might be some other encoding; preserve existing flag.
		// Some ZIP writers use UTF-8 encoding without setting the UTF-8 flag.
		// Since it is impossible to always distinguish valid UTF-8 from some
		// other encoding (e.g., GBK or Shift-JIS), we trust the flag.
		f.NonUTF8 = f.Flags&0x800 == 0
	}

	needUSize := f.UncompressedSize == ^uint32(0)
	needCSize := f.CompressedSize == ^uint32(0)
	needHeaderOffset := f.headerOffset == int64(^uint32(0))

	// Best effort to find what we need.
	// Other zip authors might not even follow the basic format,
	// and we'll just ignore the Extra content in that case.
	var modified time.Time
parseExtras:
	for extra := readBuf(f.Extra); len(extra) >= 4; { // need at least tag and size
		fieldTag := extra.uint16()
		fieldSize := int(extra.uint16())
		if len(extra) < fieldSize {
			break
		}
		fieldBuf := extra.sub(fieldSize)

		switch fieldTag {
		case zip64ExtraID:
			// update directory values from the zip64 extra block.
			// They should only be consulted if the sizes read earlier
			// are maxed out.
			// See golang.org/issue/13367.
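			//
			// Per the ZIP appnote, the zip64 extra block carries 64-bit
			// replacements only for the saturated fields, in this order:
			// uncompressed size, compressed size, local header offset.
			// Each read below therefore consumes the front of fieldBuf
			// only when the corresponding 32-bit value was 0xffffffff.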
			if needUSize {
				needUSize = false
				if len(fieldBuf) < 8 {
					return ErrFormat
				}
				f.UncompressedSize64 = fieldBuf.uint64()
			}
			if needCSize {
				needCSize = false
				if len(fieldBuf) < 8 {
					return ErrFormat
				}
				f.CompressedSize64 = fieldBuf.uint64()
			}
			if needHeaderOffset {
				needHeaderOffset = false
				if len(fieldBuf) < 8 {
					return ErrFormat
				}
				f.headerOffset = int64(fieldBuf.uint64())
			}
		case ntfsExtraID:
			if len(fieldBuf) < 4 {
				continue parseExtras
			}
			fieldBuf.uint32()        // reserved (ignored)
			for len(fieldBuf) >= 4 { // need at least tag and size
				attrTag := fieldBuf.uint16()
				attrSize := int(fieldBuf.uint16())
				if len(fieldBuf) < attrSize {
					continue parseExtras
				}
				attrBuf := fieldBuf.sub(attrSize)
				if attrTag != 1 || attrSize != 24 {
					continue // Ignore irrelevant attributes
				}

				const ticksPerSecond = 1e7    // Windows timestamp resolution
				ts := int64(attrBuf.uint64()) // ModTime since Windows epoch
				secs := int64(ts / ticksPerSecond)
				nsecs := (1e9 / ticksPerSecond) * int64(ts%ticksPerSecond)
				epoch := time.Date(1601, time.January, 1, 0, 0, 0, 0, time.UTC)
				modified = time.Unix(epoch.Unix()+secs, nsecs)
			}
		case unixExtraID, infoZipUnixExtraID:
			if len(fieldBuf) < 8 {
				continue parseExtras
			}
			fieldBuf.uint32()              // AcTime (ignored)
			ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch
			modified = time.Unix(ts, 0)
		case extTimeExtraID:
			if len(fieldBuf) < 5 || fieldBuf.uint8()&1 == 0 {
				continue parseExtras
			}
			ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch
			modified = time.Unix(ts, 0)
		}
	}

	msdosModified := msDosTimeToTime(f.ModifiedDate, f.ModifiedTime)
	f.Modified = msdosModified
	if !modified.IsZero() {
		f.Modified = modified.UTC()

		// If legacy MS-DOS timestamps are set, we can use the delta between
		// the legacy and extended versions to estimate timezone offset.
		//
		// A non-UTC timezone is always used (even if offset is zero).
		// Thus, FileHeader.Modified.Location() == time.UTC is useful for
		// determining whether extended timestamps are present.
		// This is necessary for users that need to do additional time
		// calculations when dealing with legacy ZIP formats.
		if f.ModifiedTime != 0 || f.ModifiedDate != 0 {
			f.Modified = modified.In(timeZone(msdosModified.Sub(modified)))
		}
	}

	// Assume that uncompressed size 2³²-1 could plausibly happen in
	// an old zip32 file that was sharding inputs into the largest chunks
	// possible (or is just malicious; search the web for 42.zip).
	// If needUSize is still true, it means we didn't see a zip64 extension.
	// As long as the compressed size is not also 2³²-1 (implausible)
	// and the header offset is not also 2³²-1 (equally implausible),
	// accept the uncompressed size 2³²-1 as valid.
	// If nothing else, this keeps archive/zip working with 42.zip.
	_ = needUSize

	if needCSize || needHeaderOffset {
		return ErrFormat
	}

	return nil
}
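
// exampleHasExtendedTimestamp is an illustrative sketch, not part of the
// original file. It leans on the timezone note in readDirectoryHeader above:
// Modified is left in time.UTC when only the legacy MS-DOS timestamp was
// available, whereas a parsed extended (NTFS/Unix/extra-time) timestamp
// paired with a nonzero legacy timestamp yields a fixed, non-UTC location.
func exampleHasExtendedTimestamp(fh *FileHeader) bool {
	return fh.Modified.Location() != time.UTC
}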

func readDataDescriptor(r io.Reader, f *File) error {
	var buf [dataDescriptorLen]byte

	// The spec says: "Although not originally assigned a
	// signature, the value 0x08074b50 has commonly been adopted
	// as a signature value for the data descriptor record.
	// Implementers should be aware that ZIP files may be
	// encountered with or without this signature marking data
	// descriptors and should account for either case when reading
	// ZIP files to ensure compatibility."
	//
	// dataDescriptorLen includes the size of the signature but
	// first read just those 4 bytes to see if it exists.
	if _, err := io.ReadFull(r, buf[:4]); err != nil {
		return err
	}
	off := 0
	maybeSig := readBuf(buf[:4])
	if maybeSig.uint32() != dataDescriptorSignature {
		// No data descriptor signature. Keep these four
		// bytes.
		off += 4
	}
	if _, err := io.ReadFull(r, buf[off:12]); err != nil {
		return err
	}
	b := readBuf(buf[:12])
	if b.uint32() != f.CRC32 {
		return ErrChecksum
	}

	// The two sizes that follow here can be either 32 bits or 64 bits,
	// but the spec is not very clear on this and different
	// interpretations have been made, causing incompatibilities. We
	// already have the sizes from the central directory, so we can
	// just ignore these.

	return nil
}

func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) {
	// look for directoryEndSignature in the last 1k, then in the last 65k
	var buf []byte
	var directoryEndOffset int64
	for i, bLen := range []int64{1024, 65 * 1024} {
		if bLen > size {
			bLen = size
		}
		buf = make([]byte, int(bLen))
		if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF {
			return nil, err
		}
		if p := findSignatureInBlock(buf); p >= 0 {
			buf = buf[p:]
			directoryEndOffset = size - bLen + int64(p)
			break
		}
		if i == 1 || bLen == size {
			return nil, ErrFormat
		}
	}

	// read header into struct
	b := readBuf(buf[4:]) // skip signature
	d := &directoryEnd{
		diskNbr:            uint32(b.uint16()),
		dirDiskNbr:         uint32(b.uint16()),
		dirRecordsThisDisk: uint64(b.uint16()),
		directoryRecords:   uint64(b.uint16()),
		directorySize:      uint64(b.uint32()),
		directoryOffset:    uint64(b.uint32()),
		commentLen:         b.uint16(),
	}
	l := int(d.commentLen)
	if l > len(b) {
		return nil, errors.New("zip: invalid comment length")
	}
	d.comment = string(b[:l])

	// These values mean that the file can be a zip64 file
	if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff {
		p, err := findDirectory64End(r, directoryEndOffset)
		if err == nil && p >= 0 {
			err = readDirectory64End(r, p, d)
		}
		if err != nil {
			return nil, err
		}
	}
	// Make sure directoryOffset points to somewhere in our file.
	if o := int64(d.directoryOffset); o < 0 || o >= size {
		return nil, ErrFormat
	}
	return d, nil
}
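
// exampleDirectoryOffset is an illustrative sketch, not part of the original
// file: it reports where the central directory starts inside an archive of
// the given size, reusing the same end-of-central-directory lookup (including
// the zip64 fallback) that Reader.init performs.
func exampleDirectoryOffset(r io.ReaderAt, size int64) (int64, error) {
	end, err := readDirectoryEnd(r, size)
	if err != nil {
		return 0, err
	}
	return int64(end.directoryOffset), nil
}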

// findDirectory64End tries to read the zip64 locator just before the
// directory end and returns the offset of the zip64 directory end if
// found.
func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) {
	locOffset := directoryEndOffset - directory64LocLen
	if locOffset < 0 {
		return -1, nil // no need to look for a header outside the file
	}
	buf := make([]byte, directory64LocLen)
	if _, err := r.ReadAt(buf, locOffset); err != nil {
		return -1, err
	}
	b := readBuf(buf)
	if sig := b.uint32(); sig != directory64LocSignature {
		return -1, nil
	}
	if b.uint32() != 0 { // number of the disk with the start of the zip64 end of central directory
		return -1, nil // the file is not a valid zip64-file
	}
	p := b.uint64()      // relative offset of the zip64 end of central directory record
	if b.uint32() != 1 { // total number of disks
		return -1, nil // the file is not a valid zip64-file
	}
	return int64(p), nil
}

// readDirectory64End reads the zip64 directory end and updates the
// directory end with the zip64 directory end values.
func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) {
	buf := make([]byte, directory64EndLen)
	if _, err := r.ReadAt(buf, offset); err != nil {
		return err
	}

	b := readBuf(buf)
	if sig := b.uint32(); sig != directory64EndSignature {
		return ErrFormat
	}

	b = b[12:]                        // skip dir size, version and version needed (uint64 + 2x uint16)
	d.diskNbr = b.uint32()            // number of this disk
	d.dirDiskNbr = b.uint32()         // number of the disk with the start of the central directory
	d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk
	d.directoryRecords = b.uint64()   // total number of entries in the central directory
	d.directorySize = b.uint64()      // size of the central directory
	d.directoryOffset = b.uint64()    // offset of start of central directory with respect to the starting disk number

	return nil
}

func findSignatureInBlock(b []byte) int {
	for i := len(b) - directoryEndLen; i >= 0; i-- {
		// defined from directoryEndSignature in struct.go
		if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 {
			// n is length of comment
			n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8
			if n+directoryEndLen+i <= len(b) {
				return i
			}
		}
	}
	return -1
}

type readBuf []byte

func (b *readBuf) uint8() uint8 {
	v := (*b)[0]
	*b = (*b)[1:]
	return v
}

func (b *readBuf) uint16() uint16 {
	v := binary.LittleEndian.Uint16(*b)
	*b = (*b)[2:]
	return v
}

func (b *readBuf) uint32() uint32 {
	v := binary.LittleEndian.Uint32(*b)
	*b = (*b)[4:]
	return v
}

func (b *readBuf) uint64() uint64 {
	v := binary.LittleEndian.Uint64(*b)
	*b = (*b)[8:]
	return v
}

func (b *readBuf) sub(n int) readBuf {
	b2 := (*b)[:n]
	*b = (*b)[n:]
	return b2
}
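
// exampleDecodeEOCDPrefix is an illustrative sketch, not part of the original
// file: it shows how readBuf consumes little-endian fields from the front of
// a slice, advancing past each one, here applied to the first six bytes of a
// hand-written end-of-central-directory record.
func exampleDecodeEOCDPrefix() (sig uint32, diskNbr uint16) {
	b := readBuf([]byte{'P', 'K', 0x05, 0x06, 0x00, 0x00})
	sig = b.uint32()     // 0x06054b50, i.e. directoryEndSignature
	diskNbr = b.uint16() // number of this disk
	return sig, diskNbr
}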