github.com/4ad/go@v0.0.0-20161219182952-69a12818b605/src/archive/zip/reader.go (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package zip 6 7 import ( 8 "bufio" 9 "encoding/binary" 10 "errors" 11 "fmt" 12 "hash" 13 "hash/crc32" 14 "io" 15 "os" 16 ) 17 18 var ( 19 ErrFormat = errors.New("zip: not a valid zip file") 20 ErrAlgorithm = errors.New("zip: unsupported compression algorithm") 21 ErrChecksum = errors.New("zip: checksum error") 22 ) 23 24 type Reader struct { 25 r io.ReaderAt 26 File []*File 27 Comment string 28 decompressors map[uint16]Decompressor 29 } 30 31 type ReadCloser struct { 32 f *os.File 33 Reader 34 } 35 36 type File struct { 37 FileHeader 38 zip *Reader 39 zipr io.ReaderAt 40 zipsize int64 41 headerOffset int64 42 } 43 44 func (f *File) hasDataDescriptor() bool { 45 return f.Flags&0x8 != 0 46 } 47 48 // OpenReader will open the Zip file specified by name and return a ReadCloser. 49 func OpenReader(name string) (*ReadCloser, error) { 50 f, err := os.Open(name) 51 if err != nil { 52 return nil, err 53 } 54 fi, err := f.Stat() 55 if err != nil { 56 f.Close() 57 return nil, err 58 } 59 r := new(ReadCloser) 60 if err := r.init(f, fi.Size()); err != nil { 61 f.Close() 62 return nil, err 63 } 64 r.f = f 65 return r, nil 66 } 67 68 // NewReader returns a new Reader reading from r, which is assumed to 69 // have the given size in bytes. 70 func NewReader(r io.ReaderAt, size int64) (*Reader, error) { 71 zr := new(Reader) 72 if err := zr.init(r, size); err != nil { 73 return nil, err 74 } 75 return zr, nil 76 } 77 78 func (z *Reader) init(r io.ReaderAt, size int64) error { 79 end, err := readDirectoryEnd(r, size) 80 if err != nil { 81 return err 82 } 83 if end.directoryRecords > uint64(size)/fileHeaderLen { 84 return fmt.Errorf("archive/zip: TOC declares impossible %d files in %d byte zip", end.directoryRecords, size) 85 } 86 z.r = r 87 z.File = make([]*File, 0, end.directoryRecords) 88 z.Comment = end.comment 89 rs := io.NewSectionReader(r, 0, size) 90 if _, err = rs.Seek(int64(end.directoryOffset), io.SeekStart); err != nil { 91 return err 92 } 93 buf := bufio.NewReader(rs) 94 95 // The count of files inside a zip is truncated to fit in a uint16. 96 // Gloss over this by reading headers until we encounter 97 // a bad one, and then only report a ErrFormat or UnexpectedEOF if 98 // the file count modulo 65536 is incorrect. 99 for { 100 f := &File{zip: z, zipr: r, zipsize: size} 101 err = readDirectoryHeader(f, buf) 102 if err == ErrFormat || err == io.ErrUnexpectedEOF { 103 break 104 } 105 if err != nil { 106 return err 107 } 108 z.File = append(z.File, f) 109 } 110 if uint16(len(z.File)) != uint16(end.directoryRecords) { // only compare 16 bits here 111 // Return the readDirectoryHeader error if we read 112 // the wrong number of directory entries. 113 return err 114 } 115 return nil 116 } 117 118 // RegisterDecompressor registers or overrides a custom decompressor for a 119 // specific method ID. If a decompressor for a given method is not found, 120 // Reader will default to looking up the decompressor at the package level. 121 func (z *Reader) RegisterDecompressor(method uint16, dcomp Decompressor) { 122 if z.decompressors == nil { 123 z.decompressors = make(map[uint16]Decompressor) 124 } 125 z.decompressors[method] = dcomp 126 } 127 128 func (z *Reader) decompressor(method uint16) Decompressor { 129 dcomp := z.decompressors[method] 130 if dcomp == nil { 131 dcomp = decompressor(method) 132 } 133 return dcomp 134 } 135 136 // Close closes the Zip file, rendering it unusable for I/O. 137 func (rc *ReadCloser) Close() error { 138 return rc.f.Close() 139 } 140 141 // DataOffset returns the offset of the file's possibly-compressed 142 // data, relative to the beginning of the zip file. 143 // 144 // Most callers should instead use Open, which transparently 145 // decompresses data and verifies checksums. 146 func (f *File) DataOffset() (offset int64, err error) { 147 bodyOffset, err := f.findBodyOffset() 148 if err != nil { 149 return 150 } 151 return f.headerOffset + bodyOffset, nil 152 } 153 154 // Open returns a ReadCloser that provides access to the File's contents. 155 // Multiple files may be read concurrently. 156 func (f *File) Open() (io.ReadCloser, error) { 157 bodyOffset, err := f.findBodyOffset() 158 if err != nil { 159 return nil, err 160 } 161 size := int64(f.CompressedSize64) 162 r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size) 163 dcomp := f.zip.decompressor(f.Method) 164 if dcomp == nil { 165 return nil, ErrAlgorithm 166 } 167 var rc io.ReadCloser = dcomp(r) 168 var desr io.Reader 169 if f.hasDataDescriptor() { 170 desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen) 171 } 172 rc = &checksumReader{ 173 rc: rc, 174 hash: crc32.NewIEEE(), 175 f: f, 176 desr: desr, 177 } 178 return rc, nil 179 } 180 181 type checksumReader struct { 182 rc io.ReadCloser 183 hash hash.Hash32 184 nread uint64 // number of bytes read so far 185 f *File 186 desr io.Reader // if non-nil, where to read the data descriptor 187 err error // sticky error 188 } 189 190 func (r *checksumReader) Read(b []byte) (n int, err error) { 191 if r.err != nil { 192 return 0, r.err 193 } 194 n, err = r.rc.Read(b) 195 r.hash.Write(b[:n]) 196 r.nread += uint64(n) 197 if err == nil { 198 return 199 } 200 if err == io.EOF { 201 if r.nread != r.f.UncompressedSize64 { 202 return 0, io.ErrUnexpectedEOF 203 } 204 if r.desr != nil { 205 if err1 := readDataDescriptor(r.desr, r.f); err1 != nil { 206 if err1 == io.EOF { 207 err = io.ErrUnexpectedEOF 208 } else { 209 err = err1 210 } 211 } else if r.hash.Sum32() != r.f.CRC32 { 212 err = ErrChecksum 213 } 214 } else { 215 // If there's not a data descriptor, we still compare 216 // the CRC32 of what we've read against the file header 217 // or TOC's CRC32, if it seems like it was set. 218 if r.f.CRC32 != 0 && r.hash.Sum32() != r.f.CRC32 { 219 err = ErrChecksum 220 } 221 } 222 } 223 r.err = err 224 return 225 } 226 227 func (r *checksumReader) Close() error { return r.rc.Close() } 228 229 // findBodyOffset does the minimum work to verify the file has a header 230 // and returns the file body offset. 231 func (f *File) findBodyOffset() (int64, error) { 232 var buf [fileHeaderLen]byte 233 if _, err := f.zipr.ReadAt(buf[:], f.headerOffset); err != nil { 234 return 0, err 235 } 236 b := readBuf(buf[:]) 237 if sig := b.uint32(); sig != fileHeaderSignature { 238 return 0, ErrFormat 239 } 240 b = b[22:] // skip over most of the header 241 filenameLen := int(b.uint16()) 242 extraLen := int(b.uint16()) 243 return int64(fileHeaderLen + filenameLen + extraLen), nil 244 } 245 246 // readDirectoryHeader attempts to read a directory header from r. 247 // It returns io.ErrUnexpectedEOF if it cannot read a complete header, 248 // and ErrFormat if it doesn't find a valid header signature. 249 func readDirectoryHeader(f *File, r io.Reader) error { 250 var buf [directoryHeaderLen]byte 251 if _, err := io.ReadFull(r, buf[:]); err != nil { 252 return err 253 } 254 b := readBuf(buf[:]) 255 if sig := b.uint32(); sig != directoryHeaderSignature { 256 return ErrFormat 257 } 258 f.CreatorVersion = b.uint16() 259 f.ReaderVersion = b.uint16() 260 f.Flags = b.uint16() 261 f.Method = b.uint16() 262 f.ModifiedTime = b.uint16() 263 f.ModifiedDate = b.uint16() 264 f.CRC32 = b.uint32() 265 f.CompressedSize = b.uint32() 266 f.UncompressedSize = b.uint32() 267 f.CompressedSize64 = uint64(f.CompressedSize) 268 f.UncompressedSize64 = uint64(f.UncompressedSize) 269 filenameLen := int(b.uint16()) 270 extraLen := int(b.uint16()) 271 commentLen := int(b.uint16()) 272 b = b[4:] // skipped start disk number and internal attributes (2x uint16) 273 f.ExternalAttrs = b.uint32() 274 f.headerOffset = int64(b.uint32()) 275 d := make([]byte, filenameLen+extraLen+commentLen) 276 if _, err := io.ReadFull(r, d); err != nil { 277 return err 278 } 279 f.Name = string(d[:filenameLen]) 280 f.Extra = d[filenameLen : filenameLen+extraLen] 281 f.Comment = string(d[filenameLen+extraLen:]) 282 283 needUSize := f.UncompressedSize == ^uint32(0) 284 needCSize := f.CompressedSize == ^uint32(0) 285 needHeaderOffset := f.headerOffset == int64(^uint32(0)) 286 287 if len(f.Extra) > 0 { 288 // Best effort to find what we need. 289 // Other zip authors might not even follow the basic format, 290 // and we'll just ignore the Extra content in that case. 291 b := readBuf(f.Extra) 292 for len(b) >= 4 { // need at least tag and size 293 tag := b.uint16() 294 size := b.uint16() 295 if int(size) > len(b) { 296 break 297 } 298 if tag == zip64ExtraId { 299 // update directory values from the zip64 extra block. 300 // They should only be consulted if the sizes read earlier 301 // are maxed out. 302 // See golang.org/issue/13367. 303 eb := readBuf(b[:size]) 304 305 if needUSize { 306 needUSize = false 307 if len(eb) < 8 { 308 return ErrFormat 309 } 310 f.UncompressedSize64 = eb.uint64() 311 } 312 if needCSize { 313 needCSize = false 314 if len(eb) < 8 { 315 return ErrFormat 316 } 317 f.CompressedSize64 = eb.uint64() 318 } 319 if needHeaderOffset { 320 needHeaderOffset = false 321 if len(eb) < 8 { 322 return ErrFormat 323 } 324 f.headerOffset = int64(eb.uint64()) 325 } 326 break 327 } 328 b = b[size:] 329 } 330 } 331 332 // Assume that uncompressed size 2³²-1 could plausibly happen in 333 // an old zip32 file that was sharding inputs into the largest chunks 334 // possible (or is just malicious; search the web for 42.zip). 335 // If needUSize is true still, it means we didn't see a zip64 extension. 336 // As long as the compressed size is not also 2³²-1 (implausible) 337 // and the header is not also 2³²-1 (equally implausible), 338 // accept the uncompressed size 2³²-1 as valid. 339 // If nothing else, this keeps archive/zip working with 42.zip. 340 _ = needUSize 341 342 if needCSize || needHeaderOffset { 343 return ErrFormat 344 } 345 346 return nil 347 } 348 349 func readDataDescriptor(r io.Reader, f *File) error { 350 var buf [dataDescriptorLen]byte 351 352 // The spec says: "Although not originally assigned a 353 // signature, the value 0x08074b50 has commonly been adopted 354 // as a signature value for the data descriptor record. 355 // Implementers should be aware that ZIP files may be 356 // encountered with or without this signature marking data 357 // descriptors and should account for either case when reading 358 // ZIP files to ensure compatibility." 359 // 360 // dataDescriptorLen includes the size of the signature but 361 // first read just those 4 bytes to see if it exists. 362 if _, err := io.ReadFull(r, buf[:4]); err != nil { 363 return err 364 } 365 off := 0 366 maybeSig := readBuf(buf[:4]) 367 if maybeSig.uint32() != dataDescriptorSignature { 368 // No data descriptor signature. Keep these four 369 // bytes. 370 off += 4 371 } 372 if _, err := io.ReadFull(r, buf[off:12]); err != nil { 373 return err 374 } 375 b := readBuf(buf[:12]) 376 if b.uint32() != f.CRC32 { 377 return ErrChecksum 378 } 379 380 // The two sizes that follow here can be either 32 bits or 64 bits 381 // but the spec is not very clear on this and different 382 // interpretations has been made causing incompatibilities. We 383 // already have the sizes from the central directory so we can 384 // just ignore these. 385 386 return nil 387 } 388 389 func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) { 390 // look for directoryEndSignature in the last 1k, then in the last 65k 391 var buf []byte 392 var directoryEndOffset int64 393 for i, bLen := range []int64{1024, 65 * 1024} { 394 if bLen > size { 395 bLen = size 396 } 397 buf = make([]byte, int(bLen)) 398 if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF { 399 return nil, err 400 } 401 if p := findSignatureInBlock(buf); p >= 0 { 402 buf = buf[p:] 403 directoryEndOffset = size - bLen + int64(p) 404 break 405 } 406 if i == 1 || bLen == size { 407 return nil, ErrFormat 408 } 409 } 410 411 // read header into struct 412 b := readBuf(buf[4:]) // skip signature 413 d := &directoryEnd{ 414 diskNbr: uint32(b.uint16()), 415 dirDiskNbr: uint32(b.uint16()), 416 dirRecordsThisDisk: uint64(b.uint16()), 417 directoryRecords: uint64(b.uint16()), 418 directorySize: uint64(b.uint32()), 419 directoryOffset: uint64(b.uint32()), 420 commentLen: b.uint16(), 421 } 422 l := int(d.commentLen) 423 if l > len(b) { 424 return nil, errors.New("zip: invalid comment length") 425 } 426 d.comment = string(b[:l]) 427 428 // These values mean that the file can be a zip64 file 429 if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff { 430 p, err := findDirectory64End(r, directoryEndOffset) 431 if err == nil && p >= 0 { 432 err = readDirectory64End(r, p, d) 433 } 434 if err != nil { 435 return nil, err 436 } 437 } 438 // Make sure directoryOffset points to somewhere in our file. 439 if o := int64(d.directoryOffset); o < 0 || o >= size { 440 return nil, ErrFormat 441 } 442 return d, nil 443 } 444 445 // findDirectory64End tries to read the zip64 locator just before the 446 // directory end and returns the offset of the zip64 directory end if 447 // found. 448 func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) { 449 locOffset := directoryEndOffset - directory64LocLen 450 if locOffset < 0 { 451 return -1, nil // no need to look for a header outside the file 452 } 453 buf := make([]byte, directory64LocLen) 454 if _, err := r.ReadAt(buf, locOffset); err != nil { 455 return -1, err 456 } 457 b := readBuf(buf) 458 if sig := b.uint32(); sig != directory64LocSignature { 459 return -1, nil 460 } 461 if b.uint32() != 0 { // number of the disk with the start of the zip64 end of central directory 462 return -1, nil // the file is not a valid zip64-file 463 } 464 p := b.uint64() // relative offset of the zip64 end of central directory record 465 if b.uint32() != 1 { // total number of disks 466 return -1, nil // the file is not a valid zip64-file 467 } 468 return int64(p), nil 469 } 470 471 // readDirectory64End reads the zip64 directory end and updates the 472 // directory end with the zip64 directory end values. 473 func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) { 474 buf := make([]byte, directory64EndLen) 475 if _, err := r.ReadAt(buf, offset); err != nil { 476 return err 477 } 478 479 b := readBuf(buf) 480 if sig := b.uint32(); sig != directory64EndSignature { 481 return ErrFormat 482 } 483 484 b = b[12:] // skip dir size, version and version needed (uint64 + 2x uint16) 485 d.diskNbr = b.uint32() // number of this disk 486 d.dirDiskNbr = b.uint32() // number of the disk with the start of the central directory 487 d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk 488 d.directoryRecords = b.uint64() // total number of entries in the central directory 489 d.directorySize = b.uint64() // size of the central directory 490 d.directoryOffset = b.uint64() // offset of start of central directory with respect to the starting disk number 491 492 return nil 493 } 494 495 func findSignatureInBlock(b []byte) int { 496 for i := len(b) - directoryEndLen; i >= 0; i-- { 497 // defined from directoryEndSignature in struct.go 498 if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 { 499 // n is length of comment 500 n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8 501 if n+directoryEndLen+i <= len(b) { 502 return i 503 } 504 } 505 } 506 return -1 507 } 508 509 type readBuf []byte 510 511 func (b *readBuf) uint16() uint16 { 512 v := binary.LittleEndian.Uint16(*b) 513 *b = (*b)[2:] 514 return v 515 } 516 517 func (b *readBuf) uint32() uint32 { 518 v := binary.LittleEndian.Uint32(*b) 519 *b = (*b)[4:] 520 return v 521 } 522 523 func (b *readBuf) uint64() uint64 { 524 v := binary.LittleEndian.Uint64(*b) 525 *b = (*b)[8:] 526 return v 527 }