github.com/sean-/go@v0.0.0-20151219100004-97f854cd7bb6/src/archive/zip/reader.go (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package zip 6 7 import ( 8 "bufio" 9 "encoding/binary" 10 "errors" 11 "fmt" 12 "hash" 13 "hash/crc32" 14 "io" 15 "os" 16 ) 17 18 var ( 19 ErrFormat = errors.New("zip: not a valid zip file") 20 ErrAlgorithm = errors.New("zip: unsupported compression algorithm") 21 ErrChecksum = errors.New("zip: checksum error") 22 ) 23 24 type Reader struct { 25 r io.ReaderAt 26 File []*File 27 Comment string 28 decompressors map[uint16]Decompressor 29 } 30 31 type ReadCloser struct { 32 f *os.File 33 Reader 34 } 35 36 type File struct { 37 FileHeader 38 zip *Reader 39 zipr io.ReaderAt 40 zipsize int64 41 headerOffset int64 42 } 43 44 func (f *File) hasDataDescriptor() bool { 45 return f.Flags&0x8 != 0 46 } 47 48 // OpenReader will open the Zip file specified by name and return a ReadCloser. 49 func OpenReader(name string) (*ReadCloser, error) { 50 f, err := os.Open(name) 51 if err != nil { 52 return nil, err 53 } 54 fi, err := f.Stat() 55 if err != nil { 56 f.Close() 57 return nil, err 58 } 59 r := new(ReadCloser) 60 if err := r.init(f, fi.Size()); err != nil { 61 f.Close() 62 return nil, err 63 } 64 r.f = f 65 return r, nil 66 } 67 68 // NewReader returns a new Reader reading from r, which is assumed to 69 // have the given size in bytes. 70 func NewReader(r io.ReaderAt, size int64) (*Reader, error) { 71 zr := new(Reader) 72 if err := zr.init(r, size); err != nil { 73 return nil, err 74 } 75 return zr, nil 76 } 77 78 func (z *Reader) init(r io.ReaderAt, size int64) error { 79 end, err := readDirectoryEnd(r, size) 80 if err != nil { 81 return err 82 } 83 if end.directoryRecords > uint64(size)/fileHeaderLen { 84 return fmt.Errorf("archive/zip: TOC declares impossible %d files in %d byte zip", end.directoryRecords, size) 85 } 86 z.r = r 87 z.File = make([]*File, 0, end.directoryRecords) 88 z.Comment = end.comment 89 rs := io.NewSectionReader(r, 0, size) 90 if _, err = rs.Seek(int64(end.directoryOffset), os.SEEK_SET); err != nil { 91 return err 92 } 93 buf := bufio.NewReader(rs) 94 95 // The count of files inside a zip is truncated to fit in a uint16. 96 // Gloss over this by reading headers until we encounter 97 // a bad one, and then only report a ErrFormat or UnexpectedEOF if 98 // the file count modulo 65536 is incorrect. 99 for { 100 f := &File{zip: z, zipr: r, zipsize: size} 101 err = readDirectoryHeader(f, buf) 102 if err == ErrFormat || err == io.ErrUnexpectedEOF { 103 break 104 } 105 if err != nil { 106 return err 107 } 108 z.File = append(z.File, f) 109 } 110 if uint16(len(z.File)) != uint16(end.directoryRecords) { // only compare 16 bits here 111 // Return the readDirectoryHeader error if we read 112 // the wrong number of directory entries. 113 return err 114 } 115 return nil 116 } 117 118 // RegisterDecompressor registers or overrides a custom decompressor for a 119 // specific method ID. If a decompressor for a given method is not found, 120 // Reader will default to looking up the decompressor at the package level. 121 // 122 // Must not be called concurrently with Open on any Files in the Reader. 123 func (z *Reader) RegisterDecompressor(method uint16, dcomp Decompressor) { 124 if z.decompressors == nil { 125 z.decompressors = make(map[uint16]Decompressor) 126 } 127 z.decompressors[method] = dcomp 128 } 129 130 func (z *Reader) decompressor(method uint16) Decompressor { 131 dcomp := z.decompressors[method] 132 if dcomp == nil { 133 dcomp = decompressor(method) 134 } 135 return dcomp 136 } 137 138 // Close closes the Zip file, rendering it unusable for I/O. 139 func (rc *ReadCloser) Close() error { 140 return rc.f.Close() 141 } 142 143 // DataOffset returns the offset of the file's possibly-compressed 144 // data, relative to the beginning of the zip file. 145 // 146 // Most callers should instead use Open, which transparently 147 // decompresses data and verifies checksums. 148 func (f *File) DataOffset() (offset int64, err error) { 149 bodyOffset, err := f.findBodyOffset() 150 if err != nil { 151 return 152 } 153 return f.headerOffset + bodyOffset, nil 154 } 155 156 // Open returns a ReadCloser that provides access to the File's contents. 157 // Multiple files may be read concurrently. 158 func (f *File) Open() (rc io.ReadCloser, err error) { 159 bodyOffset, err := f.findBodyOffset() 160 if err != nil { 161 return 162 } 163 size := int64(f.CompressedSize64) 164 r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size) 165 dcomp := f.zip.decompressor(f.Method) 166 if dcomp == nil { 167 err = ErrAlgorithm 168 return 169 } 170 rc = dcomp(r) 171 var desr io.Reader 172 if f.hasDataDescriptor() { 173 desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen) 174 } 175 rc = &checksumReader{ 176 rc: rc, 177 hash: crc32.NewIEEE(), 178 f: f, 179 desr: desr, 180 } 181 return 182 } 183 184 type checksumReader struct { 185 rc io.ReadCloser 186 hash hash.Hash32 187 nread uint64 // number of bytes read so far 188 f *File 189 desr io.Reader // if non-nil, where to read the data descriptor 190 err error // sticky error 191 } 192 193 func (r *checksumReader) Read(b []byte) (n int, err error) { 194 if r.err != nil { 195 return 0, r.err 196 } 197 n, err = r.rc.Read(b) 198 r.hash.Write(b[:n]) 199 r.nread += uint64(n) 200 if err == nil { 201 return 202 } 203 if err == io.EOF { 204 if r.nread != r.f.UncompressedSize64 { 205 return 0, io.ErrUnexpectedEOF 206 } 207 if r.desr != nil { 208 if err1 := readDataDescriptor(r.desr, r.f); err1 != nil { 209 if err1 == io.EOF { 210 err = io.ErrUnexpectedEOF 211 } else { 212 err = err1 213 } 214 } else if r.hash.Sum32() != r.f.CRC32 { 215 err = ErrChecksum 216 } 217 } else { 218 // If there's not a data descriptor, we still compare 219 // the CRC32 of what we've read against the file header 220 // or TOC's CRC32, if it seems like it was set. 221 if r.f.CRC32 != 0 && r.hash.Sum32() != r.f.CRC32 { 222 err = ErrChecksum 223 } 224 } 225 } 226 r.err = err 227 return 228 } 229 230 func (r *checksumReader) Close() error { return r.rc.Close() } 231 232 // findBodyOffset does the minimum work to verify the file has a header 233 // and returns the file body offset. 234 func (f *File) findBodyOffset() (int64, error) { 235 var buf [fileHeaderLen]byte 236 if _, err := f.zipr.ReadAt(buf[:], f.headerOffset); err != nil { 237 return 0, err 238 } 239 b := readBuf(buf[:]) 240 if sig := b.uint32(); sig != fileHeaderSignature { 241 return 0, ErrFormat 242 } 243 b = b[22:] // skip over most of the header 244 filenameLen := int(b.uint16()) 245 extraLen := int(b.uint16()) 246 return int64(fileHeaderLen + filenameLen + extraLen), nil 247 } 248 249 // readDirectoryHeader attempts to read a directory header from r. 250 // It returns io.ErrUnexpectedEOF if it cannot read a complete header, 251 // and ErrFormat if it doesn't find a valid header signature. 252 func readDirectoryHeader(f *File, r io.Reader) error { 253 var buf [directoryHeaderLen]byte 254 if _, err := io.ReadFull(r, buf[:]); err != nil { 255 return err 256 } 257 b := readBuf(buf[:]) 258 if sig := b.uint32(); sig != directoryHeaderSignature { 259 return ErrFormat 260 } 261 f.CreatorVersion = b.uint16() 262 f.ReaderVersion = b.uint16() 263 f.Flags = b.uint16() 264 f.Method = b.uint16() 265 f.ModifiedTime = b.uint16() 266 f.ModifiedDate = b.uint16() 267 f.CRC32 = b.uint32() 268 f.CompressedSize = b.uint32() 269 f.UncompressedSize = b.uint32() 270 f.CompressedSize64 = uint64(f.CompressedSize) 271 f.UncompressedSize64 = uint64(f.UncompressedSize) 272 filenameLen := int(b.uint16()) 273 extraLen := int(b.uint16()) 274 commentLen := int(b.uint16()) 275 b = b[4:] // skipped start disk number and internal attributes (2x uint16) 276 f.ExternalAttrs = b.uint32() 277 f.headerOffset = int64(b.uint32()) 278 d := make([]byte, filenameLen+extraLen+commentLen) 279 if _, err := io.ReadFull(r, d); err != nil { 280 return err 281 } 282 f.Name = string(d[:filenameLen]) 283 f.Extra = d[filenameLen : filenameLen+extraLen] 284 f.Comment = string(d[filenameLen+extraLen:]) 285 286 if len(f.Extra) > 0 { 287 b := readBuf(f.Extra) 288 for len(b) >= 4 { // need at least tag and size 289 tag := b.uint16() 290 size := b.uint16() 291 if int(size) > len(b) { 292 return ErrFormat 293 } 294 if tag == zip64ExtraId { 295 // update directory values from the zip64 extra block 296 eb := readBuf(b[:size]) 297 if len(eb) >= 8 { 298 f.UncompressedSize64 = eb.uint64() 299 } 300 if len(eb) >= 8 { 301 f.CompressedSize64 = eb.uint64() 302 } 303 if len(eb) >= 8 { 304 f.headerOffset = int64(eb.uint64()) 305 } 306 } 307 b = b[size:] 308 } 309 // Should have consumed the whole header. 310 // But popular zip & JAR creation tools are broken and 311 // may pad extra zeros at the end, so accept those 312 // too. See golang.org/issue/8186. 313 for _, v := range b { 314 if v != 0 { 315 return ErrFormat 316 } 317 } 318 } 319 return nil 320 } 321 322 func readDataDescriptor(r io.Reader, f *File) error { 323 var buf [dataDescriptorLen]byte 324 325 // The spec says: "Although not originally assigned a 326 // signature, the value 0x08074b50 has commonly been adopted 327 // as a signature value for the data descriptor record. 328 // Implementers should be aware that ZIP files may be 329 // encountered with or without this signature marking data 330 // descriptors and should account for either case when reading 331 // ZIP files to ensure compatibility." 332 // 333 // dataDescriptorLen includes the size of the signature but 334 // first read just those 4 bytes to see if it exists. 335 if _, err := io.ReadFull(r, buf[:4]); err != nil { 336 return err 337 } 338 off := 0 339 maybeSig := readBuf(buf[:4]) 340 if maybeSig.uint32() != dataDescriptorSignature { 341 // No data descriptor signature. Keep these four 342 // bytes. 343 off += 4 344 } 345 if _, err := io.ReadFull(r, buf[off:12]); err != nil { 346 return err 347 } 348 b := readBuf(buf[:12]) 349 if b.uint32() != f.CRC32 { 350 return ErrChecksum 351 } 352 353 // The two sizes that follow here can be either 32 bits or 64 bits 354 // but the spec is not very clear on this and different 355 // interpretations has been made causing incompatibilities. We 356 // already have the sizes from the central directory so we can 357 // just ignore these. 358 359 return nil 360 } 361 362 func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) { 363 // look for directoryEndSignature in the last 1k, then in the last 65k 364 var buf []byte 365 var directoryEndOffset int64 366 for i, bLen := range []int64{1024, 65 * 1024} { 367 if bLen > size { 368 bLen = size 369 } 370 buf = make([]byte, int(bLen)) 371 if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF { 372 return nil, err 373 } 374 if p := findSignatureInBlock(buf); p >= 0 { 375 buf = buf[p:] 376 directoryEndOffset = size - bLen + int64(p) 377 break 378 } 379 if i == 1 || bLen == size { 380 return nil, ErrFormat 381 } 382 } 383 384 // read header into struct 385 b := readBuf(buf[4:]) // skip signature 386 d := &directoryEnd{ 387 diskNbr: uint32(b.uint16()), 388 dirDiskNbr: uint32(b.uint16()), 389 dirRecordsThisDisk: uint64(b.uint16()), 390 directoryRecords: uint64(b.uint16()), 391 directorySize: uint64(b.uint32()), 392 directoryOffset: uint64(b.uint32()), 393 commentLen: b.uint16(), 394 } 395 l := int(d.commentLen) 396 if l > len(b) { 397 return nil, errors.New("zip: invalid comment length") 398 } 399 d.comment = string(b[:l]) 400 401 // These values mean that the file can be a zip64 file 402 if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff { 403 p, err := findDirectory64End(r, directoryEndOffset) 404 if err == nil && p >= 0 { 405 err = readDirectory64End(r, p, d) 406 } 407 if err != nil { 408 return nil, err 409 } 410 } 411 // Make sure directoryOffset points to somewhere in our file. 412 if o := int64(d.directoryOffset); o < 0 || o >= size { 413 return nil, ErrFormat 414 } 415 return d, nil 416 } 417 418 // findDirectory64End tries to read the zip64 locator just before the 419 // directory end and returns the offset of the zip64 directory end if 420 // found. 421 func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) { 422 locOffset := directoryEndOffset - directory64LocLen 423 if locOffset < 0 { 424 return -1, nil // no need to look for a header outside the file 425 } 426 buf := make([]byte, directory64LocLen) 427 if _, err := r.ReadAt(buf, locOffset); err != nil { 428 return -1, err 429 } 430 b := readBuf(buf) 431 if sig := b.uint32(); sig != directory64LocSignature { 432 return -1, nil 433 } 434 if b.uint32() != 0 { // number of the disk with the start of the zip64 end of central directory 435 return -1, nil // the file is not a valid zip64-file 436 } 437 p := b.uint64() // relative offset of the zip64 end of central directory record 438 if b.uint32() != 1 { // total number of disks 439 return -1, nil // the file is not a valid zip64-file 440 } 441 return int64(p), nil 442 } 443 444 // readDirectory64End reads the zip64 directory end and updates the 445 // directory end with the zip64 directory end values. 446 func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) { 447 buf := make([]byte, directory64EndLen) 448 if _, err := r.ReadAt(buf, offset); err != nil { 449 return err 450 } 451 452 b := readBuf(buf) 453 if sig := b.uint32(); sig != directory64EndSignature { 454 return ErrFormat 455 } 456 457 b = b[12:] // skip dir size, version and version needed (uint64 + 2x uint16) 458 d.diskNbr = b.uint32() // number of this disk 459 d.dirDiskNbr = b.uint32() // number of the disk with the start of the central directory 460 d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk 461 d.directoryRecords = b.uint64() // total number of entries in the central directory 462 d.directorySize = b.uint64() // size of the central directory 463 d.directoryOffset = b.uint64() // offset of start of central directory with respect to the starting disk number 464 465 return nil 466 } 467 468 func findSignatureInBlock(b []byte) int { 469 for i := len(b) - directoryEndLen; i >= 0; i-- { 470 // defined from directoryEndSignature in struct.go 471 if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 { 472 // n is length of comment 473 n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8 474 if n+directoryEndLen+i <= len(b) { 475 return i 476 } 477 } 478 } 479 return -1 480 } 481 482 type readBuf []byte 483 484 func (b *readBuf) uint16() uint16 { 485 v := binary.LittleEndian.Uint16(*b) 486 *b = (*b)[2:] 487 return v 488 } 489 490 func (b *readBuf) uint32() uint32 { 491 v := binary.LittleEndian.Uint32(*b) 492 *b = (*b)[4:] 493 return v 494 } 495 496 func (b *readBuf) uint64() uint64 { 497 v := binary.LittleEndian.Uint64(*b) 498 *b = (*b)[8:] 499 return v 500 }