github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/pkg/archive/zip/reader.go (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package zip 6 7 import ( 8 "bufio" 9 "compress/flate" 10 "encoding/binary" 11 "errors" 12 "hash" 13 "hash/crc32" 14 "io" 15 "io/ioutil" 16 "os" 17 ) 18 19 var ( 20 ErrFormat = errors.New("zip: not a valid zip file") 21 ErrAlgorithm = errors.New("zip: unsupported compression algorithm") 22 ErrChecksum = errors.New("zip: checksum error") 23 ) 24 25 type Reader struct { 26 r io.ReaderAt 27 File []*File 28 Comment string 29 } 30 31 type ReadCloser struct { 32 f *os.File 33 Reader 34 } 35 36 type File struct { 37 FileHeader 38 zipr io.ReaderAt 39 zipsize int64 40 headerOffset int64 41 } 42 43 func (f *File) hasDataDescriptor() bool { 44 return f.Flags&0x8 != 0 45 } 46 47 // OpenReader will open the Zip file specified by name and return a ReadCloser. 48 func OpenReader(name string) (*ReadCloser, error) { 49 f, err := os.Open(name) 50 if err != nil { 51 return nil, err 52 } 53 fi, err := f.Stat() 54 if err != nil { 55 f.Close() 56 return nil, err 57 } 58 r := new(ReadCloser) 59 if err := r.init(f, fi.Size()); err != nil { 60 f.Close() 61 return nil, err 62 } 63 r.f = f 64 return r, nil 65 } 66 67 // NewReader returns a new Reader reading from r, which is assumed to 68 // have the given size in bytes. 69 func NewReader(r io.ReaderAt, size int64) (*Reader, error) { 70 zr := new(Reader) 71 if err := zr.init(r, size); err != nil { 72 return nil, err 73 } 74 return zr, nil 75 } 76 77 func (z *Reader) init(r io.ReaderAt, size int64) error { 78 end, err := readDirectoryEnd(r, size) 79 if err != nil { 80 return err 81 } 82 z.r = r 83 z.File = make([]*File, 0, end.directoryRecords) 84 z.Comment = end.comment 85 rs := io.NewSectionReader(r, 0, size) 86 if _, err = rs.Seek(int64(end.directoryOffset), os.SEEK_SET); err != nil { 87 return err 88 } 89 buf := bufio.NewReader(rs) 90 91 // The count of files inside a zip is truncated to fit in a uint16. 92 // Gloss over this by reading headers until we encounter 93 // a bad one, and then only report a ErrFormat or UnexpectedEOF if 94 // the file count modulo 65536 is incorrect. 95 for { 96 f := &File{zipr: r, zipsize: size} 97 err = readDirectoryHeader(f, buf) 98 if err == ErrFormat || err == io.ErrUnexpectedEOF { 99 break 100 } 101 if err != nil { 102 return err 103 } 104 z.File = append(z.File, f) 105 } 106 if uint16(len(z.File)) != uint16(end.directoryRecords) { // only compare 16 bits here 107 // Return the readDirectoryHeader error if we read 108 // the wrong number of directory entries. 109 return err 110 } 111 return nil 112 } 113 114 // Close closes the Zip file, rendering it unusable for I/O. 115 func (rc *ReadCloser) Close() error { 116 return rc.f.Close() 117 } 118 119 // Open returns a ReadCloser that provides access to the File's contents. 120 // Multiple files may be read concurrently. 121 func (f *File) Open() (rc io.ReadCloser, err error) { 122 bodyOffset, err := f.findBodyOffset() 123 if err != nil { 124 return 125 } 126 size := int64(f.CompressedSize64) 127 r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size) 128 switch f.Method { 129 case Store: // (no compression) 130 rc = ioutil.NopCloser(r) 131 case Deflate: 132 rc = flate.NewReader(r) 133 default: 134 err = ErrAlgorithm 135 return 136 } 137 var desr io.Reader 138 if f.hasDataDescriptor() { 139 desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen) 140 } 141 rc = &checksumReader{rc, crc32.NewIEEE(), f, desr, nil} 142 return 143 } 144 145 type checksumReader struct { 146 rc io.ReadCloser 147 hash hash.Hash32 148 f *File 149 desr io.Reader // if non-nil, where to read the data descriptor 150 err error // sticky error 151 } 152 153 func (r *checksumReader) Read(b []byte) (n int, err error) { 154 if r.err != nil { 155 return 0, r.err 156 } 157 n, err = r.rc.Read(b) 158 r.hash.Write(b[:n]) 159 if err == nil { 160 return 161 } 162 if err == io.EOF { 163 if r.desr != nil { 164 if err1 := readDataDescriptor(r.desr, r.f); err1 != nil { 165 err = err1 166 } else if r.hash.Sum32() != r.f.CRC32 { 167 err = ErrChecksum 168 } 169 } else { 170 // If there's not a data descriptor, we still compare 171 // the CRC32 of what we've read against the file header 172 // or TOC's CRC32, if it seems like it was set. 173 if r.f.CRC32 != 0 && r.hash.Sum32() != r.f.CRC32 { 174 err = ErrChecksum 175 } 176 } 177 } 178 r.err = err 179 return 180 } 181 182 func (r *checksumReader) Close() error { return r.rc.Close() } 183 184 // findBodyOffset does the minimum work to verify the file has a header 185 // and returns the file body offset. 186 func (f *File) findBodyOffset() (int64, error) { 187 r := io.NewSectionReader(f.zipr, f.headerOffset, f.zipsize-f.headerOffset) 188 var buf [fileHeaderLen]byte 189 if _, err := io.ReadFull(r, buf[:]); err != nil { 190 return 0, err 191 } 192 b := readBuf(buf[:]) 193 if sig := b.uint32(); sig != fileHeaderSignature { 194 return 0, ErrFormat 195 } 196 b = b[22:] // skip over most of the header 197 filenameLen := int(b.uint16()) 198 extraLen := int(b.uint16()) 199 return int64(fileHeaderLen + filenameLen + extraLen), nil 200 } 201 202 // readDirectoryHeader attempts to read a directory header from r. 203 // It returns io.ErrUnexpectedEOF if it cannot read a complete header, 204 // and ErrFormat if it doesn't find a valid header signature. 205 func readDirectoryHeader(f *File, r io.Reader) error { 206 var buf [directoryHeaderLen]byte 207 if _, err := io.ReadFull(r, buf[:]); err != nil { 208 return err 209 } 210 b := readBuf(buf[:]) 211 if sig := b.uint32(); sig != directoryHeaderSignature { 212 return ErrFormat 213 } 214 f.CreatorVersion = b.uint16() 215 f.ReaderVersion = b.uint16() 216 f.Flags = b.uint16() 217 f.Method = b.uint16() 218 f.ModifiedTime = b.uint16() 219 f.ModifiedDate = b.uint16() 220 f.CRC32 = b.uint32() 221 f.CompressedSize = b.uint32() 222 f.UncompressedSize = b.uint32() 223 f.CompressedSize64 = uint64(f.CompressedSize) 224 f.UncompressedSize64 = uint64(f.UncompressedSize) 225 filenameLen := int(b.uint16()) 226 extraLen := int(b.uint16()) 227 commentLen := int(b.uint16()) 228 b = b[4:] // skipped start disk number and internal attributes (2x uint16) 229 f.ExternalAttrs = b.uint32() 230 f.headerOffset = int64(b.uint32()) 231 d := make([]byte, filenameLen+extraLen+commentLen) 232 if _, err := io.ReadFull(r, d); err != nil { 233 return err 234 } 235 f.Name = string(d[:filenameLen]) 236 f.Extra = d[filenameLen : filenameLen+extraLen] 237 f.Comment = string(d[filenameLen+extraLen:]) 238 239 if len(f.Extra) > 0 { 240 b := readBuf(f.Extra) 241 for len(b) >= 4 { // need at least tag and size 242 tag := b.uint16() 243 size := b.uint16() 244 if int(size) > len(b) { 245 return ErrFormat 246 } 247 if tag == zip64ExtraId { 248 // update directory values from the zip64 extra block 249 eb := readBuf(b) 250 if len(eb) >= 8 { 251 f.UncompressedSize64 = eb.uint64() 252 } 253 if len(eb) >= 8 { 254 f.CompressedSize64 = eb.uint64() 255 } 256 if len(eb) >= 8 { 257 f.headerOffset = int64(eb.uint64()) 258 } 259 } 260 b = b[size:] 261 } 262 // Should have consumed the whole header. 263 if len(b) != 0 { 264 return ErrFormat 265 } 266 } 267 return nil 268 } 269 270 func readDataDescriptor(r io.Reader, f *File) error { 271 var buf [dataDescriptorLen]byte 272 273 // The spec says: "Although not originally assigned a 274 // signature, the value 0x08074b50 has commonly been adopted 275 // as a signature value for the data descriptor record. 276 // Implementers should be aware that ZIP files may be 277 // encountered with or without this signature marking data 278 // descriptors and should account for either case when reading 279 // ZIP files to ensure compatibility." 280 // 281 // dataDescriptorLen includes the size of the signature but 282 // first read just those 4 bytes to see if it exists. 283 if _, err := io.ReadFull(r, buf[:4]); err != nil { 284 return err 285 } 286 off := 0 287 maybeSig := readBuf(buf[:4]) 288 if maybeSig.uint32() != dataDescriptorSignature { 289 // No data descriptor signature. Keep these four 290 // bytes. 291 off += 4 292 } 293 if _, err := io.ReadFull(r, buf[off:12]); err != nil { 294 return err 295 } 296 b := readBuf(buf[:12]) 297 if b.uint32() != f.CRC32 { 298 return ErrChecksum 299 } 300 301 // The two sizes that follow here can be either 32 bits or 64 bits 302 // but the spec is not very clear on this and different 303 // interpretations has been made causing incompatibilities. We 304 // already have the sizes from the central directory so we can 305 // just ignore these. 306 307 return nil 308 } 309 310 func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) { 311 // look for directoryEndSignature in the last 1k, then in the last 65k 312 var buf []byte 313 var directoryEndOffset int64 314 for i, bLen := range []int64{1024, 65 * 1024} { 315 if bLen > size { 316 bLen = size 317 } 318 buf = make([]byte, int(bLen)) 319 if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF { 320 return nil, err 321 } 322 if p := findSignatureInBlock(buf); p >= 0 { 323 buf = buf[p:] 324 directoryEndOffset = size - bLen + int64(p) 325 break 326 } 327 if i == 1 || bLen == size { 328 return nil, ErrFormat 329 } 330 } 331 332 // read header into struct 333 b := readBuf(buf[4:]) // skip signature 334 d := &directoryEnd{ 335 diskNbr: uint32(b.uint16()), 336 dirDiskNbr: uint32(b.uint16()), 337 dirRecordsThisDisk: uint64(b.uint16()), 338 directoryRecords: uint64(b.uint16()), 339 directorySize: uint64(b.uint32()), 340 directoryOffset: uint64(b.uint32()), 341 commentLen: b.uint16(), 342 } 343 l := int(d.commentLen) 344 if l > len(b) { 345 return nil, errors.New("zip: invalid comment length") 346 } 347 d.comment = string(b[:l]) 348 349 p, err := findDirectory64End(r, directoryEndOffset) 350 if err == nil && p >= 0 { 351 err = readDirectory64End(r, p, d) 352 } 353 if err != nil { 354 return nil, err 355 } 356 357 // Make sure directoryOffset points to somewhere in our file. 358 if o := int64(d.directoryOffset); o < 0 || o >= size { 359 return nil, ErrFormat 360 } 361 return d, nil 362 } 363 364 // findDirectory64End tries to read the zip64 locator just before the 365 // directory end and returns the offset of the zip64 directory end if 366 // found. 367 func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) { 368 locOffset := directoryEndOffset - directory64LocLen 369 if locOffset < 0 { 370 return -1, nil // no need to look for a header outside the file 371 } 372 buf := make([]byte, directory64LocLen) 373 if _, err := r.ReadAt(buf, locOffset); err != nil { 374 return -1, err 375 } 376 b := readBuf(buf) 377 if sig := b.uint32(); sig != directory64LocSignature { 378 return -1, nil 379 } 380 b = b[4:] // skip number of the disk with the start of the zip64 end of central directory 381 p := b.uint64() // relative offset of the zip64 end of central directory record 382 return int64(p), nil 383 } 384 385 // readDirectory64End reads the zip64 directory end and updates the 386 // directory end with the zip64 directory end values. 387 func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) { 388 buf := make([]byte, directory64EndLen) 389 if _, err := r.ReadAt(buf, offset); err != nil { 390 return err 391 } 392 393 b := readBuf(buf) 394 if sig := b.uint32(); sig != directory64EndSignature { 395 return ErrFormat 396 } 397 398 b = b[12:] // skip dir size, version and version needed (uint64 + 2x uint16) 399 d.diskNbr = b.uint32() // number of this disk 400 d.dirDiskNbr = b.uint32() // number of the disk with the start of the central directory 401 d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk 402 d.directoryRecords = b.uint64() // total number of entries in the central directory 403 d.directorySize = b.uint64() // size of the central directory 404 d.directoryOffset = b.uint64() // offset of start of central directory with respect to the starting disk number 405 406 return nil 407 } 408 409 func findSignatureInBlock(b []byte) int { 410 for i := len(b) - directoryEndLen; i >= 0; i-- { 411 // defined from directoryEndSignature in struct.go 412 if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 { 413 // n is length of comment 414 n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8 415 if n+directoryEndLen+i <= len(b) { 416 return i 417 } 418 } 419 } 420 return -1 421 } 422 423 type readBuf []byte 424 425 func (b *readBuf) uint16() uint16 { 426 v := binary.LittleEndian.Uint16(*b) 427 *b = (*b)[2:] 428 return v 429 } 430 431 func (b *readBuf) uint32() uint32 { 432 v := binary.LittleEndian.Uint32(*b) 433 *b = (*b)[4:] 434 return v 435 } 436 437 func (b *readBuf) uint64() uint64 { 438 v := binary.LittleEndian.Uint64(*b) 439 *b = (*b)[8:] 440 return v 441 }