github.com/bgentry/go@v0.0.0-20150121062915-6cf5a733d54d/src/archive/zip/reader.go (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package zip 6 7 import ( 8 "bufio" 9 "encoding/binary" 10 "errors" 11 "hash" 12 "hash/crc32" 13 "io" 14 "os" 15 ) 16 17 var ( 18 ErrFormat = errors.New("zip: not a valid zip file") 19 ErrAlgorithm = errors.New("zip: unsupported compression algorithm") 20 ErrChecksum = errors.New("zip: checksum error") 21 ) 22 23 type Reader struct { 24 r io.ReaderAt 25 File []*File 26 Comment string 27 } 28 29 type ReadCloser struct { 30 f *os.File 31 Reader 32 } 33 34 type File struct { 35 FileHeader 36 zipr io.ReaderAt 37 zipsize int64 38 headerOffset int64 39 } 40 41 func (f *File) hasDataDescriptor() bool { 42 return f.Flags&0x8 != 0 43 } 44 45 // OpenReader will open the Zip file specified by name and return a ReadCloser. 46 func OpenReader(name string) (*ReadCloser, error) { 47 f, err := os.Open(name) 48 if err != nil { 49 return nil, err 50 } 51 fi, err := f.Stat() 52 if err != nil { 53 f.Close() 54 return nil, err 55 } 56 r := new(ReadCloser) 57 if err := r.init(f, fi.Size()); err != nil { 58 f.Close() 59 return nil, err 60 } 61 r.f = f 62 return r, nil 63 } 64 65 // NewReader returns a new Reader reading from r, which is assumed to 66 // have the given size in bytes. 67 func NewReader(r io.ReaderAt, size int64) (*Reader, error) { 68 zr := new(Reader) 69 if err := zr.init(r, size); err != nil { 70 return nil, err 71 } 72 return zr, nil 73 } 74 75 func (z *Reader) init(r io.ReaderAt, size int64) error { 76 end, err := readDirectoryEnd(r, size) 77 if err != nil { 78 return err 79 } 80 z.r = r 81 z.File = make([]*File, 0, end.directoryRecords) 82 z.Comment = end.comment 83 rs := io.NewSectionReader(r, 0, size) 84 if _, err = rs.Seek(int64(end.directoryOffset), os.SEEK_SET); err != nil { 85 return err 86 } 87 buf := bufio.NewReader(rs) 88 89 // The count of files inside a zip is truncated to fit in a uint16. 90 // Gloss over this by reading headers until we encounter 91 // a bad one, and then only report a ErrFormat or UnexpectedEOF if 92 // the file count modulo 65536 is incorrect. 93 for { 94 f := &File{zipr: r, zipsize: size} 95 err = readDirectoryHeader(f, buf) 96 if err == ErrFormat || err == io.ErrUnexpectedEOF { 97 break 98 } 99 if err != nil { 100 return err 101 } 102 z.File = append(z.File, f) 103 } 104 if uint16(len(z.File)) != uint16(end.directoryRecords) { // only compare 16 bits here 105 // Return the readDirectoryHeader error if we read 106 // the wrong number of directory entries. 107 return err 108 } 109 return nil 110 } 111 112 // Close closes the Zip file, rendering it unusable for I/O. 113 func (rc *ReadCloser) Close() error { 114 return rc.f.Close() 115 } 116 117 // DataOffset returns the offset of the file's possibly-compressed 118 // data, relative to the beginning of the zip file. 119 // 120 // Most callers should instead use Open, which transparently 121 // decompresses data and verifies checksums. 122 func (f *File) DataOffset() (offset int64, err error) { 123 bodyOffset, err := f.findBodyOffset() 124 if err != nil { 125 return 126 } 127 return f.headerOffset + bodyOffset, nil 128 } 129 130 // Open returns a ReadCloser that provides access to the File's contents. 131 // Multiple files may be read concurrently. 132 func (f *File) Open() (rc io.ReadCloser, err error) { 133 bodyOffset, err := f.findBodyOffset() 134 if err != nil { 135 return 136 } 137 size := int64(f.CompressedSize64) 138 r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size) 139 dcomp := decompressor(f.Method) 140 if dcomp == nil { 141 err = ErrAlgorithm 142 return 143 } 144 rc = dcomp(r) 145 var desr io.Reader 146 if f.hasDataDescriptor() { 147 desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen) 148 } 149 rc = &checksumReader{rc, crc32.NewIEEE(), f, desr, nil} 150 return 151 } 152 153 type checksumReader struct { 154 rc io.ReadCloser 155 hash hash.Hash32 156 f *File 157 desr io.Reader // if non-nil, where to read the data descriptor 158 err error // sticky error 159 } 160 161 func (r *checksumReader) Read(b []byte) (n int, err error) { 162 if r.err != nil { 163 return 0, r.err 164 } 165 n, err = r.rc.Read(b) 166 r.hash.Write(b[:n]) 167 if err == nil { 168 return 169 } 170 if err == io.EOF { 171 if r.desr != nil { 172 if err1 := readDataDescriptor(r.desr, r.f); err1 != nil { 173 err = err1 174 } else if r.hash.Sum32() != r.f.CRC32 { 175 err = ErrChecksum 176 } 177 } else { 178 // If there's not a data descriptor, we still compare 179 // the CRC32 of what we've read against the file header 180 // or TOC's CRC32, if it seems like it was set. 181 if r.f.CRC32 != 0 && r.hash.Sum32() != r.f.CRC32 { 182 err = ErrChecksum 183 } 184 } 185 } 186 r.err = err 187 return 188 } 189 190 func (r *checksumReader) Close() error { return r.rc.Close() } 191 192 // findBodyOffset does the minimum work to verify the file has a header 193 // and returns the file body offset. 194 func (f *File) findBodyOffset() (int64, error) { 195 var buf [fileHeaderLen]byte 196 if _, err := f.zipr.ReadAt(buf[:], f.headerOffset); err != nil { 197 return 0, err 198 } 199 b := readBuf(buf[:]) 200 if sig := b.uint32(); sig != fileHeaderSignature { 201 return 0, ErrFormat 202 } 203 b = b[22:] // skip over most of the header 204 filenameLen := int(b.uint16()) 205 extraLen := int(b.uint16()) 206 return int64(fileHeaderLen + filenameLen + extraLen), nil 207 } 208 209 // readDirectoryHeader attempts to read a directory header from r. 210 // It returns io.ErrUnexpectedEOF if it cannot read a complete header, 211 // and ErrFormat if it doesn't find a valid header signature. 212 func readDirectoryHeader(f *File, r io.Reader) error { 213 var buf [directoryHeaderLen]byte 214 if _, err := io.ReadFull(r, buf[:]); err != nil { 215 return err 216 } 217 b := readBuf(buf[:]) 218 if sig := b.uint32(); sig != directoryHeaderSignature { 219 return ErrFormat 220 } 221 f.CreatorVersion = b.uint16() 222 f.ReaderVersion = b.uint16() 223 f.Flags = b.uint16() 224 f.Method = b.uint16() 225 f.ModifiedTime = b.uint16() 226 f.ModifiedDate = b.uint16() 227 f.CRC32 = b.uint32() 228 f.CompressedSize = b.uint32() 229 f.UncompressedSize = b.uint32() 230 f.CompressedSize64 = uint64(f.CompressedSize) 231 f.UncompressedSize64 = uint64(f.UncompressedSize) 232 filenameLen := int(b.uint16()) 233 extraLen := int(b.uint16()) 234 commentLen := int(b.uint16()) 235 b = b[4:] // skipped start disk number and internal attributes (2x uint16) 236 f.ExternalAttrs = b.uint32() 237 f.headerOffset = int64(b.uint32()) 238 d := make([]byte, filenameLen+extraLen+commentLen) 239 if _, err := io.ReadFull(r, d); err != nil { 240 return err 241 } 242 f.Name = string(d[:filenameLen]) 243 f.Extra = d[filenameLen : filenameLen+extraLen] 244 f.Comment = string(d[filenameLen+extraLen:]) 245 246 if len(f.Extra) > 0 { 247 b := readBuf(f.Extra) 248 for len(b) >= 4 { // need at least tag and size 249 tag := b.uint16() 250 size := b.uint16() 251 if int(size) > len(b) { 252 return ErrFormat 253 } 254 if tag == zip64ExtraId { 255 // update directory values from the zip64 extra block 256 eb := readBuf(b[:size]) 257 if len(eb) >= 8 { 258 f.UncompressedSize64 = eb.uint64() 259 } 260 if len(eb) >= 8 { 261 f.CompressedSize64 = eb.uint64() 262 } 263 if len(eb) >= 8 { 264 f.headerOffset = int64(eb.uint64()) 265 } 266 } 267 b = b[size:] 268 } 269 // Should have consumed the whole header. 270 // But popular zip & JAR creation tools are broken and 271 // may pad extra zeros at the end, so accept those 272 // too. See golang.org/issue/8186. 273 for _, v := range b { 274 if v != 0 { 275 return ErrFormat 276 } 277 } 278 } 279 return nil 280 } 281 282 func readDataDescriptor(r io.Reader, f *File) error { 283 var buf [dataDescriptorLen]byte 284 285 // The spec says: "Although not originally assigned a 286 // signature, the value 0x08074b50 has commonly been adopted 287 // as a signature value for the data descriptor record. 288 // Implementers should be aware that ZIP files may be 289 // encountered with or without this signature marking data 290 // descriptors and should account for either case when reading 291 // ZIP files to ensure compatibility." 292 // 293 // dataDescriptorLen includes the size of the signature but 294 // first read just those 4 bytes to see if it exists. 295 if _, err := io.ReadFull(r, buf[:4]); err != nil { 296 return err 297 } 298 off := 0 299 maybeSig := readBuf(buf[:4]) 300 if maybeSig.uint32() != dataDescriptorSignature { 301 // No data descriptor signature. Keep these four 302 // bytes. 303 off += 4 304 } 305 if _, err := io.ReadFull(r, buf[off:12]); err != nil { 306 return err 307 } 308 b := readBuf(buf[:12]) 309 if b.uint32() != f.CRC32 { 310 return ErrChecksum 311 } 312 313 // The two sizes that follow here can be either 32 bits or 64 bits 314 // but the spec is not very clear on this and different 315 // interpretations has been made causing incompatibilities. We 316 // already have the sizes from the central directory so we can 317 // just ignore these. 318 319 return nil 320 } 321 322 func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) { 323 // look for directoryEndSignature in the last 1k, then in the last 65k 324 var buf []byte 325 var directoryEndOffset int64 326 for i, bLen := range []int64{1024, 65 * 1024} { 327 if bLen > size { 328 bLen = size 329 } 330 buf = make([]byte, int(bLen)) 331 if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF { 332 return nil, err 333 } 334 if p := findSignatureInBlock(buf); p >= 0 { 335 buf = buf[p:] 336 directoryEndOffset = size - bLen + int64(p) 337 break 338 } 339 if i == 1 || bLen == size { 340 return nil, ErrFormat 341 } 342 } 343 344 // read header into struct 345 b := readBuf(buf[4:]) // skip signature 346 d := &directoryEnd{ 347 diskNbr: uint32(b.uint16()), 348 dirDiskNbr: uint32(b.uint16()), 349 dirRecordsThisDisk: uint64(b.uint16()), 350 directoryRecords: uint64(b.uint16()), 351 directorySize: uint64(b.uint32()), 352 directoryOffset: uint64(b.uint32()), 353 commentLen: b.uint16(), 354 } 355 l := int(d.commentLen) 356 if l > len(b) { 357 return nil, errors.New("zip: invalid comment length") 358 } 359 d.comment = string(b[:l]) 360 361 p, err := findDirectory64End(r, directoryEndOffset) 362 if err == nil && p >= 0 { 363 err = readDirectory64End(r, p, d) 364 } 365 if err != nil { 366 return nil, err 367 } 368 369 // Make sure directoryOffset points to somewhere in our file. 370 if o := int64(d.directoryOffset); o < 0 || o >= size { 371 return nil, ErrFormat 372 } 373 return d, nil 374 } 375 376 // findDirectory64End tries to read the zip64 locator just before the 377 // directory end and returns the offset of the zip64 directory end if 378 // found. 379 func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) { 380 locOffset := directoryEndOffset - directory64LocLen 381 if locOffset < 0 { 382 return -1, nil // no need to look for a header outside the file 383 } 384 buf := make([]byte, directory64LocLen) 385 if _, err := r.ReadAt(buf, locOffset); err != nil { 386 return -1, err 387 } 388 b := readBuf(buf) 389 if sig := b.uint32(); sig != directory64LocSignature { 390 return -1, nil 391 } 392 b = b[4:] // skip number of the disk with the start of the zip64 end of central directory 393 p := b.uint64() // relative offset of the zip64 end of central directory record 394 return int64(p), nil 395 } 396 397 // readDirectory64End reads the zip64 directory end and updates the 398 // directory end with the zip64 directory end values. 399 func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) { 400 buf := make([]byte, directory64EndLen) 401 if _, err := r.ReadAt(buf, offset); err != nil { 402 return err 403 } 404 405 b := readBuf(buf) 406 if sig := b.uint32(); sig != directory64EndSignature { 407 return ErrFormat 408 } 409 410 b = b[12:] // skip dir size, version and version needed (uint64 + 2x uint16) 411 d.diskNbr = b.uint32() // number of this disk 412 d.dirDiskNbr = b.uint32() // number of the disk with the start of the central directory 413 d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk 414 d.directoryRecords = b.uint64() // total number of entries in the central directory 415 d.directorySize = b.uint64() // size of the central directory 416 d.directoryOffset = b.uint64() // offset of start of central directory with respect to the starting disk number 417 418 return nil 419 } 420 421 func findSignatureInBlock(b []byte) int { 422 for i := len(b) - directoryEndLen; i >= 0; i-- { 423 // defined from directoryEndSignature in struct.go 424 if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 { 425 // n is length of comment 426 n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8 427 if n+directoryEndLen+i <= len(b) { 428 return i 429 } 430 } 431 } 432 return -1 433 } 434 435 type readBuf []byte 436 437 func (b *readBuf) uint16() uint16 { 438 v := binary.LittleEndian.Uint16(*b) 439 *b = (*b)[2:] 440 return v 441 } 442 443 func (b *readBuf) uint32() uint32 { 444 v := binary.LittleEndian.Uint32(*b) 445 *b = (*b)[4:] 446 return v 447 } 448 449 func (b *readBuf) uint64() uint64 { 450 v := binary.LittleEndian.Uint64(*b) 451 *b = (*b)[8:] 452 return v 453 }