github.com/varialus/godfly@v0.0.0-20130904042352-1934f9f095ab/src/pkg/archive/zip/reader.go (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package zip 6 7 import ( 8 "bufio" 9 "encoding/binary" 10 "errors" 11 "hash" 12 "hash/crc32" 13 "io" 14 "os" 15 ) 16 17 var ( 18 ErrFormat = errors.New("zip: not a valid zip file") 19 ErrAlgorithm = errors.New("zip: unsupported compression algorithm") 20 ErrChecksum = errors.New("zip: checksum error") 21 ) 22 23 type Reader struct { 24 r io.ReaderAt 25 File []*File 26 Comment string 27 } 28 29 type ReadCloser struct { 30 f *os.File 31 Reader 32 } 33 34 type File struct { 35 FileHeader 36 zipr io.ReaderAt 37 zipsize int64 38 headerOffset int64 39 } 40 41 func (f *File) hasDataDescriptor() bool { 42 return f.Flags&0x8 != 0 43 } 44 45 // OpenReader will open the Zip file specified by name and return a ReadCloser. 46 func OpenReader(name string) (*ReadCloser, error) { 47 f, err := os.Open(name) 48 if err != nil { 49 return nil, err 50 } 51 fi, err := f.Stat() 52 if err != nil { 53 f.Close() 54 return nil, err 55 } 56 r := new(ReadCloser) 57 if err := r.init(f, fi.Size()); err != nil { 58 f.Close() 59 return nil, err 60 } 61 r.f = f 62 return r, nil 63 } 64 65 // NewReader returns a new Reader reading from r, which is assumed to 66 // have the given size in bytes. 67 func NewReader(r io.ReaderAt, size int64) (*Reader, error) { 68 zr := new(Reader) 69 if err := zr.init(r, size); err != nil { 70 return nil, err 71 } 72 return zr, nil 73 } 74 75 func (z *Reader) init(r io.ReaderAt, size int64) error { 76 end, err := readDirectoryEnd(r, size) 77 if err != nil { 78 return err 79 } 80 z.r = r 81 z.File = make([]*File, 0, end.directoryRecords) 82 z.Comment = end.comment 83 rs := io.NewSectionReader(r, 0, size) 84 if _, err = rs.Seek(int64(end.directoryOffset), os.SEEK_SET); err != nil { 85 return err 86 } 87 buf := bufio.NewReader(rs) 88 89 // The count of files inside a zip is truncated to fit in a uint16. 90 // Gloss over this by reading headers until we encounter 91 // a bad one, and then only report a ErrFormat or UnexpectedEOF if 92 // the file count modulo 65536 is incorrect. 93 for { 94 f := &File{zipr: r, zipsize: size} 95 err = readDirectoryHeader(f, buf) 96 if err == ErrFormat || err == io.ErrUnexpectedEOF { 97 break 98 } 99 if err != nil { 100 return err 101 } 102 z.File = append(z.File, f) 103 } 104 if uint16(len(z.File)) != uint16(end.directoryRecords) { // only compare 16 bits here 105 // Return the readDirectoryHeader error if we read 106 // the wrong number of directory entries. 107 return err 108 } 109 return nil 110 } 111 112 // Close closes the Zip file, rendering it unusable for I/O. 113 func (rc *ReadCloser) Close() error { 114 return rc.f.Close() 115 } 116 117 // DataOffset returns the offset of the file's possibly-compressed 118 // data, relative to the beginning of the zip file. 119 // 120 // Most callers should instead use Open, which transparently 121 // decompresses data and verifies checksums. 122 func (f *File) DataOffset() (offset int64, err error) { 123 bodyOffset, err := f.findBodyOffset() 124 if err != nil { 125 return 126 } 127 return f.headerOffset + bodyOffset, nil 128 } 129 130 // Open returns a ReadCloser that provides access to the File's contents. 131 // Multiple files may be read concurrently. 132 func (f *File) Open() (rc io.ReadCloser, err error) { 133 bodyOffset, err := f.findBodyOffset() 134 if err != nil { 135 return 136 } 137 size := int64(f.CompressedSize64) 138 r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size) 139 dcomp := decompressor(f.Method) 140 if dcomp == nil { 141 err = ErrAlgorithm 142 return 143 } 144 rc = dcomp(r) 145 var desr io.Reader 146 if f.hasDataDescriptor() { 147 desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen) 148 } 149 rc = &checksumReader{rc, crc32.NewIEEE(), f, desr, nil} 150 return 151 } 152 153 type checksumReader struct { 154 rc io.ReadCloser 155 hash hash.Hash32 156 f *File 157 desr io.Reader // if non-nil, where to read the data descriptor 158 err error // sticky error 159 } 160 161 func (r *checksumReader) Read(b []byte) (n int, err error) { 162 if r.err != nil { 163 return 0, r.err 164 } 165 n, err = r.rc.Read(b) 166 r.hash.Write(b[:n]) 167 if err == nil { 168 return 169 } 170 if err == io.EOF { 171 if r.desr != nil { 172 if err1 := readDataDescriptor(r.desr, r.f); err1 != nil { 173 err = err1 174 } else if r.hash.Sum32() != r.f.CRC32 { 175 err = ErrChecksum 176 } 177 } else { 178 // If there's not a data descriptor, we still compare 179 // the CRC32 of what we've read against the file header 180 // or TOC's CRC32, if it seems like it was set. 181 if r.f.CRC32 != 0 && r.hash.Sum32() != r.f.CRC32 { 182 err = ErrChecksum 183 } 184 } 185 } 186 r.err = err 187 return 188 } 189 190 func (r *checksumReader) Close() error { return r.rc.Close() } 191 192 // findBodyOffset does the minimum work to verify the file has a header 193 // and returns the file body offset. 194 func (f *File) findBodyOffset() (int64, error) { 195 var buf [fileHeaderLen]byte 196 if _, err := f.zipr.ReadAt(buf[:], f.headerOffset); err != nil { 197 return 0, err 198 } 199 b := readBuf(buf[:]) 200 if sig := b.uint32(); sig != fileHeaderSignature { 201 return 0, ErrFormat 202 } 203 b = b[22:] // skip over most of the header 204 filenameLen := int(b.uint16()) 205 extraLen := int(b.uint16()) 206 return int64(fileHeaderLen + filenameLen + extraLen), nil 207 } 208 209 // readDirectoryHeader attempts to read a directory header from r. 210 // It returns io.ErrUnexpectedEOF if it cannot read a complete header, 211 // and ErrFormat if it doesn't find a valid header signature. 212 func readDirectoryHeader(f *File, r io.Reader) error { 213 var buf [directoryHeaderLen]byte 214 if _, err := io.ReadFull(r, buf[:]); err != nil { 215 return err 216 } 217 b := readBuf(buf[:]) 218 if sig := b.uint32(); sig != directoryHeaderSignature { 219 return ErrFormat 220 } 221 f.CreatorVersion = b.uint16() 222 f.ReaderVersion = b.uint16() 223 f.Flags = b.uint16() 224 f.Method = b.uint16() 225 f.ModifiedTime = b.uint16() 226 f.ModifiedDate = b.uint16() 227 f.CRC32 = b.uint32() 228 f.CompressedSize = b.uint32() 229 f.UncompressedSize = b.uint32() 230 f.CompressedSize64 = uint64(f.CompressedSize) 231 f.UncompressedSize64 = uint64(f.UncompressedSize) 232 filenameLen := int(b.uint16()) 233 extraLen := int(b.uint16()) 234 commentLen := int(b.uint16()) 235 b = b[4:] // skipped start disk number and internal attributes (2x uint16) 236 f.ExternalAttrs = b.uint32() 237 f.headerOffset = int64(b.uint32()) 238 d := make([]byte, filenameLen+extraLen+commentLen) 239 if _, err := io.ReadFull(r, d); err != nil { 240 return err 241 } 242 f.Name = string(d[:filenameLen]) 243 f.Extra = d[filenameLen : filenameLen+extraLen] 244 f.Comment = string(d[filenameLen+extraLen:]) 245 246 if len(f.Extra) > 0 { 247 b := readBuf(f.Extra) 248 for len(b) >= 4 { // need at least tag and size 249 tag := b.uint16() 250 size := b.uint16() 251 if int(size) > len(b) { 252 return ErrFormat 253 } 254 if tag == zip64ExtraId { 255 // update directory values from the zip64 extra block 256 eb := readBuf(b) 257 if len(eb) >= 8 { 258 f.UncompressedSize64 = eb.uint64() 259 } 260 if len(eb) >= 8 { 261 f.CompressedSize64 = eb.uint64() 262 } 263 if len(eb) >= 8 { 264 f.headerOffset = int64(eb.uint64()) 265 } 266 } 267 b = b[size:] 268 } 269 // Should have consumed the whole header. 270 if len(b) != 0 { 271 return ErrFormat 272 } 273 } 274 return nil 275 } 276 277 func readDataDescriptor(r io.Reader, f *File) error { 278 var buf [dataDescriptorLen]byte 279 280 // The spec says: "Although not originally assigned a 281 // signature, the value 0x08074b50 has commonly been adopted 282 // as a signature value for the data descriptor record. 283 // Implementers should be aware that ZIP files may be 284 // encountered with or without this signature marking data 285 // descriptors and should account for either case when reading 286 // ZIP files to ensure compatibility." 287 // 288 // dataDescriptorLen includes the size of the signature but 289 // first read just those 4 bytes to see if it exists. 290 if _, err := io.ReadFull(r, buf[:4]); err != nil { 291 return err 292 } 293 off := 0 294 maybeSig := readBuf(buf[:4]) 295 if maybeSig.uint32() != dataDescriptorSignature { 296 // No data descriptor signature. Keep these four 297 // bytes. 298 off += 4 299 } 300 if _, err := io.ReadFull(r, buf[off:12]); err != nil { 301 return err 302 } 303 b := readBuf(buf[:12]) 304 if b.uint32() != f.CRC32 { 305 return ErrChecksum 306 } 307 308 // The two sizes that follow here can be either 32 bits or 64 bits 309 // but the spec is not very clear on this and different 310 // interpretations has been made causing incompatibilities. We 311 // already have the sizes from the central directory so we can 312 // just ignore these. 313 314 return nil 315 } 316 317 func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) { 318 // look for directoryEndSignature in the last 1k, then in the last 65k 319 var buf []byte 320 var directoryEndOffset int64 321 for i, bLen := range []int64{1024, 65 * 1024} { 322 if bLen > size { 323 bLen = size 324 } 325 buf = make([]byte, int(bLen)) 326 if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF { 327 return nil, err 328 } 329 if p := findSignatureInBlock(buf); p >= 0 { 330 buf = buf[p:] 331 directoryEndOffset = size - bLen + int64(p) 332 break 333 } 334 if i == 1 || bLen == size { 335 return nil, ErrFormat 336 } 337 } 338 339 // read header into struct 340 b := readBuf(buf[4:]) // skip signature 341 d := &directoryEnd{ 342 diskNbr: uint32(b.uint16()), 343 dirDiskNbr: uint32(b.uint16()), 344 dirRecordsThisDisk: uint64(b.uint16()), 345 directoryRecords: uint64(b.uint16()), 346 directorySize: uint64(b.uint32()), 347 directoryOffset: uint64(b.uint32()), 348 commentLen: b.uint16(), 349 } 350 l := int(d.commentLen) 351 if l > len(b) { 352 return nil, errors.New("zip: invalid comment length") 353 } 354 d.comment = string(b[:l]) 355 356 p, err := findDirectory64End(r, directoryEndOffset) 357 if err == nil && p >= 0 { 358 err = readDirectory64End(r, p, d) 359 } 360 if err != nil { 361 return nil, err 362 } 363 364 // Make sure directoryOffset points to somewhere in our file. 365 if o := int64(d.directoryOffset); o < 0 || o >= size { 366 return nil, ErrFormat 367 } 368 return d, nil 369 } 370 371 // findDirectory64End tries to read the zip64 locator just before the 372 // directory end and returns the offset of the zip64 directory end if 373 // found. 374 func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) { 375 locOffset := directoryEndOffset - directory64LocLen 376 if locOffset < 0 { 377 return -1, nil // no need to look for a header outside the file 378 } 379 buf := make([]byte, directory64LocLen) 380 if _, err := r.ReadAt(buf, locOffset); err != nil { 381 return -1, err 382 } 383 b := readBuf(buf) 384 if sig := b.uint32(); sig != directory64LocSignature { 385 return -1, nil 386 } 387 b = b[4:] // skip number of the disk with the start of the zip64 end of central directory 388 p := b.uint64() // relative offset of the zip64 end of central directory record 389 return int64(p), nil 390 } 391 392 // readDirectory64End reads the zip64 directory end and updates the 393 // directory end with the zip64 directory end values. 394 func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) { 395 buf := make([]byte, directory64EndLen) 396 if _, err := r.ReadAt(buf, offset); err != nil { 397 return err 398 } 399 400 b := readBuf(buf) 401 if sig := b.uint32(); sig != directory64EndSignature { 402 return ErrFormat 403 } 404 405 b = b[12:] // skip dir size, version and version needed (uint64 + 2x uint16) 406 d.diskNbr = b.uint32() // number of this disk 407 d.dirDiskNbr = b.uint32() // number of the disk with the start of the central directory 408 d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk 409 d.directoryRecords = b.uint64() // total number of entries in the central directory 410 d.directorySize = b.uint64() // size of the central directory 411 d.directoryOffset = b.uint64() // offset of start of central directory with respect to the starting disk number 412 413 return nil 414 } 415 416 func findSignatureInBlock(b []byte) int { 417 for i := len(b) - directoryEndLen; i >= 0; i-- { 418 // defined from directoryEndSignature in struct.go 419 if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 { 420 // n is length of comment 421 n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8 422 if n+directoryEndLen+i <= len(b) { 423 return i 424 } 425 } 426 } 427 return -1 428 } 429 430 type readBuf []byte 431 432 func (b *readBuf) uint16() uint16 { 433 v := binary.LittleEndian.Uint16(*b) 434 *b = (*b)[2:] 435 return v 436 } 437 438 func (b *readBuf) uint32() uint32 { 439 v := binary.LittleEndian.Uint32(*b) 440 *b = (*b)[4:] 441 return v 442 } 443 444 func (b *readBuf) uint64() uint64 { 445 v := binary.LittleEndian.Uint64(*b) 446 *b = (*b)[8:] 447 return v 448 }