// Source: github.com/sbinet/go@v0.0.0-20160827155028-54d7de7dd62b/src/compress/gzip/gunzip.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package gzip implements reading and writing of gzip format compressed files,
// as specified in RFC 1952.
package gzip

import (
	"bufio"
	"compress/flate"
	"encoding/binary"
	"errors"
	"hash/crc32"
	"io"
	"time"
)

// Values of the fixed header bytes and bit masks for the header flag byte,
// as consumed by readHeader.
const (
	gzipID1     = 0x1f   // required value of header byte 0
	gzipID2     = 0x8b   // required value of header byte 1
	gzipDeflate = 8      // required value of header byte 2
	flagText    = 1 << 0 // not consulted by the header parsing in this file
	flagHdrCrc  = 1 << 1 // a 2-byte header checksum follows the optional fields
	flagExtra   = 1 << 2 // a 2-byte length and that many "extra" bytes follow
	flagName    = 1 << 3 // a NUL-terminated file name follows
	flagComment = 1 << 4 // a NUL-terminated comment follows
)

var (
	// ErrChecksum is returned when reading GZIP data that has an invalid checksum.
	ErrChecksum = errors.New("gzip: invalid checksum")
	// ErrHeader is returned when reading GZIP data that has an invalid header.
	ErrHeader = errors.New("gzip: invalid header")
)

// le is the byte order used to decode the multi-byte header and trailer fields.
var le = binary.LittleEndian

// noEOF converts io.EOF to io.ErrUnexpectedEOF.
// Any other error, including nil, is returned unchanged.
func noEOF(err error) error {
	if err == io.EOF {
		return io.ErrUnexpectedEOF
	}
	return err
}

// The gzip file stores a header giving metadata about the compressed file.
// That header is exposed as the fields of the Writer and Reader structs.
//
// Strings must be UTF-8 encoded and may only contain Unicode code points
// U+0001 through U+00FF, due to limitations of the GZIP file format.
type Header struct {
	Comment string    // comment
	Extra   []byte    // "extra data"
	ModTime time.Time // modification time
	Name    string    // file name
	OS      byte      // operating system type
}

// A Reader is an io.Reader that can be read to retrieve
// uncompressed data from a gzip-format compressed file.
//
// In general, a gzip file can be a concatenation of gzip files,
// each with its own header. Reads from the Reader
// return the concatenation of the uncompressed data of each.
66 // Only the first header is recorded in the Reader fields. 67 // 68 // Gzip files store a length and checksum of the uncompressed data. 69 // The Reader will return a ErrChecksum when Read 70 // reaches the end of the uncompressed data if it does not 71 // have the expected length or checksum. Clients should treat data 72 // returned by Read as tentative until they receive the io.EOF 73 // marking the end of the data. 74 type Reader struct { 75 Header // valid after NewReader or Reader.Reset 76 r flate.Reader 77 decompressor io.ReadCloser 78 digest uint32 // CRC-32, IEEE polynomial (section 8) 79 size uint32 // Uncompressed size (section 2.3.1) 80 buf [512]byte 81 err error 82 multistream bool 83 } 84 85 // NewReader creates a new Reader reading the given reader. 86 // If r does not also implement io.ByteReader, 87 // the decompressor may read more data than necessary from r. 88 // 89 // It is the caller's responsibility to call Close on the Reader when done. 90 // 91 // The Reader.Header fields will be valid in the Reader returned. 92 func NewReader(r io.Reader) (*Reader, error) { 93 z := new(Reader) 94 if err := z.Reset(r); err != nil { 95 return nil, err 96 } 97 return z, nil 98 } 99 100 // Reset discards the Reader z's state and makes it equivalent to the 101 // result of its original state from NewReader, but reading from r instead. 102 // This permits reusing a Reader rather than allocating a new one. 103 func (z *Reader) Reset(r io.Reader) error { 104 *z = Reader{ 105 decompressor: z.decompressor, 106 multistream: true, 107 } 108 if rr, ok := r.(flate.Reader); ok { 109 z.r = rr 110 } else { 111 z.r = bufio.NewReader(r) 112 } 113 z.Header, z.err = z.readHeader() 114 return z.err 115 } 116 117 // Multistream controls whether the reader supports multistream files. 118 // 119 // If enabled (the default), the Reader expects the input to be a sequence 120 // of individually gzipped data streams, each with its own header and 121 // trailer, ending at EOF. 
The effect is that the concatenation of a sequence 122 // of gzipped files is treated as equivalent to the gzip of the concatenation 123 // of the sequence. This is standard behavior for gzip readers. 124 // 125 // Calling Multistream(false) disables this behavior; disabling the behavior 126 // can be useful when reading file formats that distinguish individual gzip 127 // data streams or mix gzip data streams with other data streams. 128 // In this mode, when the Reader reaches the end of the data stream, 129 // Read returns io.EOF. If the underlying reader implements io.ByteReader, 130 // it will be left positioned just after the gzip stream. 131 // To start the next stream, call z.Reset(r) followed by z.Multistream(false). 132 // If there is no next stream, z.Reset(r) will return io.EOF. 133 func (z *Reader) Multistream(ok bool) { 134 z.multistream = ok 135 } 136 137 // readString reads a NUL-terminated string from z.r. 138 // It treats the bytes read as being encoded as ISO 8859-1 (Latin-1) and 139 // will output a string encoded using UTF-8. 140 // This method always updates z.digest with the data read. 141 func (z *Reader) readString() (string, error) { 142 var err error 143 needConv := false 144 for i := 0; ; i++ { 145 if i >= len(z.buf) { 146 return "", ErrHeader 147 } 148 z.buf[i], err = z.r.ReadByte() 149 if err != nil { 150 return "", err 151 } 152 if z.buf[i] > 0x7f { 153 needConv = true 154 } 155 if z.buf[i] == 0 { 156 // Digest covers the NUL terminator. 157 z.digest = crc32.Update(z.digest, crc32.IEEETable, z.buf[:i+1]) 158 159 // Strings are ISO 8859-1, Latin-1 (RFC 1952, section 2.3.1). 160 if needConv { 161 s := make([]rune, 0, i) 162 for _, v := range z.buf[:i] { 163 s = append(s, rune(v)) 164 } 165 return string(s), nil 166 } 167 return string(z.buf[:i]), nil 168 } 169 } 170 } 171 172 // readHeader reads the GZIP header according to section 2.3.1. 173 // This method does not set z.err. 
174 func (z *Reader) readHeader() (hdr Header, err error) { 175 if _, err = io.ReadFull(z.r, z.buf[:10]); err != nil { 176 // RFC 1952, section 2.2, says the following: 177 // A gzip file consists of a series of "members" (compressed data sets). 178 // 179 // Other than this, the specification does not clarify whether a 180 // "series" is defined as "one or more" or "zero or more". To err on the 181 // side of caution, Go interprets this to mean "zero or more". 182 // Thus, it is okay to return io.EOF here. 183 return hdr, err 184 } 185 if z.buf[0] != gzipID1 || z.buf[1] != gzipID2 || z.buf[2] != gzipDeflate { 186 return hdr, ErrHeader 187 } 188 flg := z.buf[3] 189 hdr.ModTime = time.Unix(int64(le.Uint32(z.buf[4:8])), 0) 190 // z.buf[8] is XFL and is currently ignored. 191 hdr.OS = z.buf[9] 192 z.digest = crc32.ChecksumIEEE(z.buf[:10]) 193 194 if flg&flagExtra != 0 { 195 if _, err = io.ReadFull(z.r, z.buf[:2]); err != nil { 196 return hdr, noEOF(err) 197 } 198 z.digest = crc32.Update(z.digest, crc32.IEEETable, z.buf[:2]) 199 data := make([]byte, le.Uint16(z.buf[:2])) 200 if _, err = io.ReadFull(z.r, data); err != nil { 201 return hdr, noEOF(err) 202 } 203 z.digest = crc32.Update(z.digest, crc32.IEEETable, data) 204 hdr.Extra = data 205 } 206 207 var s string 208 if flg&flagName != 0 { 209 if s, err = z.readString(); err != nil { 210 return hdr, err 211 } 212 hdr.Name = s 213 } 214 215 if flg&flagComment != 0 { 216 if s, err = z.readString(); err != nil { 217 return hdr, err 218 } 219 hdr.Comment = s 220 } 221 222 if flg&flagHdrCrc != 0 { 223 if _, err = io.ReadFull(z.r, z.buf[:2]); err != nil { 224 return hdr, noEOF(err) 225 } 226 digest := le.Uint16(z.buf[:2]) 227 if digest != uint16(z.digest) { 228 return hdr, ErrHeader 229 } 230 } 231 232 z.digest = 0 233 if z.decompressor == nil { 234 z.decompressor = flate.NewReader(z.r) 235 } else { 236 z.decompressor.(flate.Resetter).Reset(z.r, nil) 237 } 238 return hdr, nil 239 } 240 241 func (z *Reader) Read(p []byte) (n 
int, err error) { 242 if z.err != nil { 243 return 0, z.err 244 } 245 246 n, z.err = z.decompressor.Read(p) 247 z.digest = crc32.Update(z.digest, crc32.IEEETable, p[:n]) 248 z.size += uint32(n) 249 if z.err != io.EOF { 250 // In the normal case we return here. 251 return n, z.err 252 } 253 254 // Finished file; check checksum and size. 255 if _, err := io.ReadFull(z.r, z.buf[:8]); err != nil { 256 z.err = noEOF(err) 257 return n, z.err 258 } 259 digest := le.Uint32(z.buf[:4]) 260 size := le.Uint32(z.buf[4:8]) 261 if digest != z.digest || size != z.size { 262 z.err = ErrChecksum 263 return n, z.err 264 } 265 z.digest, z.size = 0, 0 266 267 // File is ok; check if there is another. 268 if !z.multistream { 269 return n, io.EOF 270 } 271 z.err = nil // Remove io.EOF 272 273 if _, z.err = z.readHeader(); z.err != nil { 274 return n, z.err 275 } 276 277 // Read from next file, if necessary. 278 if n > 0 { 279 return n, nil 280 } 281 return z.Read(p) 282 } 283 284 // Close closes the Reader. It does not close the underlying io.Reader. 285 // In order for the GZIP checksum to be verified, the reader must be 286 // fully consumed until the io.EOF. 287 func (z *Reader) Close() error { return z.decompressor.Close() }