github.com/miolini/go@v0.0.0-20160405192216-fca68c8cb408/src/compress/gzip/gunzip.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package gzip implements reading and writing of gzip format compressed files, 6 // as specified in RFC 1952. 7 package gzip 8 9 import ( 10 "bufio" 11 "compress/flate" 12 "errors" 13 "hash/crc32" 14 "io" 15 "time" 16 ) 17 18 const ( 19 gzipID1 = 0x1f 20 gzipID2 = 0x8b 21 gzipDeflate = 8 22 flagText = 1 << 0 23 flagHdrCrc = 1 << 1 24 flagExtra = 1 << 2 25 flagName = 1 << 3 26 flagComment = 1 << 4 27 ) 28 29 var ( 30 // ErrChecksum is returned when reading GZIP data that has an invalid checksum. 31 ErrChecksum = errors.New("gzip: invalid checksum") 32 // ErrHeader is returned when reading GZIP data that has an invalid header. 33 ErrHeader = errors.New("gzip: invalid header") 34 ) 35 36 // The gzip file stores a header giving metadata about the compressed file. 37 // That header is exposed as the fields of the Writer and Reader structs. 38 // 39 // Strings must be UTF-8 encoded and may only contain Unicode code points 40 // U+0001 through U+00FF, due to limitations of the GZIP file format. 41 type Header struct { 42 Comment string // comment 43 Extra []byte // "extra data" 44 ModTime time.Time // modification time 45 Name string // file name 46 OS byte // operating system type 47 } 48 49 // A Reader is an io.Reader that can be read to retrieve 50 // uncompressed data from a gzip-format compressed file. 51 // 52 // In general, a gzip file can be a concatenation of gzip files, 53 // each with its own header. Reads from the Reader 54 // return the concatenation of the uncompressed data of each. 55 // Only the first header is recorded in the Reader fields. 56 // 57 // Gzip files store a length and checksum of the uncompressed data. 58 // The Reader will return a ErrChecksum when Read 59 // reaches the end of the uncompressed data if it does not 60 // have the expected length or checksum. Clients should treat data 61 // returned by Read as tentative until they receive the io.EOF 62 // marking the end of the data. 63 type Reader struct { 64 Header // valid after NewReader or Reader.Reset 65 r flate.Reader 66 decompressor io.ReadCloser 67 digest uint32 // CRC-32, IEEE polynomial (section 8) 68 size uint32 // Uncompressed size (section 2.3.1) 69 buf [512]byte 70 err error 71 multistream bool 72 } 73 74 // NewReader creates a new Reader reading the given reader. 75 // If r does not also implement io.ByteReader, 76 // the decompressor may read more data than necessary from r. 77 // 78 // It is the caller's responsibility to call Close on the Reader when done. 79 // 80 // The Reader.Header fields will be valid in the Reader returned. 81 func NewReader(r io.Reader) (*Reader, error) { 82 z := new(Reader) 83 if err := z.Reset(r); err != nil { 84 return nil, err 85 } 86 return z, nil 87 } 88 89 // Reset discards the Reader z's state and makes it equivalent to the 90 // result of its original state from NewReader, but reading from r instead. 91 // This permits reusing a Reader rather than allocating a new one. 92 func (z *Reader) Reset(r io.Reader) error { 93 *z = Reader{ 94 decompressor: z.decompressor, 95 multistream: true, 96 } 97 if rr, ok := r.(flate.Reader); ok { 98 z.r = rr 99 } else { 100 z.r = bufio.NewReader(r) 101 } 102 return z.readHeader(true) 103 } 104 105 // Multistream controls whether the reader supports multistream files. 106 // 107 // If enabled (the default), the Reader expects the input to be a sequence 108 // of individually gzipped data streams, each with its own header and 109 // trailer, ending at EOF. The effect is that the concatenation of a sequence 110 // of gzipped files is treated as equivalent to the gzip of the concatenation 111 // of the sequence. This is standard behavior for gzip readers. 112 // 113 // Calling Multistream(false) disables this behavior; disabling the behavior 114 // can be useful when reading file formats that distinguish individual gzip 115 // data streams or mix gzip data streams with other data streams. 116 // In this mode, when the Reader reaches the end of the data stream, 117 // Read returns io.EOF. If the underlying reader implements io.ByteReader, 118 // it will be left positioned just after the gzip stream. 119 // To start the next stream, call z.Reset(r) followed by z.Multistream(false). 120 // If there is no next stream, z.Reset(r) will return io.EOF. 121 func (z *Reader) Multistream(ok bool) { 122 z.multistream = ok 123 } 124 125 // GZIP (RFC 1952) is little-endian, unlike ZLIB (RFC 1950). 126 func get4(p []byte) uint32 { 127 return uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24 128 } 129 130 func (z *Reader) readString() (string, error) { 131 var err error 132 needconv := false 133 for i := 0; ; i++ { 134 if i >= len(z.buf) { 135 return "", ErrHeader 136 } 137 z.buf[i], err = z.r.ReadByte() 138 if err != nil { 139 return "", err 140 } 141 if z.buf[i] > 0x7f { 142 needconv = true 143 } 144 if z.buf[i] == 0 { 145 // GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1). 146 if needconv { 147 s := make([]rune, 0, i) 148 for _, v := range z.buf[:i] { 149 s = append(s, rune(v)) 150 } 151 return string(s), nil 152 } 153 return string(z.buf[:i]), nil 154 } 155 } 156 } 157 158 func (z *Reader) read2() (uint32, error) { 159 _, err := io.ReadFull(z.r, z.buf[:2]) 160 if err != nil { 161 if err == io.EOF { 162 err = io.ErrUnexpectedEOF 163 } 164 return 0, err 165 } 166 return uint32(z.buf[0]) | uint32(z.buf[1])<<8, nil 167 } 168 169 func (z *Reader) readHeader(save bool) error { 170 _, err := io.ReadFull(z.r, z.buf[:10]) 171 if err != nil { 172 // RFC 1952, section 2.2, says the following: 173 // A gzip file consists of a series of "members" (compressed data sets). 174 // 175 // Other than this, the specification does not clarify whether a 176 // "series" is defined as "one or more" or "zero or more". To err on the 177 // side of caution, Go interprets this to mean "zero or more". 178 // Thus, it is okay to return io.EOF here. 179 return err 180 } 181 if z.buf[0] != gzipID1 || z.buf[1] != gzipID2 || z.buf[2] != gzipDeflate { 182 return ErrHeader 183 } 184 flg := z.buf[3] 185 if save { 186 z.ModTime = time.Unix(int64(get4(z.buf[4:8])), 0) 187 // z.buf[8] is xfl, ignored 188 z.OS = z.buf[9] 189 } 190 z.digest = crc32.Update(0, crc32.IEEETable, z.buf[:10]) 191 192 if flg&flagExtra != 0 { 193 n, err := z.read2() 194 if err != nil { 195 return err 196 } 197 data := make([]byte, n) 198 if _, err = io.ReadFull(z.r, data); err != nil { 199 if err == io.EOF { 200 err = io.ErrUnexpectedEOF 201 } 202 return err 203 } 204 if save { 205 z.Extra = data 206 } 207 } 208 209 var s string 210 if flg&flagName != 0 { 211 if s, err = z.readString(); err != nil { 212 return err 213 } 214 if save { 215 z.Name = s 216 } 217 } 218 219 if flg&flagComment != 0 { 220 if s, err = z.readString(); err != nil { 221 return err 222 } 223 if save { 224 z.Comment = s 225 } 226 } 227 228 if flg&flagHdrCrc != 0 { 229 n, err := z.read2() 230 if err != nil { 231 return err 232 } 233 sum := z.digest & 0xFFFF 234 if n != sum { 235 return ErrHeader 236 } 237 } 238 239 z.digest = 0 240 if z.decompressor == nil { 241 z.decompressor = flate.NewReader(z.r) 242 } else { 243 z.decompressor.(flate.Resetter).Reset(z.r, nil) 244 } 245 return nil 246 } 247 248 func (z *Reader) Read(p []byte) (n int, err error) { 249 if z.err != nil { 250 return 0, z.err 251 } 252 253 n, z.err = z.decompressor.Read(p) 254 z.digest = crc32.Update(z.digest, crc32.IEEETable, p[:n]) 255 z.size += uint32(n) 256 if z.err != io.EOF { 257 // In the normal case we return here. 258 return n, z.err 259 } 260 261 // Finished file; check checksum and size. 262 if _, err := io.ReadFull(z.r, z.buf[:8]); err != nil { 263 if err == io.EOF { 264 err = io.ErrUnexpectedEOF 265 } 266 z.err = err 267 return n, err 268 } 269 digest, size := get4(z.buf[:4]), get4(z.buf[4:8]) 270 if digest != z.digest || size != z.size { 271 z.err = ErrChecksum 272 return n, z.err 273 } 274 z.digest, z.size = 0, 0 275 276 // File is ok; check if there is another. 277 if !z.multistream { 278 return n, io.EOF 279 } 280 z.err = nil // Remove io.EOF 281 282 if z.err = z.readHeader(false); z.err != nil { 283 return n, z.err 284 } 285 286 // Read from next file, if necessary. 287 if n > 0 { 288 return n, nil 289 } 290 return z.Read(p) 291 } 292 293 // Close closes the Reader. It does not close the underlying io.Reader. 294 func (z *Reader) Close() error { return z.decompressor.Close() }