github.com/miolini/go@v0.0.0-20160405192216-fca68c8cb408/src/compress/gzip/gunzip.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package gzip implements reading and writing of gzip format compressed files,
     6  // as specified in RFC 1952.
     7  package gzip
     8  
     9  import (
    10  	"bufio"
    11  	"compress/flate"
    12  	"errors"
    13  	"hash/crc32"
    14  	"io"
    15  	"time"
    16  )
    17  
// Constants describing the fixed part of a gzip member header (RFC 1952).
// The flag* values are bit masks applied to the header's flags byte.
const (
	gzipID1     = 0x1f   // required value of the first header byte
	gzipID2     = 0x8b   // required value of the second header byte
	gzipDeflate = 8      // required value of the compression-method byte
	flagText    = 1 << 0 // text hint bit; not consulted by the reader code in this file
	flagHdrCrc  = 1 << 1 // a 2-byte header checksum follows the optional fields
	flagExtra   = 1 << 2 // a length-prefixed "extra" field is present
	flagName    = 1 << 3 // a NUL-terminated file name is present
	flagComment = 1 << 4 // a NUL-terminated comment is present
)
    28  
var (
	// ErrChecksum is returned when reading GZIP data that has an invalid checksum.
	// Read reports it when the CRC-32 or the uncompressed size stored in a
	// member's trailer does not match the data that was decompressed.
	ErrChecksum = errors.New("gzip: invalid checksum")
	// ErrHeader is returned when reading GZIP data that has an invalid header:
	// wrong magic bytes or compression method, a name or comment that does
	// not fit the reader's scratch buffer, or a header checksum mismatch.
	ErrHeader = errors.New("gzip: invalid header")
)
    35  
// Header holds the metadata that a gzip file stores ahead of the
// compressed data. It is exposed as the fields of the Writer and Reader
// structs.
//
// Strings must be UTF-8 encoded and may only contain Unicode code points
// U+0001 through U+00FF, due to limitations of the GZIP file format.
type Header struct {
	Comment string    // comment; set by the reader only when the comment flag is present
	Extra   []byte    // "extra data"; set by the reader only when the extra flag is present
	ModTime time.Time // modification time, decoded from a 32-bit count of Unix seconds
	Name    string    // file name; set by the reader only when the name flag is present
	OS      byte      // operating system type, copied verbatim from the header
}
    48  
// A Reader is an io.Reader that can be read to retrieve
// uncompressed data from a gzip-format compressed file.
//
// In general, a gzip file can be a concatenation of gzip files,
// each with its own header. Reads from the Reader
// return the concatenation of the uncompressed data of each.
// Only the first header is recorded in the Reader fields.
//
// Gzip files store a length and checksum of the uncompressed data.
// The Reader will return an ErrChecksum when Read
// reaches the end of the uncompressed data if it does not
// have the expected length or checksum. Clients should treat data
// returned by Read as tentative until they receive the io.EOF
// marking the end of the data.
type Reader struct {
	Header       // valid after NewReader or Reader.Reset
	r            flate.Reader  // compressed input; headers and trailers are read from it directly
	decompressor io.ReadCloser // DEFLATE decompressor reading from r; kept across Reset for reuse
	digest       uint32        // CRC-32, IEEE polynomial (section 8); also scratch for the header CRC while a header is parsed
	size         uint32        // Uncompressed size (section 2.3.1)
	buf          [512]byte     // scratch space for header fields, header strings and the trailer
	err          error         // sticky error returned by subsequent calls to Read
	multistream  bool          // whether Read continues with the next member after a trailer
}
    73  
    74  // NewReader creates a new Reader reading the given reader.
    75  // If r does not also implement io.ByteReader,
    76  // the decompressor may read more data than necessary from r.
    77  //
    78  // It is the caller's responsibility to call Close on the Reader when done.
    79  //
    80  // The Reader.Header fields will be valid in the Reader returned.
    81  func NewReader(r io.Reader) (*Reader, error) {
    82  	z := new(Reader)
    83  	if err := z.Reset(r); err != nil {
    84  		return nil, err
    85  	}
    86  	return z, nil
    87  }
    88  
    89  // Reset discards the Reader z's state and makes it equivalent to the
    90  // result of its original state from NewReader, but reading from r instead.
    91  // This permits reusing a Reader rather than allocating a new one.
    92  func (z *Reader) Reset(r io.Reader) error {
    93  	*z = Reader{
    94  		decompressor: z.decompressor,
    95  		multistream:  true,
    96  	}
    97  	if rr, ok := r.(flate.Reader); ok {
    98  		z.r = rr
    99  	} else {
   100  		z.r = bufio.NewReader(r)
   101  	}
   102  	return z.readHeader(true)
   103  }
   104  
// Multistream controls whether the reader supports multistream files.
//
// If enabled (the default), the Reader expects the input to be a sequence
// of individually gzipped data streams, each with its own header and
// trailer, ending at EOF. The effect is that the concatenation of a sequence
// of gzipped files is treated as equivalent to the gzip of the concatenation
// of the sequence. This is standard behavior for gzip readers.
//
// Calling Multistream(false) disables this behavior; disabling the behavior
// can be useful when reading file formats that distinguish individual gzip
// data streams or mix gzip data streams with other data streams.
// In this mode, when the Reader reaches the end of the data stream,
// Read returns io.EOF. If the underlying reader implements io.ByteReader,
// it will be left positioned just after the gzip stream.
// To start the next stream, call z.Reset(r) followed by z.Multistream(false);
// the second call is needed because Reset turns multistream mode back on.
// If there is no next stream, z.Reset(r) will return io.EOF.
//
// Multistream only records the setting; Read consults it each time it
// reaches the end of a gzip stream.
func (z *Reader) Multistream(ok bool) {
	z.multistream = ok
}
   124  
   125  // GZIP (RFC 1952) is little-endian, unlike ZLIB (RFC 1950).
   126  func get4(p []byte) uint32 {
   127  	return uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
   128  }
   129  
   130  func (z *Reader) readString() (string, error) {
   131  	var err error
   132  	needconv := false
   133  	for i := 0; ; i++ {
   134  		if i >= len(z.buf) {
   135  			return "", ErrHeader
   136  		}
   137  		z.buf[i], err = z.r.ReadByte()
   138  		if err != nil {
   139  			return "", err
   140  		}
   141  		if z.buf[i] > 0x7f {
   142  			needconv = true
   143  		}
   144  		if z.buf[i] == 0 {
   145  			// GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1).
   146  			if needconv {
   147  				s := make([]rune, 0, i)
   148  				for _, v := range z.buf[:i] {
   149  					s = append(s, rune(v))
   150  				}
   151  				return string(s), nil
   152  			}
   153  			return string(z.buf[:i]), nil
   154  		}
   155  	}
   156  }
   157  
   158  func (z *Reader) read2() (uint32, error) {
   159  	_, err := io.ReadFull(z.r, z.buf[:2])
   160  	if err != nil {
   161  		if err == io.EOF {
   162  			err = io.ErrUnexpectedEOF
   163  		}
   164  		return 0, err
   165  	}
   166  	return uint32(z.buf[0]) | uint32(z.buf[1])<<8, nil
   167  }
   168  
   169  func (z *Reader) readHeader(save bool) error {
   170  	_, err := io.ReadFull(z.r, z.buf[:10])
   171  	if err != nil {
   172  		// RFC 1952, section 2.2, says the following:
   173  		//	A gzip file consists of a series of "members" (compressed data sets).
   174  		//
   175  		// Other than this, the specification does not clarify whether a
   176  		// "series" is defined as "one or more" or "zero or more". To err on the
   177  		// side of caution, Go interprets this to mean "zero or more".
   178  		// Thus, it is okay to return io.EOF here.
   179  		return err
   180  	}
   181  	if z.buf[0] != gzipID1 || z.buf[1] != gzipID2 || z.buf[2] != gzipDeflate {
   182  		return ErrHeader
   183  	}
   184  	flg := z.buf[3]
   185  	if save {
   186  		z.ModTime = time.Unix(int64(get4(z.buf[4:8])), 0)
   187  		// z.buf[8] is xfl, ignored
   188  		z.OS = z.buf[9]
   189  	}
   190  	z.digest = crc32.Update(0, crc32.IEEETable, z.buf[:10])
   191  
   192  	if flg&flagExtra != 0 {
   193  		n, err := z.read2()
   194  		if err != nil {
   195  			return err
   196  		}
   197  		data := make([]byte, n)
   198  		if _, err = io.ReadFull(z.r, data); err != nil {
   199  			if err == io.EOF {
   200  				err = io.ErrUnexpectedEOF
   201  			}
   202  			return err
   203  		}
   204  		if save {
   205  			z.Extra = data
   206  		}
   207  	}
   208  
   209  	var s string
   210  	if flg&flagName != 0 {
   211  		if s, err = z.readString(); err != nil {
   212  			return err
   213  		}
   214  		if save {
   215  			z.Name = s
   216  		}
   217  	}
   218  
   219  	if flg&flagComment != 0 {
   220  		if s, err = z.readString(); err != nil {
   221  			return err
   222  		}
   223  		if save {
   224  			z.Comment = s
   225  		}
   226  	}
   227  
   228  	if flg&flagHdrCrc != 0 {
   229  		n, err := z.read2()
   230  		if err != nil {
   231  			return err
   232  		}
   233  		sum := z.digest & 0xFFFF
   234  		if n != sum {
   235  			return ErrHeader
   236  		}
   237  	}
   238  
   239  	z.digest = 0
   240  	if z.decompressor == nil {
   241  		z.decompressor = flate.NewReader(z.r)
   242  	} else {
   243  		z.decompressor.(flate.Resetter).Reset(z.r, nil)
   244  	}
   245  	return nil
   246  }
   247  
   248  func (z *Reader) Read(p []byte) (n int, err error) {
   249  	if z.err != nil {
   250  		return 0, z.err
   251  	}
   252  
   253  	n, z.err = z.decompressor.Read(p)
   254  	z.digest = crc32.Update(z.digest, crc32.IEEETable, p[:n])
   255  	z.size += uint32(n)
   256  	if z.err != io.EOF {
   257  		// In the normal case we return here.
   258  		return n, z.err
   259  	}
   260  
   261  	// Finished file; check checksum and size.
   262  	if _, err := io.ReadFull(z.r, z.buf[:8]); err != nil {
   263  		if err == io.EOF {
   264  			err = io.ErrUnexpectedEOF
   265  		}
   266  		z.err = err
   267  		return n, err
   268  	}
   269  	digest, size := get4(z.buf[:4]), get4(z.buf[4:8])
   270  	if digest != z.digest || size != z.size {
   271  		z.err = ErrChecksum
   272  		return n, z.err
   273  	}
   274  	z.digest, z.size = 0, 0
   275  
   276  	// File is ok; check if there is another.
   277  	if !z.multistream {
   278  		return n, io.EOF
   279  	}
   280  	z.err = nil // Remove io.EOF
   281  
   282  	if z.err = z.readHeader(false); z.err != nil {
   283  		return n, z.err
   284  	}
   285  
   286  	// Read from next file, if necessary.
   287  	if n > 0 {
   288  		return n, nil
   289  	}
   290  	return z.Read(p)
   291  }
   292  
// Close closes the Reader. It does not close the underlying io.Reader.
//
// Close only closes the decompressor and returns its error. It does not
// check the gzip checksum; that happens in Read when the end of the data
// is reached.
func (z *Reader) Close() error { return z.decompressor.Close() }