github.com/varialus/godfly@v0.0.0-20130904042352-1934f9f095ab/src/pkg/compress/flate/inflate.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package flate implements the DEFLATE compressed data format, described in
     6  // RFC 1951.  The gzip and zlib packages implement access to DEFLATE-based file
     7  // formats.
     8  package flate
     9  
    10  import (
    11  	"bufio"
    12  	"io"
    13  	"strconv"
    14  )
    15  
    16  const (
    17  	maxCodeLen = 16    // max length of Huffman code
    18  	maxHist    = 32768 // max history required
    19  	// The next three numbers come from the RFC, section 3.2.7.
    20  	maxLit   = 286
    21  	maxDist  = 32
    22  	numCodes = 19 // number of codes in Huffman meta-code
    23  )
    24  
    25  // A CorruptInputError reports the presence of corrupt input at a given offset.
    26  type CorruptInputError int64
    27  
    28  func (e CorruptInputError) Error() string {
    29  	return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
    30  }
    31  
    32  // An InternalError reports an error in the flate code itself.
    33  type InternalError string
    34  
    35  func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
    36  
    37  // A ReadError reports an error encountered while reading input.
    38  type ReadError struct {
    39  	Offset int64 // byte offset where error occurred
    40  	Err    error // error returned by underlying Read
    41  }
    42  
    43  func (e *ReadError) Error() string {
    44  	return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
    45  }
    46  
    47  // A WriteError reports an error encountered while writing output.
    48  type WriteError struct {
    49  	Offset int64 // byte offset where error occurred
    50  	Err    error // error returned by underlying Write
    51  }
    52  
    53  func (e *WriteError) Error() string {
    54  	return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
    55  }
    56  
// Note that much of the implementation of huffmanDecoder is also copied
    58  // into gen.go (in package main) for the purpose of precomputing the
    59  // fixed huffman tables so they can be included statically.
    60  
    61  // The data structure for decoding Huffman tables is based on that of
// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits).
    63  // For codes smaller than the table width, there are multiple entries
    64  // (each combination of trailing bits has the same value). For codes
    65  // larger than the table width, the table contains a link to an overflow
    66  // table. The width of each entry in the link table is the maximum code
    67  // size minus the chunk width.
    68  
    69  // Note that you can do a lookup in the table even without all bits
    70  // filled. Since the extra bits are zero, and the DEFLATE Huffman codes
    71  // have the property that shorter codes come before longer ones, the
    72  // bit length estimate in the result is a lower bound on the actual
    73  // number of bits.
    74  
    75  // chunk & 15 is number of bits
    76  // chunk >> 4 is value, including table link
    77  
    78  const (
    79  	huffmanChunkBits  = 9
    80  	huffmanNumChunks  = 1 << huffmanChunkBits
    81  	huffmanCountMask  = 15
    82  	huffmanValueShift = 4
    83  )
    84  
    85  type huffmanDecoder struct {
    86  	min      int                      // the minimum code length
    87  	chunks   [huffmanNumChunks]uint32 // chunks as described above
    88  	links    [][]uint32               // overflow links
    89  	linkMask uint32                   // mask the width of the link table
    90  }
    91  
    92  // Initialize Huffman decoding tables from array of code lengths.
    93  func (h *huffmanDecoder) init(bits []int) bool {
    94  	// Count number of codes of each length,
    95  	// compute min and max length.
    96  	var count [maxCodeLen]int
    97  	var min, max int
    98  	for _, n := range bits {
    99  		if n == 0 {
   100  			continue
   101  		}
   102  		if min == 0 || n < min {
   103  			min = n
   104  		}
   105  		if n > max {
   106  			max = n
   107  		}
   108  		count[n]++
   109  	}
   110  	if max == 0 {
   111  		return false
   112  	}
   113  
   114  	h.min = min
   115  	var linkBits uint
   116  	var numLinks int
   117  	if max > huffmanChunkBits {
   118  		linkBits = uint(max) - huffmanChunkBits
   119  		numLinks = 1 << linkBits
   120  		h.linkMask = uint32(numLinks - 1)
   121  	}
   122  	code := 0
   123  	var nextcode [maxCodeLen]int
   124  	for i := min; i <= max; i++ {
   125  		if i == huffmanChunkBits+1 {
   126  			// create link tables
   127  			link := code >> 1
   128  			if huffmanNumChunks < link {
   129  				return false
   130  			}
   131  			h.links = make([][]uint32, huffmanNumChunks-link)
   132  			for j := uint(link); j < huffmanNumChunks; j++ {
   133  				reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
   134  				reverse >>= uint(16 - huffmanChunkBits)
   135  				off := j - uint(link)
   136  				h.chunks[reverse] = uint32(off<<huffmanValueShift + uint(i))
   137  				h.links[off] = make([]uint32, 1<<linkBits)
   138  			}
   139  		}
   140  		n := count[i]
   141  		nextcode[i] = code
   142  		code += n
   143  		code <<= 1
   144  	}
   145  
   146  	for i, n := range bits {
   147  		if n == 0 {
   148  			continue
   149  		}
   150  		code := nextcode[n]
   151  		nextcode[n]++
   152  		chunk := uint32(i<<huffmanValueShift | n)
   153  		reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8
   154  		reverse >>= uint(16 - n)
   155  		if n <= huffmanChunkBits {
   156  			for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) {
   157  				h.chunks[off] = chunk
   158  			}
   159  		} else {
   160  			value := h.chunks[reverse&(huffmanNumChunks-1)] >> huffmanValueShift
   161  			if value >= uint32(len(h.links)) {
   162  				return false
   163  			}
   164  			linktab := h.links[value]
   165  			reverse >>= huffmanChunkBits
   166  			for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) {
   167  				linktab[off] = chunk
   168  			}
   169  		}
   170  	}
   171  	return true
   172  }
   173  
   174  // The actual read interface needed by NewReader.
   175  // If the passed in io.Reader does not also have ReadByte,
   176  // the NewReader will introduce its own buffering.
   177  type Reader interface {
   178  	io.Reader
   179  	ReadByte() (c byte, err error)
   180  }
   181  
   182  // Decompress state.
   183  type decompressor struct {
   184  	// Input source.
   185  	r       Reader
   186  	roffset int64
   187  	woffset int64
   188  
   189  	// Input bits, in top of b.
   190  	b  uint32
   191  	nb uint
   192  
   193  	// Huffman decoders for literal/length, distance.
   194  	h1, h2 huffmanDecoder
   195  
   196  	// Length arrays used to define Huffman codes.
   197  	bits     *[maxLit + maxDist]int
   198  	codebits *[numCodes]int
   199  
   200  	// Output history, buffer.
   201  	hist  *[maxHist]byte
   202  	hp    int  // current output position in buffer
   203  	hw    int  // have written hist[0:hw] already
   204  	hfull bool // buffer has filled at least once
   205  
   206  	// Temporary buffer (avoids repeated allocation).
   207  	buf [4]byte
   208  
   209  	// Next step in the decompression,
   210  	// and decompression state.
   211  	step     func(*decompressor)
   212  	final    bool
   213  	err      error
   214  	toRead   []byte
   215  	hl, hd   *huffmanDecoder
   216  	copyLen  int
   217  	copyDist int
   218  }
   219  
   220  func (f *decompressor) nextBlock() {
   221  	if f.final {
   222  		if f.hw != f.hp {
   223  			f.flush((*decompressor).nextBlock)
   224  			return
   225  		}
   226  		f.err = io.EOF
   227  		return
   228  	}
   229  	for f.nb < 1+2 {
   230  		if f.err = f.moreBits(); f.err != nil {
   231  			return
   232  		}
   233  	}
   234  	f.final = f.b&1 == 1
   235  	f.b >>= 1
   236  	typ := f.b & 3
   237  	f.b >>= 2
   238  	f.nb -= 1 + 2
   239  	switch typ {
   240  	case 0:
   241  		f.dataBlock()
   242  	case 1:
   243  		// compressed, fixed Huffman tables
   244  		f.hl = &fixedHuffmanDecoder
   245  		f.hd = nil
   246  		f.huffmanBlock()
   247  	case 2:
   248  		// compressed, dynamic Huffman tables
   249  		if f.err = f.readHuffman(); f.err != nil {
   250  			break
   251  		}
   252  		f.hl = &f.h1
   253  		f.hd = &f.h2
   254  		f.huffmanBlock()
   255  	default:
   256  		// 3 is reserved.
   257  		f.err = CorruptInputError(f.roffset)
   258  	}
   259  }
   260  
   261  func (f *decompressor) Read(b []byte) (int, error) {
   262  	for {
   263  		if len(f.toRead) > 0 {
   264  			n := copy(b, f.toRead)
   265  			f.toRead = f.toRead[n:]
   266  			return n, nil
   267  		}
   268  		if f.err != nil {
   269  			return 0, f.err
   270  		}
   271  		f.step(f)
   272  	}
   273  }
   274  
   275  func (f *decompressor) Close() error {
   276  	if f.err == io.EOF {
   277  		return nil
   278  	}
   279  	return f.err
   280  }
   281  
   282  // RFC 1951 section 3.2.7.
   283  // Compression with dynamic Huffman codes
   284  
   285  var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
   286  
   287  func (f *decompressor) readHuffman() error {
   288  	// HLIT[5], HDIST[5], HCLEN[4].
   289  	for f.nb < 5+5+4 {
   290  		if err := f.moreBits(); err != nil {
   291  			return err
   292  		}
   293  	}
   294  	nlit := int(f.b&0x1F) + 257
   295  	if nlit > maxLit {
   296  		return CorruptInputError(f.roffset)
   297  	}
   298  	f.b >>= 5
   299  	ndist := int(f.b&0x1F) + 1
   300  	// maxDist is 32, so ndist is always valid.
   301  	f.b >>= 5
   302  	nclen := int(f.b&0xF) + 4
   303  	// numCodes is 19, so nclen is always valid.
   304  	f.b >>= 4
   305  	f.nb -= 5 + 5 + 4
   306  
   307  	// (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
   308  	for i := 0; i < nclen; i++ {
   309  		for f.nb < 3 {
   310  			if err := f.moreBits(); err != nil {
   311  				return err
   312  			}
   313  		}
   314  		f.codebits[codeOrder[i]] = int(f.b & 0x7)
   315  		f.b >>= 3
   316  		f.nb -= 3
   317  	}
   318  	for i := nclen; i < len(codeOrder); i++ {
   319  		f.codebits[codeOrder[i]] = 0
   320  	}
   321  	if !f.h1.init(f.codebits[0:]) {
   322  		return CorruptInputError(f.roffset)
   323  	}
   324  
   325  	// HLIT + 257 code lengths, HDIST + 1 code lengths,
   326  	// using the code length Huffman code.
   327  	for i, n := 0, nlit+ndist; i < n; {
   328  		x, err := f.huffSym(&f.h1)
   329  		if err != nil {
   330  			return err
   331  		}
   332  		if x < 16 {
   333  			// Actual length.
   334  			f.bits[i] = x
   335  			i++
   336  			continue
   337  		}
   338  		// Repeat previous length or zero.
   339  		var rep int
   340  		var nb uint
   341  		var b int
   342  		switch x {
   343  		default:
   344  			return InternalError("unexpected length code")
   345  		case 16:
   346  			rep = 3
   347  			nb = 2
   348  			if i == 0 {
   349  				return CorruptInputError(f.roffset)
   350  			}
   351  			b = f.bits[i-1]
   352  		case 17:
   353  			rep = 3
   354  			nb = 3
   355  			b = 0
   356  		case 18:
   357  			rep = 11
   358  			nb = 7
   359  			b = 0
   360  		}
   361  		for f.nb < nb {
   362  			if err := f.moreBits(); err != nil {
   363  				return err
   364  			}
   365  		}
   366  		rep += int(f.b & uint32(1<<nb-1))
   367  		f.b >>= nb
   368  		f.nb -= nb
   369  		if i+rep > n {
   370  			return CorruptInputError(f.roffset)
   371  		}
   372  		for j := 0; j < rep; j++ {
   373  			f.bits[i] = b
   374  			i++
   375  		}
   376  	}
   377  
   378  	if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
   379  		return CorruptInputError(f.roffset)
   380  	}
   381  
   382  	return nil
   383  }
   384  
   385  // Decode a single Huffman block from f.
   386  // hl and hd are the Huffman states for the lit/length values
   387  // and the distance values, respectively.  If hd == nil, using the
   388  // fixed distance encoding associated with fixed Huffman blocks.
   389  func (f *decompressor) huffmanBlock() {
   390  	for {
   391  		v, err := f.huffSym(f.hl)
   392  		if err != nil {
   393  			f.err = err
   394  			return
   395  		}
   396  		var n uint // number of bits extra
   397  		var length int
   398  		switch {
   399  		case v < 256:
   400  			f.hist[f.hp] = byte(v)
   401  			f.hp++
   402  			if f.hp == len(f.hist) {
   403  				// After the flush, continue this loop.
   404  				f.flush((*decompressor).huffmanBlock)
   405  				return
   406  			}
   407  			continue
   408  		case v == 256:
   409  			// Done with huffman block; read next block.
   410  			f.step = (*decompressor).nextBlock
   411  			return
   412  		// otherwise, reference to older data
   413  		case v < 265:
   414  			length = v - (257 - 3)
   415  			n = 0
   416  		case v < 269:
   417  			length = v*2 - (265*2 - 11)
   418  			n = 1
   419  		case v < 273:
   420  			length = v*4 - (269*4 - 19)
   421  			n = 2
   422  		case v < 277:
   423  			length = v*8 - (273*8 - 35)
   424  			n = 3
   425  		case v < 281:
   426  			length = v*16 - (277*16 - 67)
   427  			n = 4
   428  		case v < 285:
   429  			length = v*32 - (281*32 - 131)
   430  			n = 5
   431  		default:
   432  			length = 258
   433  			n = 0
   434  		}
   435  		if n > 0 {
   436  			for f.nb < n {
   437  				if err = f.moreBits(); err != nil {
   438  					f.err = err
   439  					return
   440  				}
   441  			}
   442  			length += int(f.b & uint32(1<<n-1))
   443  			f.b >>= n
   444  			f.nb -= n
   445  		}
   446  
   447  		var dist int
   448  		if f.hd == nil {
   449  			for f.nb < 5 {
   450  				if err = f.moreBits(); err != nil {
   451  					f.err = err
   452  					return
   453  				}
   454  			}
   455  			dist = int(reverseByte[(f.b&0x1F)<<3])
   456  			f.b >>= 5
   457  			f.nb -= 5
   458  		} else {
   459  			if dist, err = f.huffSym(f.hd); err != nil {
   460  				f.err = err
   461  				return
   462  			}
   463  		}
   464  
   465  		switch {
   466  		case dist < 4:
   467  			dist++
   468  		case dist >= 30:
   469  			f.err = CorruptInputError(f.roffset)
   470  			return
   471  		default:
   472  			nb := uint(dist-2) >> 1
   473  			// have 1 bit in bottom of dist, need nb more.
   474  			extra := (dist & 1) << nb
   475  			for f.nb < nb {
   476  				if err = f.moreBits(); err != nil {
   477  					f.err = err
   478  					return
   479  				}
   480  			}
   481  			extra |= int(f.b & uint32(1<<nb-1))
   482  			f.b >>= nb
   483  			f.nb -= nb
   484  			dist = 1<<(nb+1) + 1 + extra
   485  		}
   486  
   487  		// Copy history[-dist:-dist+length] into output.
   488  		if dist > len(f.hist) {
   489  			f.err = InternalError("bad history distance")
   490  			return
   491  		}
   492  
   493  		// No check on length; encoding can be prescient.
   494  		if !f.hfull && dist > f.hp {
   495  			f.err = CorruptInputError(f.roffset)
   496  			return
   497  		}
   498  
   499  		f.copyLen, f.copyDist = length, dist
   500  		if f.copyHist() {
   501  			return
   502  		}
   503  	}
   504  }
   505  
   506  // copyHist copies f.copyLen bytes from f.hist (f.copyDist bytes ago) to itself.
   507  // It reports whether the f.hist buffer is full.
   508  func (f *decompressor) copyHist() bool {
   509  	p := f.hp - f.copyDist
   510  	if p < 0 {
   511  		p += len(f.hist)
   512  	}
   513  	for f.copyLen > 0 {
   514  		n := f.copyLen
   515  		if x := len(f.hist) - f.hp; n > x {
   516  			n = x
   517  		}
   518  		if x := len(f.hist) - p; n > x {
   519  			n = x
   520  		}
   521  		forwardCopy(f.hist[:], f.hp, p, n)
   522  		p += n
   523  		f.hp += n
   524  		f.copyLen -= n
   525  		if f.hp == len(f.hist) {
   526  			// After flush continue copying out of history.
   527  			f.flush((*decompressor).copyHuff)
   528  			return true
   529  		}
   530  		if p == len(f.hist) {
   531  			p = 0
   532  		}
   533  	}
   534  	return false
   535  }
   536  
   537  func (f *decompressor) copyHuff() {
   538  	if f.copyHist() {
   539  		return
   540  	}
   541  	f.huffmanBlock()
   542  }
   543  
   544  // Copy a single uncompressed data block from input to output.
   545  func (f *decompressor) dataBlock() {
   546  	// Uncompressed.
   547  	// Discard current half-byte.
   548  	f.nb = 0
   549  	f.b = 0
   550  
   551  	// Length then ones-complement of length.
   552  	nr, err := io.ReadFull(f.r, f.buf[0:4])
   553  	f.roffset += int64(nr)
   554  	if err != nil {
   555  		f.err = &ReadError{f.roffset, err}
   556  		return
   557  	}
   558  	n := int(f.buf[0]) | int(f.buf[1])<<8
   559  	nn := int(f.buf[2]) | int(f.buf[3])<<8
   560  	if uint16(nn) != uint16(^n) {
   561  		f.err = CorruptInputError(f.roffset)
   562  		return
   563  	}
   564  
   565  	if n == 0 {
   566  		// 0-length block means sync
   567  		f.flush((*decompressor).nextBlock)
   568  		return
   569  	}
   570  
   571  	f.copyLen = n
   572  	f.copyData()
   573  }
   574  
   575  // copyData copies f.copyLen bytes from the underlying reader into f.hist.
   576  // It pauses for reads when f.hist is full.
   577  func (f *decompressor) copyData() {
   578  	n := f.copyLen
   579  	for n > 0 {
   580  		m := len(f.hist) - f.hp
   581  		if m > n {
   582  			m = n
   583  		}
   584  		m, err := io.ReadFull(f.r, f.hist[f.hp:f.hp+m])
   585  		f.roffset += int64(m)
   586  		if err != nil {
   587  			f.err = &ReadError{f.roffset, err}
   588  			return
   589  		}
   590  		n -= m
   591  		f.hp += m
   592  		if f.hp == len(f.hist) {
   593  			f.copyLen = n
   594  			f.flush((*decompressor).copyData)
   595  			return
   596  		}
   597  	}
   598  	f.step = (*decompressor).nextBlock
   599  }
   600  
   601  func (f *decompressor) setDict(dict []byte) {
   602  	if len(dict) > len(f.hist) {
   603  		// Will only remember the tail.
   604  		dict = dict[len(dict)-len(f.hist):]
   605  	}
   606  
   607  	f.hp = copy(f.hist[:], dict)
   608  	if f.hp == len(f.hist) {
   609  		f.hp = 0
   610  		f.hfull = true
   611  	}
   612  	f.hw = f.hp
   613  }
   614  
   615  func (f *decompressor) moreBits() error {
   616  	c, err := f.r.ReadByte()
   617  	if err != nil {
   618  		if err == io.EOF {
   619  			err = io.ErrUnexpectedEOF
   620  		}
   621  		return err
   622  	}
   623  	f.roffset++
   624  	f.b |= uint32(c) << f.nb
   625  	f.nb += 8
   626  	return nil
   627  }
   628  
   629  // Read the next Huffman-encoded symbol from f according to h.
   630  func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
   631  	n := uint(h.min)
   632  	for {
   633  		for f.nb < n {
   634  			if err := f.moreBits(); err != nil {
   635  				return 0, err
   636  			}
   637  		}
   638  		chunk := h.chunks[f.b&(huffmanNumChunks-1)]
   639  		n = uint(chunk & huffmanCountMask)
   640  		if n > huffmanChunkBits {
   641  			chunk = h.links[chunk>>huffmanValueShift][(f.b>>huffmanChunkBits)&h.linkMask]
   642  			n = uint(chunk & huffmanCountMask)
   643  		}
   644  		if n <= f.nb {
   645  			f.b >>= n
   646  			f.nb -= n
   647  			return int(chunk >> huffmanValueShift), nil
   648  		}
   649  	}
   650  }
   651  
   652  // Flush any buffered output to the underlying writer.
   653  func (f *decompressor) flush(step func(*decompressor)) {
   654  	f.toRead = f.hist[f.hw:f.hp]
   655  	f.woffset += int64(f.hp - f.hw)
   656  	f.hw = f.hp
   657  	if f.hp == len(f.hist) {
   658  		f.hp = 0
   659  		f.hw = 0
   660  		f.hfull = true
   661  	}
   662  	f.step = step
   663  }
   664  
   665  func makeReader(r io.Reader) Reader {
   666  	if rr, ok := r.(Reader); ok {
   667  		return rr
   668  	}
   669  	return bufio.NewReader(r)
   670  }
   671  
   672  // NewReader returns a new ReadCloser that can be used
   673  // to read the uncompressed version of r.  It is the caller's
   674  // responsibility to call Close on the ReadCloser when
   675  // finished reading.
   676  func NewReader(r io.Reader) io.ReadCloser {
   677  	var f decompressor
   678  	f.bits = new([maxLit + maxDist]int)
   679  	f.codebits = new([numCodes]int)
   680  	f.r = makeReader(r)
   681  	f.hist = new([maxHist]byte)
   682  	f.step = (*decompressor).nextBlock
   683  	return &f
   684  }
   685  
   686  // NewReaderDict is like NewReader but initializes the reader
   687  // with a preset dictionary.  The returned Reader behaves as if
   688  // the uncompressed data stream started with the given dictionary,
   689  // which has already been read.  NewReaderDict is typically used
   690  // to read data compressed by NewWriterDict.
   691  func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
   692  	var f decompressor
   693  	f.r = makeReader(r)
   694  	f.hist = new([maxHist]byte)
   695  	f.bits = new([maxLit + maxDist]int)
   696  	f.codebits = new([numCodes]int)
   697  	f.step = (*decompressor).nextBlock
   698  	f.setDict(dict)
   699  	return &f
   700  }