github.com/peggyl/go@v0.0.0-20151008231540-ae315999c2d5/src/compress/flate/deflate.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package flate
     6  
     7  import (
     8  	"fmt"
     9  	"io"
    10  	"math"
    11  )
    12  
const (
	// Exported compression levels accepted by NewWriter.
	NoCompression      = 0
	BestSpeed          = 1
	fastCompression    = 3
	BestCompression    = 9
	DefaultCompression = -1
	logWindowSize      = 15
	windowSize         = 1 << logWindowSize
	windowMask         = windowSize - 1
	logMaxOffsetSize   = 15  // Standard DEFLATE
	minMatchLength     = 3   // The smallest match that the compressor looks for
	maxMatchLength     = 258 // The longest match for the compressor
	minOffsetSize      = 1   // The shortest offset that makes any sense

	// The maximum number of tokens we put into a single flat block, just to
	// stop things from getting too large.
	maxFlateBlockTokens = 1 << 14
	maxStoreBlockSize   = 65535
	hashBits            = 17
	hashSize            = 1 << hashBits
	hashMask            = (1 << hashBits) - 1
	hashShift           = (hashBits + minMatchLength - 1) / minMatchLength
	// Positions stored in the hash tables are biased by hashOffset; once the
	// bias exceeds maxHashOffset, fillDeflate rebases the tables.
	maxHashOffset = 1 << 24

	// skipNever in compressionLevel.fastSkipHashing selects the lazy-matching
	// strategy in deflate (levels 4-9).
	skipNever = math.MaxInt32
)
    39  
// compressionLevel holds the matcher tuning parameters for one level:
// good (shorten the chain search once a match this long is found),
// lazy (stop lazy matching once the previous match reaches this length),
// nice (stop searching once a match this long is found),
// chain (maximum hash-chain entries to probe), and
// fastSkipHashing (fast-strategy skip threshold, or skipNever for lazy matching).
type compressionLevel struct {
	good, lazy, nice, chain, fastSkipHashing int
}
    43  
// levels[1..9] are the tuning parameters for each compression level;
// levels[0] is an unused placeholder (level 0 bypasses deflate entirely).
var levels = []compressionLevel{
	{}, // 0
	// For levels 1-3 we don't bother trying with lazy matches
	{3, 0, 8, 4, 4},
	{3, 0, 16, 8, 5},
	{3, 0, 32, 32, 6},
	// Levels 4-9 use increasingly more lazy matching
	// and increasingly stringent conditions for "good enough".
	{4, 4, 16, 16, skipNever},
	{8, 16, 32, 32, skipNever},
	{8, 16, 128, 128, skipNever},
	{8, 32, 128, 256, skipNever},
	{32, 128, 258, 1024, skipNever},
	{32, 258, 258, 4096, skipNever},
}
    59  
// compressor holds the state shared by the store (level 0) and deflate
// (levels 1-9) strategies; init selects the active strategy by setting
// the fill and step function pointers.
type compressor struct {
	compressionLevel

	w *huffmanBitWriter

	// compression algorithm
	fill func(*compressor, []byte) int // copy data to window
	step func(*compressor)             // process window
	sync bool                          // requesting flush

	// Input hash chains
	// hashHead[hashValue] contains the largest inputIndex with the specified hash value
	// If hashHead[hashValue] is within the current window, then
	// hashPrev[hashHead[hashValue] & windowMask] contains the previous index
	// with the same hash value.
	chainHead  int
	hashHead   []int
	hashPrev   []int
	hashOffset int // bias added to positions stored in the tables; bumped on window shifts

	// input window: unprocessed data is window[index:windowEnd]
	index         int
	window        []byte
	windowEnd     int
	blockStart    int  // window index where current tokens start
	byteAvailable bool // if true, still need to process window[index-1].

	// queued output tokens
	tokens []token

	// deflate state
	length         int // length of the match found at the previous step (lazy matching)
	offset         int // offset of that match
	hash           int // rolling hash over window[index:index+minMatchLength]
	maxInsertIndex int // positions >= this lack the bytes needed to compute a hash
	err            error
}
    97  
// fillDeflate copies bytes from b into the free tail of the sliding window
// and returns how many were consumed. When the write position nears the end
// of the 2*windowSize buffer, the upper half is first slid down over the
// lower half and all window-relative indices are shifted to match.
func (d *compressor) fillDeflate(b []byte) int {
	if d.index >= 2*windowSize-(minMatchLength+maxMatchLength) {
		// shift the window by windowSize
		copy(d.window, d.window[windowSize:2*windowSize])
		d.index -= windowSize
		d.windowEnd -= windowSize
		if d.blockStart >= windowSize {
			d.blockStart -= windowSize
		} else {
			// blockStart slid off the front of the window; mark it with a
			// sentinel so writeBlock's blockStart <= index check fails.
			d.blockStart = math.MaxInt32
		}
		// Rather than rewriting both hash tables on every shift, grow the
		// bias that is added to positions when they are stored.
		d.hashOffset += windowSize
		if d.hashOffset > maxHashOffset {
			// The bias is about to grow without bound: rebase it back to 1
			// and subtract the same delta from every stored position.
			// Stored values not greater than delta are reset to 0.
			delta := d.hashOffset - 1
			d.hashOffset -= delta
			d.chainHead -= delta
			for i, v := range d.hashPrev {
				if v > delta {
					d.hashPrev[i] -= delta
				} else {
					d.hashPrev[i] = 0
				}
			}
			for i, v := range d.hashHead {
				if v > delta {
					d.hashHead[i] -= delta
				} else {
					d.hashHead[i] = 0
				}
			}
		}
	}
	n := copy(d.window[d.windowEnd:], b)
	d.windowEnd += n
	return n
}
   134  
   135  func (d *compressor) writeBlock(tokens []token, index int, eof bool) error {
   136  	if index > 0 || eof {
   137  		var window []byte
   138  		if d.blockStart <= index {
   139  			window = d.window[d.blockStart:index]
   140  		}
   141  		d.blockStart = index
   142  		d.w.writeBlock(tokens, eof, window)
   143  		return d.w.err
   144  	}
   145  	return nil
   146  }
   147  
// Try to find a match starting at index whose length is greater than prevSize.
// We only look at chainCount possibilities before giving up.
//
// findMatch walks the hash chain starting at prevHead, comparing candidates
// against window[pos:], and reports the best match longer than prevLength.
func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
	// Never examine more bytes than remain in the lookahead.
	minMatchLook := maxMatchLength
	if lookahead < minMatchLook {
		minMatchLook = lookahead
	}

	win := d.window[0 : pos+minMatchLook]

	// We quit when we get a match that's at least nice long
	nice := len(win) - pos
	if d.nice < nice {
		nice = d.nice
	}

	// If we've got a match that's good enough, only look in 1/4 the chain.
	tries := d.chain
	length = prevLength
	if length >= d.good {
		tries >>= 2
	}

	// wEnd is the byte that a candidate must also match to beat the current
	// best length; it is advanced whenever the best match improves.
	w0 := win[pos]
	w1 := win[pos+1]
	wEnd := win[pos+length]
	minIndex := pos - windowSize

	for i := prevHead; tries > 0; tries-- {
		if w0 == win[i] && w1 == win[i+1] && wEnd == win[i+length] {
			// The hash function ensures that if win[i] and win[i+1] match, win[i+2] matches

			n := 3
			for pos+n < len(win) && win[i+n] == win[pos+n] {
				n++
			}
			// Matches of exactly 3 bytes are only worthwhile at short
			// offsets (<= 4096); longer ones always count if they improve.
			if n > length && (n > 3 || pos-i <= 4096) {
				length = n
				offset = pos - i
				ok = true
				if n >= nice {
					// The match is good enough that we don't try to find a better one.
					break
				}
				wEnd = win[pos+n]
			}
		}
		if i == minIndex {
			// hashPrev[i & windowMask] has already been overwritten, so stop now.
			break
		}
		// Follow the chain to the previous position with the same hash,
		// un-biasing by hashOffset; stop once it leaves the window.
		if i = d.hashPrev[i&windowMask] - d.hashOffset; i < minIndex || i < 0 {
			break
		}
	}
	return
}
   205  
   206  func (d *compressor) writeStoredBlock(buf []byte) error {
   207  	if d.w.writeStoredHeader(len(buf), false); d.w.err != nil {
   208  		return d.w.err
   209  	}
   210  	d.w.writeBytes(buf)
   211  	return d.w.err
   212  }
   213  
   214  func (d *compressor) initDeflate() {
   215  	d.hashHead = make([]int, hashSize)
   216  	d.hashPrev = make([]int, windowSize)
   217  	d.window = make([]byte, 2*windowSize)
   218  	d.hashOffset = 1
   219  	d.tokens = make([]token, 0, maxFlateBlockTokens+1)
   220  	d.length = minMatchLength - 1
   221  	d.offset = 0
   222  	d.byteAvailable = false
   223  	d.index = 0
   224  	d.hash = 0
   225  	d.chainHead = -1
   226  }
   227  
// deflate is the step function for levels 1-9. It scans
// window[index:windowEnd], appending literal and match tokens and flushing
// full token blocks, until too little lookahead remains to guarantee a
// maximal match — unless d.sync is set, in which case it consumes everything.
func (d *compressor) deflate() {
	// Without a flush request, insist on full lookahead before doing work.
	if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync {
		return
	}

	// Positions at or beyond maxInsertIndex lack the minMatchLength bytes
	// needed to compute a hash.
	d.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
	if d.index < d.maxInsertIndex {
		// Seed the rolling hash with the first two bytes.
		d.hash = int(d.window[d.index])<<hashShift + int(d.window[d.index+1])
	}

Loop:
	for {
		if d.index > d.windowEnd {
			panic("index > windowEnd")
		}
		lookahead := d.windowEnd - d.index
		if lookahead < minMatchLength+maxMatchLength {
			if !d.sync {
				break Loop
			}
			if d.index > d.windowEnd {
				panic("index > windowEnd")
			}
			if lookahead == 0 {
				// Flush current output block if any.
				if d.byteAvailable {
					// There is still one pending token that needs to be flushed
					d.tokens = append(d.tokens, literalToken(uint32(d.window[d.index-1])))
					d.byteAvailable = false
				}
				if len(d.tokens) > 0 {
					if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
						return
					}
					d.tokens = d.tokens[:0]
				}
				break Loop
			}
		}
		if d.index < d.maxInsertIndex {
			// Update the hash
			d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask
			// Link the current position into the chain for this hash:
			// remember the old head in hashPrev, then point the head at us.
			d.chainHead = d.hashHead[d.hash]
			d.hashPrev[d.index&windowMask] = d.chainHead
			d.hashHead[d.hash] = d.index + d.hashOffset
		}
		prevLength := d.length
		prevOffset := d.offset
		d.length = minMatchLength - 1
		d.offset = 0
		minIndex := d.index - windowSize
		if minIndex < 0 {
			minIndex = 0
		}

		// Search only while the chain head is still inside the window.
		// Fast strategy: search whenever there is enough lookahead.
		// Lazy strategy: search only if the previous match can be improved
		// (prevLength < d.lazy) and the lookahead exceeds it.
		if d.chainHead-d.hashOffset >= minIndex &&
			(d.fastSkipHashing != skipNever && lookahead > minMatchLength-1 ||
				d.fastSkipHashing == skipNever && lookahead > prevLength && prevLength < d.lazy) {
			if newLength, newOffset, ok := d.findMatch(d.index, d.chainHead-d.hashOffset, minMatchLength-1, lookahead); ok {
				d.length = newLength
				d.offset = newOffset
			}
		}
		if d.fastSkipHashing != skipNever && d.length >= minMatchLength ||
			d.fastSkipHashing == skipNever && prevLength >= minMatchLength && d.length <= prevLength {
			// There was a match at the previous step, and the current match is
			// not better. Output the previous match.
			if d.fastSkipHashing != skipNever {
				d.tokens = append(d.tokens, matchToken(uint32(d.length-minMatchLength), uint32(d.offset-minOffsetSize)))
			} else {
				d.tokens = append(d.tokens, matchToken(uint32(prevLength-minMatchLength), uint32(prevOffset-minOffsetSize)))
			}
			// Insert in the hash table all strings up to the end of the match.
			// index and index-1 are already inserted. If there is not enough
			// lookahead, the last two strings are not inserted into the hash
			// table.
			if d.length <= d.fastSkipHashing {
				var newIndex int
				if d.fastSkipHashing != skipNever {
					newIndex = d.index + d.length
				} else {
					newIndex = d.index + prevLength - 1
				}
				for d.index++; d.index < newIndex; d.index++ {
					if d.index < d.maxInsertIndex {
						d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask
						// Get previous value with the same hash.
						// Our chain should point to the previous value.
						d.hashPrev[d.index&windowMask] = d.hashHead[d.hash]
						// Set the head of the hash chain to us.
						d.hashHead[d.hash] = d.index + d.hashOffset
					}
				}
				if d.fastSkipHashing == skipNever {
					d.byteAvailable = false
					d.length = minMatchLength - 1
				}
			} else {
				// For matches this long, we don't bother inserting each individual
				// item into the table.
				d.index += d.length
				if d.index < d.maxInsertIndex {
					// Re-seed the hash at the new position.
					d.hash = (int(d.window[d.index])<<hashShift + int(d.window[d.index+1]))
				}
			}
			if len(d.tokens) == maxFlateBlockTokens {
				// The block includes the current character
				if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
					return
				}
				d.tokens = d.tokens[:0]
			}
		} else {
			// No match to emit: output a literal. In lazy mode the literal is
			// the previously deferred byte window[index-1]; in fast mode it is
			// the current byte.
			if d.fastSkipHashing != skipNever || d.byteAvailable {
				i := d.index - 1
				if d.fastSkipHashing != skipNever {
					i = d.index
				}
				d.tokens = append(d.tokens, literalToken(uint32(d.window[i])))
				if len(d.tokens) == maxFlateBlockTokens {
					if d.err = d.writeBlock(d.tokens, i+1, false); d.err != nil {
						return
					}
					d.tokens = d.tokens[:0]
				}
			}
			d.index++
			if d.fastSkipHashing == skipNever {
				// Defer window[index-1]; it may yet start a better match.
				d.byteAvailable = true
			}
		}
	}
}
   361  
   362  func (d *compressor) fillStore(b []byte) int {
   363  	n := copy(d.window[d.windowEnd:], b)
   364  	d.windowEnd += n
   365  	return n
   366  }
   367  
   368  func (d *compressor) store() {
   369  	if d.windowEnd > 0 {
   370  		d.err = d.writeStoredBlock(d.window[:d.windowEnd])
   371  	}
   372  	d.windowEnd = 0
   373  }
   374  
   375  func (d *compressor) write(b []byte) (n int, err error) {
   376  	n = len(b)
   377  	b = b[d.fill(d, b):]
   378  	for len(b) > 0 {
   379  		d.step(d)
   380  		b = b[d.fill(d, b):]
   381  	}
   382  	return n, d.err
   383  }
   384  
   385  func (d *compressor) syncFlush() error {
   386  	d.sync = true
   387  	d.step(d)
   388  	if d.err == nil {
   389  		d.w.writeStoredHeader(0, false)
   390  		d.w.flush()
   391  		d.err = d.w.err
   392  	}
   393  	d.sync = false
   394  	return d.err
   395  }
   396  
   397  func (d *compressor) init(w io.Writer, level int) (err error) {
   398  	d.w = newHuffmanBitWriter(w)
   399  
   400  	switch {
   401  	case level == NoCompression:
   402  		d.window = make([]byte, maxStoreBlockSize)
   403  		d.fill = (*compressor).fillStore
   404  		d.step = (*compressor).store
   405  	case level == DefaultCompression:
   406  		level = 6
   407  		fallthrough
   408  	case 1 <= level && level <= 9:
   409  		d.compressionLevel = levels[level]
   410  		d.initDeflate()
   411  		d.fill = (*compressor).fillDeflate
   412  		d.step = (*compressor).deflate
   413  	default:
   414  		return fmt.Errorf("flate: invalid compression level %d: want value in range [-1, 9]", level)
   415  	}
   416  	return nil
   417  }
   418  
// zeroes and bzeroes are all-zero scratch arrays used by reset to clear the
// hash tables and window in chunks via copy.
var zeroes [32]int
var bzeroes [256]byte
   421  
   422  func (d *compressor) reset(w io.Writer) {
   423  	d.w.reset(w)
   424  	d.sync = false
   425  	d.err = nil
   426  	switch d.compressionLevel.chain {
   427  	case 0:
   428  		// level was NoCompression.
   429  		for i := range d.window {
   430  			d.window[i] = 0
   431  		}
   432  		d.windowEnd = 0
   433  	default:
   434  		d.chainHead = -1
   435  		for s := d.hashHead; len(s) > 0; {
   436  			n := copy(s, zeroes[:])
   437  			s = s[n:]
   438  		}
   439  		for s := d.hashPrev; len(s) > 0; s = s[len(zeroes):] {
   440  			copy(s, zeroes[:])
   441  		}
   442  		d.hashOffset = 1
   443  
   444  		d.index, d.windowEnd = 0, 0
   445  		for s := d.window; len(s) > 0; {
   446  			n := copy(s, bzeroes[:])
   447  			s = s[n:]
   448  		}
   449  		d.blockStart, d.byteAvailable = 0, false
   450  
   451  		d.tokens = d.tokens[:maxFlateBlockTokens+1]
   452  		for i := 0; i <= maxFlateBlockTokens; i++ {
   453  			d.tokens[i] = 0
   454  		}
   455  		d.tokens = d.tokens[:0]
   456  		d.length = minMatchLength - 1
   457  		d.offset = 0
   458  		d.hash = 0
   459  		d.maxInsertIndex = 0
   460  	}
   461  }
   462  
   463  func (d *compressor) close() error {
   464  	d.sync = true
   465  	d.step(d)
   466  	if d.err != nil {
   467  		return d.err
   468  	}
   469  	if d.w.writeStoredHeader(0, true); d.w.err != nil {
   470  		return d.w.err
   471  	}
   472  	d.w.flush()
   473  	return d.w.err
   474  }
   475  
   476  // NewWriter returns a new Writer compressing data at the given level.
   477  // Following zlib, levels range from 1 (BestSpeed) to 9 (BestCompression);
   478  // higher levels typically run slower but compress more. Level 0
   479  // (NoCompression) does not attempt any compression; it only adds the
   480  // necessary DEFLATE framing. Level -1 (DefaultCompression) uses the default
   481  // compression level.
   482  //
   483  // If level is in the range [-1, 9] then the error returned will be nil.
   484  // Otherwise the error returned will be non-nil.
   485  func NewWriter(w io.Writer, level int) (*Writer, error) {
   486  	var dw Writer
   487  	if err := dw.d.init(w, level); err != nil {
   488  		return nil, err
   489  	}
   490  	return &dw, nil
   491  }
   492  
   493  // NewWriterDict is like NewWriter but initializes the new
   494  // Writer with a preset dictionary.  The returned Writer behaves
   495  // as if the dictionary had been written to it without producing
   496  // any compressed output.  The compressed data written to w
   497  // can only be decompressed by a Reader initialized with the
   498  // same dictionary.
   499  func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) {
   500  	dw := &dictWriter{w, false}
   501  	zw, err := NewWriter(dw, level)
   502  	if err != nil {
   503  		return nil, err
   504  	}
   505  	zw.Write(dict)
   506  	zw.Flush()
   507  	dw.enabled = true
   508  	zw.dict = append(zw.dict, dict...) // duplicate dictionary for Reset method.
   509  	return zw, err
   510  }
   511  
// dictWriter wraps the destination writer while a preset dictionary is fed
// to the compressor: until enabled is set, writes are discarded.
type dictWriter struct {
	w       io.Writer
	enabled bool
}
   516  
   517  func (w *dictWriter) Write(b []byte) (n int, err error) {
   518  	if w.enabled {
   519  		return w.w.Write(b)
   520  	}
   521  	return len(b), nil
   522  }
   523  
// A Writer takes data written to it and writes the compressed
// form of that data to an underlying writer (see NewWriter).
type Writer struct {
	d    compressor
	dict []byte // preset dictionary from NewWriterDict, replayed by Reset
}
   530  
   531  // Write writes data to w, which will eventually write the
   532  // compressed form of data to its underlying writer.
   533  func (w *Writer) Write(data []byte) (n int, err error) {
   534  	return w.d.write(data)
   535  }
   536  
   537  // Flush flushes any pending compressed data to the underlying writer.
   538  // It is useful mainly in compressed network protocols, to ensure that
   539  // a remote reader has enough data to reconstruct a packet.
   540  // Flush does not return until the data has been written.
   541  // If the underlying writer returns an error, Flush returns that error.
   542  //
   543  // In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
   544  func (w *Writer) Flush() error {
   545  	// For more about flushing:
   546  	// http://www.bolet.org/~pornin/deflate-flush.html
   547  	return w.d.syncFlush()
   548  }
   549  
   550  // Close flushes and closes the writer.
   551  func (w *Writer) Close() error {
   552  	return w.d.close()
   553  }
   554  
   555  // Reset discards the writer's state and makes it equivalent to
   556  // the result of NewWriter or NewWriterDict called with dst
   557  // and w's level and dictionary.
   558  func (w *Writer) Reset(dst io.Writer) {
   559  	if dw, ok := w.d.w.w.(*dictWriter); ok {
   560  		// w was created with NewWriterDict
   561  		dw.w = dst
   562  		w.d.reset(dw)
   563  		dw.enabled = false
   564  		w.Write(w.dict)
   565  		w.Flush()
   566  		dw.enabled = true
   567  	} else {
   568  		// w was created with NewWriter
   569  		w.d.reset(dst)
   570  	}
   571  }