github.com/bir3/gocompiler@v0.9.2202/extra/compress/zstd/enc_better.go

     1  // Copyright 2019+ Klaus Post. All rights reserved.
     2  // License information can be found in the LICENSE file.
     3  // Based on work by Yann Collet, released under BSD License.
     4  
     5  package zstd
     6  
     7  import "fmt"
     8  
     9  const (
    10  	betterLongTableBits = 19                       // Bits used in the long match table
    11  	betterLongTableSize = 1 << betterLongTableBits // Size of the table
    12  	betterLongLen       = 8                        // Bytes used for table hash
    13  
    14  	// Note: Increasing the short table bits or making the hash shorter
    15  	// can actually lead to compression degradation since it will 'steal' more from the
    16  	// long match table and match offsets are quite big.
    17  	// This greatly depends on the type of input.
    18  	betterShortTableBits = 13                        // Bits used in the short match table
    19  	betterShortTableSize = 1 << betterShortTableBits // Size of the table
    20  	betterShortLen       = 5                         // Bytes used for table hash
    21  
    22  	betterLongTableShardCnt  = 1 << (betterLongTableBits - dictShardBits)    // Number of shards in the table
    23  	betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Size of an individual shard
    24  
    25  	betterShortTableShardCnt  = 1 << (betterShortTableBits - dictShardBits)     // Number of shards in the table
    26  	betterShortTableShardSize = betterShortTableSize / betterShortTableShardCnt // Size of an individual shard
    27  )
    28  
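        // prevEntry holds the current and the previous offset that hashed to the same
        // long-table slot, giving each slot an implicit match chain of length 2.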
    29  type prevEntry struct {
    30  	offset int32
    31  	prev   int32
    32  }
    33  
    34  // betterFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
    35  // The long match table contains the previous entry with the same hash,
    36  // effectively making it a "chain" of length 2.
    37  // When we find a long match we choose between the two values and select the longest.
    38  // When we find a short match, after checking the long table, we check if we can
    39  // find a longer long match at s+1 (lazy matching).
    40  type betterFastEncoder struct {
    41  	fastBase
    42  	table     [betterShortTableSize]tableEntry
    43  	longTable [betterLongTableSize]prevEntry
    44  }
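
        // A sketch of the per-position probe order in Encode below (simplified;
        // it omits the repeat-offset checks and the table updates):
        //
        //	candidateL := e.longTable[hashLen(cv, betterLongTableBits, betterLongLen)]
        //	candidateS := e.table[hashLen(cv, betterShortTableBits, betterShortLen)]
        //	// 1. Try candidateL.offset, then candidateL.prev; keep the longer match.
        //	// 2. Otherwise try the short candidate, then lazily probe the long
        //	//    table again at s+1 and keep whichever match is longest.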
    45  
    46  type betterFastEncoderDict struct {
    47  	betterFastEncoder
    48  	dictTable            []tableEntry
    49  	dictLongTable        []prevEntry
    50  	shortTableShardDirty [betterShortTableShardCnt]bool
    51  	longTableShardDirty  [betterLongTableShardCnt]bool
    52  	allDirty             bool
    53  }
    54  
    55  // Encode improves compression...
    56  func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
    57  	const (
    58  		// Input margin is the number of bytes we read (8)
    59  		// and the maximum we will read ahead (2)
    60  		inputMargin            = 8 + 2
    61  		minNonLiteralBlockSize = 16
    62  	)
    63  
    64  	// Protect against e.cur wraparound.
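        	// The loop body runs at most once; a loop is used only so both
        	// branches can exit early with break.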
    65  	for e.cur >= e.bufferReset-int32(len(e.hist)) {
    66  		if len(e.hist) == 0 {
    67  			e.table = [betterShortTableSize]tableEntry{}
    68  			e.longTable = [betterLongTableSize]prevEntry{}
    69  			e.cur = e.maxMatchOff
    70  			break
    71  		}
    72  		// Shift down everything in the table that isn't already too far away.
    73  		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
    74  		for i := range e.table[:] {
    75  			v := e.table[i].offset
    76  			if v < minOff {
    77  				v = 0
    78  			} else {
    79  				v = v - e.cur + e.maxMatchOff
    80  			}
    81  			e.table[i].offset = v
    82  		}
    83  		for i := range e.longTable[:] {
    84  			v := e.longTable[i].offset
    85  			v2 := e.longTable[i].prev
    86  			if v < minOff {
    87  				v = 0
    88  				v2 = 0
    89  			} else {
    90  				v = v - e.cur + e.maxMatchOff
    91  				if v2 < minOff {
    92  					v2 = 0
    93  				} else {
    94  					v2 = v2 - e.cur + e.maxMatchOff
    95  				}
    96  			}
    97  			e.longTable[i] = prevEntry{
    98  				offset: v,
    99  				prev:   v2,
   100  			}
   101  		}
   102  		e.cur = e.maxMatchOff
   103  		break
   104  	}
   105  
   106  	s := e.addBlock(src)
   107  	blk.size = len(src)
   108  	if len(src) < minNonLiteralBlockSize {
   109  		blk.extraLits = len(src)
   110  		blk.literals = blk.literals[:len(src)]
   111  		copy(blk.literals, src)
   112  		return
   113  	}
   114  
   115  	// Override src
   116  	src = e.hist
   117  	sLimit := int32(len(src)) - inputMargin
   118  	// stepSize is the number of bytes to skip on every main loop iteration.
   119  	// It should be >= 1.
   120  	const stepSize = 1
   121  
   122  	const kSearchStrength = 9
   123  
   124  	// nextEmit is where in src the next emitLiteral should start from.
   125  	nextEmit := s
   126  	cv := load6432(src, s)
   127  
   128  	// Relative offsets
   129  	offset1 := int32(blk.recentOffsets[0])
   130  	offset2 := int32(blk.recentOffsets[1])
   131  
   132  	addLiterals := func(s *seq, until int32) {
   133  		if until == nextEmit {
   134  			return
   135  		}
   136  		blk.literals = append(blk.literals, src[nextEmit:until]...)
   137  		s.litLen = uint32(until - nextEmit)
   138  	}
   139  	if debugEncoder {
   140  		println("recent offsets:", blk.recentOffsets)
   141  	}
   142  
   143  encodeLoop:
   144  	for {
   145  		var t int32
   146  		// We allow the encoder to optionally turn off repeat offsets across blocks
   147  		canRepeat := len(blk.sequences) > 2
   148  		var matched int32
   149  
   150  		for {
   151  			if debugAsserts && canRepeat && offset1 == 0 {
   152  				panic("offset0 was 0")
   153  			}
   154  
   155  			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
   156  			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
   157  			candidateL := e.longTable[nextHashL]
   158  			candidateS := e.table[nextHashS]
   159  
   160  			const repOff = 1
   161  			repIndex := s - offset1 + repOff
   162  			off := s + e.cur
   163  			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
   164  			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
   165  
   166  			if canRepeat {
   167  				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
   168  					// Consider history as well.
   169  					var seq seq
   170  					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
   171  
   172  					seq.matchLen = uint32(length - zstdMinMatch)
   173  
   174  					// We might be able to match backwards.
   175  					// Extend as long as we can.
   176  					start := s + repOff
   177  					// We end the search early, so we don't risk 0 literals
   178  					// and have to do special offset treatment.
   179  					startLimit := nextEmit + 1
   180  
   181  					tMin := s - e.maxMatchOff
   182  					if tMin < 0 {
   183  						tMin = 0
   184  					}
   185  					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
   186  						repIndex--
   187  						start--
   188  						seq.matchLen++
   189  					}
   190  					addLiterals(&seq, start)
   191  
   192  					// rep 0
   193  					seq.offset = 1
   194  					if debugSequences {
   195  						println("repeat sequence", seq, "next s:", s)
   196  					}
   197  					blk.sequences = append(blk.sequences, seq)
   198  
   199  					// Index match start+1 (long) -> s - 1
   200  					index0 := s + repOff
   201  					s += length + repOff
   202  
   203  					nextEmit = s
   204  					if s >= sLimit {
   205  						if debugEncoder {
   206  							println("repeat ended", s, length)
   208  						}
   209  						break encodeLoop
   210  					}
   211  					// Index skipped...
   212  					for index0 < s-1 {
   213  						cv0 := load6432(src, index0)
   214  						cv1 := cv0 >> 8
   215  						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
   216  						off := index0 + e.cur
   217  						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
   218  						e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
   219  						index0 += 2
   220  					}
   221  					cv = load6432(src, s)
   222  					continue
   223  				}
   224  				const repOff2 = 1
   225  
   226  				// We deviate from the reference encoder and also check offset 2.
   227  				// Still slower and not much better, so disabled.
   228  				// repIndex = s - offset2 + repOff2
   229  				if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
   230  					// Consider history as well.
   231  					var seq seq
   232  					length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)
   233  
   234  					seq.matchLen = uint32(length - zstdMinMatch)
   235  
   236  					// We might be able to match backwards.
   237  					// Extend as long as we can.
   238  					start := s + repOff2
   239  					// We end the search early, so we don't risk 0 literals
   240  					// and have to do special offset treatment.
   241  					startLimit := nextEmit + 1
   242  
   243  					tMin := s - e.maxMatchOff
   244  					if tMin < 0 {
   245  						tMin = 0
   246  					}
   247  					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
   248  						repIndex--
   249  						start--
   250  						seq.matchLen++
   251  					}
   252  					addLiterals(&seq, start)
   253  
   254  					// rep 2
   255  					seq.offset = 2
   256  					if debugSequences {
   257  						println("repeat sequence 2", seq, "next s:", s)
   258  					}
   259  					blk.sequences = append(blk.sequences, seq)
   260  
   261  					index0 := s + repOff2
   262  					s += length + repOff2
   263  					nextEmit = s
   264  					if s >= sLimit {
   265  						if debugEncoder {
   266  							println("repeat ended", s, length)
   268  						}
   269  						break encodeLoop
   270  					}
   271  
   272  					// Index skipped...
   273  					for index0 < s-1 {
   274  						cv0 := load6432(src, index0)
   275  						cv1 := cv0 >> 8
   276  						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
   277  						off := index0 + e.cur
   278  						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
   279  						e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
   280  						index0 += 2
   281  					}
   282  					cv = load6432(src, s)
   283  					// Swap offsets
   284  					offset1, offset2 = offset2, offset1
   285  					continue
   286  				}
   287  			}
   288  			// Find the offsets of our two matches.
   289  			coffsetL := candidateL.offset - e.cur
   290  			coffsetLP := candidateL.prev - e.cur
   291  
   292  			// Check if we have a long match.
   293  			if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
   294  				// Found a long match, at least 8 bytes.
   295  				matched = e.matchlen(s+8, coffsetL+8, src) + 8
   296  				t = coffsetL
   297  				if debugAsserts && s <= t {
   298  					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
   299  				}
   300  				if debugAsserts && s-t > e.maxMatchOff {
   301  					panic("s - t > e.maxMatchOff")
   302  				}
   303  				if debugMatches {
   304  					println("long match")
   305  				}
   306  
   307  				if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
   308  					// Found a long match, at least 8 bytes.
   309  					prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
   310  					if prevMatch > matched {
   311  						matched = prevMatch
   312  						t = coffsetLP
   313  					}
   314  					if debugAsserts && s <= t {
   315  						panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
   316  					}
   317  					if debugAsserts && s-t > e.maxMatchOff {
   318  						panic("s - t > e.maxMatchOff")
   319  					}
   320  					if debugMatches {
   321  						println("long match")
   322  					}
   323  				}
   324  				break
   325  			}
   326  
   327  			// Check if we have a long match on prev.
   328  			if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
   329  				// Found a long match, at least 8 bytes.
   330  				matched = e.matchlen(s+8, coffsetLP+8, src) + 8
   331  				t = coffsetLP
   332  				if debugAsserts && s <= t {
   333  					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
   334  				}
   335  				if debugAsserts && s-t > e.maxMatchOff {
   336  					panic("s - t > e.maxMatchOff")
   337  				}
   338  				if debugMatches {
   339  					println("long match")
   340  				}
   341  				break
   342  			}
   343  
   344  			coffsetS := candidateS.offset - e.cur
   345  
   346  			// Check if we have a short match.
   347  			if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
   348  				// found a regular match
   349  				matched = e.matchlen(s+4, coffsetS+4, src) + 4
   350  
   351  				// See if we can find a long match at s+1
   352  				const checkAt = 1
   353  				cv := load6432(src, s+checkAt)
   354  				nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
   355  				candidateL = e.longTable[nextHashL]
   356  				coffsetL = candidateL.offset - e.cur
   357  
   358  				// We can store it, since we have at least a 4 byte match.
   359  				e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
   360  				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
   361  					// Found a long match, at least 8 bytes.
   362  					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
   363  					if matchedNext > matched {
   364  						t = coffsetL
   365  						s += checkAt
   366  						matched = matchedNext
   367  						if debugMatches {
   368  							println("long match (after short)")
   369  						}
   370  						break
   371  					}
   372  				}
   373  
   374  				// Check prev long...
   375  				coffsetL = candidateL.prev - e.cur
   376  				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
   377  					// Found a long match, at least 8 bytes.
   378  					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
   379  					if matchedNext > matched {
   380  						t = coffsetL
   381  						s += checkAt
   382  						matched = matchedNext
   383  						if debugMatches {
   384  							println("prev long match (after short)")
   385  						}
   386  						break
   387  					}
   388  				}
   389  				t = coffsetS
   390  				if debugAsserts && s <= t {
   391  					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
   392  				}
   393  				if debugAsserts && s-t > e.maxMatchOff {
   394  					panic("s - t > e.maxMatchOff")
   395  				}
   396  				if debugAsserts && t < 0 {
   397  					panic("t<0")
   398  				}
   399  				if debugMatches {
   400  					println("short match")
   401  				}
   402  				break
   403  			}
   404  
   405  			// No match found, move forward in input.
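        			// The step size grows with the distance since the last emitted
        			// literal, so incompressible input is skipped progressively faster.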
   406  			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
   407  			if s >= sLimit {
   408  				break encodeLoop
   409  			}
   410  			cv = load6432(src, s)
   411  		}
   412  
   413  		// Try to find a better match by searching for a long match at the end of the current best match
   414  		if s+matched < sLimit {
   415  			// Allow some bytes at the beginning to mismatch.
   416  			// Sweet spot is around 3 bytes, but depends on input.
   417  			// The skipped bytes are tested in the "extend backwards" step,
   418  			// and are still picked up as part of the match if they match.
   419  			const skipBeginning = 3
   420  
   421  			nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
   422  			s2 := s + skipBeginning
   423  			cv := load3232(src, s2)
   424  			candidateL := e.longTable[nextHashL]
   425  			coffsetL := candidateL.offset - e.cur - matched + skipBeginning
   426  			if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
   427  				// Found a long match, at least 4 bytes.
   428  				matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4
   429  				if matchedNext > matched {
   430  					t = coffsetL
   431  					s = s2
   432  					matched = matchedNext
   433  					if debugMatches {
   434  						println("long match at end-of-match")
   435  					}
   436  				}
   437  			}
   438  
   439  			// Check prev long...
   440  			if true {
   441  				coffsetL = candidateL.prev - e.cur - matched + skipBeginning
   442  				if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
   443  					// Found a long match, at least 4 bytes.
   444  					matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4
   445  					if matchedNext > matched {
   446  						t = coffsetL
   447  						s = s2
   448  						matched = matchedNext
   449  						if debugMatches {
   450  							println("prev long match at end-of-match")
   451  						}
   452  					}
   453  				}
   454  			}
   455  		}
   456  		// A match has been found. Update recent offsets.
   457  		offset2 = offset1
   458  		offset1 = s - t
   459  
   460  		if debugAsserts && s <= t {
   461  			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
   462  		}
   463  
   464  		if debugAsserts && canRepeat && int(offset1) > len(src) {
   465  			panic("invalid offset")
   466  		}
   467  
   468  		// Extend the n-byte match as long as possible.
   469  		l := matched
   470  
   471  		// Extend backwards
   472  		tMin := s - e.maxMatchOff
   473  		if tMin < 0 {
   474  			tMin = 0
   475  		}
   476  		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
   477  			s--
   478  			t--
   479  			l++
   480  		}
   481  
   482  		// Write our sequence
   483  		var seq seq
   484  		seq.litLen = uint32(s - nextEmit)
   485  		seq.matchLen = uint32(l - zstdMinMatch)
   486  		if seq.litLen > 0 {
   487  			blk.literals = append(blk.literals, src[nextEmit:s]...)
   488  		}
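        		// Offsets 1-3 are repeat codes, so absolute offsets are stored
        		// with a +3 bias.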
   489  		seq.offset = uint32(s-t) + 3
   490  		s += l
   491  		if debugSequences {
   492  			println("sequence", seq, "next s:", s)
   493  		}
   494  		blk.sequences = append(blk.sequences, seq)
   495  		nextEmit = s
   496  		if s >= sLimit {
   497  			break encodeLoop
   498  		}
   499  
   500  		// Index match start+1 (long) -> s - 1
   501  		index0 := s - l + 1
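        		// Hash every other position inside the match: even steps go to the
        		// long table, the following byte (off+1) to the short table.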
   502  		for index0 < s-1 {
   503  			cv0 := load6432(src, index0)
   504  			cv1 := cv0 >> 8
   505  			h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
   506  			off := index0 + e.cur
   507  			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
   508  			e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
   509  			index0 += 2
   510  		}
   511  
   512  		cv = load6432(src, s)
   513  		if !canRepeat {
   514  			continue
   515  		}
   516  
   517  		// Check offset 2
   518  		for {
   519  			o2 := s - offset2
   520  			if load3232(src, o2) != uint32(cv) {
   521  				// Do regular search
   522  				break
   523  			}
   524  
   525  			// Store this, since we have it.
   526  			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
   527  			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
   528  
   529  			// We have at least a 4 byte match.
   530  			// No need to check backwards; we come straight from a match.
   531  			l := 4 + e.matchlen(s+4, o2+4, src)
   532  
   533  			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
   534  			e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
   535  			seq.matchLen = uint32(l) - zstdMinMatch
   536  			seq.litLen = 0
   537  
   538  			// Since litlen is always 0, this is offset 1.
   539  			seq.offset = 1
   540  			s += l
   541  			nextEmit = s
   542  			if debugSequences {
   543  				println("sequence", seq, "next s:", s)
   544  			}
   545  			blk.sequences = append(blk.sequences, seq)
   546  
   547  			// Swap offset 1 and 2.
   548  			offset1, offset2 = offset2, offset1
   549  			if s >= sLimit {
   550  				// Finished
   551  				break encodeLoop
   552  			}
   553  			cv = load6432(src, s)
   554  		}
   555  	}
   556  
   557  	if int(nextEmit) < len(src) {
   558  		blk.literals = append(blk.literals, src[nextEmit:]...)
   559  		blk.extraLits = len(src) - int(nextEmit)
   560  	}
   561  	blk.recentOffsets[0] = uint32(offset1)
   562  	blk.recentOffsets[1] = uint32(offset2)
   563  	if debugEncoder {
   564  		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
   565  	}
   566  }
   567  
   568  // EncodeNoHist will encode a block with no history and no following blocks.
   569  // The most notable difference is that src will not be copied for history and
   570  // we do not need to check for max match length.
   571  func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
   572  	e.ensureHist(len(src))
   573  	e.Encode(blk, src)
   574  }
   575  
   576  // Encode improves compression...
   577  func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
   578  	const (
   579  		// Input margin is the number of bytes we read (8)
   580  		// and the maximum we will read ahead (2)
   581  		inputMargin            = 8 + 2
   582  		minNonLiteralBlockSize = 16
   583  	)
   584  
   585  	// Protect against e.cur wraparound.
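        	// The loop body runs at most once; a loop is used only so both
        	// branches can exit early with break.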
   586  	for e.cur >= e.bufferReset-int32(len(e.hist)) {
   587  		if len(e.hist) == 0 {
   588  			for i := range e.table[:] {
   589  				e.table[i] = tableEntry{}
   590  			}
   591  			for i := range e.longTable[:] {
   592  				e.longTable[i] = prevEntry{}
   593  			}
   594  			e.cur = e.maxMatchOff
   595  			e.allDirty = true
   596  			break
   597  		}
   598  		// Shift down everything in the table that isn't already too far away.
   599  		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
   600  		for i := range e.table[:] {
   601  			v := e.table[i].offset
   602  			if v < minOff {
   603  				v = 0
   604  			} else {
   605  				v = v - e.cur + e.maxMatchOff
   606  			}
   607  			e.table[i].offset = v
   608  		}
   609  		for i := range e.longTable[:] {
   610  			v := e.longTable[i].offset
   611  			v2 := e.longTable[i].prev
   612  			if v < minOff {
   613  				v = 0
   614  				v2 = 0
   615  			} else {
   616  				v = v - e.cur + e.maxMatchOff
   617  				if v2 < minOff {
   618  					v2 = 0
   619  				} else {
   620  					v2 = v2 - e.cur + e.maxMatchOff
   621  				}
   622  			}
   623  			e.longTable[i] = prevEntry{
   624  				offset: v,
   625  				prev:   v2,
   626  			}
   627  		}
   628  		e.allDirty = true
   629  		e.cur = e.maxMatchOff
   630  		break
   631  	}
   632  
   633  	s := e.addBlock(src)
   634  	blk.size = len(src)
   635  	if len(src) < minNonLiteralBlockSize {
   636  		blk.extraLits = len(src)
   637  		blk.literals = blk.literals[:len(src)]
   638  		copy(blk.literals, src)
   639  		return
   640  	}
   641  
   642  	// Override src
   643  	src = e.hist
   644  	sLimit := int32(len(src)) - inputMargin
   645  	// stepSize is the number of bytes to skip on every main loop iteration.
   646  	// It should be >= 1.
   647  	const stepSize = 1
   648  
   649  	const kSearchStrength = 9
   650  
   651  	// nextEmit is where in src the next emitLiteral should start from.
   652  	nextEmit := s
   653  	cv := load6432(src, s)
   654  
   655  	// Relative offsets
   656  	offset1 := int32(blk.recentOffsets[0])
   657  	offset2 := int32(blk.recentOffsets[1])
   658  
   659  	addLiterals := func(s *seq, until int32) {
   660  		if until == nextEmit {
   661  			return
   662  		}
   663  		blk.literals = append(blk.literals, src[nextEmit:until]...)
   664  		s.litLen = uint32(until - nextEmit)
   665  	}
   666  	if debugEncoder {
   667  		println("recent offsets:", blk.recentOffsets)
   668  	}
   669  
   670  encodeLoop:
   671  	for {
   672  		var t int32
   673  		// We allow the encoder to optionally turn off repeat offsets across blocks
   674  		canRepeat := len(blk.sequences) > 2
   675  		var matched int32
   676  
   677  		for {
   678  			if debugAsserts && canRepeat && offset1 == 0 {
   679  				panic("offset0 was 0")
   680  			}
   681  
   682  			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
   683  			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
   684  			candidateL := e.longTable[nextHashL]
   685  			candidateS := e.table[nextHashS]
   686  
   687  			const repOff = 1
   688  			repIndex := s - offset1 + repOff
   689  			off := s + e.cur
   690  			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
   691  			e.markLongShardDirty(nextHashL)
   692  			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
   693  			e.markShortShardDirty(nextHashS)
   694  
   695  			if canRepeat {
   696  				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
   697  					// Consider history as well.
   698  					var seq seq
   699  					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
   700  
   701  					seq.matchLen = uint32(length - zstdMinMatch)
   702  
   703  					// We might be able to match backwards.
   704  					// Extend as long as we can.
   705  					start := s + repOff
   706  					// We end the search early, so we don't risk 0 literals
   707  					// and have to do special offset treatment.
   708  					startLimit := nextEmit + 1
   709  
   710  					tMin := s - e.maxMatchOff
   711  					if tMin < 0 {
   712  						tMin = 0
   713  					}
   714  					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
   715  						repIndex--
   716  						start--
   717  						seq.matchLen++
   718  					}
   719  					addLiterals(&seq, start)
   720  
   721  					// rep 0
   722  					seq.offset = 1
   723  					if debugSequences {
   724  						println("repeat sequence", seq, "next s:", s)
   725  					}
   726  					blk.sequences = append(blk.sequences, seq)
   727  
   728  					// Index match start+1 (long) -> s - 1
   729  					index0 := s + repOff
   730  					s += length + repOff
   731  
   732  					nextEmit = s
   733  					if s >= sLimit {
   734  						if debugEncoder {
   735  							println("repeat ended", s, length)
   737  						}
   738  						break encodeLoop
   739  					}
   740  					// Index skipped...
   741  					for index0 < s-1 {
   742  						cv0 := load6432(src, index0)
   743  						cv1 := cv0 >> 8
   744  						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
   745  						off := index0 + e.cur
   746  						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
   747  						e.markLongShardDirty(h0)
   748  						h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
   749  						e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
   750  						e.markShortShardDirty(h1)
   751  						index0 += 2
   752  					}
   753  					cv = load6432(src, s)
   754  					continue
   755  				}
   756  				const repOff2 = 1
   757  
   758  				// We deviate from the reference encoder and also check offset 2.
   759  				// Still slower and not much better, so disabled.
   760  				// repIndex = s - offset2 + repOff2
   761  				if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
   762  					// Consider history as well.
   763  					var seq seq
   764  					length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)
   765  
   766  					seq.matchLen = uint32(length - zstdMinMatch)
   767  
   768  					// We might be able to match backwards.
   769  					// Extend as long as we can.
   770  					start := s + repOff2
   771  					// We end the search early, so we don't risk 0 literals
   772  					// and have to do special offset treatment.
   773  					startLimit := nextEmit + 1
   774  
   775  					tMin := s - e.maxMatchOff
   776  					if tMin < 0 {
   777  						tMin = 0
   778  					}
   779  					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
   780  						repIndex--
   781  						start--
   782  						seq.matchLen++
   783  					}
   784  					addLiterals(&seq, start)
   785  
   786  					// rep 2
   787  					seq.offset = 2
   788  					if debugSequences {
   789  						println("repeat sequence 2", seq, "next s:", s)
   790  					}
   791  					blk.sequences = append(blk.sequences, seq)
   792  
   793  					index0 := s + repOff2
   794  					s += length + repOff2
   795  					nextEmit = s
   796  					if s >= sLimit {
   797  						if debugEncoder {
   798  							println("repeat ended", s, length)
   800  						}
   801  						break encodeLoop
   802  					}
   803  
   804  					// Index skipped...
   805  					for index0 < s-1 {
   806  						cv0 := load6432(src, index0)
   807  						cv1 := cv0 >> 8
   808  						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
   809  						off := index0 + e.cur
   810  						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
   811  						e.markLongShardDirty(h0)
   812  						h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
   813  						e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
   814  						e.markShortShardDirty(h1)
   815  						index0 += 2
   816  					}
   817  					cv = load6432(src, s)
   818  					// Swap offsets
   819  					offset1, offset2 = offset2, offset1
   820  					continue
   821  				}
   822  			}
   823  			// Find the offsets of our two matches.
   824  			coffsetL := candidateL.offset - e.cur
   825  			coffsetLP := candidateL.prev - e.cur
   826  
   827  			// Check if we have a long match.
   828  			if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
   829  				// Found a long match, at least 8 bytes.
   830  				matched = e.matchlen(s+8, coffsetL+8, src) + 8
   831  				t = coffsetL
   832  				if debugAsserts && s <= t {
   833  					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
   834  				}
   835  				if debugAsserts && s-t > e.maxMatchOff {
   836  					panic("s - t > e.maxMatchOff")
   837  				}
   838  				if debugMatches {
   839  					println("long match")
   840  				}
   841  
   842  				if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
   843  					// Found a long match, at least 8 bytes.
   844  					prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
   845  					if prevMatch > matched {
   846  						matched = prevMatch
   847  						t = coffsetLP
   848  					}
   849  					if debugAsserts && s <= t {
   850  						panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
   851  					}
   852  					if debugAsserts && s-t > e.maxMatchOff {
   853  						panic("s - t > e.maxMatchOff")
   854  					}
   855  					if debugMatches {
   856  						println("long match")
   857  					}
   858  				}
   859  				break
   860  			}
   861  
   862  			// Check if we have a long match on prev.
   863  			if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
   864  				// Found a long match, at least 8 bytes.
   865  				matched = e.matchlen(s+8, coffsetLP+8, src) + 8
   866  				t = coffsetLP
   867  				if debugAsserts && s <= t {
   868  					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
   869  				}
   870  				if debugAsserts && s-t > e.maxMatchOff {
   871  					panic("s - t > e.maxMatchOff")
   872  				}
   873  				if debugMatches {
   874  					println("long match")
   875  				}
   876  				break
   877  			}
   878  
   879  			coffsetS := candidateS.offset - e.cur
   880  
   881  			// Check if we have a short match.
   882  			if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
   883  				// found a regular match
   884  				matched = e.matchlen(s+4, coffsetS+4, src) + 4
   885  
   886  				// See if we can find a long match at s+1
   887  				const checkAt = 1
   888  				cv := load6432(src, s+checkAt)
   889  				nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
   890  				candidateL = e.longTable[nextHashL]
   891  				coffsetL = candidateL.offset - e.cur
   892  
   893  				// We can store it, since we have at least a 4 byte match.
   894  				e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
   895  				e.markLongShardDirty(nextHashL)
   896  				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
   897  					// Found a long match, at least 8 bytes.
   898  					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
   899  					if matchedNext > matched {
   900  						t = coffsetL
   901  						s += checkAt
   902  						matched = matchedNext
   903  						if debugMatches {
   904  							println("long match (after short)")
   905  						}
   906  						break
   907  					}
   908  				}
   909  
   910  				// Check prev long...
   911  				coffsetL = candidateL.prev - e.cur
   912  				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
   913  					// Found a long match, at least 8 bytes.
   914  					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
   915  					if matchedNext > matched {
   916  						t = coffsetL
   917  						s += checkAt
   918  						matched = matchedNext
   919  						if debugMatches {
   920  							println("prev long match (after short)")
   921  						}
   922  						break
   923  					}
   924  				}
   925  				t = coffsetS
   926  				if debugAsserts && s <= t {
   927  					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
   928  				}
   929  				if debugAsserts && s-t > e.maxMatchOff {
   930  					panic("s - t > e.maxMatchOff")
   931  				}
   932  				if debugAsserts && t < 0 {
   933  					panic("t<0")
   934  				}
   935  				if debugMatches {
   936  					println("short match")
   937  				}
   938  				break
   939  			}
   940  
   941  			// No match found, move forward in input.
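        			// The step size grows with the distance since the last emitted
        			// literal, so incompressible input is skipped progressively faster.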
   942  			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
   943  			if s >= sLimit {
   944  				break encodeLoop
   945  			}
   946  			cv = load6432(src, s)
   947  		}
   948  		// Try to find a better match by searching for a long match at the end of the current best match
   949  		if s+matched < sLimit {
   950  			nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
   951  			cv := load3232(src, s)
   952  			candidateL := e.longTable[nextHashL]
   953  			coffsetL := candidateL.offset - e.cur - matched
   954  			if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
   955  				// Found a long match, at least 4 bytes.
   956  				matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
   957  				if matchedNext > matched {
   958  					t = coffsetL
   959  					matched = matchedNext
   960  					if debugMatches {
   961  						println("long match at end-of-match")
   962  					}
   963  				}
   964  			}
   965  
   966  			// Check prev long...
   967  			if true {
   968  				coffsetL = candidateL.prev - e.cur - matched
   969  				if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
   970  					// Found a long match, at least 4 bytes.
   971  					matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
   972  					if matchedNext > matched {
   973  						t = coffsetL
   974  						matched = matchedNext
   975  						if debugMatches {
   976  							println("prev long match at end-of-match")
   977  						}
   978  					}
   979  				}
   980  			}
   981  		}
   982  		// A match has been found. Update recent offsets.
   983  		offset2 = offset1
   984  		offset1 = s - t
   985  
   986  		if debugAsserts && s <= t {
   987  			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
   988  		}
   989  
   990  		if debugAsserts && canRepeat && int(offset1) > len(src) {
   991  			panic("invalid offset")
   992  		}
   993  
   994  		// Extend the n-byte match as long as possible.
   995  		l := matched
   996  
   997  		// Extend backwards
   998  		tMin := s - e.maxMatchOff
   999  		if tMin < 0 {
  1000  			tMin = 0
  1001  		}
  1002  		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
  1003  			s--
  1004  			t--
  1005  			l++
  1006  		}
  1007  
  1008  		// Write our sequence
  1009  		var seq seq
  1010  		seq.litLen = uint32(s - nextEmit)
  1011  		seq.matchLen = uint32(l - zstdMinMatch)
  1012  		if seq.litLen > 0 {
  1013  			blk.literals = append(blk.literals, src[nextEmit:s]...)
  1014  		}
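        		// Offsets 1-3 are repeat codes, so absolute offsets are stored
        		// with a +3 bias.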
  1015  		seq.offset = uint32(s-t) + 3
  1016  		s += l
  1017  		if debugSequences {
  1018  			println("sequence", seq, "next s:", s)
  1019  		}
  1020  		blk.sequences = append(blk.sequences, seq)
  1021  		nextEmit = s
  1022  		if s >= sLimit {
  1023  			break encodeLoop
  1024  		}
  1025  
  1026  		// Index match start+1 (long) -> s - 1
  1027  		index0 := s - l + 1
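        		// Hash every other position inside the match: even steps go to the
        		// long table, the following byte (off+1) to the short table.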
  1028  		for index0 < s-1 {
  1029  			cv0 := load6432(src, index0)
  1030  			cv1 := cv0 >> 8
  1031  			h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
  1032  			off := index0 + e.cur
  1033  			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
  1034  			e.markLongShardDirty(h0)
  1035  			h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
  1036  			e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
  1037  			e.markShortShardDirty(h1)
  1038  			index0 += 2
  1039  		}
  1040  
  1041  		cv = load6432(src, s)
  1042  		if !canRepeat {
  1043  			continue
  1044  		}
  1045  
  1046  		// Check offset 2
  1047  		for {
  1048  			o2 := s - offset2
  1049  			if load3232(src, o2) != uint32(cv) {
  1050  				// Do regular search
  1051  				break
  1052  			}
  1053  
  1054  			// Store this, since we have it.
  1055  			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
  1056  			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
  1057  
  1058  			// We have at least a 4 byte match.
  1059  			// No need to check backwards; we come straight from a match.
  1060  			l := 4 + e.matchlen(s+4, o2+4, src)
  1061  
  1062  			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
  1063  			e.markLongShardDirty(nextHashL)
  1064  			e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
  1065  			e.markShortShardDirty(nextHashS)
  1066  			seq.matchLen = uint32(l) - zstdMinMatch
  1067  			seq.litLen = 0
  1068  
  1069  			// Since litlen is always 0, this is offset 1.
  1070  			seq.offset = 1
  1071  			s += l
  1072  			nextEmit = s
  1073  			if debugSequences {
  1074  				println("sequence", seq, "next s:", s)
  1075  			}
  1076  			blk.sequences = append(blk.sequences, seq)
  1077  
  1078  			// Swap offset 1 and 2.
  1079  			offset1, offset2 = offset2, offset1
  1080  			if s >= sLimit {
  1081  				// Finished
  1082  				break encodeLoop
  1083  			}
  1084  			cv = load6432(src, s)
  1085  		}
  1086  	}
  1087  
  1088  	if int(nextEmit) < len(src) {
  1089  		blk.literals = append(blk.literals, src[nextEmit:]...)
  1090  		blk.extraLits = len(src) - int(nextEmit)
  1091  	}
  1092  	blk.recentOffsets[0] = uint32(offset1)
  1093  	blk.recentOffsets[1] = uint32(offset2)
  1094  	if debugEncoder {
  1095  		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
  1096  	}
  1097  }
  1098  
  1099  // Reset will reset and set a dictionary if not nil
  1100  func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) {
  1101  	e.resetBase(d, singleBlock)
  1102  	if d != nil {
  1103  		panic("betterFastEncoder: Reset with dict")
  1104  	}
  1105  }
  1106  
  1107  // Reset will reset and set a dictionary if not nil
  1108  func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
  1109  	e.resetBase(d, singleBlock)
  1110  	if d == nil {
  1111  		return
  1112  	}
  1113  	// Init or copy dict table
  1114  	if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
  1115  		if len(e.dictTable) != len(e.table) {
  1116  			e.dictTable = make([]tableEntry, len(e.table))
  1117  		}
  1118  		end := int32(len(d.content)) - 8 + e.maxMatchOff
  1119  		for i := e.maxMatchOff; i < end; i += 4 {
  1120  			const hashLog = betterShortTableBits
  1121  
  1122  			cv := load6432(d.content, i-e.maxMatchOff)
  1123  			nextHash := hashLen(cv, hashLog, betterShortLen)      // 0 -> 4
  1124  			nextHash1 := hashLen(cv>>8, hashLog, betterShortLen)  // 1 -> 5
  1125  			nextHash2 := hashLen(cv>>16, hashLog, betterShortLen) // 2 -> 6
  1126  			nextHash3 := hashLen(cv>>24, hashLog, betterShortLen) // 3 -> 7
  1127  			e.dictTable[nextHash] = tableEntry{
  1128  				val:    uint32(cv),
  1129  				offset: i,
  1130  			}
  1131  			e.dictTable[nextHash1] = tableEntry{
  1132  				val:    uint32(cv >> 8),
  1133  				offset: i + 1,
  1134  			}
  1135  			e.dictTable[nextHash2] = tableEntry{
  1136  				val:    uint32(cv >> 16),
  1137  				offset: i + 2,
  1138  			}
  1139  			e.dictTable[nextHash3] = tableEntry{
  1140  				val:    uint32(cv >> 24),
  1141  				offset: i + 3,
  1142  			}
  1143  		}
  1144  		e.lastDictID = d.id
  1145  		e.allDirty = true
  1146  	}
  1147  
  1148  	// Init or copy dict long table
  1149  	if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
  1150  		if len(e.dictLongTable) != len(e.longTable) {
  1151  			e.dictLongTable = make([]prevEntry, len(e.longTable))
  1152  		}
  1153  		if len(d.content) >= 8 {
  1154  			cv := load6432(d.content, 0)
  1155  			h := hashLen(cv, betterLongTableBits, betterLongLen)
  1156  			e.dictLongTable[h] = prevEntry{
  1157  				offset: e.maxMatchOff,
  1158  				prev:   e.dictLongTable[h].offset,
  1159  			}
  1160  
  1161  			end := int32(len(d.content)) - 8 + e.maxMatchOff
  1162  			off := 8 // First to read
  1163  			for i := e.maxMatchOff + 1; i < end; i++ {
  1164  				cv = cv>>8 | (uint64(d.content[off]) << 56)
  1165  				h := hashLen(cv, betterLongTableBits, betterLongLen)
  1166  				e.dictLongTable[h] = prevEntry{
  1167  					offset: i,
  1168  					prev:   e.dictLongTable[h].offset,
  1169  				}
  1170  				off++
  1171  			}
  1172  		}
  1173  		e.lastDictID = d.id
  1174  		e.allDirty = true
  1175  	}
  1176  
  1177  	// Reset table to initial state
  1178  	{
  1179  		dirtyShardCnt := 0
  1180  		if !e.allDirty {
  1181  			for i := range e.shortTableShardDirty {
  1182  				if e.shortTableShardDirty[i] {
  1183  					dirtyShardCnt++
  1184  				}
  1185  			}
  1186  		}
  1187  		const shardCnt = betterShortTableShardCnt
  1188  		const shardSize = betterShortTableShardSize
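        		// With two thirds or more of the shards dirty, copy the whole
        		// table instead of individual shards.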
  1189  		if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
  1190  			copy(e.table[:], e.dictTable)
  1191  			for i := range e.shortTableShardDirty {
  1192  				e.shortTableShardDirty[i] = false
  1193  			}
  1194  		} else {
  1195  			for i := range e.shortTableShardDirty {
  1196  				if !e.shortTableShardDirty[i] {
  1197  					continue
  1198  				}
  1199  
  1200  				copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
  1201  				e.shortTableShardDirty[i] = false
  1202  			}
  1203  		}
  1204  	}
  1205  	{
  1206  		dirtyShardCnt := 0
  1207  		if !e.allDirty {
  1208  			for i := range e.longTableShardDirty {
  1209  				if e.longTableShardDirty[i] {
  1210  					dirtyShardCnt++
  1211  				}
  1212  			}
  1213  		}
  1214  		const shardCnt = betterLongTableShardCnt
  1215  		const shardSize = betterLongTableShardSize
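        		// With two thirds or more of the shards dirty, copy the whole
        		// table instead of individual shards.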
  1216  		if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
  1217  			copy(e.longTable[:], e.dictLongTable)
  1218  			for i := range e.longTableShardDirty {
  1219  				e.longTableShardDirty[i] = false
  1220  			}
  1221  		} else {
  1222  			for i := range e.longTableShardDirty {
  1223  				if !e.longTableShardDirty[i] {
  1224  					continue
  1225  				}
  1226  
  1227  				copy(e.longTable[i*shardSize:(i+1)*shardSize], e.dictLongTable[i*shardSize:(i+1)*shardSize])
  1228  				e.longTableShardDirty[i] = false
  1229  			}
  1230  		}
  1231  	}
  1232  	e.cur = e.maxMatchOff
  1233  	e.allDirty = false
  1234  }
  1235  
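        // markLongShardDirty marks the long-table shard containing entryNum as
        // stale, so Reset will refresh it from dictLongTable.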
  1236  func (e *betterFastEncoderDict) markLongShardDirty(entryNum uint32) {
  1237  	e.longTableShardDirty[entryNum/betterLongTableShardSize] = true
  1238  }
  1239  
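        // markShortShardDirty marks the short-table shard containing entryNum as
        // stale, so Reset will refresh it from dictTable.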
  1240  func (e *betterFastEncoderDict) markShortShardDirty(entryNum uint32) {
  1241  	e.shortTableShardDirty[entryNum/betterShortTableShardSize] = true
  1242  }