github.com/bir3/gocompiler@v0.9.2202/extra/compress/zstd/enc_fast.go (about)

     1  // Copyright 2019+ Klaus Post. All rights reserved.
     2  // License information can be found in the LICENSE file.
     3  // Based on work by Yann Collet, released under BSD License.
     4  
     5  package zstd
     6  
     7  import (
     8  	"fmt"
     9  )
    10  
const (
	tableBits        = 15                               // Bits used in the table
	tableSize        = 1 << tableBits                   // Size of the table
	tableShardCnt    = 1 << (tableBits - dictShardBits) // Number of shards in the table
	tableShardSize   = tableSize / tableShardCnt        // Size of an individual shard
	tableFastHashLen = 6                                // Input length, in bytes, passed to hashLen for this table
	tableMask        = tableSize - 1                    // Mask for table indices. Redundant, but can eliminate bounds checks.
	maxMatchLength   = 131074                           // Cap applied when extending matches in Encode (not needed in EncodeNoHist)
)
    20  
// tableEntry is one hash-table slot: a position in the history buffer
// plus the 4 bytes loaded from that position. The stored val lets the
// encoder reject hash collisions cheaply before computing a match length.
type tableEntry struct {
	val    uint32 // first 4 bytes at offset, used to verify a candidate match
	offset int32  // absolute position; stored as s + e.cur, recovered as offset - e.cur
}
    25  
// fastEncoder implements the fast match-finding strategy (modeled on
// zstd_fast.c, see Encode). It extends fastBase with a single hash table
// of recent candidate positions.
type fastEncoder struct {
	fastBase
	table [tableSize]tableEntry // hash -> most recent candidate position
}
    30  
// fastEncoderDict is a fastEncoder initialized from a dictionary.
// dictTable holds a pristine copy of the hash table built from the dict
// content so Reset can restore only the shards dirtied during encoding.
type fastEncoderDict struct {
	fastEncoder
	dictTable       []tableEntry          // pristine table built from the dictionary
	tableShardDirty [tableShardCnt]bool   // per-shard "modified since restore" flags
	allDirty        bool                  // set when the whole table must be restored
}
    37  
// Encode mimics functionality in zstd_fast.c.
//
// src is appended to the encoder history (e.addBlock) so matches may
// reference earlier blocks; the resulting literals and sequences are
// written into blk. Repeat offsets are carried across blocks via
// blk.recentOffsets and updated on return.
func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		inputMargin            = 8
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	// Protect against e.cur wraparound.
	for e.cur >= e.bufferReset-int32(len(e.hist)) {
		if len(e.hist) == 0 {
			// No history: just wipe the table and restart the offset base.
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0 // too old to ever match; invalidate
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	// Tiny blocks are emitted as raw literals only.
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 2.
	const stepSize = 2

	// TEMPLATE
	const hashLog = tableBits
	// seems global, but would be nice to tweak.
	const kSearchStrength = 6

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	// addLiterals copies src[nextEmit:until] into blk and records the
	// literal length on the sequence being built.
	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// t will contain the match offset when we find one.
		// When exiting the search loop, we have already checked 4 bytes.
		var t int32

		// We will not use repeat offsets across blocks.
		// By not using them for the first 3 matches
		canRepeat := len(blk.sequences) > 2

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
			candidate := e.table[nextHash]
			candidate2 := e.table[nextHash2]
			// Position where a repeat (offset1) match would start, checked
			// 2 bytes ahead of s.
			repIndex := s - offset1 + 2

			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}

			if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
				// Consider history as well.
				var seq seq
				var length int32
				length = 4 + e.matchlen(s+6, repIndex+4, src)
				seq.matchLen = uint32(length - zstdMinMatch)

				// We might be able to match backwards.
				// Extend as long as we can.
				start := s + 2
				// We end the search early, so we don't risk 0 literals
				// and have to do special offset treatment.
				startLimit := nextEmit + 1

				sMin := s - e.maxMatchOff
				if sMin < 0 {
					sMin = 0
				}
				for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
					repIndex--
					start--
					seq.matchLen++
				}
				addLiterals(&seq, start)

				// rep 0
				seq.offset = 1
				if debugSequences {
					println("repeat sequence", seq, "next s:", s)
				}
				blk.sequences = append(blk.sequences, seq)
				s += length + 2
				nextEmit = s
				if s >= sLimit {
					if debugEncoder {
						println("repeat ended", s, length)

					}
					break encodeLoop
				}
				cv = load6432(src, s)
				continue
			}
			// Distance from s to each candidate; must be within maxMatchOff
			// and the stored 4 bytes must agree before accepting.
			coffset0 := s - (candidate.offset - e.cur)
			coffset1 := s - (candidate2.offset - e.cur) + 1
			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				// found a regular match
				t = candidate.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				break
			}

			if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				// found a regular match
				t = candidate2.offset - e.cur
				s++
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				break
			}
			// No match: skip ahead, accelerating as the gap since the last
			// emit grows.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		l := e.matchlen(s+4, t+4, src) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence.
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		// Don't use repeat offsets
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}
		cv = load6432(src, s)

		// Check offset 2
		if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			l := 4 + e.matchlen(s+4, o2+4, src)

			// Store this, since we have it.
			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0
			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				break encodeLoop
			}
			// Prepare next loop.
			cv = load6432(src, s)
		}
	}

	// Flush any trailing literals after the last emitted sequence.
	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}
   297  
// EncodeNoHist will encode a block with no history and no following blocks.
// Most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	const (
		inputMargin            = 8
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)
	if debugEncoder {
		if len(src) > maxCompressedBlockSize {
			panic("src too big")
		}
	}

	// Protect against e.cur wraparound.
	if e.cur >= e.bufferReset {
		for i := range e.table[:] {
			e.table[i] = tableEntry{}
		}
		e.cur = e.maxMatchOff
	}

	// No history: encoding starts at the beginning of src itself.
	s := int32(0)
	blk.size = len(src)
	// Tiny blocks are emitted as raw literals only.
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 2.
	const stepSize = 2

	// TEMPLATE
	const hashLog = tableBits
	// seems global, but would be nice to tweak.
	const kSearchStrength = 6

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	// addLiterals copies src[nextEmit:until] into blk and records the
	// literal length on the sequence being built.
	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// t will contain the match offset when we find one.
		// When exiting the search loop, we have already checked 4 bytes.
		var t int32

		// We will not use repeat offsets across blocks.
		// By not using them for the first 3 matches

		for {
			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
			candidate := e.table[nextHash]
			candidate2 := e.table[nextHash2]
			// Position where a repeat (offset1) match would start, checked
			// 2 bytes ahead of s.
			repIndex := s - offset1 + 2

			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}

			// NOTE(review): unlike Encode there is no repIndex >= 0 guard here;
			// presumably len(blk.sequences) > 2 implies offset1 originated in
			// this block, keeping repIndex in range — confirm against callers.
			if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) {
				// Consider history as well.
				var seq seq
				length := 4 + e.matchlen(s+6, repIndex+4, src)

				seq.matchLen = uint32(length - zstdMinMatch)

				// We might be able to match backwards.
				// Extend as long as we can.
				start := s + 2
				// We end the search early, so we don't risk 0 literals
				// and have to do special offset treatment.
				startLimit := nextEmit + 1

				sMin := s - e.maxMatchOff
				if sMin < 0 {
					sMin = 0
				}
				for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] {
					repIndex--
					start--
					seq.matchLen++
				}
				addLiterals(&seq, start)

				// rep 0
				seq.offset = 1
				if debugSequences {
					println("repeat sequence", seq, "next s:", s)
				}
				blk.sequences = append(blk.sequences, seq)
				s += length + 2
				nextEmit = s
				if s >= sLimit {
					if debugEncoder {
						println("repeat ended", s, length)

					}
					break encodeLoop
				}
				cv = load6432(src, s)
				continue
			}
			// Distance from s to each candidate; must be within maxMatchOff
			// and the stored 4 bytes must agree before accepting.
			coffset0 := s - (candidate.offset - e.cur)
			coffset1 := s - (candidate2.offset - e.cur) + 1
			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				// found a regular match
				t = candidate.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic(fmt.Sprintf("t (%d) < 0, candidate.offset: %d, e.cur: %d, coffset0: %d, e.maxMatchOff: %d", t, candidate.offset, e.cur, coffset0, e.maxMatchOff))
				}
				break
			}

			if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				// found a regular match
				t = candidate2.offset - e.cur
				s++
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				break
			}
			// No match: skip ahead, accelerating as the gap since the last
			// emit grows.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && t < 0 {
			panic(fmt.Sprintf("t (%d) < 0 ", t))
		}
		// Extend the 4-byte match as long as possible.
		l := e.matchlen(s+4, t+4, src) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] {
			s--
			t--
			l++
		}

		// Write our sequence.
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		// Don't use repeat offsets
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}
		cv = load6432(src, s)

		// Check offset 2
		if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) {
			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			l := 4 + e.matchlen(s+4, o2+4, src)

			// Store this, since we have it.
			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0
			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				break encodeLoop
			}
			// Prepare next loop.
			cv = load6432(src, s)
		}
	}

	// Flush any trailing literals after the last emitted sequence.
	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
	// We do not store history, so we must offset e.cur to avoid false matches for next user.
	if e.cur < e.bufferReset {
		e.cur += int32(len(src))
	}
}
   545  
// Encode will encode the content, with a dictionary if initialized for it.
//
// Small blocks (<= 32 KiB) use the shard-dirty tracking so Reset can cheaply
// restore the dictionary table; anything larger (or an already-dirty encoder)
// falls back to the plain fastEncoder and marks everything dirty.
func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
	const (
		inputMargin            = 8
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)
	if e.allDirty || len(src) > 32<<10 {
		e.fastEncoder.Encode(blk, src)
		e.allDirty = true
		return
	}
	// Protect against e.cur wraparound.
	for e.cur >= e.bufferReset-int32(len(e.hist)) {
		if len(e.hist) == 0 {
			// No history: just wipe the table and restart the offset base.
			e.table = [tableSize]tableEntry{}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0 // too old to ever match; invalidate
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	// Tiny blocks are emitted as raw literals only.
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 2.
	const stepSize = 2

	// TEMPLATE
	const hashLog = tableBits
	// seems global, but would be nice to tweak.
	const kSearchStrength = 7

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	// addLiterals copies src[nextEmit:until] into blk and records the
	// literal length on the sequence being built.
	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// t will contain the match offset when we find one.
		// When exiting the search loop, we have already checked 4 bytes.
		var t int32

		// We will not use repeat offsets across blocks.
		// By not using them for the first 3 matches
		canRepeat := len(blk.sequences) > 2

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
			candidate := e.table[nextHash]
			candidate2 := e.table[nextHash2]
			// Position where a repeat (offset1) match would start, checked
			// 2 bytes ahead of s.
			repIndex := s - offset1 + 2

			// Every table write is mirrored by a shard-dirty mark so Reset
			// knows which parts of the dictionary table to restore.
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.markShardDirty(nextHash)
			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
			e.markShardDirty(nextHash2)

			if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
				// Consider history as well.
				var seq seq
				var length int32
				length = 4 + e.matchlen(s+6, repIndex+4, src)

				seq.matchLen = uint32(length - zstdMinMatch)

				// We might be able to match backwards.
				// Extend as long as we can.
				start := s + 2
				// We end the search early, so we don't risk 0 literals
				// and have to do special offset treatment.
				startLimit := nextEmit + 1

				sMin := s - e.maxMatchOff
				if sMin < 0 {
					sMin = 0
				}
				for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
					repIndex--
					start--
					seq.matchLen++
				}
				addLiterals(&seq, start)

				// rep 0
				seq.offset = 1
				if debugSequences {
					println("repeat sequence", seq, "next s:", s)
				}
				blk.sequences = append(blk.sequences, seq)
				s += length + 2
				nextEmit = s
				if s >= sLimit {
					if debugEncoder {
						println("repeat ended", s, length)

					}
					break encodeLoop
				}
				cv = load6432(src, s)
				continue
			}
			// Distance from s to each candidate; must be within maxMatchOff
			// and the stored 4 bytes must agree before accepting.
			coffset0 := s - (candidate.offset - e.cur)
			coffset1 := s - (candidate2.offset - e.cur) + 1
			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				// found a regular match
				t = candidate.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				break
			}

			if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				// found a regular match
				t = candidate2.offset - e.cur
				s++
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				break
			}
			// No match: skip ahead, accelerating as the gap since the last
			// emit grows.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		l := e.matchlen(s+4, t+4, src) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence.
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		// Don't use repeat offsets
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}
		cv = load6432(src, s)

		// Check offset 2
		if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			l := 4 + e.matchlen(s+4, o2+4, src)

			// Store this, since we have it.
			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.markShardDirty(nextHash)
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0
			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				break encodeLoop
			}
			// Prepare next loop.
			cv = load6432(src, s)
		}
	}

	// Flush any trailing literals after the last emitted sequence.
	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}
   811  
// Reset will reset the encoder state. fastEncoder does not support
// dictionaries; passing a non-nil dict panics (use fastEncoderDict instead).
func (e *fastEncoder) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d != nil {
		panic("fastEncoder: Reset with dict")
	}
}
   819  
// Reset will reset the encoder state and set a dictionary if not nil.
// The dictionary content is hashed into dictTable once per dictionary ID;
// on later resets only the table shards dirtied during encoding are
// restored from that pristine copy (or the whole table when cheaper).
func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d == nil {
		return
	}

	// Init or copy dict table
	if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
		if len(e.dictTable) != len(e.table) {
			e.dictTable = make([]tableEntry, len(e.table))
		}
		if true { // kept from the upstream template; always taken
			// Hash every 3rd position of the dictionary content, storing
			// three overlapping entries per step. Offsets are biased by
			// maxMatchOff, matching how e.cur is set below.
			end := e.maxMatchOff + int32(len(d.content)) - 8
			for i := e.maxMatchOff; i < end; i += 3 {
				const hashLog = tableBits

				cv := load6432(d.content, i-e.maxMatchOff)
				nextHash := hashLen(cv, hashLog, tableFastHashLen)      // 0 -> 5
				nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen)  // 1 -> 6
				nextHash2 := hashLen(cv>>16, hashLog, tableFastHashLen) // 2 -> 7
				e.dictTable[nextHash] = tableEntry{
					val:    uint32(cv),
					offset: i,
				}
				e.dictTable[nextHash1] = tableEntry{
					val:    uint32(cv >> 8),
					offset: i + 1,
				}
				e.dictTable[nextHash2] = tableEntry{
					val:    uint32(cv >> 16),
					offset: i + 2,
				}
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}

	e.cur = e.maxMatchOff
	// Count dirty shards to decide between a full table copy and a
	// per-shard restore.
	dirtyShardCnt := 0
	if !e.allDirty {
		for i := range e.tableShardDirty {
			if e.tableShardDirty[i] {
				dirtyShardCnt++
			}
		}
	}

	const shardCnt = tableShardCnt
	const shardSize = tableShardSize
	if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
		//copy(e.table[:], e.dictTable)
		// Slice-to-array-pointer conversion: bulk-restores the whole table.
		e.table = *(*[tableSize]tableEntry)(e.dictTable)
		for i := range e.tableShardDirty {
			e.tableShardDirty[i] = false
		}
		e.allDirty = false
		return
	}
	// Restore only the shards that were written to since the last reset.
	for i := range e.tableShardDirty {
		if !e.tableShardDirty[i] {
			continue
		}

		//copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
		*(*[shardSize]tableEntry)(e.table[i*shardSize:]) = *(*[shardSize]tableEntry)(e.dictTable[i*shardSize:])
		e.tableShardDirty[i] = false
	}
	e.allDirty = false
}
   891  
   892  func (e *fastEncoderDict) markAllShardsDirty() {
   893  	e.allDirty = true
   894  }
   895  
   896  func (e *fastEncoderDict) markShardDirty(entryNum uint32) {
   897  	e.tableShardDirty[entryNum/tableShardSize] = true
   898  }