github.com/bir3/gocompiler@v0.9.2202/extra/compress/zstd/enc_dfast.go (about)

     1  // Copyright 2019+ Klaus Post. All rights reserved.
     2  // License information can be found in the LICENSE file.
     3  // Based on work by Yann Collet, released under BSD License.
     4  
     5  package zstd
     6  
     7  import "fmt"
     8  
     9  // Table sizing for the double-fast encoder: two hash tables are kept,
    10  // a "long" table hashed over 8 bytes and a "short" table hashed over 5.
    11  const (
    12  	dFastLongTableBits = 17                      // Bits used in the long match table
    13  	dFastLongTableSize = 1 << dFastLongTableBits // Size of the table
    14  	dFastLongTableMask = dFastLongTableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
    15  	dFastLongLen       = 8                       // Bytes used for table hash
    16  
    17  	dLongTableShardCnt  = 1 << (dFastLongTableBits - dictShardBits) // Number of shards in the table
    18  	// NOTE(review): divisor is tableShardCnt (the short-table shard count), not
    19  	// dLongTableShardCnt, so shardCnt*shardSize != dFastLongTableSize. Dirty
    20  	// tracking stays consistent as long as marking uses the same divisor, but
    21  	// only part of longTableShardDirty is ever used — confirm intent upstream.
    22  	dLongTableShardSize = dFastLongTableSize / tableShardCnt        // Size of an individual shard
    23  
    24  	dFastShortTableBits = tableBits                // Bits used in the short match table
    25  	dFastShortTableSize = 1 << dFastShortTableBits // Size of the table
    26  	dFastShortTableMask = dFastShortTableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
    27  	dFastShortLen       = 5                        // Bytes used for table hash
    28  
    29  )
    24  
    25  // doubleFastEncoder extends fastEncoder with a second hash table indexed by
    26  // an 8-byte hash, so longer matches can be found in addition to the short
    27  // (5-byte hash) matches provided by the embedded fastEncoder table.
    28  type doubleFastEncoder struct {
    29  	fastEncoder
    30  	longTable [dFastLongTableSize]tableEntry // long-hash (dFastLongLen bytes) match candidates
    31  }
    29  
    30  // doubleFastEncoderDict is the dictionary-aware variant of doubleFastEncoder.
    31  type doubleFastEncoderDict struct {
    32  	fastEncoderDict
    33  	longTable           [dFastLongTableSize]tableEntry
    34  	dictLongTable       []tableEntry // presumably the pristine long table built from the dictionary, used to restore dirty shards — reset logic lives elsewhere
    35  	longTableShardDirty [dLongTableShardCnt]bool // which longTable shards were overwritten since the last dictionary reset
    36  }
    36  
    37  // Encode mimmics functionality in zstd_dfast.c
    38  func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
    39  	const (
    40  		// Input margin is the number of bytes we read (8)
    41  		// and the maximum we will read ahead (2)
    42  		inputMargin            = 8 + 2
    43  		minNonLiteralBlockSize = 16
    44  	)
    45  
    46  	// Protect against e.cur wraparound.
    47  	for e.cur >= e.bufferReset-int32(len(e.hist)) {
    48  		if len(e.hist) == 0 {
    49  			e.table = [dFastShortTableSize]tableEntry{}
    50  			e.longTable = [dFastLongTableSize]tableEntry{}
    51  			e.cur = e.maxMatchOff
    52  			break
    53  		}
    54  		// Shift down everything in the table that isn't already too far away.
    55  		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
    56  		for i := range e.table[:] {
    57  			v := e.table[i].offset
    58  			if v < minOff {
    59  				v = 0
    60  			} else {
    61  				v = v - e.cur + e.maxMatchOff
    62  			}
    63  			e.table[i].offset = v
    64  		}
    65  		for i := range e.longTable[:] {
    66  			v := e.longTable[i].offset
    67  			if v < minOff {
    68  				v = 0
    69  			} else {
    70  				v = v - e.cur + e.maxMatchOff
    71  			}
    72  			e.longTable[i].offset = v
    73  		}
    74  		e.cur = e.maxMatchOff
    75  		break
    76  	}
    77  
    78  	s := e.addBlock(src)
    79  	blk.size = len(src)
    80  	if len(src) < minNonLiteralBlockSize {
    81  		blk.extraLits = len(src)
    82  		blk.literals = blk.literals[:len(src)]
    83  		copy(blk.literals, src)
    84  		return
    85  	}
    86  
    87  	// Override src
    88  	src = e.hist
    89  	sLimit := int32(len(src)) - inputMargin
    90  	// stepSize is the number of bytes to skip on every main loop iteration.
    91  	// It should be >= 1.
    92  	const stepSize = 1
    93  
    94  	const kSearchStrength = 8
    95  
    96  	// nextEmit is where in src the next emitLiteral should start from.
    97  	nextEmit := s
    98  	cv := load6432(src, s)
    99  
   100  	// Relative offsets
   101  	offset1 := int32(blk.recentOffsets[0])
   102  	offset2 := int32(blk.recentOffsets[1])
   103  
   104  	addLiterals := func(s *seq, until int32) {
   105  		if until == nextEmit {
   106  			return
   107  		}
   108  		blk.literals = append(blk.literals, src[nextEmit:until]...)
   109  		s.litLen = uint32(until - nextEmit)
   110  	}
   111  	if debugEncoder {
   112  		println("recent offsets:", blk.recentOffsets)
   113  	}
   114  
   115  encodeLoop:
   116  	for {
   117  		var t int32
   118  		// We allow the encoder to optionally turn off repeat offsets across blocks
   119  		canRepeat := len(blk.sequences) > 2
   120  
   121  		for {
   122  			if debugAsserts && canRepeat && offset1 == 0 {
   123  				panic("offset0 was 0")
   124  			}
   125  
   126  			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
   127  			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
   128  			candidateL := e.longTable[nextHashL]
   129  			candidateS := e.table[nextHashS]
   130  
   131  			const repOff = 1
   132  			repIndex := s - offset1 + repOff
   133  			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
   134  			e.longTable[nextHashL] = entry
   135  			e.table[nextHashS] = entry
   136  
   137  			if canRepeat {
   138  				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
   139  					// Consider history as well.
   140  					var seq seq
   141  					lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
   142  
   143  					seq.matchLen = uint32(lenght - zstdMinMatch)
   144  
   145  					// We might be able to match backwards.
   146  					// Extend as long as we can.
   147  					start := s + repOff
   148  					// We end the search early, so we don't risk 0 literals
   149  					// and have to do special offset treatment.
   150  					startLimit := nextEmit + 1
   151  
   152  					tMin := s - e.maxMatchOff
   153  					if tMin < 0 {
   154  						tMin = 0
   155  					}
   156  					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
   157  						repIndex--
   158  						start--
   159  						seq.matchLen++
   160  					}
   161  					addLiterals(&seq, start)
   162  
   163  					// rep 0
   164  					seq.offset = 1
   165  					if debugSequences {
   166  						println("repeat sequence", seq, "next s:", s)
   167  					}
   168  					blk.sequences = append(blk.sequences, seq)
   169  					s += lenght + repOff
   170  					nextEmit = s
   171  					if s >= sLimit {
   172  						if debugEncoder {
   173  							println("repeat ended", s, lenght)
   174  
   175  						}
   176  						break encodeLoop
   177  					}
   178  					cv = load6432(src, s)
   179  					continue
   180  				}
   181  			}
   182  			// Find the offsets of our two matches.
   183  			coffsetL := s - (candidateL.offset - e.cur)
   184  			coffsetS := s - (candidateS.offset - e.cur)
   185  
   186  			// Check if we have a long match.
   187  			if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
   188  				// Found a long match, likely at least 8 bytes.
   189  				// Reference encoder checks all 8 bytes, we only check 4,
   190  				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
   191  				t = candidateL.offset - e.cur
   192  				if debugAsserts && s <= t {
   193  					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
   194  				}
   195  				if debugAsserts && s-t > e.maxMatchOff {
   196  					panic("s - t >e.maxMatchOff")
   197  				}
   198  				if debugMatches {
   199  					println("long match")
   200  				}
   201  				break
   202  			}
   203  
   204  			// Check if we have a short match.
   205  			if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
   206  				// found a regular match
   207  				// See if we can find a long match at s+1
   208  				const checkAt = 1
   209  				cv := load6432(src, s+checkAt)
   210  				nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
   211  				candidateL = e.longTable[nextHashL]
   212  				coffsetL = s - (candidateL.offset - e.cur) + checkAt
   213  
   214  				// We can store it, since we have at least a 4 byte match.
   215  				e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
   216  				if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
   217  					// Found a long match, likely at least 8 bytes.
   218  					// Reference encoder checks all 8 bytes, we only check 4,
   219  					// but the likelihood of both the first 4 bytes and the hash matching should be enough.
   220  					t = candidateL.offset - e.cur
   221  					s += checkAt
   222  					if debugMatches {
   223  						println("long match (after short)")
   224  					}
   225  					break
   226  				}
   227  
   228  				t = candidateS.offset - e.cur
   229  				if debugAsserts && s <= t {
   230  					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
   231  				}
   232  				if debugAsserts && s-t > e.maxMatchOff {
   233  					panic("s - t >e.maxMatchOff")
   234  				}
   235  				if debugAsserts && t < 0 {
   236  					panic("t<0")
   237  				}
   238  				if debugMatches {
   239  					println("short match")
   240  				}
   241  				break
   242  			}
   243  
   244  			// No match found, move forward in input.
   245  			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
   246  			if s >= sLimit {
   247  				break encodeLoop
   248  			}
   249  			cv = load6432(src, s)
   250  		}
   251  
   252  		// A 4-byte match has been found. Update recent offsets.
   253  		// We'll later see if more than 4 bytes.
   254  		offset2 = offset1
   255  		offset1 = s - t
   256  
   257  		if debugAsserts && s <= t {
   258  			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
   259  		}
   260  
   261  		if debugAsserts && canRepeat && int(offset1) > len(src) {
   262  			panic("invalid offset")
   263  		}
   264  
   265  		// Extend the 4-byte match as long as possible.
   266  		l := e.matchlen(s+4, t+4, src) + 4
   267  
   268  		// Extend backwards
   269  		tMin := s - e.maxMatchOff
   270  		if tMin < 0 {
   271  			tMin = 0
   272  		}
   273  		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
   274  			s--
   275  			t--
   276  			l++
   277  		}
   278  
   279  		// Write our sequence
   280  		var seq seq
   281  		seq.litLen = uint32(s - nextEmit)
   282  		seq.matchLen = uint32(l - zstdMinMatch)
   283  		if seq.litLen > 0 {
   284  			blk.literals = append(blk.literals, src[nextEmit:s]...)
   285  		}
   286  		seq.offset = uint32(s-t) + 3
   287  		s += l
   288  		if debugSequences {
   289  			println("sequence", seq, "next s:", s)
   290  		}
   291  		blk.sequences = append(blk.sequences, seq)
   292  		nextEmit = s
   293  		if s >= sLimit {
   294  			break encodeLoop
   295  		}
   296  
   297  		// Index match start+1 (long) and start+2 (short)
   298  		index0 := s - l + 1
   299  		// Index match end-2 (long) and end-1 (short)
   300  		index1 := s - 2
   301  
   302  		cv0 := load6432(src, index0)
   303  		cv1 := load6432(src, index1)
   304  		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
   305  		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
   306  		e.longTable[hashLen(cv0, dFastLongTableBits, dFastLongLen)] = te0
   307  		e.longTable[hashLen(cv1, dFastLongTableBits, dFastLongLen)] = te1
   308  		cv0 >>= 8
   309  		cv1 >>= 8
   310  		te0.offset++
   311  		te1.offset++
   312  		te0.val = uint32(cv0)
   313  		te1.val = uint32(cv1)
   314  		e.table[hashLen(cv0, dFastShortTableBits, dFastShortLen)] = te0
   315  		e.table[hashLen(cv1, dFastShortTableBits, dFastShortLen)] = te1
   316  
   317  		cv = load6432(src, s)
   318  
   319  		if !canRepeat {
   320  			continue
   321  		}
   322  
   323  		// Check offset 2
   324  		for {
   325  			o2 := s - offset2
   326  			if load3232(src, o2) != uint32(cv) {
   327  				// Do regular search
   328  				break
   329  			}
   330  
   331  			// Store this, since we have it.
   332  			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
   333  			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
   334  
   335  			// We have at least 4 byte match.
   336  			// No need to check backwards. We come straight from a match
   337  			l := 4 + e.matchlen(s+4, o2+4, src)
   338  
   339  			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
   340  			e.longTable[nextHashL] = entry
   341  			e.table[nextHashS] = entry
   342  			seq.matchLen = uint32(l) - zstdMinMatch
   343  			seq.litLen = 0
   344  
   345  			// Since litlen is always 0, this is offset 1.
   346  			seq.offset = 1
   347  			s += l
   348  			nextEmit = s
   349  			if debugSequences {
   350  				println("sequence", seq, "next s:", s)
   351  			}
   352  			blk.sequences = append(blk.sequences, seq)
   353  
   354  			// Swap offset 1 and 2.
   355  			offset1, offset2 = offset2, offset1
   356  			if s >= sLimit {
   357  				// Finished
   358  				break encodeLoop
   359  			}
   360  			cv = load6432(src, s)
   361  		}
   362  	}
   363  
   364  	if int(nextEmit) < len(src) {
   365  		blk.literals = append(blk.literals, src[nextEmit:]...)
   366  		blk.extraLits = len(src) - int(nextEmit)
   367  	}
   368  	blk.recentOffsets[0] = uint32(offset1)
   369  	blk.recentOffsets[1] = uint32(offset2)
   370  	if debugEncoder {
   371  		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
   372  	}
   373  }
   374  
   375  // EncodeNoHist will encode a block with no history and no following blocks.
   376  // Most notable difference is that src will not be copied for history and
   377  // we do not need to check for max match length.
   378  func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
   379  	const (
   380  		// Input margin is the number of bytes we read (8)
   381  		// and the maximum we will read ahead (2)
   382  		inputMargin            = 8 + 2
   383  		minNonLiteralBlockSize = 16
   384  	)
   385  
   386  	// Protect against e.cur wraparound.
   387  	if e.cur >= e.bufferReset {
   388  		for i := range e.table[:] {
   389  			e.table[i] = tableEntry{}
   390  		}
   391  		for i := range e.longTable[:] {
   392  			e.longTable[i] = tableEntry{}
   393  		}
   394  		e.cur = e.maxMatchOff
   395  	}
   396  
   397  	s := int32(0)
   398  	blk.size = len(src)
   399  	if len(src) < minNonLiteralBlockSize {
   400  		blk.extraLits = len(src)
   401  		blk.literals = blk.literals[:len(src)]
   402  		copy(blk.literals, src)
   403  		return
   404  	}
   405  
   406  	// Override src
   407  	sLimit := int32(len(src)) - inputMargin
   408  	// stepSize is the number of bytes to skip on every main loop iteration.
   409  	// It should be >= 1.
   410  	const stepSize = 1
   411  
   412  	const kSearchStrength = 8
   413  
   414  	// nextEmit is where in src the next emitLiteral should start from.
   415  	nextEmit := s
   416  	cv := load6432(src, s)
   417  
   418  	// Relative offsets
   419  	offset1 := int32(blk.recentOffsets[0])
   420  	offset2 := int32(blk.recentOffsets[1])
   421  
   422  	addLiterals := func(s *seq, until int32) {
   423  		if until == nextEmit {
   424  			return
   425  		}
   426  		blk.literals = append(blk.literals, src[nextEmit:until]...)
   427  		s.litLen = uint32(until - nextEmit)
   428  	}
   429  	if debugEncoder {
   430  		println("recent offsets:", blk.recentOffsets)
   431  	}
   432  
   433  encodeLoop:
   434  	for {
   435  		var t int32
   436  		for {
   437  
   438  			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
   439  			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
   440  			candidateL := e.longTable[nextHashL]
   441  			candidateS := e.table[nextHashS]
   442  
   443  			const repOff = 1
   444  			repIndex := s - offset1 + repOff
   445  			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
   446  			e.longTable[nextHashL] = entry
   447  			e.table[nextHashS] = entry
   448  
   449  			if len(blk.sequences) > 2 {
   450  				if load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
   451  					// Consider history as well.
   452  					var seq seq
   453  					//length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
   454  					length := 4 + int32(matchLen(src[s+4+repOff:], src[repIndex+4:]))
   455  
   456  					seq.matchLen = uint32(length - zstdMinMatch)
   457  
   458  					// We might be able to match backwards.
   459  					// Extend as long as we can.
   460  					start := s + repOff
   461  					// We end the search early, so we don't risk 0 literals
   462  					// and have to do special offset treatment.
   463  					startLimit := nextEmit + 1
   464  
   465  					tMin := s - e.maxMatchOff
   466  					if tMin < 0 {
   467  						tMin = 0
   468  					}
   469  					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] {
   470  						repIndex--
   471  						start--
   472  						seq.matchLen++
   473  					}
   474  					addLiterals(&seq, start)
   475  
   476  					// rep 0
   477  					seq.offset = 1
   478  					if debugSequences {
   479  						println("repeat sequence", seq, "next s:", s)
   480  					}
   481  					blk.sequences = append(blk.sequences, seq)
   482  					s += length + repOff
   483  					nextEmit = s
   484  					if s >= sLimit {
   485  						if debugEncoder {
   486  							println("repeat ended", s, length)
   487  
   488  						}
   489  						break encodeLoop
   490  					}
   491  					cv = load6432(src, s)
   492  					continue
   493  				}
   494  			}
   495  			// Find the offsets of our two matches.
   496  			coffsetL := s - (candidateL.offset - e.cur)
   497  			coffsetS := s - (candidateS.offset - e.cur)
   498  
   499  			// Check if we have a long match.
   500  			if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
   501  				// Found a long match, likely at least 8 bytes.
   502  				// Reference encoder checks all 8 bytes, we only check 4,
   503  				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
   504  				t = candidateL.offset - e.cur
   505  				if debugAsserts && s <= t {
   506  					panic(fmt.Sprintf("s (%d) <= t (%d). cur: %d", s, t, e.cur))
   507  				}
   508  				if debugAsserts && s-t > e.maxMatchOff {
   509  					panic("s - t >e.maxMatchOff")
   510  				}
   511  				if debugMatches {
   512  					println("long match")
   513  				}
   514  				break
   515  			}
   516  
   517  			// Check if we have a short match.
   518  			if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
   519  				// found a regular match
   520  				// See if we can find a long match at s+1
   521  				const checkAt = 1
   522  				cv := load6432(src, s+checkAt)
   523  				nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
   524  				candidateL = e.longTable[nextHashL]
   525  				coffsetL = s - (candidateL.offset - e.cur) + checkAt
   526  
   527  				// We can store it, since we have at least a 4 byte match.
   528  				e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
   529  				if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
   530  					// Found a long match, likely at least 8 bytes.
   531  					// Reference encoder checks all 8 bytes, we only check 4,
   532  					// but the likelihood of both the first 4 bytes and the hash matching should be enough.
   533  					t = candidateL.offset - e.cur
   534  					s += checkAt
   535  					if debugMatches {
   536  						println("long match (after short)")
   537  					}
   538  					break
   539  				}
   540  
   541  				t = candidateS.offset - e.cur
   542  				if debugAsserts && s <= t {
   543  					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
   544  				}
   545  				if debugAsserts && s-t > e.maxMatchOff {
   546  					panic("s - t >e.maxMatchOff")
   547  				}
   548  				if debugAsserts && t < 0 {
   549  					panic("t<0")
   550  				}
   551  				if debugMatches {
   552  					println("short match")
   553  				}
   554  				break
   555  			}
   556  
   557  			// No match found, move forward in input.
   558  			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
   559  			if s >= sLimit {
   560  				break encodeLoop
   561  			}
   562  			cv = load6432(src, s)
   563  		}
   564  
   565  		// A 4-byte match has been found. Update recent offsets.
   566  		// We'll later see if more than 4 bytes.
   567  		offset2 = offset1
   568  		offset1 = s - t
   569  
   570  		if debugAsserts && s <= t {
   571  			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
   572  		}
   573  
   574  		// Extend the 4-byte match as long as possible.
   575  		//l := e.matchlen(s+4, t+4, src) + 4
   576  		l := int32(matchLen(src[s+4:], src[t+4:])) + 4
   577  
   578  		// Extend backwards
   579  		tMin := s - e.maxMatchOff
   580  		if tMin < 0 {
   581  			tMin = 0
   582  		}
   583  		for t > tMin && s > nextEmit && src[t-1] == src[s-1] {
   584  			s--
   585  			t--
   586  			l++
   587  		}
   588  
   589  		// Write our sequence
   590  		var seq seq
   591  		seq.litLen = uint32(s - nextEmit)
   592  		seq.matchLen = uint32(l - zstdMinMatch)
   593  		if seq.litLen > 0 {
   594  			blk.literals = append(blk.literals, src[nextEmit:s]...)
   595  		}
   596  		seq.offset = uint32(s-t) + 3
   597  		s += l
   598  		if debugSequences {
   599  			println("sequence", seq, "next s:", s)
   600  		}
   601  		blk.sequences = append(blk.sequences, seq)
   602  		nextEmit = s
   603  		if s >= sLimit {
   604  			break encodeLoop
   605  		}
   606  
   607  		// Index match start+1 (long) and start+2 (short)
   608  		index0 := s - l + 1
   609  		// Index match end-2 (long) and end-1 (short)
   610  		index1 := s - 2
   611  
   612  		cv0 := load6432(src, index0)
   613  		cv1 := load6432(src, index1)
   614  		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
   615  		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
   616  		e.longTable[hashLen(cv0, dFastLongTableBits, dFastLongLen)] = te0
   617  		e.longTable[hashLen(cv1, dFastLongTableBits, dFastLongLen)] = te1
   618  		cv0 >>= 8
   619  		cv1 >>= 8
   620  		te0.offset++
   621  		te1.offset++
   622  		te0.val = uint32(cv0)
   623  		te1.val = uint32(cv1)
   624  		e.table[hashLen(cv0, dFastShortTableBits, dFastShortLen)] = te0
   625  		e.table[hashLen(cv1, dFastShortTableBits, dFastShortLen)] = te1
   626  
   627  		cv = load6432(src, s)
   628  
   629  		if len(blk.sequences) <= 2 {
   630  			continue
   631  		}
   632  
   633  		// Check offset 2
   634  		for {
   635  			o2 := s - offset2
   636  			if load3232(src, o2) != uint32(cv) {
   637  				// Do regular search
   638  				break
   639  			}
   640  
   641  			// Store this, since we have it.
   642  			nextHashS := hashLen(cv1>>8, dFastShortTableBits, dFastShortLen)
   643  			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
   644  
   645  			// We have at least 4 byte match.
   646  			// No need to check backwards. We come straight from a match
   647  			//l := 4 + e.matchlen(s+4, o2+4, src)
   648  			l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))
   649  
   650  			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
   651  			e.longTable[nextHashL] = entry
   652  			e.table[nextHashS] = entry
   653  			seq.matchLen = uint32(l) - zstdMinMatch
   654  			seq.litLen = 0
   655  
   656  			// Since litlen is always 0, this is offset 1.
   657  			seq.offset = 1
   658  			s += l
   659  			nextEmit = s
   660  			if debugSequences {
   661  				println("sequence", seq, "next s:", s)
   662  			}
   663  			blk.sequences = append(blk.sequences, seq)
   664  
   665  			// Swap offset 1 and 2.
   666  			offset1, offset2 = offset2, offset1
   667  			if s >= sLimit {
   668  				// Finished
   669  				break encodeLoop
   670  			}
   671  			cv = load6432(src, s)
   672  		}
   673  	}
   674  
   675  	if int(nextEmit) < len(src) {
   676  		blk.literals = append(blk.literals, src[nextEmit:]...)
   677  		blk.extraLits = len(src) - int(nextEmit)
   678  	}
   679  	if debugEncoder {
   680  		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
   681  	}
   682  
   683  	// We do not store history, so we must offset e.cur to avoid false matches for next user.
   684  	if e.cur < e.bufferReset {
   685  		e.cur += int32(len(src))
   686  	}
   687  }
   688  
   689  // Encode will encode the content, with a dictionary if initialized for it.
   690  func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) {
   691  	const (
   692  		// Input margin is the number of bytes we read (8)
   693  		// and the maximum we will read ahead (2)
   694  		inputMargin            = 8 + 2
   695  		minNonLiteralBlockSize = 16
   696  	)
   697  
   698  	// Protect against e.cur wraparound.
   699  	for e.cur >= e.bufferReset-int32(len(e.hist)) {
   700  		if len(e.hist) == 0 {
   701  			for i := range e.table[:] {
   702  				e.table[i] = tableEntry{}
   703  			}
   704  			for i := range e.longTable[:] {
   705  				e.longTable[i] = tableEntry{}
   706  			}
   707  			e.markAllShardsDirty()
   708  			e.cur = e.maxMatchOff
   709  			break
   710  		}
   711  		// Shift down everything in the table that isn't already too far away.
   712  		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
   713  		for i := range e.table[:] {
   714  			v := e.table[i].offset
   715  			if v < minOff {
   716  				v = 0
   717  			} else {
   718  				v = v - e.cur + e.maxMatchOff
   719  			}
   720  			e.table[i].offset = v
   721  		}
   722  		for i := range e.longTable[:] {
   723  			v := e.longTable[i].offset
   724  			if v < minOff {
   725  				v = 0
   726  			} else {
   727  				v = v - e.cur + e.maxMatchOff
   728  			}
   729  			e.longTable[i].offset = v
   730  		}
   731  		e.markAllShardsDirty()
   732  		e.cur = e.maxMatchOff
   733  		break
   734  	}
   735  
   736  	s := e.addBlock(src)
   737  	blk.size = len(src)
   738  	if len(src) < minNonLiteralBlockSize {
   739  		blk.extraLits = len(src)
   740  		blk.literals = blk.literals[:len(src)]
   741  		copy(blk.literals, src)
   742  		return
   743  	}
   744  
   745  	// Override src
   746  	src = e.hist
   747  	sLimit := int32(len(src)) - inputMargin
   748  	// stepSize is the number of bytes to skip on every main loop iteration.
   749  	// It should be >= 1.
   750  	const stepSize = 1
   751  
   752  	const kSearchStrength = 8
   753  
   754  	// nextEmit is where in src the next emitLiteral should start from.
   755  	nextEmit := s
   756  	cv := load6432(src, s)
   757  
   758  	// Relative offsets
   759  	offset1 := int32(blk.recentOffsets[0])
   760  	offset2 := int32(blk.recentOffsets[1])
   761  
   762  	addLiterals := func(s *seq, until int32) {
   763  		if until == nextEmit {
   764  			return
   765  		}
   766  		blk.literals = append(blk.literals, src[nextEmit:until]...)
   767  		s.litLen = uint32(until - nextEmit)
   768  	}
   769  	if debugEncoder {
   770  		println("recent offsets:", blk.recentOffsets)
   771  	}
   772  
   773  encodeLoop:
   774  	for {
   775  		var t int32
   776  		// We allow the encoder to optionally turn off repeat offsets across blocks
   777  		canRepeat := len(blk.sequences) > 2
   778  
   779  		for {
   780  			if debugAsserts && canRepeat && offset1 == 0 {
   781  				panic("offset0 was 0")
   782  			}
   783  
   784  			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
   785  			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
   786  			candidateL := e.longTable[nextHashL]
   787  			candidateS := e.table[nextHashS]
   788  
   789  			const repOff = 1
   790  			repIndex := s - offset1 + repOff
   791  			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
   792  			e.longTable[nextHashL] = entry
   793  			e.markLongShardDirty(nextHashL)
   794  			e.table[nextHashS] = entry
   795  			e.markShardDirty(nextHashS)
   796  
   797  			if canRepeat {
   798  				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
   799  					// Consider history as well.
   800  					var seq seq
   801  					lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
   802  
   803  					seq.matchLen = uint32(lenght - zstdMinMatch)
   804  
   805  					// We might be able to match backwards.
   806  					// Extend as long as we can.
   807  					start := s + repOff
   808  					// We end the search early, so we don't risk 0 literals
   809  					// and have to do special offset treatment.
   810  					startLimit := nextEmit + 1
   811  
   812  					tMin := s - e.maxMatchOff
   813  					if tMin < 0 {
   814  						tMin = 0
   815  					}
   816  					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
   817  						repIndex--
   818  						start--
   819  						seq.matchLen++
   820  					}
   821  					addLiterals(&seq, start)
   822  
   823  					// rep 0
   824  					seq.offset = 1
   825  					if debugSequences {
   826  						println("repeat sequence", seq, "next s:", s)
   827  					}
   828  					blk.sequences = append(blk.sequences, seq)
   829  					s += lenght + repOff
   830  					nextEmit = s
   831  					if s >= sLimit {
   832  						if debugEncoder {
   833  							println("repeat ended", s, lenght)
   834  
   835  						}
   836  						break encodeLoop
   837  					}
   838  					cv = load6432(src, s)
   839  					continue
   840  				}
   841  			}
   842  			// Find the offsets of our two matches.
   843  			coffsetL := s - (candidateL.offset - e.cur)
   844  			coffsetS := s - (candidateS.offset - e.cur)
   845  
   846  			// Check if we have a long match.
   847  			if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
   848  				// Found a long match, likely at least 8 bytes.
   849  				// Reference encoder checks all 8 bytes, we only check 4,
   850  				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
   851  				t = candidateL.offset - e.cur
   852  				if debugAsserts && s <= t {
   853  					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
   854  				}
   855  				if debugAsserts && s-t > e.maxMatchOff {
   856  					panic("s - t >e.maxMatchOff")
   857  				}
   858  				if debugMatches {
   859  					println("long match")
   860  				}
   861  				break
   862  			}
   863  
   864  			// Check if we have a short match.
   865  			if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
   866  				// found a regular match
   867  				// See if we can find a long match at s+1
   868  				const checkAt = 1
   869  				cv := load6432(src, s+checkAt)
   870  				nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
   871  				candidateL = e.longTable[nextHashL]
   872  				coffsetL = s - (candidateL.offset - e.cur) + checkAt
   873  
   874  				// We can store it, since we have at least a 4 byte match.
   875  				e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
   876  				e.markLongShardDirty(nextHashL)
   877  				if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
   878  					// Found a long match, likely at least 8 bytes.
   879  					// Reference encoder checks all 8 bytes, we only check 4,
   880  					// but the likelihood of both the first 4 bytes and the hash matching should be enough.
   881  					t = candidateL.offset - e.cur
   882  					s += checkAt
   883  					if debugMatches {
   884  						println("long match (after short)")
   885  					}
   886  					break
   887  				}
   888  
   889  				t = candidateS.offset - e.cur
   890  				if debugAsserts && s <= t {
   891  					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
   892  				}
   893  				if debugAsserts && s-t > e.maxMatchOff {
   894  					panic("s - t >e.maxMatchOff")
   895  				}
   896  				if debugAsserts && t < 0 {
   897  					panic("t<0")
   898  				}
   899  				if debugMatches {
   900  					println("short match")
   901  				}
   902  				break
   903  			}
   904  
   905  			// No match found, move forward in input.
   906  			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
   907  			if s >= sLimit {
   908  				break encodeLoop
   909  			}
   910  			cv = load6432(src, s)
   911  		}
   912  
   913  		// A 4-byte match has been found. Update recent offsets.
   914  		// We'll later see if more than 4 bytes.
   915  		offset2 = offset1
   916  		offset1 = s - t
   917  
   918  		if debugAsserts && s <= t {
   919  			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
   920  		}
   921  
   922  		if debugAsserts && canRepeat && int(offset1) > len(src) {
   923  			panic("invalid offset")
   924  		}
   925  
   926  		// Extend the 4-byte match as long as possible.
   927  		l := e.matchlen(s+4, t+4, src) + 4
   928  
   929  		// Extend backwards
   930  		tMin := s - e.maxMatchOff
   931  		if tMin < 0 {
   932  			tMin = 0
   933  		}
   934  		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
   935  			s--
   936  			t--
   937  			l++
   938  		}
   939  
   940  		// Write our sequence
   941  		var seq seq
   942  		seq.litLen = uint32(s - nextEmit)
   943  		seq.matchLen = uint32(l - zstdMinMatch)
   944  		if seq.litLen > 0 {
   945  			blk.literals = append(blk.literals, src[nextEmit:s]...)
   946  		}
   947  		seq.offset = uint32(s-t) + 3
   948  		s += l
   949  		if debugSequences {
   950  			println("sequence", seq, "next s:", s)
   951  		}
   952  		blk.sequences = append(blk.sequences, seq)
   953  		nextEmit = s
   954  		if s >= sLimit {
   955  			break encodeLoop
   956  		}
   957  
   958  		// Index match start+1 (long) and start+2 (short)
   959  		index0 := s - l + 1
   960  		// Index match end-2 (long) and end-1 (short)
   961  		index1 := s - 2
   962  
   963  		cv0 := load6432(src, index0)
   964  		cv1 := load6432(src, index1)
   965  		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
   966  		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
   967  		longHash1 := hashLen(cv0, dFastLongTableBits, dFastLongLen)
   968  		longHash2 := hashLen(cv1, dFastLongTableBits, dFastLongLen)
   969  		e.longTable[longHash1] = te0
   970  		e.longTable[longHash2] = te1
   971  		e.markLongShardDirty(longHash1)
   972  		e.markLongShardDirty(longHash2)
   973  		cv0 >>= 8
   974  		cv1 >>= 8
   975  		te0.offset++
   976  		te1.offset++
   977  		te0.val = uint32(cv0)
   978  		te1.val = uint32(cv1)
   979  		hashVal1 := hashLen(cv0, dFastShortTableBits, dFastShortLen)
   980  		hashVal2 := hashLen(cv1, dFastShortTableBits, dFastShortLen)
   981  		e.table[hashVal1] = te0
   982  		e.markShardDirty(hashVal1)
   983  		e.table[hashVal2] = te1
   984  		e.markShardDirty(hashVal2)
   985  
   986  		cv = load6432(src, s)
   987  
   988  		if !canRepeat {
   989  			continue
   990  		}
   991  
   992  		// Check offset 2
   993  		for {
   994  			o2 := s - offset2
   995  			if load3232(src, o2) != uint32(cv) {
   996  				// Do regular search
   997  				break
   998  			}
   999  
  1000  			// Store this, since we have it.
  1001  			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
  1002  			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
  1003  
  1004  			// We have at least 4 byte match.
  1005  			// No need to check backwards. We come straight from a match
  1006  			l := 4 + e.matchlen(s+4, o2+4, src)
  1007  
  1008  			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
  1009  			e.longTable[nextHashL] = entry
  1010  			e.markLongShardDirty(nextHashL)
  1011  			e.table[nextHashS] = entry
  1012  			e.markShardDirty(nextHashS)
  1013  			seq.matchLen = uint32(l) - zstdMinMatch
  1014  			seq.litLen = 0
  1015  
  1016  			// Since litlen is always 0, this is offset 1.
  1017  			seq.offset = 1
  1018  			s += l
  1019  			nextEmit = s
  1020  			if debugSequences {
  1021  				println("sequence", seq, "next s:", s)
  1022  			}
  1023  			blk.sequences = append(blk.sequences, seq)
  1024  
  1025  			// Swap offset 1 and 2.
  1026  			offset1, offset2 = offset2, offset1
  1027  			if s >= sLimit {
  1028  				// Finished
  1029  				break encodeLoop
  1030  			}
  1031  			cv = load6432(src, s)
  1032  		}
  1033  	}
  1034  
  1035  	if int(nextEmit) < len(src) {
  1036  		blk.literals = append(blk.literals, src[nextEmit:]...)
  1037  		blk.extraLits = len(src) - int(nextEmit)
  1038  	}
  1039  	blk.recentOffsets[0] = uint32(offset1)
  1040  	blk.recentOffsets[1] = uint32(offset2)
  1041  	if debugEncoder {
  1042  		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
  1043  	}
  1044  	// If we encoded more than 64K mark all dirty.
  1045  	if len(src) > 64<<10 {
  1046  		e.markAllShardsDirty()
  1047  	}
  1048  }
  1049  
  1050  // ResetDict will reset and set a dictionary if not nil
  1051  func (e *doubleFastEncoder) Reset(d *dict, singleBlock bool) {
  1052  	e.fastEncoder.Reset(d, singleBlock)
  1053  	if d != nil {
  1054  		panic("doubleFastEncoder: Reset with dict not supported")
  1055  	}
  1056  }
  1057  
// Reset will reset encoder state and set a dictionary if not nil.
// It (re)builds the long-match dictionary table when the dictionary
// changed, then restores the working long table from it, copying
// either the whole table or only the shards marked dirty since the
// last reset, whichever is cheaper.
func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
	// Snapshot before the embedded reset, which may mutate e.allDirty.
	allDirty := e.allDirty
	e.fastEncoderDict.Reset(d, singleBlock)
	if d == nil {
		return
	}

	// Init or copy dict table.
	// Rebuild only if the table was never allocated or the dictionary changed.
	if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
		if len(e.dictLongTable) != len(e.longTable) {
			e.dictLongTable = make([]tableEntry, len(e.longTable))
		}
		// Hashing reads 8 bytes (dFastLongLen), so shorter content is not indexed.
		if len(d.content) >= 8 {
			cv := load6432(d.content, 0)
			// Offsets are stored relative to e.maxMatchOff, matching how
			// encode positions are biased by e.cur (set below).
			e.dictLongTable[hashLen(cv, dFastLongTableBits, dFastLongLen)] = tableEntry{
				val:    uint32(cv),
				offset: e.maxMatchOff,
			}
			end := int32(len(d.content)) - 8 + e.maxMatchOff
			for i := e.maxMatchOff + 1; i < end; i++ {
				// Rolling 8-byte window: drop low byte, shift in the next content byte.
				cv = cv>>8 | (uint64(d.content[i-e.maxMatchOff+7]) << 56)
				e.dictLongTable[hashLen(cv, dFastLongTableBits, dFastLongLen)] = tableEntry{
					val:    uint32(cv),
					offset: i,
				}
			}
		}
		e.lastDictID = d.id
		// New dictionary invalidates the whole working table.
		e.allDirty = true
	}
	// Reset table to initial state
	e.cur = e.maxMatchOff

	// Count dirty shards only when a full copy isn't already required.
	dirtyShardCnt := 0
	if !allDirty {
		for i := range e.longTableShardDirty {
			if e.longTableShardDirty[i] {
				dirtyShardCnt++
			}
		}
	}

	// If more than half the shards are dirty, a single bulk copy is cheaper
	// than per-shard copies.
	if allDirty || dirtyShardCnt > dLongTableShardCnt/2 {
		//copy(e.longTable[:], e.dictLongTable)
		// Slice-to-array-pointer conversion: compiles to one bulk copy.
		e.longTable = *(*[dFastLongTableSize]tableEntry)(e.dictLongTable)
		for i := range e.longTableShardDirty {
			e.longTableShardDirty[i] = false
		}
		return
	}
	// Otherwise restore only the shards that were written to since last reset.
	for i := range e.longTableShardDirty {
		if !e.longTableShardDirty[i] {
			continue
		}

		// copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize])
		*(*[dLongTableShardSize]tableEntry)(e.longTable[i*dLongTableShardSize:]) = *(*[dLongTableShardSize]tableEntry)(e.dictLongTable[i*dLongTableShardSize:])

		e.longTableShardDirty[i] = false
	}
}
  1120  
  1121  func (e *doubleFastEncoderDict) markLongShardDirty(entryNum uint32) {
  1122  	e.longTableShardDirty[entryNum/dLongTableShardSize] = true
  1123  }