github.com/bir3/gocompiler@v0.3.205/src/cmd/gocmd/compress/zstd/enc_best.go

// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import (
	"bytes"
	"fmt"

	"github.com/bir3/gocompiler/src/cmd/gocmd/compress"
)

const (
	bestLongTableBits = 22                     // Bits used in the long match table
	bestLongTableSize = 1 << bestLongTableBits // Size of the table
	bestLongLen       = 8                      // Bytes used for table hash

	// Note: Increasing the short table bits or making the hash shorter
	// can actually lead to compression degradation since it will 'steal' more from the
	// long match table and match offsets are quite big.
	// This greatly depends on the type of input.
	bestShortTableBits = 18                      // Bits used in the short match table
	bestShortTableSize = 1 << bestShortTableBits // Size of the table
	bestShortLen       = 4                       // Bytes used for table hash

)

type match struct {
	offset int32
	s      int32
	length int32
	rep    int32
	est    int32
}

const highScore = maxMatchLen * 8

// estBits will estimate output bits from predefined tables.
func (m *match) estBits(bitsPerByte int32) {
	mlc := mlCode(uint32(m.length - zstdMinMatch))
	var ofc uint8
	if m.rep < 0 {
		ofc = ofCode(uint32(m.s-m.offset) + 3)
	} else {
		ofc = ofCode(uint32(m.rep))
	}
	// Cost of the offset and match length codes, excluding literals.
	ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc]

	// Add cost of match encoding...
	m.est = int32(ofTT.outBits + mlTT.outBits)
	m.est += int32(ofTT.deltaNbBits>>16 + mlTT.deltaNbBits>>16)
	// Subtract savings compared to literal encoding...
	m.est -= (m.length * bitsPerByte) >> 10
	if m.est > 0 {
		// Unlikely gain..
		m.length = 0
		m.est = highScore
	}
}

// bestFastEncoder uses 2 tables, one for short matches (4 bytes) and one for long matches.
// The long match table contains the previous entry with the same hash,
// effectively making it a "chain" of length 2.
// When we find a long match we choose between the two values and select the longest.
// When we find a short match, after checking the long, we check if we can find a long at n+1
// and that it is longer (lazy matching).
type bestFastEncoder struct {
	fastBase
	table         [bestShortTableSize]prevEntry
	longTable     [bestLongTableSize]prevEntry
	dictTable     []prevEntry
	dictLongTable []prevEntry
}

// Encode improves compression...
func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (4)
		inputMargin            = 8 + 4
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	for e.cur >= e.bufferReset-int32(len(e.hist)) {
		if len(e.hist) == 0 {
			e.table = [bestShortTableSize]prevEntry{}
			e.longTable = [bestLongTableSize]prevEntry{}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
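		// minOff is the oldest absolute offset still reachable within e.maxMatchOff.
		// Older entries are cleared; the rest are rebased so they remain valid once
		// e.cur is reset to e.maxMatchOff after both tables have been adjusted.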
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			v2 := e.table[i].prev
			if v < minOff {
				v = 0
				v2 = 0
			} else {
				v = v - e.cur + e.maxMatchOff
				if v2 < minOff {
					v2 = 0
				} else {
					v2 = v2 - e.cur + e.maxMatchOff
				}
			}
			e.table[i] = prevEntry{
				offset: v,
				prev:   v2,
			}
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			v2 := e.longTable[i].prev
			if v < minOff {
				v = 0
				v2 = 0
			} else {
				v = v - e.cur + e.maxMatchOff
				if v2 < minOff {
					v2 = 0
				} else {
					v2 = v2 - e.cur + e.maxMatchOff
				}
			}
			e.longTable[i] = prevEntry{
				offset: v,
				prev:   v2,
			}
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Use this to estimate literal cost.
	// Scaled by 10 bits.
	bitsPerByte := int32((compress.ShannonEntropyBits(src) * 1024) / len(src))
	// Huffman can never go < 1 bit/byte
	if bitsPerByte < 1024 {
		bitsPerByte = 1024
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	const kSearchStrength = 10

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])
	offset3 := int32(blk.recentOffsets[2])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}

	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2

		if debugAsserts && canRepeat && offset1 == 0 {
			panic("offset0 was 0")
		}

		const goodEnough = 250

		cv := load6432(src, s)

		nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
		nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
		candidateL := e.longTable[nextHashL]
		candidateS := e.table[nextHashS]

		// Set m to a match at offset if it looks like that will improve compression.
		improve := func(m *match, offset int32, s int32, first uint32, rep int32) {
			if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
				return
			}
			if debugAsserts {
				if offset <= 0 {
					panic(offset)
				}
				if !bytes.Equal(src[s:s+4], src[offset:offset+4]) {
					panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
				}
			}
			// Try to quick reject if we already have a long match.
			if m.length > 16 {
				left := len(src) - int(m.s+m.length)
				// If we are too close to the end, keep as is.
				if left <= 0 {
					return
				}
				checkLen := m.length - (s - m.s) - 8
				if left > 2 && checkLen > 4 {
					// Check 4 bytes, 4 bytes from the end of the current match.
					a := load3232(src, offset+checkLen)
					b := load3232(src, s+checkLen)
					if a != b {
						return
					}
				}
			}
			l := 4 + e.matchlen(s+4, offset+4, src)
			if rep < 0 {
				// Extend candidate match backwards as far as possible.
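				// tMin is the lowest position the match may extend back to:
				// at most maxMatchOff before the current s, clamped to zero.
				// s > nextEmit keeps bytes already emitted as literals out of
				// the extension, and l < maxMatchLength caps the total length.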
				tMin := s - e.maxMatchOff
				if tMin < 0 {
					tMin = 0
				}
				for offset > tMin && s > nextEmit && src[offset-1] == src[s-1] && l < maxMatchLength {
					s--
					offset--
					l++
				}
			}

			cand := match{offset: offset, s: s, length: l, rep: rep}
			cand.estBits(bitsPerByte)
			if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 {
				*m = cand
			}
		}

		best := match{s: s, est: highScore}
		improve(&best, candidateL.offset-e.cur, s, uint32(cv), -1)
		improve(&best, candidateL.prev-e.cur, s, uint32(cv), -1)
		improve(&best, candidateS.offset-e.cur, s, uint32(cv), -1)
		improve(&best, candidateS.prev-e.cur, s, uint32(cv), -1)

		if canRepeat && best.length < goodEnough {
			if s == nextEmit {
				// Check repeats straight after a match.
				improve(&best, s-offset2, s, uint32(cv), 1|4)
				improve(&best, s-offset3, s, uint32(cv), 2|4)
				if offset1 > 1 {
					improve(&best, s-(offset1-1), s, uint32(cv), 3|4)
				}
			}

			// If either no match or a non-repeat match, check at + 1
			if best.rep <= 0 {
				cv32 := uint32(cv >> 8)
				spp := s + 1
				improve(&best, spp-offset1, spp, cv32, 1)
				improve(&best, spp-offset2, spp, cv32, 2)
				improve(&best, spp-offset3, spp, cv32, 3)
				if best.rep < 0 {
					cv32 = uint32(cv >> 24)
					spp += 2
					improve(&best, spp-offset1, spp, cv32, 1)
					improve(&best, spp-offset2, spp, cv32, 2)
					improve(&best, spp-offset3, spp, cv32, 3)
				}
			}
		}
		// Load next and check...
		e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset}
		e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset}

		// Look far ahead, unless we have a really long match already...
		if best.length < goodEnough {
			// No match found, move forward on input, no need to check forward...
			if best.length < 4 {
				s += 1 + (s-nextEmit)>>(kSearchStrength-1)
				if s >= sLimit {
					break encodeLoop
				}
				continue
			}

			candidateS = e.table[hashLen(cv>>8, bestShortTableBits, bestShortLen)]
			cv = load6432(src, s+1)
			cv2 := load6432(src, s+2)
			candidateL = e.longTable[hashLen(cv, bestLongTableBits, bestLongLen)]
			candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)]

			// Short at s+1
			improve(&best, candidateS.offset-e.cur, s+1, uint32(cv), -1)
			// Long at s+1, s+2
			improve(&best, candidateL.offset-e.cur, s+1, uint32(cv), -1)
			improve(&best, candidateL.prev-e.cur, s+1, uint32(cv), -1)
			improve(&best, candidateL2.offset-e.cur, s+2, uint32(cv2), -1)
			improve(&best, candidateL2.prev-e.cur, s+2, uint32(cv2), -1)
			if false {
				// Short at s+3.
				// Too often worse...
				improve(&best, e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+3, uint32(cv2>>8), -1)
			}

			// Start check at a fixed offset to allow for a few mismatches.
			// For this compression level 2 yields the best results.
			// We cannot do this if we have already indexed this position.
			const skipBeginning = 2
			if best.s > s-skipBeginning {
				// See if we can find a better match by checking where the current best ends.
				// Use that offset to see if we can find a better full match.
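				// The long table is probed at the position just past the current
				// best match; candidateEnd.offset minus best.length (plus
				// skipBeginning) then gives an alternative starting offset for
				// the same stretch of input.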
				if sAt := best.s + best.length; sAt < sLimit {
					nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
					candidateEnd := e.longTable[nextHashL]

					if off := candidateEnd.offset - e.cur - best.length + skipBeginning; off >= 0 {
						improve(&best, off, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
						if off := candidateEnd.prev - e.cur - best.length + skipBeginning; off >= 0 {
							improve(&best, off, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
						}
					}
				}
			}
		}

		if debugAsserts {
			if !bytes.Equal(src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]) {
				panic(fmt.Sprintf("match mismatch: %v != %v", src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]))
			}
		}

		// We have a match, we can store the forward value
		if best.rep > 0 {
			var seq seq
			seq.matchLen = uint32(best.length - zstdMinMatch)
			if debugAsserts && s <= nextEmit {
				panic("s <= nextEmit")
			}
			addLiterals(&seq, best.s)

			// Repeat. If bit 4 is set, this is a non-lit repeat.
			seq.offset = uint32(best.rep & 3)
			if debugSequences {
				println("repeat sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Index old s + 1 -> s - 1
			index0 := s + 1
			s = best.s + best.length

			nextEmit = s
			if s >= sLimit {
				if debugEncoder {
					println("repeat ended", s, best.length)
				}
				break encodeLoop
			}
			// Index skipped...
			off := index0 + e.cur
			for index0 < s {
				cv0 := load6432(src, index0)
				h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
				h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
				e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
				e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
				off++
				index0++
			}
			switch best.rep {
			case 2, 4 | 1:
				offset1, offset2 = offset2, offset1
			case 3, 4 | 2:
				offset1, offset2, offset3 = offset3, offset1, offset2
			case 4 | 3:
				offset1, offset2, offset3 = offset1-1, offset1, offset2
			}
			continue
		}

		// A 4-byte match has been found. Update recent offsets.
		// We'll later see if more than 4 bytes.
		index0 := s + 1
		s = best.s
		t := best.offset
		offset1, offset2, offset3 = s-t, offset1, offset2

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Write our sequence
		var seq seq
		l := best.length
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index old s + 1 -> s - 1
		for index0 < s {
			cv0 := load6432(src, index0)
			h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
			h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
			off := index0 + e.cur
			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
			e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
			index0++
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
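		// extraLits records the trailing literals that are not covered by any sequence.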
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	blk.recentOffsets[2] = uint32(offset3)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// EncodeNoHist will encode a block with no history and no following blocks.
// Most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *bestFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	e.ensureHist(len(src))
	e.Encode(blk, src)
}

// Reset will reset and set a dictionary if not nil
func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d == nil {
		return
	}
	// Init or copy dict table
	if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
		if len(e.dictTable) != len(e.table) {
			e.dictTable = make([]prevEntry, len(e.table))
		}
		end := int32(len(d.content)) - 8 + e.maxMatchOff
		for i := e.maxMatchOff; i < end; i += 4 {
			const hashLog = bestShortTableBits

			cv := load6432(d.content, i-e.maxMatchOff)
			nextHash := hashLen(cv, hashLog, bestShortLen)      // 0 -> 4
			nextHash1 := hashLen(cv>>8, hashLog, bestShortLen)  // 1 -> 5
			nextHash2 := hashLen(cv>>16, hashLog, bestShortLen) // 2 -> 6
			nextHash3 := hashLen(cv>>24, hashLog, bestShortLen) // 3 -> 7
			e.dictTable[nextHash] = prevEntry{
				prev:   e.dictTable[nextHash].offset,
				offset: i,
			}
			e.dictTable[nextHash1] = prevEntry{
				prev:   e.dictTable[nextHash1].offset,
				offset: i + 1,
			}
			e.dictTable[nextHash2] = prevEntry{
				prev:   e.dictTable[nextHash2].offset,
				offset: i + 2,
			}
			e.dictTable[nextHash3] = prevEntry{
				prev:   e.dictTable[nextHash3].offset,
				offset: i + 3,
			}
		}
		e.lastDictID = d.id
	}

	// Init or copy dict long table
	if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
		if len(e.dictLongTable) != len(e.longTable) {
			e.dictLongTable = make([]prevEntry, len(e.longTable))
		}
		if len(d.content) >= 8 {
			cv := load6432(d.content, 0)
			h := hashLen(cv, bestLongTableBits, bestLongLen)
			e.dictLongTable[h] = prevEntry{
				offset: e.maxMatchOff,
				prev:   e.dictLongTable[h].offset,
			}

			end := int32(len(d.content)) - 8 + e.maxMatchOff
			off := 8 // First to read
			for i := e.maxMatchOff + 1; i < end; i++ {
				cv = cv>>8 | (uint64(d.content[off]) << 56)
				h := hashLen(cv, bestLongTableBits, bestLongLen)
				e.dictLongTable[h] = prevEntry{
					offset: i,
					prev:   e.dictLongTable[h].offset,
				}
				off++
			}
		}
		e.lastDictID = d.id
	}
	// Reset table to initial state
	copy(e.longTable[:], e.dictLongTable)

	e.cur = e.maxMatchOff
	// Reset table to initial state
	copy(e.table[:], e.dictTable)
}