github.com/andybalholm/brotli@v1.0.6/hash.go (about)

     1  package brotli
     2  
     3  import (
     4  	"encoding/binary"
     5  	"fmt"
     6  )
     7  
     8  type hasherCommon struct {
     9  	params           hasherParams
    10  	is_prepared_     bool
    11  	dict_num_lookups uint
    12  	dict_num_matches uint
    13  }
    14  
    15  func (h *hasherCommon) Common() *hasherCommon {
    16  	return h
    17  }
    18  
    19  type hasherHandle interface {
    20  	Common() *hasherCommon
    21  	Initialize(params *encoderParams)
    22  	Prepare(one_shot bool, input_size uint, data []byte)
    23  	StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint)
    24  	HashTypeLength() uint
    25  	StoreLookahead() uint
    26  	PrepareDistanceCache(distance_cache []int)
    27  	FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult)
    28  	StoreRange(data []byte, mask uint, ix_start uint, ix_end uint)
    29  	Store(data []byte, mask uint, ix uint)
    30  }
    31  
    32  const kCutoffTransformsCount uint32 = 10
    33  
    34  /*   0,  12,   27,    23,    42,    63,    56,    48,    59,    64 */
    35  /* 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */
    36  const kCutoffTransforms uint64 = 0x071B520ADA2D3200
    37  
    38  type hasherSearchResult struct {
    39  	len            uint
    40  	distance       uint
    41  	score          uint
    42  	len_code_delta int
    43  }
    44  
    45  /* kHashMul32 multiplier has these properties:
    46     * The multiplier must be odd. Otherwise we may lose the highest bit.
    47     * No long streaks of ones or zeros.
    48     * There is no effort to ensure that it is a prime, the oddity is enough
    49       for this use.
    50     * The number has been tuned heuristically against compression benchmarks. */
    51  const kHashMul32 uint32 = 0x1E35A7BD
    52  
    53  const kHashMul64 uint64 = 0x1E35A7BD1E35A7BD
    54  
    55  const kHashMul64Long uint64 = 0x1FE35A7BD3579BD3
    56  
    57  func hash14(data []byte) uint32 {
    58  	var h uint32 = binary.LittleEndian.Uint32(data) * kHashMul32
    59  
    60  	/* The higher bits contain more mixture from the multiplication,
    61  	   so we take our results from there. */
    62  	return h >> (32 - 14)
    63  }
    64  
    65  func prepareDistanceCache(distance_cache []int, num_distances int) {
    66  	if num_distances > 4 {
    67  		var last_distance int = distance_cache[0]
    68  		distance_cache[4] = last_distance - 1
    69  		distance_cache[5] = last_distance + 1
    70  		distance_cache[6] = last_distance - 2
    71  		distance_cache[7] = last_distance + 2
    72  		distance_cache[8] = last_distance - 3
    73  		distance_cache[9] = last_distance + 3
    74  		if num_distances > 10 {
    75  			var next_last_distance int = distance_cache[1]
    76  			distance_cache[10] = next_last_distance - 1
    77  			distance_cache[11] = next_last_distance + 1
    78  			distance_cache[12] = next_last_distance - 2
    79  			distance_cache[13] = next_last_distance + 2
    80  			distance_cache[14] = next_last_distance - 3
    81  			distance_cache[15] = next_last_distance + 3
    82  		}
    83  	}
    84  }
    85  
    86  const literalByteScore = 135
    87  
    88  const distanceBitPenalty = 30
    89  
    90  /* Score must be positive after applying maximal penalty. */
    91  const scoreBase = (distanceBitPenalty * 8 * 8)
    92  
    93  /* Usually, we always choose the longest backward reference. This function
    94     allows for the exception of that rule.
    95  
    96     If we choose a backward reference that is further away, it will
    97     usually be coded with more bits. We approximate this by assuming
    98     log2(distance). If the distance can be expressed in terms of the
    99     last four distances, we use some heuristic constants to estimate
   100     the bits cost. For the first up to four literals we use the bit
   101     cost of the literals from the literal cost model, after that we
   102     use the average bit cost of the cost model.
   103  
   104     This function is used to sometimes discard a longer backward reference
   105     when it is not much longer and the bit cost for encoding it is more
   106     than the saved literals.
   107  
   108     backward_reference_offset MUST be positive. */
   109  func backwardReferenceScore(copy_length uint, backward_reference_offset uint) uint {
   110  	return scoreBase + literalByteScore*uint(copy_length) - distanceBitPenalty*uint(log2FloorNonZero(backward_reference_offset))
   111  }
   112  
   113  func backwardReferenceScoreUsingLastDistance(copy_length uint) uint {
   114  	return literalByteScore*uint(copy_length) + scoreBase + 15
   115  }
   116  
   117  func backwardReferencePenaltyUsingLastDistance(distance_short_code uint) uint {
   118  	return uint(39) + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE)
   119  }
   120  
   121  func testStaticDictionaryItem(dictionary *encoderDictionary, item uint, data []byte, max_length uint, max_backward uint, max_distance uint, out *hasherSearchResult) bool {
   122  	var len uint
   123  	var word_idx uint
   124  	var offset uint
   125  	var matchlen uint
   126  	var backward uint
   127  	var score uint
   128  	len = item & 0x1F
   129  	word_idx = item >> 5
   130  	offset = uint(dictionary.words.offsets_by_length[len]) + len*word_idx
   131  	if len > max_length {
   132  		return false
   133  	}
   134  
   135  	matchlen = findMatchLengthWithLimit(data, dictionary.words.data[offset:], uint(len))
   136  	if matchlen+uint(dictionary.cutoffTransformsCount) <= len || matchlen == 0 {
   137  		return false
   138  	}
   139  	{
   140  		var cut uint = len - matchlen
   141  		var transform_id uint = (cut << 2) + uint((dictionary.cutoffTransforms>>(cut*6))&0x3F)
   142  		backward = max_backward + 1 + word_idx + (transform_id << dictionary.words.size_bits_by_length[len])
   143  	}
   144  
   145  	if backward > max_distance {
   146  		return false
   147  	}
   148  
   149  	score = backwardReferenceScore(matchlen, backward)
   150  	if score < out.score {
   151  		return false
   152  	}
   153  
   154  	out.len = matchlen
   155  	out.len_code_delta = int(len) - int(matchlen)
   156  	out.distance = backward
   157  	out.score = score
   158  	return true
   159  }
   160  
   161  func searchInStaticDictionary(dictionary *encoderDictionary, handle hasherHandle, data []byte, max_length uint, max_backward uint, max_distance uint, out *hasherSearchResult, shallow bool) {
   162  	var key uint
   163  	var i uint
   164  	var self *hasherCommon = handle.Common()
   165  	if self.dict_num_matches < self.dict_num_lookups>>7 {
   166  		return
   167  	}
   168  
   169  	key = uint(hash14(data) << 1)
   170  	for i = 0; ; (func() { i++; key++ })() {
   171  		var tmp uint
   172  		if shallow {
   173  			tmp = 1
   174  		} else {
   175  			tmp = 2
   176  		}
   177  		if i >= tmp {
   178  			break
   179  		}
   180  		var item uint = uint(dictionary.hash_table[key])
   181  		self.dict_num_lookups++
   182  		if item != 0 {
   183  			var item_matches bool = testStaticDictionaryItem(dictionary, item, data, max_length, max_backward, max_distance, out)
   184  			if item_matches {
   185  				self.dict_num_matches++
   186  			}
   187  		}
   188  	}
   189  }
   190  
   191  type backwardMatch struct {
   192  	distance        uint32
   193  	length_and_code uint32
   194  }
   195  
   196  func initBackwardMatch(self *backwardMatch, dist uint, len uint) {
   197  	self.distance = uint32(dist)
   198  	self.length_and_code = uint32(len << 5)
   199  }
   200  
   201  func initDictionaryBackwardMatch(self *backwardMatch, dist uint, len uint, len_code uint) {
   202  	self.distance = uint32(dist)
   203  	var tmp uint
   204  	if len == len_code {
   205  		tmp = 0
   206  	} else {
   207  		tmp = len_code
   208  	}
   209  	self.length_and_code = uint32(len<<5 | tmp)
   210  }
   211  
   212  func backwardMatchLength(self *backwardMatch) uint {
   213  	return uint(self.length_and_code >> 5)
   214  }
   215  
   216  func backwardMatchLengthCode(self *backwardMatch) uint {
   217  	var code uint = uint(self.length_and_code) & 31
   218  	if code != 0 {
   219  		return code
   220  	} else {
   221  		return backwardMatchLength(self)
   222  	}
   223  }
   224  
   225  func hasherReset(handle hasherHandle) {
   226  	if handle == nil {
   227  		return
   228  	}
   229  	handle.Common().is_prepared_ = false
   230  }
   231  
   232  func newHasher(typ int) hasherHandle {
   233  	switch typ {
   234  	case 2:
   235  		return &hashLongestMatchQuickly{
   236  			bucketBits:    16,
   237  			bucketSweep:   1,
   238  			hashLen:       5,
   239  			useDictionary: true,
   240  		}
   241  	case 3:
   242  		return &hashLongestMatchQuickly{
   243  			bucketBits:    16,
   244  			bucketSweep:   2,
   245  			hashLen:       5,
   246  			useDictionary: false,
   247  		}
   248  	case 4:
   249  		return &hashLongestMatchQuickly{
   250  			bucketBits:    17,
   251  			bucketSweep:   4,
   252  			hashLen:       5,
   253  			useDictionary: true,
   254  		}
   255  	case 5:
   256  		return new(h5)
   257  	case 6:
   258  		return new(h6)
   259  	case 10:
   260  		return new(h10)
   261  	case 35:
   262  		return &hashComposite{
   263  			ha: newHasher(3),
   264  			hb: &hashRolling{jump: 4},
   265  		}
   266  	case 40:
   267  		return &hashForgetfulChain{
   268  			bucketBits:              15,
   269  			numBanks:                1,
   270  			bankBits:                16,
   271  			numLastDistancesToCheck: 4,
   272  		}
   273  	case 41:
   274  		return &hashForgetfulChain{
   275  			bucketBits:              15,
   276  			numBanks:                1,
   277  			bankBits:                16,
   278  			numLastDistancesToCheck: 10,
   279  		}
   280  	case 42:
   281  		return &hashForgetfulChain{
   282  			bucketBits:              15,
   283  			numBanks:                512,
   284  			bankBits:                9,
   285  			numLastDistancesToCheck: 16,
   286  		}
   287  	case 54:
   288  		return &hashLongestMatchQuickly{
   289  			bucketBits:    20,
   290  			bucketSweep:   4,
   291  			hashLen:       7,
   292  			useDictionary: false,
   293  		}
   294  	case 55:
   295  		return &hashComposite{
   296  			ha: newHasher(54),
   297  			hb: &hashRolling{jump: 4},
   298  		}
   299  	case 65:
   300  		return &hashComposite{
   301  			ha: newHasher(6),
   302  			hb: &hashRolling{jump: 1},
   303  		}
   304  	}
   305  
   306  	panic(fmt.Sprintf("unknown hasher type: %d", typ))
   307  }
   308  
   309  func hasherSetup(handle *hasherHandle, params *encoderParams, data []byte, position uint, input_size uint, is_last bool) {
   310  	var self hasherHandle = nil
   311  	var common *hasherCommon = nil
   312  	var one_shot bool = (position == 0 && is_last)
   313  	if *handle == nil {
   314  		chooseHasher(params, &params.hasher)
   315  		self = newHasher(params.hasher.type_)
   316  
   317  		*handle = self
   318  		common = self.Common()
   319  		common.params = params.hasher
   320  		self.Initialize(params)
   321  	}
   322  
   323  	self = *handle
   324  	common = self.Common()
   325  	if !common.is_prepared_ {
   326  		self.Prepare(one_shot, input_size, data)
   327  
   328  		if position == 0 {
   329  			common.dict_num_lookups = 0
   330  			common.dict_num_matches = 0
   331  		}
   332  
   333  		common.is_prepared_ = true
   334  	}
   335  }
   336  
   337  func initOrStitchToPreviousBlock(handle *hasherHandle, data []byte, mask uint, params *encoderParams, position uint, input_size uint, is_last bool) {
   338  	var self hasherHandle
   339  	hasherSetup(handle, params, data, position, input_size, is_last)
   340  	self = *handle
   341  	self.StitchToPreviousBlock(input_size, position, data, mask)
   342  }