github.com/andybalholm/brotli@v1.0.6/hash.go (about) 1 package brotli 2 3 import ( 4 "encoding/binary" 5 "fmt" 6 ) 7 8 type hasherCommon struct { 9 params hasherParams 10 is_prepared_ bool 11 dict_num_lookups uint 12 dict_num_matches uint 13 } 14 15 func (h *hasherCommon) Common() *hasherCommon { 16 return h 17 } 18 19 type hasherHandle interface { 20 Common() *hasherCommon 21 Initialize(params *encoderParams) 22 Prepare(one_shot bool, input_size uint, data []byte) 23 StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) 24 HashTypeLength() uint 25 StoreLookahead() uint 26 PrepareDistanceCache(distance_cache []int) 27 FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) 28 StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) 29 Store(data []byte, mask uint, ix uint) 30 } 31 32 const kCutoffTransformsCount uint32 = 10 33 34 /* 0, 12, 27, 23, 42, 63, 56, 48, 59, 64 */ 35 /* 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */ 36 const kCutoffTransforms uint64 = 0x071B520ADA2D3200 37 38 type hasherSearchResult struct { 39 len uint 40 distance uint 41 score uint 42 len_code_delta int 43 } 44 45 /* kHashMul32 multiplier has these properties: 46 * The multiplier must be odd. Otherwise we may lose the highest bit. 47 * No long streaks of ones or zeros. 48 * There is no effort to ensure that it is a prime, the oddity is enough 49 for this use. 50 * The number has been tuned heuristically against compression benchmarks. */ 51 const kHashMul32 uint32 = 0x1E35A7BD 52 53 const kHashMul64 uint64 = 0x1E35A7BD1E35A7BD 54 55 const kHashMul64Long uint64 = 0x1FE35A7BD3579BD3 56 57 func hash14(data []byte) uint32 { 58 var h uint32 = binary.LittleEndian.Uint32(data) * kHashMul32 59 60 /* The higher bits contain more mixture from the multiplication, 61 so we take our results from there. */ 62 return h >> (32 - 14) 63 } 64 65 func prepareDistanceCache(distance_cache []int, num_distances int) { 66 if num_distances > 4 { 67 var last_distance int = distance_cache[0] 68 distance_cache[4] = last_distance - 1 69 distance_cache[5] = last_distance + 1 70 distance_cache[6] = last_distance - 2 71 distance_cache[7] = last_distance + 2 72 distance_cache[8] = last_distance - 3 73 distance_cache[9] = last_distance + 3 74 if num_distances > 10 { 75 var next_last_distance int = distance_cache[1] 76 distance_cache[10] = next_last_distance - 1 77 distance_cache[11] = next_last_distance + 1 78 distance_cache[12] = next_last_distance - 2 79 distance_cache[13] = next_last_distance + 2 80 distance_cache[14] = next_last_distance - 3 81 distance_cache[15] = next_last_distance + 3 82 } 83 } 84 } 85 86 const literalByteScore = 135 87 88 const distanceBitPenalty = 30 89 90 /* Score must be positive after applying maximal penalty. */ 91 const scoreBase = (distanceBitPenalty * 8 * 8) 92 93 /* Usually, we always choose the longest backward reference. This function 94 allows for the exception of that rule. 95 96 If we choose a backward reference that is further away, it will 97 usually be coded with more bits. We approximate this by assuming 98 log2(distance). If the distance can be expressed in terms of the 99 last four distances, we use some heuristic constants to estimate 100 the bits cost. For the first up to four literals we use the bit 101 cost of the literals from the literal cost model, after that we 102 use the average bit cost of the cost model. 103 104 This function is used to sometimes discard a longer backward reference 105 when it is not much longer and the bit cost for encoding it is more 106 than the saved literals. 107 108 backward_reference_offset MUST be positive. */ 109 func backwardReferenceScore(copy_length uint, backward_reference_offset uint) uint { 110 return scoreBase + literalByteScore*uint(copy_length) - distanceBitPenalty*uint(log2FloorNonZero(backward_reference_offset)) 111 } 112 113 func backwardReferenceScoreUsingLastDistance(copy_length uint) uint { 114 return literalByteScore*uint(copy_length) + scoreBase + 15 115 } 116 117 func backwardReferencePenaltyUsingLastDistance(distance_short_code uint) uint { 118 return uint(39) + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE) 119 } 120 121 func testStaticDictionaryItem(dictionary *encoderDictionary, item uint, data []byte, max_length uint, max_backward uint, max_distance uint, out *hasherSearchResult) bool { 122 var len uint 123 var word_idx uint 124 var offset uint 125 var matchlen uint 126 var backward uint 127 var score uint 128 len = item & 0x1F 129 word_idx = item >> 5 130 offset = uint(dictionary.words.offsets_by_length[len]) + len*word_idx 131 if len > max_length { 132 return false 133 } 134 135 matchlen = findMatchLengthWithLimit(data, dictionary.words.data[offset:], uint(len)) 136 if matchlen+uint(dictionary.cutoffTransformsCount) <= len || matchlen == 0 { 137 return false 138 } 139 { 140 var cut uint = len - matchlen 141 var transform_id uint = (cut << 2) + uint((dictionary.cutoffTransforms>>(cut*6))&0x3F) 142 backward = max_backward + 1 + word_idx + (transform_id << dictionary.words.size_bits_by_length[len]) 143 } 144 145 if backward > max_distance { 146 return false 147 } 148 149 score = backwardReferenceScore(matchlen, backward) 150 if score < out.score { 151 return false 152 } 153 154 out.len = matchlen 155 out.len_code_delta = int(len) - int(matchlen) 156 out.distance = backward 157 out.score = score 158 return true 159 } 160 161 func searchInStaticDictionary(dictionary *encoderDictionary, handle hasherHandle, data []byte, max_length uint, max_backward uint, max_distance uint, out *hasherSearchResult, shallow bool) { 162 var key uint 163 var i uint 164 var self *hasherCommon = handle.Common() 165 if self.dict_num_matches < self.dict_num_lookups>>7 { 166 return 167 } 168 169 key = uint(hash14(data) << 1) 170 for i = 0; ; (func() { i++; key++ })() { 171 var tmp uint 172 if shallow { 173 tmp = 1 174 } else { 175 tmp = 2 176 } 177 if i >= tmp { 178 break 179 } 180 var item uint = uint(dictionary.hash_table[key]) 181 self.dict_num_lookups++ 182 if item != 0 { 183 var item_matches bool = testStaticDictionaryItem(dictionary, item, data, max_length, max_backward, max_distance, out) 184 if item_matches { 185 self.dict_num_matches++ 186 } 187 } 188 } 189 } 190 191 type backwardMatch struct { 192 distance uint32 193 length_and_code uint32 194 } 195 196 func initBackwardMatch(self *backwardMatch, dist uint, len uint) { 197 self.distance = uint32(dist) 198 self.length_and_code = uint32(len << 5) 199 } 200 201 func initDictionaryBackwardMatch(self *backwardMatch, dist uint, len uint, len_code uint) { 202 self.distance = uint32(dist) 203 var tmp uint 204 if len == len_code { 205 tmp = 0 206 } else { 207 tmp = len_code 208 } 209 self.length_and_code = uint32(len<<5 | tmp) 210 } 211 212 func backwardMatchLength(self *backwardMatch) uint { 213 return uint(self.length_and_code >> 5) 214 } 215 216 func backwardMatchLengthCode(self *backwardMatch) uint { 217 var code uint = uint(self.length_and_code) & 31 218 if code != 0 { 219 return code 220 } else { 221 return backwardMatchLength(self) 222 } 223 } 224 225 func hasherReset(handle hasherHandle) { 226 if handle == nil { 227 return 228 } 229 handle.Common().is_prepared_ = false 230 } 231 232 func newHasher(typ int) hasherHandle { 233 switch typ { 234 case 2: 235 return &hashLongestMatchQuickly{ 236 bucketBits: 16, 237 bucketSweep: 1, 238 hashLen: 5, 239 useDictionary: true, 240 } 241 case 3: 242 return &hashLongestMatchQuickly{ 243 bucketBits: 16, 244 bucketSweep: 2, 245 hashLen: 5, 246 useDictionary: false, 247 } 248 case 4: 249 return &hashLongestMatchQuickly{ 250 bucketBits: 17, 251 bucketSweep: 4, 252 hashLen: 5, 253 useDictionary: true, 254 } 255 case 5: 256 return new(h5) 257 case 6: 258 return new(h6) 259 case 10: 260 return new(h10) 261 case 35: 262 return &hashComposite{ 263 ha: newHasher(3), 264 hb: &hashRolling{jump: 4}, 265 } 266 case 40: 267 return &hashForgetfulChain{ 268 bucketBits: 15, 269 numBanks: 1, 270 bankBits: 16, 271 numLastDistancesToCheck: 4, 272 } 273 case 41: 274 return &hashForgetfulChain{ 275 bucketBits: 15, 276 numBanks: 1, 277 bankBits: 16, 278 numLastDistancesToCheck: 10, 279 } 280 case 42: 281 return &hashForgetfulChain{ 282 bucketBits: 15, 283 numBanks: 512, 284 bankBits: 9, 285 numLastDistancesToCheck: 16, 286 } 287 case 54: 288 return &hashLongestMatchQuickly{ 289 bucketBits: 20, 290 bucketSweep: 4, 291 hashLen: 7, 292 useDictionary: false, 293 } 294 case 55: 295 return &hashComposite{ 296 ha: newHasher(54), 297 hb: &hashRolling{jump: 4}, 298 } 299 case 65: 300 return &hashComposite{ 301 ha: newHasher(6), 302 hb: &hashRolling{jump: 1}, 303 } 304 } 305 306 panic(fmt.Sprintf("unknown hasher type: %d", typ)) 307 } 308 309 func hasherSetup(handle *hasherHandle, params *encoderParams, data []byte, position uint, input_size uint, is_last bool) { 310 var self hasherHandle = nil 311 var common *hasherCommon = nil 312 var one_shot bool = (position == 0 && is_last) 313 if *handle == nil { 314 chooseHasher(params, ¶ms.hasher) 315 self = newHasher(params.hasher.type_) 316 317 *handle = self 318 common = self.Common() 319 common.params = params.hasher 320 self.Initialize(params) 321 } 322 323 self = *handle 324 common = self.Common() 325 if !common.is_prepared_ { 326 self.Prepare(one_shot, input_size, data) 327 328 if position == 0 { 329 common.dict_num_lookups = 0 330 common.dict_num_matches = 0 331 } 332 333 common.is_prepared_ = true 334 } 335 } 336 337 func initOrStitchToPreviousBlock(handle *hasherHandle, data []byte, mask uint, params *encoderParams, position uint, input_size uint, is_last bool) { 338 var self hasherHandle 339 hasherSetup(handle, params, data, position, input_size, is_last) 340 self = *handle 341 self.StitchToPreviousBlock(input_size, position, data, mask) 342 }