github.com/andybalholm/brotli@v1.0.6/hash_forgetful_chain.go (about) 1 package brotli 2 3 import "encoding/binary" 4 5 /* Copyright 2016 Google Inc. All Rights Reserved. 6 7 Distributed under MIT license. 8 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 9 */ 10 11 func (*hashForgetfulChain) HashTypeLength() uint { 12 return 4 13 } 14 15 func (*hashForgetfulChain) StoreLookahead() uint { 16 return 4 17 } 18 19 /* HashBytes is the function that chooses the bucket to place the address in.*/ 20 func (h *hashForgetfulChain) HashBytes(data []byte) uint { 21 var hash uint32 = binary.LittleEndian.Uint32(data) * kHashMul32 22 23 /* The higher bits contain more mixture from the multiplication, 24 so we take our results from there. */ 25 return uint(hash >> (32 - h.bucketBits)) 26 } 27 28 type slot struct { 29 delta uint16 30 next uint16 31 } 32 33 /* A (forgetful) hash table to the data seen by the compressor, to 34 help create backward references to previous data. 35 36 Hashes are stored in chains which are bucketed to groups. Group of chains 37 share a storage "bank". When more than "bank size" chain nodes are added, 38 oldest nodes are replaced; this way several chains may share a tail. */ 39 type hashForgetfulChain struct { 40 hasherCommon 41 42 bucketBits uint 43 numBanks uint 44 bankBits uint 45 numLastDistancesToCheck int 46 47 addr []uint32 48 head []uint16 49 tiny_hash [65536]byte 50 banks [][]slot 51 free_slot_idx []uint16 52 max_hops uint 53 } 54 55 func (h *hashForgetfulChain) Initialize(params *encoderParams) { 56 var q uint 57 if params.quality > 6 { 58 q = 7 59 } else { 60 q = 8 61 } 62 h.max_hops = q << uint(params.quality-4) 63 64 bankSize := 1 << h.bankBits 65 bucketSize := 1 << h.bucketBits 66 67 h.addr = make([]uint32, bucketSize) 68 h.head = make([]uint16, bucketSize) 69 h.banks = make([][]slot, h.numBanks) 70 for i := range h.banks { 71 h.banks[i] = make([]slot, bankSize) 72 } 73 h.free_slot_idx = make([]uint16, h.numBanks) 74 } 75 76 func (h *hashForgetfulChain) Prepare(one_shot bool, input_size uint, data []byte) { 77 var partial_prepare_threshold uint = (1 << h.bucketBits) >> 6 78 /* Partial preparation is 100 times slower (per socket). */ 79 if one_shot && input_size <= partial_prepare_threshold { 80 var i uint 81 for i = 0; i < input_size; i++ { 82 var bucket uint = h.HashBytes(data[i:]) 83 84 /* See InitEmpty comment. */ 85 h.addr[bucket] = 0xCCCCCCCC 86 87 h.head[bucket] = 0xCCCC 88 } 89 } else { 90 /* Fill |addr| array with 0xCCCCCCCC value. Because of wrapping, position 91 processed by hasher never reaches 3GB + 64M; this makes all new chains 92 to be terminated after the first node. */ 93 for i := range h.addr { 94 h.addr[i] = 0xCCCCCCCC 95 } 96 97 for i := range h.head { 98 h.head[i] = 0 99 } 100 } 101 102 h.tiny_hash = [65536]byte{} 103 for i := range h.free_slot_idx { 104 h.free_slot_idx[i] = 0 105 } 106 } 107 108 /* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and prepend 109 node to corresponding chain; also update tiny_hash for current position. */ 110 func (h *hashForgetfulChain) Store(data []byte, mask uint, ix uint) { 111 var key uint = h.HashBytes(data[ix&mask:]) 112 var bank uint = key & (h.numBanks - 1) 113 idx := uint(h.free_slot_idx[bank]) & ((1 << h.bankBits) - 1) 114 h.free_slot_idx[bank]++ 115 var delta uint = ix - uint(h.addr[key]) 116 h.tiny_hash[uint16(ix)] = byte(key) 117 if delta > 0xFFFF { 118 delta = 0xFFFF 119 } 120 h.banks[bank][idx].delta = uint16(delta) 121 h.banks[bank][idx].next = h.head[key] 122 h.addr[key] = uint32(ix) 123 h.head[key] = uint16(idx) 124 } 125 126 func (h *hashForgetfulChain) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) { 127 var i uint 128 for i = ix_start; i < ix_end; i++ { 129 h.Store(data, mask, i) 130 } 131 } 132 133 func (h *hashForgetfulChain) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ring_buffer_mask uint) { 134 if num_bytes >= h.HashTypeLength()-1 && position >= 3 { 135 /* Prepare the hashes for three last bytes of the last write. 136 These could not be calculated before, since they require knowledge 137 of both the previous and the current block. */ 138 h.Store(ringbuffer, ring_buffer_mask, position-3) 139 h.Store(ringbuffer, ring_buffer_mask, position-2) 140 h.Store(ringbuffer, ring_buffer_mask, position-1) 141 } 142 } 143 144 func (h *hashForgetfulChain) PrepareDistanceCache(distance_cache []int) { 145 prepareDistanceCache(distance_cache, h.numLastDistancesToCheck) 146 } 147 148 /* Find a longest backward match of &data[cur_ix] up to the length of 149 max_length and stores the position cur_ix in the hash table. 150 151 REQUIRES: PrepareDistanceCachehashForgetfulChain must be invoked for current distance cache 152 values; if this method is invoked repeatedly with the same distance 153 cache values, it is enough to invoke PrepareDistanceCachehashForgetfulChain once. 154 155 Does not look for matches longer than max_length. 156 Does not look for matches further away than max_backward. 157 Writes the best match into |out|. 158 |out|->score is updated only if a better match is found. */ 159 func (h *hashForgetfulChain) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) { 160 var cur_ix_masked uint = cur_ix & ring_buffer_mask 161 var min_score uint = out.score 162 var best_score uint = out.score 163 var best_len uint = out.len 164 var key uint = h.HashBytes(data[cur_ix_masked:]) 165 var tiny_hash byte = byte(key) 166 /* Don't accept a short copy from far away. */ 167 out.len = 0 168 169 out.len_code_delta = 0 170 171 /* Try last distance first. */ 172 for i := 0; i < h.numLastDistancesToCheck; i++ { 173 var backward uint = uint(distance_cache[i]) 174 var prev_ix uint = (cur_ix - backward) 175 176 /* For distance code 0 we want to consider 2-byte matches. */ 177 if i > 0 && h.tiny_hash[uint16(prev_ix)] != tiny_hash { 178 continue 179 } 180 if prev_ix >= cur_ix || backward > max_backward { 181 continue 182 } 183 184 prev_ix &= ring_buffer_mask 185 { 186 var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length) 187 if len >= 2 { 188 var score uint = backwardReferenceScoreUsingLastDistance(uint(len)) 189 if best_score < score { 190 if i != 0 { 191 score -= backwardReferencePenaltyUsingLastDistance(uint(i)) 192 } 193 if best_score < score { 194 best_score = score 195 best_len = uint(len) 196 out.len = best_len 197 out.distance = backward 198 out.score = best_score 199 } 200 } 201 } 202 } 203 } 204 { 205 var bank uint = key & (h.numBanks - 1) 206 var backward uint = 0 207 var hops uint = h.max_hops 208 var delta uint = cur_ix - uint(h.addr[key]) 209 var slot uint = uint(h.head[key]) 210 for { 211 tmp6 := hops 212 hops-- 213 if tmp6 == 0 { 214 break 215 } 216 var prev_ix uint 217 var last uint = slot 218 backward += delta 219 if backward > max_backward { 220 break 221 } 222 prev_ix = (cur_ix - backward) & ring_buffer_mask 223 slot = uint(h.banks[bank][last].next) 224 delta = uint(h.banks[bank][last].delta) 225 if cur_ix_masked+best_len > ring_buffer_mask || prev_ix+best_len > ring_buffer_mask || data[cur_ix_masked+best_len] != data[prev_ix+best_len] { 226 continue 227 } 228 { 229 var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length) 230 if len >= 4 { 231 /* Comparing for >= 3 does not change the semantics, but just saves 232 for a few unnecessary binary logarithms in backward reference 233 score, since we are not interested in such short matches. */ 234 var score uint = backwardReferenceScore(uint(len), backward) 235 if best_score < score { 236 best_score = score 237 best_len = uint(len) 238 out.len = best_len 239 out.distance = backward 240 out.score = best_score 241 } 242 } 243 } 244 } 245 246 h.Store(data, ring_buffer_mask, cur_ix) 247 } 248 249 if out.score == min_score { 250 searchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, false) 251 } 252 }