github.com/andybalholm/brotli@v1.0.6/h10.go (about) 1 package brotli 2 3 import "encoding/binary" 4 5 /* Copyright 2016 Google Inc. All Rights Reserved. 6 7 Distributed under MIT license. 8 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 9 */ 10 11 func (*h10) HashTypeLength() uint { 12 return 4 13 } 14 15 func (*h10) StoreLookahead() uint { 16 return 128 17 } 18 19 func hashBytesH10(data []byte) uint32 { 20 var h uint32 = binary.LittleEndian.Uint32(data) * kHashMul32 21 22 /* The higher bits contain more mixture from the multiplication, 23 so we take our results from there. */ 24 return h >> (32 - 17) 25 } 26 27 /* A (forgetful) hash table where each hash bucket contains a binary tree of 28 sequences whose first 4 bytes share the same hash code. 29 Each sequence is 128 long and is identified by its starting 30 position in the input data. The binary tree is sorted by the lexicographic 31 order of the sequences, and it is also a max-heap with respect to the 32 starting positions. */ 33 type h10 struct { 34 hasherCommon 35 window_mask_ uint 36 buckets_ [1 << 17]uint32 37 invalid_pos_ uint32 38 forest []uint32 39 } 40 41 func (h *h10) Initialize(params *encoderParams) { 42 h.window_mask_ = (1 << params.lgwin) - 1 43 h.invalid_pos_ = uint32(0 - h.window_mask_) 44 var num_nodes uint = uint(1) << params.lgwin 45 h.forest = make([]uint32, 2*num_nodes) 46 } 47 48 func (h *h10) Prepare(one_shot bool, input_size uint, data []byte) { 49 var invalid_pos uint32 = h.invalid_pos_ 50 var i uint32 51 for i = 0; i < 1<<17; i++ { 52 h.buckets_[i] = invalid_pos 53 } 54 } 55 56 func leftChildIndexH10(self *h10, pos uint) uint { 57 return 2 * (pos & self.window_mask_) 58 } 59 60 func rightChildIndexH10(self *h10, pos uint) uint { 61 return 2*(pos&self.window_mask_) + 1 62 } 63 64 /* Stores the hash of the next 4 bytes and in a single tree-traversal, the 65 hash bucket's binary tree is searched for matches and is re-rooted at the 66 current position. 67 68 If less than 128 data is available, the hash bucket of the 69 current position is searched for matches, but the state of the hash table 70 is not changed, since we can not know the final sorting order of the 71 current (incomplete) sequence. 72 73 This function must be called with increasing cur_ix positions. */ 74 func storeAndFindMatchesH10(self *h10, data []byte, cur_ix uint, ring_buffer_mask uint, max_length uint, max_backward uint, best_len *uint, matches []backwardMatch) []backwardMatch { 75 var cur_ix_masked uint = cur_ix & ring_buffer_mask 76 var max_comp_len uint = brotli_min_size_t(max_length, 128) 77 var should_reroot_tree bool = (max_length >= 128) 78 var key uint32 = hashBytesH10(data[cur_ix_masked:]) 79 var forest []uint32 = self.forest 80 var prev_ix uint = uint(self.buckets_[key]) 81 var node_left uint = leftChildIndexH10(self, cur_ix) 82 var node_right uint = rightChildIndexH10(self, cur_ix) 83 var best_len_left uint = 0 84 var best_len_right uint = 0 85 var depth_remaining uint 86 /* The forest index of the rightmost node of the left subtree of the new 87 root, updated as we traverse and re-root the tree of the hash bucket. */ 88 89 /* The forest index of the leftmost node of the right subtree of the new 90 root, updated as we traverse and re-root the tree of the hash bucket. */ 91 92 /* The match length of the rightmost node of the left subtree of the new 93 root, updated as we traverse and re-root the tree of the hash bucket. */ 94 95 /* The match length of the leftmost node of the right subtree of the new 96 root, updated as we traverse and re-root the tree of the hash bucket. */ 97 if should_reroot_tree { 98 self.buckets_[key] = uint32(cur_ix) 99 } 100 101 for depth_remaining = 64; ; depth_remaining-- { 102 var backward uint = cur_ix - prev_ix 103 var prev_ix_masked uint = prev_ix & ring_buffer_mask 104 if backward == 0 || backward > max_backward || depth_remaining == 0 { 105 if should_reroot_tree { 106 forest[node_left] = self.invalid_pos_ 107 forest[node_right] = self.invalid_pos_ 108 } 109 110 break 111 } 112 { 113 var cur_len uint = brotli_min_size_t(best_len_left, best_len_right) 114 var len uint 115 assert(cur_len <= 128) 116 len = cur_len + findMatchLengthWithLimit(data[cur_ix_masked+cur_len:], data[prev_ix_masked+cur_len:], max_length-cur_len) 117 if matches != nil && len > *best_len { 118 *best_len = uint(len) 119 initBackwardMatch(&matches[0], backward, uint(len)) 120 matches = matches[1:] 121 } 122 123 if len >= max_comp_len { 124 if should_reroot_tree { 125 forest[node_left] = forest[leftChildIndexH10(self, prev_ix)] 126 forest[node_right] = forest[rightChildIndexH10(self, prev_ix)] 127 } 128 129 break 130 } 131 132 if data[cur_ix_masked+len] > data[prev_ix_masked+len] { 133 best_len_left = uint(len) 134 if should_reroot_tree { 135 forest[node_left] = uint32(prev_ix) 136 } 137 138 node_left = rightChildIndexH10(self, prev_ix) 139 prev_ix = uint(forest[node_left]) 140 } else { 141 best_len_right = uint(len) 142 if should_reroot_tree { 143 forest[node_right] = uint32(prev_ix) 144 } 145 146 node_right = leftChildIndexH10(self, prev_ix) 147 prev_ix = uint(forest[node_right]) 148 } 149 } 150 } 151 152 return matches 153 } 154 155 /* Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the 156 length of max_length and stores the position cur_ix in the hash table. 157 158 Sets *num_matches to the number of matches found, and stores the found 159 matches in matches[0] to matches[*num_matches - 1]. The matches will be 160 sorted by strictly increasing length and (non-strictly) increasing 161 distance. */ 162 func findAllMatchesH10(handle *h10, dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, cur_ix uint, max_length uint, max_backward uint, gap uint, params *encoderParams, matches []backwardMatch) uint { 163 var orig_matches []backwardMatch = matches 164 var cur_ix_masked uint = cur_ix & ring_buffer_mask 165 var best_len uint = 1 166 var short_match_max_backward uint 167 if params.quality != hqZopflificationQuality { 168 short_match_max_backward = 16 169 } else { 170 short_match_max_backward = 64 171 } 172 var stop uint = cur_ix - short_match_max_backward 173 var dict_matches [maxStaticDictionaryMatchLen + 1]uint32 174 var i uint 175 if cur_ix < short_match_max_backward { 176 stop = 0 177 } 178 for i = cur_ix - 1; i > stop && best_len <= 2; i-- { 179 var prev_ix uint = i 180 var backward uint = cur_ix - prev_ix 181 if backward > max_backward { 182 break 183 } 184 185 prev_ix &= ring_buffer_mask 186 if data[cur_ix_masked] != data[prev_ix] || data[cur_ix_masked+1] != data[prev_ix+1] { 187 continue 188 } 189 { 190 var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length) 191 if len > best_len { 192 best_len = uint(len) 193 initBackwardMatch(&matches[0], backward, uint(len)) 194 matches = matches[1:] 195 } 196 } 197 } 198 199 if best_len < max_length { 200 matches = storeAndFindMatchesH10(handle, data, cur_ix, ring_buffer_mask, max_length, max_backward, &best_len, matches) 201 } 202 203 for i = 0; i <= maxStaticDictionaryMatchLen; i++ { 204 dict_matches[i] = kInvalidMatch 205 } 206 { 207 var minlen uint = brotli_max_size_t(4, best_len+1) 208 if findAllStaticDictionaryMatches(dictionary, data[cur_ix_masked:], minlen, max_length, dict_matches[0:]) { 209 var maxlen uint = brotli_min_size_t(maxStaticDictionaryMatchLen, max_length) 210 var l uint 211 for l = minlen; l <= maxlen; l++ { 212 var dict_id uint32 = dict_matches[l] 213 if dict_id < kInvalidMatch { 214 var distance uint = max_backward + gap + uint(dict_id>>5) + 1 215 if distance <= params.dist.max_distance { 216 initDictionaryBackwardMatch(&matches[0], distance, l, uint(dict_id&31)) 217 matches = matches[1:] 218 } 219 } 220 } 221 } 222 } 223 224 return uint(-cap(matches) + cap(orig_matches)) 225 } 226 227 /* Stores the hash of the next 4 bytes and re-roots the binary tree at the 228 current sequence, without returning any matches. 229 REQUIRES: ix + 128 <= end-of-current-block */ 230 func (h *h10) Store(data []byte, mask uint, ix uint) { 231 var max_backward uint = h.window_mask_ - windowGap + 1 232 /* Maximum distance is window size - 16, see section 9.1. of the spec. */ 233 storeAndFindMatchesH10(h, data, ix, mask, 128, max_backward, nil, nil) 234 } 235 236 func (h *h10) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) { 237 var i uint = ix_start 238 var j uint = ix_start 239 if ix_start+63 <= ix_end { 240 i = ix_end - 63 241 } 242 243 if ix_start+512 <= i { 244 for ; j < i; j += 8 { 245 h.Store(data, mask, j) 246 } 247 } 248 249 for ; i < ix_end; i++ { 250 h.Store(data, mask, i) 251 } 252 } 253 254 func (h *h10) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) { 255 if num_bytes >= h.HashTypeLength()-1 && position >= 128 { 256 var i_start uint = position - 128 + 1 257 var i_end uint = brotli_min_size_t(position, i_start+num_bytes) 258 /* Store the last `128 - 1` positions in the hasher. 259 These could not be calculated before, since they require knowledge 260 of both the previous and the current block. */ 261 262 var i uint 263 for i = i_start; i < i_end; i++ { 264 /* Maximum distance is window size - 16, see section 9.1. of the spec. 265 Furthermore, we have to make sure that we don't look further back 266 from the start of the next block than the window size, otherwise we 267 could access already overwritten areas of the ring-buffer. */ 268 var max_backward uint = h.window_mask_ - brotli_max_size_t(windowGap-1, position-i) 269 270 /* We know that i + 128 <= position + num_bytes, i.e. the 271 end of the current block and that we have at least 272 128 tail in the ring-buffer. */ 273 storeAndFindMatchesH10(h, ringbuffer, i, ringbuffer_mask, 128, max_backward, nil, nil) 274 } 275 } 276 } 277 278 /* MAX_NUM_MATCHES == 64 + MAX_TREE_SEARCH_DEPTH */ 279 const maxNumMatchesH10 = 128 280 281 func (*h10) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) { 282 panic("unimplemented") 283 } 284 285 func (*h10) PrepareDistanceCache(distance_cache []int) { 286 panic("unimplemented") 287 }