github.com/andybalholm/brotli@v1.0.6/hash_longest_match_quickly.go (about) 1 package brotli 2 3 import "encoding/binary" 4 5 /* Copyright 2010 Google Inc. All Rights Reserved. 6 7 Distributed under MIT license. 8 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 9 */ 10 11 /* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression 12 a little faster (0.5% - 1%) and it compresses 0.15% better on small text 13 and HTML inputs. */ 14 15 func (*hashLongestMatchQuickly) HashTypeLength() uint { 16 return 8 17 } 18 19 func (*hashLongestMatchQuickly) StoreLookahead() uint { 20 return 8 21 } 22 23 /* HashBytes is the function that chooses the bucket to place 24 the address in. The HashLongestMatch and hashLongestMatchQuickly 25 classes have separate, different implementations of hashing. */ 26 func (h *hashLongestMatchQuickly) HashBytes(data []byte) uint32 { 27 var hash uint64 = ((binary.LittleEndian.Uint64(data) << (64 - 8*h.hashLen)) * kHashMul64) 28 29 /* The higher bits contain more mixture from the multiplication, 30 so we take our results from there. */ 31 return uint32(hash >> (64 - h.bucketBits)) 32 } 33 34 /* A (forgetful) hash table to the data seen by the compressor, to 35 help create backward references to previous data. 36 37 This is a hash map of fixed size (1 << 16). Starting from the 38 given index, 1 buckets are used to store values of a key. */ 39 type hashLongestMatchQuickly struct { 40 hasherCommon 41 42 bucketBits uint 43 bucketSweep int 44 hashLen uint 45 useDictionary bool 46 47 buckets []uint32 48 } 49 50 func (h *hashLongestMatchQuickly) Initialize(params *encoderParams) { 51 h.buckets = make([]uint32, 1<<h.bucketBits+h.bucketSweep) 52 } 53 54 func (h *hashLongestMatchQuickly) Prepare(one_shot bool, input_size uint, data []byte) { 55 var partial_prepare_threshold uint = (4 << h.bucketBits) >> 7 56 /* Partial preparation is 100 times slower (per socket). */ 57 if one_shot && input_size <= partial_prepare_threshold { 58 var i uint 59 for i = 0; i < input_size; i++ { 60 var key uint32 = h.HashBytes(data[i:]) 61 for j := 0; j < h.bucketSweep; j++ { 62 h.buckets[key+uint32(j)] = 0 63 } 64 } 65 } else { 66 /* It is not strictly necessary to fill this buffer here, but 67 not filling will make the results of the compression stochastic 68 (but correct). This is because random data would cause the 69 system to find accidentally good backward references here and there. */ 70 for i := range h.buckets { 71 h.buckets[i] = 0 72 } 73 } 74 } 75 76 /* Look at 5 bytes at &data[ix & mask]. 77 Compute a hash from these, and store the value somewhere within 78 [ix .. ix+3]. */ 79 func (h *hashLongestMatchQuickly) Store(data []byte, mask uint, ix uint) { 80 var key uint32 = h.HashBytes(data[ix&mask:]) 81 var off uint32 = uint32(ix>>3) % uint32(h.bucketSweep) 82 /* Wiggle the value with the bucket sweep range. */ 83 h.buckets[key+off] = uint32(ix) 84 } 85 86 func (h *hashLongestMatchQuickly) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) { 87 var i uint 88 for i = ix_start; i < ix_end; i++ { 89 h.Store(data, mask, i) 90 } 91 } 92 93 func (h *hashLongestMatchQuickly) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) { 94 if num_bytes >= h.HashTypeLength()-1 && position >= 3 { 95 /* Prepare the hashes for three last bytes of the last write. 96 These could not be calculated before, since they require knowledge 97 of both the previous and the current block. */ 98 h.Store(ringbuffer, ringbuffer_mask, position-3) 99 h.Store(ringbuffer, ringbuffer_mask, position-2) 100 h.Store(ringbuffer, ringbuffer_mask, position-1) 101 } 102 } 103 104 func (*hashLongestMatchQuickly) PrepareDistanceCache(distance_cache []int) { 105 } 106 107 /* Find a longest backward match of &data[cur_ix & ring_buffer_mask] 108 up to the length of max_length and stores the position cur_ix in the 109 hash table. 110 111 Does not look for matches longer than max_length. 112 Does not look for matches further away than max_backward. 113 Writes the best match into |out|. 114 |out|->score is updated only if a better match is found. */ 115 func (h *hashLongestMatchQuickly) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) { 116 var best_len_in uint = out.len 117 var cur_ix_masked uint = cur_ix & ring_buffer_mask 118 var key uint32 = h.HashBytes(data[cur_ix_masked:]) 119 var compare_char int = int(data[cur_ix_masked+best_len_in]) 120 var min_score uint = out.score 121 var best_score uint = out.score 122 var best_len uint = best_len_in 123 var cached_backward uint = uint(distance_cache[0]) 124 var prev_ix uint = cur_ix - cached_backward 125 var bucket []uint32 126 out.len_code_delta = 0 127 if prev_ix < cur_ix { 128 prev_ix &= uint(uint32(ring_buffer_mask)) 129 if compare_char == int(data[prev_ix+best_len]) { 130 var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length) 131 if len >= 4 { 132 var score uint = backwardReferenceScoreUsingLastDistance(uint(len)) 133 if best_score < score { 134 best_score = score 135 best_len = uint(len) 136 out.len = uint(len) 137 out.distance = cached_backward 138 out.score = best_score 139 compare_char = int(data[cur_ix_masked+best_len]) 140 if h.bucketSweep == 1 { 141 h.buckets[key] = uint32(cur_ix) 142 return 143 } 144 } 145 } 146 } 147 } 148 149 if h.bucketSweep == 1 { 150 var backward uint 151 var len uint 152 153 /* Only one to look for, don't bother to prepare for a loop. */ 154 prev_ix = uint(h.buckets[key]) 155 156 h.buckets[key] = uint32(cur_ix) 157 backward = cur_ix - prev_ix 158 prev_ix &= uint(uint32(ring_buffer_mask)) 159 if compare_char != int(data[prev_ix+best_len_in]) { 160 return 161 } 162 163 if backward == 0 || backward > max_backward { 164 return 165 } 166 167 len = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length) 168 if len >= 4 { 169 var score uint = backwardReferenceScore(uint(len), backward) 170 if best_score < score { 171 out.len = uint(len) 172 out.distance = backward 173 out.score = score 174 return 175 } 176 } 177 } else { 178 bucket = h.buckets[key:] 179 var i int 180 prev_ix = uint(bucket[0]) 181 bucket = bucket[1:] 182 for i = 0; i < h.bucketSweep; (func() { i++; tmp3 := bucket; bucket = bucket[1:]; prev_ix = uint(tmp3[0]) })() { 183 var backward uint = cur_ix - prev_ix 184 var len uint 185 prev_ix &= uint(uint32(ring_buffer_mask)) 186 if compare_char != int(data[prev_ix+best_len]) { 187 continue 188 } 189 190 if backward == 0 || backward > max_backward { 191 continue 192 } 193 194 len = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length) 195 if len >= 4 { 196 var score uint = backwardReferenceScore(uint(len), backward) 197 if best_score < score { 198 best_score = score 199 best_len = uint(len) 200 out.len = best_len 201 out.distance = backward 202 out.score = score 203 compare_char = int(data[cur_ix_masked+best_len]) 204 } 205 } 206 } 207 } 208 209 if h.useDictionary && min_score == out.score { 210 searchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, true) 211 } 212 213 h.buckets[key+uint32((cur_ix>>3)%uint(h.bucketSweep))] = uint32(cur_ix) 214 }