github.com/andybalholm/brotli@v1.0.6/hash_longest_match_quickly.go (about)

     1  package brotli
     2  
     3  import "encoding/binary"
     4  
     5  /* Copyright 2010 Google Inc. All Rights Reserved.
     6  
     7     Distributed under MIT license.
     8     See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
     9  */
    10  
/* For bucketSweep == 1, enabling the dictionary lookup (useDictionary)
   makes compression a little faster (0.5% - 1%) and it compresses 0.15%
   better on small text and HTML inputs. */
    14  
    15  func (*hashLongestMatchQuickly) HashTypeLength() uint {
    16  	return 8
    17  }
    18  
    19  func (*hashLongestMatchQuickly) StoreLookahead() uint {
    20  	return 8
    21  }
    22  
    23  /* HashBytes is the function that chooses the bucket to place
    24     the address in. The HashLongestMatch and hashLongestMatchQuickly
    25     classes have separate, different implementations of hashing. */
    26  func (h *hashLongestMatchQuickly) HashBytes(data []byte) uint32 {
    27  	var hash uint64 = ((binary.LittleEndian.Uint64(data) << (64 - 8*h.hashLen)) * kHashMul64)
    28  
    29  	/* The higher bits contain more mixture from the multiplication,
    30  	   so we take our results from there. */
    31  	return uint32(hash >> (64 - h.bucketBits))
    32  }
    33  
/* A (forgetful) hash table to the data seen by the compressor, to
   help create backward references to previous data.

   Initialize allocates the table with (1 << bucketBits) + bucketSweep
   slots. Starting from the key computed by HashBytes, bucketSweep
   consecutive slots are used to store positions that hash to that key.

   bucketBits is the number of bits kept from the hash product, so keys
   are below 1 << bucketBits. bucketSweep is the number of consecutive
   slots, starting at a key, that Store may write and FindLongestMatch
   examines. hashLen is the number of leading input bytes that contribute
   to the hash. When useDictionary is set, FindLongestMatch also searches
   the static dictionary if no other candidate improved the score.
   buckets is the table itself; every slot holds a position truncated to
   uint32. */
type hashLongestMatchQuickly struct {
	hasherCommon

	bucketBits    uint
	bucketSweep   int
	hashLen       uint
	useDictionary bool

	buckets []uint32
}
    49  
    50  func (h *hashLongestMatchQuickly) Initialize(params *encoderParams) {
    51  	h.buckets = make([]uint32, 1<<h.bucketBits+h.bucketSweep)
    52  }
    53  
    54  func (h *hashLongestMatchQuickly) Prepare(one_shot bool, input_size uint, data []byte) {
    55  	var partial_prepare_threshold uint = (4 << h.bucketBits) >> 7
    56  	/* Partial preparation is 100 times slower (per socket). */
    57  	if one_shot && input_size <= partial_prepare_threshold {
    58  		var i uint
    59  		for i = 0; i < input_size; i++ {
    60  			var key uint32 = h.HashBytes(data[i:])
    61  			for j := 0; j < h.bucketSweep; j++ {
    62  				h.buckets[key+uint32(j)] = 0
    63  			}
    64  		}
    65  	} else {
    66  		/* It is not strictly necessary to fill this buffer here, but
    67  		   not filling will make the results of the compression stochastic
    68  		   (but correct). This is because random data would cause the
    69  		   system to find accidentally good backward references here and there. */
    70  		for i := range h.buckets {
    71  			h.buckets[i] = 0
    72  		}
    73  	}
    74  }
    75  
    76  /* Look at 5 bytes at &data[ix & mask].
    77     Compute a hash from these, and store the value somewhere within
    78     [ix .. ix+3]. */
    79  func (h *hashLongestMatchQuickly) Store(data []byte, mask uint, ix uint) {
    80  	var key uint32 = h.HashBytes(data[ix&mask:])
    81  	var off uint32 = uint32(ix>>3) % uint32(h.bucketSweep)
    82  	/* Wiggle the value with the bucket sweep range. */
    83  	h.buckets[key+off] = uint32(ix)
    84  }
    85  
    86  func (h *hashLongestMatchQuickly) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
    87  	var i uint
    88  	for i = ix_start; i < ix_end; i++ {
    89  		h.Store(data, mask, i)
    90  	}
    91  }
    92  
    93  func (h *hashLongestMatchQuickly) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
    94  	if num_bytes >= h.HashTypeLength()-1 && position >= 3 {
    95  		/* Prepare the hashes for three last bytes of the last write.
    96  		   These could not be calculated before, since they require knowledge
    97  		   of both the previous and the current block. */
    98  		h.Store(ringbuffer, ringbuffer_mask, position-3)
    99  		h.Store(ringbuffer, ringbuffer_mask, position-2)
   100  		h.Store(ringbuffer, ringbuffer_mask, position-1)
   101  	}
   102  }
   103  
/* PrepareDistanceCache is a no-op for this hasher: distance_cache is
   left untouched. */
func (*hashLongestMatchQuickly) PrepareDistanceCache(distance_cache []int) {
}
   106  
   107  /* Find a longest backward match of &data[cur_ix & ring_buffer_mask]
   108     up to the length of max_length and stores the position cur_ix in the
   109     hash table.
   110  
   111     Does not look for matches longer than max_length.
   112     Does not look for matches further away than max_backward.
   113     Writes the best match into |out|.
   114     |out|->score is updated only if a better match is found. */
   115  func (h *hashLongestMatchQuickly) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
   116  	var best_len_in uint = out.len
   117  	var cur_ix_masked uint = cur_ix & ring_buffer_mask
   118  	var key uint32 = h.HashBytes(data[cur_ix_masked:])
   119  	var compare_char int = int(data[cur_ix_masked+best_len_in])
   120  	var min_score uint = out.score
   121  	var best_score uint = out.score
   122  	var best_len uint = best_len_in
   123  	var cached_backward uint = uint(distance_cache[0])
   124  	var prev_ix uint = cur_ix - cached_backward
   125  	var bucket []uint32
   126  	out.len_code_delta = 0
   127  	if prev_ix < cur_ix {
   128  		prev_ix &= uint(uint32(ring_buffer_mask))
   129  		if compare_char == int(data[prev_ix+best_len]) {
   130  			var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
   131  			if len >= 4 {
   132  				var score uint = backwardReferenceScoreUsingLastDistance(uint(len))
   133  				if best_score < score {
   134  					best_score = score
   135  					best_len = uint(len)
   136  					out.len = uint(len)
   137  					out.distance = cached_backward
   138  					out.score = best_score
   139  					compare_char = int(data[cur_ix_masked+best_len])
   140  					if h.bucketSweep == 1 {
   141  						h.buckets[key] = uint32(cur_ix)
   142  						return
   143  					}
   144  				}
   145  			}
   146  		}
   147  	}
   148  
   149  	if h.bucketSweep == 1 {
   150  		var backward uint
   151  		var len uint
   152  
   153  		/* Only one to look for, don't bother to prepare for a loop. */
   154  		prev_ix = uint(h.buckets[key])
   155  
   156  		h.buckets[key] = uint32(cur_ix)
   157  		backward = cur_ix - prev_ix
   158  		prev_ix &= uint(uint32(ring_buffer_mask))
   159  		if compare_char != int(data[prev_ix+best_len_in]) {
   160  			return
   161  		}
   162  
   163  		if backward == 0 || backward > max_backward {
   164  			return
   165  		}
   166  
   167  		len = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
   168  		if len >= 4 {
   169  			var score uint = backwardReferenceScore(uint(len), backward)
   170  			if best_score < score {
   171  				out.len = uint(len)
   172  				out.distance = backward
   173  				out.score = score
   174  				return
   175  			}
   176  		}
   177  	} else {
   178  		bucket = h.buckets[key:]
   179  		var i int
   180  		prev_ix = uint(bucket[0])
   181  		bucket = bucket[1:]
   182  		for i = 0; i < h.bucketSweep; (func() { i++; tmp3 := bucket; bucket = bucket[1:]; prev_ix = uint(tmp3[0]) })() {
   183  			var backward uint = cur_ix - prev_ix
   184  			var len uint
   185  			prev_ix &= uint(uint32(ring_buffer_mask))
   186  			if compare_char != int(data[prev_ix+best_len]) {
   187  				continue
   188  			}
   189  
   190  			if backward == 0 || backward > max_backward {
   191  				continue
   192  			}
   193  
   194  			len = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
   195  			if len >= 4 {
   196  				var score uint = backwardReferenceScore(uint(len), backward)
   197  				if best_score < score {
   198  					best_score = score
   199  					best_len = uint(len)
   200  					out.len = best_len
   201  					out.distance = backward
   202  					out.score = score
   203  					compare_char = int(data[cur_ix_masked+best_len])
   204  				}
   205  			}
   206  		}
   207  	}
   208  
   209  	if h.useDictionary && min_score == out.score {
   210  		searchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, true)
   211  	}
   212  
   213  	h.buckets[key+uint32((cur_ix>>3)%uint(h.bucketSweep))] = uint32(cur_ix)
   214  }