github.com/andybalholm/brotli@v1.0.6/h5.go (about)

     1  package brotli
     2  
     3  import "encoding/binary"
     4  
     5  /* Copyright 2010 Google Inc. All Rights Reserved.
     6  
     7     Distributed under MIT license.
     8     See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
     9  */
    10  
    11  /* A (forgetful) hash table to the data seen by the compressor, to
    12     help create backward references to previous data.
    13  
    14     This is a hash map of fixed size (bucket_size_) to a ring buffer of
    15     fixed size (block_size_). The ring buffer contains the last block_size_
    16     index positions of the given hash key in the compressed data. */
    17  func (*h5) HashTypeLength() uint {
    18  	return 4
    19  }
    20  
    21  func (*h5) StoreLookahead() uint {
    22  	return 4
    23  }
    24  
    25  /* HashBytes is the function that chooses the bucket to place the address in. */
    26  func hashBytesH5(data []byte, shift int) uint32 {
    27  	var h uint32 = binary.LittleEndian.Uint32(data) * kHashMul32
    28  
    29  	/* The higher bits contain more mixture from the multiplication,
    30  	   so we take our results from there. */
    31  	return uint32(h >> uint(shift))
    32  }
    33  
    34  type h5 struct {
    35  	hasherCommon
    36  	bucket_size_ uint
    37  	block_size_  uint
    38  	hash_shift_  int
    39  	block_mask_  uint32
    40  	num          []uint16
    41  	buckets      []uint32
    42  }
    43  
    44  func (h *h5) Initialize(params *encoderParams) {
    45  	h.hash_shift_ = 32 - h.params.bucket_bits
    46  	h.bucket_size_ = uint(1) << uint(h.params.bucket_bits)
    47  	h.block_size_ = uint(1) << uint(h.params.block_bits)
    48  	h.block_mask_ = uint32(h.block_size_ - 1)
    49  	h.num = make([]uint16, h.bucket_size_)
    50  	h.buckets = make([]uint32, h.block_size_*h.bucket_size_)
    51  }
    52  
    53  func (h *h5) Prepare(one_shot bool, input_size uint, data []byte) {
    54  	var num []uint16 = h.num
    55  	var partial_prepare_threshold uint = h.bucket_size_ >> 6
    56  	/* Partial preparation is 100 times slower (per socket). */
    57  	if one_shot && input_size <= partial_prepare_threshold {
    58  		var i uint
    59  		for i = 0; i < input_size; i++ {
    60  			var key uint32 = hashBytesH5(data[i:], h.hash_shift_)
    61  			num[key] = 0
    62  		}
    63  	} else {
    64  		for i := 0; i < int(h.bucket_size_); i++ {
    65  			num[i] = 0
    66  		}
    67  	}
    68  }
    69  
    70  /* Look at 4 bytes at &data[ix & mask].
    71     Compute a hash from these, and store the value of ix at that position. */
    72  func (h *h5) Store(data []byte, mask uint, ix uint) {
    73  	var num []uint16 = h.num
    74  	var key uint32 = hashBytesH5(data[ix&mask:], h.hash_shift_)
    75  	var minor_ix uint = uint(num[key]) & uint(h.block_mask_)
    76  	var offset uint = minor_ix + uint(key<<uint(h.params.block_bits))
    77  	h.buckets[offset] = uint32(ix)
    78  	num[key]++
    79  }
    80  
    81  func (h *h5) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
    82  	var i uint
    83  	for i = ix_start; i < ix_end; i++ {
    84  		h.Store(data, mask, i)
    85  	}
    86  }
    87  
    88  func (h *h5) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
    89  	if num_bytes >= h.HashTypeLength()-1 && position >= 3 {
    90  		/* Prepare the hashes for three last bytes of the last write.
    91  		   These could not be calculated before, since they require knowledge
    92  		   of both the previous and the current block. */
    93  		h.Store(ringbuffer, ringbuffer_mask, position-3)
    94  		h.Store(ringbuffer, ringbuffer_mask, position-2)
    95  		h.Store(ringbuffer, ringbuffer_mask, position-1)
    96  	}
    97  }
    98  
    99  func (h *h5) PrepareDistanceCache(distance_cache []int) {
   100  	prepareDistanceCache(distance_cache, h.params.num_last_distances_to_check)
   101  }
   102  
   103  /* Find a longest backward match of &data[cur_ix] up to the length of
   104     max_length and stores the position cur_ix in the hash table.
   105  
   106     REQUIRES: PrepareDistanceCacheH5 must be invoked for current distance cache
   107               values; if this method is invoked repeatedly with the same distance
   108               cache values, it is enough to invoke PrepareDistanceCacheH5 once.
   109  
   110     Does not look for matches longer than max_length.
   111     Does not look for matches further away than max_backward.
   112     Writes the best match into |out|.
   113     |out|->score is updated only if a better match is found. */
   114  func (h *h5) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
   115  	var num []uint16 = h.num
   116  	var buckets []uint32 = h.buckets
   117  	var cur_ix_masked uint = cur_ix & ring_buffer_mask
   118  	var min_score uint = out.score
   119  	var best_score uint = out.score
   120  	var best_len uint = out.len
   121  	var i uint
   122  	var bucket []uint32
   123  	/* Don't accept a short copy from far away. */
   124  	out.len = 0
   125  
   126  	out.len_code_delta = 0
   127  
   128  	/* Try last distance first. */
   129  	for i = 0; i < uint(h.params.num_last_distances_to_check); i++ {
   130  		var backward uint = uint(distance_cache[i])
   131  		var prev_ix uint = uint(cur_ix - backward)
   132  		if prev_ix >= cur_ix {
   133  			continue
   134  		}
   135  
   136  		if backward > max_backward {
   137  			continue
   138  		}
   139  
   140  		prev_ix &= ring_buffer_mask
   141  
   142  		if cur_ix_masked+best_len > ring_buffer_mask || prev_ix+best_len > ring_buffer_mask || data[cur_ix_masked+best_len] != data[prev_ix+best_len] {
   143  			continue
   144  		}
   145  		{
   146  			var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
   147  			if len >= 3 || (len == 2 && i < 2) {
   148  				/* Comparing for >= 2 does not change the semantics, but just saves for
   149  				   a few unnecessary binary logarithms in backward reference score,
   150  				   since we are not interested in such short matches. */
   151  				var score uint = backwardReferenceScoreUsingLastDistance(uint(len))
   152  				if best_score < score {
   153  					if i != 0 {
   154  						score -= backwardReferencePenaltyUsingLastDistance(i)
   155  					}
   156  					if best_score < score {
   157  						best_score = score
   158  						best_len = uint(len)
   159  						out.len = best_len
   160  						out.distance = backward
   161  						out.score = best_score
   162  					}
   163  				}
   164  			}
   165  		}
   166  	}
   167  	{
   168  		var key uint32 = hashBytesH5(data[cur_ix_masked:], h.hash_shift_)
   169  		bucket = buckets[key<<uint(h.params.block_bits):]
   170  		var down uint
   171  		if uint(num[key]) > h.block_size_ {
   172  			down = uint(num[key]) - h.block_size_
   173  		} else {
   174  			down = 0
   175  		}
   176  		for i = uint(num[key]); i > down; {
   177  			var prev_ix uint
   178  			i--
   179  			prev_ix = uint(bucket[uint32(i)&h.block_mask_])
   180  			var backward uint = cur_ix - prev_ix
   181  			if backward > max_backward {
   182  				break
   183  			}
   184  
   185  			prev_ix &= ring_buffer_mask
   186  			if cur_ix_masked+best_len > ring_buffer_mask || prev_ix+best_len > ring_buffer_mask || data[cur_ix_masked+best_len] != data[prev_ix+best_len] {
   187  				continue
   188  			}
   189  			{
   190  				var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
   191  				if len >= 4 {
   192  					/* Comparing for >= 3 does not change the semantics, but just saves
   193  					   for a few unnecessary binary logarithms in backward reference
   194  					   score, since we are not interested in such short matches. */
   195  					var score uint = backwardReferenceScore(uint(len), backward)
   196  					if best_score < score {
   197  						best_score = score
   198  						best_len = uint(len)
   199  						out.len = best_len
   200  						out.distance = backward
   201  						out.score = best_score
   202  					}
   203  				}
   204  			}
   205  		}
   206  
   207  		bucket[uint32(num[key])&h.block_mask_] = uint32(cur_ix)
   208  		num[key]++
   209  	}
   210  
   211  	if min_score == out.score {
   212  		searchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, false)
   213  	}
   214  }