github.com/andybalholm/brotli@v1.0.6/h6.go (about)

     1  package brotli
     2  
     3  import "encoding/binary"
     4  
     5  /* Copyright 2010 Google Inc. All Rights Reserved.
     6  
     7     Distributed under MIT license.
     8     See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
     9  */
    10  
    11  /* A (forgetful) hash table to the data seen by the compressor, to
    12     help create backward references to previous data.
    13  
    14     This is a hash map of fixed size (bucket_size_) to a ring buffer of
    15     fixed size (block_size_). The ring buffer contains the last block_size_
    16     index positions of the given hash key in the compressed data. */
    17  func (*h6) HashTypeLength() uint {
    18  	return 8
    19  }
    20  
    21  func (*h6) StoreLookahead() uint {
    22  	return 8
    23  }
    24  
    25  /* HashBytes is the function that chooses the bucket to place the address in. */
    26  func hashBytesH6(data []byte, mask uint64, shift int) uint32 {
    27  	var h uint64 = (binary.LittleEndian.Uint64(data) & mask) * kHashMul64Long
    28  
    29  	/* The higher bits contain more mixture from the multiplication,
    30  	   so we take our results from there. */
    31  	return uint32(h >> uint(shift))
    32  }
    33  
    34  type h6 struct {
    35  	hasherCommon
    36  	bucket_size_ uint
    37  	block_size_  uint
    38  	hash_shift_  int
    39  	hash_mask_   uint64
    40  	block_mask_  uint32
    41  	num          []uint16
    42  	buckets      []uint32
    43  }
    44  
    45  func (h *h6) Initialize(params *encoderParams) {
    46  	h.hash_shift_ = 64 - h.params.bucket_bits
    47  	h.hash_mask_ = (^(uint64(0))) >> uint(64-8*h.params.hash_len)
    48  	h.bucket_size_ = uint(1) << uint(h.params.bucket_bits)
    49  	h.block_size_ = uint(1) << uint(h.params.block_bits)
    50  	h.block_mask_ = uint32(h.block_size_ - 1)
    51  	h.num = make([]uint16, h.bucket_size_)
    52  	h.buckets = make([]uint32, h.block_size_*h.bucket_size_)
    53  }
    54  
    55  func (h *h6) Prepare(one_shot bool, input_size uint, data []byte) {
    56  	var num []uint16 = h.num
    57  	var partial_prepare_threshold uint = h.bucket_size_ >> 6
    58  	/* Partial preparation is 100 times slower (per socket). */
    59  	if one_shot && input_size <= partial_prepare_threshold {
    60  		var i uint
    61  		for i = 0; i < input_size; i++ {
    62  			var key uint32 = hashBytesH6(data[i:], h.hash_mask_, h.hash_shift_)
    63  			num[key] = 0
    64  		}
    65  	} else {
    66  		for i := 0; i < int(h.bucket_size_); i++ {
    67  			num[i] = 0
    68  		}
    69  	}
    70  }
    71  
    72  /* Look at 4 bytes at &data[ix & mask].
    73     Compute a hash from these, and store the value of ix at that position. */
    74  func (h *h6) Store(data []byte, mask uint, ix uint) {
    75  	var num []uint16 = h.num
    76  	var key uint32 = hashBytesH6(data[ix&mask:], h.hash_mask_, h.hash_shift_)
    77  	var minor_ix uint = uint(num[key]) & uint(h.block_mask_)
    78  	var offset uint = minor_ix + uint(key<<uint(h.params.block_bits))
    79  	h.buckets[offset] = uint32(ix)
    80  	num[key]++
    81  }
    82  
    83  func (h *h6) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
    84  	var i uint
    85  	for i = ix_start; i < ix_end; i++ {
    86  		h.Store(data, mask, i)
    87  	}
    88  }
    89  
    90  func (h *h6) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
    91  	if num_bytes >= h.HashTypeLength()-1 && position >= 3 {
    92  		/* Prepare the hashes for three last bytes of the last write.
    93  		   These could not be calculated before, since they require knowledge
    94  		   of both the previous and the current block. */
    95  		h.Store(ringbuffer, ringbuffer_mask, position-3)
    96  		h.Store(ringbuffer, ringbuffer_mask, position-2)
    97  		h.Store(ringbuffer, ringbuffer_mask, position-1)
    98  	}
    99  }
   100  
   101  func (h *h6) PrepareDistanceCache(distance_cache []int) {
   102  	prepareDistanceCache(distance_cache, h.params.num_last_distances_to_check)
   103  }
   104  
   105  /* Find a longest backward match of &data[cur_ix] up to the length of
   106     max_length and stores the position cur_ix in the hash table.
   107  
   108     REQUIRES: PrepareDistanceCacheH6 must be invoked for current distance cache
   109               values; if this method is invoked repeatedly with the same distance
   110               cache values, it is enough to invoke PrepareDistanceCacheH6 once.
   111  
   112     Does not look for matches longer than max_length.
   113     Does not look for matches further away than max_backward.
   114     Writes the best match into |out|.
   115     |out|->score is updated only if a better match is found. */
   116  func (h *h6) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
   117  	var num []uint16 = h.num
   118  	var buckets []uint32 = h.buckets
   119  	var cur_ix_masked uint = cur_ix & ring_buffer_mask
   120  	var min_score uint = out.score
   121  	var best_score uint = out.score
   122  	var best_len uint = out.len
   123  	var i uint
   124  	var bucket []uint32
   125  	/* Don't accept a short copy from far away. */
   126  	out.len = 0
   127  
   128  	out.len_code_delta = 0
   129  
   130  	/* Try last distance first. */
   131  	for i = 0; i < uint(h.params.num_last_distances_to_check); i++ {
   132  		var backward uint = uint(distance_cache[i])
   133  		var prev_ix uint = uint(cur_ix - backward)
   134  		if prev_ix >= cur_ix {
   135  			continue
   136  		}
   137  
   138  		if backward > max_backward {
   139  			continue
   140  		}
   141  
   142  		prev_ix &= ring_buffer_mask
   143  
   144  		if cur_ix_masked+best_len > ring_buffer_mask || prev_ix+best_len > ring_buffer_mask || data[cur_ix_masked+best_len] != data[prev_ix+best_len] {
   145  			continue
   146  		}
   147  		{
   148  			var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
   149  			if len >= 3 || (len == 2 && i < 2) {
   150  				/* Comparing for >= 2 does not change the semantics, but just saves for
   151  				   a few unnecessary binary logarithms in backward reference score,
   152  				   since we are not interested in such short matches. */
   153  				var score uint = backwardReferenceScoreUsingLastDistance(uint(len))
   154  				if best_score < score {
   155  					if i != 0 {
   156  						score -= backwardReferencePenaltyUsingLastDistance(i)
   157  					}
   158  					if best_score < score {
   159  						best_score = score
   160  						best_len = uint(len)
   161  						out.len = best_len
   162  						out.distance = backward
   163  						out.score = best_score
   164  					}
   165  				}
   166  			}
   167  		}
   168  	}
   169  	{
   170  		var key uint32 = hashBytesH6(data[cur_ix_masked:], h.hash_mask_, h.hash_shift_)
   171  		bucket = buckets[key<<uint(h.params.block_bits):]
   172  		var down uint
   173  		if uint(num[key]) > h.block_size_ {
   174  			down = uint(num[key]) - h.block_size_
   175  		} else {
   176  			down = 0
   177  		}
   178  		for i = uint(num[key]); i > down; {
   179  			var prev_ix uint
   180  			i--
   181  			prev_ix = uint(bucket[uint32(i)&h.block_mask_])
   182  			var backward uint = cur_ix - prev_ix
   183  			if backward > max_backward {
   184  				break
   185  			}
   186  
   187  			prev_ix &= ring_buffer_mask
   188  			if cur_ix_masked+best_len > ring_buffer_mask || prev_ix+best_len > ring_buffer_mask || data[cur_ix_masked+best_len] != data[prev_ix+best_len] {
   189  				continue
   190  			}
   191  			{
   192  				var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
   193  				if len >= 4 {
   194  					/* Comparing for >= 3 does not change the semantics, but just saves
   195  					   for a few unnecessary binary logarithms in backward reference
   196  					   score, since we are not interested in such short matches. */
   197  					var score uint = backwardReferenceScore(uint(len), backward)
   198  					if best_score < score {
   199  						best_score = score
   200  						best_len = uint(len)
   201  						out.len = best_len
   202  						out.distance = backward
   203  						out.score = best_score
   204  					}
   205  				}
   206  			}
   207  		}
   208  
   209  		bucket[uint32(num[key])&h.block_mask_] = uint32(cur_ix)
   210  		num[key]++
   211  	}
   212  
   213  	if min_score == out.score {
   214  		searchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, false)
   215  	}
   216  }