github.com/andybalholm/brotli@v1.0.6/hash_forgetful_chain.go (about)

     1  package brotli
     2  
     3  import "encoding/binary"
     4  
     5  /* Copyright 2016 Google Inc. All Rights Reserved.
     6  
     7     Distributed under MIT license.
     8     See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
     9  */
    10  
    11  func (*hashForgetfulChain) HashTypeLength() uint {
    12  	return 4
    13  }
    14  
    15  func (*hashForgetfulChain) StoreLookahead() uint {
    16  	return 4
    17  }
    18  
    19  /* HashBytes is the function that chooses the bucket to place the address in.*/
    20  func (h *hashForgetfulChain) HashBytes(data []byte) uint {
    21  	var hash uint32 = binary.LittleEndian.Uint32(data) * kHashMul32
    22  
    23  	/* The higher bits contain more mixture from the multiplication,
    24  	   so we take our results from there. */
    25  	return uint(hash >> (32 - h.bucketBits))
    26  }
    27  
    28  type slot struct {
    29  	delta uint16
    30  	next  uint16
    31  }
    32  
    33  /* A (forgetful) hash table to the data seen by the compressor, to
    34     help create backward references to previous data.
    35  
    36     Hashes are stored in chains which are bucketed to groups. Group of chains
    37     share a storage "bank". When more than "bank size" chain nodes are added,
    38     oldest nodes are replaced; this way several chains may share a tail. */
    39  type hashForgetfulChain struct {
    40  	hasherCommon
    41  
    42  	bucketBits              uint
    43  	numBanks                uint
    44  	bankBits                uint
    45  	numLastDistancesToCheck int
    46  
    47  	addr          []uint32
    48  	head          []uint16
    49  	tiny_hash     [65536]byte
    50  	banks         [][]slot
    51  	free_slot_idx []uint16
    52  	max_hops      uint
    53  }
    54  
    55  func (h *hashForgetfulChain) Initialize(params *encoderParams) {
    56  	var q uint
    57  	if params.quality > 6 {
    58  		q = 7
    59  	} else {
    60  		q = 8
    61  	}
    62  	h.max_hops = q << uint(params.quality-4)
    63  
    64  	bankSize := 1 << h.bankBits
    65  	bucketSize := 1 << h.bucketBits
    66  
    67  	h.addr = make([]uint32, bucketSize)
    68  	h.head = make([]uint16, bucketSize)
    69  	h.banks = make([][]slot, h.numBanks)
    70  	for i := range h.banks {
    71  		h.banks[i] = make([]slot, bankSize)
    72  	}
    73  	h.free_slot_idx = make([]uint16, h.numBanks)
    74  }
    75  
    76  func (h *hashForgetfulChain) Prepare(one_shot bool, input_size uint, data []byte) {
    77  	var partial_prepare_threshold uint = (1 << h.bucketBits) >> 6
    78  	/* Partial preparation is 100 times slower (per socket). */
    79  	if one_shot && input_size <= partial_prepare_threshold {
    80  		var i uint
    81  		for i = 0; i < input_size; i++ {
    82  			var bucket uint = h.HashBytes(data[i:])
    83  
    84  			/* See InitEmpty comment. */
    85  			h.addr[bucket] = 0xCCCCCCCC
    86  
    87  			h.head[bucket] = 0xCCCC
    88  		}
    89  	} else {
    90  		/* Fill |addr| array with 0xCCCCCCCC value. Because of wrapping, position
    91  		   processed by hasher never reaches 3GB + 64M; this makes all new chains
    92  		   to be terminated after the first node. */
    93  		for i := range h.addr {
    94  			h.addr[i] = 0xCCCCCCCC
    95  		}
    96  
    97  		for i := range h.head {
    98  			h.head[i] = 0
    99  		}
   100  	}
   101  
   102  	h.tiny_hash = [65536]byte{}
   103  	for i := range h.free_slot_idx {
   104  		h.free_slot_idx[i] = 0
   105  	}
   106  }
   107  
   108  /* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and prepend
   109     node to corresponding chain; also update tiny_hash for current position. */
   110  func (h *hashForgetfulChain) Store(data []byte, mask uint, ix uint) {
   111  	var key uint = h.HashBytes(data[ix&mask:])
   112  	var bank uint = key & (h.numBanks - 1)
   113  	idx := uint(h.free_slot_idx[bank]) & ((1 << h.bankBits) - 1)
   114  	h.free_slot_idx[bank]++
   115  	var delta uint = ix - uint(h.addr[key])
   116  	h.tiny_hash[uint16(ix)] = byte(key)
   117  	if delta > 0xFFFF {
   118  		delta = 0xFFFF
   119  	}
   120  	h.banks[bank][idx].delta = uint16(delta)
   121  	h.banks[bank][idx].next = h.head[key]
   122  	h.addr[key] = uint32(ix)
   123  	h.head[key] = uint16(idx)
   124  }
   125  
   126  func (h *hashForgetfulChain) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
   127  	var i uint
   128  	for i = ix_start; i < ix_end; i++ {
   129  		h.Store(data, mask, i)
   130  	}
   131  }
   132  
   133  func (h *hashForgetfulChain) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ring_buffer_mask uint) {
   134  	if num_bytes >= h.HashTypeLength()-1 && position >= 3 {
   135  		/* Prepare the hashes for three last bytes of the last write.
   136  		   These could not be calculated before, since they require knowledge
   137  		   of both the previous and the current block. */
   138  		h.Store(ringbuffer, ring_buffer_mask, position-3)
   139  		h.Store(ringbuffer, ring_buffer_mask, position-2)
   140  		h.Store(ringbuffer, ring_buffer_mask, position-1)
   141  	}
   142  }
   143  
   144  func (h *hashForgetfulChain) PrepareDistanceCache(distance_cache []int) {
   145  	prepareDistanceCache(distance_cache, h.numLastDistancesToCheck)
   146  }
   147  
   148  /* Find a longest backward match of &data[cur_ix] up to the length of
   149     max_length and stores the position cur_ix in the hash table.
   150  
   151     REQUIRES: PrepareDistanceCachehashForgetfulChain must be invoked for current distance cache
   152               values; if this method is invoked repeatedly with the same distance
   153               cache values, it is enough to invoke PrepareDistanceCachehashForgetfulChain once.
   154  
   155     Does not look for matches longer than max_length.
   156     Does not look for matches further away than max_backward.
   157     Writes the best match into |out|.
   158     |out|->score is updated only if a better match is found. */
   159  func (h *hashForgetfulChain) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
   160  	var cur_ix_masked uint = cur_ix & ring_buffer_mask
   161  	var min_score uint = out.score
   162  	var best_score uint = out.score
   163  	var best_len uint = out.len
   164  	var key uint = h.HashBytes(data[cur_ix_masked:])
   165  	var tiny_hash byte = byte(key)
   166  	/* Don't accept a short copy from far away. */
   167  	out.len = 0
   168  
   169  	out.len_code_delta = 0
   170  
   171  	/* Try last distance first. */
   172  	for i := 0; i < h.numLastDistancesToCheck; i++ {
   173  		var backward uint = uint(distance_cache[i])
   174  		var prev_ix uint = (cur_ix - backward)
   175  
   176  		/* For distance code 0 we want to consider 2-byte matches. */
   177  		if i > 0 && h.tiny_hash[uint16(prev_ix)] != tiny_hash {
   178  			continue
   179  		}
   180  		if prev_ix >= cur_ix || backward > max_backward {
   181  			continue
   182  		}
   183  
   184  		prev_ix &= ring_buffer_mask
   185  		{
   186  			var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
   187  			if len >= 2 {
   188  				var score uint = backwardReferenceScoreUsingLastDistance(uint(len))
   189  				if best_score < score {
   190  					if i != 0 {
   191  						score -= backwardReferencePenaltyUsingLastDistance(uint(i))
   192  					}
   193  					if best_score < score {
   194  						best_score = score
   195  						best_len = uint(len)
   196  						out.len = best_len
   197  						out.distance = backward
   198  						out.score = best_score
   199  					}
   200  				}
   201  			}
   202  		}
   203  	}
   204  	{
   205  		var bank uint = key & (h.numBanks - 1)
   206  		var backward uint = 0
   207  		var hops uint = h.max_hops
   208  		var delta uint = cur_ix - uint(h.addr[key])
   209  		var slot uint = uint(h.head[key])
   210  		for {
   211  			tmp6 := hops
   212  			hops--
   213  			if tmp6 == 0 {
   214  				break
   215  			}
   216  			var prev_ix uint
   217  			var last uint = slot
   218  			backward += delta
   219  			if backward > max_backward {
   220  				break
   221  			}
   222  			prev_ix = (cur_ix - backward) & ring_buffer_mask
   223  			slot = uint(h.banks[bank][last].next)
   224  			delta = uint(h.banks[bank][last].delta)
   225  			if cur_ix_masked+best_len > ring_buffer_mask || prev_ix+best_len > ring_buffer_mask || data[cur_ix_masked+best_len] != data[prev_ix+best_len] {
   226  				continue
   227  			}
   228  			{
   229  				var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
   230  				if len >= 4 {
   231  					/* Comparing for >= 3 does not change the semantics, but just saves
   232  					   for a few unnecessary binary logarithms in backward reference
   233  					   score, since we are not interested in such short matches. */
   234  					var score uint = backwardReferenceScore(uint(len), backward)
   235  					if best_score < score {
   236  						best_score = score
   237  						best_len = uint(len)
   238  						out.len = best_len
   239  						out.distance = backward
   240  						out.score = best_score
   241  					}
   242  				}
   243  			}
   244  		}
   245  
   246  		h.Store(data, ring_buffer_mask, cur_ix)
   247  	}
   248  
   249  	if out.score == min_score {
   250  		searchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, false)
   251  	}
   252  }