github.com/andybalholm/brotli@v1.0.6/hash_rolling.go (about)

     1  package brotli
     2  
     3  /* Copyright 2018 Google Inc. All Rights Reserved.
     4  
     5     Distributed under MIT license.
     6     See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
     7  */
     8  
     9  /* NOTE: this hasher does not search in the dictionary. It is used as
    10     backup-hasher, the main hasher already searches in it. */
    11  
    12  const kRollingHashMul32 uint32 = 69069
    13  
    14  const kInvalidPosHashRolling uint32 = 0xffffffff
    15  
    16  /* This hasher uses a longer forward length, but returning a higher value here
    17     will hurt compression by the main hasher when combined with a composite
    18     hasher. The hasher tests for forward itself instead. */
    19  func (*hashRolling) HashTypeLength() uint {
    20  	return 4
    21  }
    22  
    23  func (*hashRolling) StoreLookahead() uint {
    24  	return 4
    25  }
    26  
    27  /* Computes a code from a single byte. A lookup table of 256 values could be
    28     used, but simply adding 1 works about as good. */
    29  func (*hashRolling) HashByte(b byte) uint32 {
    30  	return uint32(b) + 1
    31  }
    32  
    33  func (h *hashRolling) HashRollingFunctionInitial(state uint32, add byte, factor uint32) uint32 {
    34  	return uint32(factor*state + h.HashByte(add))
    35  }
    36  
    37  func (h *hashRolling) HashRollingFunction(state uint32, add byte, rem byte, factor uint32, factor_remove uint32) uint32 {
    38  	return uint32(factor*state + h.HashByte(add) - factor_remove*h.HashByte(rem))
    39  }
    40  
    41  /* Rolling hash for long distance long string matches. Stores one position
    42     per bucket, bucket key is computed over a long region. */
    43  type hashRolling struct {
    44  	hasherCommon
    45  
    46  	jump int
    47  
    48  	state         uint32
    49  	table         []uint32
    50  	next_ix       uint
    51  	factor        uint32
    52  	factor_remove uint32
    53  }
    54  
    55  func (h *hashRolling) Initialize(params *encoderParams) {
    56  	h.state = 0
    57  	h.next_ix = 0
    58  
    59  	h.factor = kRollingHashMul32
    60  
    61  	/* Compute the factor of the oldest byte to remove: factor**steps modulo
    62  	   0xffffffff (the multiplications rely on 32-bit overflow) */
    63  	h.factor_remove = 1
    64  
    65  	for i := 0; i < 32; i += h.jump {
    66  		h.factor_remove *= h.factor
    67  	}
    68  
    69  	h.table = make([]uint32, 16777216)
    70  	for i := 0; i < 16777216; i++ {
    71  		h.table[i] = kInvalidPosHashRolling
    72  	}
    73  }
    74  
    75  func (h *hashRolling) Prepare(one_shot bool, input_size uint, data []byte) {
    76  	/* Too small size, cannot use this hasher. */
    77  	if input_size < 32 {
    78  		return
    79  	}
    80  	h.state = 0
    81  	for i := 0; i < 32; i += h.jump {
    82  		h.state = h.HashRollingFunctionInitial(h.state, data[i], h.factor)
    83  	}
    84  }
    85  
    86  func (*hashRolling) Store(data []byte, mask uint, ix uint) {
    87  }
    88  
    89  func (*hashRolling) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
    90  }
    91  
    92  func (h *hashRolling) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ring_buffer_mask uint) {
    93  	var position_masked uint
    94  	/* In this case we must re-initialize the hasher from scratch from the
    95  	   current position. */
    96  
    97  	var available uint = num_bytes
    98  	if position&uint(h.jump-1) != 0 {
    99  		var diff uint = uint(h.jump) - (position & uint(h.jump-1))
   100  		if diff > available {
   101  			available = 0
   102  		} else {
   103  			available = available - diff
   104  		}
   105  		position += diff
   106  	}
   107  
   108  	position_masked = position & ring_buffer_mask
   109  
   110  	/* wrapping around ringbuffer not handled. */
   111  	if available > ring_buffer_mask-position_masked {
   112  		available = ring_buffer_mask - position_masked
   113  	}
   114  
   115  	h.Prepare(false, available, ringbuffer[position&ring_buffer_mask:])
   116  	h.next_ix = position
   117  }
   118  
   119  func (*hashRolling) PrepareDistanceCache(distance_cache []int) {
   120  }
   121  
   122  func (h *hashRolling) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
   123  	var cur_ix_masked uint = cur_ix & ring_buffer_mask
   124  	var pos uint = h.next_ix
   125  
   126  	if cur_ix&uint(h.jump-1) != 0 {
   127  		return
   128  	}
   129  
   130  	/* Not enough lookahead */
   131  	if max_length < 32 {
   132  		return
   133  	}
   134  
   135  	for pos = h.next_ix; pos <= cur_ix; pos += uint(h.jump) {
   136  		var code uint32 = h.state & ((16777216 * 64) - 1)
   137  		var rem byte = data[pos&ring_buffer_mask]
   138  		var add byte = data[(pos+32)&ring_buffer_mask]
   139  		var found_ix uint = uint(kInvalidPosHashRolling)
   140  
   141  		h.state = h.HashRollingFunction(h.state, add, rem, h.factor, h.factor_remove)
   142  
   143  		if code < 16777216 {
   144  			found_ix = uint(h.table[code])
   145  			h.table[code] = uint32(pos)
   146  			if pos == cur_ix && uint32(found_ix) != kInvalidPosHashRolling {
   147  				/* The cast to 32-bit makes backward distances up to 4GB work even
   148  				   if cur_ix is above 4GB, despite using 32-bit values in the table. */
   149  				var backward uint = uint(uint32(cur_ix - found_ix))
   150  				if backward <= max_backward {
   151  					var found_ix_masked uint = found_ix & ring_buffer_mask
   152  					var len uint = findMatchLengthWithLimit(data[found_ix_masked:], data[cur_ix_masked:], max_length)
   153  					if len >= 4 && len > out.len {
   154  						var score uint = backwardReferenceScore(uint(len), backward)
   155  						if score > out.score {
   156  							out.len = uint(len)
   157  							out.distance = backward
   158  							out.score = score
   159  							out.len_code_delta = 0
   160  						}
   161  					}
   162  				}
   163  			}
   164  		}
   165  	}
   166  
   167  	h.next_ix = cur_ix + uint(h.jump)
   168  }