github.com/jbendotnet/noms@v0.0.0-20190904222105-c43e4293ea92/go/nbs/benchmarks/gen/rolling_value_hasher.go (about)

     1  // Copyright 2016 Attic Labs, Inc. All rights reserved.
     2  // Licensed under the Apache License, version 2.0:
     3  // http://www.apache.org/licenses/LICENSE-2.0
     4  
     5  package gen
     6  
     7  import "github.com/kch42/buzhash"
     8  
     9  const (
    10  	chunkPattern = uint32(1<<12 - 1) // Avg Chunk Size of 4k
    11  
    12  	// The window size to use for computing the rolling hash. This is way more than necessary assuming random data (two bytes would be sufficient with a target chunk size of 4k). The benefit of a larger window is it allows for better distribution on input with lower entropy. At a target chunk size of 4k, any given byte changing has roughly a 1.5% chance of affecting an existing boundary, which seems like an acceptable trade-off.
    13  	chunkWindow = uint32(64)
    14  )
    15  
    16  type rollingValueHasher struct {
    17  	bz *buzhash.BuzHash
    18  }
    19  
    20  func newRollingValueHasher() *rollingValueHasher {
    21  	return &rollingValueHasher{buzhash.NewBuzHash(chunkWindow)}
    22  }
    23  
    24  func (rv *rollingValueHasher) HashByte(b byte) bool {
    25  	rv.bz.HashByte(b)
    26  	return rv.bz.Sum32()&chunkPattern == chunkPattern
    27  }