github.com/jbendotnet/noms@v0.0.0-20190904222105-c43e4293ea92/go/nbs/benchmarks/gen/rolling_value_hasher.go (about) 1 // Copyright 2016 Attic Labs, Inc. All rights reserved. 2 // Licensed under the Apache License, version 2.0: 3 // http://www.apache.org/licenses/LICENSE-2.0 4 5 package gen 6 7 import "github.com/kch42/buzhash" 8 9 const ( 10 chunkPattern = uint32(1<<12 - 1) // Avg Chunk Size of 4k 11 12 // The window size to use for computing the rolling hash. This is way more than necessary assuming random data (two bytes would be sufficient with a target chunk size of 4k). The benefit of a larger window is it allows for better distribution on input with lower entropy. At a target chunk size of 4k, any given byte changing has roughly a 1.5% chance of affecting an existing boundary, which seems like an acceptable trade-off. 13 chunkWindow = uint32(64) 14 ) 15 16 type rollingValueHasher struct { 17 bz *buzhash.BuzHash 18 } 19 20 func newRollingValueHasher() *rollingValueHasher { 21 return &rollingValueHasher{buzhash.NewBuzHash(chunkWindow)} 22 } 23 24 func (rv *rollingValueHasher) HashByte(b byte) bool { 25 rv.bz.HashByte(b) 26 return rv.bz.Sum32()&chunkPattern == chunkPattern 27 }