github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/nbs/benchmarks/gen/rolling_value_hasher.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // This file incorporates work covered by the following copyright and 16 // permission notice: 17 // 18 // Copyright 2016 Attic Labs, Inc. All rights reserved. 19 // Licensed under the Apache License, version 2.0: 20 // http://www.apache.org/licenses/LICENSE-2.0 21 22 package gen 23 24 import "github.com/kch42/buzhash" 25 26 const ( 27 chunkPattern = uint32(1<<12 - 1) // Avg Chunk Size of 4k 28 29 // The window size to use for computing the rolling hash. This is way more than necessary assuming random data (two bytes would be sufficient with a target chunk size of 4k). The benefit of a larger window is it allows for better distribution on input with lower entropy. At a target chunk size of 4k, any given byte changing has roughly a 1.5% chance of affecting an existing boundary, which seems like an acceptable trade-off. 30 chunkWindow = uint32(64) 31 ) 32 33 type rollingValueHasher struct { 34 bz *buzhash.BuzHash 35 } 36 37 func newRollingValueHasher() *rollingValueHasher { 38 return &rollingValueHasher{buzhash.NewBuzHash(chunkWindow)} 39 } 40 41 func (rv *rollingValueHasher) HashByte(b byte) bool { 42 rv.bz.HashByte(b) 43 return rv.bz.Sum32()&chunkPattern == chunkPattern 44 }