github.com/creachadair/ffs@v0.17.3/block/hash.go (about) 1 // Copyright 2019 Michael J. Fromberger. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package block 16 17 // A Hasher constructs rolling hash instances. Use the Hash method to obtain a 18 // fresh instance. 19 type Hasher interface { 20 // Hash returns a fresh Hash instance using the settings from the Hasher. 21 // Instances are independent and can be safely used concurrently. 22 Hash() Hash 23 } 24 25 // A Hash implements a rolling hash. 26 type Hash interface { 27 // Add a byte to the rolling hash, and return the updated value. 28 Update(byte) uint64 29 } 30 31 // rkHasher implements the Hasher interface using the Rabin-Karp construction. 32 type rkHasher struct { 33 // hashing rounds compute base^x % mod 34 // mod should be prime, and must be coprime to base. 35 base, mod int64 // 36 37 // precomputed modular inverse of base^(size-1) for quick subtraction 38 inv int64 39 40 // buffer window size 41 size int 42 } 43 44 // Hash implements the required method of Hasher. 45 func (h rkHasher) Hash() Hash { 46 return &rkHash{rkHasher: h, buf: make([]byte, h.size)} 47 } 48 49 // RabinKarpHasher returns a Rabin-Karp rolling hasher using the given base, 50 // modulus, and window size. The base and modulus must be coprime and the 51 // modulus should be prime (but note that the constructor does not check this). 52 func RabinKarpHasher(base, modulus int64, windowSize int) Hasher { 53 return rkHasher{ 54 base: base, 55 mod: modulus, 56 inv: exptmod(base, int64(windowSize-1), modulus), 57 size: windowSize, 58 } 59 } 60 61 // rkHash implements a rolling hash using the settings from an rkHasher. 62 type rkHash struct { 63 rkHasher // base settings shared by all instances 64 65 hash uint64 // last hash value 66 next int // next offset in the window buffer 67 buf []byte // window buffer (per instance) 68 } 69 70 // Update adds b to the rolling hash and returns the updated hash value. 71 func (h *rkHash) Update(b byte) uint64 { 72 old := int64(h.buf[h.next]) // the displaced oldest byte 73 h.buf[h.next] = b 74 h.next = (h.next + 1) % h.size 75 76 // Subtract away the old byte being displaced. Multiplying by h.inv shifts 77 // the value the correct number of digits forward (mod m). 78 newHash := (h.base*(int64(h.hash)-h.inv*old) + int64(b)) % h.mod 79 if newHash < 0 { 80 newHash += h.mod // pin a non-negative representative 81 } 82 h.hash = uint64(newHash) 83 return h.hash 84 } 85 86 // exptmod(b, e, m) computes b**e modulo m. This is used once per rkHasher to 87 // pre-shift base to the window size, so that evicting the "old" byte can be 88 // done with a single multiplication and subtraction. 89 func exptmod(b, e, m int64) int64 { 90 s := int64(1) 91 for e != 0 { 92 if e&1 == 1 { 93 s = (s * b) % m 94 } 95 b = (b * b) % m 96 e >>= 1 97 } 98 return s 99 }