github.com/creachadair/ffs@v0.17.3/block/hash.go (about)

     1  // Copyright 2019 Michael J. Fromberger. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package block
    16  
// A Hasher constructs rolling hash instances. It carries the hash settings;
// use the Hash method to obtain a fresh instance that applies them.
type Hasher interface {
	// Hash returns a fresh Hash instance using the settings from the Hasher.
	// Instances are independent of one another, so separate instances can
	// safely be used concurrently.
	Hash() Hash
}
    24  
// A Hash implements a rolling hash: its value is advanced one byte at a time
// by calling Update.
type Hash interface {
	// Update adds a byte to the rolling hash and returns the updated hash
	// value.
	Update(byte) uint64
}
    30  
    31  // rkHasher implements the Hasher interface using the Rabin-Karp construction.
    32  type rkHasher struct {
    33  	// hashing rounds compute base^x % mod
    34  	// mod should be prime, and must be coprime to base.
    35  	base, mod int64 //
    36  
    37  	// precomputed modular inverse of base^(size-1) for quick subtraction
    38  	inv int64
    39  
    40  	// buffer window size
    41  	size int
    42  }
    43  
    44  // Hash implements the required method of Hasher.
    45  func (h rkHasher) Hash() Hash {
    46  	return &rkHash{rkHasher: h, buf: make([]byte, h.size)}
    47  }
    48  
    49  // RabinKarpHasher returns a Rabin-Karp rolling hasher using the given base,
    50  // modulus, and window size. The base and modulus must be coprime and the
    51  // modulus should be prime (but note that the constructor does not check this).
    52  func RabinKarpHasher(base, modulus int64, windowSize int) Hasher {
    53  	return rkHasher{
    54  		base: base,
    55  		mod:  modulus,
    56  		inv:  exptmod(base, int64(windowSize-1), modulus),
    57  		size: windowSize,
    58  	}
    59  }
    60  
// rkHash implements a rolling hash using the settings from an rkHasher. Each
// instance embeds its own copy of those settings and adds the mutable state
// for one window: the current hash value and a buffer of the bytes in the
// window, which Update treats as circular.
type rkHash struct {
	rkHasher // base settings, copied from the hasher that created this instance

	hash uint64 // last hash value returned by Update (zero before the first call)
	next int    // next offset in the window buffer; holds the byte Update displaces
	buf  []byte // window buffer (per instance), allocated with length size
}
    69  
    70  // Update adds b to the rolling hash and returns the updated hash value.
    71  func (h *rkHash) Update(b byte) uint64 {
    72  	old := int64(h.buf[h.next]) // the displaced oldest byte
    73  	h.buf[h.next] = b
    74  	h.next = (h.next + 1) % h.size
    75  
    76  	// Subtract away the old byte being displaced. Multiplying by h.inv shifts
    77  	// the value the correct number of digits forward (mod m).
    78  	newHash := (h.base*(int64(h.hash)-h.inv*old) + int64(b)) % h.mod
    79  	if newHash < 0 {
    80  		newHash += h.mod // pin a non-negative representative
    81  	}
    82  	h.hash = uint64(newHash)
    83  	return h.hash
    84  }
    85  
    86  // exptmod(b, e, m) computes b**e modulo m. This is used once per rkHasher to
    87  // pre-shift base to the window size, so that evicting the "old" byte can be
    88  // done with a single multiplication and subtraction.
    89  func exptmod(b, e, m int64) int64 {
    90  	s := int64(1)
    91  	for e != 0 {
    92  		if e&1 == 1 {
    93  			s = (s * b) % m
    94  		}
    95  		b = (b * b) % m
    96  		e >>= 1
    97  	}
    98  	return s
    99  }