github.com/creachadair/ffs@v0.17.3/block/hash_test.go (about)

     1  // Copyright 2019 Michael J. Fromberger. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package block_test
    16  
    17  import (
    18  	"testing"
    19  
    20  	"github.com/creachadair/ffs/block"
    21  )
    22  
    23  func max(a, b int) int {
    24  	if a < b {
    25  		return b
    26  	}
    27  	return a
    28  }
    29  
    30  func TestModHashSimple(t *testing.T) {
    31  	// A trivial validation, make sure we get the expected results when the
    32  	// base and modulus are round powers of two, so that the hash values will
    33  	// match an exact suffix of the input bytes.
    34  	h := block.RabinKarpHasher(256, 1<<32, 8).Hash()
    35  	tests := []struct {
    36  		in   byte
    37  		want uint64
    38  	}{
    39  		{1, 0x00000001},
    40  		{2, 0x00000102},
    41  		{3, 0x00010203},
    42  		{4, 0x01020304},
    43  		{160, 0x020304A0},
    44  		{163, 0x0304a0a3},
    45  		{170, 0x04a0a3aa},
    46  		{15, 0xa0a3aa0f},
    47  		{16, 0xa3aa0f10},
    48  		{17, 0xaa0f1011},
    49  		{18, 0x0f101112}, // match 1
    50  		{15, 0x1011120f},
    51  		{16, 0x11120f10},
    52  		{17, 0x120f1011},
    53  		{18, 0x0f101112}, // match 2
    54  	}
    55  
    56  	for _, test := range tests {
    57  		got := h.Update(test.in)
    58  		if got != test.want {
    59  			t.Errorf("Update(%x): got %x, want %x", test.in, got, test.want)
    60  		}
    61  	}
    62  }
    63  
    64  func TestModHashComplex(t *testing.T) {
    65  	const (
    66  		base = 7
    67  		mod  = 257
    68  		size = 5
    69  	)
    70  	input := []byte{
    71  		1, 3, 2, 8, 9, 4, 7, 11, 75,
    72  		1, 0, 1, 3, 2, 8, 9, 15, 7,
    73  		13, 15, 24, 100, 125, 180, 1, 0,
    74  		0, 1, 0, 9, 80, 3, 2, 1,
    75  	}
    76  
    77  	// Walk through each viable slice of input comparing the rolling hash value
    78  	// to the expected value computed by brute force without rolling.
    79  	h := block.RabinKarpHasher(base, mod, size).Hash()
    80  	for i := range input {
    81  		data := input[max(0, i-size):i]
    82  		if len(data) == 0 {
    83  			continue
    84  		}
    85  
    86  		b := data[len(data)-1]
    87  		want := wantHash(base, mod, data)
    88  		got := h.Update(b)
    89  		if got != want {
    90  			t.Errorf("At offset %d: Update(%x): got %x, want %x", i, b, got, want)
    91  		}
    92  	}
    93  }
    94  
    95  func TestModHash(t *testing.T) {
    96  	const (
    97  		base      = 2147483659
    98  		mod       = 1031
    99  		maxWindow = 8
   100  	)
   101  	for i := 1; i <= maxWindow; i++ {
   102  		windowTest(t, block.RabinKarpHasher(base, mod, i), i)
   103  	}
   104  }
   105  
   106  func windowTest(t *testing.T, h block.Hasher, size int) {
   107  	// Make sure that we get the same hash value when the window has the same
   108  	// contents.
   109  	const keyValue = 22
   110  	testData := make([]byte, size)
   111  	testData = append(testData, []byte{
   112  		1, 2, 3, 4, 5, 6, 7, 8, 11, keyValue, 2, 3, 4, 5, 6, 7, 8, 11, 15, 17,
   113  		33, 44, 55, 66, 77, 88, 3, 5, 7, 11, 13, 17, 19, 23, 3, 4, 5, 6, 7, 8,
   114  		11, keyValue, 2, 3, 4, 5, 6, 7, 8, 11, keyValue, 24, 26, 28, 30,
   115  	}...)
   116  
   117  	var keyHash uint64
   118  	rh := h.Hash()
   119  	for i, in := range testData[size:] {
   120  		v := rh.Update(in)
   121  		if in != keyValue {
   122  			continue
   123  		}
   124  		if keyHash == 0 {
   125  			keyHash = v
   126  			t.Logf("At #%d, set hash for key value %d to %08x", i, keyValue, keyHash)
   127  		} else if v != keyHash {
   128  			t.Errorf("#%d: Update(%02x): got %d, want %d", i, in, v, keyHash)
   129  		}
   130  	}
   131  }
   132  
   133  // wantHash computes a raw mod-hash over the given slice without using sliding.
   134  // This is used to check the outcome of a modHash that does slide.
   135  func wantHash(base, mod int, data []byte) uint64 {
   136  	var want int
   137  	for _, v := range data {
   138  		want = ((want * base) + int(v)) % mod
   139  	}
   140  	return uint64(want)
   141  }