github.com/panmari/cuckoofilter@v1.0.7-0.20231223155748-763d1d471ee8/cuckoofilter_test.go (about)

     1  package cuckoo
     2  
     3  import (
     4  	"bufio"
     5  	"fmt"
     6  	"math"
     7  	"math/rand"
     8  	"os"
     9  	"testing"
    10  
    11  	"github.com/google/go-cmp/cmp"
    12  )
    13  
    14  // optFloatNear considers float64 as equal if the relative delta is small.
    15  var optFloatNear = cmp.Comparer(func(x, y float64) bool {
    16  	delta := math.Abs(x - y)
    17  	mean := math.Abs(x+y) / 2.0
    18  	return delta/mean < 0.00001
    19  })
    20  
    21  func TestInsertion(t *testing.T) {
    22  	cf := NewFilter(1000000)
    23  	fd, err := os.Open("/usr/share/dict/words")
    24  	if err != nil {
    25  		t.Skipf("failed reading words: %v", err)
    26  	}
    27  	scanner := bufio.NewScanner(fd)
    28  
    29  	var values [][]byte
    30  	var lineCount uint
    31  	for scanner.Scan() {
    32  		s := []byte(scanner.Text())
    33  		if cf.Insert(s) {
    34  			lineCount++
    35  		}
    36  		values = append(values, s)
    37  	}
    38  
    39  	if got, want := cf.Count(), lineCount; got != want {
    40  		t.Errorf("After inserting: Count() = %d, want %d", got, want)
    41  	}
    42  	if got, want := cf.LoadFactor(), float64(0.95); got >= want {
    43  		t.Errorf("After inserting: LoadFactor() = %f, want less than %f.", got, want)
    44  	}
    45  
    46  	for _, v := range values {
    47  		cf.Delete(v)
    48  	}
    49  
    50  	if got, want := cf.Count(), uint(0); got != want {
    51  		t.Errorf("After deleting: Count() = %d, want %d", got, want)
    52  	}
    53  	if got, want := cf.LoadFactor(), float64(0); got != want {
    54  		t.Errorf("After deleting: LoadFactor() = %f, want %f", got, want)
    55  	}
    56  }
    57  
    58  func TestLookup(t *testing.T) {
    59  	cf := NewFilter(4)
    60  	cf.Insert([]byte("one"))
    61  	cf.Insert([]byte("two"))
    62  	cf.Insert([]byte("three"))
    63  
    64  	testCases := []struct {
    65  		word string
    66  		want bool
    67  	}{
    68  		{"one", true},
    69  		{"two", true},
    70  		{"three", true},
    71  		{"four", false},
    72  		{"five", false},
    73  	}
    74  	for _, tc := range testCases {
    75  		tc := tc
    76  		t.Run(fmt.Sprintf("cf.Lookup(%q)", tc.word), func(t *testing.T) {
    77  			t.Parallel()
    78  			if got := cf.Lookup([]byte(tc.word)); got != tc.want {
    79  				t.Errorf("cf.Lookup(%q) got %v, want %v", tc.word, got, tc.want)
    80  			}
    81  		})
    82  	}
    83  }
    84  
    85  func TestFilter_LookupLarge(t *testing.T) {
    86  	const size = 10000
    87  	insertFail := 0
    88  	cf := NewFilter(size)
    89  	for i := 0; i < size; i++ {
    90  		if !cf.Insert([]byte{byte(i)}) {
    91  			insertFail++
    92  		}
    93  	}
    94  	fn := 0
    95  	for i := 0; i < size; i++ {
    96  		if !cf.Lookup([]byte{byte(i)}) {
    97  			fn++
    98  		}
    99  	}
   100  
   101  	if fn != 0 {
   102  		t.Errorf("cf.Lookup() with %d items. False negatives = %d, want 0. Insert failed %d times", size, fn, insertFail)
   103  	}
   104  }
   105  
   106  func TestFilter_Insert(t *testing.T) {
   107  	filter := NewFilter(10000)
   108  	rng := rand.New(rand.NewSource(int64(42)))
   109  
   110  	hash := make([]byte, 32)
   111  	for i := 0; i < 100; i++ {
   112  		rng.Read(hash)
   113  		filter.Insert(hash)
   114  	}
   115  
   116  	if got, want := filter.Count(), uint(100); got != want {
   117  		t.Errorf("inserting 100 items, Count() = %d, want %d", got, want)
   118  	}
   119  }
   120  
   121  func BenchmarkFilter_Reset(b *testing.B) {
   122  	const cap = 10000
   123  	filter := NewFilter(cap)
   124  
   125  	b.ResetTimer()
   126  
   127  	for i := 0; i < b.N; i++ {
   128  		filter.Reset()
   129  	}
   130  }
   131  
   132  // benchmarkKeys returns a slice of keys for benchmarking with length `size`.
   133  func benchmarkKeys(b *testing.B, size int) [][]byte {
   134  	b.Helper()
   135  	keys := make([][]byte, size)
   136  	rng := rand.New(rand.NewSource(int64(size)))
   137  	for i := range keys {
   138  		keys[i] = make([]byte, 32)
   139  		if _, err := rng.Read(keys[i]); err != nil {
   140  			b.Error(err)
   141  		}
   142  	}
   143  	return keys
   144  }
   145  
   146  func BenchmarkFilter_Insert(b *testing.B) {
   147  	const size = 10000
   148  	keys := benchmarkKeys(b, int(float64(size)*0.9))
   149  	b.ResetTimer()
   150  
   151  	for i := 0; i < b.N; {
   152  		b.StopTimer()
   153  		filter := NewFilter(size)
   154  		b.StartTimer()
   155  		for _, k := range keys {
   156  			filter.Insert(k)
   157  		}
   158  		i += len(keys)
   159  	}
   160  }
   161  
   162  func BenchmarkFilter_Lookup(b *testing.B) {
   163  	const size = 10000
   164  	f := NewFilter(size)
   165  	keys := benchmarkKeys(b, int(float64(size)*0.9))
   166  	for _, k := range keys {
   167  		f.Insert(k)
   168  	}
   169  	// One half is likely missing, other half is present.
   170  	lookupKeys := append(benchmarkKeys(b, 1000), keys[0:1000]...)
   171  	rand.New(rand.NewSource(42)).Shuffle(len(lookupKeys), func(i, j int) {
   172  		lookupKeys[i], lookupKeys[j] = lookupKeys[j], lookupKeys[i]
   173  	})
   174  
   175  	b.ResetTimer()
   176  	for i := 0; i < b.N; {
   177  		for _, k := range lookupKeys {
   178  			f.Lookup(k)
   179  		}
   180  		i += len(lookupKeys)
   181  	}
   182  }
   183  
   184  func TestDelete(t *testing.T) {
   185  	cf := NewFilter(8)
   186  	cf.Insert([]byte("one"))
   187  	cf.Insert([]byte("two"))
   188  	cf.Insert([]byte("three"))
   189  
   190  	testCases := []struct {
   191  		word string
   192  		want bool
   193  	}{
   194  		{"four", false},
   195  		{"five", false},
   196  		{"one", true},
   197  		{"two", true},
   198  		{"three", true},
   199  	}
   200  	for _, tc := range testCases {
   201  		t.Run(fmt.Sprintf("cf.Delete(%q)", tc.word), func(t *testing.T) {
   202  			if got := cf.Delete([]byte(tc.word)); got != tc.want {
   203  				t.Errorf("cf.Delete(%q) got %v, want %v", tc.word, got, tc.want)
   204  			}
   205  		})
   206  	}
   207  }
   208  
   209  func TestDeleteMultipleSame(t *testing.T) {
   210  	cf := NewFilter(4)
   211  	for i := 0; i < 5; i++ {
   212  		if !cf.Insert([]byte("some_item")) {
   213  			t.Error("Failed insert during setup.")
   214  		}
   215  	}
   216  
   217  	testCases := []struct {
   218  		word      string
   219  		want      bool
   220  		wantCount uint
   221  	}{
   222  		{"missing", false, 5},
   223  		{"missing2", false, 5},
   224  		{"some_item", true, 4},
   225  		{"some_item", true, 3},
   226  		{"some_item", true, 2},
   227  		{"some_item", true, 1},
   228  		{"some_item", true, 0},
   229  		{"some_item", false, 0},
   230  	}
   231  	t.Logf("Filter state full: %v", cf)
   232  	for _, tc := range testCases {
   233  		t.Run(fmt.Sprintf("cf.Delete(%q)", tc.word), func(t *testing.T) {
   234  			if got, gotCount := cf.Delete([]byte(tc.word)), cf.Count(); got != tc.want || gotCount != tc.wantCount {
   235  				t.Errorf("cf.Delete(%q) = %v, count = %d; want %v, count = %d", tc.word, got, gotCount, tc.want, tc.wantCount)
   236  			}
   237  		})
   238  	}
   239  }
   240  
   241  func TestEncodeDecode(t *testing.T) {
   242  	cf := NewFilter(10)
   243  	cf.Insert([]byte{1})
   244  	cf.Insert([]byte{2})
   245  	cf.Insert([]byte{3})
   246  	cf.Insert([]byte{4})
   247  	cf.Insert([]byte{5})
   248  	cf.Insert([]byte{6})
   249  	cf.Insert([]byte{7})
   250  	cf.Insert([]byte{8})
   251  	cf.Insert([]byte{9})
   252  	encoded := cf.Encode()
   253  	got, err := Decode(encoded)
   254  	if err != nil {
   255  		t.Errorf("Expected no error, got %v", err)
   256  	}
   257  	if !cmp.Equal(cf, got, cmp.AllowUnexported(Filter{})) {
   258  		t.Errorf("Decode = %v, want %v, encoded = %v", got, cf, encoded)
   259  	}
   260  }