github.com/petermattis/pebble@v0.0.0-20190905164901-ab51a2166067/bloom/bloom_test.go (about)

     1  // Copyright 2013 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package bloom
     6  
     7  import (
     8  	"testing"
     9  
    10  	"github.com/petermattis/pebble/internal/base"
    11  )
    12  
    13  func (f tableFilter) String() string {
    14  	s := make([]byte, 8*len(f))
    15  	for i, x := range f {
    16  		for j := 0; j < 8; j++ {
    17  			if x&(1<<uint(j)) != 0 {
    18  				s[8*i+j] = '1'
    19  			} else {
    20  				s[8*i+j] = '.'
    21  			}
    22  		}
    23  	}
    24  	return string(s)
    25  }
    26  
    27  func newTableFilter(buf []byte, keys [][]byte, bitsPerKey int) tableFilter {
    28  	w := FilterPolicy(bitsPerKey).NewWriter(base.TableFilter)
    29  	for _, key := range keys {
    30  		w.AddKey(key)
    31  	}
    32  	return tableFilter(w.Finish(nil))
    33  }
    34  
    35  func TestSmallBloomFilter(t *testing.T) {
    36  	f := newTableFilter(nil, [][]byte{
    37  		[]byte("hello"),
    38  		[]byte("world"),
    39  	}, 10)
    40  	got := f.String()
    41  	// TODO(tbg): verify that RocksDB produces the same string.
    42  	want := "........................1.....................................................1...............1.....................................1.......................................................................................................................1.....................................1.............................1.....................................................1.....................................................1...................1.................................1...1..........................11.....1..............................."
    43  	if got != want {
    44  		t.Fatalf("bits:\ngot  %q\nwant %q", got, want)
    45  	}
    46  
    47  	m := map[string]bool{
    48  		"hello": true,
    49  		"world": true,
    50  		"x":     false,
    51  		"foo":   false,
    52  	}
    53  	for k, want := range m {
    54  		got := f.MayContain([]byte(k))
    55  		if got != want {
    56  			t.Errorf("MayContain: k=%q: got %v, want %v", k, got, want)
    57  		}
    58  	}
    59  }
    60  
    61  func TestBloomFilter(t *testing.T) {
    62  	nextLength := func(x int) int {
    63  		if x < 10 {
    64  			return x + 1
    65  		}
    66  		if x < 100 {
    67  			return x + 10
    68  		}
    69  		if x < 1000 {
    70  			return x + 100
    71  		}
    72  		return x + 1000
    73  	}
    74  	le32 := func(i int) []byte {
    75  		b := make([]byte, 4)
    76  		b[0] = uint8(uint32(i) >> 0)
    77  		b[1] = uint8(uint32(i) >> 8)
    78  		b[2] = uint8(uint32(i) >> 16)
    79  		b[3] = uint8(uint32(i) >> 24)
    80  		return b
    81  	}
    82  
    83  	nMediocreFilters, nGoodFilters := 0, 0
    84  loop:
    85  	for length := 1; length <= 10000; length = nextLength(length) {
    86  		keys := make([][]byte, 0, length)
    87  		for i := 0; i < length; i++ {
    88  			keys = append(keys, le32(i))
    89  		}
    90  		f := newTableFilter(nil, keys, 10)
    91  		// The size of the table bloom filter is measured in multiples of the
    92  		// cache line size. The '+2' contribution captures the rounding up in the
    93  		// length division plus preferring an odd number of cache lines. As such,
    94  		// this formula isn't exact, but the exact formula is hard to read.
    95  		maxLen := 5 + ((length*10)/cacheLineBits+2)*cacheLineSize
    96  		if len(f) > maxLen {
    97  			t.Errorf("length=%d: len(f)=%d > max len %d", length, len(f), maxLen)
    98  			continue
    99  		}
   100  
   101  		// All added keys must match.
   102  		for _, key := range keys {
   103  			if !f.MayContain(key) {
   104  				t.Errorf("length=%d: did not contain key %q", length, key)
   105  				continue loop
   106  			}
   107  		}
   108  
   109  		// Check false positive rate.
   110  		nFalsePositive := 0
   111  		for i := 0; i < 10000; i++ {
   112  			if f.MayContain(le32(1e9 + i)) {
   113  				nFalsePositive++
   114  			}
   115  		}
   116  		if nFalsePositive > 0.02*10000 {
   117  			t.Errorf("length=%d: %d false positives in 10000", length, nFalsePositive)
   118  			continue
   119  		}
   120  		if nFalsePositive > 0.0125*10000 {
   121  			nMediocreFilters++
   122  		} else {
   123  			nGoodFilters++
   124  		}
   125  	}
   126  
   127  	if nMediocreFilters > nGoodFilters/5 {
   128  		t.Errorf("%d mediocre filters but only %d good filters", nMediocreFilters, nGoodFilters)
   129  	}
   130  }
   131  
   132  func TestHash(t *testing.T) {
   133  	// The magic want numbers come from running the C++ leveldb code in hash.cc.
   134  	testCases := []struct {
   135  		s    string
   136  		want uint32
   137  	}{
   138  		{"", 0xbc9f1d34},
   139  		{"g", 0xd04a8bda},
   140  		{"go", 0x3e0b0745},
   141  		{"gop", 0x0c326610},
   142  		{"goph", 0x8c9d6390},
   143  		{"gophe", 0x9bfd4b0a},
   144  		{"gopher", 0xa78edc7c},
   145  		{"I had a dream it would end this way.", 0xe14a9db9},
   146  	}
   147  	for _, tc := range testCases {
   148  		if got := hash([]byte(tc.s)); got != tc.want {
   149  			t.Errorf("s=%q: got 0x%08x, want 0x%08x", tc.s, got, tc.want)
   150  		}
   151  	}
   152  }