github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/bloom/bloom_test.go (about)

     1  // Copyright 2013 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package bloom
     6  
     7  import (
     8  	"crypto/rand"
     9  	"strings"
    10  	"testing"
    11  
    12  	"github.com/cockroachdb/pebble/internal/base"
    13  	"github.com/stretchr/testify/require"
    14  )
    15  
    16  func (f tableFilter) String() string {
    17  	var buf strings.Builder
    18  	for i, x := range f {
    19  		if i > 0 {
    20  			if i%8 == 0 {
    21  				buf.WriteString("\n")
    22  			} else {
    23  				buf.WriteString("  ")
    24  			}
    25  		}
    26  
    27  		for j := uint(0); j < 8; j++ {
    28  			if x&(1<<(7-j)) != 0 {
    29  				buf.WriteString("1")
    30  			} else {
    31  				buf.WriteString(".")
    32  			}
    33  		}
    34  	}
    35  	buf.WriteString("\n")
    36  	return buf.String()
    37  }
    38  
    39  func newTableFilter(bitsPerKey int, keys ...[]byte) tableFilter {
    40  	w := FilterPolicy(bitsPerKey).NewWriter(base.TableFilter)
    41  	for _, key := range keys {
    42  		w.AddKey(key)
    43  	}
    44  	return tableFilter(w.Finish(nil))
    45  }
    46  
    47  func TestSmallBloomFilter(t *testing.T) {
    48  	f := newTableFilter(10, []byte("hello"), []byte("world"))
    49  
    50  	// The magic expected string comes from running RocksDB's util/bloom_test.cc:FullBloomTest.FullSmall.
    51  	want := `
    52  ........  ........  ........  .......1  ........  ........  ........  ........
    53  ........  .1......  ........  .1......  ........  ........  ........  ........
    54  ...1....  ........  ........  ........  ........  ........  ........  ........
    55  ........  ........  ........  ........  ........  ........  ........  ...1....
    56  ........  ........  ........  ........  .....1..  ........  ........  ........
    57  .......1  ........  ........  ........  ........  ........  .1......  ........
    58  ........  ........  ........  ........  ........  ...1....  ........  ........
    59  .......1  ........  ........  ........  .1...1..  ........  ........  ........
    60  .....11.  .......1  ........  ........  ........
    61  `
    62  	want = strings.TrimLeft(want, "\n")
    63  	require.EqualValues(t, want, f.String())
    64  
    65  	m := map[string]bool{
    66  		"hello": true,
    67  		"world": true,
    68  		"x":     false,
    69  		"foo":   false,
    70  	}
    71  	for k, want := range m {
    72  		require.EqualValues(t, want, f.MayContain([]byte(k)))
    73  	}
    74  }
    75  
    76  func TestBloomFilter(t *testing.T) {
    77  	nextLength := func(x int) int {
    78  		if x < 10 {
    79  			return x + 1
    80  		}
    81  		if x < 100 {
    82  			return x + 10
    83  		}
    84  		if x < 1000 {
    85  			return x + 100
    86  		}
    87  		return x + 1000
    88  	}
    89  	le32 := func(i int) []byte {
    90  		b := make([]byte, 4)
    91  		b[0] = uint8(uint32(i) >> 0)
    92  		b[1] = uint8(uint32(i) >> 8)
    93  		b[2] = uint8(uint32(i) >> 16)
    94  		b[3] = uint8(uint32(i) >> 24)
    95  		return b
    96  	}
    97  
    98  	nMediocreFilters, nGoodFilters := 0, 0
    99  loop:
   100  	for length := 1; length <= 10000; length = nextLength(length) {
   101  		keys := make([][]byte, 0, length)
   102  		for i := 0; i < length; i++ {
   103  			keys = append(keys, le32(i))
   104  		}
   105  		f := newTableFilter(10, keys...)
   106  		// The size of the table bloom filter is measured in multiples of the
   107  		// cache line size. The '+2' contribution captures the rounding up in the
   108  		// length division plus preferring an odd number of cache lines. As such,
   109  		// this formula isn't exact, but the exact formula is hard to read.
   110  		maxLen := 5 + ((length*10)/cacheLineBits+2)*cacheLineSize
   111  		if len(f) > maxLen {
   112  			t.Errorf("length=%d: len(f)=%d > max len %d", length, len(f), maxLen)
   113  			continue
   114  		}
   115  
   116  		// All added keys must match.
   117  		for _, key := range keys {
   118  			if !f.MayContain(key) {
   119  				t.Errorf("length=%d: did not contain key %q", length, key)
   120  				continue loop
   121  			}
   122  		}
   123  
   124  		// Check false positive rate.
   125  		nFalsePositive := 0
   126  		for i := 0; i < 10000; i++ {
   127  			if f.MayContain(le32(1e9 + i)) {
   128  				nFalsePositive++
   129  			}
   130  		}
   131  		if nFalsePositive > 0.02*10000 {
   132  			t.Errorf("length=%d: %d false positives in 10000", length, nFalsePositive)
   133  			continue
   134  		}
   135  		if nFalsePositive > 0.0125*10000 {
   136  			nMediocreFilters++
   137  		} else {
   138  			nGoodFilters++
   139  		}
   140  	}
   141  
   142  	if nMediocreFilters > nGoodFilters/5 {
   143  		t.Errorf("%d mediocre filters but only %d good filters", nMediocreFilters, nGoodFilters)
   144  	}
   145  }
   146  
   147  func TestHash(t *testing.T) {
   148  	testCases := []struct {
   149  		s        string
   150  		expected uint32
   151  	}{
   152  		// The magic expected numbers come from RocksDB's util/hash_test.cc:TestHash.
   153  		{"", 3164544308},
   154  		{"\x08", 422599524},
   155  		{"\x17", 3168152998},
   156  		{"\x9a", 3195034349},
   157  		{"\x1c", 2651681383},
   158  		{"\x4d\x76", 2447836956},
   159  		{"\x52\xd5", 3854228105},
   160  		{"\x91\xf7", 31066776},
   161  		{"\xd6\x27", 1806091603},
   162  		{"\x30\x46\x0b", 3808221797},
   163  		{"\x56\xdc\xd6", 2157698265},
   164  		{"\xd4\x52\x33", 1721992661},
   165  		{"\x6a\xb5\xf4", 2469105222},
   166  		{"\x67\x53\x81\x1c", 118283265},
   167  		{"\x69\xb8\xc0\x88", 3416318611},
   168  		{"\x1e\x84\xaf\x2d", 3315003572},
   169  		{"\x46\xdc\x54\xbe", 447346355},
   170  		{"\xd0\x7a\x6e\xea\x56", 4255445370},
   171  		{"\x86\x83\xd5\xa4\xd8", 2390603402},
   172  		{"\xb7\x46\xbb\x77\xce", 2048907743},
   173  		{"\x6c\xa8\xbc\xe5\x99", 2177978500},
   174  		{"\x5c\x5e\xe1\xa0\x73\x81", 1036846008},
   175  		{"\x08\x5d\x73\x1c\xe5\x2e", 229980482},
   176  		{"\x42\xfb\xf2\x52\xb4\x10", 3655585422},
   177  		{"\x73\xe1\xff\x56\x9c\xce", 3502708029},
   178  		{"\x5c\xbe\x97\x75\x54\x9a\x52", 815120748},
   179  		{"\x16\x82\x39\x49\x88\x2b\x36", 3056033698},
   180  		{"\x59\x77\xf0\xa7\x24\xf4\x78", 587205227},
   181  		{"\xd3\xa5\x7c\x0e\xc0\x02\x07", 2030937252},
   182  		{"\x31\x1b\x98\x75\x96\x22\xd3\x9a", 469635402},
   183  		{"\x38\xd6\xf7\x28\x20\xb4\x8a\xe9", 3530274698},
   184  		{"\xbb\x18\x5d\xf4\x12\x03\xf7\x99", 1974545809},
   185  		{"\x80\xd4\x3b\x3b\xae\x22\xa2\x78", 3563570120},
   186  		{"\x1a\xb5\xd0\xfe\xab\xc3\x61\xb2\x99", 2706087434},
   187  		{"\x8e\x4a\xc3\x18\x20\x2f\x06\xe6\x3c", 1534654151},
   188  		{"\xb6\xc0\xdd\x05\x3f\xc4\x86\x4c\xef", 2355554696},
   189  		{"\x9a\x5f\x78\x0d\xaf\x50\xe1\x1f\x55", 1400800912},
   190  		{"\x22\x6f\x39\x1f\xf8\xdd\x4f\x52\x17\x94", 3420325137},
   191  		{"\x32\x89\x2a\x75\x48\x3a\x4a\x02\x69\xdd", 3427803584},
   192  		{"\x06\x92\x5c\xf4\x88\x0e\x7e\x68\x38\x3e", 1152407945},
   193  		{"\xbd\x2c\x63\x38\xbf\xe9\x78\xb7\xbf\x15", 3382479516},
   194  	}
   195  	for _, tc := range testCases {
   196  		t.Run("", func(t *testing.T) {
   197  			require.EqualValues(t, tc.expected, hash([]byte(tc.s)))
   198  		})
   199  	}
   200  }
   201  
   202  func BenchmarkBloomFilter(b *testing.B) {
   203  	const keyLen = 128
   204  	const numKeys = 1024
   205  	keys := make([][]byte, numKeys)
   206  	for i := range keys {
   207  		keys[i] = make([]byte, keyLen)
   208  		_, _ = rand.Read(keys[i])
   209  	}
   210  	b.ResetTimer()
   211  	policy := FilterPolicy(10)
   212  	for i := 0; i < b.N; i++ {
   213  		w := policy.NewWriter(base.TableFilter)
   214  		for _, key := range keys {
   215  			w.AddKey(key)
   216  		}
   217  		w.Finish(nil)
   218  	}
   219  }