github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/bloom/bloom_test.go (about) 1 // Copyright 2013 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package bloom 6 7 import ( 8 "crypto/rand" 9 "strings" 10 "testing" 11 12 "github.com/cockroachdb/pebble/internal/base" 13 "github.com/stretchr/testify/require" 14 ) 15 16 func (f tableFilter) String() string { 17 var buf strings.Builder 18 for i, x := range f { 19 if i > 0 { 20 if i%8 == 0 { 21 buf.WriteString("\n") 22 } else { 23 buf.WriteString(" ") 24 } 25 } 26 27 for j := uint(0); j < 8; j++ { 28 if x&(1<<(7-j)) != 0 { 29 buf.WriteString("1") 30 } else { 31 buf.WriteString(".") 32 } 33 } 34 } 35 buf.WriteString("\n") 36 return buf.String() 37 } 38 39 func newTableFilter(bitsPerKey int, keys ...[]byte) tableFilter { 40 w := FilterPolicy(bitsPerKey).NewWriter(base.TableFilter) 41 for _, key := range keys { 42 w.AddKey(key) 43 } 44 return tableFilter(w.Finish(nil)) 45 } 46 47 func TestSmallBloomFilter(t *testing.T) { 48 f := newTableFilter(10, []byte("hello"), []byte("world")) 49 50 // The magic expected string comes from running RocksDB's util/bloom_test.cc:FullBloomTest.FullSmall. 51 want := ` 52 ........ ........ ........ .......1 ........ ........ ........ ........ 53 ........ .1...... ........ .1...... ........ ........ ........ ........ 54 ...1.... ........ ........ ........ ........ ........ ........ ........ 55 ........ ........ ........ ........ ........ ........ ........ ...1.... 56 ........ ........ ........ ........ .....1.. ........ ........ ........ 57 .......1 ........ ........ ........ ........ ........ .1...... ........ 58 ........ ........ ........ ........ ........ ...1.... ........ ........ 59 .......1 ........ ........ ........ .1...1.. ........ ........ ........ 60 .....11. .......1 ........ ........ ........ 61 ` 62 want = strings.TrimLeft(want, "\n") 63 require.EqualValues(t, want, f.String()) 64 65 m := map[string]bool{ 66 "hello": true, 67 "world": true, 68 "x": false, 69 "foo": false, 70 } 71 for k, want := range m { 72 require.EqualValues(t, want, f.MayContain([]byte(k))) 73 } 74 } 75 76 func TestBloomFilter(t *testing.T) { 77 nextLength := func(x int) int { 78 if x < 10 { 79 return x + 1 80 } 81 if x < 100 { 82 return x + 10 83 } 84 if x < 1000 { 85 return x + 100 86 } 87 return x + 1000 88 } 89 le32 := func(i int) []byte { 90 b := make([]byte, 4) 91 b[0] = uint8(uint32(i) >> 0) 92 b[1] = uint8(uint32(i) >> 8) 93 b[2] = uint8(uint32(i) >> 16) 94 b[3] = uint8(uint32(i) >> 24) 95 return b 96 } 97 98 nMediocreFilters, nGoodFilters := 0, 0 99 loop: 100 for length := 1; length <= 10000; length = nextLength(length) { 101 keys := make([][]byte, 0, length) 102 for i := 0; i < length; i++ { 103 keys = append(keys, le32(i)) 104 } 105 f := newTableFilter(10, keys...) 106 // The size of the table bloom filter is measured in multiples of the 107 // cache line size. The '+2' contribution captures the rounding up in the 108 // length division plus preferring an odd number of cache lines. As such, 109 // this formula isn't exact, but the exact formula is hard to read. 110 maxLen := 5 + ((length*10)/cacheLineBits+2)*cacheLineSize 111 if len(f) > maxLen { 112 t.Errorf("length=%d: len(f)=%d > max len %d", length, len(f), maxLen) 113 continue 114 } 115 116 // All added keys must match. 117 for _, key := range keys { 118 if !f.MayContain(key) { 119 t.Errorf("length=%d: did not contain key %q", length, key) 120 continue loop 121 } 122 } 123 124 // Check false positive rate. 125 nFalsePositive := 0 126 for i := 0; i < 10000; i++ { 127 if f.MayContain(le32(1e9 + i)) { 128 nFalsePositive++ 129 } 130 } 131 if nFalsePositive > 0.02*10000 { 132 t.Errorf("length=%d: %d false positives in 10000", length, nFalsePositive) 133 continue 134 } 135 if nFalsePositive > 0.0125*10000 { 136 nMediocreFilters++ 137 } else { 138 nGoodFilters++ 139 } 140 } 141 142 if nMediocreFilters > nGoodFilters/5 { 143 t.Errorf("%d mediocre filters but only %d good filters", nMediocreFilters, nGoodFilters) 144 } 145 } 146 147 func TestHash(t *testing.T) { 148 testCases := []struct { 149 s string 150 expected uint32 151 }{ 152 // The magic expected numbers come from RocksDB's util/hash_test.cc:TestHash. 153 {"", 3164544308}, 154 {"\x08", 422599524}, 155 {"\x17", 3168152998}, 156 {"\x9a", 3195034349}, 157 {"\x1c", 2651681383}, 158 {"\x4d\x76", 2447836956}, 159 {"\x52\xd5", 3854228105}, 160 {"\x91\xf7", 31066776}, 161 {"\xd6\x27", 1806091603}, 162 {"\x30\x46\x0b", 3808221797}, 163 {"\x56\xdc\xd6", 2157698265}, 164 {"\xd4\x52\x33", 1721992661}, 165 {"\x6a\xb5\xf4", 2469105222}, 166 {"\x67\x53\x81\x1c", 118283265}, 167 {"\x69\xb8\xc0\x88", 3416318611}, 168 {"\x1e\x84\xaf\x2d", 3315003572}, 169 {"\x46\xdc\x54\xbe", 447346355}, 170 {"\xd0\x7a\x6e\xea\x56", 4255445370}, 171 {"\x86\x83\xd5\xa4\xd8", 2390603402}, 172 {"\xb7\x46\xbb\x77\xce", 2048907743}, 173 {"\x6c\xa8\xbc\xe5\x99", 2177978500}, 174 {"\x5c\x5e\xe1\xa0\x73\x81", 1036846008}, 175 {"\x08\x5d\x73\x1c\xe5\x2e", 229980482}, 176 {"\x42\xfb\xf2\x52\xb4\x10", 3655585422}, 177 {"\x73\xe1\xff\x56\x9c\xce", 3502708029}, 178 {"\x5c\xbe\x97\x75\x54\x9a\x52", 815120748}, 179 {"\x16\x82\x39\x49\x88\x2b\x36", 3056033698}, 180 {"\x59\x77\xf0\xa7\x24\xf4\x78", 587205227}, 181 {"\xd3\xa5\x7c\x0e\xc0\x02\x07", 2030937252}, 182 {"\x31\x1b\x98\x75\x96\x22\xd3\x9a", 469635402}, 183 {"\x38\xd6\xf7\x28\x20\xb4\x8a\xe9", 3530274698}, 184 {"\xbb\x18\x5d\xf4\x12\x03\xf7\x99", 1974545809}, 185 {"\x80\xd4\x3b\x3b\xae\x22\xa2\x78", 3563570120}, 186 {"\x1a\xb5\xd0\xfe\xab\xc3\x61\xb2\x99", 2706087434}, 187 {"\x8e\x4a\xc3\x18\x20\x2f\x06\xe6\x3c", 1534654151}, 188 {"\xb6\xc0\xdd\x05\x3f\xc4\x86\x4c\xef", 2355554696}, 189 {"\x9a\x5f\x78\x0d\xaf\x50\xe1\x1f\x55", 1400800912}, 190 {"\x22\x6f\x39\x1f\xf8\xdd\x4f\x52\x17\x94", 3420325137}, 191 {"\x32\x89\x2a\x75\x48\x3a\x4a\x02\x69\xdd", 3427803584}, 192 {"\x06\x92\x5c\xf4\x88\x0e\x7e\x68\x38\x3e", 1152407945}, 193 {"\xbd\x2c\x63\x38\xbf\xe9\x78\xb7\xbf\x15", 3382479516}, 194 } 195 for _, tc := range testCases { 196 t.Run("", func(t *testing.T) { 197 require.EqualValues(t, tc.expected, hash([]byte(tc.s))) 198 }) 199 } 200 } 201 202 func BenchmarkBloomFilter(b *testing.B) { 203 const keyLen = 128 204 const numKeys = 1024 205 keys := make([][]byte, numKeys) 206 for i := range keys { 207 keys[i] = make([]byte, keyLen) 208 _, _ = rand.Read(keys[i]) 209 } 210 b.ResetTimer() 211 policy := FilterPolicy(10) 212 for i := 0; i < b.N; i++ { 213 w := policy.NewWriter(base.TableFilter) 214 for _, key := range keys { 215 w.AddKey(key) 216 } 217 w.Finish(nil) 218 } 219 }