github.com/cockroachdb/pebble@v1.1.2/bloom/bloom.go (about) 1 // Copyright 2013 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 // Package bloom implements Bloom filters. 6 package bloom // import "github.com/cockroachdb/pebble/bloom" 7 8 import ( 9 "encoding/binary" 10 "fmt" 11 "sync" 12 13 "github.com/cockroachdb/pebble/internal/base" 14 ) 15 16 const ( 17 cacheLineSize = 64 18 cacheLineBits = cacheLineSize * 8 19 ) 20 21 type tableFilter []byte 22 23 func (f tableFilter) MayContain(key []byte) bool { 24 if len(f) <= 5 { 25 return false 26 } 27 n := len(f) - 5 28 nProbes := f[n] 29 nLines := binary.LittleEndian.Uint32(f[n+1:]) 30 cacheLineBits := 8 * (uint32(n) / nLines) 31 32 h := hash(key) 33 delta := h>>17 | h<<15 34 b := (h % nLines) * cacheLineBits 35 36 for j := uint8(0); j < nProbes; j++ { 37 bitPos := b + (h % cacheLineBits) 38 if f[bitPos/8]&(1<<(bitPos%8)) == 0 { 39 return false 40 } 41 h += delta 42 } 43 return true 44 } 45 46 func calculateProbes(bitsPerKey int) uint32 { 47 // We intentionally round down to reduce probing cost a little bit 48 n := uint32(float64(bitsPerKey) * 0.69) // 0.69 =~ ln(2) 49 if n < 1 { 50 n = 1 51 } 52 if n > 30 { 53 n = 30 54 } 55 return n 56 } 57 58 // extend appends n zero bytes to b. It returns the overall slice (of length 59 // n+len(originalB)) and the slice of n trailing zeroes. 60 func extend(b []byte, n int) (overall, trailer []byte) { 61 want := n + len(b) 62 if want <= cap(b) { 63 overall = b[:want] 64 trailer = overall[len(b):] 65 for i := range trailer { 66 trailer[i] = 0 67 } 68 } else { 69 // Grow the capacity exponentially, with a 1KiB minimum. 70 c := 1024 71 for c < want { 72 c += c / 4 73 } 74 overall = make([]byte, want, c) 75 trailer = overall[len(b):] 76 copy(overall, b) 77 } 78 return overall, trailer 79 } 80 81 // hash implements a hashing algorithm similar to the Murmur hash. 
func hash(b []byte) uint32 {
	const (
		seed = 0xbc9f1d34
		m    = 0xc6a4a793
	)
	// All arithmetic is on uint32 and wraps modulo 2^32.
	h := uint32(seed) ^ (uint32(len(b)) * m)

	// Consume the input four bytes at a time, read as a little-endian word.
	for len(b) >= 4 {
		word := uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
		h = (h + word) * m
		h ^= h >> 16
		b = b[4:]
	}

	// Mix in the 1-3 leftover bytes, if any. Each byte is first converted to a
	// signed 8-bit integer, which is necessary to match RocksDB's behavior:
	// Go's `byte` is unsigned, and going through int8 sign-extends the value
	// when it is widened to 32 bits. For the value 250 (bit pattern 11111010):
	//
	//   uint32(250)       = 00000000000000000000000011111010
	//   uint32(int8(250)) = 11111111111111111111111111111010
	//
	// The original LevelDB code did not cast explicitly, which left the
	// behavior dependent on whether C characters were signed or unsigned (a
	// gcc compiler flag, -funsigned-char).
	if rem := len(b); rem > 0 {
		if rem >= 3 {
			h += uint32(int8(b[2])) << 16
		}
		if rem >= 2 {
			h += uint32(int8(b[1])) << 8
		}
		h += uint32(int8(b[0]))
		h *= m
		h ^= h >> 24
	}
	return h
}

// hashBlockLen is the number of hashes stored in one hashBlock.
const hashBlockLen = 16384

// hashBlock is a fixed-size chunk of key hashes.
type hashBlock [hashBlockLen]uint32

// hashBlockPool recycles hashBlock allocations.
var hashBlockPool = sync.Pool{
	New: func() interface{} {
		return new(hashBlock)
	},
}

// tableFilterWriter accumulates key hashes (see AddKey) and encodes them into
// a filter (see Finish).
type tableFilterWriter struct {
	// bitsPerKey is the bits-per-key setting supplied at construction.
	bitsPerKey int

	// numHashes is the number of hashes currently stored across blocks.
	numHashes int
	// We store the hashes in blocks.
	blocks []*hashBlock
	// lastHash is the most recently stored hash; AddKey uses it to skip a
	// key whose hash equals that of the previous key.
	lastHash uint32

	// Initial "in-line" storage for the blocks slice (to avoid some small
	// allocations).
	blocksBuf [16]*hashBlock
}

// newTableFilterWriter returns an empty writer using the given bits-per-key
// setting. Its blocks slice starts out backed by the writer's own blocksBuf
// array.
func newTableFilterWriter(bitsPerKey int) *tableFilterWriter {
	w := new(tableFilterWriter)
	w.bitsPerKey = bitsPerKey
	w.blocks = w.blocksBuf[:0]
	return w
}

// AddKey implements the base.FilterWriter interface.
153 func (w *tableFilterWriter) AddKey(key []byte) { 154 h := hash(key) 155 if w.numHashes != 0 && h == w.lastHash { 156 return 157 } 158 ofs := w.numHashes % hashBlockLen 159 if ofs == 0 { 160 // Time for a new block. 161 w.blocks = append(w.blocks, hashBlockPool.Get().(*hashBlock)) 162 } 163 w.blocks[len(w.blocks)-1][ofs] = h 164 w.numHashes++ 165 w.lastHash = h 166 } 167 168 // Finish implements the base.FilterWriter interface. 169 func (w *tableFilterWriter) Finish(buf []byte) []byte { 170 // The table filter format matches the RocksDB full-file filter format. 171 var nLines int 172 if w.numHashes != 0 { 173 nLines = (w.numHashes*w.bitsPerKey + cacheLineBits - 1) / (cacheLineBits) 174 // Make nLines an odd number to make sure more bits are involved when 175 // determining which block. 176 if nLines%2 == 0 { 177 nLines++ 178 } 179 } 180 181 nBytes := nLines * cacheLineSize 182 // +5: 4 bytes for num-lines, 1 byte for num-probes 183 buf, filter := extend(buf, nBytes+5) 184 185 if nLines != 0 { 186 nProbes := calculateProbes(w.bitsPerKey) 187 for bIdx, b := range w.blocks { 188 length := hashBlockLen 189 if bIdx == len(w.blocks)-1 && w.numHashes%hashBlockLen != 0 { 190 length = w.numHashes % hashBlockLen 191 } 192 for _, h := range b[:length] { 193 delta := h>>17 | h<<15 // rotate right 17 bits 194 b := (h % uint32(nLines)) * (cacheLineBits) 195 for i := uint32(0); i < nProbes; i++ { 196 bitPos := b + (h % cacheLineBits) 197 filter[bitPos/8] |= (1 << (bitPos % 8)) 198 h += delta 199 } 200 } 201 } 202 filter[nBytes] = byte(nProbes) 203 binary.LittleEndian.PutUint32(filter[nBytes+1:], uint32(nLines)) 204 } 205 206 // Release the hash blocks. 207 for i, b := range w.blocks { 208 hashBlockPool.Put(b) 209 w.blocks[i] = nil 210 } 211 w.blocks = w.blocks[:0] 212 w.numHashes = 0 213 return buf 214 } 215 216 // FilterPolicy implements the FilterPolicy interface from the pebble package. 217 // 218 // The integer value is the approximate number of bits used per key. 
A good 219 // value is 10, which yields a filter with ~ 1% false positive rate. 220 type FilterPolicy int 221 222 var _ base.FilterPolicy = FilterPolicy(0) 223 224 // Name implements the pebble.FilterPolicy interface. 225 func (p FilterPolicy) Name() string { 226 // This string looks arbitrary, but its value is written to LevelDB .sst 227 // files, and should be this exact value to be compatible with those files 228 // and with the C++ LevelDB code. 229 return "rocksdb.BuiltinBloomFilter" 230 } 231 232 // MayContain implements the pebble.FilterPolicy interface. 233 func (p FilterPolicy) MayContain(ftype base.FilterType, f, key []byte) bool { 234 switch ftype { 235 case base.TableFilter: 236 return tableFilter(f).MayContain(key) 237 default: 238 panic(fmt.Sprintf("unknown filter type: %v", ftype)) 239 } 240 } 241 242 // NewWriter implements the pebble.FilterPolicy interface. 243 func (p FilterPolicy) NewWriter(ftype base.FilterType) base.FilterWriter { 244 switch ftype { 245 case base.TableFilter: 246 return newTableFilterWriter(int(p)) 247 default: 248 panic(fmt.Sprintf("unknown filter type: %v", ftype)) 249 } 250 }