github.com/df-mc/goleveldb@v1.1.9/leveldb/filter/bloom.go (about) 1 // Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> 2 // All rights reserved. 3 // 4 // Use of this source code is governed by a BSD-style license that can be 5 // found in the LICENSE file. 6 7 package filter 8 9 import ( 10 "github.com/df-mc/goleveldb/leveldb/util" 11 ) 12 13 func bloomHash(key []byte) uint32 { 14 return util.Hash(key, 0xbc9f1d34) 15 } 16 17 type bloomFilter int 18 19 // Name: The bloom filter serializes its parameters and is backward compatible 20 // with respect to them. Therefor, its parameters are not added to its 21 // name. 22 func (bloomFilter) Name() string { 23 return "leveldb.BuiltinBloomFilter" 24 } 25 26 func (f bloomFilter) Contains(filter, key []byte) bool { 27 nBytes := len(filter) - 1 28 if nBytes < 1 { 29 return false 30 } 31 nBits := uint32(nBytes * 8) 32 33 // Use the encoded k so that we can read filters generated by 34 // bloom filters created using different parameters. 35 k := filter[nBytes] 36 if k > 30 { 37 // Reserved for potentially new encodings for short bloom filters. 38 // Consider it a match. 39 return true 40 } 41 42 kh := bloomHash(key) 43 delta := (kh >> 17) | (kh << 15) // Rotate right 17 bits 44 for j := uint8(0); j < k; j++ { 45 bitpos := kh % nBits 46 if (uint32(filter[bitpos/8]) & (1 << (bitpos % 8))) == 0 { 47 return false 48 } 49 kh += delta 50 } 51 return true 52 } 53 54 func (f bloomFilter) NewGenerator() FilterGenerator { 55 // Round down to reduce probing cost a little bit. 56 k := uint8(f * 69 / 100) // 0.69 =~ ln(2) 57 if k < 1 { 58 k = 1 59 } else if k > 30 { 60 k = 30 61 } 62 return &bloomFilterGenerator{ 63 n: int(f), 64 k: k, 65 } 66 } 67 68 type bloomFilterGenerator struct { 69 n int 70 k uint8 71 72 keyHashes []uint32 73 } 74 75 func (g *bloomFilterGenerator) Add(key []byte) { 76 // Use double-hashing to generate a sequence of hash values. 77 // See analysis in [Kirsch,Mitzenmacher 2006]. 78 g.keyHashes = append(g.keyHashes, bloomHash(key)) 79 } 80 81 func (g *bloomFilterGenerator) Generate(b Buffer) { 82 // Compute bloom filter size (in both bits and bytes) 83 nBits := uint32(len(g.keyHashes) * g.n) 84 // For small n, we can see a very high false positive rate. Fix it 85 // by enforcing a minimum bloom filter length. 86 if nBits < 64 { 87 nBits = 64 88 } 89 nBytes := (nBits + 7) / 8 90 nBits = nBytes * 8 91 92 dest := b.Alloc(int(nBytes) + 1) 93 dest[nBytes] = g.k 94 for _, kh := range g.keyHashes { 95 delta := (kh >> 17) | (kh << 15) // Rotate right 17 bits 96 for j := uint8(0); j < g.k; j++ { 97 bitpos := kh % nBits 98 dest[bitpos/8] |= (1 << (bitpos % 8)) 99 kh += delta 100 } 101 } 102 103 g.keyHashes = g.keyHashes[:0] 104 } 105 106 // NewBloomFilter creates a new initialized bloom filter for given 107 // bitsPerKey. 108 // 109 // Since bitsPerKey is persisted individually for each bloom filter 110 // serialization, bloom filters are backwards compatible with respect to 111 // changing bitsPerKey. This means that no big performance penalty will 112 // be experienced when changing the parameter. See documentation for 113 // opt.Options.Filter for more information. 114 func NewBloomFilter(bitsPerKey int) Filter { 115 return bloomFilter(bitsPerKey) 116 }