github.com/andy2046/gopie@v0.7.0/pkg/bloom/bloom.go (about) 1 // Package bloom implements a Bloom filter. 2 package bloom 3 4 import ( 5 "math" 6 ) 7 8 type ( 9 // Bloom is the standard bloom filter. 10 Bloom interface { 11 Add([]byte) 12 AddString(string) 13 Exist([]byte) bool 14 ExistString(string) bool 15 FalsePositive() float64 16 GuessFalsePositive(uint64) float64 17 M() uint64 18 K() uint64 19 N() uint64 20 Clear() 21 } 22 23 // CountingBloom is the bloom filter which allows deletion of entries. 24 // Take note that an 16-bit counter is maintained for each entry. 25 CountingBloom interface { 26 Bloom 27 Remove([]byte) 28 RemoveString(string) 29 } 30 31 bloomFilter struct { 32 bitmap []uint16 // bloom filter counter 33 k uint64 // number of hash functions 34 n uint64 // number of elements in the bloom filter 35 m uint64 // size of the bloom filter bits 36 shift uint8 // the shift to get high/low bit fragments 37 } 38 ) 39 40 const ( 41 ln2 float64 = 0.6931471805599453 // math.Log(2) 42 maxCountingBloomSize uint64 = 1 << 37 // to avoid panic: makeslice: len out of range 43 maxCounter uint16 = 65535 44 ) 45 46 // New creates counting bloom filter based on the provided m/k. 47 // m is the size of bloom filter bits. 48 // k is the number of hash functions. 49 func New(m, k uint64) CountingBloom { 50 mm, exponent := adjustM(m) 51 return &bloomFilter{ 52 bitmap: make([]uint16, mm), 53 m: mm - 1, // x % 2^i = x & (2^i - 1) 54 k: k, 55 shift: 64 - exponent, 56 } 57 } 58 59 // NewGuess estimates m/k based on the provided n/p then creates counting bloom filter. 60 // n is the estimated number of elements in the bloom filter. 61 // p is the false positive probability. 62 func NewGuess(n uint64, p float64) CountingBloom { 63 m, k := Guess(n, p) 64 return New(m, k) 65 } 66 67 // Guess estimates m/k based on the provided n/p. 68 func Guess(n uint64, p float64) (m, k uint64) { 69 mm := math.Ceil(-1 * float64(n) * math.Log(p) / math.Pow(ln2, 2)) 70 kk := math.Ceil(ln2 * mm / float64(n)) 71 m, k = uint64(mm), uint64(kk) 72 return 73 } 74 75 func (bf *bloomFilter) Add(entry []byte) { 76 hash := sipHash(entry) 77 h := hash >> bf.shift 78 l := hash << bf.shift >> bf.shift 79 var idx uint64 80 for i := uint64(0); i < bf.k; i++ { 81 idx = (h + i*l) & bf.m 82 // avoid overflow 83 if bf.bitmap[idx] < maxCounter { 84 bf.bitmap[idx]++ 85 } 86 } 87 bf.n++ 88 } 89 90 func (bf *bloomFilter) AddString(entry string) { 91 bf.Add([]byte(entry)) 92 } 93 94 func (bf *bloomFilter) Remove(entry []byte) { 95 hash := sipHash(entry) 96 h := hash >> bf.shift 97 l := hash << bf.shift >> bf.shift 98 var idx uint64 99 for i := uint64(0); i < bf.k; i++ { 100 idx = (h + i*l) & bf.m 101 if bf.bitmap[idx] == 0 { 102 return 103 } 104 } 105 106 for i := uint64(0); i < bf.k; i++ { 107 idx = (h + i*l) & bf.m 108 // avoid overflow 109 if bf.bitmap[idx] > 0 { 110 bf.bitmap[idx]-- 111 } 112 } 113 bf.n-- 114 } 115 116 func (bf *bloomFilter) RemoveString(entry string) { 117 bf.Remove([]byte(entry)) 118 } 119 120 func (bf *bloomFilter) Exist(entry []byte) bool { 121 hash := sipHash(entry) 122 h := hash >> bf.shift 123 l := hash << bf.shift >> bf.shift 124 var idx uint64 125 for i := uint64(0); i < bf.k; i++ { 126 idx = (h + i*l) & bf.m 127 if bf.bitmap[idx] == 0 { 128 return false 129 } 130 } 131 132 return true 133 } 134 135 func (bf *bloomFilter) ExistString(entry string) bool { 136 return bf.Exist([]byte(entry)) 137 } 138 139 func (bf *bloomFilter) FalsePositive() float64 { 140 return math.Pow((1 - math.Exp(-float64(bf.k*bf.n)/float64(bf.m))), 141 float64(bf.k)) 142 } 143 144 func (bf *bloomFilter) GuessFalsePositive(n uint64) float64 { 145 return math.Pow((1 - math.Exp(-float64(bf.k*n)/float64(bf.m))), 146 float64(bf.k)) 147 } 148 149 func (bf *bloomFilter) M() uint64 { 150 return bf.m + 1 151 } 152 153 func (bf *bloomFilter) K() uint64 { 154 return bf.k 155 } 156 157 func (bf *bloomFilter) N() uint64 { 158 return bf.n 159 } 160 161 func (bf *bloomFilter) Clear() { 162 for i := range bf.bitmap { 163 bf.bitmap[i] = 0 164 } 165 bf.n = 0 166 } 167 168 func adjustM(x uint64) (m uint64, exponent uint8) { 169 if x < 512 { 170 x = 512 171 } 172 m = uint64(1) 173 for m < x && m < maxCountingBloomSize { 174 m <<= 1 175 exponent++ 176 } 177 return m, exponent 178 }