github.com/coocood/badger@v1.5.1-0.20200528065104-c02ac3616d04/cache/z/bbloom.go (about) 1 // The MIT License (MIT) 2 // Copyright (c) 2014 Andreas Briese, eduToolbox@Bri-C GmbH, Sarstedt 3 4 // Permission is hereby granted, free of charge, to any person obtaining a copy of 5 // this software and associated documentation files (the "Software"), to deal in 6 // the Software without restriction, including without limitation the rights to 7 // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 8 // the Software, and to permit persons to whom the Software is furnished to do so, 9 // subject to the following conditions: 10 11 // The above copyright notice and this permission notice shall be included in all 12 // copies or substantial portions of the Software. 13 14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 19 // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 21 package z 22 23 import ( 24 "bytes" 25 "encoding/json" 26 "log" 27 "math" 28 "unsafe" 29 ) 30 31 // helper 32 var mask = []uint8{1, 2, 4, 8, 16, 32, 64, 128} 33 34 func getSize(ui64 uint64) (size uint64, exponent uint64) { 35 if ui64 < uint64(512) { 36 ui64 = uint64(512) 37 } 38 size = uint64(1) 39 for size < ui64 { 40 size <<= 1 41 exponent++ 42 } 43 return size, exponent 44 } 45 46 func calcSizeByWrongPositives(numEntries, wrongs float64) (uint64, uint64) { 47 size := -1 * numEntries * math.Log(wrongs) / math.Pow(float64(0.69314718056), 2) 48 locs := math.Ceil(float64(0.69314718056) * size / numEntries) 49 return uint64(size), uint64(locs) 50 } 51 52 // NewBloomFilter returns a new bloomfilter. 53 func NewBloomFilter(params ...float64) (bloomfilter *Bloom) { 54 var entries, locs uint64 55 if len(params) == 2 { 56 if params[1] < 1 { 57 entries, locs = calcSizeByWrongPositives(params[0], params[1]) 58 } else { 59 entries, locs = uint64(params[0]), uint64(params[1]) 60 } 61 } else { 62 log.Fatal("usage: New(float64(number_of_entries), float64(number_of_hashlocations))" + 63 " i.e. New(float64(1000), float64(3)) or New(float64(number_of_entries)," + 64 " float64(number_of_hashlocations)) i.e. New(float64(1000), float64(0.03))") 65 } 66 size, exponent := getSize(entries) 67 bloomfilter = &Bloom{ 68 sizeExp: exponent, 69 size: size - 1, 70 setLocs: locs, 71 shift: 64 - exponent, 72 } 73 bloomfilter.Size(size) 74 return bloomfilter 75 } 76 77 // Bloom filter 78 type Bloom struct { 79 bitset []uint64 80 ElemNum uint64 81 sizeExp uint64 82 size uint64 83 setLocs uint64 84 shift uint64 85 } 86 87 // <--- http://www.cse.yorku.ca/~oz/hash.html 88 // modified Berkeley DB Hash (32bit) 89 // hash is casted to l, h = 16bit fragments 90 // func (bl Bloom) absdbm(b *[]byte) (l, h uint64) { 91 // hash := uint64(len(*b)) 92 // for _, c := range *b { 93 // hash = uint64(c) + (hash << 6) + (hash << bl.sizeExp) - hash 94 // } 95 // h = hash >> bl.shift 96 // l = hash << bl.shift >> bl.shift 97 // return l, h 98 // } 99 100 // Add adds hash of a key to the bloomfilter. 101 func (bl *Bloom) Add(hash uint64) { 102 h := hash >> bl.shift 103 l := hash << bl.shift >> bl.shift 104 for i := uint64(0); i < bl.setLocs; i++ { 105 bl.Set((h + i*l) & bl.size) 106 bl.ElemNum++ 107 } 108 } 109 110 // Has checks if bit(s) for entry hash is/are set, 111 // returns true if the hash was added to the Bloom Filter. 112 func (bl Bloom) Has(hash uint64) bool { 113 h := hash >> bl.shift 114 l := hash << bl.shift >> bl.shift 115 for i := uint64(0); i < bl.setLocs; i++ { 116 switch bl.IsSet((h + i*l) & bl.size) { 117 case false: 118 return false 119 } 120 } 121 return true 122 } 123 124 // AddIfNotHas only Adds hash, if it's not present in the bloomfilter. 125 // Returns true if hash was added. 126 // Returns false if hash was already registered in the bloomfilter. 127 func (bl *Bloom) AddIfNotHas(hash uint64) bool { 128 if bl.Has(hash) { 129 return false 130 } 131 bl.Add(hash) 132 return true 133 } 134 135 // Size makes Bloom filter with as bitset of size sz. 136 func (bl *Bloom) Size(sz uint64) { 137 bl.bitset = make([]uint64, sz>>6) 138 } 139 140 // Clear resets the Bloom filter. 141 func (bl *Bloom) Clear() { 142 for i := range bl.bitset { 143 bl.bitset[i] = 0 144 } 145 } 146 147 // Set sets the bit[idx] of bitset. 148 func (bl *Bloom) Set(idx uint64) { 149 ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3)) 150 *(*uint8)(ptr) |= mask[idx%8] 151 } 152 153 // IsSet checks if bit[idx] of bitset is set, returns true/false. 154 func (bl *Bloom) IsSet(idx uint64) bool { 155 ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3)) 156 r := ((*(*uint8)(ptr)) >> (idx % 8)) & 1 157 return r == 1 158 } 159 160 // bloomJSONImExport 161 // Im/Export structure used by JSONMarshal / JSONUnmarshal 162 type bloomJSONImExport struct { 163 FilterSet []byte 164 SetLocs uint64 165 } 166 167 // NewWithBoolset takes a []byte slice and number of locs per entry, 168 // returns the bloomfilter with a bitset populated according to the input []byte. 169 func newWithBoolset(bs *[]byte, locs uint64) *Bloom { 170 bloomfilter := NewBloomFilter(float64(len(*bs)<<3), float64(locs)) 171 for i, b := range *bs { 172 *(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&bloomfilter.bitset[0])) + uintptr(i))) = b 173 } 174 return bloomfilter 175 } 176 177 // JSONUnmarshal takes JSON-Object (type bloomJSONImExport) as []bytes 178 // returns bloom32 / bloom64 object. 179 func JSONUnmarshal(dbData []byte) *Bloom { 180 bloomImEx := bloomJSONImExport{} 181 json.Unmarshal(dbData, &bloomImEx) 182 buf := bytes.NewBuffer(bloomImEx.FilterSet) 183 bs := buf.Bytes() 184 bf := newWithBoolset(&bs, bloomImEx.SetLocs) 185 return bf 186 } 187 188 // JSONMarshal returns JSON-object (type bloomJSONImExport) as []byte. 189 func (bl Bloom) JSONMarshal() []byte { 190 bloomImEx := bloomJSONImExport{} 191 bloomImEx.SetLocs = bl.setLocs 192 bloomImEx.FilterSet = make([]byte, len(bl.bitset)<<3) 193 for i := range bloomImEx.FilterSet { 194 bloomImEx.FilterSet[i] = *(*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[0])) + 195 uintptr(i))) 196 } 197 data, err := json.Marshal(bloomImEx) 198 if err != nil { 199 log.Fatal("json.Marshal failed: ", err) 200 } 201 return data 202 }