// The MIT License (MIT)
// Copyright (c) 2014 Andreas Briese, eduToolbox@Bri-C GmbH, Sarstedt

// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files (the "Software"), to deal in
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
// the Software, and to permit persons to whom the Software is furnished to do so,
// subject to the following conditions:

// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

package z

import (
	"bytes"
	"encoding/json"
	"math"
	"unsafe"
)

// mask holds the eight single-bit byte values 1<<0 .. 1<<7. Set indexes it
// with idx%8 to pick the bit inside the addressed byte.
var mask = []uint8{1, 2, 4, 8, 16, 32, 64, 128}

// getSize rounds ui64 up to the next power of two, with a floor of 512.
//
// It returns that power of two as size and its base-2 logarithm as exponent,
// so size == 1<<exponent.
//
// NOTE(review): for ui64 > 1<<63 the doubling wraps size to 0 and the loop
// condition stays true, so the call would not return. Confirm no caller can
// pass such a value.
func getSize(ui64 uint64) (size uint64, exponent uint64) {
	// Never build a filter smaller than 512 bits.
	if ui64 < uint64(512) {
		ui64 = uint64(512)
	}
	size = uint64(1)
	for size < ui64 {
		size <<= 1
		exponent++
	}
	return size, exponent
}

// calcSizeByWrongPositives derives filter dimensions from the expected number
// of entries and the accepted false-positive rate wrongs.
//
// With n = numEntries and p = wrongs it computes
//
//	size = -n * ln(p) / (ln 2)^2
//	locs = ceil(ln 2 * size / n)
//
// using 0.69314718056 as the value of ln 2, and returns both converted to
// uint64 (size is truncated, locs is already rounded up).
func calcSizeByWrongPositives(numEntries, wrongs float64) (uint64, uint64) {
	size := -1 * numEntries * math.Log(wrongs) / math.Pow(float64(0.69314718056), 2)
	locs := math.Ceil(float64(0.69314718056) * size / numEntries)
	return uint64(size), uint64(locs)
}

// NewBloomFilter returns a new bloomfilter.
//
// It expects exactly two parameters. The first is the number of entries. The
// second is read as a false-positive rate when it is below 1 and as the number
// of hash locations per entry otherwise. Any other parameter count is reported
// through fatal.
52 func NewBloomFilter(params ...float64) (bloomfilter *Bloom) { 53 var entries, locs uint64 54 if len(params) == 2 { 55 if params[1] < 1 { 56 entries, locs = calcSizeByWrongPositives(params[0], params[1]) 57 } else { 58 entries, locs = uint64(params[0]), uint64(params[1]) 59 } 60 } else { 61 fatal("usage: New(float64(number_of_entries), float64(number_of_hashlocations))" + 62 " i.e. New(float64(1000), float64(3)) or New(float64(number_of_entries)," + 63 " float64(number_of_hashlocations)) i.e. New(float64(1000), float64(0.03))") 64 } 65 size, exponent := getSize(entries) 66 bloomfilter = &Bloom{ 67 sizeExp: exponent, 68 size: size - 1, 69 setLocs: locs, 70 shift: 64 - exponent, 71 } 72 bloomfilter.Size(size) 73 return bloomfilter 74 } 75 76 // Bloom filter 77 type Bloom struct { 78 bitset []uint64 79 ElemNum uint64 80 sizeExp uint64 81 size uint64 82 setLocs uint64 83 shift uint64 84 } 85 86 // <--- http://www.cse.yorku.ca/~oz/hash.html 87 // modified Berkeley DB Hash (32bit) 88 // hash is casted to l, h = 16bit fragments 89 // func (bl Bloom) absdbm(b *[]byte) (l, h uint64) { 90 // hash := uint64(len(*b)) 91 // for _, c := range *b { 92 // hash = uint64(c) + (hash << 6) + (hash << bl.sizeExp) - hash 93 // } 94 // h = hash >> bl.shift 95 // l = hash << bl.shift >> bl.shift 96 // return l, h 97 // } 98 99 // Add adds hash of a key to the bloomfilter. 100 func (bl *Bloom) Add(hash uint64) { 101 h := hash >> bl.shift 102 l := hash << bl.shift >> bl.shift 103 for i := uint64(0); i < bl.setLocs; i++ { 104 bl.Set((h + i*l) & bl.size) 105 bl.ElemNum++ 106 } 107 } 108 109 // Has checks if bit(s) for entry hash is/are set, 110 // returns true if the hash was added to the Bloom Filter. 
111 func (bl Bloom) Has(hash uint64) bool { 112 h := hash >> bl.shift 113 l := hash << bl.shift >> bl.shift 114 for i := uint64(0); i < bl.setLocs; i++ { 115 if !bl.IsSet((h + i*l) & bl.size) { 116 return false 117 } 118 } 119 return true 120 } 121 122 // AddIfNotHas only Adds hash, if it's not present in the bloomfilter. 123 // Returns true if hash was added. 124 // Returns false if hash was already registered in the bloomfilter. 125 func (bl *Bloom) AddIfNotHas(hash uint64) bool { 126 if bl.Has(hash) { 127 return false 128 } 129 bl.Add(hash) 130 return true 131 } 132 133 // TotalSize returns the total size of the bloom filter. 134 func (bl *Bloom) TotalSize() int { 135 // The bl struct has 5 members and each one is 8 byte. The bitset is a 136 // uint64 byte slice. 137 return len(bl.bitset)*8 + 5*8 138 } 139 140 // Size makes Bloom filter with as bitset of size sz. 141 func (bl *Bloom) Size(sz uint64) { 142 bl.bitset = make([]uint64, sz>>6) 143 } 144 145 // Clear resets the Bloom filter. 146 func (bl *Bloom) Clear() { 147 for i := range bl.bitset { 148 bl.bitset[i] = 0 149 } 150 } 151 152 // Set sets the bit[idx] of bitset. 153 func (bl *Bloom) Set(idx uint64) { 154 ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3)) 155 *(*uint8)(ptr) |= mask[idx%8] 156 } 157 158 // IsSet checks if bit[idx] of bitset is set, returns true/false. 159 func (bl *Bloom) IsSet(idx uint64) bool { 160 ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3)) 161 r := ((*(*uint8)(ptr)) >> (idx % 8)) & 1 162 return r == 1 163 } 164 165 // bloomJSONImExport 166 // Im/Export structure used by JSONMarshal / JSONUnmarshal 167 type bloomJSONImExport struct { 168 FilterSet []byte 169 SetLocs uint64 170 } 171 172 // NewWithBoolset takes a []byte slice and number of locs per entry, 173 // returns the bloomfilter with a bitset populated according to the input []byte. 
174 func newWithBoolset(bs *[]byte, locs uint64) *Bloom { 175 bloomfilter := NewBloomFilter(float64(len(*bs)<<3), float64(locs)) 176 for i, b := range *bs { 177 *(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&bloomfilter.bitset[0])) + uintptr(i))) = b 178 } 179 return bloomfilter 180 } 181 182 // JSONUnmarshal takes JSON-Object (type bloomJSONImExport) as []bytes 183 // returns bloom32 / bloom64 object. 184 func JSONUnmarshal(dbData []byte) (*Bloom, error) { 185 bloomImEx := bloomJSONImExport{} 186 if err := json.Unmarshal(dbData, &bloomImEx); err != nil { 187 return nil, err 188 } 189 buf := bytes.NewBuffer(bloomImEx.FilterSet) 190 bs := buf.Bytes() 191 bf := newWithBoolset(&bs, bloomImEx.SetLocs) 192 return bf, nil 193 } 194 195 // JSONMarshal returns JSON-object (type bloomJSONImExport) as []byte. 196 func (bl Bloom) JSONMarshal() []byte { 197 bloomImEx := bloomJSONImExport{} 198 bloomImEx.SetLocs = bl.setLocs 199 bloomImEx.FilterSet = make([]byte, len(bl.bitset)<<3) 200 for i := range bloomImEx.FilterSet { 201 bloomImEx.FilterSet[i] = *(*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[0])) + 202 uintptr(i))) 203 } 204 data, err := json.Marshal(bloomImEx) 205 if err != nil { 206 fatal("json.Marshal failed: ", err) 207 } 208 return data 209 }