// Source: github.com/fiatjaf/generic-ristretto@v0.0.1/z/bbloom.go

// The MIT License (MIT)
// Copyright (c) 2014 Andreas Briese, eduToolbox@Bri-C GmbH, Sarstedt

// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files (the "Software"), to deal in
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
// the Software, and to permit persons to whom the Software is furnished to do so,
// subject to the following conditions:

// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

20 21 package z 22 23 import ( 24 "bytes" 25 "encoding/json" 26 "math" 27 "unsafe" 28 29 "github.com/golang/glog" 30 ) 31 32 // helper 33 var mask = []uint8{1, 2, 4, 8, 16, 32, 64, 128} 34 35 func getSize(ui64 uint64) (size uint64, exponent uint64) { 36 if ui64 < uint64(512) { 37 ui64 = uint64(512) 38 } 39 size = uint64(1) 40 for size < ui64 { 41 size <<= 1 42 exponent++ 43 } 44 return size, exponent 45 } 46 47 func calcSizeByWrongPositives(numEntries, wrongs float64) (uint64, uint64) { 48 size := -1 * numEntries * math.Log(wrongs) / math.Pow(float64(0.69314718056), 2) 49 locs := math.Ceil(float64(0.69314718056) * size / numEntries) 50 return uint64(size), uint64(locs) 51 } 52 53 // NewBloomFilter returns a new bloomfilter. 54 func NewBloomFilter(params ...float64) (bloomfilter *Bloom) { 55 var entries, locs uint64 56 if len(params) == 2 { 57 if params[1] < 1 { 58 entries, locs = calcSizeByWrongPositives(params[0], params[1]) 59 } else { 60 entries, locs = uint64(params[0]), uint64(params[1]) 61 } 62 } else { 63 glog.Fatal("usage: New(float64(number_of_entries), float64(number_of_hashlocations))" + 64 " i.e. New(float64(1000), float64(3)) or New(float64(number_of_entries)," + 65 " float64(number_of_hashlocations)) i.e. 
New(float64(1000), float64(0.03))") 66 } 67 size, exponent := getSize(entries) 68 bloomfilter = &Bloom{ 69 sizeExp: exponent, 70 size: size - 1, 71 setLocs: locs, 72 shift: 64 - exponent, 73 } 74 bloomfilter.Size(size) 75 return bloomfilter 76 } 77 78 // Bloom filter 79 type Bloom struct { 80 bitset []uint64 81 ElemNum uint64 82 sizeExp uint64 83 size uint64 84 setLocs uint64 85 shift uint64 86 } 87 88 // <--- http://www.cse.yorku.ca/~oz/hash.html 89 // modified Berkeley DB Hash (32bit) 90 // hash is casted to l, h = 16bit fragments 91 // func (bl Bloom) absdbm(b *[]byte) (l, h uint64) { 92 // hash := uint64(len(*b)) 93 // for _, c := range *b { 94 // hash = uint64(c) + (hash << 6) + (hash << bl.sizeExp) - hash 95 // } 96 // h = hash >> bl.shift 97 // l = hash << bl.shift >> bl.shift 98 // return l, h 99 // } 100 101 // Add adds hash of a key to the bloomfilter. 102 func (bl *Bloom) Add(hash uint64) { 103 h := hash >> bl.shift 104 l := hash << bl.shift >> bl.shift 105 for i := uint64(0); i < bl.setLocs; i++ { 106 bl.Set((h + i*l) & bl.size) 107 bl.ElemNum++ 108 } 109 } 110 111 // Has checks if bit(s) for entry hash is/are set, 112 // returns true if the hash was added to the Bloom Filter. 113 func (bl Bloom) Has(hash uint64) bool { 114 h := hash >> bl.shift 115 l := hash << bl.shift >> bl.shift 116 for i := uint64(0); i < bl.setLocs; i++ { 117 if !bl.IsSet((h + i*l) & bl.size) { 118 return false 119 } 120 } 121 return true 122 } 123 124 // AddIfNotHas only Adds hash, if it's not present in the bloomfilter. 125 // Returns true if hash was added. 126 // Returns false if hash was already registered in the bloomfilter. 127 func (bl *Bloom) AddIfNotHas(hash uint64) bool { 128 if bl.Has(hash) { 129 return false 130 } 131 bl.Add(hash) 132 return true 133 } 134 135 // TotalSize returns the total size of the bloom filter. 136 func (bl *Bloom) TotalSize() int { 137 // The bl struct has 5 members and each one is 8 byte. The bitset is a 138 // uint64 byte slice. 
139 return len(bl.bitset)*8 + 5*8 140 } 141 142 // Size makes Bloom filter with as bitset of size sz. 143 func (bl *Bloom) Size(sz uint64) { 144 bl.bitset = make([]uint64, sz>>6) 145 } 146 147 // Clear resets the Bloom filter. 148 func (bl *Bloom) Clear() { 149 for i := range bl.bitset { 150 bl.bitset[i] = 0 151 } 152 } 153 154 // Set sets the bit[idx] of bitset. 155 func (bl *Bloom) Set(idx uint64) { 156 ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3)) 157 *(*uint8)(ptr) |= mask[idx%8] 158 } 159 160 // IsSet checks if bit[idx] of bitset is set, returns true/false. 161 func (bl *Bloom) IsSet(idx uint64) bool { 162 ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3)) 163 r := ((*(*uint8)(ptr)) >> (idx % 8)) & 1 164 return r == 1 165 } 166 167 // bloomJSONImExport 168 // Im/Export structure used by JSONMarshal / JSONUnmarshal 169 type bloomJSONImExport struct { 170 FilterSet []byte 171 SetLocs uint64 172 } 173 174 // NewWithBoolset takes a []byte slice and number of locs per entry, 175 // returns the bloomfilter with a bitset populated according to the input []byte. 176 func newWithBoolset(bs *[]byte, locs uint64) *Bloom { 177 bloomfilter := NewBloomFilter(float64(len(*bs)<<3), float64(locs)) 178 for i, b := range *bs { 179 *(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&bloomfilter.bitset[0])) + uintptr(i))) = b 180 } 181 return bloomfilter 182 } 183 184 // JSONUnmarshal takes JSON-Object (type bloomJSONImExport) as []bytes 185 // returns bloom32 / bloom64 object. 186 func JSONUnmarshal(dbData []byte) (*Bloom, error) { 187 bloomImEx := bloomJSONImExport{} 188 if err := json.Unmarshal(dbData, &bloomImEx); err != nil { 189 return nil, err 190 } 191 buf := bytes.NewBuffer(bloomImEx.FilterSet) 192 bs := buf.Bytes() 193 bf := newWithBoolset(&bs, bloomImEx.SetLocs) 194 return bf, nil 195 } 196 197 // JSONMarshal returns JSON-object (type bloomJSONImExport) as []byte. 
198 func (bl Bloom) JSONMarshal() []byte { 199 bloomImEx := bloomJSONImExport{} 200 bloomImEx.SetLocs = bl.setLocs 201 bloomImEx.FilterSet = make([]byte, len(bl.bitset)<<3) 202 for i := range bloomImEx.FilterSet { 203 bloomImEx.FilterSet[i] = *(*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[0])) + 204 uintptr(i))) 205 } 206 data, err := json.Marshal(bloomImEx) 207 if err != nil { 208 glog.Fatal("json.Marshal failed: ", err) 209 } 210 return data 211 }