github.com/panmari/cuckoofilter@v1.0.7-0.20231223155748-763d1d471ee8/cuckoofilter.go (about) 1 package cuckoo 2 3 import ( 4 "bytes" 5 "encoding/binary" 6 "fmt" 7 ) 8 9 // maxCuckooKickouts is the maximum number of times reinsert 10 // is attempted. 11 const maxCuckooKickouts = 500 12 13 // Filter is a probabilistic counter. 14 type Filter struct { 15 buckets []bucket 16 count uint 17 // Bit mask set to len(buckets) - 1. As len(buckets) is always a power of 2, 18 // applying this mask mimics the operation x % len(buckets). 19 bucketIndexMask uint 20 } 21 22 // NewFilter returns a new cuckoofilter suitable for the given number of elements. 23 // When inserting more elements, insertion speed will drop significantly and insertions might fail altogether. 24 // A capacity of 1000000 is a normal default, which allocates 25 // about ~2MB on 64-bit machines. 26 func NewFilter(numElements uint) *Filter { 27 numBuckets := getNextPow2(uint64(numElements / bucketSize)) 28 if float64(numElements)/float64(numBuckets*bucketSize) > 0.96 { 29 numBuckets <<= 1 30 } 31 if numBuckets == 0 { 32 numBuckets = 1 33 } 34 buckets := make([]bucket, numBuckets) 35 return &Filter{ 36 buckets: buckets, 37 count: 0, 38 bucketIndexMask: uint(len(buckets) - 1), 39 } 40 } 41 42 // Lookup returns true if data is in the filter. 43 func (cf *Filter) Lookup(data []byte) bool { 44 i1, fp := getIndexAndFingerprint(data, cf.bucketIndexMask) 45 if b := cf.buckets[i1]; b.contains(fp) { 46 return true 47 } 48 i2 := getAltIndex(fp, i1, cf.bucketIndexMask) 49 b := cf.buckets[i2] 50 return b.contains(fp) 51 } 52 53 // Reset removes all items from the filter, setting count to 0. 54 func (cf *Filter) Reset() { 55 for i := range cf.buckets { 56 cf.buckets[i].reset() 57 } 58 cf.count = 0 59 } 60 61 // Insert data into the filter. Returns false if insertion failed. In the resulting state, the filter 62 // * Might return false negatives 63 // * Deletes are not guaranteed to work 64 // To increase success rate of inserts, create a larger filter. 65 func (cf *Filter) Insert(data []byte) bool { 66 i1, fp := getIndexAndFingerprint(data, cf.bucketIndexMask) 67 if cf.insert(fp, i1) { 68 return true 69 } 70 i2 := getAltIndex(fp, i1, cf.bucketIndexMask) 71 if cf.insert(fp, i2) { 72 return true 73 } 74 return cf.reinsert(fp, randi(i1, i2)) 75 } 76 77 func (cf *Filter) insert(fp fingerprint, i uint) bool { 78 if cf.buckets[i].insert(fp) { 79 cf.count++ 80 return true 81 } 82 return false 83 } 84 85 func (cf *Filter) reinsert(fp fingerprint, i uint) bool { 86 for k := 0; k < maxCuckooKickouts; k++ { 87 j := fastrandn(bucketSize) 88 // Swap fingerprint with bucket entry. 89 cf.buckets[i][j], fp = fp, cf.buckets[i][j] 90 91 // Move kicked out fingerprint to alternate location. 92 i = getAltIndex(fp, i, cf.bucketIndexMask) 93 if cf.insert(fp, i) { 94 return true 95 } 96 } 97 return false 98 } 99 100 // Delete data from the filter. Returns true if the data was found and deleted. 101 func (cf *Filter) Delete(data []byte) bool { 102 i1, fp := getIndexAndFingerprint(data, cf.bucketIndexMask) 103 i2 := getAltIndex(fp, i1, cf.bucketIndexMask) 104 return cf.delete(fp, i1) || cf.delete(fp, i2) 105 } 106 107 func (cf *Filter) delete(fp fingerprint, i uint) bool { 108 if cf.buckets[i].delete(fp) { 109 cf.count-- 110 return true 111 } 112 return false 113 } 114 115 // Count returns the number of items in the filter. 116 func (cf *Filter) Count() uint { 117 return cf.count 118 } 119 120 // LoadFactor returns the fraction slots that are occupied. 121 func (cf *Filter) LoadFactor() float64 { 122 return float64(cf.count) / float64(len(cf.buckets)*bucketSize) 123 } 124 125 const bytesPerBucket = bucketSize * fingerprintSizeBits / 8 126 127 // Encode returns a byte slice representing a Cuckoofilter. 128 func (cf *Filter) Encode() []byte { 129 res := new(bytes.Buffer) 130 res.Grow(len(cf.buckets) * bytesPerBucket) 131 132 for _, b := range cf.buckets { 133 for _, fp := range b { 134 binary.Write(res, binary.LittleEndian, fp) 135 } 136 } 137 return res.Bytes() 138 } 139 140 // Decode returns a Cuckoofilter from a byte slice created using Encode. 141 func Decode(data []byte) (*Filter, error) { 142 if len(data)%bucketSize != 0 { 143 return nil, fmt.Errorf("bytes must to be multiple of %d, got %d", bucketSize, len(data)) 144 } 145 numBuckets := len(data) / bytesPerBucket 146 if numBuckets < 1 { 147 return nil, fmt.Errorf("bytes can not be smaller than %d, size in bytes is %d", bytesPerBucket, len(data)) 148 } 149 if getNextPow2(uint64(numBuckets)) != uint(numBuckets) { 150 return nil, fmt.Errorf("numBuckets must to be a power of 2, got %d", numBuckets) 151 } 152 var count uint 153 buckets := make([]bucket, numBuckets) 154 reader := bytes.NewReader(data) 155 156 for i, b := range buckets { 157 for j := range b { 158 binary.Read(reader, binary.LittleEndian, &buckets[i][j]) 159 if buckets[i][j] != nullFp { 160 count++ 161 } 162 } 163 } 164 return &Filter{ 165 buckets: buckets, 166 count: count, 167 bucketIndexMask: uint(len(buckets) - 1), 168 }, nil 169 }