// Package prob implements a fully featured dynamic probabilistic filter.
/*
 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
 */
package prob

import (
	"sync"

	cuckoo "github.com/seiflotfy/cuckoofilter"
)

const (
	// filterInitSize determines the number of keys we can keep in the first
	// filter. For now we allow `10M` keys -> memory allocation of `10MB`. If
	// we exceed that number of keys, an additional filter is added dynamically
	// with size: `last_filter_size` * `grow_factor`.
	//
	// NOTE(review): Insert sizes every additional filter as the Filter's
	// stored size (set once in NewFilter) times growFactor, so growth does not
	// compound from one filter to the next - confirm which behavior is
	// intended.
	filterInitSize = 10 * 1000 * 1000
	growFactor = 3 // multiplier applied to the filter size when a new, additional filter is allocated
)

// Filter is a dynamic probabilistic filter which grows when more space is
// needed.
//
// NOTE: Underneath it uses Cuckoo filters - Bloom filters could also be used,
// but they cannot implement the `Delete` method that this filter provides.
28 type Filter struct { 29 filters []*cuckoo.Filter 30 size uint 31 mtx sync.RWMutex 32 } 33 34 func NewFilter(initSize uint) *Filter { 35 return &Filter{ 36 filters: make([]*cuckoo.Filter, 0, 5), 37 size: initSize, 38 } 39 } 40 41 func NewDefaultFilter() *Filter { 42 return NewFilter(filterInitSize) 43 } 44 45 func (f *Filter) Lookup(k []byte) bool { 46 f.mtx.RLock() 47 for idx := len(f.filters) - 1; idx >= 0; idx-- { 48 if f.filters[idx].Lookup(k) { 49 f.mtx.RUnlock() 50 return true 51 } 52 } 53 f.mtx.RUnlock() 54 return false 55 } 56 57 func (f *Filter) Insert(k []byte) { 58 f.mtx.Lock() 59 60 var lastFilter *cuckoo.Filter 61 if len(f.filters) == 0 { 62 lastFilter = cuckoo.NewFilter(f.size) 63 f.filters = append(f.filters, lastFilter) 64 } else { 65 lastFilter = f.filters[len(f.filters)-1] 66 } 67 68 if !lastFilter.Insert(k) { 69 sf := cuckoo.NewFilter(f.size * growFactor) 70 f.filters = append(f.filters, sf) 71 sf.Insert(k) 72 } 73 f.mtx.Unlock() 74 } 75 76 func (f *Filter) Delete(k []byte) { 77 f.mtx.Lock() 78 needCleanup := false 79 for _, filter := range f.filters { 80 filter.Delete(k) 81 needCleanup = needCleanup || filter.Count() == 0 82 } 83 if needCleanup { 84 resultFilters := f.filters[:0] 85 for idx, filter := range f.filters { 86 // idx == 0 because initial filter should be always included 87 if idx == 0 || filter.Count() > 0 { 88 resultFilters = append(resultFilters, filter) 89 } 90 } 91 f.filters = resultFilters 92 } 93 f.mtx.Unlock() 94 } 95 96 func (f *Filter) Reset() { 97 f.mtx.Lock() 98 for idx := range len(f.filters) { 99 f.filters[idx].Reset() 100 } 101 clear(f.filters) 102 f.filters = f.filters[:0] 103 f.mtx.Unlock() 104 }