github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/cmn/prob/dyn_cuckoo.go (about)

// Package prob implements a fully featured dynamic probabilistic filter.
     2  /*
     3   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package prob
     6  
     7  import (
     8  	"sync"
     9  
    10  	cuckoo "github.com/seiflotfy/cuckoofilter"
    11  )
    12  
const (
	// filterInitSize determines the number of keys we can keep in the first filter.
	// For now we allow `10M` keys -> memory allocation of `10MB`. Once the newest
	// filter cannot accept more keys, an additional filter is added dynamically;
	// its size is scaled by `growFactor`.
	filterInitSize = 10 * 1000 * 1000
	growFactor     = 3 // multiplier applied when sizing each additional (overflow) filter
)
    21  
// Filter is a dynamic probabilistic filter that grows when more space is
// needed.
//
// NOTE: Underneath it uses Cuckoo filters - Bloom filters could also be used,
// but they cannot implement the `Delete` method, which this type provides.
type Filter struct {
	// filters holds the underlying cuckoo filters in the order they were added;
	// the last element is the one `Insert` writes to.
	filters []*cuckoo.Filter
	// size is the value given to `NewFilter`; `Insert` passes it (or a multiple
	// of it) to `cuckoo.NewFilter` when allocating filters.
	size    uint
	// mtx guards `filters`: `Lookup` takes the read lock, while `Insert`,
	// `Delete` and `Reset` take the write lock.
	mtx     sync.RWMutex
}
    33  
    34  func NewFilter(initSize uint) *Filter {
    35  	return &Filter{
    36  		filters: make([]*cuckoo.Filter, 0, 5),
    37  		size:    initSize,
    38  	}
    39  }
    40  
    41  func NewDefaultFilter() *Filter {
    42  	return NewFilter(filterInitSize)
    43  }
    44  
    45  func (f *Filter) Lookup(k []byte) bool {
    46  	f.mtx.RLock()
    47  	for idx := len(f.filters) - 1; idx >= 0; idx-- {
    48  		if f.filters[idx].Lookup(k) {
    49  			f.mtx.RUnlock()
    50  			return true
    51  		}
    52  	}
    53  	f.mtx.RUnlock()
    54  	return false
    55  }
    56  
    57  func (f *Filter) Insert(k []byte) {
    58  	f.mtx.Lock()
    59  
    60  	var lastFilter *cuckoo.Filter
    61  	if len(f.filters) == 0 {
    62  		lastFilter = cuckoo.NewFilter(f.size)
    63  		f.filters = append(f.filters, lastFilter)
    64  	} else {
    65  		lastFilter = f.filters[len(f.filters)-1]
    66  	}
    67  
    68  	if !lastFilter.Insert(k) {
    69  		sf := cuckoo.NewFilter(f.size * growFactor)
    70  		f.filters = append(f.filters, sf)
    71  		sf.Insert(k)
    72  	}
    73  	f.mtx.Unlock()
    74  }
    75  
    76  func (f *Filter) Delete(k []byte) {
    77  	f.mtx.Lock()
    78  	needCleanup := false
    79  	for _, filter := range f.filters {
    80  		filter.Delete(k)
    81  		needCleanup = needCleanup || filter.Count() == 0
    82  	}
    83  	if needCleanup {
    84  		resultFilters := f.filters[:0]
    85  		for idx, filter := range f.filters {
    86  			// idx == 0 because initial filter should be always included
    87  			if idx == 0 || filter.Count() > 0 {
    88  				resultFilters = append(resultFilters, filter)
    89  			}
    90  		}
    91  		f.filters = resultFilters
    92  	}
    93  	f.mtx.Unlock()
    94  }
    95  
    96  func (f *Filter) Reset() {
    97  	f.mtx.Lock()
    98  	for idx := range len(f.filters) {
    99  		f.filters[idx].Reset()
   100  	}
   101  	clear(f.filters)
   102  	f.filters = f.filters[:0]
   103  	f.mtx.Unlock()
   104  }