gitee.com/lh-her-team/common@v1.5.1/birdsnest/cuckoo.go (about)

     1  package birdsnest
     2  
     3  import (
     4  	"encoding/json"
     5  	"math"
     6  
     7  	birdsnestpb "gitee.com/lh-her-team/common/birdsnest/pb"
     8  
     9  	"github.com/linvon/cuckoo-filter"
    10  )
    11  
    12  var (
    13  	// The load factor
    14  	loadFactorMap map[uint32]float64
    15  )
    16  
    17  const (
    18  	// DefaultLoadFactor Default load factor
    19  	DefaultLoadFactor = 0.98
    20  )
    21  
    22  // init Initialize the load factor
    23  func init() {
    24  	loadFactorMap = make(map[uint32]float64)
    25  	// 大小 b=2、4 或 8 时则分别会增加到 84%、95% 和 98%
    26  	loadFactorMap[2] = 0.84
    27  	loadFactorMap[4] = 0.95
    28  	loadFactorMap[8] = DefaultLoadFactor
    29  }
    30  
    31  // CuckooFilterImpl Cuckoo Filter
    32  type CuckooFilterImpl struct {
    33  	// cuckoo filter
    34  	cuckoo cuckoo.Filter
    35  	// filter extension
    36  	extension FilterExtension
    37  	// cuckoo config
    38  	config *CuckooConfig
    39  	// cuckoo is full
    40  	full bool
    41  }
    42  
    43  // newCuckooFilters Create multiple CuckooFilter
    44  func newCuckooFilters(config *CuckooConfig, size uint32) []CuckooFilter {
    45  	filters := make([]CuckooFilter, size)
    46  	for i := uint32(0); i < size; i++ {
    47  		// New cuckoo filter
    48  		filters[i] = NewCuckooFilter(config)
    49  	}
    50  	return filters
    51  }
    52  
    53  // newCuckooFiltersByDecode New cuckoo filters by decode
    54  func newCuckooFiltersByDecode(filters []*birdsnestpb.CuckooFilter) ([]CuckooFilter, error) {
    55  	filters0 := make([]CuckooFilter, len(filters))
    56  	for i := 0; i < len(filters); i++ {
    57  		filter, err := NewCuckooFilterByDecode(filters[i])
    58  		if err != nil {
    59  			return nil, err
    60  		}
    61  		filters0[i] = filter
    62  		if filter.IsFull() {
    63  			continue
    64  		}
    65  		if filter.cuckoo.Size() >= uint(filter.config.MaxNumKeys) {
    66  			filter.full = true
    67  		}
    68  	}
    69  	return filters0, nil
    70  }
    71  
    72  /*
    73  	NewCuckooFilter
    74  	Params:
    75  	common.CuckooConfig.TableType    : has two constant parameters to choose from:
    76  									   1. TableTypeSingle normal single table
    77  									   2. TableTypePacked packed table, use semi-sort to save 1 bit per item
    78  	common.CuckooConfig.TagsPerBucket: num of tags for each bucket, which is b in paper. tag is fingerprint, which is f
    79  								       in paper.
    80  	common.CuckooConfig.MaxNumKeys   : num of keys that filter will store. this value should close to and lower
    81  									   nextPow2(maxNumKeys/tagsPerBucket) * maxLoadFactor. cause table.NumBuckets is
    82  									   always a power of two
    83  	common.CuckooConfig.BitsPerItem  : num of bits for each item, which is length of tag(fingerprint)
    84  	common.CuckooConfig.TableType    :
    85  	common.CuckooConfig.KeyType      :  0 TableTypeSingle normal single table
    86  								        1 TableTypePacked packed table, use semi-sort to save 1 bit per item
    87  								        1 is recommended
    88  	Result:
    89  	CuckooFilter
    90  */
    91  func NewCuckooFilter(config *CuckooConfig) CuckooFilter {
    92  	extensionType := statusConvertExtension(config.KeyType)
    93  	if extensionType == -1 {
    94  		return nil
    95  	}
    96  	extension, err := Factory().New(extensionType)
    97  	if err != nil {
    98  		return nil
    99  	}
   100  	// maxNumKeys := getApproximationMaxNumKeys(config.MaxNumKeys, config.MaxNumKeys)
   101  	return &CuckooFilterImpl{
   102  		cuckoo: *cuckoo.NewFilter(uint(config.TagsPerBucket), uint(config.BitsPerItem),
   103  			getApproximationMaxNumKeys(config.MaxNumKeys, config.TagsPerBucket),
   104  			uint(config.TableType)),
   105  		extension: extension,
   106  		config:    config,
   107  	}
   108  }
   109  
   110  func NewCuckooFilterByDecode(filter *birdsnestpb.CuckooFilter) (*CuckooFilterImpl, error) {
   111  	decode, err := cuckoo.Decode(filter.Cuckoo)
   112  	if err != nil {
   113  		return nil, err
   114  	}
   115  	extension, err := ExtensionDeserialize(filter.Extension)
   116  	if err != nil {
   117  		return nil, err
   118  	}
   119  	if err != nil {
   120  		return nil, err
   121  	}
   122  	var config CuckooConfig
   123  	err = json.Unmarshal(filter.Config, &config)
   124  	if err != nil {
   125  		return nil, err
   126  	}
   127  	return &CuckooFilterImpl{
   128  		cuckoo:    *decode,
   129  		extension: extension,
   130  		config:    &config,
   131  	}, nil
   132  }
   133  
   134  func (c *CuckooFilterImpl) Extension() FilterExtension {
   135  	return c.extension
   136  }
   137  
   138  // IsFull is full
   139  func (c *CuckooFilterImpl) IsFull() bool {
   140  	return c.full
   141  }
   142  
   143  // Add key to cuckoo filter
   144  func (c *CuckooFilterImpl) Add(key Key) (bool, error) {
   145  	add := c.cuckoo.Add(key.Key())
   146  	if !add {
   147  		// The cuckoo filter is full if it is not added successfully
   148  		c.full = true
   149  		return false, nil
   150  	}
   151  	if c.cuckoo.Size() >= uint(c.config.MaxNumKeys) {
   152  		// If the size of the cuckoo filter is greater than or equal to the configured size, the filter is full
   153  		c.full = true
   154  	}
   155  	//
   156  	err := c.extension.Store(key)
   157  	if err != nil {
   158  		return false, err
   159  	}
   160  	return true, nil
   161  }
   162  
   163  // Contains Whether the cuckoo filter contains keys
   164  func (c *CuckooFilterImpl) Contains(key Key) (bool, error) {
   165  	err := c.extension.Validate(key, c.IsFull())
   166  	if err != nil {
   167  		if err == ErrKeyTimeIsNotInTheFilterRange {
   168  			// Not in the time interval
   169  			return false, nil
   170  		}
   171  		return false, err
   172  	}
   173  	return c.cuckoo.Contain(key.Key()), nil
   174  }
   175  
   176  func (c *CuckooFilterImpl) Encode() (FilterEncoder, error) {
   177  	encode, err := c.cuckoo.Encode()
   178  	if err != nil {
   179  		return FilterEncoder{}, err
   180  	}
   181  	config, err := json.Marshal(c.config)
   182  	if err != nil {
   183  		return FilterEncoder{}, err
   184  	}
   185  
   186  	return newFilterEncoder(encode, config, c.full), nil
   187  }
   188  
   189  func (c *CuckooFilterImpl) Config() ([]byte, error) {
   190  	return c.cuckoo.Encode()
   191  }
   192  
   193  // Info
   194  // index 0 cuckoo size
   195  // index 1 Space occupied by cuckoo
   196  func (c *CuckooFilterImpl) Info() []uint64 {
   197  	var info = make([]uint64, 2)
   198  	info[0] = uint64(c.cuckoo.Size())
   199  	info[1] = uint64(c.cuckoo.SizeInBytes())
   200  	return info
   201  }
   202  
   203  type FilterEncoder struct {
   204  	filter []byte
   205  	config []byte
   206  	full   bool
   207  }
   208  
   209  func newFilterEncoder(filter []byte, config []byte, full bool) FilterEncoder {
   210  	return FilterEncoder{filter: filter, config: config, full: full}
   211  }
   212  
   213  func getApproximationMaxNumKeys(maxNumKeys, b uint32) uint {
   214  	loadFactor, ok := loadFactorMap[b]
   215  	if !ok {
   216  		loadFactor = DefaultLoadFactor
   217  	}
   218  	got := float64(maxNumKeys) * 1.25 / loadFactor
   219  	for i := float64(1); true; i++ {
   220  		pow := math.Pow(2, i)
   221  		rl := pow * loadFactor
   222  		if rl > got {
   223  			return uint(rl)
   224  		}
   225  	}
   226  	return uint(maxNumKeys)
   227  }