github.com/andy2046/gopie@v0.7.0/pkg/bloom/bloom.go (about)

     1  // Package bloom implements a Bloom filter.
     2  package bloom
     3  
     4  import (
     5  	"math"
     6  )
     7  
     8  type (
     9  	// Bloom is the standard bloom filter.
    10  	Bloom interface {
    11  		Add([]byte)
    12  		AddString(string)
    13  		Exist([]byte) bool
    14  		ExistString(string) bool
    15  		FalsePositive() float64
    16  		GuessFalsePositive(uint64) float64
    17  		M() uint64
    18  		K() uint64
    19  		N() uint64
    20  		Clear()
    21  	}
    22  
    23  	// CountingBloom is the bloom filter which allows deletion of entries.
    24  	// Take note that an 16-bit counter is maintained for each entry.
    25  	CountingBloom interface {
    26  		Bloom
    27  		Remove([]byte)
    28  		RemoveString(string)
    29  	}
    30  
    31  	bloomFilter struct {
    32  		bitmap []uint16 // bloom filter counter
    33  		k      uint64   // number of hash functions
    34  		n      uint64   // number of elements in the bloom filter
    35  		m      uint64   // size of the bloom filter bits
    36  		shift  uint8    // the shift to get high/low bit fragments
    37  	}
    38  )
    39  
    40  const (
    41  	ln2                  float64 = 0.6931471805599453 // math.Log(2)
    42  	maxCountingBloomSize uint64  = 1 << 37            // to avoid panic: makeslice: len out of range
    43  	maxCounter           uint16  = 65535
    44  )
    45  
    46  // New creates counting bloom filter based on the provided m/k.
    47  // m is the size of bloom filter bits.
    48  // k is the number of hash functions.
    49  func New(m, k uint64) CountingBloom {
    50  	mm, exponent := adjustM(m)
    51  	return &bloomFilter{
    52  		bitmap: make([]uint16, mm),
    53  		m:      mm - 1, // x % 2^i = x & (2^i - 1)
    54  		k:      k,
    55  		shift:  64 - exponent,
    56  	}
    57  }
    58  
    59  // NewGuess estimates m/k based on the provided n/p then creates counting bloom filter.
    60  // n is the estimated number of elements in the bloom filter.
    61  // p is the false positive probability.
    62  func NewGuess(n uint64, p float64) CountingBloom {
    63  	m, k := Guess(n, p)
    64  	return New(m, k)
    65  }
    66  
    67  // Guess estimates m/k based on the provided n/p.
    68  func Guess(n uint64, p float64) (m, k uint64) {
    69  	mm := math.Ceil(-1 * float64(n) * math.Log(p) / math.Pow(ln2, 2))
    70  	kk := math.Ceil(ln2 * mm / float64(n))
    71  	m, k = uint64(mm), uint64(kk)
    72  	return
    73  }
    74  
    75  func (bf *bloomFilter) Add(entry []byte) {
    76  	hash := sipHash(entry)
    77  	h := hash >> bf.shift
    78  	l := hash << bf.shift >> bf.shift
    79  	var idx uint64
    80  	for i := uint64(0); i < bf.k; i++ {
    81  		idx = (h + i*l) & bf.m
    82  		// avoid overflow
    83  		if bf.bitmap[idx] < maxCounter {
    84  			bf.bitmap[idx]++
    85  		}
    86  	}
    87  	bf.n++
    88  }
    89  
    90  func (bf *bloomFilter) AddString(entry string) {
    91  	bf.Add([]byte(entry))
    92  }
    93  
    94  func (bf *bloomFilter) Remove(entry []byte) {
    95  	hash := sipHash(entry)
    96  	h := hash >> bf.shift
    97  	l := hash << bf.shift >> bf.shift
    98  	var idx uint64
    99  	for i := uint64(0); i < bf.k; i++ {
   100  		idx = (h + i*l) & bf.m
   101  		if bf.bitmap[idx] == 0 {
   102  			return
   103  		}
   104  	}
   105  
   106  	for i := uint64(0); i < bf.k; i++ {
   107  		idx = (h + i*l) & bf.m
   108  		// avoid overflow
   109  		if bf.bitmap[idx] > 0 {
   110  			bf.bitmap[idx]--
   111  		}
   112  	}
   113  	bf.n--
   114  }
   115  
   116  func (bf *bloomFilter) RemoveString(entry string) {
   117  	bf.Remove([]byte(entry))
   118  }
   119  
   120  func (bf *bloomFilter) Exist(entry []byte) bool {
   121  	hash := sipHash(entry)
   122  	h := hash >> bf.shift
   123  	l := hash << bf.shift >> bf.shift
   124  	var idx uint64
   125  	for i := uint64(0); i < bf.k; i++ {
   126  		idx = (h + i*l) & bf.m
   127  		if bf.bitmap[idx] == 0 {
   128  			return false
   129  		}
   130  	}
   131  
   132  	return true
   133  }
   134  
   135  func (bf *bloomFilter) ExistString(entry string) bool {
   136  	return bf.Exist([]byte(entry))
   137  }
   138  
   139  func (bf *bloomFilter) FalsePositive() float64 {
   140  	return math.Pow((1 - math.Exp(-float64(bf.k*bf.n)/float64(bf.m))),
   141  		float64(bf.k))
   142  }
   143  
   144  func (bf *bloomFilter) GuessFalsePositive(n uint64) float64 {
   145  	return math.Pow((1 - math.Exp(-float64(bf.k*n)/float64(bf.m))),
   146  		float64(bf.k))
   147  }
   148  
   149  func (bf *bloomFilter) M() uint64 {
   150  	return bf.m + 1
   151  }
   152  
   153  func (bf *bloomFilter) K() uint64 {
   154  	return bf.k
   155  }
   156  
   157  func (bf *bloomFilter) N() uint64 {
   158  	return bf.n
   159  }
   160  
   161  func (bf *bloomFilter) Clear() {
   162  	for i := range bf.bitmap {
   163  		bf.bitmap[i] = 0
   164  	}
   165  	bf.n = 0
   166  }
   167  
   168  func adjustM(x uint64) (m uint64, exponent uint8) {
   169  	if x < 512 {
   170  		x = 512
   171  	}
   172  	m = uint64(1)
   173  	for m < x && m < maxCountingBloomSize {
   174  		m <<= 1
   175  		exponent++
   176  	}
   177  	return m, exponent
   178  }