v8.run/go/exp@v0.0.26-0.20230226010534-afcdbd3f782d/hash/hashutil/bloom/bloomf.go (about)

     1  package bloom
     2  
     3  import (
     4  	"encoding/binary"
     5  	"errors"
     6  	"math"
     7  
     8  	"v8.run/go/exp/fastrand/alg/splitmix64"
     9  	"v8.run/go/exp/hash/wyhash"
    10  )
    11  
    12  type Bloom struct {
    13  	m    uint64
    14  	k    uint64
    15  	buf  []byte
    16  	bits []byte
    17  }
    18  
    19  func NewBloom(N uint64, P float64) *Bloom {
    20  	m := uint64(math.Ceil(float64(N) * math.Log(P) / math.Log(1.0/math.Pow(2.0, math.Log(2.0)))))
    21  	k := uint64(math.Round((float64(m) / float64(N)) * math.Log(2.0)))
    22  	bf := &Bloom{
    23  		m: m,
    24  		k: k,
    25  	}
    26  	bf.buf = make([]byte, ((m+7)/8)+16)
    27  	binary.LittleEndian.PutUint64(bf.buf, m)
    28  	binary.LittleEndian.PutUint64(bf.buf[8:], k)
    29  	bf.bits = bf.buf[16:]
    30  	return bf
    31  }
    32  
    33  func (bf *Bloom) Bytes() []byte {
    34  	return bf.buf
    35  }
    36  
    37  var ErrInvalidBloom = errors.New("invalid bloom filter")
    38  
    39  func FromBytes(buf []byte) (*Bloom, error) {
    40  	if len(buf) < 16 {
    41  		return nil, ErrInvalidBloom
    42  	}
    43  	m := binary.LittleEndian.Uint64(buf)
    44  	k := binary.LittleEndian.Uint64(buf[8:])
    45  	if len(buf) < int(16+(m+7)/8) {
    46  		return nil, ErrInvalidBloom
    47  	}
    48  	bf := &Bloom{
    49  		m:    m,
    50  		k:    k,
    51  		buf:  buf,
    52  		bits: buf[16:],
    53  	}
    54  	return bf, nil
    55  }
    56  
    57  func hash(s []byte, i uint64) uint64 {
    58  	seed := splitmix64.Splitmix64(&i)
    59  	seed = wyhash.WYRAND(&seed)
    60  	return wyhash.Hash(s, seed)
    61  }
    62  
    63  func hashstr(s string, i uint64) uint64 {
    64  	seed := splitmix64.Splitmix64(&i)
    65  	return wyhash.HashString(s, seed)
    66  }
    67  
    68  func (bf *Bloom) Set(s []byte) {
    69  	for i := uint64(0); i < bf.k; i++ {
    70  		h := hash(s, i) % bf.m
    71  		bf.bits[h/8] |= 1 << (h % 8)
    72  	}
    73  }
    74  
    75  func (bf *Bloom) SetString(s string) {
    76  	for i := uint64(0); i < bf.k; i++ {
    77  		h := hashstr(s, i) % bf.m
    78  		bf.bits[h/8] |= 1 << (h % 8)
    79  	}
    80  }
    81  
    82  func (bf *Bloom) Get(s []byte) bool {
    83  	for i := uint64(0); i < bf.k; i++ {
    84  		h := hash(s, i) % bf.m
    85  		if bf.bits[h/8]&(1<<(h%8)) == 0 {
    86  			return false
    87  		}
    88  	}
    89  	return true
    90  }
    91  
    92  func (bf *Bloom) GetString(s string) bool {
    93  	for i := uint64(0); i < bf.k; i++ {
    94  		h := hashstr(s, i) % bf.m
    95  		if bf.bits[h/8]&(1<<(h%8)) == 0 {
    96  			return false
    97  		}
    98  	}
    99  	return true
   100  }
   101  
   102  func (bf *Bloom) Reset() {
   103  	for i := range bf.bits {
   104  		bf.bits[i] = 0
   105  	}
   106  }
   107  
   108  func (bf *Bloom) MarshalBinary() (data []byte, err error) {
   109  	return bf.Bytes(), nil
   110  }
   111  
   112  func (bf *Bloom) UnmarshalBinary(data []byte) error {
   113  	v, err := FromBytes(data)
   114  	if err != nil {
   115  		return err
   116  	}
   117  	*bf = *v
   118  	return nil
   119  }