github.com/tursom/GoCollections@v0.3.10/util/bloom/Bloom.go (about)

     1  package bloom
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/binary"
     6  	"io"
     7  	"math"
     8  	"unsafe"
     9  
    10  	"github.com/spaolacci/murmur3"
    11  
    12  	"github.com/tursom/GoCollections/exceptions"
    13  	"github.com/tursom/GoCollections/lang"
    14  )
    15  
    16  var (
    17  	HashFunc = murmur3.Sum32WithSeed
    18  )
    19  
    20  type (
    21  	Bloom struct {
    22  		lang.BaseObject
    23  		m        lang.UInt8Array
    24  		k        uint
    25  		c        uint
    26  		hashCode int32
    27  	}
    28  )
    29  
    30  func max(i1, i2 uint) uint {
    31  	if i1 < i2 {
    32  		return i2
    33  	} else {
    34  		return i1
    35  	}
    36  }
    37  
    38  func numHashFunctions(n, m float64) uint {
    39  	return max(1, uint(math.Floor(0.5+m/n*math.Ln2)))
    40  }
    41  
    42  func NumHashFunctions(n, m uint) uint {
    43  	return numHashFunctions(float64(n), float64(m))
    44  }
    45  
    46  func calcBitLength(n float64, p float64) uint {
    47  	if p == 0 {
    48  		p = math.SmallestNonzeroFloat64
    49  	}
    50  	return uint(-n * math.Log(p) / (math.Ln2 * math.Ln2))
    51  }
    52  
    53  func CalcBitLength(n uint, p float64) uint {
    54  	return calcBitLength(float64(n), p)
    55  }
    56  
    57  func NewBloom(n uint, p float64) *Bloom {
    58  	m := CalcBitLength(n, p) - 1
    59  	return &Bloom{
    60  		m: make(lang.UInt8Array, m/8+1),
    61  		k: NumHashFunctions(n, m),
    62  	}
    63  }
    64  
    65  func (b *Bloom) C() uint {
    66  	return b.c
    67  }
    68  
    69  func (b *Bloom) K() uint {
    70  	return b.k
    71  }
    72  
    73  func (b *Bloom) M() uint {
    74  	return uint(len(b.m)) * 8
    75  }
    76  
    77  func (b *Bloom) Contains(data []byte) bool {
    78  	for i := 0; i < int(b.k); i++ {
    79  		hashCode := uint(HashFunc(data, uint32(i)))
    80  		if !b.m.GetBit(hashCode % b.m.BitLength()) {
    81  			return false
    82  		}
    83  	}
    84  
    85  	return true
    86  }
    87  
    88  func (b *Bloom) Add(data []byte) {
    89  	b.hashCode = 0
    90  	b.c++
    91  	for i := 0; i < int(b.k); i++ {
    92  		hashCode := uint(HashFunc(data, uint32(i)))
    93  		b.m.SetBit(hashCode%b.m.BitLength(), true)
    94  	}
    95  }
    96  
    97  func (b *Bloom) Marshal(writer io.Writer) {
    98  	if err := binary.Write(writer, binary.BigEndian, uint32(b.k)); err != nil {
    99  		panic(exceptions.Package(err))
   100  	}
   101  	if err := binary.Write(writer, binary.BigEndian, uint32(b.c)); err != nil {
   102  		panic(exceptions.Package(err))
   103  	}
   104  
   105  	if _, err := writer.Write(b.m.Bytes()); err != nil {
   106  		panic(exceptions.Package(err))
   107  	}
   108  }
   109  
   110  func Unmarshal(data []byte) *Bloom {
   111  	k := binary.BigEndian.Uint32(data)
   112  	c := binary.BigEndian.Uint32(data[4:])
   113  
   114  	m := data[8:]
   115  
   116  	return &Bloom{
   117  		m: *(*lang.UInt8Array)(unsafe.Pointer(&m)),
   118  		k: uint(k),
   119  		c: uint(c),
   120  	}
   121  }
   122  
   123  func (b *Bloom) Equals(t lang.Object) bool {
   124  	tb, ok := t.(*Bloom)
   125  	if !ok {
   126  		return false
   127  	}
   128  
   129  	return tb.k == b.k && bytes.Compare(b.m.Bytes(), tb.m.Bytes()) == 0
   130  }
   131  
   132  func (b *Bloom) HashCode() int32 {
   133  	if b.hashCode == 0 {
   134  		b.hashCode = int32(murmur3.Sum32(b.m.Bytes()))
   135  	}
   136  	return b.hashCode
   137  }
   138  
   139  func (b *Bloom) Merge(t *Bloom) bool {
   140  	if b.k != t.k {
   141  		return false
   142  	} else if len(b.m) != len(t.m) {
   143  		return false
   144  	}
   145  
   146  	b.hashCode = 0
   147  	for i := range b.m {
   148  		b.m[i] |= t.m[i]
   149  	}
   150  
   151  	return true
   152  }
   153  
   154  func (b *Bloom) MergeBM(bitMap []byte) bool {
   155  	if len(b.m) != len(bitMap) {
   156  		return false
   157  	}
   158  
   159  	b.hashCode = 0
   160  	for i := range b.m {
   161  		b.m[i] |= lang.UInt8(bitMap[i])
   162  	}
   163  
   164  	return true
   165  }