github.com/tursom/GoCollections@v0.3.10/util/bloom/Bloom.go (about) 1 package bloom 2 3 import ( 4 "bytes" 5 "encoding/binary" 6 "io" 7 "math" 8 "unsafe" 9 10 "github.com/spaolacci/murmur3" 11 12 "github.com/tursom/GoCollections/exceptions" 13 "github.com/tursom/GoCollections/lang" 14 ) 15 16 var ( 17 HashFunc = murmur3.Sum32WithSeed 18 ) 19 20 type ( 21 Bloom struct { 22 lang.BaseObject 23 m lang.UInt8Array 24 k uint 25 c uint 26 hashCode int32 27 } 28 ) 29 30 func max(i1, i2 uint) uint { 31 if i1 < i2 { 32 return i2 33 } else { 34 return i1 35 } 36 } 37 38 func numHashFunctions(n, m float64) uint { 39 return max(1, uint(math.Floor(0.5+m/n*math.Ln2))) 40 } 41 42 func NumHashFunctions(n, m uint) uint { 43 return numHashFunctions(float64(n), float64(m)) 44 } 45 46 func calcBitLength(n float64, p float64) uint { 47 if p == 0 { 48 p = math.SmallestNonzeroFloat64 49 } 50 return uint(-n * math.Log(p) / (math.Ln2 * math.Ln2)) 51 } 52 53 func CalcBitLength(n uint, p float64) uint { 54 return calcBitLength(float64(n), p) 55 } 56 57 func NewBloom(n uint, p float64) *Bloom { 58 m := CalcBitLength(n, p) - 1 59 return &Bloom{ 60 m: make(lang.UInt8Array, m/8+1), 61 k: NumHashFunctions(n, m), 62 } 63 } 64 65 func (b *Bloom) C() uint { 66 return b.c 67 } 68 69 func (b *Bloom) K() uint { 70 return b.k 71 } 72 73 func (b *Bloom) M() uint { 74 return uint(len(b.m)) * 8 75 } 76 77 func (b *Bloom) Contains(data []byte) bool { 78 for i := 0; i < int(b.k); i++ { 79 hashCode := uint(HashFunc(data, uint32(i))) 80 if !b.m.GetBit(hashCode % b.m.BitLength()) { 81 return false 82 } 83 } 84 85 return true 86 } 87 88 func (b *Bloom) Add(data []byte) { 89 b.hashCode = 0 90 b.c++ 91 for i := 0; i < int(b.k); i++ { 92 hashCode := uint(HashFunc(data, uint32(i))) 93 b.m.SetBit(hashCode%b.m.BitLength(), true) 94 } 95 } 96 97 func (b *Bloom) Marshal(writer io.Writer) { 98 if err := binary.Write(writer, binary.BigEndian, uint32(b.k)); err != nil { 99 panic(exceptions.Package(err)) 100 } 101 if err := binary.Write(writer, binary.BigEndian, uint32(b.c)); err != nil { 102 panic(exceptions.Package(err)) 103 } 104 105 if _, err := writer.Write(b.m.Bytes()); err != nil { 106 panic(exceptions.Package(err)) 107 } 108 } 109 110 func Unmarshal(data []byte) *Bloom { 111 k := binary.BigEndian.Uint32(data) 112 c := binary.BigEndian.Uint32(data[4:]) 113 114 m := data[8:] 115 116 return &Bloom{ 117 m: *(*lang.UInt8Array)(unsafe.Pointer(&m)), 118 k: uint(k), 119 c: uint(c), 120 } 121 } 122 123 func (b *Bloom) Equals(t lang.Object) bool { 124 tb, ok := t.(*Bloom) 125 if !ok { 126 return false 127 } 128 129 return tb.k == b.k && bytes.Compare(b.m.Bytes(), tb.m.Bytes()) == 0 130 } 131 132 func (b *Bloom) HashCode() int32 { 133 if b.hashCode == 0 { 134 b.hashCode = int32(murmur3.Sum32(b.m.Bytes())) 135 } 136 return b.hashCode 137 } 138 139 func (b *Bloom) Merge(t *Bloom) bool { 140 if b.k != t.k { 141 return false 142 } else if len(b.m) != len(t.m) { 143 return false 144 } 145 146 b.hashCode = 0 147 for i := range b.m { 148 b.m[i] |= t.m[i] 149 } 150 151 return true 152 } 153 154 func (b *Bloom) MergeBM(bitMap []byte) bool { 155 if len(b.m) != len(bitMap) { 156 return false 157 } 158 159 b.hashCode = 0 160 for i := range b.m { 161 b.m[i] |= lang.UInt8(bitMap[i]) 162 } 163 164 return true 165 }