github.com/keltia/go-ipfs@v0.3.8-0.20150909044612-210793031c63/blocks/bloom/filter.go (about) 1 // package bloom implements a simple bloom filter. 2 package bloom 3 4 import ( 5 "encoding/binary" 6 "errors" 7 // Non crypto hash, because speed 8 "github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/mtchavez/jenkins" 9 "github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/steakknife/hamming" 10 "hash" 11 ) 12 13 type Filter interface { 14 Add([]byte) 15 Find([]byte) bool 16 Merge(Filter) (Filter, error) 17 HammingDistance(Filter) (int, error) 18 } 19 20 func NewFilter(size int) Filter { 21 return &filter{ 22 hash: jenkins.New(), 23 filter: make([]byte, size), 24 k: 3, 25 } 26 } 27 28 type filter struct { 29 filter []byte 30 hash hash.Hash32 31 k int 32 } 33 34 func BasicFilter() Filter { 35 return NewFilter(2048) 36 } 37 38 func (f *filter) Add(bytes []byte) { 39 for _, bit := range f.getBitIndicies(bytes) { 40 f.setBit(bit) 41 } 42 } 43 44 func (f *filter) getBitIndicies(bytes []byte) []uint32 { 45 indicies := make([]uint32, f.k) 46 47 f.hash.Write(bytes) 48 b := make([]byte, 4) 49 50 for i := 0; i < f.k; i++ { 51 res := f.hash.Sum32() 52 indicies[i] = res % (uint32(len(f.filter)) * 8) 53 54 binary.LittleEndian.PutUint32(b, res) 55 f.hash.Write(b) 56 } 57 58 f.hash.Reset() 59 60 return indicies 61 } 62 63 func (f *filter) Find(bytes []byte) bool { 64 for _, bit := range f.getBitIndicies(bytes) { 65 if !f.getBit(bit) { 66 return false 67 } 68 } 69 return true 70 } 71 72 func (f *filter) setBit(i uint32) { 73 f.filter[i/8] |= (1 << byte(i%8)) 74 } 75 76 func (f *filter) getBit(i uint32) bool { 77 return f.filter[i/8]&(1<<byte(i%8)) != 0 78 } 79 80 func (f *filter) Merge(o Filter) (Filter, error) { 81 casfil, ok := o.(*filter) 82 if !ok { 83 return nil, errors.New("Unsupported filter type") 84 } 85 86 if len(casfil.filter) != len(f.filter) { 87 return nil, errors.New("filter lengths must match!") 88 } 89 90 if casfil.k != f.k { 91 return nil, errors.New("filter k-values must match!") 92 } 93 94 nfilt := new(filter) 95 nfilt.hash = f.hash 96 nfilt.filter = make([]byte, len(f.filter)) 97 nfilt.k = f.k 98 99 for i, v := range f.filter { 100 nfilt.filter[i] = v | casfil.filter[i] 101 } 102 103 return nfilt, nil 104 } 105 106 func (f *filter) HammingDistance(o Filter) (int, error) { 107 casfil, ok := o.(*filter) 108 if !ok { 109 return 0, errors.New("Unsupported filter type") 110 } 111 112 if len(f.filter) != len(casfil.filter) { 113 return 0, errors.New("filter lengths must match!") 114 } 115 116 acc := 0 117 118 // xor together 119 for i := 0; i < len(f.filter); i++ { 120 acc += hamming.Byte(f.filter[i], casfil.filter[i]) 121 } 122 123 return acc, nil 124 }