github.com/keltia/go-ipfs@v0.3.8-0.20150909044612-210793031c63/blocks/bloom/filter.go (about)

     1  // Package bloom implements a simple bloom filter.
     2  package bloom
     3  
     4  import (
     5  	"encoding/binary"
     6  	"errors"
     7  	// Non crypto hash, because speed
     8  	"github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/mtchavez/jenkins"
     9  	"github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/steakknife/hamming"
    10  	"hash"
    11  )
    12  
    13  type Filter interface {
    14  	Add([]byte)
    15  	Find([]byte) bool
    16  	Merge(Filter) (Filter, error)
    17  	HammingDistance(Filter) (int, error)
    18  }
    19  
    20  func NewFilter(size int) Filter {
    21  	return &filter{
    22  		hash:   jenkins.New(),
    23  		filter: make([]byte, size),
    24  		k:      3,
    25  	}
    26  }
    27  
// filter is the package's Filter implementation: a byte slice used as a bit
// array, plus the hash state used to derive bit positions.
type filter struct {
	// filter is the bit array; bit i lives in byte i/8 at bit position i%8.
	filter []byte
	// hash is the 32-bit hash used to derive bit positions. It is reset
	// after each derivation, so it carries no state between calls.
	hash   hash.Hash32
	// k is the number of bit positions derived for each item.
	k      int
}
    33  
    34  func BasicFilter() Filter {
    35  	return NewFilter(2048)
    36  }
    37  
    38  func (f *filter) Add(bytes []byte) {
    39  	for _, bit := range f.getBitIndicies(bytes) {
    40  		f.setBit(bit)
    41  	}
    42  }
    43  
    44  func (f *filter) getBitIndicies(bytes []byte) []uint32 {
    45  	indicies := make([]uint32, f.k)
    46  
    47  	f.hash.Write(bytes)
    48  	b := make([]byte, 4)
    49  
    50  	for i := 0; i < f.k; i++ {
    51  		res := f.hash.Sum32()
    52  		indicies[i] = res % (uint32(len(f.filter)) * 8)
    53  
    54  		binary.LittleEndian.PutUint32(b, res)
    55  		f.hash.Write(b)
    56  	}
    57  
    58  	f.hash.Reset()
    59  
    60  	return indicies
    61  }
    62  
    63  func (f *filter) Find(bytes []byte) bool {
    64  	for _, bit := range f.getBitIndicies(bytes) {
    65  		if !f.getBit(bit) {
    66  			return false
    67  		}
    68  	}
    69  	return true
    70  }
    71  
    72  func (f *filter) setBit(i uint32) {
    73  	f.filter[i/8] |= (1 << byte(i%8))
    74  }
    75  
    76  func (f *filter) getBit(i uint32) bool {
    77  	return f.filter[i/8]&(1<<byte(i%8)) != 0
    78  }
    79  
    80  func (f *filter) Merge(o Filter) (Filter, error) {
    81  	casfil, ok := o.(*filter)
    82  	if !ok {
    83  		return nil, errors.New("Unsupported filter type")
    84  	}
    85  
    86  	if len(casfil.filter) != len(f.filter) {
    87  		return nil, errors.New("filter lengths must match!")
    88  	}
    89  
    90  	if casfil.k != f.k {
    91  		return nil, errors.New("filter k-values must match!")
    92  	}
    93  
    94  	nfilt := new(filter)
    95  	nfilt.hash = f.hash
    96  	nfilt.filter = make([]byte, len(f.filter))
    97  	nfilt.k = f.k
    98  
    99  	for i, v := range f.filter {
   100  		nfilt.filter[i] = v | casfil.filter[i]
   101  	}
   102  
   103  	return nfilt, nil
   104  }
   105  
   106  func (f *filter) HammingDistance(o Filter) (int, error) {
   107  	casfil, ok := o.(*filter)
   108  	if !ok {
   109  		return 0, errors.New("Unsupported filter type")
   110  	}
   111  
   112  	if len(f.filter) != len(casfil.filter) {
   113  		return 0, errors.New("filter lengths must match!")
   114  	}
   115  
   116  	acc := 0
   117  
   118  	// xor together
   119  	for i := 0; i < len(f.filter); i++ {
   120  		acc += hamming.Byte(f.filter[i], casfil.filter[i])
   121  	}
   122  
   123  	return acc, nil
   124  }