github.com/outcaste-io/ristretto@v0.2.3/z/bbloom.go (about)

     1  // The MIT License (MIT)
     2  // Copyright (c) 2014 Andreas Briese, eduToolbox@Bri-C GmbH, Sarstedt
     3  
     4  // Permission is hereby granted, free of charge, to any person obtaining a copy of
     5  // this software and associated documentation files (the "Software"), to deal in
     6  // the Software without restriction, including without limitation the rights to
     7  // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
     8  // the Software, and to permit persons to whom the Software is furnished to do so,
     9  // subject to the following conditions:
    10  
    11  // The above copyright notice and this permission notice shall be included in all
    12  // copies or substantial portions of the Software.
    13  
    14  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    15  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
    16  // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
    17  // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
    18  // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
    19  // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    20  
    21  package z
    22  
import (
	"bytes"
	"encoding/json"
	"errors"
	"math"
	"unsafe"
)
    29  
// mask is a lookup table of single-bit byte masks: mask[i] == 1<<i for a bit
// position i in the range 0-7 within one byte.
var mask = []uint8{1, 2, 4, 8, 16, 32, 64, 128}
    32  
    33  func getSize(ui64 uint64) (size uint64, exponent uint64) {
    34  	if ui64 < uint64(512) {
    35  		ui64 = uint64(512)
    36  	}
    37  	size = uint64(1)
    38  	for size < ui64 {
    39  		size <<= 1
    40  		exponent++
    41  	}
    42  	return size, exponent
    43  }
    44  
    45  func calcSizeByWrongPositives(numEntries, wrongs float64) (uint64, uint64) {
    46  	size := -1 * numEntries * math.Log(wrongs) / math.Pow(float64(0.69314718056), 2)
    47  	locs := math.Ceil(float64(0.69314718056) * size / numEntries)
    48  	return uint64(size), uint64(locs)
    49  }
    50  
    51  // NewBloomFilter returns a new bloomfilter.
    52  func NewBloomFilter(params ...float64) (bloomfilter *Bloom) {
    53  	var entries, locs uint64
    54  	if len(params) == 2 {
    55  		if params[1] < 1 {
    56  			entries, locs = calcSizeByWrongPositives(params[0], params[1])
    57  		} else {
    58  			entries, locs = uint64(params[0]), uint64(params[1])
    59  		}
    60  	} else {
    61  		fatal("usage: New(float64(number_of_entries), float64(number_of_hashlocations))" +
    62  			" i.e. New(float64(1000), float64(3)) or New(float64(number_of_entries)," +
    63  			" float64(number_of_hashlocations)) i.e. New(float64(1000), float64(0.03))")
    64  	}
    65  	size, exponent := getSize(entries)
    66  	bloomfilter = &Bloom{
    67  		sizeExp: exponent,
    68  		size:    size - 1,
    69  		setLocs: locs,
    70  		shift:   64 - exponent,
    71  	}
    72  	bloomfilter.Size(size)
    73  	return bloomfilter
    74  }
    75  
// Bloom is a Bloom filter over 64-bit hashes, backed by a bitset whose length
// in bits is a power of two.
type Bloom struct {
	// bitset holds the filter bits, 64 per word.
	bitset []uint64
	// ElemNum is incremented by Add once per hash location it sets.
	ElemNum uint64
	// sizeExp is the base-2 logarithm of the number of bits, as computed by
	// getSize in NewBloomFilter.
	sizeExp uint64
	// size is the number of bits minus one; Add and Has use it as a bit mask.
	size uint64
	// setLocs is the number of bit positions set or checked per hash.
	setLocs uint64
	// shift is 64 - sizeExp; Add and Has use it to split a hash in two parts.
	shift uint64
}
    85  
    86  // <--- http://www.cse.yorku.ca/~oz/hash.html
    87  // modified Berkeley DB Hash (32bit)
    88  // hash is casted to l, h = 16bit fragments
    89  // func (bl Bloom) absdbm(b *[]byte) (l, h uint64) {
    90  // 	hash := uint64(len(*b))
    91  // 	for _, c := range *b {
    92  // 		hash = uint64(c) + (hash << 6) + (hash << bl.sizeExp) - hash
    93  // 	}
    94  // 	h = hash >> bl.shift
    95  // 	l = hash << bl.shift >> bl.shift
    96  // 	return l, h
    97  // }
    98  
    99  // Add adds hash of a key to the bloomfilter.
   100  func (bl *Bloom) Add(hash uint64) {
   101  	h := hash >> bl.shift
   102  	l := hash << bl.shift >> bl.shift
   103  	for i := uint64(0); i < bl.setLocs; i++ {
   104  		bl.Set((h + i*l) & bl.size)
   105  		bl.ElemNum++
   106  	}
   107  }
   108  
   109  // Has checks if bit(s) for entry hash is/are set,
   110  // returns true if the hash was added to the Bloom Filter.
   111  func (bl Bloom) Has(hash uint64) bool {
   112  	h := hash >> bl.shift
   113  	l := hash << bl.shift >> bl.shift
   114  	for i := uint64(0); i < bl.setLocs; i++ {
   115  		if !bl.IsSet((h + i*l) & bl.size) {
   116  			return false
   117  		}
   118  	}
   119  	return true
   120  }
   121  
   122  // AddIfNotHas only Adds hash, if it's not present in the bloomfilter.
   123  // Returns true if hash was added.
   124  // Returns false if hash was already registered in the bloomfilter.
   125  func (bl *Bloom) AddIfNotHas(hash uint64) bool {
   126  	if bl.Has(hash) {
   127  		return false
   128  	}
   129  	bl.Add(hash)
   130  	return true
   131  }
   132  
   133  // TotalSize returns the total size of the bloom filter.
   134  func (bl *Bloom) TotalSize() int {
   135  	// The bl struct has 5 members and each one is 8 byte. The bitset is a
   136  	// uint64 byte slice.
   137  	return len(bl.bitset)*8 + 5*8
   138  }
   139  
   140  // Size makes Bloom filter with as bitset of size sz.
   141  func (bl *Bloom) Size(sz uint64) {
   142  	bl.bitset = make([]uint64, sz>>6)
   143  }
   144  
   145  // Clear resets the Bloom filter.
   146  func (bl *Bloom) Clear() {
   147  	for i := range bl.bitset {
   148  		bl.bitset[i] = 0
   149  	}
   150  }
   151  
   152  // Set sets the bit[idx] of bitset.
   153  func (bl *Bloom) Set(idx uint64) {
   154  	ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3))
   155  	*(*uint8)(ptr) |= mask[idx%8]
   156  }
   157  
   158  // IsSet checks if bit[idx] of bitset is set, returns true/false.
   159  func (bl *Bloom) IsSet(idx uint64) bool {
   160  	ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3))
   161  	r := ((*(*uint8)(ptr)) >> (idx % 8)) & 1
   162  	return r == 1
   163  }
   164  
// bloomJSONImExport is the import/export structure used by JSONMarshal and
// JSONUnmarshal. The fields carry no JSON tags, so they are encoded under
// their Go names ("FilterSet" and "SetLocs").
type bloomJSONImExport struct {
	// FilterSet is the filter's bitset as raw bytes.
	FilterSet []byte
	// SetLocs is the number of bit positions set per added hash.
	SetLocs uint64
}
   171  
   172  // NewWithBoolset takes a []byte slice and number of locs per entry,
   173  // returns the bloomfilter with a bitset populated according to the input []byte.
   174  func newWithBoolset(bs *[]byte, locs uint64) *Bloom {
   175  	bloomfilter := NewBloomFilter(float64(len(*bs)<<3), float64(locs))
   176  	for i, b := range *bs {
   177  		*(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&bloomfilter.bitset[0])) + uintptr(i))) = b
   178  	}
   179  	return bloomfilter
   180  }
   181  
   182  // JSONUnmarshal takes JSON-Object (type bloomJSONImExport) as []bytes
   183  // returns bloom32 / bloom64 object.
   184  func JSONUnmarshal(dbData []byte) (*Bloom, error) {
   185  	bloomImEx := bloomJSONImExport{}
   186  	if err := json.Unmarshal(dbData, &bloomImEx); err != nil {
   187  		return nil, err
   188  	}
   189  	buf := bytes.NewBuffer(bloomImEx.FilterSet)
   190  	bs := buf.Bytes()
   191  	bf := newWithBoolset(&bs, bloomImEx.SetLocs)
   192  	return bf, nil
   193  }
   194  
   195  // JSONMarshal returns JSON-object (type bloomJSONImExport) as []byte.
   196  func (bl Bloom) JSONMarshal() []byte {
   197  	bloomImEx := bloomJSONImExport{}
   198  	bloomImEx.SetLocs = bl.setLocs
   199  	bloomImEx.FilterSet = make([]byte, len(bl.bitset)<<3)
   200  	for i := range bloomImEx.FilterSet {
   201  		bloomImEx.FilterSet[i] = *(*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[0])) +
   202  			uintptr(i)))
   203  	}
   204  	data, err := json.Marshal(bloomImEx)
   205  	if err != nil {
   206  		fatal("json.Marshal failed: ", err)
   207  	}
   208  	return data
   209  }