github.com/fiatjaf/generic-ristretto@v0.0.1/z/bbloom.go (about)

     1  // The MIT License (MIT)
     2  // Copyright (c) 2014 Andreas Briese, eduToolbox@Bri-C GmbH, Sarstedt
     3  
     4  // Permission is hereby granted, free of charge, to any person obtaining a copy of
     5  // this software and associated documentation files (the "Software"), to deal in
     6  // the Software without restriction, including without limitation the rights to
     7  // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
     8  // the Software, and to permit persons to whom the Software is furnished to do so,
     9  // subject to the following conditions:
    10  
    11  // The above copyright notice and this permission notice shall be included in all
    12  // copies or substantial portions of the Software.
    13  
    14  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    15  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
    16  // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
    17  // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
    18  // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
    19  // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    20  
    21  package z
    22  
import (
	"bytes"
	"encoding/json"
	"errors"
	"math"
	"unsafe"

	"github.com/golang/glog"
)
    31  
    32  // helper
    33  var mask = []uint8{1, 2, 4, 8, 16, 32, 64, 128}
    34  
    35  func getSize(ui64 uint64) (size uint64, exponent uint64) {
    36  	if ui64 < uint64(512) {
    37  		ui64 = uint64(512)
    38  	}
    39  	size = uint64(1)
    40  	for size < ui64 {
    41  		size <<= 1
    42  		exponent++
    43  	}
    44  	return size, exponent
    45  }
    46  
    47  func calcSizeByWrongPositives(numEntries, wrongs float64) (uint64, uint64) {
    48  	size := -1 * numEntries * math.Log(wrongs) / math.Pow(float64(0.69314718056), 2)
    49  	locs := math.Ceil(float64(0.69314718056) * size / numEntries)
    50  	return uint64(size), uint64(locs)
    51  }
    52  
    53  // NewBloomFilter returns a new bloomfilter.
    54  func NewBloomFilter(params ...float64) (bloomfilter *Bloom) {
    55  	var entries, locs uint64
    56  	if len(params) == 2 {
    57  		if params[1] < 1 {
    58  			entries, locs = calcSizeByWrongPositives(params[0], params[1])
    59  		} else {
    60  			entries, locs = uint64(params[0]), uint64(params[1])
    61  		}
    62  	} else {
    63  		glog.Fatal("usage: New(float64(number_of_entries), float64(number_of_hashlocations))" +
    64  			" i.e. New(float64(1000), float64(3)) or New(float64(number_of_entries)," +
    65  			" float64(number_of_hashlocations)) i.e. New(float64(1000), float64(0.03))")
    66  	}
    67  	size, exponent := getSize(entries)
    68  	bloomfilter = &Bloom{
    69  		sizeExp: exponent,
    70  		size:    size - 1,
    71  		setLocs: locs,
    72  		shift:   64 - exponent,
    73  	}
    74  	bloomfilter.Size(size)
    75  	return bloomfilter
    76  }
    77  
    78  // Bloom filter
    79  type Bloom struct {
    80  	bitset  []uint64
    81  	ElemNum uint64
    82  	sizeExp uint64
    83  	size    uint64
    84  	setLocs uint64
    85  	shift   uint64
    86  }
    87  
    88  // <--- http://www.cse.yorku.ca/~oz/hash.html
    89  // modified Berkeley DB Hash (32bit)
    90  // hash is casted to l, h = 16bit fragments
    91  // func (bl Bloom) absdbm(b *[]byte) (l, h uint64) {
    92  // 	hash := uint64(len(*b))
    93  // 	for _, c := range *b {
    94  // 		hash = uint64(c) + (hash << 6) + (hash << bl.sizeExp) - hash
    95  // 	}
    96  // 	h = hash >> bl.shift
    97  // 	l = hash << bl.shift >> bl.shift
    98  // 	return l, h
    99  // }
   100  
   101  // Add adds hash of a key to the bloomfilter.
   102  func (bl *Bloom) Add(hash uint64) {
   103  	h := hash >> bl.shift
   104  	l := hash << bl.shift >> bl.shift
   105  	for i := uint64(0); i < bl.setLocs; i++ {
   106  		bl.Set((h + i*l) & bl.size)
   107  		bl.ElemNum++
   108  	}
   109  }
   110  
   111  // Has checks if bit(s) for entry hash is/are set,
   112  // returns true if the hash was added to the Bloom Filter.
   113  func (bl Bloom) Has(hash uint64) bool {
   114  	h := hash >> bl.shift
   115  	l := hash << bl.shift >> bl.shift
   116  	for i := uint64(0); i < bl.setLocs; i++ {
   117  		if !bl.IsSet((h + i*l) & bl.size) {
   118  			return false
   119  		}
   120  	}
   121  	return true
   122  }
   123  
   124  // AddIfNotHas only Adds hash, if it's not present in the bloomfilter.
   125  // Returns true if hash was added.
   126  // Returns false if hash was already registered in the bloomfilter.
   127  func (bl *Bloom) AddIfNotHas(hash uint64) bool {
   128  	if bl.Has(hash) {
   129  		return false
   130  	}
   131  	bl.Add(hash)
   132  	return true
   133  }
   134  
   135  // TotalSize returns the total size of the bloom filter.
   136  func (bl *Bloom) TotalSize() int {
   137  	// The bl struct has 5 members and each one is 8 byte. The bitset is a
   138  	// uint64 byte slice.
   139  	return len(bl.bitset)*8 + 5*8
   140  }
   141  
   142  // Size makes Bloom filter with as bitset of size sz.
   143  func (bl *Bloom) Size(sz uint64) {
   144  	bl.bitset = make([]uint64, sz>>6)
   145  }
   146  
   147  // Clear resets the Bloom filter.
   148  func (bl *Bloom) Clear() {
   149  	for i := range bl.bitset {
   150  		bl.bitset[i] = 0
   151  	}
   152  }
   153  
   154  // Set sets the bit[idx] of bitset.
   155  func (bl *Bloom) Set(idx uint64) {
   156  	ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3))
   157  	*(*uint8)(ptr) |= mask[idx%8]
   158  }
   159  
   160  // IsSet checks if bit[idx] of bitset is set, returns true/false.
   161  func (bl *Bloom) IsSet(idx uint64) bool {
   162  	ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3))
   163  	r := ((*(*uint8)(ptr)) >> (idx % 8)) & 1
   164  	return r == 1
   165  }
   166  
   167  // bloomJSONImExport
   168  // Im/Export structure used by JSONMarshal / JSONUnmarshal
   169  type bloomJSONImExport struct {
   170  	FilterSet []byte
   171  	SetLocs   uint64
   172  }
   173  
   174  // NewWithBoolset takes a []byte slice and number of locs per entry,
   175  // returns the bloomfilter with a bitset populated according to the input []byte.
   176  func newWithBoolset(bs *[]byte, locs uint64) *Bloom {
   177  	bloomfilter := NewBloomFilter(float64(len(*bs)<<3), float64(locs))
   178  	for i, b := range *bs {
   179  		*(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&bloomfilter.bitset[0])) + uintptr(i))) = b
   180  	}
   181  	return bloomfilter
   182  }
   183  
   184  // JSONUnmarshal takes JSON-Object (type bloomJSONImExport) as []bytes
   185  // returns bloom32 / bloom64 object.
   186  func JSONUnmarshal(dbData []byte) (*Bloom, error) {
   187  	bloomImEx := bloomJSONImExport{}
   188  	if err := json.Unmarshal(dbData, &bloomImEx); err != nil {
   189  		return nil, err
   190  	}
   191  	buf := bytes.NewBuffer(bloomImEx.FilterSet)
   192  	bs := buf.Bytes()
   193  	bf := newWithBoolset(&bs, bloomImEx.SetLocs)
   194  	return bf, nil
   195  }
   196  
   197  // JSONMarshal returns JSON-object (type bloomJSONImExport) as []byte.
   198  func (bl Bloom) JSONMarshal() []byte {
   199  	bloomImEx := bloomJSONImExport{}
   200  	bloomImEx.SetLocs = bl.setLocs
   201  	bloomImEx.FilterSet = make([]byte, len(bl.bitset)<<3)
   202  	for i := range bloomImEx.FilterSet {
   203  		bloomImEx.FilterSet[i] = *(*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[0])) +
   204  			uintptr(i)))
   205  	}
   206  	data, err := json.Marshal(bloomImEx)
   207  	if err != nil {
   208  		glog.Fatal("json.Marshal failed: ", err)
   209  	}
   210  	return data
   211  }