github.com/pingcap/badger@v1.5.1-0.20230103063557-828f39b09b6d/cache/z/bbloom.go (about)

     1  // The MIT License (MIT)
     2  // Copyright (c) 2014 Andreas Briese, eduToolbox@Bri-C GmbH, Sarstedt
     3  
     4  // Permission is hereby granted, free of charge, to any person obtaining a copy of
     5  // this software and associated documentation files (the "Software"), to deal in
     6  // the Software without restriction, including without limitation the rights to
     7  // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
     8  // the Software, and to permit persons to whom the Software is furnished to do so,
     9  // subject to the following conditions:
    10  
    11  // The above copyright notice and this permission notice shall be included in all
    12  // copies or substantial portions of the Software.
    13  
    14  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    15  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
    16  // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
    17  // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
    18  // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
    19  // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    20  
    21  package z
    22  
    23  import (
    24  	"bytes"
    25  	"encoding/json"
    26  	"log"
    27  	"math"
    28  	"unsafe"
    29  )
    30  
    31  // helper
    32  var mask = []uint8{1, 2, 4, 8, 16, 32, 64, 128}
    33  
    34  func getSize(ui64 uint64) (size uint64, exponent uint64) {
    35  	if ui64 < uint64(512) {
    36  		ui64 = uint64(512)
    37  	}
    38  	size = uint64(1)
    39  	for size < ui64 {
    40  		size <<= 1
    41  		exponent++
    42  	}
    43  	return size, exponent
    44  }
    45  
    46  func calcSizeByWrongPositives(numEntries, wrongs float64) (uint64, uint64) {
    47  	size := -1 * numEntries * math.Log(wrongs) / math.Pow(float64(0.69314718056), 2)
    48  	locs := math.Ceil(float64(0.69314718056) * size / numEntries)
    49  	return uint64(size), uint64(locs)
    50  }
    51  
    52  // NewBloomFilter returns a new bloomfilter.
    53  func NewBloomFilter(params ...float64) (bloomfilter *Bloom) {
    54  	var entries, locs uint64
    55  	if len(params) == 2 {
    56  		if params[1] < 1 {
    57  			entries, locs = calcSizeByWrongPositives(params[0], params[1])
    58  		} else {
    59  			entries, locs = uint64(params[0]), uint64(params[1])
    60  		}
    61  	} else {
    62  		log.Fatal("usage: New(float64(number_of_entries), float64(number_of_hashlocations))" +
    63  			" i.e. New(float64(1000), float64(3)) or New(float64(number_of_entries)," +
    64  			" float64(number_of_hashlocations)) i.e. New(float64(1000), float64(0.03))")
    65  	}
    66  	size, exponent := getSize(entries)
    67  	bloomfilter = &Bloom{
    68  		sizeExp: exponent,
    69  		size:    size - 1,
    70  		setLocs: locs,
    71  		shift:   64 - exponent,
    72  	}
    73  	bloomfilter.Size(size)
    74  	return bloomfilter
    75  }
    76  
    77  // Bloom filter
    78  type Bloom struct {
    79  	bitset  []uint64
    80  	ElemNum uint64
    81  	sizeExp uint64
    82  	size    uint64
    83  	setLocs uint64
    84  	shift   uint64
    85  }
    86  
    87  // <--- http://www.cse.yorku.ca/~oz/hash.html
    88  // modified Berkeley DB Hash (32bit)
    89  // hash is cast to l, h = 16bit fragments
    90  // func (bl Bloom) absdbm(b *[]byte) (l, h uint64) {
    91  // 	hash := uint64(len(*b))
    92  // 	for _, c := range *b {
    93  // 		hash = uint64(c) + (hash << 6) + (hash << bl.sizeExp) - hash
    94  // 	}
    95  // 	h = hash >> bl.shift
    96  // 	l = hash << bl.shift >> bl.shift
    97  // 	return l, h
    98  // }
    99  
   100  // Add adds hash of a key to the bloomfilter.
   101  func (bl *Bloom) Add(hash uint64) {
   102  	h := hash >> bl.shift
   103  	l := hash << bl.shift >> bl.shift
   104  	for i := uint64(0); i < bl.setLocs; i++ {
   105  		bl.Set((h + i*l) & bl.size)
   106  		bl.ElemNum++
   107  	}
   108  }
   109  
   110  // Has checks if bit(s) for entry hash is/are set,
   111  // returns true if the hash was added to the Bloom Filter.
   112  func (bl Bloom) Has(hash uint64) bool {
   113  	h := hash >> bl.shift
   114  	l := hash << bl.shift >> bl.shift
   115  	for i := uint64(0); i < bl.setLocs; i++ {
   116  		switch bl.IsSet((h + i*l) & bl.size) {
   117  		case false:
   118  			return false
   119  		}
   120  	}
   121  	return true
   122  }
   123  
   124  // AddIfNotHas only Adds hash, if it's not present in the bloomfilter.
   125  // Returns true if hash was added.
   126  // Returns false if hash was already registered in the bloomfilter.
   127  func (bl *Bloom) AddIfNotHas(hash uint64) bool {
   128  	if bl.Has(hash) {
   129  		return false
   130  	}
   131  	bl.Add(hash)
   132  	return true
   133  }
   134  
   135  // Size makes Bloom filter with as bitset of size sz.
   136  func (bl *Bloom) Size(sz uint64) {
   137  	bl.bitset = make([]uint64, sz>>6)
   138  }
   139  
   140  // Clear resets the Bloom filter.
   141  func (bl *Bloom) Clear() {
   142  	for i := range bl.bitset {
   143  		bl.bitset[i] = 0
   144  	}
   145  }
   146  
   147  // Set sets the bit[idx] of bitset.
   148  func (bl *Bloom) Set(idx uint64) {
   149  	ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3))
   150  	*(*uint8)(ptr) |= mask[idx%8]
   151  }
   152  
   153  // IsSet checks if bit[idx] of bitset is set, returns true/false.
   154  func (bl *Bloom) IsSet(idx uint64) bool {
   155  	ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3))
   156  	r := ((*(*uint8)(ptr)) >> (idx % 8)) & 1
   157  	return r == 1
   158  }
   159  
   160  // bloomJSONImExport
   161  // Im/Export structure used by JSONMarshal / JSONUnmarshal
   162  type bloomJSONImExport struct {
   163  	FilterSet []byte
   164  	SetLocs   uint64
   165  }
   166  
   167  // NewWithBoolset takes a []byte slice and number of locs per entry,
   168  // returns the bloomfilter with a bitset populated according to the input []byte.
   169  func newWithBoolset(bs *[]byte, locs uint64) *Bloom {
   170  	bloomfilter := NewBloomFilter(float64(len(*bs)<<3), float64(locs))
   171  	for i, b := range *bs {
   172  		*(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&bloomfilter.bitset[0])) + uintptr(i))) = b
   173  	}
   174  	return bloomfilter
   175  }
   176  
   177  // JSONUnmarshal takes JSON-Object (type bloomJSONImExport) as []bytes
   178  // returns bloom32 / bloom64 object.
   179  func JSONUnmarshal(dbData []byte) *Bloom {
   180  	bloomImEx := bloomJSONImExport{}
   181  	json.Unmarshal(dbData, &bloomImEx)
   182  	buf := bytes.NewBuffer(bloomImEx.FilterSet)
   183  	bs := buf.Bytes()
   184  	bf := newWithBoolset(&bs, bloomImEx.SetLocs)
   185  	return bf
   186  }
   187  
   188  // JSONMarshal returns JSON-object (type bloomJSONImExport) as []byte.
   189  func (bl Bloom) JSONMarshal() []byte {
   190  	bloomImEx := bloomJSONImExport{}
   191  	bloomImEx.SetLocs = bl.setLocs
   192  	bloomImEx.FilterSet = make([]byte, len(bl.bitset)<<3)
   193  	for i := range bloomImEx.FilterSet {
   194  		bloomImEx.FilterSet[i] = *(*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[0])) +
   195  			uintptr(i)))
   196  	}
   197  	data, err := json.Marshal(bloomImEx)
   198  	if err != nil {
   199  		log.Fatal("json.Marshal failed: ", err)
   200  	}
   201  	return data
   202  }