github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/bloom/bloom.go (about)

     1  // Copyright 2013 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  // Package bloom implements Bloom filters.
     6  package bloom // import "github.com/cockroachdb/pebble/bloom"
     7  
     8  import (
     9  	"encoding/binary"
    10  	"fmt"
    11  	"sync"
    12  
    13  	"github.com/cockroachdb/pebble/internal/base"
    14  )
    15  
const (
	// cacheLineSize is the size, in bytes, of one filter line. Finish sizes
	// the filter bits as nLines * cacheLineSize bytes.
	cacheLineSize = 64
	// cacheLineBits is the number of bits in one filter line; every probe for
	// a given hash lands within a single line of this many bits.
	cacheLineBits = cacheLineSize * 8
)
    20  
    21  type tableFilter []byte
    22  
    23  func (f tableFilter) MayContain(key []byte) bool {
    24  	if len(f) <= 5 {
    25  		return false
    26  	}
    27  	n := len(f) - 5
    28  	nProbes := f[n]
    29  	nLines := binary.LittleEndian.Uint32(f[n+1:])
    30  	cacheLineBits := 8 * (uint32(n) / nLines)
    31  
    32  	h := hash(key)
    33  	delta := h>>17 | h<<15
    34  	b := (h % nLines) * cacheLineBits
    35  
    36  	for j := uint8(0); j < nProbes; j++ {
    37  		bitPos := b + (h % cacheLineBits)
    38  		if f[bitPos/8]&(1<<(bitPos%8)) == 0 {
    39  			return false
    40  		}
    41  		h += delta
    42  	}
    43  	return true
    44  }
    45  
    46  func calculateProbes(bitsPerKey int) uint32 {
    47  	// We intentionally round down to reduce probing cost a little bit
    48  	n := uint32(float64(bitsPerKey) * 0.69) // 0.69 =~ ln(2)
    49  	if n < 1 {
    50  		n = 1
    51  	}
    52  	if n > 30 {
    53  		n = 30
    54  	}
    55  	return n
    56  }
    57  
    58  // extend appends n zero bytes to b. It returns the overall slice (of length
    59  // n+len(originalB)) and the slice of n trailing zeroes.
    60  func extend(b []byte, n int) (overall, trailer []byte) {
    61  	want := n + len(b)
    62  	if want <= cap(b) {
    63  		overall = b[:want]
    64  		trailer = overall[len(b):]
    65  		clear(trailer)
    66  	} else {
    67  		// Grow the capacity exponentially, with a 1KiB minimum.
    68  		c := 1024
    69  		for c < want {
    70  			c += c / 4
    71  		}
    72  		overall = make([]byte, want, c)
    73  		trailer = overall[len(b):]
    74  		copy(overall, b)
    75  	}
    76  	return overall, trailer
    77  }
    78  
    79  // hash implements a hashing algorithm similar to the Murmur hash.
    80  func hash(b []byte) uint32 {
    81  	const (
    82  		seed = 0xbc9f1d34
    83  		m    = 0xc6a4a793
    84  	)
    85  	h := uint32(seed) ^ uint32(uint64(uint32(len(b))*m))
    86  	for ; len(b) >= 4; b = b[4:] {
    87  		h += uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
    88  		h *= m
    89  		h ^= h >> 16
    90  	}
    91  
    92  	// The code below first casts each byte to a signed 8-bit integer. This is
    93  	// necessary to match RocksDB's behavior. Note that the `byte` type in Go is
    94  	// unsigned. What is the difference between casting a signed 8-bit value vs
    95  	// unsigned 8-bit value into an unsigned 32-bit value?
    96  	// Sign-extension. Consider the value 250 which has the bit pattern 11111010:
    97  	//
    98  	//   uint32(250)        = 00000000000000000000000011111010
    99  	//   uint32(int8(250))  = 11111111111111111111111111111010
   100  	//
   101  	// Note that the original LevelDB code did not explicitly cast to a signed
   102  	// 8-bit value which left the behavior dependent on whether C characters were
   103  	// signed or unsigned which is a compiler flag for gcc (-funsigned-char).
   104  	switch len(b) {
   105  	case 3:
   106  		h += uint32(int8(b[2])) << 16
   107  		fallthrough
   108  	case 2:
   109  		h += uint32(int8(b[1])) << 8
   110  		fallthrough
   111  	case 1:
   112  		h += uint32(int8(b[0]))
   113  		h *= m
   114  		h ^= h >> 24
   115  	}
   116  	return h
   117  }
   118  
   119  const hashBlockLen = 16384
   120  
   121  type hashBlock [hashBlockLen]uint32
   122  
   123  var hashBlockPool = sync.Pool{
   124  	New: func() interface{} {
   125  		return &hashBlock{}
   126  	},
   127  }
   128  
// tableFilterWriter accumulates key hashes through AddKey and encodes them
// into a table filter in Finish.
type tableFilterWriter struct {
	// bitsPerKey is the bit budget per stored hash; Finish uses it to compute
	// both the number of filter lines and the number of probes.
	bitsPerKey int

	// numHashes is the number of hashes stored since the writer was created
	// or last finished.
	numHashes int
	// We store the hashes in blocks. lastHash is the hash stored most
	// recently; AddKey uses it to skip a hash equal to the previous one.
	blocks   []*hashBlock
	lastHash uint32

	// Initial "in-line" storage for the blocks slice (to avoid some small
	// allocations).
	blocksBuf [16]*hashBlock
}
   141  
   142  func newTableFilterWriter(bitsPerKey int) *tableFilterWriter {
   143  	w := &tableFilterWriter{
   144  		bitsPerKey: bitsPerKey,
   145  	}
   146  	w.blocks = w.blocksBuf[:0]
   147  	return w
   148  }
   149  
   150  // AddKey implements the base.FilterWriter interface.
   151  func (w *tableFilterWriter) AddKey(key []byte) {
   152  	h := hash(key)
   153  	if w.numHashes != 0 && h == w.lastHash {
   154  		return
   155  	}
   156  	ofs := w.numHashes % hashBlockLen
   157  	if ofs == 0 {
   158  		// Time for a new block.
   159  		w.blocks = append(w.blocks, hashBlockPool.Get().(*hashBlock))
   160  	}
   161  	w.blocks[len(w.blocks)-1][ofs] = h
   162  	w.numHashes++
   163  	w.lastHash = h
   164  }
   165  
   166  // Finish implements the base.FilterWriter interface.
   167  func (w *tableFilterWriter) Finish(buf []byte) []byte {
   168  	// The table filter format matches the RocksDB full-file filter format.
   169  	var nLines int
   170  	if w.numHashes != 0 {
   171  		nLines = (w.numHashes*w.bitsPerKey + cacheLineBits - 1) / (cacheLineBits)
   172  		// Make nLines an odd number to make sure more bits are involved when
   173  		// determining which block.
   174  		if nLines%2 == 0 {
   175  			nLines++
   176  		}
   177  	}
   178  
   179  	nBytes := nLines * cacheLineSize
   180  	// +5: 4 bytes for num-lines, 1 byte for num-probes
   181  	buf, filter := extend(buf, nBytes+5)
   182  
   183  	if nLines != 0 {
   184  		nProbes := calculateProbes(w.bitsPerKey)
   185  		for bIdx, b := range w.blocks {
   186  			length := hashBlockLen
   187  			if bIdx == len(w.blocks)-1 && w.numHashes%hashBlockLen != 0 {
   188  				length = w.numHashes % hashBlockLen
   189  			}
   190  			for _, h := range b[:length] {
   191  				delta := h>>17 | h<<15 // rotate right 17 bits
   192  				b := (h % uint32(nLines)) * (cacheLineBits)
   193  				for i := uint32(0); i < nProbes; i++ {
   194  					bitPos := b + (h % cacheLineBits)
   195  					filter[bitPos/8] |= (1 << (bitPos % 8))
   196  					h += delta
   197  				}
   198  			}
   199  		}
   200  		filter[nBytes] = byte(nProbes)
   201  		binary.LittleEndian.PutUint32(filter[nBytes+1:], uint32(nLines))
   202  	}
   203  
   204  	// Release the hash blocks.
   205  	for i, b := range w.blocks {
   206  		hashBlockPool.Put(b)
   207  		w.blocks[i] = nil
   208  	}
   209  	w.blocks = w.blocks[:0]
   210  	w.numHashes = 0
   211  	return buf
   212  }
   213  
// FilterPolicy implements the FilterPolicy interface from the pebble package.
//
// The integer value is the approximate number of bits used per key. A good
// value is 10, which yields a filter with ~ 1% false positive rate.
//
// NewWriter passes the value, converted to int, as the bits-per-key setting
// of the writer it creates.
type FilterPolicy int

// Compile-time check that FilterPolicy satisfies base.FilterPolicy.
var _ base.FilterPolicy = FilterPolicy(0)
   221  
// Name implements the pebble.FilterPolicy interface. It returns the same
// fixed string for every FilterPolicy value.
func (p FilterPolicy) Name() string {
	// This string looks arbitrary, but its value is written to .sst files and
	// must stay exactly as it is to remain compatible with existing files.
	// NOTE(review): the value names RocksDB's builtin Bloom filter, so the
	// compatibility target is presumably RocksDB-format files and the C++
	// RocksDB code rather than LevelDB — confirm.
	return "rocksdb.BuiltinBloomFilter"
}
   229  
   230  // MayContain implements the pebble.FilterPolicy interface.
   231  func (p FilterPolicy) MayContain(ftype base.FilterType, f, key []byte) bool {
   232  	switch ftype {
   233  	case base.TableFilter:
   234  		return tableFilter(f).MayContain(key)
   235  	default:
   236  		panic(fmt.Sprintf("unknown filter type: %v", ftype))
   237  	}
   238  }
   239  
   240  // NewWriter implements the pebble.FilterPolicy interface.
   241  func (p FilterPolicy) NewWriter(ftype base.FilterType) base.FilterWriter {
   242  	switch ftype {
   243  	case base.TableFilter:
   244  		return newTableFilterWriter(int(p))
   245  	default:
   246  		panic(fmt.Sprintf("unknown filter type: %v", ftype))
   247  	}
   248  }