github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/bloom/bloom.go (about)

     1  // Copyright 2013 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  // Package bloom implements Bloom filters.
     6  package bloom // import "github.com/cockroachdb/pebble/bloom"
     7  
     8  import (
     9  	"encoding/binary"
    10  	"fmt"
    11  	"sync"
    12  
    13  	"github.com/cockroachdb/pebble/internal/base"
    14  )
    15  
    16  const (
    17  	cacheLineSize = 64
    18  	cacheLineBits = cacheLineSize * 8
    19  )
    20  
    21  type tableFilter []byte
    22  
    23  func (f tableFilter) MayContain(key []byte) bool {
    24  	if len(f) <= 5 {
    25  		return false
    26  	}
    27  	n := len(f) - 5
    28  	nProbes := f[n]
    29  	nLines := binary.LittleEndian.Uint32(f[n+1:])
    30  	cacheLineBits := 8 * (uint32(n) / nLines)
    31  
    32  	h := hash(key)
    33  	delta := h>>17 | h<<15
    34  	b := (h % nLines) * cacheLineBits
    35  
    36  	for j := uint8(0); j < nProbes; j++ {
    37  		bitPos := b + (h % cacheLineBits)
    38  		if f[bitPos/8]&(1<<(bitPos%8)) == 0 {
    39  			return false
    40  		}
    41  		h += delta
    42  	}
    43  	return true
    44  }
    45  
    46  func calculateProbes(bitsPerKey int) uint32 {
    47  	// We intentionally round down to reduce probing cost a little bit
    48  	n := uint32(float64(bitsPerKey) * 0.69) // 0.69 =~ ln(2)
    49  	if n < 1 {
    50  		n = 1
    51  	}
    52  	if n > 30 {
    53  		n = 30
    54  	}
    55  	return n
    56  }
    57  
    58  // extend appends n zero bytes to b. It returns the overall slice (of length
    59  // n+len(originalB)) and the slice of n trailing zeroes.
    60  func extend(b []byte, n int) (overall, trailer []byte) {
    61  	want := n + len(b)
    62  	if want <= cap(b) {
    63  		overall = b[:want]
    64  		trailer = overall[len(b):]
    65  		for i := range trailer {
    66  			trailer[i] = 0
    67  		}
    68  	} else {
    69  		// Grow the capacity exponentially, with a 1KiB minimum.
    70  		c := 1024
    71  		for c < want {
    72  			c += c / 4
    73  		}
    74  		overall = make([]byte, want, c)
    75  		trailer = overall[len(b):]
    76  		copy(overall, b)
    77  	}
    78  	return overall, trailer
    79  }
    80  
    81  // hash implements a hashing algorithm similar to the Murmur hash.
    82  func hash(b []byte) uint32 {
    83  	const (
    84  		seed = 0xbc9f1d34
    85  		m    = 0xc6a4a793
    86  	)
    87  	h := uint32(seed) ^ uint32(uint64(uint32(len(b))*m))
    88  	for ; len(b) >= 4; b = b[4:] {
    89  		h += uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
    90  		h *= m
    91  		h ^= h >> 16
    92  	}
    93  
    94  	// The code below first casts each byte to a signed 8-bit integer. This is
    95  	// necessary to match RocksDB's behavior. Note that the `byte` type in Go is
    96  	// unsigned. What is the difference between casting a signed 8-bit value vs
    97  	// unsigned 8-bit value into an unsigned 32-bit value?
    98  	// Sign-extension. Consider the value 250 which has the bit pattern 11111010:
    99  	//
   100  	//   uint32(250)        = 00000000000000000000000011111010
   101  	//   uint32(int8(250))  = 11111111111111111111111111111010
   102  	//
   103  	// Note that the original LevelDB code did not explicitly cast to a signed
   104  	// 8-bit value which left the behavior dependent on whether C characters were
   105  	// signed or unsigned which is a compiler flag for gcc (-funsigned-char).
   106  	switch len(b) {
   107  	case 3:
   108  		h += uint32(int8(b[2])) << 16
   109  		fallthrough
   110  	case 2:
   111  		h += uint32(int8(b[1])) << 8
   112  		fallthrough
   113  	case 1:
   114  		h += uint32(int8(b[0]))
   115  		h *= m
   116  		h ^= h >> 24
   117  	}
   118  	return h
   119  }
   120  
   121  const hashBlockLen = 16384
   122  
   123  type hashBlock [hashBlockLen]uint32
   124  
   125  var hashBlockPool = sync.Pool{
   126  	New: func() interface{} {
   127  		return &hashBlock{}
   128  	},
   129  }
   130  
   131  type tableFilterWriter struct {
   132  	bitsPerKey int
   133  
   134  	numHashes int
   135  	// We store the hashes in blocks.
   136  	blocks   []*hashBlock
   137  	lastHash uint32
   138  
   139  	// Initial "in-line" storage for the blocks slice (to avoid some small
   140  	// allocations).
   141  	blocksBuf [16]*hashBlock
   142  }
   143  
   144  func newTableFilterWriter(bitsPerKey int) *tableFilterWriter {
   145  	w := &tableFilterWriter{
   146  		bitsPerKey: bitsPerKey,
   147  	}
   148  	w.blocks = w.blocksBuf[:0]
   149  	return w
   150  }
   151  
   152  // AddKey implements the base.FilterWriter interface.
   153  func (w *tableFilterWriter) AddKey(key []byte) {
   154  	h := hash(key)
   155  	if w.numHashes != 0 && h == w.lastHash {
   156  		return
   157  	}
   158  	ofs := w.numHashes % hashBlockLen
   159  	if ofs == 0 {
   160  		// Time for a new block.
   161  		w.blocks = append(w.blocks, hashBlockPool.Get().(*hashBlock))
   162  	}
   163  	w.blocks[len(w.blocks)-1][ofs] = h
   164  	w.numHashes++
   165  	w.lastHash = h
   166  }
   167  
   168  // Finish implements the base.FilterWriter interface.
   169  func (w *tableFilterWriter) Finish(buf []byte) []byte {
   170  	// The table filter format matches the RocksDB full-file filter format.
   171  	var nLines int
   172  	if w.numHashes != 0 {
   173  		nLines = (w.numHashes*w.bitsPerKey + cacheLineBits - 1) / (cacheLineBits)
   174  		// Make nLines an odd number to make sure more bits are involved when
   175  		// determining which block.
   176  		if nLines%2 == 0 {
   177  			nLines++
   178  		}
   179  	}
   180  
   181  	nBytes := nLines * cacheLineSize
   182  	// +5: 4 bytes for num-lines, 1 byte for num-probes
   183  	buf, filter := extend(buf, nBytes+5)
   184  
   185  	if nLines != 0 {
   186  		nProbes := calculateProbes(w.bitsPerKey)
   187  		for bIdx, b := range w.blocks {
   188  			length := hashBlockLen
   189  			if bIdx == len(w.blocks)-1 && w.numHashes%hashBlockLen != 0 {
   190  				length = w.numHashes % hashBlockLen
   191  			}
   192  			for _, h := range b[:length] {
   193  				delta := h>>17 | h<<15 // rotate right 17 bits
   194  				b := (h % uint32(nLines)) * (cacheLineBits)
   195  				for i := uint32(0); i < nProbes; i++ {
   196  					bitPos := b + (h % cacheLineBits)
   197  					filter[bitPos/8] |= (1 << (bitPos % 8))
   198  					h += delta
   199  				}
   200  			}
   201  		}
   202  		filter[nBytes] = byte(nProbes)
   203  		binary.LittleEndian.PutUint32(filter[nBytes+1:], uint32(nLines))
   204  	}
   205  
   206  	// Release the hash blocks.
   207  	for i, b := range w.blocks {
   208  		hashBlockPool.Put(b)
   209  		w.blocks[i] = nil
   210  	}
   211  	w.blocks = w.blocks[:0]
   212  	w.numHashes = 0
   213  	return buf
   214  }
   215  
   216  // FilterPolicy implements the FilterPolicy interface from the pebble package.
   217  //
   218  // The integer value is the approximate number of bits used per key. A good
   219  // value is 10, which yields a filter with ~ 1% false positive rate.
   220  type FilterPolicy int
   221  
   222  var _ base.FilterPolicy = FilterPolicy(0)
   223  
   224  // Name implements the pebble.FilterPolicy interface.
   225  func (p FilterPolicy) Name() string {
   226  	// This string looks arbitrary, but its value is written to LevelDB .sst
   227  	// files, and should be this exact value to be compatible with those files
   228  	// and with the C++ LevelDB code.
   229  	return "rocksdb.BuiltinBloomFilter"
   230  }
   231  
   232  // MayContain implements the pebble.FilterPolicy interface.
   233  func (p FilterPolicy) MayContain(ftype base.FilterType, f, key []byte) bool {
   234  	switch ftype {
   235  	case base.TableFilter:
   236  		return tableFilter(f).MayContain(key)
   237  	default:
   238  		panic(fmt.Sprintf("unknown filter type: %v", ftype))
   239  	}
   240  }
   241  
   242  // NewWriter implements the pebble.FilterPolicy interface.
   243  func (p FilterPolicy) NewWriter(ftype base.FilterType) base.FilterWriter {
   244  	switch ftype {
   245  	case base.TableFilter:
   246  		return newTableFilterWriter(int(p))
   247  	default:
   248  		panic(fmt.Sprintf("unknown filter type: %v", ftype))
   249  	}
   250  }