github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/bloom/bloom_test.go (about)

     1  // Copyright 2013 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package bloom
     6  
     7  import (
     8  	"strings"
     9  	"testing"
    10  
    11  	"github.com/stretchr/testify/require"
    12  	"github.com/zuoyebang/bitalostable/internal/base"
    13  )
    14  
    15  func (f tableFilter) String() string {
    16  	var buf strings.Builder
    17  	for i, x := range f {
    18  		if i > 0 {
    19  			if i%8 == 0 {
    20  				buf.WriteString("\n")
    21  			} else {
    22  				buf.WriteString("  ")
    23  			}
    24  		}
    25  
    26  		for j := uint(0); j < 8; j++ {
    27  			if x&(1<<(7-j)) != 0 {
    28  				buf.WriteString("1")
    29  			} else {
    30  				buf.WriteString(".")
    31  			}
    32  		}
    33  	}
    34  	buf.WriteString("\n")
    35  	return buf.String()
    36  }
    37  
    38  func newTableFilter(bitsPerKey int, keys ...[]byte) tableFilter {
    39  	w := FilterPolicy(bitsPerKey).NewWriter(base.TableFilter)
    40  	for _, key := range keys {
    41  		w.AddKey(key)
    42  	}
    43  	return tableFilter(w.Finish(nil))
    44  }
    45  
    46  func TestSmallBloomFilter(t *testing.T) {
    47  	f := newTableFilter(10, []byte("hello"), []byte("world"))
    48  
    49  	// The magic expected string comes from running RocksDB's util/bloom_test.cc:FullBloomTest.FullSmall.
    50  	want := `
    51  ........  ........  ........  .......1  ........  ........  ........  ........
    52  ........  .1......  ........  .1......  ........  ........  ........  ........
    53  ...1....  ........  ........  ........  ........  ........  ........  ........
    54  ........  ........  ........  ........  ........  ........  ........  ...1....
    55  ........  ........  ........  ........  .....1..  ........  ........  ........
    56  .......1  ........  ........  ........  ........  ........  .1......  ........
    57  ........  ........  ........  ........  ........  ...1....  ........  ........
    58  .......1  ........  ........  ........  .1...1..  ........  ........  ........
    59  .....11.  .......1  ........  ........  ........
    60  `
    61  	want = strings.TrimLeft(want, "\n")
    62  	require.EqualValues(t, want, f.String())
    63  
    64  	m := map[string]bool{
    65  		"hello": true,
    66  		"world": true,
    67  		"x":     false,
    68  		"foo":   false,
    69  	}
    70  	for k, want := range m {
    71  		require.EqualValues(t, want, f.MayContain([]byte(k)))
    72  	}
    73  }
    74  
    75  func TestBloomFilter(t *testing.T) {
    76  	nextLength := func(x int) int {
    77  		if x < 10 {
    78  			return x + 1
    79  		}
    80  		if x < 100 {
    81  			return x + 10
    82  		}
    83  		if x < 1000 {
    84  			return x + 100
    85  		}
    86  		return x + 1000
    87  	}
    88  	le32 := func(i int) []byte {
    89  		b := make([]byte, 4)
    90  		b[0] = uint8(uint32(i) >> 0)
    91  		b[1] = uint8(uint32(i) >> 8)
    92  		b[2] = uint8(uint32(i) >> 16)
    93  		b[3] = uint8(uint32(i) >> 24)
    94  		return b
    95  	}
    96  
    97  	nMediocreFilters, nGoodFilters := 0, 0
    98  loop:
    99  	for length := 1; length <= 10000; length = nextLength(length) {
   100  		keys := make([][]byte, 0, length)
   101  		for i := 0; i < length; i++ {
   102  			keys = append(keys, le32(i))
   103  		}
   104  		f := newTableFilter(10, keys...)
   105  		// The size of the table bloom filter is measured in multiples of the
   106  		// cache line size. The '+2' contribution captures the rounding up in the
   107  		// length division plus preferring an odd number of cache lines. As such,
   108  		// this formula isn't exact, but the exact formula is hard to read.
   109  		maxLen := 5 + ((length*10)/cacheLineBits+2)*cacheLineSize
   110  		if len(f) > maxLen {
   111  			t.Errorf("length=%d: len(f)=%d > max len %d", length, len(f), maxLen)
   112  			continue
   113  		}
   114  
   115  		// All added keys must match.
   116  		for _, key := range keys {
   117  			if !f.MayContain(key) {
   118  				t.Errorf("length=%d: did not contain key %q", length, key)
   119  				continue loop
   120  			}
   121  		}
   122  
   123  		// Check false positive rate.
   124  		nFalsePositive := 0
   125  		for i := 0; i < 10000; i++ {
   126  			if f.MayContain(le32(1e9 + i)) {
   127  				nFalsePositive++
   128  			}
   129  		}
   130  		if nFalsePositive > 0.02*10000 {
   131  			t.Errorf("length=%d: %d false positives in 10000", length, nFalsePositive)
   132  			continue
   133  		}
   134  		if nFalsePositive > 0.0125*10000 {
   135  			nMediocreFilters++
   136  		} else {
   137  			nGoodFilters++
   138  		}
   139  	}
   140  
   141  	if nMediocreFilters > nGoodFilters/5 {
   142  		t.Errorf("%d mediocre filters but only %d good filters", nMediocreFilters, nGoodFilters)
   143  	}
   144  }
   145  
   146  func TestHash(t *testing.T) {
   147  	testCases := []struct {
   148  		s        string
   149  		expected uint32
   150  	}{
   151  		// The magic expected numbers come from RocksDB's util/hash_test.cc:TestHash.
   152  		{"", 3164544308},
   153  		{"\x08", 422599524},
   154  		{"\x17", 3168152998},
   155  		{"\x9a", 3195034349},
   156  		{"\x1c", 2651681383},
   157  		{"\x4d\x76", 2447836956},
   158  		{"\x52\xd5", 3854228105},
   159  		{"\x91\xf7", 31066776},
   160  		{"\xd6\x27", 1806091603},
   161  		{"\x30\x46\x0b", 3808221797},
   162  		{"\x56\xdc\xd6", 2157698265},
   163  		{"\xd4\x52\x33", 1721992661},
   164  		{"\x6a\xb5\xf4", 2469105222},
   165  		{"\x67\x53\x81\x1c", 118283265},
   166  		{"\x69\xb8\xc0\x88", 3416318611},
   167  		{"\x1e\x84\xaf\x2d", 3315003572},
   168  		{"\x46\xdc\x54\xbe", 447346355},
   169  		{"\xd0\x7a\x6e\xea\x56", 4255445370},
   170  		{"\x86\x83\xd5\xa4\xd8", 2390603402},
   171  		{"\xb7\x46\xbb\x77\xce", 2048907743},
   172  		{"\x6c\xa8\xbc\xe5\x99", 2177978500},
   173  		{"\x5c\x5e\xe1\xa0\x73\x81", 1036846008},
   174  		{"\x08\x5d\x73\x1c\xe5\x2e", 229980482},
   175  		{"\x42\xfb\xf2\x52\xb4\x10", 3655585422},
   176  		{"\x73\xe1\xff\x56\x9c\xce", 3502708029},
   177  		{"\x5c\xbe\x97\x75\x54\x9a\x52", 815120748},
   178  		{"\x16\x82\x39\x49\x88\x2b\x36", 3056033698},
   179  		{"\x59\x77\xf0\xa7\x24\xf4\x78", 587205227},
   180  		{"\xd3\xa5\x7c\x0e\xc0\x02\x07", 2030937252},
   181  		{"\x31\x1b\x98\x75\x96\x22\xd3\x9a", 469635402},
   182  		{"\x38\xd6\xf7\x28\x20\xb4\x8a\xe9", 3530274698},
   183  		{"\xbb\x18\x5d\xf4\x12\x03\xf7\x99", 1974545809},
   184  		{"\x80\xd4\x3b\x3b\xae\x22\xa2\x78", 3563570120},
   185  		{"\x1a\xb5\xd0\xfe\xab\xc3\x61\xb2\x99", 2706087434},
   186  		{"\x8e\x4a\xc3\x18\x20\x2f\x06\xe6\x3c", 1534654151},
   187  		{"\xb6\xc0\xdd\x05\x3f\xc4\x86\x4c\xef", 2355554696},
   188  		{"\x9a\x5f\x78\x0d\xaf\x50\xe1\x1f\x55", 1400800912},
   189  		{"\x22\x6f\x39\x1f\xf8\xdd\x4f\x52\x17\x94", 3420325137},
   190  		{"\x32\x89\x2a\x75\x48\x3a\x4a\x02\x69\xdd", 3427803584},
   191  		{"\x06\x92\x5c\xf4\x88\x0e\x7e\x68\x38\x3e", 1152407945},
   192  		{"\xbd\x2c\x63\x38\xbf\xe9\x78\xb7\xbf\x15", 3382479516},
   193  	}
   194  	for _, tc := range testCases {
   195  		t.Run("", func(t *testing.T) {
   196  			require.EqualValues(t, tc.expected, hash([]byte(tc.s)))
   197  		})
   198  	}
   199  }