github.com/bits-and-blooms/bloom/v3@v3.5.0/bloom_test.go

github.com/bits-and-blooms/bloom/v3@v3.5.0/bloom_test.go (about)

     1  package bloom
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/binary"
     6  	"encoding/gob"
     7  	"encoding/json"
     8  	"math"
     9  	"testing"
    10  )
    11  
    12  // This implementation of Bloom filters is _not_
    13  // safe for concurrent use. Uncomment the following
    14  // method and run go test -race
    15  //
    16  // func TestConcurrent(t *testing.T) {
    17  // 	gmp := runtime.GOMAXPROCS(2)
    18  // 	defer runtime.GOMAXPROCS(gmp)
    19  //
    20  // 	f := New(1000, 4)
    21  // 	n1 := []byte("Bess")
    22  // 	n2 := []byte("Jane")
    23  // 	f.Add(n1)
    24  // 	f.Add(n2)
    25  //
    26  // 	var wg sync.WaitGroup
    27  // 	const try = 1000
    28  // 	var err1, err2 error
    29  //
    30  // 	wg.Add(1)
    31  // 	go func() {
    32  // 		for i := 0; i < try; i++ {
    33  // 			n1b := f.Test(n1)
    34  // 			if !n1b {
    35  // 				err1 = fmt.Errorf("%v should be in", n1)
    36  // 				break
    37  // 			}
    38  // 		}
    39  // 		wg.Done()
    40  // 	}()
    41  //
    42  // 	wg.Add(1)
    43  // 	go func() {
    44  // 		for i := 0; i < try; i++ {
    45  // 			n2b := f.Test(n2)
    46  // 			if !n2b {
    47  // 				err2 = fmt.Errorf("%v should be in", n2)
    48  // 				break
    49  // 			}
    50  // 		}
    51  // 		wg.Done()
    52  // 	}()
    53  //
    54  // 	wg.Wait()
    55  //
    56  // 	if err1 != nil {
    57  // 		t.Fatal(err1)
    58  // 	}
    59  // 	if err2 != nil {
    60  // 		t.Fatal(err2)
    61  // 	}
    62  // }
    63  
    64  func TestBasic(t *testing.T) {
    65  	f := New(1000, 4)
    66  	n1 := []byte("Bess")
    67  	n2 := []byte("Jane")
    68  	n3 := []byte("Emma")
    69  	f.Add(n1)
    70  	n3a := f.TestAndAdd(n3)
    71  	n1b := f.Test(n1)
    72  	n2b := f.Test(n2)
    73  	n3b := f.Test(n3)
    74  	if !n1b {
    75  		t.Errorf("%v should be in.", n1)
    76  	}
    77  	if n2b {
    78  		t.Errorf("%v should not be in.", n2)
    79  	}
    80  	if n3a {
    81  		t.Errorf("%v should not be in the first time we look.", n3)
    82  	}
    83  	if !n3b {
    84  		t.Errorf("%v should be in the second time we look.", n3)
    85  	}
    86  }
    87  
    88  func TestBasicUint32(t *testing.T) {
    89  	f := New(1000, 4)
    90  	n1 := make([]byte, 4)
    91  	n2 := make([]byte, 4)
    92  	n3 := make([]byte, 4)
    93  	n4 := make([]byte, 4)
    94  	n5 := make([]byte, 4)
    95  	binary.BigEndian.PutUint32(n1, 100)
    96  	binary.BigEndian.PutUint32(n2, 101)
    97  	binary.BigEndian.PutUint32(n3, 102)
    98  	binary.BigEndian.PutUint32(n4, 103)
    99  	binary.BigEndian.PutUint32(n5, 104)
   100  	f.Add(n1)
   101  	n3a := f.TestAndAdd(n3)
   102  	n1b := f.Test(n1)
   103  	n2b := f.Test(n2)
   104  	n3b := f.Test(n3)
   105  	n5a := f.TestOrAdd(n5)
   106  	n5b := f.Test(n5)
   107  	f.Test(n4)
   108  	if !n1b {
   109  		t.Errorf("%v should be in.", n1)
   110  	}
   111  	if n2b {
   112  		t.Errorf("%v should not be in.", n2)
   113  	}
   114  	if n3a {
   115  		t.Errorf("%v should not be in the first time we look.", n3)
   116  	}
   117  	if !n3b {
   118  		t.Errorf("%v should be in the second time we look.", n3)
   119  	}
   120  	if n5a {
   121  		t.Errorf("%v should not be in the first time we look.", n5)
   122  	}
   123  	if !n5b {
   124  		t.Errorf("%v should be in the second time we look.", n5)
   125  	}
   126  }
   127  
   128  func TestNewWithLowNumbers(t *testing.T) {
   129  	f := New(0, 0)
   130  	if f.k != 1 {
   131  		t.Errorf("%v should be 1", f.k)
   132  	}
   133  	if f.m != 1 {
   134  		t.Errorf("%v should be 1", f.m)
   135  	}
   136  }
   137  
   138  func TestString(t *testing.T) {
   139  	f := NewWithEstimates(1000, 0.001)
   140  	n1 := "Love"
   141  	n2 := "is"
   142  	n3 := "in"
   143  	n4 := "bloom"
   144  	n5 := "blooms"
   145  	f.AddString(n1)
   146  	n3a := f.TestAndAddString(n3)
   147  	n1b := f.TestString(n1)
   148  	n2b := f.TestString(n2)
   149  	n3b := f.TestString(n3)
   150  	n5a := f.TestOrAddString(n5)
   151  	n5b := f.TestString(n5)
   152  	f.TestString(n4)
   153  	if !n1b {
   154  		t.Errorf("%v should be in.", n1)
   155  	}
   156  	if n2b {
   157  		t.Errorf("%v should not be in.", n2)
   158  	}
   159  	if n3a {
   160  		t.Errorf("%v should not be in the first time we look.", n3)
   161  	}
   162  	if !n3b {
   163  		t.Errorf("%v should be in the second time we look.", n3)
   164  	}
   165  	if n5a {
   166  		t.Errorf("%v should not be in the first time we look.", n5)
   167  	}
   168  	if !n5b {
   169  		t.Errorf("%v should be in the second time we look.", n5)
   170  	}
   171  
   172  }
   173  
   174  func testEstimated(n uint, maxFp float64, t *testing.T) {
   175  	m, k := EstimateParameters(n, maxFp)
   176  	fpRate := EstimateFalsePositiveRate(m, k, n)
   177  	if fpRate > 1.5*maxFp {
   178  		t.Errorf("False positive rate too high: n: %v; m: %v; k: %v; maxFp: %f; fpRate: %f, fpRate/maxFp: %f", n, m, k, maxFp, fpRate, fpRate/maxFp)
   179  	}
   180  }
   181  
   182  func TestEstimated1000_0001(t *testing.T)   { testEstimated(1000, 0.000100, t) }
   183  func TestEstimated10000_0001(t *testing.T)  { testEstimated(10000, 0.000100, t) }
   184  func TestEstimated100000_0001(t *testing.T) { testEstimated(100000, 0.000100, t) }
   185  
   186  func TestEstimated1000_001(t *testing.T)   { testEstimated(1000, 0.001000, t) }
   187  func TestEstimated10000_001(t *testing.T)  { testEstimated(10000, 0.001000, t) }
   188  func TestEstimated100000_001(t *testing.T) { testEstimated(100000, 0.001000, t) }
   189  
   190  func TestEstimated1000_01(t *testing.T)   { testEstimated(1000, 0.010000, t) }
   191  func TestEstimated10000_01(t *testing.T)  { testEstimated(10000, 0.010000, t) }
   192  func TestEstimated100000_01(t *testing.T) { testEstimated(100000, 0.010000, t) }
   193  
   194  func min(a, b uint) uint {
   195  	if a < b {
   196  		return a
   197  	}
   198  	return b
   199  }
   200  
   201  // The following function courtesy of Nick @turgon
   202  // This helper function ranges over the input data, applying the hashing
   203  // which returns the bit locations to set in the filter.
   204  // For each location, increment a counter for that bit address.
   205  //
   206  // If the Bloom Filter's location() method distributes locations uniformly
   207  // at random, a property it should inherit from its hash function, then
   208  // each bit location in the filter should end up with roughly the same
   209  // number of hits.  Importantly, the value of k should not matter.
   210  //
   211  // Once the results are collected, we can run a chi squared goodness of fit
   212  // test, comparing the result histogram with the uniform distribition.
   213  // This yields a test statistic with degrees-of-freedom of m-1.
   214  func chiTestBloom(m, k, rounds uint, elements [][]byte) (succeeds bool) {
   215  	f := New(m, k)
   216  	results := make([]uint, m)
   217  	chi := make([]float64, m)
   218  
   219  	for _, data := range elements {
   220  		h := baseHashes(data)
   221  		for i := uint(0); i < f.k; i++ {
   222  			results[f.location(h, i)]++
   223  		}
   224  	}
   225  
   226  	// Each element of results should contain the same value: k * rounds / m.
   227  	// Let's run a chi-square goodness of fit and see how it fares.
   228  	var chiStatistic float64
   229  	e := float64(k*rounds) / float64(m)
   230  	for i := uint(0); i < m; i++ {
   231  		chi[i] = math.Pow(float64(results[i])-e, 2.0) / e
   232  		chiStatistic += chi[i]
   233  	}
   234  
   235  	// this tests at significant level 0.005 up to 20 degrees of freedom
   236  	table := [20]float64{
   237  		7.879, 10.597, 12.838, 14.86, 16.75, 18.548, 20.278,
   238  		21.955, 23.589, 25.188, 26.757, 28.3, 29.819, 31.319, 32.801, 34.267,
   239  		35.718, 37.156, 38.582, 39.997}
   240  	df := min(m-1, 20)
   241  
   242  	succeeds = table[df-1] > chiStatistic
   243  	return
   244  
   245  }
   246  
   247  func TestLocation(t *testing.T) {
   248  	var m, k, rounds uint
   249  
   250  	m = 8
   251  	k = 3
   252  
   253  	rounds = 100000 // 15000000
   254  
   255  	elements := make([][]byte, rounds)
   256  
   257  	for x := uint(0); x < rounds; x++ {
   258  		ctrlist := make([]uint8, 4)
   259  		ctrlist[0] = uint8(x)
   260  		ctrlist[1] = uint8(x >> 8)
   261  		ctrlist[2] = uint8(x >> 16)
   262  		ctrlist[3] = uint8(x >> 24)
   263  		data := []byte(ctrlist)
   264  		elements[x] = data
   265  	}
   266  
   267  	succeeds := chiTestBloom(m, k, rounds, elements)
   268  	if !succeeds {
   269  		t.Error("random assignment is too unrandom")
   270  	}
   271  
   272  }
   273  
   274  func TestCap(t *testing.T) {
   275  	f := New(1000, 4)
   276  	if f.Cap() != f.m {
   277  		t.Error("not accessing Cap() correctly")
   278  	}
   279  }
   280  
   281  func TestK(t *testing.T) {
   282  	f := New(1000, 4)
   283  	if f.K() != f.k {
   284  		t.Error("not accessing K() correctly")
   285  	}
   286  }
   287  
   288  func TestMarshalUnmarshalJSON(t *testing.T) {
   289  	f := New(1000, 4)
   290  	data, err := json.Marshal(f)
   291  	if err != nil {
   292  		t.Fatal(err.Error())
   293  	}
   294  
   295  	var g BloomFilter
   296  	err = json.Unmarshal(data, &g)
   297  	if err != nil {
   298  		t.Fatal(err.Error())
   299  	}
   300  	if g.m != f.m {
   301  		t.Error("invalid m value")
   302  	}
   303  	if g.k != f.k {
   304  		t.Error("invalid k value")
   305  	}
   306  	if g.b == nil {
   307  		t.Fatal("bitset is nil")
   308  	}
   309  	if !g.b.Equal(f.b) {
   310  		t.Error("bitsets are not equal")
   311  	}
   312  }
   313  
   314  func TestUnmarshalInvalidJSON(t *testing.T) {
   315  	data := []byte("{invalid}")
   316  
   317  	var g BloomFilter
   318  	err := g.UnmarshalJSON(data)
   319  	if err == nil {
   320  		t.Error("expected error while unmarshalling invalid data")
   321  	}
   322  }
   323  
   324  func TestWriteToReadFrom(t *testing.T) {
   325  	var b bytes.Buffer
   326  	f := New(1000, 4)
   327  	_, err := f.WriteTo(&b)
   328  	if err != nil {
   329  		t.Fatal(err)
   330  	}
   331  
   332  	g := New(1000, 1)
   333  	_, err = g.ReadFrom(&b)
   334  	if err != nil {
   335  		t.Fatal(err)
   336  	}
   337  	if g.m != f.m {
   338  		t.Error("invalid m value")
   339  	}
   340  	if g.k != f.k {
   341  		t.Error("invalid k value")
   342  	}
   343  	if g.b == nil {
   344  		t.Fatal("bitset is nil")
   345  	}
   346  	if !g.b.Equal(f.b) {
   347  		t.Error("bitsets are not equal")
   348  	}
   349  
   350  	g.Test([]byte(""))
   351  }
   352  
   353  func TestReadWriteBinary(t *testing.T) {
   354  	f := New(1000, 4)
   355  	var buf bytes.Buffer
   356  	bytesWritten, err := f.WriteTo(&buf)
   357  	if err != nil {
   358  		t.Fatal(err.Error())
   359  	}
   360  	if bytesWritten != int64(buf.Len()) {
   361  		t.Errorf("incorrect write length %d != %d", bytesWritten, buf.Len())
   362  	}
   363  
   364  	var g BloomFilter
   365  	bytesRead, err := g.ReadFrom(&buf)
   366  	if err != nil {
   367  		t.Fatal(err.Error())
   368  	}
   369  	if bytesRead != bytesWritten {
   370  		t.Errorf("read unexpected number of bytes %d != %d", bytesRead, bytesWritten)
   371  	}
   372  	if g.m != f.m {
   373  		t.Error("invalid m value")
   374  	}
   375  	if g.k != f.k {
   376  		t.Error("invalid k value")
   377  	}
   378  	if g.b == nil {
   379  		t.Fatal("bitset is nil")
   380  	}
   381  	if !g.b.Equal(f.b) {
   382  		t.Error("bitsets are not equal")
   383  	}
   384  }
   385  
   386  func TestEncodeDecodeGob(t *testing.T) {
   387  	f := New(1000, 4)
   388  	f.Add([]byte("one"))
   389  	f.Add([]byte("two"))
   390  	f.Add([]byte("three"))
   391  	var buf bytes.Buffer
   392  	err := gob.NewEncoder(&buf).Encode(f)
   393  	if err != nil {
   394  		t.Fatal(err.Error())
   395  	}
   396  
   397  	var g BloomFilter
   398  	err = gob.NewDecoder(&buf).Decode(&g)
   399  	if err != nil {
   400  		t.Fatal(err.Error())
   401  	}
   402  	if g.m != f.m {
   403  		t.Error("invalid m value")
   404  	}
   405  	if g.k != f.k {
   406  		t.Error("invalid k value")
   407  	}
   408  	if g.b == nil {
   409  		t.Fatal("bitset is nil")
   410  	}
   411  	if !g.b.Equal(f.b) {
   412  		t.Error("bitsets are not equal")
   413  	}
   414  	if !g.Test([]byte("three")) {
   415  		t.Errorf("missing value 'three'")
   416  	}
   417  	if !g.Test([]byte("two")) {
   418  		t.Errorf("missing value 'two'")
   419  	}
   420  	if !g.Test([]byte("one")) {
   421  		t.Errorf("missing value 'one'")
   422  	}
   423  }
   424  
   425  func TestEqual(t *testing.T) {
   426  	f := New(1000, 4)
   427  	f1 := New(1000, 4)
   428  	g := New(1000, 20)
   429  	h := New(10, 20)
   430  	n1 := []byte("Bess")
   431  	f1.Add(n1)
   432  	if !f.Equal(f) {
   433  		t.Errorf("%v should be equal to itself", f)
   434  	}
   435  	if f.Equal(f1) {
   436  		t.Errorf("%v should not be equal to %v", f, f1)
   437  	}
   438  	if f.Equal(g) {
   439  		t.Errorf("%v should not be equal to %v", f, g)
   440  	}
   441  	if f.Equal(h) {
   442  		t.Errorf("%v should not be equal to %v", f, h)
   443  	}
   444  }
   445  
   446  func BenchmarkEstimated(b *testing.B) {
   447  	for n := uint(100000); n <= 100000; n *= 10 {
   448  		for fp := 0.1; fp >= 0.0001; fp /= 10.0 {
   449  			f := NewWithEstimates(n, fp)
   450  			EstimateFalsePositiveRate(f.m, f.k, n)
   451  		}
   452  	}
   453  }
   454  
   455  func BenchmarkSeparateTestAndAdd(b *testing.B) {
   456  	f := NewWithEstimates(uint(b.N), 0.0001)
   457  	key := make([]byte, 100)
   458  	b.ResetTimer()
   459  	for i := 0; i < b.N; i++ {
   460  		binary.BigEndian.PutUint32(key, uint32(i))
   461  		f.Test(key)
   462  		f.Add(key)
   463  	}
   464  }
   465  
   466  func BenchmarkCombinedTestAndAdd(b *testing.B) {
   467  	f := NewWithEstimates(uint(b.N), 0.0001)
   468  	key := make([]byte, 100)
   469  	b.ResetTimer()
   470  	for i := 0; i < b.N; i++ {
   471  		binary.BigEndian.PutUint32(key, uint32(i))
   472  		f.TestAndAdd(key)
   473  	}
   474  }
   475  
   476  func TestMerge(t *testing.T) {
   477  	f := New(1000, 4)
   478  	n1 := []byte("f")
   479  	f.Add(n1)
   480  
   481  	g := New(1000, 4)
   482  	n2 := []byte("g")
   483  	g.Add(n2)
   484  
   485  	h := New(999, 4)
   486  	n3 := []byte("h")
   487  	h.Add(n3)
   488  
   489  	j := New(1000, 5)
   490  	n4 := []byte("j")
   491  	j.Add(n4)
   492  
   493  	err := f.Merge(g)
   494  	if err != nil {
   495  		t.Errorf("There should be no error when merging two similar filters")
   496  	}
   497  
   498  	err = f.Merge(h)
   499  	if err == nil {
   500  		t.Errorf("There should be an error when merging filters with mismatched m")
   501  	}
   502  
   503  	err = f.Merge(j)
   504  	if err == nil {
   505  		t.Errorf("There should be an error when merging filters with mismatched k")
   506  	}
   507  
   508  	n2b := f.Test(n2)
   509  	if !n2b {
   510  		t.Errorf("The value doesn't exist after a valid merge")
   511  	}
   512  
   513  	n3b := f.Test(n3)
   514  	if n3b {
   515  		t.Errorf("The value exists after an invalid merge")
   516  	}
   517  
   518  	n4b := f.Test(n4)
   519  	if n4b {
   520  		t.Errorf("The value exists after an invalid merge")
   521  	}
   522  }
   523  
   524  func TestCopy(t *testing.T) {
   525  	f := New(1000, 4)
   526  	n1 := []byte("f")
   527  	f.Add(n1)
   528  
   529  	// copy here instead of New
   530  	g := f.Copy()
   531  	n2 := []byte("g")
   532  	g.Add(n2)
   533  
   534  	n1fb := f.Test(n1)
   535  	if !n1fb {
   536  		t.Errorf("The value doesn't exist in original after making a copy")
   537  	}
   538  
   539  	n1gb := g.Test(n1)
   540  	if !n1gb {
   541  		t.Errorf("The value doesn't exist in the copy")
   542  	}
   543  
   544  	n2fb := f.Test(n2)
   545  	if n2fb {
   546  		t.Errorf("The value exists in the original, it should only exist in copy")
   547  	}
   548  
   549  	n2gb := g.Test(n2)
   550  	if !n2gb {
   551  		t.Errorf("The value doesn't exist in copy after Add()")
   552  	}
   553  }
   554  
   555  func TestFrom(t *testing.T) {
   556  	var (
   557  		k    = uint(5)
   558  		data = make([]uint64, 10)
   559  		test = []byte("test")
   560  	)
   561  
   562  	bf := From(data, k)
   563  	if bf.K() != k {
   564  		t.Errorf("Constant k does not match the expected value")
   565  	}
   566  
   567  	if bf.Cap() != uint(len(data)*64) {
   568  		t.Errorf("Capacity does not match the expected value")
   569  	}
   570  
   571  	if bf.Test(test) {
   572  		t.Errorf("Bloom filter should not contain the value")
   573  	}
   574  
   575  	bf.Add(test)
   576  	if !bf.Test(test) {
   577  		t.Errorf("Bloom filter should contain the value")
   578  	}
   579  
   580  	// create a new Bloom filter from an existing (populated) data slice.
   581  	bf = From(data, k)
   582  	if !bf.Test(test) {
   583  		t.Errorf("Bloom filter should contain the value")
   584  	}
   585  }
   586  
   587  func TestTestLocations(t *testing.T) {
   588  	f := NewWithEstimates(1000, 0.001)
   589  	n1 := []byte("Love")
   590  	n2 := []byte("is")
   591  	n3 := []byte("in")
   592  	n4 := []byte("bloom")
   593  	f.Add(n1)
   594  	n3a := f.TestLocations(Locations(n3, f.K()))
   595  	f.Add(n3)
   596  	n1b := f.TestLocations(Locations(n1, f.K()))
   597  	n2b := f.TestLocations(Locations(n2, f.K()))
   598  	n3b := f.TestLocations(Locations(n3, f.K()))
   599  	n4b := f.TestLocations(Locations(n4, f.K()))
   600  	if !n1b {
   601  		t.Errorf("%v should be in.", n1)
   602  	}
   603  	if n2b {
   604  		t.Errorf("%v should not be in.", n2)
   605  	}
   606  	if n3a {
   607  		t.Errorf("%v should not be in the first time we look.", n3)
   608  	}
   609  	if !n3b {
   610  		t.Errorf("%v should be in the second time we look.", n3)
   611  	}
   612  	if n4b {
   613  		t.Errorf("%v should be in.", n4)
   614  	}
   615  }
   616  
   617  func TestApproximatedSize(t *testing.T) {
   618  	f := NewWithEstimates(1000, 0.001)
   619  	f.Add([]byte("Love"))
   620  	f.Add([]byte("is"))
   621  	f.Add([]byte("in"))
   622  	f.Add([]byte("bloom"))
   623  	size := f.ApproximatedSize()
   624  	if size != 4 {
   625  		t.Errorf("%d should equal 4.", size)
   626  	}
   627  }
   628  
   629  func TestFPP(t *testing.T) {
   630  	f := NewWithEstimates(1000, 0.001)
   631  	for i := uint32(0); i < 1000; i++ {
   632  		n := make([]byte, 4)
   633  		binary.BigEndian.PutUint32(n, i)
   634  		f.Add(n)
   635  	}
   636  	count := 0
   637  
   638  	for i := uint32(0); i < 1000; i++ {
   639  		n := make([]byte, 4)
   640  		binary.BigEndian.PutUint32(n, i+1000)
   641  		if f.Test(n) {
   642  			count += 1
   643  		}
   644  	}
   645  	if float64(count)/1000.0 > 0.001 {
   646  		t.Errorf("Excessive fpp")
   647  	}
   648  }
   649  
   650  func TestEncodeDecodeBinary(t *testing.T) {
   651  	f := New(1000, 4)
   652  	f.Add([]byte("one"))
   653  	f.Add([]byte("two"))
   654  	f.Add([]byte("three"))
   655  	data, err := f.MarshalBinary()
   656  	if err != nil {
   657  		t.Fatal(err.Error())
   658  	}
   659  
   660  	var g BloomFilter
   661  	err = g.UnmarshalBinary(data)
   662  	if err != nil {
   663  		t.Fatal(err.Error())
   664  	}
   665  	if g.m != f.m {
   666  		t.Error("invalid m value")
   667  	}
   668  	if g.k != f.k {
   669  		t.Error("invalid k value")
   670  	}
   671  	if g.b == nil {
   672  		t.Fatal("bitset is nil")
   673  	}
   674  	if !g.b.Equal(f.b) {
   675  		t.Error("bitsets are not equal")
   676  	}
   677  	if !g.Test([]byte("three")) {
   678  		t.Errorf("missing value 'three'")
   679  	}
   680  	if !g.Test([]byte("two")) {
   681  		t.Errorf("missing value 'two'")
   682  	}
   683  	if !g.Test([]byte("one")) {
   684  		t.Errorf("missing value 'one'")
   685  	}
   686  }