github.com/symblcrowd/bloom@v2.0.5+incompatible/bloom_test.go (about)

     1  package bloom
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/binary"
     6  	"encoding/gob"
     7  	"encoding/json"
     8  	"math"
     9  	"testing"
    10  )
    11  
    12  // This implementation of Bloom filters is _not_
    13  // safe for concurrent use. Uncomment the following
    14  // method and run go test -race
    15  //
    16  // func TestConcurrent(t *testing.T) {
    17  // 	gmp := runtime.GOMAXPROCS(2)
    18  // 	defer runtime.GOMAXPROCS(gmp)
    19  //
    20  // 	f := New(1000, 4)
    21  // 	n1 := []byte("Bess")
    22  // 	n2 := []byte("Jane")
    23  // 	f.Add(n1)
    24  // 	f.Add(n2)
    25  //
    26  // 	var wg sync.WaitGroup
    27  // 	const try = 1000
    28  // 	var err1, err2 error
    29  //
    30  // 	wg.Add(1)
    31  // 	go func() {
    32  // 		for i := 0; i < try; i++ {
    33  // 			n1b := f.Test(n1)
    34  // 			if !n1b {
    35  // 				err1 = fmt.Errorf("%v should be in", n1)
    36  // 				break
    37  // 			}
    38  // 		}
    39  // 		wg.Done()
    40  // 	}()
    41  //
    42  // 	wg.Add(1)
    43  // 	go func() {
    44  // 		for i := 0; i < try; i++ {
    45  // 			n2b := f.Test(n2)
    46  // 			if !n2b {
    47  // 				err2 = fmt.Errorf("%v should be in", n2)
    48  // 				break
    49  // 			}
    50  // 		}
    51  // 		wg.Done()
    52  // 	}()
    53  //
    54  // 	wg.Wait()
    55  //
    56  // 	if err1 != nil {
    57  // 		t.Fatal(err1)
    58  // 	}
    59  // 	if err2 != nil {
    60  // 		t.Fatal(err2)
    61  // 	}
    62  // }
    63  
    64  func TestBasic(t *testing.T) {
    65  	f := New(1000, 4)
    66  	n1 := []byte("Bess")
    67  	n2 := []byte("Jane")
    68  	n3 := []byte("Emma")
    69  	f.Add(n1)
    70  	n3a := f.TestAndAdd(n3)
    71  	n1b := f.Test(n1)
    72  	n2b := f.Test(n2)
    73  	n3b := f.Test(n3)
    74  	if !n1b {
    75  		t.Errorf("%v should be in.", n1)
    76  	}
    77  	if n2b {
    78  		t.Errorf("%v should not be in.", n2)
    79  	}
    80  	if n3a {
    81  		t.Errorf("%v should not be in the first time we look.", n3)
    82  	}
    83  	if !n3b {
    84  		t.Errorf("%v should be in the second time we look.", n3)
    85  	}
    86  }
    87  
    88  func TestBasicUint32(t *testing.T) {
    89  	f := New(1000, 4)
    90  	n1 := make([]byte, 4)
    91  	n2 := make([]byte, 4)
    92  	n3 := make([]byte, 4)
    93  	n4 := make([]byte, 4)
    94  	binary.BigEndian.PutUint32(n1, 100)
    95  	binary.BigEndian.PutUint32(n2, 101)
    96  	binary.BigEndian.PutUint32(n3, 102)
    97  	binary.BigEndian.PutUint32(n4, 103)
    98  	f.Add(n1)
    99  	n3a := f.TestAndAdd(n3)
   100  	n1b := f.Test(n1)
   101  	n2b := f.Test(n2)
   102  	n3b := f.Test(n3)
   103  	f.Test(n4)
   104  	if !n1b {
   105  		t.Errorf("%v should be in.", n1)
   106  	}
   107  	if n2b {
   108  		t.Errorf("%v should not be in.", n2)
   109  	}
   110  	if n3a {
   111  		t.Errorf("%v should not be in the first time we look.", n3)
   112  	}
   113  	if !n3b {
   114  		t.Errorf("%v should be in the second time we look.", n3)
   115  	}
   116  }
   117  
   118  func TestNewWithLowNumbers(t *testing.T) {
   119  	f := New(0, 0)
   120  	if f.k != 1 {
   121  		t.Errorf("%v should be 1", f.k)
   122  	}
   123  	if f.m != 1 {
   124  		t.Errorf("%v should be 1", f.m)
   125  	}
   126  }
   127  
   128  func TestString(t *testing.T) {
   129  	f := NewWithEstimates(1000, 0.001)
   130  	n1 := "Love"
   131  	n2 := "is"
   132  	n3 := "in"
   133  	n4 := "bloom"
   134  	f.AddString(n1)
   135  	n3a := f.TestAndAddString(n3)
   136  	n1b := f.TestString(n1)
   137  	n2b := f.TestString(n2)
   138  	n3b := f.TestString(n3)
   139  	f.TestString(n4)
   140  	if !n1b {
   141  		t.Errorf("%v should be in.", n1)
   142  	}
   143  	if n2b {
   144  		t.Errorf("%v should not be in.", n2)
   145  	}
   146  	if n3a {
   147  		t.Errorf("%v should not be in the first time we look.", n3)
   148  	}
   149  	if !n3b {
   150  		t.Errorf("%v should be in the second time we look.", n3)
   151  	}
   152  
   153  }
   154  
   155  func testEstimated(n uint, maxFp float64, t *testing.T) {
   156  	m, k := EstimateParameters(n, maxFp)
   157  	f := NewWithEstimates(n, maxFp)
   158  	fpRate := f.EstimateFalsePositiveRate(n)
   159  	if fpRate > 1.5*maxFp {
   160  		t.Errorf("False positive rate too high: n: %v; m: %v; k: %v; maxFp: %f; fpRate: %f, fpRate/maxFp: %f", n, m, k, maxFp, fpRate, fpRate/maxFp)
   161  	}
   162  }
   163  
   164  func TestEstimated1000_0001(t *testing.T)   { testEstimated(1000, 0.000100, t) }
   165  func TestEstimated10000_0001(t *testing.T)  { testEstimated(10000, 0.000100, t) }
   166  func TestEstimated100000_0001(t *testing.T) { testEstimated(100000, 0.000100, t) }
   167  
   168  func TestEstimated1000_001(t *testing.T)   { testEstimated(1000, 0.001000, t) }
   169  func TestEstimated10000_001(t *testing.T)  { testEstimated(10000, 0.001000, t) }
   170  func TestEstimated100000_001(t *testing.T) { testEstimated(100000, 0.001000, t) }
   171  
   172  func TestEstimated1000_01(t *testing.T)   { testEstimated(1000, 0.010000, t) }
   173  func TestEstimated10000_01(t *testing.T)  { testEstimated(10000, 0.010000, t) }
   174  func TestEstimated100000_01(t *testing.T) { testEstimated(100000, 0.010000, t) }
   175  
   176  func min(a, b uint) uint {
   177  	if a < b {
   178  		return a
   179  	}
   180  	return b
   181  }
   182  
   183  // The following function courtesy of Nick @turgon
   184  // This helper function ranges over the input data, applying the hashing
   185  // which returns the bit locations to set in the filter.
   186  // For each location, increment a counter for that bit address.
   187  //
   188  // If the Bloom Filter's location() method distributes locations uniformly
   189  // at random, a property it should inherit from its hash function, then
   190  // each bit location in the filter should end up with roughly the same
   191  // number of hits.  Importantly, the value of k should not matter.
   192  //
   193  // Once the results are collected, we can run a chi squared goodness of fit
   194  // test, comparing the result histogram with the uniform distribition.
   195  // This yields a test statistic with degrees-of-freedom of m-1.
   196  func chiTestBloom(m, k, rounds uint, elements [][]byte) (succeeds bool) {
   197  	f := New(m, k)
   198  	results := make([]uint, m)
   199  	chi := make([]float64, m)
   200  
   201  	for _, data := range elements {
   202  		h := baseHashes(data)
   203  		for i := uint(0); i < f.k; i++ {
   204  			results[f.location(h, i)]++
   205  		}
   206  	}
   207  
   208  	// Each element of results should contain the same value: k * rounds / m.
   209  	// Let's run a chi-square goodness of fit and see how it fares.
   210  	var chiStatistic float64
   211  	e := float64(k*rounds) / float64(m)
   212  	for i := uint(0); i < m; i++ {
   213  		chi[i] = math.Pow(float64(results[i])-e, 2.0) / e
   214  		chiStatistic += chi[i]
   215  	}
   216  
   217  	// this tests at significant level 0.005 up to 20 degrees of freedom
   218  	table := [20]float64{
   219  		7.879, 10.597, 12.838, 14.86, 16.75, 18.548, 20.278,
   220  		21.955, 23.589, 25.188, 26.757, 28.3, 29.819, 31.319, 32.801, 34.267,
   221  		35.718, 37.156, 38.582, 39.997}
   222  	df := min(m-1, 20)
   223  
   224  	succeeds = table[df-1] > chiStatistic
   225  	return
   226  
   227  }
   228  
   229  func TestLocation(t *testing.T) {
   230  	var m, k, rounds uint
   231  
   232  	m = 8
   233  	k = 3
   234  
   235  	rounds = 100000 // 15000000
   236  
   237  	elements := make([][]byte, rounds)
   238  
   239  	for x := uint(0); x < rounds; x++ {
   240  		ctrlist := make([]uint8, 4)
   241  		ctrlist[0] = uint8(x)
   242  		ctrlist[1] = uint8(x >> 8)
   243  		ctrlist[2] = uint8(x >> 16)
   244  		ctrlist[3] = uint8(x >> 24)
   245  		data := []byte(ctrlist)
   246  		elements[x] = data
   247  	}
   248  
   249  	succeeds := chiTestBloom(m, k, rounds, elements)
   250  	if !succeeds {
   251  		t.Error("random assignment is too unrandom")
   252  	}
   253  
   254  }
   255  
   256  func TestCap(t *testing.T) {
   257  	f := New(1000, 4)
   258  	if f.Cap() != f.m {
   259  		t.Error("not accessing Cap() correctly")
   260  	}
   261  }
   262  
   263  func TestK(t *testing.T) {
   264  	f := New(1000, 4)
   265  	if f.K() != f.k {
   266  		t.Error("not accessing K() correctly")
   267  	}
   268  }
   269  
   270  func TestMarshalUnmarshalJSON(t *testing.T) {
   271  	f := New(1000, 4)
   272  	data, err := json.Marshal(f)
   273  	if err != nil {
   274  		t.Fatal(err.Error())
   275  	}
   276  
   277  	var g BloomFilter
   278  	err = json.Unmarshal(data, &g)
   279  	if err != nil {
   280  		t.Fatal(err.Error())
   281  	}
   282  	if g.m != f.m {
   283  		t.Error("invalid m value")
   284  	}
   285  	if g.k != f.k {
   286  		t.Error("invalid k value")
   287  	}
   288  	if g.b == nil {
   289  		t.Fatal("bitset is nil")
   290  	}
   291  	if !g.b.Equal(f.b) {
   292  		t.Error("bitsets are not equal")
   293  	}
   294  }
   295  
   296  func TestUnmarshalInvalidJSON(t *testing.T) {
   297  	data := []byte("{invalid}")
   298  
   299  	var g BloomFilter
   300  	err := g.UnmarshalJSON(data)
   301  	if err == nil {
   302  		t.Error("expected error while unmarshalling invalid data")
   303  	}
   304  }
   305  
   306  func TestWriteToReadFrom(t *testing.T) {
   307  	var b bytes.Buffer
   308  	f := New(1000, 4)
   309  	_, err := f.WriteTo(&b)
   310  	if err != nil {
   311  		t.Fatal(err)
   312  	}
   313  
   314  	g := New(1000, 1)
   315  	_, err = g.ReadFrom(&b)
   316  	if err != nil {
   317  		t.Fatal(err)
   318  	}
   319  	if g.m != f.m {
   320  		t.Error("invalid m value")
   321  	}
   322  	if g.k != f.k {
   323  		t.Error("invalid k value")
   324  	}
   325  	if g.b == nil {
   326  		t.Fatal("bitset is nil")
   327  	}
   328  	if !g.b.Equal(f.b) {
   329  		t.Error("bitsets are not equal")
   330  	}
   331  
   332  	g.Test([]byte(""))
   333  }
   334  
   335  func TestReadWriteBinary(t *testing.T) {
   336  	f := New(1000, 4)
   337  	var buf bytes.Buffer
   338  	bytesWritten, err := f.WriteTo(&buf)
   339  	if err != nil {
   340  		t.Fatal(err.Error())
   341  	}
   342  	if bytesWritten != int64(buf.Len()) {
   343  		t.Errorf("incorrect write length %d != %d", bytesWritten, buf.Len())
   344  	}
   345  
   346  	var g BloomFilter
   347  	bytesRead, err := g.ReadFrom(&buf)
   348  	if err != nil {
   349  		t.Fatal(err.Error())
   350  	}
   351  	if bytesRead != bytesWritten {
   352  		t.Errorf("read unexpected number of bytes %d != %d", bytesRead, bytesWritten)
   353  	}
   354  	if g.m != f.m {
   355  		t.Error("invalid m value")
   356  	}
   357  	if g.k != f.k {
   358  		t.Error("invalid k value")
   359  	}
   360  	if g.b == nil {
   361  		t.Fatal("bitset is nil")
   362  	}
   363  	if !g.b.Equal(f.b) {
   364  		t.Error("bitsets are not equal")
   365  	}
   366  }
   367  
   368  func TestEncodeDecodeGob(t *testing.T) {
   369  	f := New(1000, 4)
   370  	f.Add([]byte("one"))
   371  	f.Add([]byte("two"))
   372  	f.Add([]byte("three"))
   373  	var buf bytes.Buffer
   374  	err := gob.NewEncoder(&buf).Encode(f)
   375  	if err != nil {
   376  		t.Fatal(err.Error())
   377  	}
   378  
   379  	var g BloomFilter
   380  	err = gob.NewDecoder(&buf).Decode(&g)
   381  	if err != nil {
   382  		t.Fatal(err.Error())
   383  	}
   384  	if g.m != f.m {
   385  		t.Error("invalid m value")
   386  	}
   387  	if g.k != f.k {
   388  		t.Error("invalid k value")
   389  	}
   390  	if g.b == nil {
   391  		t.Fatal("bitset is nil")
   392  	}
   393  	if !g.b.Equal(f.b) {
   394  		t.Error("bitsets are not equal")
   395  	}
   396  	if !g.Test([]byte("three")) {
   397  		t.Errorf("missing value 'three'")
   398  	}
   399  	if !g.Test([]byte("two")) {
   400  		t.Errorf("missing value 'two'")
   401  	}
   402  	if !g.Test([]byte("one")) {
   403  		t.Errorf("missing value 'one'")
   404  	}
   405  }
   406  
   407  func TestEqual(t *testing.T) {
   408  	f := New(1000, 4)
   409  	f1 := New(1000, 4)
   410  	g := New(1000, 20)
   411  	h := New(10, 20)
   412  	n1 := []byte("Bess")
   413  	f1.Add(n1)
   414  	if !f.Equal(f) {
   415  		t.Errorf("%v should be equal to itself", f)
   416  	}
   417  	if f.Equal(f1) {
   418  		t.Errorf("%v should not be equal to %v", f, f1)
   419  	}
   420  	if f.Equal(g) {
   421  		t.Errorf("%v should not be equal to %v", f, g)
   422  	}
   423  	if f.Equal(h) {
   424  		t.Errorf("%v should not be equal to %v", f, h)
   425  	}
   426  }
   427  
   428  func BenchmarkEstimated(b *testing.B) {
   429  	for n := uint(100000); n <= 100000; n *= 10 {
   430  		for fp := 0.1; fp >= 0.0001; fp /= 10.0 {
   431  			f := NewWithEstimates(n, fp)
   432  			f.EstimateFalsePositiveRate(n)
   433  		}
   434  	}
   435  }
   436  
   437  func BenchmarkSeparateTestAndAdd(b *testing.B) {
   438  	f := NewWithEstimates(uint(b.N), 0.0001)
   439  	key := make([]byte, 100)
   440  	b.ResetTimer()
   441  	for i := 0; i < b.N; i++ {
   442  		binary.BigEndian.PutUint32(key, uint32(i))
   443  		f.Test(key)
   444  		f.Add(key)
   445  	}
   446  }
   447  
   448  func BenchmarkCombinedTestAndAdd(b *testing.B) {
   449  	f := NewWithEstimates(uint(b.N), 0.0001)
   450  	key := make([]byte, 100)
   451  	b.ResetTimer()
   452  	for i := 0; i < b.N; i++ {
   453  		binary.BigEndian.PutUint32(key, uint32(i))
   454  		f.TestAndAdd(key)
   455  	}
   456  }
   457  
   458  func TestMerge(t *testing.T) {
   459  	f := New(1000, 4)
   460  	n1 := []byte("f")
   461  	f.Add(n1)
   462  
   463  	g := New(1000, 4)
   464  	n2 := []byte("g")
   465  	g.Add(n2)
   466  
   467  	h := New(999, 4)
   468  	n3 := []byte("h")
   469  	h.Add(n3)
   470  
   471  	j := New(1000, 5)
   472  	n4 := []byte("j")
   473  	j.Add(n4)
   474  
   475  	err := f.Merge(g)
   476  	if err != nil {
   477  		t.Errorf("There should be no error when merging two similar filters")
   478  	}
   479  
   480  	err = f.Merge(h)
   481  	if err == nil {
   482  		t.Errorf("There should be an error when merging filters with mismatched m")
   483  	}
   484  
   485  	err = f.Merge(j)
   486  	if err == nil {
   487  		t.Errorf("There should be an error when merging filters with mismatched k")
   488  	}
   489  
   490  	n2b := f.Test(n2)
   491  	if !n2b {
   492  		t.Errorf("The value doesn't exist after a valid merge")
   493  	}
   494  
   495  	n3b := f.Test(n3)
   496  	if n3b {
   497  		t.Errorf("The value exists after an invalid merge")
   498  	}
   499  
   500  	n4b := f.Test(n4)
   501  	if n4b {
   502  		t.Errorf("The value exists after an invalid merge")
   503  	}
   504  }
   505  
   506  func TestCopy(t *testing.T) {
   507  	f := New(1000, 4)
   508  	n1 := []byte("f")
   509  	f.Add(n1)
   510  
   511  	// copy here instead of New
   512  	g := f.Copy()
   513  	n2 := []byte("g")
   514  	g.Add(n2)
   515  
   516  	n1fb := f.Test(n1)
   517  	if !n1fb {
   518  		t.Errorf("The value doesn't exist in original after making a copy")
   519  	}
   520  
   521  	n1gb := g.Test(n1)
   522  	if !n1gb {
   523  		t.Errorf("The value doesn't exist in the copy")
   524  	}
   525  
   526  	n2fb := f.Test(n2)
   527  	if n2fb {
   528  		t.Errorf("The value exists in the original, it should only exist in copy")
   529  	}
   530  
   531  	n2gb := g.Test(n2)
   532  	if !n2gb {
   533  		t.Errorf("The value doesn't exist in copy after Add()")
   534  	}
   535  }
   536  
   537  func TestFrom(t *testing.T) {
   538  	var (
   539  		k    = uint(5)
   540  		data = make([]uint64, 10)
   541  		test = []byte("test")
   542  	)
   543  
   544  	bf := From(data, k)
   545  	if bf.K() != k {
   546  		t.Errorf("Constant k does not match the expected value")
   547  	}
   548  
   549  	if bf.Cap() != uint(len(data)*64) {
   550  		t.Errorf("Capacity does not match the expected value")
   551  	}
   552  
   553  	if bf.Test(test) {
   554  		t.Errorf("Bloom filter should not contain the value")
   555  	}
   556  
   557  	bf.Add(test)
   558  	if !bf.Test(test) {
   559  		t.Errorf("Bloom filter should contain the value")
   560  	}
   561  
   562  	// create a new Bloom filter from an existing (populated) data slice.
   563  	bf = From(data, k)
   564  	if !bf.Test(test) {
   565  		t.Errorf("Bloom filter should contain the value")
   566  	}
   567  }
   568  
   569  func TestTestLocations(t *testing.T) {
   570  	f := NewWithEstimates(1000, 0.001)
   571  	n1 := []byte("Love")
   572  	n2 := []byte("is")
   573  	n3 := []byte("in")
   574  	n4 := []byte("bloom")
   575  	f.Add(n1)
   576  	n3a := f.TestLocations(Locations(n3, f.K()))
   577  	f.Add(n3)
   578  	n1b := f.TestLocations(Locations(n1, f.K()))
   579  	n2b := f.TestLocations(Locations(n2, f.K()))
   580  	n3b := f.TestLocations(Locations(n3, f.K()))
   581  	n4b := f.TestLocations(Locations(n4, f.K()))
   582  	if !n1b {
   583  		t.Errorf("%v should be in.", n1)
   584  	}
   585  	if n2b {
   586  		t.Errorf("%v should not be in.", n2)
   587  	}
   588  	if n3a {
   589  		t.Errorf("%v should not be in the first time we look.", n3)
   590  	}
   591  	if !n3b {
   592  		t.Errorf("%v should be in the second time we look.", n3)
   593  	}
   594  	if n4b {
   595  		t.Errorf("%v should be in.", n4)
   596  	}
   597  }